ZennyKenny committed on
Commit
6e9b692
·
verified ·
1 Parent(s): 9bd6659

Update app.py: replace manual torchvision preprocessing and matplotlib visualization with TrOCRProcessor

Browse files
Files changed (1) hide show
  1. app.py +6 -26
app.py CHANGED
@@ -2,42 +2,22 @@ import gradio as gr
2
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
3
  from PIL import Image
4
  import torch
5
- from torchvision import transforms
6
- import matplotlib.pyplot as plt
7
  import spaces
8
 
9
  # Load TrOCR model
10
  processor = TrOCRProcessor.from_pretrained("microsoft/trocr-large-handwritten")
11
  model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-large-handwritten")
12
 
13
- def preprocess_image(image):
14
- # Convert image to RGB
15
- image = image.convert("RGB")
16
-
17
- # Resize and normalize the image to [0, 1]
18
- transform = transforms.Compose([
19
- transforms.Resize((384, 384)), # Resize to the expected input size
20
- transforms.ToTensor(), # Convert to tensor and scale to [0, 1]
21
- ])
22
- pixel_values = transform(image).unsqueeze(0) # Add batch dimension
23
- return pixel_values
24
-
25
- def visualize_image(pixel_values):
26
- # Convert tensor to numpy array and permute dimensions for visualization
27
- image = pixel_values.squeeze().permute(1, 2, 0).numpy()
28
- plt.imshow(image)
29
- plt.title("Preprocessed Image")
30
- plt.show()
31
-
32
  @spaces.GPU
33
  def recognize_text(image):
34
  try:
35
- # Preprocess the image
36
- pixel_values = preprocess_image(image)
37
- print("Image preprocessed. Pixel values shape:", pixel_values.shape)
38
 
39
- # Visualize preprocessed image
40
- visualize_image(pixel_values)
 
41
 
42
  # Generate text from the image
43
  with torch.no_grad():
 
2
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
3
  from PIL import Image
4
  import torch
 
 
5
  import spaces
6
 
7
  # Load TrOCR model
8
  processor = TrOCRProcessor.from_pretrained("microsoft/trocr-large-handwritten")
9
  model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-large-handwritten")
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  @spaces.GPU
12
  def recognize_text(image):
13
  try:
14
+ # Convert image to RGB
15
+ image = image.convert("RGB")
16
+ print("Image converted to RGB.")
17
 
18
+ # Preprocess the image using the processor
19
+ pixel_values = processor(image, return_tensors="pt").pixel_values
20
+ print("Image preprocessed. Pixel values shape:", pixel_values.shape)
21
 
22
  # Generate text from the image
23
  with torch.no_grad():