Spaces:

ZennyKenny
/

note-to-text

Running on Zero

ZennyKenny committed 7 days ago

Commit

6e9b692

verified ·

1 Parent(s): 9bd6659

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,42 +2,22 @@ import gradio as gr
 from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 from PIL import Image
 import torch
-from torchvision import transforms
-import matplotlib.pyplot as plt
 import spaces
 # Load TrOCR model
 processor = TrOCRProcessor.from_pretrained("microsoft/trocr-large-handwritten")
 model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-large-handwritten")
-def preprocess_image(image):
-    # Convert image to RGB
-    image = image.convert("RGB")
-    # Resize and normalize the image to [0, 1]
-    transform = transforms.Compose([
-        transforms.Resize((384, 384)),  # Resize to the expected input size
-        transforms.ToTensor(),          # Convert to tensor and scale to [0, 1]
-    ])
-    pixel_values = transform(image).unsqueeze(0)  # Add batch dimension
-    return pixel_values
-def visualize_image(pixel_values):
-    # Convert tensor to numpy array and permute dimensions for visualization
-    image = pixel_values.squeeze().permute(1, 2, 0).numpy()
-    plt.imshow(image)
-    plt.title("Preprocessed Image")
-    plt.show()
 @spaces.GPU
 def recognize_text(image):
     try:
-        # Preprocess the image
-        pixel_values = preprocess_image(image)
-        print("Image preprocessed. Pixel values shape:", pixel_values.shape)
-        # Visualize preprocessed image
-        visualize_image(pixel_values)
         # Generate text from the image
         with torch.no_grad():

 from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 from PIL import Image
 import torch
 import spaces
 # Load TrOCR model
 processor = TrOCRProcessor.from_pretrained("microsoft/trocr-large-handwritten")
 model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-large-handwritten")
 @spaces.GPU
 def recognize_text(image):
     try:
+        # Convert image to RGB
+        image = image.convert("RGB")
+        print("Image converted to RGB.")
+        # Preprocess the image using the processor
+        pixel_values = processor(image, return_tensors="pt").pixel_values
+        print("Image preprocessed. Pixel values shape:", pixel_values.shape)
         # Generate text from the image
         with torch.no_grad():