Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -59,7 +59,7 @@ def process_image(input_image):
|
|
59 |
|
60 |
# OPTIMIZATION: Resize the image to reduce processing time
|
61 |
# Calculate aspect ratio to maintain proportions
|
62 |
-
max_size = (800, 800)
|
63 |
cropped_image.thumbnail(max_size, Image.LANCZOS)
|
64 |
|
65 |
# Select the corresponding OCR prompt based on the YOLO label
|
@@ -98,15 +98,30 @@ def process_image(input_image):
|
|
98 |
)
|
99 |
inputs = {k: v.to(device) for k, v in inputs.items()}
|
100 |
|
101 |
-
#
|
|
|
|
|
|
|
102 |
output = ocr_model.generate(
|
103 |
**inputs,
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
do_sample=False
|
108 |
-
early_stopping=True # Add early stopping to prevent unnecessary generation
|
109 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
prompt_length = inputs["input_ids"].shape[1]
|
111 |
new_tokens = output[:, prompt_length:]
|
112 |
text_output = ocr_processor.tokenizer.batch_decode(new_tokens, skip_special_tokens=True)
|
@@ -129,7 +144,8 @@ iface = gr.Interface(
|
|
129 |
"to detect and crop the document (front/back) with a confidence threshold of 0.85, and "
|
130 |
"then extracts text using an OCR model with a corresponding prompt."
|
131 |
),
|
|
|
132 |
)
|
133 |
|
134 |
-
# Enable queue
|
135 |
iface.launch(share=True)
|
|
|
59 |
|
60 |
# OPTIMIZATION: Resize the image to reduce processing time
|
61 |
# Calculate aspect ratio to maintain proportions
|
62 |
+
max_size = (640, 640) # Further reduced from 800x800
|
63 |
cropped_image.thumbnail(max_size, Image.LANCZOS)
|
64 |
|
65 |
# Select the corresponding OCR prompt based on the YOLO label
|
|
|
98 |
)
|
99 |
inputs = {k: v.to(device) for k, v in inputs.items()}
|
100 |
|
101 |
+
# FIXED: Generation parameters with proper combinations to avoid warnings
|
102 |
+
# Choose one of these two approaches:
|
103 |
+
|
104 |
+
# Approach 1: Greedy decoding (fastest)
|
105 |
output = ocr_model.generate(
|
106 |
**inputs,
|
107 |
+
max_new_tokens=40,
|
108 |
+
temperature=0.2,
|
109 |
+
num_beams=1,
|
110 |
+
do_sample=False # Greedy decoding
|
|
|
111 |
)
|
112 |
+
|
113 |
+
# Uncomment this block and comment the above if you want sampling instead
|
114 |
+
# # Approach 2: Sampling (more natural but slower)
|
115 |
+
# output = ocr_model.generate(
|
116 |
+
# **inputs,
|
117 |
+
# max_new_tokens=40,
|
118 |
+
# do_sample=True,
|
119 |
+
# temperature=0.2,
|
120 |
+
# top_p=0.95,
|
121 |
+
# top_k=50,
|
122 |
+
# num_return_sequences=1
|
123 |
+
# )
|
124 |
+
|
125 |
prompt_length = inputs["input_ids"].shape[1]
|
126 |
new_tokens = output[:, prompt_length:]
|
127 |
text_output = ocr_processor.tokenizer.batch_decode(new_tokens, skip_special_tokens=True)
|
|
|
144 |
"to detect and crop the document (front/back) with a confidence threshold of 0.85, and "
|
145 |
"then extracts text using an OCR model with a corresponding prompt."
|
146 |
),
|
147 |
+
allow_flagging="never" # Disable flagging to simplify UI
|
148 |
)
|
149 |
|
150 |
+
# Enable queue and sharing for Hugging Face Space
|
151 |
iface.launch(share=True)
|