ammariii08 committed
Commit 2c26fe7 · verified · 1 Parent(s): 6b7919b

Update app.py

Files changed (1): app.py (+24 -8)
app.py CHANGED
@@ -59,7 +59,7 @@ def process_image(input_image):
 
     # OPTIMIZATION: Resize the image to reduce processing time
     # Calculate aspect ratio to maintain proportions
-    max_size = (800, 800)  # Reduced from original size
+    max_size = (640, 640)  # Further reduced from 800x800
     cropped_image.thumbnail(max_size, Image.LANCZOS)
 
     # Select the corresponding OCR prompt based on the YOLO label
@@ -98,15 +98,30 @@ def process_image(input_image):
     )
     inputs = {k: v.to(device) for k, v in inputs.items()}
 
-    # OPTIMIZATION: Modified generation parameters for faster processing
+    # FIXED: Generation parameters with proper combinations to avoid warnings
+    # Choose one of these two approaches:
+
+    # Approach 1: Greedy decoding (fastest)
     output = ocr_model.generate(
         **inputs,
-        temperature=0.2,  # Reduced from 0.8 to 0.2 for faster, more deterministic output
-        max_new_tokens=40,  # Slightly reduced from 50
-        num_return_sequences=1,
-        do_sample=False,  # Changed to deterministic for speed
-        early_stopping=True  # Add early stopping to prevent unnecessary generation
+        max_new_tokens=40,
+        temperature=0.2,
+        num_beams=1,
+        do_sample=False  # Greedy decoding
     )
+
+    # Uncomment this block and comment the above if you want sampling instead
+    # # Approach 2: Sampling (more natural but slower)
+    # output = ocr_model.generate(
+    #     **inputs,
+    #     max_new_tokens=40,
+    #     do_sample=True,
+    #     temperature=0.2,
+    #     top_p=0.95,
+    #     top_k=50,
+    #     num_return_sequences=1
+    # )
+
     prompt_length = inputs["input_ids"].shape[1]
     new_tokens = output[:, prompt_length:]
     text_output = ocr_processor.tokenizer.batch_decode(new_tokens, skip_special_tokens=True)
@@ -129,7 +144,8 @@ iface = gr.Interface(
         "to detect and crop the document (front/back) with a confidence threshold of 0.85, and "
         "then extracts text using an OCR model with a corresponding prompt."
     ),
+    allow_flagging="never"  # Disable flagging to simplify UI
 )
 
-# Enable queue for better handling of processing time
+# Enable queue and sharing for Hugging Face Space
 iface.launch(share=True)
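A quick illustration of what the resize change in the first hunk does: PIL's Image.thumbnail() resizes in place, keeps the aspect ratio, and only ever downscales, so the (640, 640) tuple caps both dimensions without distorting the crop. The sketch below is standalone and assumes a placeholder path ("sample.jpg") rather than anything from this Space.

from PIL import Image

# Standalone sketch of the resize step; "sample.jpg" is a placeholder path.
img = Image.open("sample.jpg")   # e.g. a 1920x1080 scan
max_size = (640, 640)

# thumbnail() modifies the image in place, preserves proportions, and never
# upscales: 1920x1080 becomes 640x360, while a 500x300 image is left as-is.
img.thumbnail(max_size, Image.LANCZOS)
print(img.size)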
 
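One note on the "Approach 1" block: even with do_sample=False, passing temperature=0.2 can still trigger the transformers warning that sampling-only flags are ignored in greedy mode, since greedy decoding never reads it. A minimal warning-free variant, assuming the ocr_model and inputs objects built earlier in app.py, would simply drop that flag:

# Sketch only: same greedy decoding as the committed "Approach 1", minus the
# sampling-only flag; `ocr_model` and `inputs` come from earlier in app.py.
output = ocr_model.generate(
    **inputs,
    max_new_tokens=40,
    num_beams=1,
    do_sample=False,  # greedy decoding; temperature/top_p/top_k would be ignored anyway
)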
 
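Finally, the comment above iface.launch() mentions enabling the queue, but launch(share=True) by itself does not configure one; depending on the Gradio version, queuing may need an explicit .queue() call before launch(). A hedged sketch of what that would look like (share=True is typically unnecessary once the app is hosted as a Hugging Face Space):

# Sketch only: chain .queue() before launch() if request queuing is actually wanted.
# share=True is typically redundant when the app already runs as a Space.
iface.queue().launch(share=True)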