Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -59,7 +59,7 @@ def process_image(input_image):
|
|
59 |
|
60 |
# OPTIMIZATION: Resize the image to reduce processing time
|
61 |
# Calculate aspect ratio to maintain proportions
|
62 |
-
max_size = (800, 800)
|
63 |
cropped_image.thumbnail(max_size, Image.LANCZOS)
|
64 |
|
65 |
# Select the corresponding OCR prompt based on the YOLO label
|
@@ -98,15 +98,30 @@ def process_image(input_image):
|
|
98 |
)
|
99 |
inputs = {k: v.to(device) for k, v in inputs.items()}
|
100 |
|
101 |
-
#
|
|
|
|
|
|
|
102 |
output = ocr_model.generate(
|
103 |
**inputs,
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
do_sample=False
|
108 |
-
early_stopping=True # Add early stopping to prevent unnecessary generation
|
109 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
prompt_length = inputs["input_ids"].shape[1]
|
111 |
new_tokens = output[:, prompt_length:]
|
112 |
text_output = ocr_processor.tokenizer.batch_decode(new_tokens, skip_special_tokens=True)
|
@@ -129,7 +144,8 @@ iface = gr.Interface(
|
|
129 |
"to detect and crop the document (front/back) with a confidence threshold of 0.85, and "
|
130 |
"then extracts text using an OCR model with a corresponding prompt."
|
131 |
),
|
|
|
132 |
)
|
133 |
|
134 |
-
# Enable queue
|
135 |
iface.launch(share=True)
|
|
|
59 |
|
60 |
# OPTIMIZATION: Resize the image to reduce processing time
|
61 |
# Calculate aspect ratio to maintain proportions
|
62 |
+
max_size = (640, 640) # Further reduced from 800x800
|
63 |
cropped_image.thumbnail(max_size, Image.LANCZOS)
|
64 |
|
65 |
# Select the corresponding OCR prompt based on the YOLO label
|
|
|
98 |
)
|
99 |
inputs = {k: v.to(device) for k, v in inputs.items()}
|
100 |
|
101 |
+
# FIXED: Generation parameters with proper combinations to avoid warnings
|
102 |
+
# Choose one of these two approaches:
|
103 |
+
|
104 |
+
# Approach 1: Greedy decoding (fastest)
|
105 |
output = ocr_model.generate(
|
106 |
**inputs,
|
107 |
+
max_new_tokens=40,
|
108 |
+
temperature=0.2,
|
109 |
+
num_beams=1,
|
110 |
+
do_sample=False # Greedy decoding
|
|
|
111 |
)
|
112 |
+
|
113 |
+
# Uncomment this block and comment the above if you want sampling instead
|
114 |
+
# # Approach 2: Sampling (more natural but slower)
|
115 |
+
# output = ocr_model.generate(
|
116 |
+
# **inputs,
|
117 |
+
# max_new_tokens=40,
|
118 |
+
# do_sample=True,
|
119 |
+
# temperature=0.2,
|
120 |
+
# top_p=0.95,
|
121 |
+
# top_k=50,
|
122 |
+
# num_return_sequences=1
|
123 |
+
# )
|
124 |
+
|
125 |
prompt_length = inputs["input_ids"].shape[1]
|
126 |
new_tokens = output[:, prompt_length:]
|
127 |
text_output = ocr_processor.tokenizer.batch_decode(new_tokens, skip_special_tokens=True)
|
|
|
144 |
"to detect and crop the document (front/back) with a confidence threshold of 0.85, and "
|
145 |
"then extracts text using an OCR model with a corresponding prompt."
|
146 |
),
|
147 |
+
allow_flagging="never" # Disable flagging to simplify UI
|
148 |
)
|
149 |
|
150 |
+
# Enable queue and sharing for Hugging Face Space
|
151 |
iface.launch(share=True)
|