gtata commited on
Commit
2ecfea1
·
1 Parent(s): 43e62b9

FEAT: Tesseract OCR text recognition added

Browse files

- UI displays text crops and corresponding text for different preprocessed images

app.py CHANGED
@@ -7,6 +7,7 @@ from streamlit_image_select import image_select
7
  import torchvision.transforms as transforms
8
  import os
9
  import torch
 
10
 
11
  def process_image(image):
12
  target_size = (400, 512)
@@ -31,23 +32,29 @@ def process_image(image):
31
  # image = torch.tensor(image)
32
  return image
33
 
34
-
35
  def load_models():
36
  model_paths = ["models/prep_50.pt", "models/prep_4.pt", "models/prep_4.pt"]
37
  models = [torch.load(mpath, map_location='cpu').eval() for mpath in model_paths]
38
  return models
39
 
 
 
 
 
 
 
 
40
  def clean_image(image, model):
41
  img_out = model(image.unsqueeze(0))
42
- img_out = img_out.reshape(400, 512).detach().numpy()
43
  return img_out
44
 
 
45
  image_folder = "receipt_images"
46
  NUM_IMAGES = 3
47
 
48
  image_paths = [f"{image_folder}/{i}.png" for i in range(NUM_IMAGES)]
49
-
50
-
51
  img = None
52
 
53
  img_index = image_select(
@@ -64,42 +71,46 @@ with st.form("my-form", clear_on_submit=True):
64
  if submitted and image_file is not None:
65
  img = Image.open(image_file).convert("L")
66
 
67
- print(img_index)
68
- if img is None:
69
  img = Image.open(image_paths[img_index]).convert("L")
70
 
71
 
72
  cols = st.columns(4)
73
- cols[0].text("Input Image")
74
 
 
 
75
  cols[1].text("Full Training")
76
  cols[2].text("8%")
77
  cols[3].text("4%")
78
-
79
  models = load_models()
80
 
81
  if img is not None:
82
-
83
- img_tensor = process_image(img)
84
- clned_imgs = [clean_image(img_tensor, m) for m in models]
85
-
86
-
87
- cols[0].image(img_tensor.permute(1,2, 0).numpy())
88
-
89
-
90
- cols[1].image(clned_imgs[0])
91
-
92
-
93
- cols[2].image(clned_imgs[1])
94
-
95
-
96
- cols[3].image(clned_imgs[2])
97
-
98
-
99
-
100
-
101
-
102
- # cols = st.columns(NUM_IMAGES)
103
- # for i in range(NUM_IMAGES):
104
- # image = Image.open(os.path.join(image_folder, f"{i}.png"))
105
- # cols[i].image(image)
 
 
 
 
 
7
  import torchvision.transforms as transforms
8
  import os
9
  import torch
10
+ from ocr_libs import tess_ocr
11
 
12
  def process_image(image):
13
  target_size = (400, 512)
 
32
  # image = torch.tensor(image)
33
  return image
34
 
35
@st.cache_resource
def load_models():
    """Load the three document-cleaning models once and cache them across
    Streamlit reruns.

    Returns:
        list of eval-mode torch models, ordered to match the UI columns
        "Full Training", "8%" and "4%".

    NOTE(review): the second and third entries are both "models/prep_4.pt";
    the "8%" column presumably should load a prep_8 checkpoint — confirm
    against the models/ directory before changing.
    """
    model_paths = ["models/prep_50.pt", "models/prep_4.pt", "models/prep_4.pt"]
    models = [torch.load(mpath, map_location='cpu').eval() for mpath in model_paths]
    return models
40
 
41
@st.cache_resource
def load_ocr():
    """Construct the Tesseract OCR wrapper once; cached across reruns."""
    engine = tess_ocr()
    return engine
44
+
45
def get_text_boxes(_ocr, image):
    """Detect word-level text boxes in *image* using the OCR engine.

    The leading underscore on _ocr presumably keeps Streamlit caching from
    trying to hash the engine object — confirm if this is ever cached.
    """
    boxes = _ocr.detect_text(image)
    return boxes
47
+
48
def clean_image(image, model):
    """Run a document-cleaning model over a grayscale image tensor.

    Args:
        image: torch.Tensor shaped (1, H, W), as produced by process_image.
        model: callable cleaning network applied to the batched (1, 1, H, W)
            input.

    Returns:
        PIL.Image of the cleaned document at the input's H x W resolution.
    """
    # Add a batch dimension for the model, then flatten back to (H, W).
    img_out = model(image.unsqueeze(0))
    # Derive H/W from the input instead of hard-coding 400x512 so this helper
    # stays in sync with whatever target_size process_image uses.
    img_out = transforms.ToPILImage()(img_out.reshape(image.shape[-2], image.shape[-1]).detach())
    return img_out
52
 
53
+ ocr = load_ocr()
54
  image_folder = "receipt_images"
55
  NUM_IMAGES = 3
56
 
57
  image_paths = [f"{image_folder}/{i}.png" for i in range(NUM_IMAGES)]
 
 
58
  img = None
59
 
60
  img_index = image_select(
 
71
  if submitted and image_file is not None:
72
  img = Image.open(image_file).convert("L")
73
 
74
+ # If no image was uploaded, use selected image
75
+ if img is None and img_index >= 0:
76
  img = Image.open(image_paths[img_index]).convert("L")
77
 
78
 
79
  cols = st.columns(4)
 
80
 
81
+ # Set Text
82
+ cols[0].text("Input Image")
83
  cols[1].text("Full Training")
84
  cols[2].text("8%")
85
  cols[3].text("4%")
 
86
  models = load_models()
87
 
88
if img is not None:
    with st.spinner('Document Cleaning in progress ...'):
        # Preprocess once; keep a PIL copy for display and for OCR.
        img_tensor = process_image(img)
        pil_image = transforms.ToPILImage()(img_tensor)
        # Clone per model — presumably guards against in-place ops inside the
        # cleaning networks; confirm before removing.
        clned_imgs = [clean_image(torch.clone(img_tensor), m) for m in models]

        # Top row: input image followed by the three cleaned variants.
        cols[0].image(pil_image)
        for i, cleaned in enumerate(clned_imgs):
            cols[i + 1].image(cleaned)

        # Detect word boxes on the input image once, then OCR those same
        # boxes on the input and on each cleaned variant so the per-word
        # rows below compare like with like.
        text_boxes = get_text_boxes(ocr, pil_image)
        all_texts = [ocr.extract_text(pil_image, text_boxes)]
        for cleaned in clned_imgs:
            all_texts.append(ocr.extract_text(cleaned, text_boxes))

        # One row per detected word: the crop plus the four recognitions.
        for i, box in enumerate(text_boxes):
            txt_box_cols = st.columns(5)
            txt_box_cols[0].image(box[0], use_column_width="always")
            for j, texts in enumerate(all_texts):
                txt_box_cols[j + 1].text(texts[i])
ocr_libs.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tesserocr
2
+
3
+
4
+
5
class tess_ocr:
    """Thin wrapper around tesserocr: word-box detection on a full page and
    per-box text re-recognition for receipt images."""

    def __init__(self):
        # Full-page API used for layout analysis / word detection.
        self.api = tesserocr.PyTessBaseAPI(lang='eng')
        # Single-line LSTM API used to re-recognize each cropped word.
        self.api_line = tesserocr.PyTessBaseAPI(lang='eng', psm=tesserocr.PSM.SINGLE_LINE, oem=tesserocr.OEM.LSTM_ONLY)

    def detect_text(self, image):
        """Return word-level component images and boxes for a PIL image."""
        self.api.SetImage(image)
        boxes = self.api.GetComponentImages(tesserocr.RIL.WORD, True)
        return boxes

    def extract_text(self, image, boxes):
        """OCR each box (as returned by detect_text) against *image*.

        Boxes are padded by OFFSET pixels of context; coordinates are clamped
        to the image bounds so crops near the border do not include the
        undefined padding PIL generates for out-of-range crop windows.

        Returns:
            list of recognized strings, one per box (possibly empty strings).
        """
        OFFSET = 6
        width, height = image.size
        texts = []
        for _, box, _, _ in boxes:
            # Clamp the padded window (fixes negative coordinates for boxes
            # touching the top/left edge of the page).
            left = max(0, box["x"] - OFFSET)
            top = max(0, box["y"] - OFFSET)
            right = min(width, box["x"] + box["w"] + OFFSET)
            bottom = min(height, box["y"] + box["h"] + OFFSET)
            cropped = image.crop((left, top, right, bottom))
            self.api_line.SetImage(cropped)
            texts.append(self.api_line.GetUTF8Text().strip())
        return texts
packages.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ libgl1
2
+ cmake
3
+ libssl-dev
4
+ libtesseract-dev
5
+ pkg-config
6
+ tesseract-ocr
receipt_images/0.png CHANGED
receipt_images/2.png CHANGED
receipt_images/4.png ADDED
receipt_images/5.png ADDED