FEAT: Basic VGG inference added
- app.py +113 -67
- models/vgg_4.pt +3 -0
- models/vgg_50.pt +3 -0
- models/vgg_8.pt +3 -0
- text_images/0.png +0 -0
- text_images/1.png +0 -0
- text_images/2.png +0 -0
- text_images/3.png +0 -0
- text_images/4.png +0 -0
app.py
CHANGED
@@ -9,7 +9,26 @@ import os
 import torch
 from ocr_libs import tess_ocr
 
-def process_image(image):
+class PadWhite(object):
+    def __init__(self, size):
+        assert isinstance(size, (int, tuple))
+        if isinstance(size, tuple):
+            self.height, self.width = size
+        elif isinstance(size, int):
+            self.height = self.width = size
+
+    def __call__(self, img):
+        if img.size[0] > self.width or img.size[1] > self.height:
+            img.thumbnail((self.width, self.height))
+        delta_width = self.width - img.size[0]
+        delta_height = self.height - img.size[1]
+        pad_width = delta_width // 2
+        pad_height = delta_height // 2
+        padding = (pad_width, pad_height, delta_width -
+                   pad_width, delta_height - pad_height)
+        return ImageOps.expand(img, padding, fill=255)
+
+def process_image_pos(image):
     target_size = (400, 512)
     # image = Image.open(img_name).convert("L")
     w, h = image.size
@@ -32,9 +51,19 @@ def process_image(image):
     # image = torch.tensor(image)
     return image
 
+def process_image_vgg(image):
+    input_size = (32, 128)
+    transform = transforms.Compose([
+        PadWhite(input_size),
+        transforms.ToTensor(),
+    ])
+    image = transform(image)
+    return image
+
+
+
 @st.cache_resource
-def load_models():
-    model_paths = ["models/prep_50.pt", "models/prep_4.pt", "models/prep_4.pt"]
+def load_models(model_paths):
     models = [torch.load(mpath, map_location='cpu').eval() for mpath in model_paths]
     return models
 
@@ -44,74 +73,91 @@ def load_ocr():
 def get_text_boxes(_ocr, image):
     return _ocr.detect_text(image)
 
-def clean_image(image, model):
+def clean_image(image, model, shape=(400, 512)):
     img_out = model(image.unsqueeze(0))
-    img_out = transforms.ToPILImage()(img_out.reshape( …
+    img_out = transforms.ToPILImage()(img_out.reshape(*shape).detach())
     return img_out
 
 ocr = load_ocr()
-… (62 removed lines, old 53-114: remainder of the previous app body; not legible in this page capture)
+
+
+dataset = st.radio(
+    "Choose image type ",
+    ('POS', 'VGG'))
+if dataset == "POS":
+    model_paths = ["models/prep_50.pt", "models/prep_8.pt", "models/prep_4.pt"]
+    process_image = process_image_pos
+    image_folder = "receipt_images"
+    shape = (400, 512)
+
+elif dataset == "VGG":
+    model_paths = ["models/vgg_50.pt", "models/vgg_8.pt", "models/vgg_4.pt"]
+    process_image = process_image_vgg
+    image_folder = "text_images"
+    shape = (32, 128)
+
+if dataset:
+    NUM_IMAGES = 3
+
+    image_paths = [f"{image_folder}/{i}.png" for i in range(NUM_IMAGES)]
+    img = None
+
+    img_index = image_select(
+        label="Select Image",
+        images=image_paths,
+        use_container_width=False,
+        index=-1,
+        return_value="index"
+    )
+    img = None
+    with st.form("my-form", clear_on_submit=True):
+        image_file = st.file_uploader("Upload Image", type=['png', 'jpeg', 'jpg'])
+        submitted = st.form_submit_button("UPLOAD!")
+        if submitted and image_file is not None:
+            img = Image.open(image_file).convert("L")
+
+    # If no image was uploaded, use selected image
+    if img is None and img_index >= 0:
+        img = Image.open(image_paths[img_index]).convert("L")
+
+    cols = st.columns(4)
+
+    # Set Text
+    cols[0].text("Input Image")
+    cols[1].text("Full Training")
+    cols[2].text("8%")
+    cols[3].text("4%")
+    models = load_models(model_paths)
+
+    if img is not None:
+        with st.spinner('Document Cleaning in progress ...'):
+            img_tensor = process_image(img)
+            pil_image = transforms.ToPILImage()(img_tensor)
+            clned_imgs = [clean_image(torch.clone(img_tensor), m, shape) for m in models]
+            cols[0].image(pil_image)
+            for i in range(3):
+                cols[i + 1].image(clned_imgs[i])
+
+        with st.spinner('Text Detection and Recognition in progress ...'):
+            text_boxes = get_text_boxes(ocr, pil_image)
+            all_texts = list()
+            all_texts.append(ocr.extract_text(pil_image, text_boxes))
+            for i in range(3):
+                all_texts.append(ocr.extract_text(clned_imgs[i], text_boxes))
+            # text_boxes_more = get_text_boxes(ocr, clned_imgs[3])
+            title_cols = st.columns(5)
+            headings = ["Word Image", "Original", "Cleaned (100%)", "Cleaned (8%)", "Cleaned (4%)"]
+            for i, heading in enumerate(headings):
+                title_cols[i].markdown(f"## {heading}")
+
+            for i, box in enumerate(text_boxes):
+                txt_box_cols = st.columns(5)
+                txt_box_cols[0].image(box[0], use_column_width="always")
+                for j in range(4):
+                    txt_box_cols[j + 1].text(all_texts[j][i])
 
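A minimal sketch (not part of the commit) of the tensor shapes the new VGG path assumes, using a dummy identity model in place of the committed checkpoints; `pad_white` is a hypothetical helper mirroring `PadWhite`, and the synthetic input stands in for one of the `text_images` crops:

```python
# Shape walk-through of the VGG path added above, under stated assumptions:
# pad_white is a hypothetical stand-in for app.py's PadWhite (pad with white
# to 32x128, thumbnailing oversized inputs), and nn.Identity replaces the
# real vgg_*.pt cleaning models.
import torch
from PIL import Image, ImageOps
from torchvision import transforms

def pad_white(img, size=(32, 128)):
    h, w = size
    if img.size[0] > w or img.size[1] > h:
        img.thumbnail((w, h))
    dw, dh = w - img.size[0], h - img.size[1]
    return ImageOps.expand(img, (dw // 2, dh // 2, dw - dw // 2, dh - dh // 2), fill=255)

img = Image.new("L", (100, 20), color=0)        # synthetic grayscale word crop
x = transforms.ToTensor()(pad_white(img))       # like process_image_vgg -> (1, 32, 128)
out = torch.nn.Identity()(x.unsqueeze(0))       # app.py feeds the model (1, 1, 32, 128)
pil = transforms.ToPILImage()(out.reshape(32, 128).detach())  # as clean_image does
print(x.shape, out.shape, pil.size)             # (1,32,128) (1,1,32,128) (128, 32)
```

The reshape in `clean_image` only works because each cleaning model returns exactly 32x128 values per image, which is what the new `shape` argument encodes for the VGG branch.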
models/vgg_4.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66973ba641156bc7afc03fe16846538887d5aba999b524a9f4f630a2d5f09e94
+size 31124602
models/vgg_50.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2cc5cd193eb730070620680796f5ac0b767213cbfae161bff485a37061966f0e
+size 31124700
models/vgg_8.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a177ea051d8a2b365e778e4fd8f44b1cfbfa93edc1fe22affa1e7a643b6eed91
+size 31124501
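The three .pt entries above are Git LFS pointer files; the ~31 MB weights themselves live in LFS storage. As a hedged sketch (not from the commit), one checkpoint could be smoke-tested the same way load_models consumes it, assuming the LFS objects have been pulled and that each file holds a full pickled nn.Module, as the `.eval()` call in app.py implies:

```python
# Smoke-test one committed checkpoint, mirroring load_models() in app.py.
# Assumes `git lfs pull` has materialised the real weights and that the file
# stores a pickled module rather than a state_dict (implied by .eval()).
import torch

model = torch.load("models/vgg_8.pt", map_location="cpu").eval()

with torch.no_grad():
    dummy = torch.rand(1, 1, 32, 128)   # one padded 32x128 word image, 1 channel
    out = model(dummy)

print(out.shape)                        # app.py reshapes this to (32, 128)
```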
text_images/0.png
ADDED
text_images/1.png
ADDED
text_images/2.png
ADDED
text_images/3.png
ADDED
text_images/4.png
ADDED