Spaces:
Runtime error
Runtime error
FEAT: Some improvements to text detection
Browse files
- Dockerfile +0 -32
- app.py +9 -1
- ocr_libs.py +8 -5
- requirements.txt +42 -2
Dockerfile
DELETED
@@ -1,32 +0,0 @@
|
|
1 |
-
FROM python:3.10-slim
|
2 |
-
WORKDIR /code
|
3 |
-
|
4 |
-
# Install gcc and cc1plus
|
5 |
-
RUN apt-get update && apt-get install -y gcc g++ make
|
6 |
-
|
7 |
-
# Install tesseract
|
8 |
-
RUN apt-get update && apt-get install -y tesseract-ocr libtesseract-dev libleptonica-dev pkg-config
|
9 |
-
|
10 |
-
# Install python dependencies
|
11 |
-
COPY requirements.txt .
|
12 |
-
RUN pip install -r requirements.txt
|
13 |
-
|
14 |
-
# Set up a new user named "user" with user ID 1000
|
15 |
-
RUN useradd -m -u 1000 user
|
16 |
-
# Switch to the "user" user
|
17 |
-
USER user
|
18 |
-
# Set home to the user's home directory
|
19 |
-
ENV HOME=/home/user \
|
20 |
-
PATH=/home/user/.local/bin:$PATH
|
21 |
-
|
22 |
-
# Set the working directory to the user's home directory
|
23 |
-
WORKDIR $HOME/app
|
24 |
-
|
25 |
-
# Copy the current directory contents into the container at $HOME/app setting the owner to the user
|
26 |
-
COPY --chown=user . $HOME/app
|
27 |
-
|
28 |
-
RUN mkdir -p flagged
|
29 |
-
RUN chmod 777 flagged
|
30 |
-
|
31 |
-
# Run the app
|
32 |
-
CMD ["python", "app.py"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
@@ -92,13 +92,21 @@ if img is not None:
|
|
92 |
cols[0].image(pil_image)
|
93 |
for i in range(3):
|
94 |
cols[i + 1].image(clned_imgs[i])
|
95 |
-
|
|
|
|
|
96 |
text_boxes = get_text_boxes(ocr, pil_image)
|
97 |
all_texts = list()
|
98 |
all_texts.append(ocr.extract_text(pil_image, text_boxes))
|
99 |
for i in range(3):
|
100 |
all_texts.append(ocr.extract_text(clned_imgs[i], text_boxes))
|
101 |
# text_boxes_more = get_text_boxes(ocr, clned_imgs[3])
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
for i, box in enumerate(text_boxes):
|
103 |
txt_box_cols = st.columns(5)
|
104 |
txt_box_cols[0].image(box[0], use_column_width="always")
|
|
|
92 |
cols[0].image(pil_image)
|
93 |
for i in range(3):
|
94 |
cols[i + 1].image(clned_imgs[i])
|
95 |
+
|
96 |
+
|
97 |
+
with st.spinner('Text Detection and Recognition in progress ...'):
|
98 |
text_boxes = get_text_boxes(ocr, pil_image)
|
99 |
all_texts = list()
|
100 |
all_texts.append(ocr.extract_text(pil_image, text_boxes))
|
101 |
for i in range(3):
|
102 |
all_texts.append(ocr.extract_text(clned_imgs[i], text_boxes))
|
103 |
# text_boxes_more = get_text_boxes(ocr, clned_imgs[3])
|
104 |
+
title_cols = st.columns(5)
|
105 |
+
headings = ["Word Image", "Original", "Cleaned (100%)", "Cleaned (8%)", "Cleaned (4%)"]
|
106 |
+
for i, heading in enumerate(headings):
|
107 |
+
title_cols[i].markdown(f"## {heading}")
|
108 |
+
|
109 |
+
|
110 |
for i, box in enumerate(text_boxes):
|
111 |
txt_box_cols = st.columns(5)
|
112 |
txt_box_cols[0].image(box[0], use_column_width="always")
|
ocr_libs.py
CHANGED
@@ -1,20 +1,18 @@
|
|
1 |
# import tesserocr
|
2 |
import pytesseract
|
3 |
from pprint import pprint
|
4 |
-
|
5 |
-
|
6 |
|
7 |
class tess_ocr:
|
8 |
|
9 |
def __init__(self):
|
10 |
pass
|
11 |
-
|
12 |
def detect_text(self, image):
|
13 |
-
boxes = pytesseract.image_to_data(image, output_type='data.frame')
|
14 |
boxes = boxes.dropna().to_dict(orient='list')
|
15 |
text_labels = boxes['text']
|
16 |
text_boxes = list()
|
17 |
-
pprint(boxes)
|
18 |
for i in range(len(boxes)):
|
19 |
x1, y1 = boxes["left"][i], boxes["top"][i]
|
20 |
x2, y2 = x1 + boxes["width"][i], y1 + boxes["height"][i]
|
@@ -24,6 +22,11 @@ class tess_ocr:
|
|
24 |
crops.append(image.crop((box['x1'], box['y1'], box['x2'], box['y2'],)))
|
25 |
return list(zip(crops, text_boxes))
|
26 |
|
|
|
|
|
|
|
|
|
|
|
27 |
def extract_text(self, image, boxes):
|
28 |
OFFSET = 6
|
29 |
texts = list()
|
|
|
1 |
# import tesserocr
|
2 |
import pytesseract
|
3 |
from pprint import pprint
|
4 |
+
import numpy as np
|
|
|
5 |
|
6 |
class tess_ocr:
|
7 |
|
8 |
def __init__(self):
|
9 |
pass
|
10 |
+
|
11 |
def detect_text(self, image):
|
12 |
+
boxes = pytesseract.image_to_data(image, config='--oem 1 --psm 3', output_type='data.frame')
|
13 |
boxes = boxes.dropna().to_dict(orient='list')
|
14 |
text_labels = boxes['text']
|
15 |
text_boxes = list()
|
|
|
16 |
for i in range(len(boxes)):
|
17 |
x1, y1 = boxes["left"][i], boxes["top"][i]
|
18 |
x2, y2 = x1 + boxes["width"][i], y1 + boxes["height"][i]
|
|
|
22 |
crops.append(image.crop((box['x1'], box['y1'], box['x2'], box['y2'],)))
|
23 |
return list(zip(crops, text_boxes))
|
24 |
|
25 |
+
# def detect_text(self, image):
|
26 |
+
# boxes = self.reader.readtext(np.asarray(image))
|
27 |
+
# print(boxes)
|
28 |
+
# return []
|
29 |
+
|
30 |
def extract_text(self, image, boxes):
|
31 |
OFFSET = 6
|
32 |
texts = list()
|
requirements.txt
CHANGED
@@ -1,54 +1,94 @@
|
|
|
|
1 |
altair==4.0.0
|
2 |
attrs==23.1.0
|
|
|
3 |
blinker==1.6.2
|
4 |
cachetools==5.3.0
|
5 |
certifi==2023.5.7
|
6 |
charset-normalizer==3.1.0
|
7 |
click==8.1.3
|
|
|
|
|
|
|
8 |
decorator==5.1.1
|
9 |
entrypoints==0.4
|
10 |
filelock==3.12.0
|
|
|
|
|
11 |
gitdb==4.0.10
|
12 |
GitPython==3.1.31
|
13 |
idna==3.4
|
|
|
|
|
14 |
importlib-metadata==6.6.0
|
|
|
15 |
Jinja2==3.1.2
|
16 |
jsonschema==4.17.3
|
|
|
|
|
|
|
|
|
17 |
markdown-it-py==2.2.0
|
18 |
MarkupSafe==2.1.2
|
|
|
19 |
mdurl==0.1.2
|
|
|
20 |
mpmath==1.3.0
|
21 |
networkx==3.1
|
|
|
22 |
numpy==1.24.3
|
|
|
|
|
|
|
|
|
23 |
packaging==23.1
|
24 |
pandas==2.0.1
|
25 |
Pillow==9.5.0
|
26 |
protobuf==3.20.3
|
27 |
pyarrow==12.0.0
|
|
|
|
|
28 |
pydeck==0.8.1b0
|
29 |
Pygments==2.15.1
|
30 |
Pympler==1.0.1
|
|
|
31 |
pyrsistent==0.19.3
|
|
|
32 |
pytesseract==0.3.10
|
|
|
33 |
python-dateutil==2.8.2
|
34 |
pytz==2023.3
|
35 |
pytz-deprecation-shim==0.1.0.post0
|
|
|
|
|
|
|
36 |
requests==2.30.0
|
37 |
rich==13.3.5
|
|
|
|
|
|
|
38 |
six==1.16.0
|
39 |
smmap==5.0.0
|
|
|
40 |
streamlit==1.22.0
|
41 |
streamlit-image-select==0.6.0
|
42 |
sympy==1.12
|
|
|
43 |
tenacity==8.2.2
|
|
|
|
|
|
|
44 |
toml==0.10.2
|
|
|
45 |
toolz==0.12.0
|
46 |
-
torch==
|
47 |
-
torchvision==0.
|
48 |
tornado==6.3.1
|
|
|
49 |
typing_extensions==4.5.0
|
50 |
tzdata==2023.3
|
51 |
tzlocal==4.3
|
52 |
urllib3==2.0.2
|
53 |
validators==0.20.0
|
|
|
54 |
zipp==3.15.0
|
|
|
1 |
+
addict==2.4.0
|
2 |
altair==4.0.0
|
3 |
attrs==23.1.0
|
4 |
+
beautifulsoup4==4.12.2
|
5 |
blinker==1.6.2
|
6 |
cachetools==5.3.0
|
7 |
certifi==2023.5.7
|
8 |
charset-normalizer==3.1.0
|
9 |
click==8.1.3
|
10 |
+
colorama==0.4.6
|
11 |
+
contourpy==1.0.7
|
12 |
+
cycler==0.11.0
|
13 |
decorator==5.1.1
|
14 |
entrypoints==0.4
|
15 |
filelock==3.12.0
|
16 |
+
fonttools==4.39.4
|
17 |
+
gdown==4.7.1
|
18 |
gitdb==4.0.10
|
19 |
GitPython==3.1.31
|
20 |
idna==3.4
|
21 |
+
imageio==2.28.1
|
22 |
+
imgaug==0.4.0
|
23 |
importlib-metadata==6.6.0
|
24 |
+
importlib-resources==5.12.0
|
25 |
Jinja2==3.1.2
|
26 |
jsonschema==4.17.3
|
27 |
+
kiwisolver==1.4.4
|
28 |
+
lazy_loader==0.2
|
29 |
+
lmdb==1.4.1
|
30 |
+
Markdown==3.4.3
|
31 |
markdown-it-py==2.2.0
|
32 |
MarkupSafe==2.1.2
|
33 |
+
matplotlib==3.7.1
|
34 |
mdurl==0.1.2
|
35 |
+
model-index==0.1.11
|
36 |
mpmath==1.3.0
|
37 |
networkx==3.1
|
38 |
+
ninja==1.11.1
|
39 |
numpy==1.24.3
|
40 |
+
opencv-python==4.5.4.60
|
41 |
+
opencv-python-headless==4.5.4.60
|
42 |
+
openmim==0.3.7
|
43 |
+
ordered-set==4.1.0
|
44 |
packaging==23.1
|
45 |
pandas==2.0.1
|
46 |
Pillow==9.5.0
|
47 |
protobuf==3.20.3
|
48 |
pyarrow==12.0.0
|
49 |
+
pyclipper==1.3.0.post4
|
50 |
+
pycocotools==2.0.6
|
51 |
pydeck==0.8.1b0
|
52 |
Pygments==2.15.1
|
53 |
Pympler==1.0.1
|
54 |
+
pyparsing==3.0.9
|
55 |
pyrsistent==0.19.3
|
56 |
+
PySocks==1.7.1
|
57 |
pytesseract==0.3.10
|
58 |
+
python-bidi==0.4.2
|
59 |
python-dateutil==2.8.2
|
60 |
pytz==2023.3
|
61 |
pytz-deprecation-shim==0.1.0.post0
|
62 |
+
PyWavelets==1.4.1
|
63 |
+
PyYAML==6.0
|
64 |
+
rapidfuzz==3.0.0
|
65 |
requests==2.30.0
|
66 |
rich==13.3.5
|
67 |
+
scikit-image==0.20.0
|
68 |
+
scipy==1.9.1
|
69 |
+
shapely==2.0.1
|
70 |
six==1.16.0
|
71 |
smmap==5.0.0
|
72 |
+
soupsieve==2.4.1
|
73 |
streamlit==1.22.0
|
74 |
streamlit-image-select==0.6.0
|
75 |
sympy==1.12
|
76 |
+
tabulate==0.9.0
|
77 |
tenacity==8.2.2
|
78 |
+
termcolor==2.3.0
|
79 |
+
terminaltables==3.1.10
|
80 |
+
tifffile==2023.4.12
|
81 |
toml==0.10.2
|
82 |
+
tomli==2.0.1
|
83 |
toolz==0.12.0
|
84 |
+
torch==1.9.0
|
85 |
+
torchvision==0.10.0
|
86 |
tornado==6.3.1
|
87 |
+
tqdm==4.65.0
|
88 |
typing_extensions==4.5.0
|
89 |
tzdata==2023.3
|
90 |
tzlocal==4.3
|
91 |
urllib3==2.0.2
|
92 |
validators==0.20.0
|
93 |
+
yapf==0.33.0
|
94 |
zipp==3.15.0
|