commit

- app.py +65 -54
- assets/apple.jpg +3 -0
- assets/board.jpg +3 -0
- assets/car.jpg +3 -0
- assets/cartoon_cat.png +3 -0
- assets/cartoon_girl.jpeg +3 -0
- assets/cat.jpg +3 -0
- assets/lion.jpg +3 -0
- assets/room.jpg +3 -0
- assets/room2.jpg +3 -0
- assets/sheep.jpg +3 -0
- assets/woman.jpg +3 -0
- assets/woman2.jpg +3 -0
- assets/woman3.jpg +3 -0
app.py
CHANGED
@@ -10,6 +10,7 @@ from PIL import Image
 
 import cv2
 import numpy as np
+import ast
 
 class Examples(gr.helpers.Examples):
     def __init__(self, *args, cached_folder=None, **kwargs):
@@ -41,7 +42,7 @@ def postprocess(output, prompt):
 
 # user click the image to get points, and show the points on the image
 def get_point(img, sel_pix, evt: gr.SelectData):
-    print(sel_pix)
+    # print(img, sel_pix)
     if len(sel_pix) < 5:
         sel_pix.append((evt.index, 1))  # default foreground_point
     img = cv2.imread(img)
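Note: `gr.SelectData` is Gradio's event payload for clicks on components such as `gr.Image`, and `evt.index` carries the clicked pixel's `[x, y]` position, which is why `get_point` can append it directly as a point prompt. A minimal, self-contained sketch of the pattern (component and handler names here are illustrative, not from app.py):

```python
import gradio as gr

with gr.Blocks() as demo:
    img = gr.Image(type="filepath")
    points = gr.State([])  # accumulates (coordinate, label) tuples

    def on_select(sel, evt: gr.SelectData):
        # evt.index is the clicked [x, y] position within the image
        sel.append((evt.index, 1))  # 1 marks a foreground point
        return sel

    img.select(on_select, [points], [points])

demo.launch()
```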
@@ -55,6 +56,18 @@ def get_point(img, sel_pix, evt: gr.SelectData):
     print(sel_pix)
     return img, sel_pix
 
+def set_point(img, checkbox_group, sel_pix, semantic_input):
+    ori_img = img
+    # print(img, checkbox_group, sel_pix, semantic_input)
+    sel_pix = ast.literal_eval(sel_pix)
+    img = cv2.imread(img)
+    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+    if len(sel_pix) <= 5 and len(sel_pix) > 0:
+        for point, label in sel_pix:
+            cv2.drawMarker(img, point, colors[label], markerType=markers[label], markerSize=20, thickness=5)
+
+    return ori_img, img, sel_pix
+
 
 # undo the selected point
 def undo_points(orig_img, sel_pix):
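The new `set_point` receives its points as a Python-literal string (carried by the hidden `Points` textbox added further down), so it parses with `ast.literal_eval`, which evaluates literals only and avoids the code-execution risk of plain `eval`. A minimal sketch of that parse-and-draw round trip, assuming a placeholder image path and a fixed marker style instead of the app's `colors`/`markers` lookup tables:

```python
import ast
import cv2

# Points arrive as a string, in the same format as the examples' point columns below.
sel_pix = ast.literal_eval('[([100, 200], 1), ([300, 400], 1)]')

img = cv2.imread("example.jpg")             # placeholder path
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; Gradio expects RGB
for point, label in sel_pix:
    # fixed green cross here; app.py picks color and marker type per label
    cv2.drawMarker(img, tuple(point), (0, 255, 0),
                   markerType=cv2.MARKER_CROSS, markerSize=20, thickness=5)
```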
@@ -92,22 +105,6 @@ map_prompt = {
     'semantic segmentation': '[[image2semantic]]',
 }
 
-def download_additional_params(model_name, filename="add_params.bin"):
-    # download the file and return its local path
-    file_path = hf_hub_download(repo_id=model_name, filename=filename, use_auth_token=HF_TOKEN)
-    return file_path
-
-# load the additional_params.bin file
-def load_additional_params(model_name):
-    # download additional_params.bin
-    params_path = download_additional_params(model_name)
-
-    # load the file contents with torch.load()
-    additional_params = torch.load(params_path, map_location='cpu')
-
-    # return the loaded parameters
-    return additional_params
-
 def process_image_check(path_input, prompt, sel_points, semantic):
     if path_input is None:
         raise gr.Error(
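For reference, the deleted helpers wrapped a standard Hub pattern: download a binary artifact with `hf_hub_download` (which caches locally and returns a path) and deserialize it with `torch.load`. A condensed sketch of the same pattern, with the repo id left as a caller-supplied placeholder:

```python
import torch
from huggingface_hub import hf_hub_download

def load_additional_params(model_name: str, filename: str = "add_params.bin"):
    # Downloads the file (or reuses the cached copy) and returns its local path.
    params_path = hf_hub_download(repo_id=model_name, filename=filename)
    # map_location='cpu' lets the file deserialize on machines without a GPU.
    return torch.load(params_path, map_location="cpu")
```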
@@ -119,30 +116,9 @@ def process_image_check(path_input, prompt, sel_points, semantic):
         )
 
 
-
-def process_image_4(image_path, prompt):
-
-    inputs = []
-    for p in prompt:
-        cur_p = map_prompt[p]
-
-        coor_point = []
-        point_labels = []
-
-
-        cur_input = {
-            # 'original_size': [[w,h]],
-            # 'target_size': [[768, 768]],
-            'prompt': [cur_p],
-            'coor_point': coor_point,
-            'point_labels': point_labels,
-        }
-        inputs.append(cur_input)
-
-    return inputs
-
-
 def inf(image_path, prompt, sel_points, semantic):
+    if isinstance(sel_points, str):
+        sel_points = ast.literal_eval(sel_points)
     print('=========== PROCESS IMAGE CHECK ===========')
     print(f"Image Path: {image_path}")
     print(f"Prompt: {prompt}")
@@ -191,6 +167,9 @@ def inf(image_path, prompt, sel_points, semantic):
 def clear_cache():
     return None, None
 
+def dummy():
+    pass
+
 def run_demo_server():
     options = ['depth', 'normal', 'entity segmentation', 'human pose', 'point segmentation', 'semantic segmentation']
     gradio_theme = gr.themes.Default()
@@ -227,6 +206,9 @@ def run_demo_server():
         .md_feedback li {
             margin-bottom: 0px !important;
         }
+        .hideme {
+            display: none;
+        }
         """,
         head="""
         <script async src="https://www.googletagmanager.com/gtag/js?id=G-1FWSVCGZTG"></script>
@@ -258,13 +240,22 @@ def run_demo_server():
 
         """
     )
+    selected_points_tmp = gr.Textbox(label="Points", elem_classes="hideme")
 
     with gr.Row():
-        checkbox_group = gr.CheckboxGroup(choices=options, label="
+        checkbox_group = gr.CheckboxGroup(choices=options, label="Task")
     with gr.Row():
-        semantic_input = gr.Textbox(label="Category Name
+        semantic_input = gr.Textbox(label="Category Name", placeholder="e.g. person/cat/dog/elephant...... (for semantic segmentation only, in COCO)")
     with gr.Row():
         gr.Markdown('For non-human image inputs, the pose results may have issues. Same when perform semantic segmentation with categories that are not in COCO.')
+    with gr.Row():
+        gr.Markdown('The results of semantic segmentation may be unstable because:')
+    with gr.Row():
+        gr.Markdown('1. We only trained on COCO, whose quality and quantity are insufficient to meet the requirements.')
+    with gr.Row():
+        gr.Markdown('2. Semantic segmentation is more complex than other tasks, as it requires accurately learning the relationship between semantics and objects.')
+    with gr.Row():
+        gr.Markdown('However, we are still able to produce some high-quality semantic segmentation results, strongly demonstrating the potential of our approach.')
     with gr.Row():
         with gr.Column():
             input_image = gr.Image(
@@ -314,7 +305,7 @@ def run_demo_server():
         ).success(
             # fn=process_pipe_matting,
             fn=inf,
-            inputs=[
+            inputs=[original_image, checkbox_group, selected_points, semantic_input],
             outputs=[matting_image_output],
             concurrency_limit=1,
         )
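This handler relies on Gradio event chaining: `.success(...)` registers a follow-up that runs only if the previous function finished without raising, which is how `process_image_check` gates `inf` here. A minimal sketch of the chaining pattern (handler and component names are illustrative):

```python
import gradio as gr

def validate(text):
    if not text:
        raise gr.Error("Missing input")  # raising gr.Error aborts the chain

def run(text):
    return text.upper()

with gr.Blocks() as demo:
    inp = gr.Textbox()
    out = gr.Textbox()
    btn = gr.Button("Go")
    # run() only fires if validate() raised no error
    btn.click(fn=validate, inputs=[inp]).success(fn=run, inputs=[inp], outputs=[out])

demo.launch()
```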
@@ -346,7 +337,7 @@ def run_demo_server():
 
     input_image.select(
         get_point,
-        [
+        [original_image, selected_points],
         [input_image, selected_points],
     )
 
@@ -356,16 +347,36 @@ def run_demo_server():
         [input_image, selected_points]
     )
 
-
-
-
-
-
-
-
-
-
-
+    examples = gr.Examples(
+        fn=set_point,
+        run_on_click=True,
+        examples=[
+            ["assets/woman.jpg", ['point segmentation', 'depth', 'normal', 'entity segmentation', 'human pose', 'semantic segmentation'], '[([2744, 975], 1), ([3440, 1954], 1), ([2123, 2405], 1), ([838, 1678], 1), ([4688, 1922], 1)]', 'person'],
+            ["assets/woman2.jpg", ['point segmentation', 'depth', 'entity segmentation', 'semantic segmentation', 'human pose'], '[([687, 1416], 1), ([1021, 707], 1), ([1138, 1138], 1), ([1182, 1583], 1), ([1188, 2172], 1)]', 'person'],
+            ["assets/board.jpg", ['point segmentation', 'depth', 'entity segmentation', 'normal'], '[([1003, 2163], 1)]', ''],
+            ["assets/lion.jpg", ['point segmentation', 'depth', 'semantic segmentation'], '[([1287, 671], 1)]', 'lion'],
+            ["assets/apple.jpg", ['point segmentation', 'depth', 'semantic segmentation', 'normal', 'entity segmentation'], '[([1287, 671], 1)]', 'apple'],
+            ["assets/room.jpg", ['point segmentation', 'depth', 'semantic segmentation', 'normal', 'entity segmentation'], '[([1308, 2215], 1)]', 'chair'],
+            ["assets/car.jpg", ['point segmentation', 'depth', 'semantic segmentation', 'normal', 'entity segmentation'], '[([1276, 1369], 1)]', 'car'],
+            ["assets/person.jpg", ['point segmentation', 'depth', 'semantic segmentation', 'normal', 'entity segmentation', 'human pose'], '[([3253, 1459], 1)]', 'tie'],
+            ["assets/woman3.jpg", ['point segmentation', 'depth', 'entity segmentation'], '[([420, 692], 1)]', ''],
+            ["assets/cat.jpg", ['point segmentation', 'depth', 'entity segmentation', 'semantic segmentation'], '[([756, 661], 1)]', 'cat'],
+            ["assets/room2.jpg", ['point segmentation', 'depth', 'entity segmentation', 'semantic segmentation', 'normal'], '[([3946, 224], 1)]', 'laptop'],
+            ["assets/cartoon_cat.png", ['point segmentation', 'depth', 'entity segmentation', 'semantic segmentation', 'normal'], '[([1478, 3048], 1)]', 'cat'],
+            ["assets/sheep.jpg", ['point segmentation', 'depth', 'entity segmentation', 'semantic segmentation'], '[([1789, 1791], 1), ([1869, 1333], 1)]', 'sheep'],
+            ["assets/cartoon_girl.jpeg", ['point segmentation', 'depth', 'entity segmentation', 'normal', 'human pose', 'semantic segmentation'], '[([1208, 2089], 1), ([635, 2731], 1), ([1070, 2888], 1), ([1493, 2350], 1)]', 'person'],
+        ],
+        inputs=[input_image, checkbox_group, selected_points_tmp, semantic_input],
+        outputs=[original_image, input_image, selected_points],
+        cache_examples=False,
+    )
+
+    # examples.dataset.click(
+    #     fn=dummy
+    # ).success(
+    #     fn=set_point, # Now run the actual function after inputs are populated
+    #     inputs=[input_image, checkbox_group, selected_points_tmp, semantic_input],
+    #     outputs=[input_image, selected_points]
     # )
 
     demo.queue(
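This `gr.Examples` wiring is the reason for both the hidden textbox and the `.hideme` CSS above: each example row carries its point list as a Python-literal string in `selected_points_tmp`, and `run_on_click=True` with `cache_examples=False` makes Gradio populate the inputs and then call `fn=set_point` live, so the stored points get drawn onto the preview. A minimal sketch of the mechanism, using Gradio's `visible=False` in place of the CSS class (all names and data here are illustrative):

```python
import gradio as gr

def show(text, pts_str):
    # Runs after the clicked example row fills the input components.
    return f"{text}: {pts_str}"

with gr.Blocks() as demo:
    txt = gr.Textbox(label="Text")
    pts = gr.Textbox(label="Points", visible=False)  # hidden carrier column
    out = gr.Textbox(label="Out")
    gr.Examples(
        fn=show,
        run_on_click=True,      # call fn when a row is clicked
        cache_examples=False,   # compute live rather than serving cached outputs
        examples=[["cat", "[([10, 20], 1)]"]],
        inputs=[txt, pts],
        outputs=[out],
    )

demo.launch()
```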
assets/apple.jpg
ADDED (binary image, stored with Git LFS)

assets/board.jpg
ADDED (binary image, stored with Git LFS)

assets/car.jpg
ADDED (binary image, stored with Git LFS)

assets/cartoon_cat.png
ADDED (binary image, stored with Git LFS)

assets/cartoon_girl.jpeg
ADDED (binary image, stored with Git LFS)

assets/cat.jpg
ADDED (binary image, stored with Git LFS)

assets/lion.jpg
ADDED (binary image, stored with Git LFS)

assets/room.jpg
ADDED (binary image, stored with Git LFS)

assets/room2.jpg
ADDED (binary image, stored with Git LFS)

assets/sheep.jpg
ADDED (binary image, stored with Git LFS)

assets/woman.jpg
ADDED (binary image, stored with Git LFS)

assets/woman2.jpg
ADDED (binary image, stored with Git LFS)

assets/woman3.jpg
ADDED (binary image, stored with Git LFS)