Spaces:

Canyu
/

Diception-Demo

Running

App Files Files Community

Canyu committed 17 days ago

Commit

cd0e33c

1 Parent(s): a51380e

commit

Browse files

Files changed (1) hide show

app.py +139 -43

app.py CHANGED Viewed

@@ -20,8 +20,28 @@ class Examples(gr.helpers.Examples):
         self.create()
 # user click the image to get points, and show the points on the image
 def get_point(img, sel_pix, evt: gr.SelectData):
     if len(sel_pix) < 5:
         sel_pix.append((evt.index, 1))    # default foreground_point
     img = cv2.imread(img)
@@ -54,11 +74,11 @@ def undo_points(orig_img, sel_pix):
     return temp, sel_pix
-# HF_TOKEN = os.environ.get('HF_KEY')
-# client = Client("Canyu/Diception",
-#                 max_workers=3,
-#                 hf_token=HF_TOKEN)
 colors = [(255, 0, 0), (0, 255, 0)]
 markers = [1, 5]
@@ -89,12 +109,6 @@ def load_additional_params(model_name):
     return additional_params
 def process_image_check(path_input, prompt, sel_points, semantic):
-    print('=========== PROCESS IMAGE CHECK ===========')
-    print(f"Image Path: {path_input}")
-    print(f"Prompt: {prompt}")
-    print(f"Selected Points (before processing): {sel_points}")
-    print(f"Semantic Input: {semantic}")
-    print('===========================================')
     if path_input is None:
         raise gr.Error(
             "Missing image in the left pane: please upload an image first."
@@ -103,23 +117,6 @@ def process_image_check(path_input, prompt, sel_points, semantic):
         raise gr.Error(
             "At least 1 prediction type is needed."
         )
-    if 'point segmentation' in prompt and len(sel_points) == 0:
-        raise gr.Error(
-            "At least 1 point is needed."
-        )
-    if 'point segmentation' not in prompt and len(sel_points) != 0:
-        raise gr.Error(
-            "You must select 'point segmentation' when performing point segmentation."
-        )
-    if 'semantic segmentation' in prompt and semantic == None:
-        raise gr.Error(
-            "Target category is needed."
-        )
-    if 'semantic segmentation' not in prompt and semantic != None:
-        raise gr.Error(
-            "You must select 'semantic segmentation' when performing semantic segmentation."
-        )
@@ -146,14 +143,51 @@ def process_image_4(image_path, prompt):
 def inf(image_path, prompt, sel_points, semantic):
-    inputs = process_image_4(image_path, prompt, sel_points, semantic)
     # return None
-    return client.predict(
-      image=handle_file(image_path),
-      data=inputs,
       api_name="/inf"
     )
 def clear_cache():
     return None, None
@@ -162,18 +196,76 @@ def run_demo_server():
     gradio_theme = gr.themes.Default()
     with gr.Blocks(
         theme=gradio_theme,
-        title="Matting",
     ) as demo:
         selected_points = gr.State([])      # store points
         original_image = gr.State(value=None)   # store original image without points, default None
-        with gr.Row():
-            gr.Markdown("# Diception Demo")
-        with gr.Row():
-            gr.Markdown("### All results are generated using the same single model. To facilitate input processing, we separate point-prompted segmentation and semantic segmentation, as they require input points and segmentation targets.")
         with gr.Row():
             checkbox_group = gr.CheckboxGroup(choices=options, label="Select options:")
         with gr.Row():
             semantic_input = gr.Textbox(label="Category Name (for semantic segmentation only, in COCO)", placeholder="e.g. person/cat/dog/elephant......")
         with gr.Row():
             with gr.Column():
                 input_image = gr.Image(
@@ -184,20 +276,22 @@ def run_demo_server():
                 with gr.Column():
                     with gr.Row():
                         gr.Markdown('You can click on the image to select points prompt. At most 5 point.')
-                    undo_button = gr.Button('Undo point')
-                with gr.Row():
                     matting_image_submit_btn = gr.Button(
-                        value="Estimate Matting", variant="primary"
                     )
                     matting_image_reset_btn = gr.Button(value="Reset")
-                with gr.Row():
-                    img_clear_button = gr.Button("Clear Cache")
             with gr.Column():
                 # matting_image_output = gr.Image(label='Output')
-                matting_image_output =  gr.Image(label='Matting Output')
                         #     label="Matting Output",
                         #     type="filepath",
@@ -210,7 +304,7 @@ def run_demo_server():
-        img_clear_button.click(clear_cache, outputs=[input_image, matting_image_output])
         matting_image_submit_btn.click(
             fn=process_image_check,
@@ -230,11 +324,13 @@ def run_demo_server():
             fn=lambda: (
                 None,
                 None,
             ),
             inputs=[],
             outputs=[
                 input_image,
                 matting_image_output,
             ],
             queue=False,
         )

         self.create()
+def postprocess(output, prompt):
+    result = []
+    image = Image.open(output)
+    w, h = image.size
+    n = len(prompt)
+    slice_width = w // n
+    for i in range(n):
+        left = i * slice_width
+        right = (i + 1) * slice_width if i < n - 1 else w
+        cropped_img = image.crop((left, 0, right, h))
+        # 生成 caption
+        caption = prompt[i]
+        # 存入列表
+        result.append((cropped_img, caption))
+    return result
 # user click the image to get points, and show the points on the image
 def get_point(img, sel_pix, evt: gr.SelectData):
+    print(sel_pix)
     if len(sel_pix) < 5:
         sel_pix.append((evt.index, 1))    # default foreground_point
     img = cv2.imread(img)
     return temp, sel_pix
+HF_TOKEN = os.environ.get('HF_KEY')
+client = Client("Canyu/Diception",
+                max_workers=3,
+                hf_token=HF_TOKEN)
 colors = [(255, 0, 0), (0, 255, 0)]
 markers = [1, 5]
     return additional_params
 def process_image_check(path_input, prompt, sel_points, semantic):
     if path_input is None:
         raise gr.Error(
             "Missing image in the left pane: please upload an image first."
         raise gr.Error(
             "At least 1 prediction type is needed."
         )
 def inf(image_path, prompt, sel_points, semantic):
+    print('=========== PROCESS IMAGE CHECK ===========')
+    print(f"Image Path: {image_path}")
+    print(f"Prompt: {prompt}")
+    print(f"Selected Points (before processing): {sel_points}")
+    print(f"Semantic Input: {semantic}")
+    print('===========================================')
+    if 'point segmentation' in prompt and len(sel_points) == 0:
+        raise gr.Error(
+            "At least 1 point is needed."
+        )
+        return
+    if 'point segmentation' not in prompt and len(sel_points) != 0:
+        raise gr.Error(
+            "You must select 'point segmentation' when performing point segmentation."
+        )
+        return
+    if 'semantic segmentation' in prompt and semantic == '':
+        raise gr.Error(
+            "Target category is needed."
+        )
+        return
+    if 'semantic segmentation' not in prompt and semantic != '':
+        raise gr.Error(
+            "You must select 'semantic segmentation' when performing semantic segmentation."
+        )
+        return
     # return None
+    # inputs = process_image_4(image_path, prompt, sel_points, semantic)
+    prompt_str = str(sel_points)
+    result = client.predict(
+      input_image=handle_file(image_path),
+      checkbox_group=prompt,
+      selected_points=prompt_str,
+      semantic_input=semantic,
       api_name="/inf"
     )
+    result = postprocess(result, prompt)
+    return result
 def clear_cache():
     # Returns a pair of None values. The only visible wiring (the
     # commented-out img_clear_button.click call) maps them to input_image
     # and matting_image_output.
     return None, None
     gradio_theme = gr.themes.Default()
     with gr.Blocks(
         theme=gradio_theme,
+        title="Diception",
+        css="""
+            #download {
+                height: 118px;
+            }
+            .slider .inner {
+                width: 5px;
+                background: #FFF;
+            }
+            .viewport {
+                aspect-ratio: 4/3;
+            }
+            .tabs button.selected {
+                font-size: 20px !important;
+                color: crimson !important;
+            }
+            h1 {
+                text-align: center;
+                display: block;
+            }
+            h2 {
+                text-align: center;
+                display: block;
+            }
+            h3 {
+                text-align: center;
+                display: block;
+            }
+            .md_feedback li {
+                margin-bottom: 0px !important;
+            }
+        """,
+        head="""
+            <script async src="https://www.googletagmanager.com/gtag/js?id=G-1FWSVCGZTG"></script>
+            <script>
+                window.dataLayer = window.dataLayer || [];
+                function gtag() {dataLayer.push(arguments);}
+                gtag('js', new Date());
+                gtag('config', 'G-1FWSVCGZTG');
+            </script>
+        """,
     ) as demo:
         selected_points = gr.State([])      # store points
         original_image = gr.State(value=None)   # store original image without points, default None
+        gr.Markdown(
+            """
+            # DICEPTION: A Generalist Diffusion Model for Vision Perception
+            <p align="center">
+            <a title="arXiv" href="https://arxiv.org" target="_blank" rel="noopener noreferrer"
+                    style="display: inline-block;">
+                <img src="https://www.obukhov.ai/img/badges/badge-pdf.svg">
+            </a>
+            <a title="Github" href="https://github.com/aim-uofa/Diception" target="_blank" rel="noopener noreferrer"
+                    style="display: inline-block;">
+                <img src="https://img.shields.io/github/stars/aim-uofa/GenPercept?label=GitHub%20%E2%98%85&logo=github&color=C8C"
+                        alt="badge-github-stars">
+            </a>
+            </p>
+            <p align="justify">
+                One single model solves multiple perception tasks, producing impressive results!
+            </p>
+        """
+        )
         with gr.Row():
             checkbox_group = gr.CheckboxGroup(choices=options, label="Select options:")
         with gr.Row():
             semantic_input = gr.Textbox(label="Category Name (for semantic segmentation only, in COCO)", placeholder="e.g. person/cat/dog/elephant......")
+        with gr.Row():
+            gr.Markdown('For non-human image inputs, the pose results may have issues. Same when perform semantic segmentation with categories that are not in COCO.')
         with gr.Row():
             with gr.Column():
                 input_image = gr.Image(
                 with gr.Column():
                     with gr.Row():
                         gr.Markdown('You can click on the image to select points prompt. At most 5 point.')
                     matting_image_submit_btn = gr.Button(
+                        value="Run", variant="primary"
                     )
+                with gr.Row():
+                    undo_button = gr.Button('Undo point')
                     matting_image_reset_btn = gr.Button(value="Reset")
+                # with gr.Row():
+                #     img_clear_button = gr.Button("Clear Cache")
             with gr.Column():
                 # matting_image_output = gr.Image(label='Output')
+                # matting_image_output =  gr.Image(label='Results')
+                matting_image_output = gr.Gallery(label="Results")
                         #     label="Matting Output",
                         #     type="filepath",
+        # img_clear_button.click(clear_cache, outputs=[input_image, matting_image_output])
         matting_image_submit_btn.click(
             fn=process_image_check,
             fn=lambda: (
                 None,
                 None,
+                []
             ),
             inputs=[],
             outputs=[
                 input_image,
                 matting_image_output,
+                selected_points
             ],
             queue=False,
         )