Spaces:

Ligeng-Zhu
/

Recap-Human-Test

Sleeping

App Files Community

Ligeng-Zhu committed on Aug 3, 2024

Commit

371d3fb

1 Parent(s): cc7235c

update

Browse files

Files changed (3) hide show

README.md +1 -1
app.py +39 -35
run.sh +7 -2

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-title: VILA Video Benchmark
 emoji: 🔥
 colorFrom: gray
 colorTo: red

 ---
+title: Recaption Human Test
 emoji: 🔥
 colorFrom: gray
 colorTo: red

app.py CHANGED Viewed

@@ -15,13 +15,15 @@ from huggingface_hub import HfApi, repo_exists, file_exists
 from huggingface_hub.hf_api import CommitOperationAdd
-def calc_file_md5(fpath):
     with open(fpath, "rb") as f:
         file_hash = hashlib.md5()
         while chunk := f.read(8192):
             file_hash.update(chunk)
-    return file_hash.hexdigest()[:6]
 finfo = [
     json.load(open("f1/coyo25m-0-000000.tar.json")),
@@ -35,7 +37,7 @@ keys = list(finfo[0].keys())
 api = HfApi()
-def get_random_captino(k):
     indexs = random.sample(list(range(5)), k=2)
     output = []
@@ -51,31 +53,34 @@ def get_random_captino(k):
 def load_image(idx):
     k = keys[idx]
-    infos, indexs = get_random_captino(k)
-    return k, f"{k}", infos[0], infos[1], str(indexs)
 def random_image(idx):
     k = random.choice(keys)
     index = keys.index(k)
-    infos, indexs = get_random_captino(k)
-    return k, index, f"{k}", infos[0], infos[1], str(indexs)
-def save_labeling(url, cap1, cap2, labeler, indexs, preference="left"):
     os.makedirs("flagged", exist_ok=True)
     output_info = {
         "cap1": cap1,
         "cap2": cap2,
-        "preference": preference,
-        "indexs": indexs,
         "labeler": labeler,
     }
     # print(url)
     lid = (
         labeler.replace(" ", "_").replace("@", "_").replace(".", "_").replace("/", "-")
     )
-    output_path = osp.join(f"flagged", url.replace("/", "--") + f".{lid}.json")
     with open(output_path, "w") as fp:
         json.dump(output_info, fp, indent=2)
@@ -110,9 +115,6 @@ def save_labeling(url, cap1, cap2, labeler, indexs, preference="left"):
 with gr.Blocks(
     title="VILA Video Benchmark",
 ) as demo:
-    with gr.Row():
-        slider = gr.Slider(maximum=len(keys), label="Video Index", value=0)
     with gr.Row():
         with gr.Column(scale=4):
             image_input = gr.Image(
@@ -121,7 +123,8 @@ with gr.Blocks(
                 value="https://github.com/NVlabs/VILA/raw/main/demo_images/vila-logo.jpg",
             )
         with gr.Column(scale=1):
-            random_img = gr.Button(value="Random Image")
             labeler = gr.Text(
                 value="placeholder",
                 label="Labeler ID (your name or email)",
@@ -129,43 +132,44 @@ with gr.Blocks(
             )
             logging = gr.Markdown(label="Logging info")
     with gr.Row():
-        btn_left = gr.Button("Left better")
-        btn_tie = gr.Button("tie")
-        btn_right = gr.Button("Right better")
     with gr.Row():
         vcap1 = gr.Textbox(label="Anoymous Caption 1")
         vcap2 = gr.Textbox(label="Anoymous Caption 2")
     cap_res = gr.Textbox(label="Caption Saving Results")
-    tmp_info = gr.Textbox(label="Temp Info", visible=False)
     from functools import partial
-    btn_left.click(
-        partial(save_labeling, preference="left"),
-        inputs=[logging, vcap1, vcap2, labeler, tmp_info],
         outputs=[cap_res],
-    )
-    btn_tie.click(
-        partial(save_labeling, preference="tie"),
-        inputs=[logging, vcap1, vcap2, labeler, tmp_info],
-        outputs=[cap_res],
-    )
-    btn_right.click(
-        partial(save_labeling, preference="right"),
-        inputs=[logging, vcap1, vcap2, labeler, tmp_info],
-        outputs=[cap_res],
     )
     slider.change(
         load_image,
         inputs=[slider],
-        outputs=[image_input, logging, vcap1, vcap2, tmp_info],
     )
     random_img.click(
         random_image,
         inputs=[random_img],
-        outputs=[image_input, slider, logging, vcap1, vcap2, tmp_info],
     )
     # btn_save.click(

 from huggingface_hub.hf_api import CommitOperationAdd
+def calc_file_md5(fpath, max_digits=6):
     with open(fpath, "rb") as f:
         file_hash = hashlib.md5()
         while chunk := f.read(8192):
             file_hash.update(chunk)
+    return file_hash.hexdigest()[:max_digits]
+def string_to_md5(string, max_digits=6):
+    return hashlib.md5(string.encode()).hexdigest()[:max_digits]
 finfo = [
     json.load(open("f1/coyo25m-0-000000.tar.json")),
 api = HfApi()
+def get_random_caption(k):
     indexs = random.sample(list(range(5)), k=2)
     output = []
 def load_image(idx):
     k = keys[idx]
+    infos, indexs = get_random_caption(k)
+    return k, f"{k}", infos[0], infos[1], str(indexs), None, None
 def random_image(idx):
     k = random.choice(keys)
     index = keys.index(k)
+    infos, indexs = get_random_caption(k)
+    return k, index, f"{k}", infos[0], infos[1], str(indexs), None, None
+def save_labeling(url, cap1, cap2, labeler, caption_source, rate1, rate2):
     os.makedirs("flagged", exist_ok=True)
     output_info = {
+        "url": url,
         "cap1": cap1,
         "cap2": cap2,
+        "rate-details": rate1,
+        "rate-halluication": rate2,
+        "caption_source": caption_source,
         "labeler": labeler,
     }
     # print(url)
     lid = (
         labeler.replace(" ", "_").replace("@", "_").replace(".", "_").replace("/", "-")
     )
+    # output_path = osp.join(f"flagged", url.replace("/", "--") + f".{lid}.json")
+    output_path = osp.join(f"flagged", "md5-" + string_to_md5(url, max_digits=12) + f".{lid}.json")
     with open(output_path, "w") as fp:
         json.dump(output_info, fp, indent=2)
 with gr.Blocks(
     title="VILA Video Benchmark",
 ) as demo:
     with gr.Row():
         with gr.Column(scale=4):
             image_input = gr.Image(
                 value="https://github.com/NVlabs/VILA/raw/main/demo_images/vila-logo.jpg",
             )
         with gr.Column(scale=1):
+            slider = gr.Slider(maximum=len(keys), label="Video Index", value=0)
+            gr.Markdown("## Step-0, put in your name")
             labeler = gr.Text(
                 value="placeholder",
                 label="Labeler ID (your name or email)",
             )
             logging = gr.Markdown(label="Logging info")
     with gr.Row():
+        with gr.Column():
+            gr.Markdown("## Step-1, randomly pick a image")
+            random_img = gr.Button(value="Random Image", variant="primary")
+        with gr.Column(scale=3):
+            gr.Markdown("## Step-2, randomly pick a image")
+            with gr.Row():
+                r1 = gr.Radio(choices=["Left better", "Tie", "Right better"], label="Detailness")
+                r2 = gr.Radio(choices=["Left better", "Tie", "Right better"], label="Halluciation")
+        with gr.Column():
+            gr.Markdown("## Step-3, randomly pick a image")
+            submit = gr.Button(value="submit",variant="stop")
     with gr.Row():
+        gr.Markdown("if you find two caption identical, please ignore and random next")
+    with gr.Row():
         vcap1 = gr.Textbox(label="Anoymous Caption 1")
         vcap2 = gr.Textbox(label="Anoymous Caption 2")
     cap_res = gr.Textbox(label="Caption Saving Results")
+    caption_source = gr.Textbox(label="Temp Info", visible=False)
     from functools import partial
+    submit.click(
+        save_labeling,
+        inputs=[logging, vcap1, vcap2, labeler, caption_source, r1, r2],
         outputs=[cap_res],
     )
     slider.change(
         load_image,
         inputs=[slider],
+        outputs=[image_input, logging, vcap1, vcap2, caption_source, r1, r2],
     )
     random_img.click(
         random_image,
         inputs=[random_img],
+        outputs=[image_input, slider, logging, vcap1, vcap2, caption_source, r1, r2],
     )
     # btn_save.click(

run.sh CHANGED Viewed

@@ -1,4 +1,9 @@
-ss=f1
 scp \
     draco1dc:/lustre/fs11/portfolios/nvr/users/ligengz/workspace/VILA-internal/captioner_bk_new/Llama-2-7b-hf-siglip-large-patch16-384-align-llava_1_5_mm_align-pretrain-coyo_25m_wds_0to5_recap_${ss}_concat+coyo_25m_wds_5to10+mmc4core_10_subset+sharegpt4v_pretrain-SFT-sharegpt4v_sft+valley+vflan+video_chatgpt/coyo25m-0-000000.tar.json \
-    f1/coyo25m-0-000000.tar.json

+for ss in f1 f2 f3 f4; do
+mkdir -p ${ss}
 scp \
     draco1dc:/lustre/fs11/portfolios/nvr/users/ligengz/workspace/VILA-internal/captioner_bk_new/Llama-2-7b-hf-siglip-large-patch16-384-align-llava_1_5_mm_align-pretrain-coyo_25m_wds_0to5_recap_${ss}_concat+coyo_25m_wds_5to10+mmc4core_10_subset+sharegpt4v_pretrain-SFT-sharegpt4v_sft+valley+vflan+video_chatgpt/coyo25m-0-000000.tar.json \
+    ${ss}/coyo25m-0-000000.tar.json
+done