Spaces:
Sleeping
Sleeping
Commit
·
371d3fb
1
Parent(s):
cc7235c
update
Browse files
README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
emoji: 🔥
|
4 |
colorFrom: gray
|
5 |
colorTo: red
|
|
|
1 |
---
|
2 |
+
title: Recaption Human Test
|
3 |
emoji: 🔥
|
4 |
colorFrom: gray
|
5 |
colorTo: red
|
app.py
CHANGED
@@ -15,13 +15,15 @@ from huggingface_hub import HfApi, repo_exists, file_exists
|
|
15 |
from huggingface_hub.hf_api import CommitOperationAdd
|
16 |
|
17 |
|
18 |
-
def calc_file_md5(fpath):
|
19 |
with open(fpath, "rb") as f:
|
20 |
file_hash = hashlib.md5()
|
21 |
while chunk := f.read(8192):
|
22 |
file_hash.update(chunk)
|
23 |
-
return file_hash.hexdigest()[:
|
24 |
|
|
|
|
|
25 |
|
26 |
finfo = [
|
27 |
json.load(open("f1/coyo25m-0-000000.tar.json")),
|
@@ -35,7 +37,7 @@ keys = list(finfo[0].keys())
|
|
35 |
api = HfApi()
|
36 |
|
37 |
|
38 |
-
def
|
39 |
indexs = random.sample(list(range(5)), k=2)
|
40 |
|
41 |
output = []
|
@@ -51,31 +53,34 @@ def get_random_captino(k):
|
|
51 |
|
52 |
def load_image(idx):
|
53 |
k = keys[idx]
|
54 |
-
infos, indexs =
|
55 |
-
return k, f"{k}", infos[0], infos[1], str(indexs)
|
56 |
|
57 |
|
58 |
def random_image(idx):
|
59 |
k = random.choice(keys)
|
60 |
index = keys.index(k)
|
61 |
-
infos, indexs =
|
62 |
-
return k, index, f"{k}", infos[0], infos[1], str(indexs)
|
63 |
|
64 |
|
65 |
-
def save_labeling(url, cap1, cap2, labeler,
|
66 |
os.makedirs("flagged", exist_ok=True)
|
67 |
output_info = {
|
|
|
68 |
"cap1": cap1,
|
69 |
"cap2": cap2,
|
70 |
-
"
|
71 |
-
"
|
|
|
72 |
"labeler": labeler,
|
73 |
}
|
74 |
# print(url)
|
75 |
lid = (
|
76 |
labeler.replace(" ", "_").replace("@", "_").replace(".", "_").replace("/", "-")
|
77 |
)
|
78 |
-
output_path = osp.join(f"flagged", url.replace("/", "--") + f".{lid}.json")
|
|
|
79 |
with open(output_path, "w") as fp:
|
80 |
json.dump(output_info, fp, indent=2)
|
81 |
|
@@ -110,9 +115,6 @@ def save_labeling(url, cap1, cap2, labeler, indexs, preference="left"):
|
|
110 |
with gr.Blocks(
|
111 |
title="VILA Video Benchmark",
|
112 |
) as demo:
|
113 |
-
with gr.Row():
|
114 |
-
slider = gr.Slider(maximum=len(keys), label="Video Index", value=0)
|
115 |
-
|
116 |
with gr.Row():
|
117 |
with gr.Column(scale=4):
|
118 |
image_input = gr.Image(
|
@@ -121,7 +123,8 @@ with gr.Blocks(
|
|
121 |
value="https://github.com/NVlabs/VILA/raw/main/demo_images/vila-logo.jpg",
|
122 |
)
|
123 |
with gr.Column(scale=1):
|
124 |
-
|
|
|
125 |
labeler = gr.Text(
|
126 |
value="placeholder",
|
127 |
label="Labeler ID (your name or email)",
|
@@ -129,43 +132,44 @@ with gr.Blocks(
|
|
129 |
)
|
130 |
logging = gr.Markdown(label="Logging info")
|
131 |
with gr.Row():
|
132 |
-
|
133 |
-
|
134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
with gr.Row():
|
|
|
|
|
|
|
136 |
vcap1 = gr.Textbox(label="Anoymous Caption 1")
|
137 |
vcap2 = gr.Textbox(label="Anoymous Caption 2")
|
138 |
cap_res = gr.Textbox(label="Caption Saving Results")
|
139 |
-
|
140 |
from functools import partial
|
141 |
|
142 |
-
|
143 |
-
|
144 |
-
inputs=[logging, vcap1, vcap2, labeler,
|
145 |
outputs=[cap_res],
|
146 |
-
)
|
147 |
|
148 |
-
btn_tie.click(
|
149 |
-
partial(save_labeling, preference="tie"),
|
150 |
-
inputs=[logging, vcap1, vcap2, labeler, tmp_info],
|
151 |
-
outputs=[cap_res],
|
152 |
-
)
|
153 |
-
|
154 |
-
btn_right.click(
|
155 |
-
partial(save_labeling, preference="right"),
|
156 |
-
inputs=[logging, vcap1, vcap2, labeler, tmp_info],
|
157 |
-
outputs=[cap_res],
|
158 |
)
|
159 |
|
160 |
slider.change(
|
161 |
load_image,
|
162 |
inputs=[slider],
|
163 |
-
outputs=[image_input, logging, vcap1, vcap2,
|
164 |
)
|
165 |
random_img.click(
|
166 |
random_image,
|
167 |
inputs=[random_img],
|
168 |
-
outputs=[image_input, slider, logging, vcap1, vcap2,
|
169 |
)
|
170 |
|
171 |
# btn_save.click(
|
|
|
15 |
from huggingface_hub.hf_api import CommitOperationAdd
|
16 |
|
17 |
|
18 |
+
def calc_file_md5(fpath, max_digits=6, chunk_size=8192):
    """Return the leading hex digits of the MD5 digest of a file's contents.

    The file is streamed in fixed-size chunks so it is never loaded into
    memory in one piece.

    Args:
        fpath: Path of the file to hash; it is opened in binary mode.
        max_digits: Number of leading hex characters of the digest to keep.
            ``None`` keeps the full 32-character digest.
        chunk_size: Number of bytes read per iteration.

    Returns:
        The (possibly truncated) lowercase hex digest as a string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    file_hash = hashlib.md5()
    with open(fpath, "rb") as f:
        # read() returns b"" at EOF, which ends the loop.
        while chunk := f.read(chunk_size):
            file_hash.update(chunk)
    return file_hash.hexdigest()[:max_digits]
|
24 |
|
25 |
+
def string_to_md5(string, max_digits=6):
    """Return the leading hex digits of the MD5 digest of a text string.

    Args:
        string: Text to hash; it is encoded as UTF-8 before hashing.
        max_digits: Number of leading hex characters of the digest to keep.
            ``None`` keeps the full 32-character digest.

    Returns:
        The (possibly truncated) lowercase hex digest as a string.
    """
    digest = hashlib.md5(string.encode("utf-8")).hexdigest()
    return digest[:max_digits]
|
27 |
|
28 |
finfo = [
|
29 |
json.load(open("f1/coyo25m-0-000000.tar.json")),
|
|
|
37 |
api = HfApi()
|
38 |
|
39 |
|
40 |
+
def get_random_caption(k):
|
41 |
indexs = random.sample(list(range(5)), k=2)
|
42 |
|
43 |
output = []
|
|
|
53 |
|
54 |
def load_image(idx):
    """Build the UI update tuple for the sample at slider position ``idx``.

    Args:
        idx: Position selected on the index slider. It may arrive as a float
            and, because the slider's maximum is ``len(keys)``, may be one
            past the last valid index.

    Returns:
        A 7-tuple: the key (used as the image value), the key as text, the
        two captions, the stringified caption indexes, and two ``None``
        values that clear the rating radio groups.
    """
    # Coerce to int (a float cannot index a list) and clamp the top end so
    # the slider's maximum position maps to the last key instead of raising.
    idx = min(int(idx), len(keys) - 1)
    k = keys[idx]
    infos, indexs = get_random_caption(k)
    return k, f"{k}", infos[0], infos[1], str(indexs), None, None
|
58 |
|
59 |
|
60 |
def random_image(idx):
    """Pick a random sample and build the UI update tuple for it.

    Args:
        idx: Value supplied by the triggering component; it is not used.

    Returns:
        An 8-tuple: the key (used as the image value), its position in
        ``keys`` (for the slider), the key as text, the two captions, the
        stringified caption indexes, and two ``None`` values that clear the
        rating radio groups.
    """
    # Draw the position directly instead of choosing a key and then scanning
    # the list for it with keys.index(); choice() on an empty range still
    # raises IndexError, as choosing from an empty ``keys`` did.
    index = random.choice(range(len(keys)))
    k = keys[index]
    infos, indexs = get_random_caption(k)
    return k, index, f"{k}", infos[0], infos[1], str(indexs), None, None
|
65 |
|
66 |
|
67 |
+
def save_labeling(url, cap1, cap2, labeler, caption_source, rate1, rate2):
|
68 |
os.makedirs("flagged", exist_ok=True)
|
69 |
output_info = {
|
70 |
+
"url": url,
|
71 |
"cap1": cap1,
|
72 |
"cap2": cap2,
|
73 |
+
"rate-details": rate1,
|
74 |
+
"rate-halluication": rate2,
|
75 |
+
"caption_source": caption_source,
|
76 |
"labeler": labeler,
|
77 |
}
|
78 |
# print(url)
|
79 |
lid = (
|
80 |
labeler.replace(" ", "_").replace("@", "_").replace(".", "_").replace("/", "-")
|
81 |
)
|
82 |
+
# output_path = osp.join(f"flagged", url.replace("/", "--") + f".{lid}.json")
|
83 |
+
output_path = osp.join(f"flagged", "md5-" + string_to_md5(url, max_digits=12) + f".{lid}.json")
|
84 |
with open(output_path, "w") as fp:
|
85 |
json.dump(output_info, fp, indent=2)
|
86 |
|
|
|
115 |
with gr.Blocks(
|
116 |
title="VILA Video Benchmark",
|
117 |
) as demo:
|
|
|
|
|
|
|
118 |
with gr.Row():
|
119 |
with gr.Column(scale=4):
|
120 |
image_input = gr.Image(
|
|
|
123 |
value="https://github.com/NVlabs/VILA/raw/main/demo_images/vila-logo.jpg",
|
124 |
)
|
125 |
with gr.Column(scale=1):
|
126 |
+
slider = gr.Slider(maximum=len(keys), label="Video Index", value=0)
|
127 |
+
gr.Markdown("## Step-0, put in your name")
|
128 |
labeler = gr.Text(
|
129 |
value="placeholder",
|
130 |
label="Labeler ID (your name or email)",
|
|
|
132 |
)
|
133 |
logging = gr.Markdown(label="Logging info")
|
134 |
with gr.Row():
|
135 |
+
with gr.Column():
|
136 |
+
gr.Markdown("## Step-1, randomly pick a image")
|
137 |
+
random_img = gr.Button(value="Random Image", variant="primary")
|
138 |
+
with gr.Column(scale=3):
|
139 |
+
gr.Markdown("## Step-2, randomly pick a image")
|
140 |
+
with gr.Row():
|
141 |
+
r1 = gr.Radio(choices=["Left better", "Tie", "Right better"], label="Detailness")
|
142 |
+
r2 = gr.Radio(choices=["Left better", "Tie", "Right better"], label="Halluciation")
|
143 |
+
with gr.Column():
|
144 |
+
gr.Markdown("## Step-3, randomly pick a image")
|
145 |
+
submit = gr.Button(value="submit",variant="stop")
|
146 |
+
|
147 |
with gr.Row():
|
148 |
+
gr.Markdown("if you find two caption identical, please ignore and random next")
|
149 |
+
with gr.Row():
|
150 |
+
|
151 |
vcap1 = gr.Textbox(label="Anoymous Caption 1")
|
152 |
vcap2 = gr.Textbox(label="Anoymous Caption 2")
|
153 |
cap_res = gr.Textbox(label="Caption Saving Results")
|
154 |
+
caption_source = gr.Textbox(label="Temp Info", visible=False)
|
155 |
from functools import partial
|
156 |
|
157 |
+
submit.click(
|
158 |
+
save_labeling,
|
159 |
+
inputs=[logging, vcap1, vcap2, labeler, caption_source, r1, r2],
|
160 |
outputs=[cap_res],
|
|
|
161 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
162 |
)
|
163 |
|
164 |
slider.change(
|
165 |
load_image,
|
166 |
inputs=[slider],
|
167 |
+
outputs=[image_input, logging, vcap1, vcap2, caption_source, r1, r2],
|
168 |
)
|
169 |
random_img.click(
|
170 |
random_image,
|
171 |
inputs=[random_img],
|
172 |
+
outputs=[image_input, slider, logging, vcap1, vcap2, caption_source, r1, r2],
|
173 |
)
|
174 |
|
175 |
# btn_save.click(
|
run.sh
CHANGED
@@ -1,4 +1,9 @@
|
|
1 |
-
|
|
|
|
|
|
|
2 |
scp \
|
3 |
draco1dc:/lustre/fs11/portfolios/nvr/users/ligengz/workspace/VILA-internal/captioner_bk_new/Llama-2-7b-hf-siglip-large-patch16-384-align-llava_1_5_mm_align-pretrain-coyo_25m_wds_0to5_recap_${ss}_concat+coyo_25m_wds_5to10+mmc4core_10_subset+sharegpt4v_pretrain-SFT-sharegpt4v_sft+valley+vflan+video_chatgpt/coyo25m-0-000000.tar.json \
|
4 |
-
|
|
|
|
|
|
1 |
+
|
2 |
+
# Fetch the recaptioned shard metadata for each variant (f1..f4) into a
# local directory named after the variant.
for ss in f1 f2 f3 f4; do
    mkdir -p "${ss}"
    # Expansions are quoted so the paths stay single words.
    scp \
        "draco1dc:/lustre/fs11/portfolios/nvr/users/ligengz/workspace/VILA-internal/captioner_bk_new/Llama-2-7b-hf-siglip-large-patch16-384-align-llava_1_5_mm_align-pretrain-coyo_25m_wds_0to5_recap_${ss}_concat+coyo_25m_wds_5to10+mmc4core_10_subset+sharegpt4v_pretrain-SFT-sharegpt4v_sft+valley+vflan+video_chatgpt/coyo25m-0-000000.tar.json" \
        "${ss}/coyo25m-0-000000.tar.json"
done
|