Spaces:
Running
Running
Joschka Strueber
committed on
Commit
·
c608f7f
1
Parent(s):
3dfa66b
[Ref, Fix] indentation error in answer key selection, longer explanation in demo, exclusion of broken dataset
Browse files
- app.py +1 -1
- src/dataloading.py +3 -1
- src/utils.py +6 -4
app.py
CHANGED
@@ -69,7 +69,7 @@ with gr.Blocks(title="LLM Similarity Analyzer", css=app_util.custom_css) as demo
|
|
69 |
outputs=heatmap
|
70 |
)
|
71 |
|
72 |
-
gr.Markdown("\* Self-similarity is only 1.0 for
|
73 |
|
74 |
clear_btn = gr.Button("Clear Selection")
|
75 |
clear_btn.click(
|
|
|
69 |
outputs=heatmap
|
70 |
)
|
71 |
|
72 |
+
gr.Markdown("\* Self-similarity is only 1.0 for CAPA if the model predicts a single option with 100% confidence for each question. If the model is uncertain, the self-similarity will be lower.")
|
73 |
|
74 |
clear_btn = gr.Button("Clear Selection")
|
75 |
clear_btn.click(
|
src/dataloading.py
CHANGED
@@ -88,7 +88,7 @@ def get_leaderboard_datasets(model_ids):
|
|
88 |
common_datasets = set.intersection(*model_datasets.values())
|
89 |
|
90 |
# Filter datasets that are not MCQ or currently do not work
|
91 |
-
ignore = ["math_", "ifeval"]
|
92 |
discard = []
|
93 |
for dataset in common_datasets:
|
94 |
for ignore_data in ignore:
|
@@ -132,6 +132,8 @@ def filter_labels(dataset_name, doc):
|
|
132 |
labels.append(1)
|
133 |
elif test_target.isdigit():
|
134 |
labels = [int(d[target_key]) for d in doc]
|
|
|
|
|
135 |
|
136 |
return labels
|
137 |
|
|
|
88 |
common_datasets = set.intersection(*model_datasets.values())
|
89 |
|
90 |
# Filter datasets that are not MCQ or currently do not work
|
91 |
+
ignore = ["bbh_temporal_sequences", "math_", "ifeval"]
|
92 |
discard = []
|
93 |
for dataset in common_datasets:
|
94 |
for ignore_data in ignore:
|
|
|
132 |
labels.append(1)
|
133 |
elif test_target.isdigit():
|
134 |
labels = [int(d[target_key]) for d in doc]
|
135 |
+
|
136 |
+
print(f"Number of labels: {len(labels)}")
|
137 |
|
138 |
return labels
|
139 |
|
src/utils.py
CHANGED
@@ -18,7 +18,9 @@ def opt_in_pars_to_index(s):
|
|
18 |
raise ValueError("Invalid format")
|
19 |
|
20 |
def get_test_target(doc):
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
|
|
|
|
|
18 |
raise ValueError("Invalid format")
|
19 |
|
20 |
def get_test_target(doc):
|
21 |
+
if "target" in doc:
|
22 |
+
return doc["target"], "target"
|
23 |
+
elif "answer" in doc:
|
24 |
+
return doc["answer"], "answer"
|
25 |
+
else:
|
26 |
+
return "", ""
|