Spaces:
Running
Running
Joschka Strueber
committed on
Commit
·
35404bc
1
Parent(s):
3eeaa4c
[Add, Fix] better warnings for missing models, better description
Browse files
app.py
CHANGED
@@ -3,13 +3,10 @@ import gradio as gr
|
|
3 |
import numpy as np
|
4 |
import matplotlib.pyplot as plt
|
5 |
import seaborn as sns
|
6 |
-
import re
|
7 |
from io import BytesIO
|
8 |
from PIL import Image
|
9 |
from datasets.exceptions import DatasetNotFoundError
|
10 |
|
11 |
-
print(gr.__version__)
|
12 |
-
|
13 |
from src.dataloading import get_leaderboard_models_cached, get_leaderboard_datasets
|
14 |
from src.similarity import load_data_and_compute_similarities
|
15 |
|
@@ -82,15 +79,15 @@ def update_datasets_based_on_models(selected_models, current_dataset):
|
|
82 |
)
|
83 |
except DatasetNotFoundError as e:
|
84 |
# Extract model name from error message
|
85 |
-
|
86 |
-
model_name =
|
87 |
|
88 |
# Display a shorter warning
|
89 |
gr.Warning(f"Data for '{model_name}' is gated or unavailable.")
|
90 |
return gr.update(choices=[], value=None)
|
91 |
|
92 |
with gr.Blocks(title="LLM Similarity Analyzer") as demo:
|
93 |
-
gr.Markdown("## Model Similarity Comparison Tool
|
94 |
|
95 |
with gr.Row():
|
96 |
dataset_dropdown = gr.Dropdown(
|
@@ -116,8 +113,6 @@ with gr.Blocks(title="LLM Similarity Analyzer") as demo:
|
|
116 |
info="Search and select multiple models"
|
117 |
)
|
118 |
|
119 |
-
gr.Markdown("* For the probabilistic Kappa_p metric self-similarity is only 1, if the model predicts a single option with 100% confidence.")
|
120 |
-
|
121 |
model_dropdown.change(
|
122 |
fn=update_datasets_based_on_models,
|
123 |
inputs=[model_dropdown, dataset_dropdown],
|
@@ -137,11 +132,21 @@ with gr.Blocks(title="LLM Similarity Analyzer") as demo:
|
|
137 |
outputs=heatmap
|
138 |
)
|
139 |
|
|
|
|
|
140 |
clear_btn = gr.Button("Clear Selection")
|
141 |
clear_btn.click(
|
142 |
lambda: [[], None, None],
|
143 |
outputs=[model_dropdown, dataset_dropdown, heatmap]
|
144 |
)
|
145 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
if __name__ == "__main__":
|
147 |
demo.launch(ssr_mode=False)
|
|
|
3 |
import numpy as np
|
4 |
import matplotlib.pyplot as plt
|
5 |
import seaborn as sns
|
|
|
6 |
from io import BytesIO
|
7 |
from PIL import Image
|
8 |
from datasets.exceptions import DatasetNotFoundError
|
9 |
|
|
|
|
|
10 |
from src.dataloading import get_leaderboard_models_cached, get_leaderboard_datasets
|
11 |
from src.similarity import load_data_and_compute_similarities
|
12 |
|
|
|
79 |
)
|
80 |
except DatasetNotFoundError as e:
|
81 |
# Extract model name from error message
|
82 |
+
model_name = e.args[0].split("'")[1]
|
83 |
+
model_name = model_name.split("/")[-1].replace("__", "/").replace("_details", "")
|
84 |
|
85 |
# Display a shorter warning
|
86 |
gr.Warning(f"Data for '{model_name}' is gated or unavailable.")
|
87 |
return gr.update(choices=[], value=None)
|
88 |
|
89 |
with gr.Blocks(title="LLM Similarity Analyzer") as demo:
|
90 |
+
gr.Markdown("## Model Similarity Comparison Tool")
|
91 |
|
92 |
with gr.Row():
|
93 |
dataset_dropdown = gr.Dropdown(
|
|
|
113 |
info="Search and select multiple models"
|
114 |
)
|
115 |
|
|
|
|
|
116 |
model_dropdown.change(
|
117 |
fn=update_datasets_based_on_models,
|
118 |
inputs=[model_dropdown, dataset_dropdown],
|
|
|
132 |
outputs=heatmap
|
133 |
)
|
134 |
|
135 |
+
gr.Markdown("\* Self-similarity is only 1.0 for the probabilistic Kappa_p metric if the model predicts a single option with 100% confidence for each question.")
|
136 |
+
|
137 |
clear_btn = gr.Button("Clear Selection")
|
138 |
clear_btn.click(
|
139 |
lambda: [[], None, None],
|
140 |
outputs=[model_dropdown, dataset_dropdown, heatmap]
|
141 |
)
|
142 |
|
143 |
+
gr.Markdown("""### Information \n
|
144 |
+
- **Datasets**: [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard#/) benchmark datasets \n
|
145 |
+
- Some datasets are not multiple-choice - for these, the metrics are not applicable. \n
|
146 |
+
- **Models**: Open LLM Leaderboard models \n
|
147 |
+
- Every model is gated on Hugging Face and access has to be requested. \n
|
148 |
+
- We requested access to the most popular models, but some may be missing. \n
|
149 |
+
- **Metrics**: Kappa_p (probabilistic), Kappa_p (deterministic), Error Consistency""")
|
150 |
+
|
151 |
if __name__ == "__main__":
|
152 |
demo.launch(ssr_mode=False)
|