Joschka Strueber committed
Commit ce6be70 · 1 parent: 5d4059c

[Add, Fix] change to CAPA, fix error in dataloading
Files changed:
- app.py (+2 -2)
- src/dataloading.py (+5 -3)
- src/similarity.py (+3 -3)
app.py
CHANGED
@@ -110,7 +110,7 @@ with gr.Blocks(title="LLM Similarity Analyzer") as demo:
             info="Open LLM Leaderboard v2 benchmark datasets"
         )
         metric_dropdown = gr.Dropdown(
-            choices=["
+            choices=["CAPA", "CAPA (det.)", "Error Consistency"],
             label="Select Metric",
             info="Select a similarity metric to compute"
         )
@@ -158,7 +158,7 @@ with gr.Blocks(title="LLM Similarity Analyzer") as demo:
     - **Models**: Open LLM Leaderboard models \n
         - Every model evaluation is gated on Hugging Face and access has to be requested. \n
         - We requested access for the most popular models, but some may be missing. \n
-    - **Metrics**:
+    - **Metrics**: CAPA (probabilistic), CAPA (deterministic), Error Consistency""")
 
 if __name__ == "__main__":
     demo.launch(ssr_mode=False)
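For context, a minimal, self-contained sketch of how the new metric choices could be wired into a Gradio dropdown. The describe_metric callback and the textbox output are placeholders rather than the Space's actual app.py; only the dropdown arguments and the launch call mirror the diff above.

# Minimal sketch (assumed structure, not the full app.py of this Space).
import gradio as gr

METRIC_CHOICES = ["CAPA", "CAPA (det.)", "Error Consistency"]

def describe_metric(metric: str) -> str:
    # Placeholder callback: the real app dispatches to src/similarity.py here.
    return f"Selected metric: {metric}"

with gr.Blocks(title="LLM Similarity Analyzer") as demo:
    metric_dropdown = gr.Dropdown(
        choices=METRIC_CHOICES,
        label="Select Metric",
        info="Select a similarity metric to compute",
    )
    info_box = gr.Textbox(label="Metric info")
    metric_dropdown.change(describe_metric, inputs=metric_dropdown, outputs=info_box)

if __name__ == "__main__":
    demo.launch(ssr_mode=False)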
src/dataloading.py
CHANGED
@@ -9,17 +9,19 @@ def get_leaderboard_models():
     api = HfApi()
 
     # List all datasets in the open-llm-leaderboard organization
-
+    dataset_list = api.list_datasets(author="open-llm-leaderboard")
 
     models = []
-    for dataset in
+    for dataset in dataset_list:
         if dataset.id.endswith("-details"):
             dataset_id = dataset.id
             try:
                 # Check if the dataset can be loaded
+                print(dataset_id)
                 check_gated = datasets.get_dataset_config_names(dataset_id)
+                print(check_gated)
                 # Format: "open-llm-leaderboard/<provider>__<model_name>-details"
-                model_part =
+                model_part = dataset_id.split("/")[-1].replace("-details", "")
                 if "__" in model_part:
                     provider, model = model_part.split("__", 1)
                     models.append(f"{provider}/{model}")
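Read as a whole, the post-commit listing logic looks roughly like the sketch below. Only the lines visible in the diff are verbatim; the imports, the except branch, and the return statement are assumptions filled in from context, and the two debug prints added in this commit are kept as-is.

# Sketch of get_leaderboard_models() after this commit (assumed surrounding
# structure; only the lines shown in the diff above are verbatim).
import datasets
from huggingface_hub import HfApi

def get_leaderboard_models() -> list[str]:
    api = HfApi()

    # List all datasets in the open-llm-leaderboard organization
    dataset_list = api.list_datasets(author="open-llm-leaderboard")

    models = []
    for dataset in dataset_list:
        if dataset.id.endswith("-details"):
            dataset_id = dataset.id
            try:
                # Check if the dataset can be loaded (raises for gated or broken repos)
                print(dataset_id)
                check_gated = datasets.get_dataset_config_names(dataset_id)
                print(check_gated)
                # Format: "open-llm-leaderboard/<provider>__<model_name>-details"
                model_part = dataset_id.split("/")[-1].replace("-details", "")
                if "__" in model_part:
                    provider, model = model_part.split("__", 1)
                    models.append(f"{provider}/{model}")
            except Exception:
                # Assumption: datasets that cannot be accessed are skipped
                continue
    return models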
src/similarity.py
CHANGED
@@ -1,6 +1,6 @@
 import numpy as np
 
-from lmsim.metrics import Metrics,
+from lmsim.metrics import Metrics, CAPA, EC
 
 from src.dataloading import load_run_data
 from src.utils import softmax, one_hot
@@ -32,9 +32,9 @@ def compute_similarity(metric: Metrics, outputs_a: list[np.array], outputs_b: li
 def compute_pairwise_similarities(metric_name: str, probs: list[list[np.array]], gts: list[list[int]]) -> np.array:
     # Select chosen metric
     if metric_name == "Kappa_p (prob.)":
-        metric =
+        metric = CAPA()
     elif metric_name == "Kappa_p (det.)":
-        metric =
+        metric = CAPA(prob=False)
         # Convert probabilities to one-hot
         probs = [[one_hot(p) for p in model_probs] for model_probs in probs]
     elif metric_name == "Error Consistency":
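The metric dispatch that these lines land in can be summarized as below. The CAPA() and CAPA(prob=False) constructors come straight from the diff; EC() for the "Error Consistency" branch and the fallback error are assumptions inferred from the import line, not verified against the lmsim API.

# Sketch of the metric-selection branch in compute_pairwise_similarities after
# this commit. EC() for "Error Consistency" is an assumption based on the
# imported name; the rest mirrors the diff above.
import numpy as np
from lmsim.metrics import Metrics, CAPA, EC

from src.utils import one_hot  # project-local helper used in the diff

def select_metric(metric_name: str, probs: list[list[np.ndarray]]) -> tuple[Metrics, list[list[np.ndarray]]]:
    if metric_name == "Kappa_p (prob.)":
        metric = CAPA()            # probabilistic CAPA
    elif metric_name == "Kappa_p (det.)":
        metric = CAPA(prob=False)  # deterministic CAPA
        # Deterministic variant compares hard choices, so collapse the
        # probability vectors to one-hot first.
        probs = [[one_hot(p) for p in model_probs] for model_probs in probs]
    elif metric_name == "Error Consistency":
        metric = EC()              # assumption: EC is the error-consistency class
    else:
        raise ValueError(f"Unknown metric: {metric_name}")
    return metric, probs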