Joschka Strueber committed
Commit ce6be70 · 1 Parent(s): 5d4059c

[Add, Fix] change to CAPA, fix error in dataloading

Files changed (3)
  1. app.py +2 -2
  2. src/dataloading.py +5 -3
  3. src/similarity.py +3 -3
app.py CHANGED
@@ -110,7 +110,7 @@ with gr.Blocks(title="LLM Similarity Analyzer") as demo:
         info="Open LLM Leaderboard v2 benchmark datasets"
     )
     metric_dropdown = gr.Dropdown(
-        choices=["Kappa_p (prob.)", "Kappa_p (det.)", "Error Consistency"],
+        choices=["CAPA", "CAPA (det.)", "Error Consistency"],
         label="Select Metric",
         info="Select a similarity metric to compute"
     )
@@ -158,7 +158,7 @@ with gr.Blocks(title="LLM Similarity Analyzer") as demo:
     - **Models**: Open LLM Leaderboard models \n
     - Every model evaluation is gated on Hugging Face and access has to be requested. \n
     - We requested access for the most popular models, but some may be missing. \n
-    - **Metrics**: Kappa_p (probabilistic), Kappa_p (deterministic), Error Consistency""")
+    - **Metrics**: CAPA (probabilistic), CAPA (deterministic), Error Consistency""")
 
 if __name__ == "__main__":
     demo.launch(ssr_mode=False)
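For context, a minimal sketch of how a metric dropdown like this is typically wired up in a Gradio Blocks app. The selected label is passed verbatim to whatever callback computes the similarity, so the dropdown choices have to match the strings that the backend checks. The on_select handler and the Textbox output below are illustrative assumptions, not part of app.py.

import gradio as gr

with gr.Blocks(title="LLM Similarity Analyzer") as demo:
    metric_dropdown = gr.Dropdown(
        choices=["CAPA", "CAPA (det.)", "Error Consistency"],
        label="Select Metric",
        info="Select a similarity metric to compute",
    )
    output = gr.Textbox(label="Result")

    def on_select(metric_name: str) -> str:
        # Hypothetical handler: in the real app the selected label would be
        # forwarded to the similarity backend.
        return f"Selected metric: {metric_name}"

    metric_dropdown.change(on_select, inputs=metric_dropdown, outputs=output)

if __name__ == "__main__":
    demo.launch()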
src/dataloading.py CHANGED
@@ -9,17 +9,19 @@ def get_leaderboard_models():
     api = HfApi()
 
     # List all datasets in the open-llm-leaderboard organization
-    datasets = api.list_datasets(author="open-llm-leaderboard")
+    dataset_list = api.list_datasets(author="open-llm-leaderboard")
 
     models = []
-    for dataset in datasets:
+    for dataset in dataset_list:
         if dataset.id.endswith("-details"):
             dataset_id = dataset.id
             try:
                 # Check if the dataset can be loaded
+                print(dataset_id)
                 check_gated = datasets.get_dataset_config_names(dataset_id)
+                print(check_gated)
                 # Format: "open-llm-leaderboard/<provider>__<model_name>-details"
-                model_part = dataset.id.split("/")[-1].replace("-details", "")
+                model_part = dataset_id.split("/")[-1].replace("-details", "")
                 if "__" in model_part:
                     provider, model = model_part.split("__", 1)
                     models.append(f"{provider}/{model}")
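A plausible reading of this fix (the exact failure mode is an assumption): the old code bound the result of api.list_datasets(...) to a local name datasets, shadowing the imported datasets library, so the later call to datasets.get_dataset_config_names(...) resolved to the listing result instead of the module. Renaming the local to dataset_list and reusing dataset_id avoids the shadowing. Below is a self-contained sketch of the corrected loop; the imports, the except handling, and the return are assumptions about the parts of the function not shown in the diff, and the debug prints are omitted.

import datasets  # the Hugging Face `datasets` library
from huggingface_hub import HfApi


def get_leaderboard_models() -> list[str]:
    api = HfApi()

    # Use a distinct name so the `datasets` module is not shadowed.
    dataset_list = api.list_datasets(author="open-llm-leaderboard")

    models = []
    for dataset in dataset_list:
        if dataset.id.endswith("-details"):
            dataset_id = dataset.id
            try:
                # Raises if the dataset is gated or otherwise unreadable.
                datasets.get_dataset_config_names(dataset_id)
                # Format: "open-llm-leaderboard/<provider>__<model_name>-details"
                model_part = dataset_id.split("/")[-1].replace("-details", "")
                if "__" in model_part:
                    provider, model = model_part.split("__", 1)
                    models.append(f"{provider}/{model}")
            except Exception:
                continue  # skip datasets we cannot access
    return models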
src/similarity.py CHANGED
@@ -1,6 +1,6 @@
 import numpy as np
 
-from lmsim.metrics import Metrics, Kappa_p, EC
+from lmsim.metrics import Metrics, CAPA, EC
 
 from src.dataloading import load_run_data
 from src.utils import softmax, one_hot
@@ -32,9 +32,9 @@ def compute_similarity(metric: Metrics, outputs_a: list[np.array], outputs_b: li
 def compute_pairwise_similarities(metric_name: str, probs: list[list[np.array]], gts: list[list[int]]) -> np.array:
     # Select chosen metric
     if metric_name == "Kappa_p (prob.)":
-        metric = Kappa_p()
+        metric = CAPA()
     elif metric_name == "Kappa_p (det.)":
-        metric = Kappa_p(prob=False)
+        metric = CAPA(prob=False)
         # Convert probabilities to one-hot
         probs = [[one_hot(p) for p in model_probs] for model_probs in probs]
     elif metric_name == "Error Consistency":
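For orientation, a hedged sketch of the metric dispatch after the rename: probabilistic CAPA keeps the raw per-question probabilities, while the deterministic variant first collapses them to one-hot predictions. The EC() constructor for the Error Consistency branch, the tuple-returning helper shape, and the metric-name strings are assumptions based only on the imports and labels visible in the diff, not on lmsim documentation.

import numpy as np
from lmsim.metrics import CAPA, EC, Metrics

from src.utils import one_hot


def select_metric(
    metric_name: str, probs: list[list[np.ndarray]]
) -> tuple[Metrics, list[list[np.ndarray]]]:
    # Mirrors the branch shown in the diff.
    if metric_name == "Kappa_p (prob.)":
        return CAPA(), probs
    if metric_name == "Kappa_p (det.)":
        # Deterministic CAPA operates on one-hot predictions.
        probs = [[one_hot(p) for p in model_probs] for model_probs in probs]
        return CAPA(prob=False), probs
    if metric_name == "Error Consistency":
        return EC(), probs  # EC() constructor is an assumption
    raise ValueError(f"Unknown metric: {metric_name}")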