Spaces:

bethgelab
/

lm-similarity

Running

Joschka Strueber committed 16 days ago

Commit

93d753c

1 Parent(s): 1e010df

[Add, Fix] add loading mechanism for cached models, change error to warning when computing heatmap

Files changed (2) hide show

app.py CHANGED Viewed

@@ -29,7 +29,7 @@ def create_heatmap(selected_models, selected_dataset, selected_metric):
             failed_models.append(selected_models[i])
     if failed_models:
-        raise gr.Error(f"Failed to load data for models: {', '.join(failed_models)}")
     # Create figure and heatmap using seaborn
     plt.figure(figsize=(8, 6))
@@ -94,6 +94,8 @@ links_markdown = """
 [🤗 Data](https://huggingface.co/datasets/bethgelab/lm-similarity)
 """
 # Create Gradio interface
 with gr.Blocks(title="LLM Similarity Analyzer") as demo:
     gr.Markdown("## Model Similarity Comparison Tool")
@@ -101,7 +103,7 @@ with gr.Blocks(title="LLM Similarity Analyzer") as demo:
     with gr.Row():
         dataset_dropdown = gr.Dropdown(
-            choices=get_leaderboard_datasets(None),
             label="Select Dataset",
             value="mmlu_pro",
             filterable=True,
@@ -118,7 +120,7 @@ with gr.Blocks(title="LLM Similarity Analyzer") as demo:
     model_dropdown = gr.Dropdown(
         choices=get_leaderboard_models_cached(),
         label="Select Models",
-        value=["HuggingFaceTB/SmolLM2-1.7B-Instruct", "tiiuae/Falcon3-7B-Instruct", "google/gemma-2-27b-it", "Qwen/Qwen2.5-72B-Instruct"],
         multiselect=True,
         filterable=True,
         allow_custom_value=False,

             failed_models.append(selected_models[i])
     if failed_models:
+        gr.Warning(f"Failed to load data for models: {'\n'.join(failed_models)}")
     # Create figure and heatmap using seaborn
     plt.figure(figsize=(8, 6))
 [🤗 Data](https://huggingface.co/datasets/bethgelab/lm-similarity)
 """
+model_init = ["HuggingFaceTB/SmolLM2-1.7B-Instruct", "tiiuae/Falcon3-7B-Instruct", "google/gemma-2-27b-it", "Qwen/Qwen2.5-72B-Instruct"]
 # Create Gradio interface
 with gr.Blocks(title="LLM Similarity Analyzer") as demo:
     gr.Markdown("## Model Similarity Comparison Tool")
     with gr.Row():
         dataset_dropdown = gr.Dropdown(
+            choices=get_leaderboard_datasets(model_init),
             label="Select Dataset",
             value="mmlu_pro",
             filterable=True,
     model_dropdown = gr.Dropdown(
         choices=get_leaderboard_models_cached(),
         label="Select Models",
+        value=model_init,
         multiselect=True,
         filterable=True,
         allow_custom_value=False,

src/dataloading.py CHANGED Viewed

@@ -8,6 +8,9 @@ from datasets.exceptions import DatasetNotFoundError
 def get_leaderboard_models():
     api = HfApi()
     # List all datasets in the open-llm-leaderboard organization
     dataset_list = api.list_datasets(author="open-llm-leaderboard")
@@ -15,19 +18,23 @@ def get_leaderboard_models():
     models = []
     for dataset in dataset_list:
         if dataset.id.endswith("-details"):
-            dataset_id = dataset.id
-            try:
-                # Check if the dataset can be loaded
-                check_gated = datasets.get_dataset_config_names(dataset_id)
-                # Format: "open-llm-leaderboard/<provider>__<model_name>-details"
-                model_part = dataset_id.split("/")[-1].replace("-details", "")
-                if "__" in model_part:
-                    provider, model = model_part.split("__", 1)
-                    models.append(f"{provider}/{model}")
-                else:
-                    models.append(model_part)
-            except Exception as e:
-                pass
     # Save model list as txt file
     with open("models.txt", "w") as f:

 def get_leaderboard_models():
     api = HfApi()
+    # Load prechecked models
+    ungated_models = set(line.strip() for line in open("models.txt"))
     # List all datasets in the open-llm-leaderboard organization
     dataset_list = api.list_datasets(author="open-llm-leaderboard")
     models = []
     for dataset in dataset_list:
         if dataset.id.endswith("-details"):
+            # Format: "open-llm-leaderboard/<provider>__<model_name>-details"
+            model_part = dataset.id.split("/")[-1].replace("-details", "")
+            if "__" in model_part:
+                provider, model = model_part.split("__", 1)
+                model_name = f"{provider}/{model}"
+            else:
+                model_name = model_part
+            # Only perform the check if model_name is not in the ungated_models set.
+            if model_name not in ungated_models:
+                try:
+                    # Check if the dataset can be loaded; if not, skip it.
+                    datasets.get_dataset_config_names(model_name)
+                except Exception as e:
+                    continue  # Skip dataset if an exception occurs
+            models.append(model_name)
     # Save model list as txt file
     with open("models.txt", "w") as f: