# Spaces: Runtime error
# (Page-status text captured together with the script; not part of the program.)
# Build, or reload from a local pickle cache, a pandas table of the
# "renumics/cifar100-enriched" test split with cleanlab Datalab issue columns
# appended column-wise.

import os
import pickle

import datasets
import umap  # not used in the visible part of the script; kept in case later code needs it

if __name__ == "__main__":
    cache_file = "dataset_cache.pkl"

    if os.path.exists(cache_file):
        # Load the previously enriched table from the cache.
        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file. This is only safe while the cache is a file this script
        # wrote itself; do not point cache_file at untrusted data.
        with open(cache_file, "rb") as file:
            df = pickle.load(file)
        print("Dataset loaded from cache.")
    else:
        # These are only needed when the table has to be rebuilt, so they stay
        # local to this branch (as in the original) and the cache-hit path
        # does not import them explicitly.
        from tabulate import tabulate
        from cleanlab import Datalab
        import pandas as pd
        import numpy as np

        # Load dataset using datasets.load_dataset()
        ds = datasets.load_dataset("renumics/cifar100-enriched", split="test")
        print("Dataset loaded using datasets.load_dataset().")

        # Single conversion to pandas with the label column renamed. The
        # original also ran a plain ds.to_pandas() first and threw it away.
        df = ds.rename_columns({"fine_label": "labels"}).to_pandas()

        # Datalab is given the un-renamed dataset, so the label column is
        # still called "fine_label" here.
        lab = Datalab(data=ds, label_name="fine_label")

        # Turn the per-row sequence columns into 2-D arrays for cleanlab.
        features = np.array([x.tolist() for x in df["embedding"]])
        pred_probs = np.array([x.tolist() for x in df["probabilities"]])
        lab.find_issues(features=features, pred_probs=pred_probs)

        # Fetch the issue table once and reuse it for the preview and the join.
        issues = lab.get_issues()

        # Preview: first four and last three rows of the issue table.
        print(
            tabulate(
                issues.iloc[[0, 1, 2, 3, -3, -2, -1]],
                headers='keys',
                tablefmt='psql',
            )
        )

        # Append the issue columns to the data columns (aligned on the index).
        df = pd.concat([df, issues], axis=1)

        # Save dataset to cache
        with open(cache_file, "wb") as file:
            pickle.dump(df, file)
        print("Dataset saved to cache.")

    # The original bound the cached table to `dataset` but the freshly built
    # one to `df`, so each path left one of the two names undefined. Expose
    # both names on both paths so later code works regardless of which ran.
    dataset = df