Spaces:
Runtime error
Runtime error
import pickle | |
from renumics import spotlight | |
import os | |
import requests | |
import pandas as pd | |
from renumics import spotlight | |
from renumics.spotlight.analysis import DataIssue | |
def rows_by_score(df: pd.DataFrame, mask: pd.Series, score_column: str) -> list:
    """Return index labels of the rows selected by ``mask``, lowest score first.

    Args:
        df: Frame to select from.
        mask: Boolean selector aligned with ``df`` (e.g. ``df["flag"]`` or
            ``df["score"] < 0.6``).
        score_column: Column whose ascending order determines the result order.

    Returns:
        The index labels of the selected rows as a plain list; empty when
        ``mask`` selects nothing.
    """
    return df[mask].sort_values(score_column).index.tolist()


def build_issues(df: pd.DataFrame) -> list:
    """Build the three ``DataIssue`` entries passed to ``spotlight.show``.

    Reads the columns ``is_label_issue``/``label_score``, ``outlier_score`` and
    ``is_near_duplicate_issue``/``near_duplicate_score`` from ``df``.

    Returns:
        ``[label_issue, outlier_issue, near_duplicate_issue]`` in that order.
    """
    label_issue = DataIssue(
        severity="medium",
        title="label-issue",
        rows=rows_by_score(df, df["is_label_issue"], "label_score"),
        description="Label issue found by cleanlab - Review and correct if necessary",
    )
    outlier_issue = DataIssue(
        severity="medium",
        title="outlier-issue",
        # The 0.6 threshold matches the wording of the description below.
        rows=rows_by_score(df, df["outlier_score"] < 0.6, "outlier_score"),
        description="Outlier score < 0.6 - Review and remove or collect more data",
    )
    near_duplicate_issue = DataIssue(
        severity="medium",
        title="near-duplicate-issue",
        rows=rows_by_score(
            df, df["is_near_duplicate_issue"], "near_duplicate_score"
        ),
        description="Near duplicate issue found by cleanlab - Review and remove if necessary",
    )
    return [label_issue, outlier_issue, near_duplicate_issue]


def main() -> None:
    """Load the cached dataset and serve it through Spotlight in a loop.

    Prints a hint and returns without serving when the cache file is missing.
    """
    cache_file = "dataset_cache.pkl"
    if not os.path.exists(cache_file):
        print(f"Dataset {cache_file} not found. Please run prepare.py first.")
        return

    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file. Only load a cache that was produced locally (the message above
    # points at prepare.py), never one obtained from an untrusted source.
    with open(cache_file, "rb") as file:
        df = pickle.load(file)
    print("Dataset loaded from cache.")

    # Everything below is invariant across relaunches, so build it once
    # instead of on every loop iteration.
    issues = build_issues(df)
    dtypes = {
        "image": spotlight.Image,
        "image_full": spotlight.Image,
        "embedding": spotlight.Embedding,
        "embedding_reduced": spotlight.Embedding,
        "probabilities": spotlight.Embedding,
    }
    display_df = df.rename(
        columns={"fine_label_str": "label", "fine_label_prediction_str": "pred"}
    )

    # NOTE(review): presumably spotlight.show blocks until the viewer stops,
    # so this loop relaunches it each time - confirm against the Spotlight docs.
    while True:
        view = spotlight.show(
            display_df,
            dtype=dtypes,
            issues=issues,
            layout="layout.json",
            port=7860,
            host="0.0.0.0",
            allow_filebrowsing=False,
        )
        view.close()


if __name__ == "__main__":
    main()