sps44 committed on
Commit
6f912a5
·
1 Parent(s): f7bfdc4

convert to cae dataset

Browse files
Dockerfile CHANGED
@@ -8,12 +8,10 @@ ENV HOME=/code
8
  RUN apt install curl
9
  RUN pip install pip -U
10
 
11
- RUN pip install renumics-spotlight==1.3.0rc8 pyarrow
12
-
13
- RUN pip install datasets cleanvision
14
 
15
  COPY . .
16
  RUN mkdir -p /code/.cache
17
  RUN chmod -R 777 /code
18
- RUN python prepare.py
19
  CMD ["python", "run.py"]
 
8
  RUN apt install curl
9
  RUN pip install pip -U
10
 
11
+ RUN pip install renumics-spotlight==1.3.0
 
 
12
 
13
  COPY . .
14
  RUN mkdir -p /code/.cache
15
  RUN chmod -R 777 /code
16
+ #RUN python prepare.py
17
  CMD ["python", "run.py"]
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Explore data slices in the CIFAR-100 benchmark
3
  emoji: 📊
4
  colorFrom: gray
5
  colorTo: blue
@@ -8,14 +8,13 @@ pinned: false
8
  license: mit
9
  app_file: run.py
10
  datasets:
11
- - renumics/cifar100-enriched
12
- - cifar100
13
  tags:
14
  - renumics
15
  - spotlight
16
- - sliceline
 
17
  - data-centric-ai
18
- duplicated_from: renumics/cifar100-sliceline-demo
19
  ---
20
 
21
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Explore issues in a crash simulation dataset
3
  emoji: πŸ“Š
4
  colorFrom: gray
5
  colorTo: blue
 
8
  license: mit
9
  app_file: run.py
10
  datasets:
 
 
11
  tags:
12
  - renumics
13
  - spotlight
14
+ - simulation
15
+ - CAE
16
  - data-centric-ai
17
+
18
  ---
19
 
20
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
cifar100-enrichment-cv.parquet → converted_cae_dataset.h5 RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0bc7e67b598b765cce75309f7c414a13ca9fcc8004f436a0490822c899bfa66c
3
- size 544628
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b4c599f0fcd7715b7bc76da66fa1591053693edfad7035cf7050192cac57d79
3
+ size 799730101
sliceline.pkl → issues.pkl RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e55a43568ba74521a1b62c44329c8b8dccf862715a5d067cc68844bc28e52d7
3
- size 8738
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5072424adc9edb6027ab8cf3eb8cb6677681b38433db5be35677a60c1e789030
3
+ size 428
requirements.txt CHANGED
@@ -1,3 +1 @@
1
  renumics-spotlight
2
- datasets
3
- fastparquet
 
1
  renumics-spotlight
 
 
run.py CHANGED
@@ -5,36 +5,16 @@ import os
5
  import pandas as pd
6
 
7
  if __name__ == "__main__":
8
- cache_file = "dataset_cache.parquet"
9
- cache_file_enrichment="cifar100-enrichment-cv.parquet"
10
- cache_file_issues="sliceline.pkl"
11
-
12
- if os.path.exists(cache_file):
13
- # Load dataset from cache
14
- df = pd.read_parquet(cache_file)
15
- print("Dataset loaded from cache.")
16
- else:
17
- # Load dataset using datasets.load_dataset()
18
- dataset = datasets.load_dataset("renumics/cifar100-enriched", split="test")
19
- print("Dataset loaded using datasets.load_dataset().")
20
-
21
- df = dataset.to_pandas()
22
-
23
- # Save dataset to cache
24
- df.to_parquet(cache_file)
25
-
26
- print("Dataset saved to cache.")
27
-
28
- df_cv=pd.read_parquet(cache_file_enrichment)
29
-
30
- df = pd.concat([df, df_cv], axis=1)
31
 
32
  with open(cache_file_issues, "rb") as issue_file:
33
  issues = pickle.load(issue_file)
34
 
35
  #df = dataset.to_pandas()
36
- df_show = df.drop(columns=['embedding', 'probabilities'])
37
  while True:
38
- view = spotlight.show(df_show, issues=issues, port=7860, host="0.0.0.0", layout="sliceline-layout.json",
39
- dtype={"image": spotlight.Image, "embedding_reduced": spotlight.Embedding}, allow_filebrowsing=False)
40
  view.close()
 
5
  import pandas as pd
6
 
7
  if __name__ == "__main__":
8
+ cache_file = "converted_cae_dataset.h5"
9
+ cache_file_issues="issues.pkl"
10
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  with open(cache_file_issues, "rb") as issue_file:
13
  issues = pickle.load(issue_file)
14
 
15
  #df = dataset.to_pandas()
16
+
17
  while True:
18
+ view = spotlight.show(cache_file, issues=issues, port=7860, host="0.0.0.0", #layout="cae-layout.json",
19
+ allow_filebrowsing=False)
20
  view.close()
sliceline-layout.json DELETED
@@ -1 +0,0 @@
1
- {"orientation":"vertical","children":[{"kind":"split","weight":1,"orientation":"horizontal","children":[{"kind":"tab","weight":1,"children":[{"kind":"widget","name":"Table","type":"table","config":{"tableView":"selected","visibleColumns":null,"sorting":null,"orderByRelevance":false}}]},{"kind":"tab","weight":1,"children":[{"kind":"widget","name":"Similarity Map","type":"similaritymap","config":{"placeBy":null,"reductionMethod":null,"colorBy":"fine_label_prediction_error","sizeBy":null,"filter":false,"umapNNeighbors":20,"umapMetric":null,"umapMinDist":0.15,"pcaNormalization":null,"umapMenuLocalGlobalBalance":null,"umapMenuIsAdvanced":false}}]},{"kind":"tab","weight":1,"children":[{"kind":"widget","name":"Issues","type":"IssuesWidget","config":null}]}]},{"kind":"tab","weight":1,"children":[{"kind":"widget","name":"Inspector","type":"inspector","config":{"views":[{"view":"ArrayLens","key":"12MuLXBrGWyR1UqByhSAGR","name":"embedding_reduced","columns":["embedding_reduced"]},{"view":"ImageView","key":"tuCdyeyADoxB7jtazsiLt2","name":"image","columns":["image"]},{"view":"TextLens","columns":["fine_label_prediction_str"],"name":"view","key":"45099023-3dcc-4f56-baa3-b0447a98a3c6"},{"view":"TextLens","columns":["fine_label_str"],"name":"view","key":"f057b3df-7920-4adb-bf6f-03c260a56b94"}]}}]}]}