lhoestq HF staff committed on
Commit
ec912e5
·
1 Parent(s): 2eb23f1
Files changed (3) hide show
  1. __pycache__/app.cpython-39.pyc +0 -0
  2. app.py +42 -0
  3. requirements.txt +2 -0
__pycache__/app.cpython-39.pyc ADDED
Binary file (1.79 kB). View file
 
app.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Gradio app that previews the first row group of a Parquet file from a Hub dataset."""

import gradio as gr
import pyarrow.parquet as pq
from gradio_huggingfacehub_search import HuggingfaceHubSearch
from huggingface_hub import HfFileSystem

# Shared filesystem handle: used both to glob for Parquet files and to read them.
fs = HfFileSystem()

with gr.Blocks() as demo:
    # NOTE(review): nesting was reconstructed from a flattened listing; the two
    # columns are assumed to be siblings -- confirm against the intended layout.
    with gr.Column():
        dataset_search = HuggingfaceHubSearch(
            label="Hub Dataset ID",
            placeholder="Search for dataset id on Huggingface",
            search_type="dataset",
        )
        revision_textbox = gr.Textbox("main")
        parquet_file_dropdown = gr.Dropdown()
    with gr.Column():
        output_dataframe = gr.DataFrame()

    def _show_input_preview(dataset, revision, parquet_file):
        """Yield component updates for the given dataset, revision and file.

        Args:
            dataset: Dataset id taken from the search box.
            revision: Revision used in the ``datasets/{dataset}@{revision}`` path.
            parquet_file: Either an ``int`` index into the Parquet files found
                under that path, or the path of the file to open directly.

        Yields:
            Dicts mapping one output component to its new value, in order:
            the revision textbox, the file dropdown, then the dataframe
            holding the first row group of the selected file.

        Raises:
            gr.Error: If an index was requested but the glob found no
                Parquet file for this dataset and revision.
        """
        # Nothing to preview while the search box is empty.
        if not dataset:
            return
        yield {revision_textbox: revision}
        if isinstance(parquet_file, int):
            parquet_files = fs.glob(f"datasets/{dataset}@{revision}/**/*.parquet")
            if not parquet_files:
                # Surface a readable message instead of a bare IndexError.
                raise gr.Error(f"No Parquet files found in {dataset}@{revision}")
            parquet_file = parquet_files[parquet_file]
            yield {parquet_file_dropdown: gr.Dropdown(choices=parquet_files, value=parquet_file)}
        elif not parquet_file:
            # The dropdown was cleared: there is no file to open.
            return
        else:
            yield {parquet_file_dropdown: gr.Dropdown(value=parquet_file)}
        parquet = pq.ParquetFile(parquet_file, filesystem=fs)
        if parquet.num_row_groups == 0:
            # A file without row groups still has a schema; show empty columns.
            preview = parquet.schema_arrow.empty_table()
        else:
            preview = parquet.read_row_group(0)
        yield {output_dataframe: preview.to_pandas()}

    @dataset_search.change(inputs=[dataset_search], outputs=[revision_textbox, parquet_file_dropdown, output_dataframe])
    def show_input_from_dataset_search(dataset):
        """Preview the first Parquet file of ``dataset`` at revision ``main``."""
        yield from _show_input_preview(dataset, revision="main", parquet_file=0)

    @revision_textbox.change(inputs=[dataset_search, revision_textbox], outputs=[revision_textbox, parquet_file_dropdown, output_dataframe])
    def show_input_from_revision(dataset, revision):
        """Preview the first Parquet file of ``dataset`` at ``revision``."""
        yield from _show_input_preview(dataset, revision=revision, parquet_file=0)

    # Bound to the dropdown (not the revision textbox) so that choosing a file
    # refreshes the preview and a revision edit triggers a single handler.
    @parquet_file_dropdown.change(inputs=[dataset_search, revision_textbox, parquet_file_dropdown], outputs=[revision_textbox, parquet_file_dropdown, output_dataframe])
    def show_input_from_parquet_file(dataset, revision, parquet_file):
        """Preview the Parquet file currently selected in the dropdown."""
        yield from _show_input_preview(dataset, revision=revision, parquet_file=parquet_file)

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ pyarrow
2
+ gradio_huggingfacehub_search