Upload 9 files
Browse files- .gitattributes +1 -0
- .replit +39 -0
- README.md +1 -14
- app.py +200 -0
- generated-icon.png +3 -0
- pyproject.toml +17 -0
- replit.nix +23 -0
- security_scanner.py +74 -0
- utils.py +300 -0
- uv.lock +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
generated-icon.png filter=lfs diff=lfs merge=lfs -text
|
.replit
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
modules = ["python-3.11"]
|
2 |
+
|
3 |
+
[nix]
|
4 |
+
channel = "stable-24_05"
|
5 |
+
|
6 |
+
[deployment]
|
7 |
+
deploymentTarget = "autoscale"
|
8 |
+
run = ["sh", "-c", "streamlit run app.py"]
|
9 |
+
|
10 |
+
[workflows]
|
11 |
+
runButton = "Project"
|
12 |
+
|
13 |
+
[[workflows.workflow]]
|
14 |
+
name = "Project"
|
15 |
+
mode = "parallel"
|
16 |
+
author = "agent"
|
17 |
+
|
18 |
+
[[workflows.workflow.tasks]]
|
19 |
+
task = "workflow.run"
|
20 |
+
args = "Streamlit Server"
|
21 |
+
|
22 |
+
[[workflows.workflow]]
|
23 |
+
name = "Streamlit Server"
|
24 |
+
author = "agent"
|
25 |
+
|
26 |
+
[workflows.workflow.metadata]
|
27 |
+
agentRequireRestartOnSave = false
|
28 |
+
|
29 |
+
[[workflows.workflow.tasks]]
|
30 |
+
task = "packager.installForAll"
|
31 |
+
|
32 |
+
[[workflows.workflow.tasks]]
|
33 |
+
task = "shell.exec"
|
34 |
+
args = "streamlit run app.py"
|
35 |
+
waitForPort = 5000
|
36 |
+
|
37 |
+
[[ports]]
|
38 |
+
localPort = 5000
|
39 |
+
externalPort = 80
|
README.md
CHANGED
@@ -1,16 +1,3 @@
|
|
1 |
-
---
|
2 |
-
title: PythonScriptShowcase
|
3 |
-
emoji: ⚡
|
4 |
-
colorFrom: blue
|
5 |
-
colorTo: yellow
|
6 |
-
sdk: streamlit
|
7 |
-
sdk_version: 1.42.2
|
8 |
-
app_file: app.py
|
9 |
-
pinned: true
|
10 |
-
license: mit
|
11 |
-
short_description: Python scripts and Hugging Face datasets
|
12 |
-
---
|
13 |
-
|
14 |
# Python & HuggingFace Explorer
|
15 |
|
16 |
A Streamlit-based demonstration platform for showcasing Python scripts and Hugging Face datasets with interactive visualization.
|
@@ -72,4 +59,4 @@ The application uses a custom styling inspired by Hugging Face:
|
|
72 |
|
73 |
## License
|
74 |
|
75 |
-
This project is open source and available under the MIT License.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# Python & HuggingFace Explorer
|
2 |
|
3 |
A Streamlit-based demonstration platform for showcasing Python scripts and Hugging Face datasets with interactive visualization.
|
|
|
59 |
|
60 |
## License
|
61 |
|
62 |
+
This project is open source and available under the MIT License.
|
app.py
ADDED
@@ -0,0 +1,200 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from components.code_editor import render_code_editor
|
3 |
+
from components.dataset_explorer import render_dataset_explorer
|
4 |
+
from components.visualization import render_visualization
|
5 |
+
from components.model_metrics import render_model_metrics
|
6 |
+
import os
|
7 |
+
import sys
|
8 |
+
import time
|
9 |
+
from utils import load_css, create_logo
|
10 |
+
|
11 |
+
# Page configuration
|
12 |
+
st.set_page_config(
|
13 |
+
page_title="Python & HuggingFace Explorer",
|
14 |
+
page_icon="🤗",
|
15 |
+
layout="wide",
|
16 |
+
initial_sidebar_state="expanded"
|
17 |
+
)
|
18 |
+
|
19 |
+
# Load custom CSS
|
20 |
+
load_css()
|
21 |
+
|
22 |
+
# Main content
|
23 |
+
def main():
    """Top-level app entry: draw the sidebar (logo, navigation, quick dataset
    search), initialize session state, and dispatch to the selected page."""
    # Create sidebar
    with st.sidebar:
        create_logo()
        st.title("Navigation")
        page = st.radio(
            "Select a page:",
            ["Home", "Code Editor", "Dataset Explorer", "Visualizations", "Model Metrics"]
        )

        # HF Dataset search
        st.sidebar.markdown("---")
        st.sidebar.subheader("Dataset Quick Search")
        dataset_name = st.sidebar.text_input("Enter a HuggingFace dataset name")
        if dataset_name and st.sidebar.button("Load Dataset"):
            # Stash the name; the Dataset Explorer page reads it from session state.
            st.session_state.dataset_name = dataset_name
            if page != "Dataset Explorer":
                st.sidebar.info("Dataset loaded! Go to Dataset Explorer to view it.")

        st.sidebar.markdown("---")
        st.sidebar.markdown("""
        <div style="font-size: 0.8em; color: #666; text-align: center;">
            <p>Built with ❤️ using</p>
            <p>Streamlit & HuggingFace</p>
            <p style="font-size: 0.9em; margin-top: 5px;">© 2025 Python Explorer</p>
        </div>
        """, unsafe_allow_html=True)

    # Initialize session state for dataset
    if 'dataset_name' not in st.session_state:
        st.session_state.dataset_name = None

    # Default code shown in the Code Editor the first time it is opened.
    if 'code_content' not in st.session_state:
        st.session_state.code_content = """# Sample Python code
from datasets import load_dataset
import pandas as pd
import matplotlib.pyplot as plt

# Load a dataset from Hugging Face
dataset = load_dataset("glue", "sst2", split="train")
df = pd.DataFrame(dataset)

# Display the first few rows
print(df.head())

# Simple analysis
print(f"Number of examples: {len(df)}")
print(f"Columns: {df.columns}")

# Visualize class distribution
plt.figure(figsize=(8, 5))
df['label'].value_counts().plot(kind='bar')
plt.title('Class Distribution')
plt.xlabel('Class')
plt.ylabel('Count')
plt.tight_layout()
plt.show()
"""

    # Page content — each renderer comes from the components package.
    if page == "Home":
        render_home()
    elif page == "Code Editor":
        render_code_editor()
    elif page == "Dataset Explorer":
        render_dataset_explorer()
    elif page == "Visualizations":
        render_visualization()
    elif page == "Model Metrics":
        render_model_metrics()
|
93 |
+
|
94 |
+
def render_home():
    """Render the landing page: centered logo (or a plain title when the image
    is missing), an intro banner, four feature cards, and a getting-started box.

    All rich content is raw HTML passed through st.markdown with
    unsafe_allow_html=True, styled to match the Hugging Face look.
    """
    # Display header image instead of using a title
    from PIL import Image
    import os

    # Path to the logo image in the center of the page
    center_logo_path = "assets/python_huggingface_logo.png"

    # Check if the logo exists and display it
    if os.path.exists(center_logo_path):
        # Middle column of a 1:2:1 layout keeps the image visually centered.
        center_col1, center_col2, center_col3 = st.columns([1, 2, 1])
        with center_col2:
            image = Image.open(center_logo_path)
            # Resize image to 25% of original dimensions
            width, height = image.size
            resized_image = image.resize((width//4, height//4))
            st.image(resized_image, use_container_width=True)
    else:
        st.title("Python & HuggingFace Explorer")

    # Introduction with improved styling
    st.markdown("""
    <div style="background-color: #FFFFFF; padding: 20px; border-radius: 10px; margin-bottom: 20px; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);">
        <h2 style="color: #2196F3; text-align: center;">Welcome to the Explorer!</h2>
        <p style="font-size: 1.1em; line-height: 1.6;">This interactive platform brings together the power of Python and the HuggingFace ecosystem.
        Write and execute code, explore datasets from the HuggingFace Hub, create beautiful visualizations,
        and analyze model performance metrics - all in one seamless environment.</p>
    </div>
    """, unsafe_allow_html=True)

    # Feature cards — two columns, two cards each.
    col1, col2 = st.columns(2)

    with col1:
        st.markdown("""
        <div style="background-color: #FFFFFF; padding: 20px; border-radius: 10px; height: 200px;">
            <h3 style="color: #2196F3;">💻 Code Editor</h3>
            <p>Write, edit, and execute Python code with syntax highlighting. See your results instantly and experiment with different scripts.</p>
            <p>Features include:</p>
            <ul>
                <li>Syntax highlighting</li>
                <li>Code execution</li>
                <li>Output display</li>
            </ul>
        </div>
        """, unsafe_allow_html=True)

        st.markdown("""
        <div style="background-color: #FFFFFF; padding: 20px; border-radius: 10px; margin-top: 20px; height: 200px;">
            <h3 style="color: #2196F3;">📊 Visualizations</h3>
            <p>Create and customize visualizations from your datasets. Explore data through charts, graphs, and interactive plots.</p>
            <p>Visualization types:</p>
            <ul>
                <li>Bar charts & histograms</li>
                <li>Scatter plots</li>
                <li>Line charts</li>
                <li>And more!</li>
            </ul>
        </div>
        """, unsafe_allow_html=True)

    with col2:
        st.markdown("""
        <div style="background-color: #FFFFFF; padding: 20px; border-radius: 10px; height: 200px;">
            <h3 style="color: #2196F3;">🗃️ Dataset Explorer</h3>
            <p>Browse and analyze datasets from the HuggingFace Hub. Filter, sort, and examine data with ease.</p>
            <p>Explorer features:</p>
            <ul>
                <li>Dataset previews</li>
                <li>Basic statistics</li>
                <li>Filtering options</li>
                <li>Data exports</li>
            </ul>
        </div>
        """, unsafe_allow_html=True)

        st.markdown("""
        <div style="background-color: #FFFFFF; padding: 20px; border-radius: 10px; margin-top: 20px; height: 200px;">
            <h3 style="color: #2196F3;">📈 Model Metrics</h3>
            <p>Analyze model performance with detailed metrics and comparisons. Understand how your models perform on different datasets.</p>
            <p>Metrics available:</p>
            <ul>
                <li>Accuracy, precision, recall</li>
                <li>Confusion matrices</li>
                <li>Performance comparisons</li>
                <li>Custom metric calculations</li>
            </ul>
        </div>
        """, unsafe_allow_html=True)

    # Getting started section
    st.markdown("""
    <div style="background-color: #FFFFFF; padding: 20px; border-radius: 10px; margin-top: 20px;">
        <h3 style="color: #2196F3;">Getting Started</h3>
        <p>To begin exploring, select a page from the sidebar navigation. You can:</p>
        <ol>
            <li>Write and test Python code in the <b>Code Editor</b></li>
            <li>Search for and explore datasets in the <b>Dataset Explorer</b></li>
            <li>Create visualizations in the <b>Visualizations</b> section</li>
            <li>Analyze model performance in the <b>Model Metrics</b> page</li>
        </ol>
        <p>Ready to dive in? Select a page from the sidebar to get started!</p>
    </div>
    """, unsafe_allow_html=True)
|
198 |
+
|
199 |
+
# Script entry point: launch the Streamlit app.
if __name__ == "__main__":
    main()
|
generated-icon.png
ADDED
![]() |
Git LFS Details
|
pyproject.toml
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[project]
|
2 |
+
name = "repl-nix-workspace"
|
3 |
+
version = "0.1.0"
|
4 |
+
description = "Add your description here"
|
5 |
+
requires-python = ">=3.11"
|
6 |
+
dependencies = [
|
7 |
+
"datasets>=3.3.2",
|
8 |
+
"matplotlib>=3.10.1",
|
9 |
+
"numpy>=2.2.3",
|
10 |
+
"pandas>=2.2.3",
|
11 |
+
"pillow>=11.1.0",
|
12 |
+
"plotly>=6.0.0",
|
13 |
+
"scikit-learn>=1.6.1",
|
14 |
+
"seaborn>=0.13.2",
|
15 |
+
"streamlit>=1.42.2",
|
16 |
+
"transformers>=4.49.0",
|
17 |
+
]
|
replit.nix
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{pkgs}: {
|
2 |
+
deps = [
|
3 |
+
pkgs.zlib
|
4 |
+
pkgs.openjpeg
|
5 |
+
pkgs.libxcrypt
|
6 |
+
pkgs.libwebp
|
7 |
+
pkgs.libtiff
|
8 |
+
pkgs.libjpeg
|
9 |
+
pkgs.libimagequant
|
10 |
+
pkgs.lcms2
|
11 |
+
pkgs.tk
|
12 |
+
pkgs.tcl
|
13 |
+
pkgs.qhull
|
14 |
+
pkgs.pkg-config
|
15 |
+
pkgs.gtk3
|
16 |
+
pkgs.gobject-introspection
|
17 |
+
pkgs.ghostscript
|
18 |
+
pkgs.freetype
|
19 |
+
pkgs.ffmpeg-full
|
20 |
+
pkgs.cairo
|
21 |
+
pkgs.glibcLocales
|
22 |
+
];
|
23 |
+
}
|
security_scanner.py
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import requests
|
3 |
+
import os
|
4 |
+
import json
|
5 |
+
from typing import Dict, Any, Optional
|
6 |
+
|
7 |
+
def scan_code_for_security(
    code: str,
    api_key: Optional[str] = None
) -> Dict[str, Any]:
    """
    Scan code for security vulnerabilities using the CodePal Security Scanner API.

    Args:
        code: The code to scan as a string
        api_key: Your CodePal API key (falls back to the CODEPAL_API_KEY
            environment variable)

    Returns:
        Dict containing the parsed JSON API response

    Raises:
        ValueError: If API key is neither passed nor found in the environment
        requests.RequestException: If the API request fails
    """
    # Get API key from parameter or environment
    api_key = api_key or os.environ.get('CODEPAL_API_KEY')

    if not api_key:
        raise ValueError(
            "API key is required. Either pass it as a parameter or set "
            "the CODEPAL_API_KEY environment variable."
        )

    # API endpoint and headers
    url = "https://api.codepal.ai/v1/security-code-scanner/query"
    headers = {
        "Authorization": f"Bearer {api_key}"
    }

    # Multipart form data: the API expects the source as a form field named
    # 'code' (filename=None means "send as plain field, not a file upload").
    files = {
        'code': (None, code)
    }

    try:
        # Make the API request. A timeout prevents hanging forever on an
        # unresponsive endpoint (requests has no default timeout).
        response = requests.post(url, headers=headers, files=files, timeout=30)
        response.raise_for_status()  # Raise exception for non-2xx status codes

        return response.json()
    except requests.RequestException as e:
        # Bug fix: the original read the local variable `response` here, which
        # is unbound when requests.post() itself raises (connection error,
        # timeout) — producing an UnboundLocalError that masked the real
        # failure. The exception's attached response is the safe way in.
        print(f"Error scanning code: {e}")
        if e.response is not None:
            print(f"Response content: {e.response.text}")
        raise
|
56 |
+
|
57 |
+
# Example usage: scan a deliberately vulnerable snippet (shell execution of
# raw user input) and pretty-print the scanner's JSON verdict.
if __name__ == "__main__":
    sample_code = """
import os

def run_command(user_input):
    os.system(user_input)

run_command("ls")
"""

    # For testing, replace this with your actual API key
    # or set the CODEPAL_API_KEY environment variable
    try:
        result = scan_code_for_security(sample_code)
        print(json.dumps(result, indent=2))
    except Exception as e:
        # Catches both the missing-key ValueError and network/API failures.
        print(f"Failed to scan code: {e}")
|
utils.py
ADDED
@@ -0,0 +1,300 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import os
|
4 |
+
import base64
|
5 |
+
from pathlib import Path
|
6 |
+
import matplotlib.pyplot as plt
|
7 |
+
import seaborn as sns
|
8 |
+
import numpy as np
|
9 |
+
from datasets import load_dataset
|
10 |
+
|
11 |
+
def load_css():
    """Inject the app's custom stylesheet into the current Streamlit page.

    Reads styles/custom.css (relative to the working directory) and embeds it
    inline in a <style> tag. Raises FileNotFoundError if the file is absent.
    """
    # Explicit encoding: without it open() uses the platform locale default,
    # which breaks on Windows if the stylesheet contains non-ASCII characters.
    with open('styles/custom.css', encoding='utf-8') as f:
        st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
|
15 |
+
|
16 |
+
def create_logo():
    """Show the project logo in the sidebar, or a styled text banner when the
    image file is not bundled with the deployment."""
    import os
    from PIL import Image

    logo_path = "assets/python_huggingface_logo.png"

    if not os.path.exists(logo_path):
        # Fallback to text if image is not found
        st.markdown(
            """
            <div style="display: flex; justify-content: center; margin-bottom: 20px;">
                <h2 style="color: #2196F3;">Python & HuggingFace Explorer</h2>
            </div>
            """,
            unsafe_allow_html=True
        )
        return

    # Display the logo image at a fixed width
    st.image(Image.open(logo_path), width=200)
|
39 |
+
|
40 |
+
def get_dataset_info(dataset_name):
    """Load a HuggingFace dataset and summarize it.

    Returns a tuple (info, data) where info is a dict with the dataset name,
    example count, feature names and a sample row, and data is the loaded
    split. Returns (None, None) on any failure. Progress and errors are
    reported through Streamlit widgets, so this must run inside an app.
    """
    if not dataset_name or not isinstance(dataset_name, str):
        st.error("Invalid dataset name")
        return None, None

    try:
        # Attempt to load the dataset with default configuration
        st.info(f"Loading dataset: {dataset_name}...")

        try:
            # First try to load the dataset with streaming=False for better compatibility
            dataset = load_dataset(dataset_name, streaming=False)
            # Get the first split (DatasetDict preserves insertion order)
            first_split = next(iter(dataset.keys()))
            data = dataset[first_split]
        except Exception as e:
            st.warning(f"Couldn't load dataset with default configuration: {str(e)}. Trying specific splits...")
            # If that fails, try loading with specific splits, in order of
            # likelihood; give up only after the last one fails.
            for split_name in ["train", "test", "validation"]:
                try:
                    st.info(f"Trying to load '{split_name}' split...")
                    data = load_dataset(dataset_name, split=split_name, streaming=False)
                    break
                except Exception as split_error:
                    if split_name == "validation":  # Last attempt
                        st.error(f"Failed to load dataset with any standard split: {str(split_error)}")
                        return None, None
                    continue

        # Get basic info
        info = {
            "Dataset": dataset_name,
            "Number of examples": len(data),
            "Features": list(data.features.keys()),
            "Sample": data[0] if len(data) > 0 else None
        }

        st.success(f"Successfully loaded dataset with {info['Number of examples']} examples")
        return info, data
    except Exception as e:
        st.error(f"Error loading dataset: {str(e)}")
        # Give the user a more actionable hint for the common failure modes.
        if "Connection error" in str(e) or "timeout" in str(e).lower():
            st.warning("Network issue detected. Please check your internet connection and try again.")
        elif "not found" in str(e).lower():
            st.warning(f"Dataset '{dataset_name}' not found. Please check the dataset name and try again.")
        return None, None
|
87 |
+
|
88 |
+
def run_code(code):
    """Execute user-supplied Python code and capture what it produces.

    Returns a dict with:
        "output":  captured stdout; anything written to stderr is appended
                   under a "--- Warnings/Errors ---" header,
        "error":   rejection or exception message, empty on success,
        "figures": matplotlib Figure objects created during execution.

    NOTE(review): the substring blacklist below is trivial to bypass and
    exec() of untrusted input is inherently unsafe — this is best-effort
    screening for a demo app, not a sandbox.
    """
    import io
    import time
    from contextlib import redirect_stdout, redirect_stderr

    # Create StringIO objects to capture stdout and stderr
    stdout_capture = io.StringIO()
    stderr_capture = io.StringIO()

    # Dictionary for storing results
    results = {
        "output": "",
        "error": "",
        "figures": []
    }

    # Safety check - limit code size
    if len(code) > 100000:
        results["error"] = "Code submission too large. Please reduce the size."
        return results

    # Basic security check - this is not comprehensive
    dangerous_imports = ['os.system', 'subprocess', 'eval(', 'shutil.rmtree', 'open(', 'with open']
    for dangerous_import in dangerous_imports:
        if dangerous_import in code:
            results["error"] = f"Potential security risk: {dangerous_import} is not allowed."
            return results

    # Capture current figures to avoid including existing ones
    initial_figs = plt.get_fignums()

    # Execution "timeout". NOTE(review): this is only checked *after* exec()
    # returns, so it reports overruns but cannot interrupt a runaway script.
    MAX_EXECUTION_TIME = 30  # seconds
    start_time = time.time()

    try:
        # Create a restricted globals dictionary for the executed code
        safe_globals = {
            'plt': plt,
            'pd': pd,
            'np': np,
            'sns': sns,
            'print': print,
            '__builtins__': __builtins__,
        }

        # Add common data science libraries when they are installed
        for module_name in ['datasets', 'transformers', 'sklearn', 'math']:
            try:
                module = __import__(module_name)
                safe_globals[module_name] = module
            except ImportError:
                pass  # Module not available

        # Redirect stdout and stderr while the user code runs
        with redirect_stdout(stdout_capture), redirect_stderr(stderr_capture):
            exec(code, safe_globals)

            if time.time() - start_time > MAX_EXECUTION_TIME:
                raise TimeoutError("Code execution exceeded maximum allowed time.")

        # Get the captured output
        results["output"] = stdout_capture.getvalue()

        # Append anything the code wrote to stderr (warnings, tracebacks)
        stderr_output = stderr_capture.getvalue()
        if stderr_output:
            if results["output"]:
                results["output"] += "\n\n--- Warnings/Errors ---\n" + stderr_output
            else:
                results["output"] = "--- Warnings/Errors ---\n" + stderr_output

        # Capture only the figures created by this execution
        final_figs = plt.get_fignums()
        new_figs = set(final_figs) - set(initial_figs)

        for fig_num in new_figs:
            fig = plt.figure(fig_num)
            results["figures"].append(fig)

    except Exception as e:
        # Capture the error (includes the post-hoc TimeoutError above)
        results["error"] = f"{type(e).__name__}: {str(e)}"

    return results
|
176 |
+
|
177 |
+
def get_dataset_preview(data, max_rows=10):
    """Build a small pandas DataFrame from the head of a HuggingFace dataset.

    Takes the first `max_rows` rows via slicing; returns None (after surfacing
    a Streamlit error) when the data cannot be converted.
    """
    try:
        return pd.DataFrame(data[:max_rows])
    except Exception as e:
        st.error(f"Error converting dataset to DataFrame: {str(e)}")
        return None
|
186 |
+
|
187 |
+
def generate_basic_stats(data):
    """Generate basic per-column statistics for a dataset.

    Numeric columns get mean/median/std/min/max and a missing-value count;
    string/object columns get a unique-value count, the five most common
    values (or a placeholder when there are 100+ distinct values), and a
    missing-value count. Other dtypes get an empty entry.

    Returns a dict mapping column name -> stats dict, or None (after showing
    a Streamlit error) when the data cannot be converted to a DataFrame.
    """
    try:
        # Convert to pandas DataFrame
        df = pd.DataFrame(data)
        # (Removed unused `column_types = df.dtypes` from the original.)

        stats = {}

        for col in df.columns:
            col_stats = {}

            # Check if column is numeric
            if pd.api.types.is_numeric_dtype(df[col]):
                col_stats["mean"] = df[col].mean()
                col_stats["median"] = df[col].median()
                col_stats["std"] = df[col].std()
                col_stats["min"] = df[col].min()
                col_stats["max"] = df[col].max()
                col_stats["missing"] = df[col].isna().sum()
            # Check if column is string/object
            elif pd.api.types.is_string_dtype(df[col]) or pd.api.types.is_object_dtype(df[col]):
                col_stats["unique_values"] = df[col].nunique()
                # Cap the most-common listing so huge-cardinality columns
                # don't produce an unwieldy dict.
                col_stats["most_common"] = df[col].value_counts().head(5).to_dict() if df[col].nunique() < 100 else "Too many unique values"
                col_stats["missing"] = df[col].isna().sum()

            stats[col] = col_stats

        return stats
    except Exception as e:
        st.error(f"Error generating statistics: {str(e)}")
        return None
|
222 |
+
|
223 |
+
def create_visualization(data, viz_type, x_col=None, y_col=None, hue_col=None):
    """Draw a seaborn chart of `data` on a fresh matplotlib figure.

    Supported viz_type values: "Bar Chart", "Line Chart", "Scatter Plot",
    "Box Plot" (all require x_col and y_col), "Histogram" and "Count Plot"
    (require x_col only). Returns the Figure, or None when the required
    columns are missing (a Streamlit warning is shown) or drawing fails
    (a Streamlit error is shown).
    """
    try:
        frame = pd.DataFrame(data)
        fig, axis = plt.subplots(figsize=(10, 6))

        # Chart families sharing a call shape, keyed by viz_type:
        # plotter + the exact warning to show when columns are missing.
        xy_charts = {
            "Bar Chart": (sns.barplot, "Bar charts require both X and Y columns."),
            "Line Chart": (sns.lineplot, "Line charts require both X and Y columns."),
            "Scatter Plot": (sns.scatterplot, "Scatter plots require both X and Y columns."),
            "Box Plot": (sns.boxplot, "Box plots require both X and Y columns."),
        }

        if viz_type in xy_charts:
            plotter, missing_msg = xy_charts[viz_type]
            if not (x_col and y_col):
                st.warning(missing_msg)
                return None
            plotter(x=x_col, y=y_col, hue=hue_col, data=frame, ax=axis)
        elif viz_type == "Histogram":
            if not x_col:
                st.warning("Histograms require an X column.")
                return None
            sns.histplot(frame[x_col], ax=axis)
        elif viz_type == "Count Plot":
            if not x_col:
                st.warning("Count plots require an X column.")
                return None
            sns.countplot(x=x_col, hue=hue_col, data=frame, ax=axis)
        # Unknown viz_type: fall through and return the (empty) figure,
        # matching the original behavior.

        # Title and axis labels from whichever columns were supplied.
        plt.title(f"{viz_type} of {y_col if y_col else ''} vs {x_col if x_col else ''}")
        plt.xlabel(x_col if x_col else "")
        plt.ylabel(y_col if y_col else "")
        plt.tight_layout()

        return fig

    except Exception as e:
        st.error(f"Error creating visualization: {str(e)}")
        return None
|
283 |
+
|
284 |
+
def get_popular_datasets(category=None, limit=10):
    """Return up to `limit` well-known HuggingFace dataset names.

    When `category` names one of the known groups ("Text", "Image", "Audio",
    "Multimodal"), only that group is consulted; any other (or no) category
    yields the groups flattened in declaration order.
    """
    popular_datasets = {
        "Text": ["glue", "imdb", "squad", "wikitext", "ag_news"],
        "Image": ["cifar10", "cifar100", "mnist", "fashion_mnist", "coco"],
        "Audio": ["common_voice", "librispeech_asr", "voxpopuli", "voxceleb", "audiofolder"],
        "Multimodal": ["conceptual_captions", "flickr8k", "hateful_memes", "nlvr", "vqa"],
    }

    if category and category in popular_datasets:
        return popular_datasets[category][:limit]

    # Unknown or missing category: flatten every group, then truncate.
    flattened = [name for group in popular_datasets.values() for name in group]
    return flattened[:limit]
|
uv.lock
ADDED
The diff for this file is too large to render.
See raw diff
|
|