whackthejacker committed on
Commit
2a64443
·
verified ·
1 Parent(s): 987dcaf

Upload 9 files

Browse files
Files changed (10) hide show
  1. .gitattributes +1 -0
  2. .replit +39 -0
  3. README.md +1 -14
  4. app.py +200 -0
  5. generated-icon.png +3 -0
  6. pyproject.toml +17 -0
  7. replit.nix +23 -0
  8. security_scanner.py +74 -0
  9. utils.py +300 -0
  10. uv.lock +0 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ generated-icon.png filter=lfs diff=lfs merge=lfs -text
.replit ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ modules = ["python-3.11"]
2
+
3
+ [nix]
4
+ channel = "stable-24_05"
5
+
6
+ [deployment]
7
+ deploymentTarget = "autoscale"
8
+ run = ["sh", "-c", "streamlit run app.py"]
9
+
10
+ [workflows]
11
+ runButton = "Project"
12
+
13
+ [[workflows.workflow]]
14
+ name = "Project"
15
+ mode = "parallel"
16
+ author = "agent"
17
+
18
+ [[workflows.workflow.tasks]]
19
+ task = "workflow.run"
20
+ args = "Streamlit Server"
21
+
22
+ [[workflows.workflow]]
23
+ name = "Streamlit Server"
24
+ author = "agent"
25
+
26
+ [workflows.workflow.metadata]
27
+ agentRequireRestartOnSave = false
28
+
29
+ [[workflows.workflow.tasks]]
30
+ task = "packager.installForAll"
31
+
32
+ [[workflows.workflow.tasks]]
33
+ task = "shell.exec"
34
+ args = "streamlit run app.py"
35
+ waitForPort = 5000
36
+
37
+ [[ports]]
38
+ localPort = 5000
39
+ externalPort = 80
README.md CHANGED
@@ -1,16 +1,3 @@
1
- ---
2
- title: PythonScriptShowcase
3
- emoji: ⚡
4
- colorFrom: blue
5
- colorTo: yellow
6
- sdk: streamlit
7
- sdk_version: 1.42.2
8
- app_file: app.py
9
- pinned: true
10
- license: mit
11
- short_description: Python scripts and Hugging Face datasets
12
- ---
13
-
14
  # Python & HuggingFace Explorer
15
 
16
  A Streamlit-based demonstration platform for showcasing Python scripts and Hugging Face datasets with interactive visualization.
@@ -72,4 +59,4 @@ The application uses a custom styling inspired by Hugging Face:
72
 
73
  ## License
74
 
75
- This project is open source and available under the MIT License.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # Python & HuggingFace Explorer
2
 
3
  A Streamlit-based demonstration platform for showcasing Python scripts and Hugging Face datasets with interactive visualization.
 
59
 
60
  ## License
61
 
62
+ This project is open source and available under the MIT License.
app.py ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from components.code_editor import render_code_editor
3
+ from components.dataset_explorer import render_dataset_explorer
4
+ from components.visualization import render_visualization
5
+ from components.model_metrics import render_model_metrics
6
+ import os
7
+ import sys
8
+ import time
9
+ from utils import load_css, create_logo
10
+
11
+ # Page configuration
12
+ st.set_page_config(
13
+ page_title="Python & HuggingFace Explorer",
14
+ page_icon="🤗",
15
+ layout="wide",
16
+ initial_sidebar_state="expanded"
17
+ )
18
+
19
+ # Load custom CSS
20
+ load_css()
21
+
22
+ # Main content
23
+ def main():
24
+ # Create sidebar
25
+ with st.sidebar:
26
+ create_logo()
27
+ st.title("Navigation")
28
+ page = st.radio(
29
+ "Select a page:",
30
+ ["Home", "Code Editor", "Dataset Explorer", "Visualizations", "Model Metrics"]
31
+ )
32
+
33
+ # HF Dataset search
34
+ st.sidebar.markdown("---")
35
+ st.sidebar.subheader("Dataset Quick Search")
36
+ dataset_name = st.sidebar.text_input("Enter a HuggingFace dataset name")
37
+ if dataset_name and st.sidebar.button("Load Dataset"):
38
+ st.session_state.dataset_name = dataset_name
39
+ if page != "Dataset Explorer":
40
+ st.sidebar.info("Dataset loaded! Go to Dataset Explorer to view it.")
41
+
42
+ st.sidebar.markdown("---")
43
+ st.sidebar.markdown("""
44
+ <div style="font-size: 0.8em; color: #666; text-align: center;">
45
+ <p>Built with ❤️ using</p>
46
+ <p>Streamlit & HuggingFace</p>
47
+ <p style="font-size: 0.9em; margin-top: 5px;">© 2025 Python Explorer</p>
48
+ </div>
49
+ """, unsafe_allow_html=True)
50
+
51
+ # Initialize session state for dataset
52
+ if 'dataset_name' not in st.session_state:
53
+ st.session_state.dataset_name = None
54
+
55
+ if 'code_content' not in st.session_state:
56
+ st.session_state.code_content = """# Sample Python code
57
+ from datasets import load_dataset
58
+ import pandas as pd
59
+ import matplotlib.pyplot as plt
60
+
61
+ # Load a dataset from Hugging Face
62
+ dataset = load_dataset("glue", "sst2", split="train")
63
+ df = pd.DataFrame(dataset)
64
+
65
+ # Display the first few rows
66
+ print(df.head())
67
+
68
+ # Simple analysis
69
+ print(f"Number of examples: {len(df)}")
70
+ print(f"Columns: {df.columns}")
71
+
72
+ # Visualize class distribution
73
+ plt.figure(figsize=(8, 5))
74
+ df['label'].value_counts().plot(kind='bar')
75
+ plt.title('Class Distribution')
76
+ plt.xlabel('Class')
77
+ plt.ylabel('Count')
78
+ plt.tight_layout()
79
+ plt.show()
80
+ """
81
+
82
+ # Page content
83
+ if page == "Home":
84
+ render_home()
85
+ elif page == "Code Editor":
86
+ render_code_editor()
87
+ elif page == "Dataset Explorer":
88
+ render_dataset_explorer()
89
+ elif page == "Visualizations":
90
+ render_visualization()
91
+ elif page == "Model Metrics":
92
+ render_model_metrics()
93
+
94
+ def render_home():
95
+ # Display header image instead of using a title
96
+ from PIL import Image
97
+ import os
98
+
99
+ # Path to the logo image in the center of the page
100
+ center_logo_path = "assets/python_huggingface_logo.png"
101
+
102
+ # Check if the logo exists and display it
103
+ if os.path.exists(center_logo_path):
104
+ center_col1, center_col2, center_col3 = st.columns([1, 2, 1])
105
+ with center_col2:
106
+ image = Image.open(center_logo_path)
107
+ # Resize image to 25% of original dimensions
108
+ width, height = image.size
109
+ resized_image = image.resize((width//4, height//4))
110
+ st.image(resized_image, use_container_width=True)
111
+ else:
112
+ st.title("Python & HuggingFace Explorer")
113
+
114
+ # Introduction with improved styling
115
+ st.markdown("""
116
+ <div style="background-color: #FFFFFF; padding: 20px; border-radius: 10px; margin-bottom: 20px; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);">
117
+ <h2 style="color: #2196F3; text-align: center;">Welcome to the Explorer!</h2>
118
+ <p style="font-size: 1.1em; line-height: 1.6;">This interactive platform brings together the power of Python and the HuggingFace ecosystem.
119
+ Write and execute code, explore datasets from the HuggingFace Hub, create beautiful visualizations,
120
+ and analyze model performance metrics - all in one seamless environment.</p>
121
+ </div>
122
+ """, unsafe_allow_html=True)
123
+
124
+ # Feature cards
125
+ col1, col2 = st.columns(2)
126
+
127
+ with col1:
128
+ st.markdown("""
129
+ <div style="background-color: #FFFFFF; padding: 20px; border-radius: 10px; height: 200px;">
130
+ <h3 style="color: #2196F3;">💻 Code Editor</h3>
131
+ <p>Write, edit, and execute Python code with syntax highlighting. See your results instantly and experiment with different scripts.</p>
132
+ <p>Features include:</p>
133
+ <ul>
134
+ <li>Syntax highlighting</li>
135
+ <li>Code execution</li>
136
+ <li>Output display</li>
137
+ </ul>
138
+ </div>
139
+ """, unsafe_allow_html=True)
140
+
141
+ st.markdown("""
142
+ <div style="background-color: #FFFFFF; padding: 20px; border-radius: 10px; margin-top: 20px; height: 200px;">
143
+ <h3 style="color: #2196F3;">📊 Visualizations</h3>
144
+ <p>Create and customize visualizations from your datasets. Explore data through charts, graphs, and interactive plots.</p>
145
+ <p>Visualization types:</p>
146
+ <ul>
147
+ <li>Bar charts & histograms</li>
148
+ <li>Scatter plots</li>
149
+ <li>Line charts</li>
150
+ <li>And more!</li>
151
+ </ul>
152
+ </div>
153
+ """, unsafe_allow_html=True)
154
+
155
+ with col2:
156
+ st.markdown("""
157
+ <div style="background-color: #FFFFFF; padding: 20px; border-radius: 10px; height: 200px;">
158
+ <h3 style="color: #2196F3;">🗃️ Dataset Explorer</h3>
159
+ <p>Browse and analyze datasets from the HuggingFace Hub. Filter, sort, and examine data with ease.</p>
160
+ <p>Explorer features:</p>
161
+ <ul>
162
+ <li>Dataset previews</li>
163
+ <li>Basic statistics</li>
164
+ <li>Filtering options</li>
165
+ <li>Data exports</li>
166
+ </ul>
167
+ </div>
168
+ """, unsafe_allow_html=True)
169
+
170
+ st.markdown("""
171
+ <div style="background-color: #FFFFFF; padding: 20px; border-radius: 10px; margin-top: 20px; height: 200px;">
172
+ <h3 style="color: #2196F3;">📈 Model Metrics</h3>
173
+ <p>Analyze model performance with detailed metrics and comparisons. Understand how your models perform on different datasets.</p>
174
+ <p>Metrics available:</p>
175
+ <ul>
176
+ <li>Accuracy, precision, recall</li>
177
+ <li>Confusion matrices</li>
178
+ <li>Performance comparisons</li>
179
+ <li>Custom metric calculations</li>
180
+ </ul>
181
+ </div>
182
+ """, unsafe_allow_html=True)
183
+
184
+ # Getting started section
185
+ st.markdown("""
186
+ <div style="background-color: #FFFFFF; padding: 20px; border-radius: 10px; margin-top: 20px;">
187
+ <h3 style="color: #2196F3;">Getting Started</h3>
188
+ <p>To begin exploring, select a page from the sidebar navigation. You can:</p>
189
+ <ol>
190
+ <li>Write and test Python code in the <b>Code Editor</b></li>
191
+ <li>Search for and explore datasets in the <b>Dataset Explorer</b></li>
192
+ <li>Create visualizations in the <b>Visualizations</b> section</li>
193
+ <li>Analyze model performance in the <b>Model Metrics</b> page</li>
194
+ </ol>
195
+ <p>Ready to dive in? Select a page from the sidebar to get started!</p>
196
+ </div>
197
+ """, unsafe_allow_html=True)
198
+
199
+ if __name__ == "__main__":
200
+ main()
generated-icon.png ADDED

Git LFS Details

  • SHA256: 771ac7175544a98d5b19009e5ba5afebb3051130a5c62e53b44cdaed6b6ce6cd
  • Pointer size: 131 Bytes
  • Size of remote file: 226 kB
pyproject.toml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "repl-nix-workspace"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ requires-python = ">=3.11"
6
+ dependencies = [
7
+ "datasets>=3.3.2",
8
+ "matplotlib>=3.10.1",
9
+ "numpy>=2.2.3",
10
+ "pandas>=2.2.3",
11
+ "pillow>=11.1.0",
12
+ "plotly>=6.0.0",
13
+ "scikit-learn>=1.6.1",
14
+ "seaborn>=0.13.2",
15
+ "streamlit>=1.42.2",
16
+ "transformers>=4.49.0",
17
+ ]
replit.nix ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {pkgs}: {
2
+ deps = [
3
+ pkgs.zlib
4
+ pkgs.openjpeg
5
+ pkgs.libxcrypt
6
+ pkgs.libwebp
7
+ pkgs.libtiff
8
+ pkgs.libjpeg
9
+ pkgs.libimagequant
10
+ pkgs.lcms2
11
+ pkgs.tk
12
+ pkgs.tcl
13
+ pkgs.qhull
14
+ pkgs.pkg-config
15
+ pkgs.gtk3
16
+ pkgs.gobject-introspection
17
+ pkgs.ghostscript
18
+ pkgs.freetype
19
+ pkgs.ffmpeg-full
20
+ pkgs.cairo
21
+ pkgs.glibcLocales
22
+ ];
23
+ }
security_scanner.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import requests
3
+ import os
4
+ import json
5
+ from typing import Dict, Any, Optional
6
+
7
+ def scan_code_for_security(
8
+ code: str,
9
+ api_key: Optional[str] = None
10
+ ) -> Dict[str, Any]:
11
+ """
12
+ Scan code for security vulnerabilities using the CodePal Security Scanner API.
13
+
14
+ Args:
15
+ code: The code to scan as a string
16
+ api_key: Your CodePal API key (falls back to environment variable)
17
+
18
+ Returns:
19
+ Dict containing the API response
20
+
21
+ Raises:
22
+ ValueError: If API key is not provided
23
+ requests.RequestException: If the API request fails
24
+ """
25
+ # Get API key from parameter or environment
26
+ api_key = api_key or os.environ.get('CODEPAL_API_KEY')
27
+
28
+ if not api_key:
29
+ raise ValueError(
30
+ "API key is required. Either pass it as a parameter or set "
31
+ "the CODEPAL_API_KEY environment variable."
32
+ )
33
+
34
+ # API endpoint and headers
35
+ url = "https://api.codepal.ai/v1/security-code-scanner/query"
36
+ headers = {
37
+ "Authorization": f"Bearer {api_key}"
38
+ }
39
+
40
+ # Create multipart form data
41
+ files = {
42
+ 'code': (None, code)
43
+ }
44
+
45
+ try:
46
+ # Make the API request
47
+ response = requests.post(url, headers=headers, files=files)
48
+ response.raise_for_status() # Raise exception for non-2xx status codes
49
+
50
+ return response.json()
51
+ except requests.RequestException as e:
52
+ print(f"Error scanning code: {e}")
53
+ if response and hasattr(response, 'text'):
54
+ print(f"Response content: {response.text}")
55
+ raise
56
+
57
+ if __name__ == "__main__":
58
+ # Example usage
59
+ sample_code = """
60
+ import os
61
+
62
+ def run_command(user_input):
63
+ os.system(user_input)
64
+
65
+ run_command("ls")
66
+ """
67
+
68
+ # For testing, replace this with your actual API key
69
+ # or set the CODEPAL_API_KEY environment variable
70
+ try:
71
+ result = scan_code_for_security(sample_code)
72
+ print(json.dumps(result, indent=2))
73
+ except Exception as e:
74
+ print(f"Failed to scan code: {e}")
utils.py ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import os
4
+ import base64
5
+ from pathlib import Path
6
+ import matplotlib.pyplot as plt
7
+ import seaborn as sns
8
+ import numpy as np
9
+ from datasets import load_dataset
10
+
11
+ def load_css():
12
+ """Load custom CSS"""
13
+ with open('styles/custom.css') as f:
14
+ st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
15
+
16
+ def create_logo():
17
+ """Create and display the logo"""
18
+ from PIL import Image
19
+ import os
20
+
21
+ # Path to the logo image
22
+ logo_path = "assets/python_huggingface_logo.png"
23
+
24
+ # Check if the logo exists
25
+ if os.path.exists(logo_path):
26
+ # Display the logo image
27
+ image = Image.open(logo_path)
28
+ st.image(image, width=200)
29
+ else:
30
+ # Fallback to text if image is not found
31
+ st.markdown(
32
+ """
33
+ <div style="display: flex; justify-content: center; margin-bottom: 20px;">
34
+ <h2 style="color: #2196F3;">Python & HuggingFace Explorer</h2>
35
+ </div>
36
+ """,
37
+ unsafe_allow_html=True
38
+ )
39
+
40
+ def get_dataset_info(dataset_name):
41
+ """Get basic information about a HuggingFace dataset"""
42
+ if not dataset_name or not isinstance(dataset_name, str):
43
+ st.error("Invalid dataset name")
44
+ return None, None
45
+
46
+ try:
47
+ # Attempt to load the dataset with default configuration
48
+ st.info(f"Loading dataset: {dataset_name}...")
49
+
50
+ try:
51
+ # First try to load the dataset with streaming=False for better compatibility
52
+ dataset = load_dataset(dataset_name, streaming=False)
53
+ # Get the first split
54
+ first_split = next(iter(dataset.keys()))
55
+ data = dataset[first_split]
56
+ except Exception as e:
57
+ st.warning(f"Couldn't load dataset with default configuration: {str(e)}. Trying specific splits...")
58
+ # If that fails, try loading with specific splits
59
+ for split_name in ["train", "test", "validation"]:
60
+ try:
61
+ st.info(f"Trying to load '{split_name}' split...")
62
+ data = load_dataset(dataset_name, split=split_name, streaming=False)
63
+ break
64
+ except Exception as split_error:
65
+ if split_name == "validation": # Last attempt
66
+ st.error(f"Failed to load dataset with any standard split: {str(split_error)}")
67
+ return None, None
68
+ continue
69
+
70
+ # Get basic info
71
+ info = {
72
+ "Dataset": dataset_name,
73
+ "Number of examples": len(data),
74
+ "Features": list(data.features.keys()),
75
+ "Sample": data[0] if len(data) > 0 else None
76
+ }
77
+
78
+ st.success(f"Successfully loaded dataset with {info['Number of examples']} examples")
79
+ return info, data
80
+ except Exception as e:
81
+ st.error(f"Error loading dataset: {str(e)}")
82
+ if "Connection error" in str(e) or "timeout" in str(e).lower():
83
+ st.warning("Network issue detected. Please check your internet connection and try again.")
84
+ elif "not found" in str(e).lower():
85
+ st.warning(f"Dataset '{dataset_name}' not found. Please check the dataset name and try again.")
86
+ return None, None
87
+
88
+ def run_code(code):
89
+ """Run Python code and capture output"""
90
+ import io
91
+ import sys
92
+ import time
93
+ from contextlib import redirect_stdout, redirect_stderr
94
+
95
+ # Create StringIO objects to capture stdout and stderr
96
+ stdout_capture = io.StringIO()
97
+ stderr_capture = io.StringIO()
98
+
99
+ # Dictionary for storing results
100
+ results = {
101
+ "output": "",
102
+ "error": "",
103
+ "figures": []
104
+ }
105
+
106
+ # Safety check - limit code size
107
+ if len(code) > 100000:
108
+ results["error"] = "Code submission too large. Please reduce the size."
109
+ return results
110
+
111
+ # Basic security check - this is not comprehensive
112
+ dangerous_imports = ['os.system', 'subprocess', 'eval(', 'shutil.rmtree', 'open(', 'with open']
113
+ for dangerous_import in dangerous_imports:
114
+ if dangerous_import in code:
115
+ results["error"] = f"Potential security risk: {dangerous_import} is not allowed."
116
+ return results
117
+
118
+ # Capture current figures to avoid including existing ones
119
+ initial_figs = plt.get_fignums()
120
+
121
+ # Set execution timeout
122
+ MAX_EXECUTION_TIME = 30 # seconds
123
+ start_time = time.time()
124
+
125
+ try:
126
+ # Create a restricted globals dictionary
127
+ safe_globals = {
128
+ 'plt': plt,
129
+ 'pd': pd,
130
+ 'np': np,
131
+ 'sns': sns,
132
+ 'print': print,
133
+ '__builtins__': __builtins__,
134
+ }
135
+
136
+ # Add common data science libraries
137
+ for module_name in ['datasets', 'transformers', 'sklearn', 'math']:
138
+ try:
139
+ module = __import__(module_name)
140
+ safe_globals[module_name] = module
141
+ except ImportError:
142
+ pass # Module not available
143
+
144
+ # Redirect stdout and stderr
145
+ with redirect_stdout(stdout_capture), redirect_stderr(stderr_capture):
146
+ # Execute the code with timeout check
147
+ exec(code, safe_globals)
148
+
149
+ if time.time() - start_time > MAX_EXECUTION_TIME:
150
+ raise TimeoutError("Code execution exceeded maximum allowed time.")
151
+
152
+ # Get the captured output
153
+ results["output"] = stdout_capture.getvalue()
154
+
155
+ # Also capture stderr
156
+ stderr_output = stderr_capture.getvalue()
157
+ if stderr_output:
158
+ if results["output"]:
159
+ results["output"] += "\n\n--- Warnings/Errors ---\n" + stderr_output
160
+ else:
161
+ results["output"] = "--- Warnings/Errors ---\n" + stderr_output
162
+
163
+ # Capture any figures that were created
164
+ final_figs = plt.get_fignums()
165
+ new_figs = set(final_figs) - set(initial_figs)
166
+
167
+ for fig_num in new_figs:
168
+ fig = plt.figure(fig_num)
169
+ results["figures"].append(fig)
170
+
171
+ except Exception as e:
172
+ # Capture the error
173
+ results["error"] = f"{type(e).__name__}: {str(e)}"
174
+
175
+ return results
176
+
177
+ def get_dataset_preview(data, max_rows=10):
178
+ """Convert a HuggingFace dataset to a pandas DataFrame for preview"""
179
+ try:
180
+ # Convert to pandas DataFrame
181
+ df = pd.DataFrame(data[:max_rows])
182
+ return df
183
+ except Exception as e:
184
+ st.error(f"Error converting dataset to DataFrame: {str(e)}")
185
+ return None
186
+
187
+ def generate_basic_stats(data):
188
+ """Generate basic statistics for a dataset"""
189
+ try:
190
+ # Convert to pandas DataFrame
191
+ df = pd.DataFrame(data)
192
+
193
+ # Get column types
194
+ column_types = df.dtypes
195
+
196
+ # Initialize stats dictionary
197
+ stats = {}
198
+
199
+ for col in df.columns:
200
+ col_stats = {}
201
+
202
+ # Check if column is numeric
203
+ if pd.api.types.is_numeric_dtype(df[col]):
204
+ col_stats["mean"] = df[col].mean()
205
+ col_stats["median"] = df[col].median()
206
+ col_stats["std"] = df[col].std()
207
+ col_stats["min"] = df[col].min()
208
+ col_stats["max"] = df[col].max()
209
+ col_stats["missing"] = df[col].isna().sum()
210
+ # Check if column is string/object
211
+ elif pd.api.types.is_string_dtype(df[col]) or pd.api.types.is_object_dtype(df[col]):
212
+ col_stats["unique_values"] = df[col].nunique()
213
+ col_stats["most_common"] = df[col].value_counts().head(5).to_dict() if df[col].nunique() < 100 else "Too many unique values"
214
+ col_stats["missing"] = df[col].isna().sum()
215
+
216
+ stats[col] = col_stats
217
+
218
+ return stats
219
+ except Exception as e:
220
+ st.error(f"Error generating statistics: {str(e)}")
221
+ return None
222
+
223
+ def create_visualization(data, viz_type, x_col=None, y_col=None, hue_col=None):
224
+ """Create a visualization based on the selected type and columns"""
225
+ try:
226
+ df = pd.DataFrame(data)
227
+
228
+ fig, ax = plt.subplots(figsize=(10, 6))
229
+
230
+ if viz_type == "Bar Chart":
231
+ if x_col and y_col:
232
+ sns.barplot(x=x_col, y=y_col, hue=hue_col, data=df, ax=ax)
233
+ else:
234
+ st.warning("Bar charts require both X and Y columns.")
235
+ return None
236
+
237
+ elif viz_type == "Line Chart":
238
+ if x_col and y_col:
239
+ sns.lineplot(x=x_col, y=y_col, hue=hue_col, data=df, ax=ax)
240
+ else:
241
+ st.warning("Line charts require both X and Y columns.")
242
+ return None
243
+
244
+ elif viz_type == "Scatter Plot":
245
+ if x_col and y_col:
246
+ sns.scatterplot(x=x_col, y=y_col, hue=hue_col, data=df, ax=ax)
247
+ else:
248
+ st.warning("Scatter plots require both X and Y columns.")
249
+ return None
250
+
251
+ elif viz_type == "Histogram":
252
+ if x_col:
253
+ sns.histplot(df[x_col], ax=ax)
254
+ else:
255
+ st.warning("Histograms require an X column.")
256
+ return None
257
+
258
+ elif viz_type == "Box Plot":
259
+ if x_col and y_col:
260
+ sns.boxplot(x=x_col, y=y_col, hue=hue_col, data=df, ax=ax)
261
+ else:
262
+ st.warning("Box plots require both X and Y columns.")
263
+ return None
264
+
265
+ elif viz_type == "Count Plot":
266
+ if x_col:
267
+ sns.countplot(x=x_col, hue=hue_col, data=df, ax=ax)
268
+ else:
269
+ st.warning("Count plots require an X column.")
270
+ return None
271
+
272
+ # Set title and labels
273
+ plt.title(f"{viz_type} of {y_col if y_col else ''} vs {x_col if x_col else ''}")
274
+ plt.xlabel(x_col if x_col else "")
275
+ plt.ylabel(y_col if y_col else "")
276
+ plt.tight_layout()
277
+
278
+ return fig
279
+
280
+ except Exception as e:
281
+ st.error(f"Error creating visualization: {str(e)}")
282
+ return None
283
+
284
+ def get_popular_datasets(category=None, limit=10):
285
+ """Get popular HuggingFace datasets, optionally filtered by category"""
286
+ popular_datasets = {
287
+ "Text": ["glue", "imdb", "squad", "wikitext", "ag_news"],
288
+ "Image": ["cifar10", "cifar100", "mnist", "fashion_mnist", "coco"],
289
+ "Audio": ["common_voice", "librispeech_asr", "voxpopuli", "voxceleb", "audiofolder"],
290
+ "Multimodal": ["conceptual_captions", "flickr8k", "hateful_memes", "nlvr", "vqa"]
291
+ }
292
+
293
+ if category and category in popular_datasets:
294
+ return popular_datasets[category][:limit]
295
+ else:
296
+ # Return all datasets flattened
297
+ all_datasets = []
298
+ for cat_datasets in popular_datasets.values():
299
+ all_datasets.extend(cat_datasets)
300
+ return all_datasets[:limit]
uv.lock ADDED
The diff for this file is too large to render. See raw diff