Upload 9 files
Browse files- .gitattributes +1 -0
- .replit +39 -0
- README.md +1 -14
- app.py +200 -0
- generated-icon.png +3 -0
- pyproject.toml +17 -0
- replit.nix +23 -0
- security_scanner.py +74 -0
- utils.py +300 -0
- uv.lock +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
generated-icon.png filter=lfs diff=lfs merge=lfs -text
|
.replit
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
modules = ["python-3.11"]
|
2 |
+
|
3 |
+
[nix]
|
4 |
+
channel = "stable-24_05"
|
5 |
+
|
6 |
+
[deployment]
|
7 |
+
deploymentTarget = "autoscale"
|
8 |
+
run = ["sh", "-c", "streamlit run app.py"]
|
9 |
+
|
10 |
+
[workflows]
|
11 |
+
runButton = "Project"
|
12 |
+
|
13 |
+
[[workflows.workflow]]
|
14 |
+
name = "Project"
|
15 |
+
mode = "parallel"
|
16 |
+
author = "agent"
|
17 |
+
|
18 |
+
[[workflows.workflow.tasks]]
|
19 |
+
task = "workflow.run"
|
20 |
+
args = "Streamlit Server"
|
21 |
+
|
22 |
+
[[workflows.workflow]]
|
23 |
+
name = "Streamlit Server"
|
24 |
+
author = "agent"
|
25 |
+
|
26 |
+
[workflows.workflow.metadata]
|
27 |
+
agentRequireRestartOnSave = false
|
28 |
+
|
29 |
+
[[workflows.workflow.tasks]]
|
30 |
+
task = "packager.installForAll"
|
31 |
+
|
32 |
+
[[workflows.workflow.tasks]]
|
33 |
+
task = "shell.exec"
|
34 |
+
args = "streamlit run app.py"
|
35 |
+
waitForPort = 5000
|
36 |
+
|
37 |
+
[[ports]]
|
38 |
+
localPort = 5000
|
39 |
+
externalPort = 80
|
README.md
CHANGED
@@ -1,16 +1,3 @@
|
|
1 |
-
---
|
2 |
-
title: PythonScriptShowcase
|
3 |
-
emoji: ⚡
|
4 |
-
colorFrom: blue
|
5 |
-
colorTo: yellow
|
6 |
-
sdk: streamlit
|
7 |
-
sdk_version: 1.42.2
|
8 |
-
app_file: app.py
|
9 |
-
pinned: true
|
10 |
-
license: mit
|
11 |
-
short_description: Python scripts and Hugging Face datasets
|
12 |
-
---
|
13 |
-
|
14 |
# Python & HuggingFace Explorer
|
15 |
|
16 |
A Streamlit-based demonstration platform for showcasing Python scripts and Hugging Face datasets with interactive visualization.
|
@@ -72,4 +59,4 @@ The application uses a custom styling inspired by Hugging Face:
|
|
72 |
|
73 |
## License
|
74 |
|
75 |
-
This project is open source and available under the MIT License.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# Python & HuggingFace Explorer
|
2 |
|
3 |
A Streamlit-based demonstration platform for showcasing Python scripts and Hugging Face datasets with interactive visualization.
|
|
|
59 |
|
60 |
## License
|
61 |
|
62 |
+
This project is open source and available under the MIT License.
|
app.py
ADDED
@@ -0,0 +1,200 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from components.code_editor import render_code_editor
|
3 |
+
from components.dataset_explorer import render_dataset_explorer
|
4 |
+
from components.visualization import render_visualization
|
5 |
+
from components.model_metrics import render_model_metrics
|
6 |
+
import os
|
7 |
+
import sys
|
8 |
+
import time
|
9 |
+
from utils import load_css, create_logo
|
10 |
+
|
11 |
+
# Page configuration
|
12 |
+
st.set_page_config(
|
13 |
+
page_title="Python & HuggingFace Explorer",
|
14 |
+
page_icon="🤗",
|
15 |
+
layout="wide",
|
16 |
+
initial_sidebar_state="expanded"
|
17 |
+
)
|
18 |
+
|
19 |
+
# Load custom CSS
|
20 |
+
load_css()
|
21 |
+
|
22 |
+
# Main content
|
23 |
+
def main():
    """Top-level app entry: draw the sidebar (logo, navigation, quick dataset
    search), initialize session state, and dispatch to the selected page."""
    # Create sidebar
    with st.sidebar:
        create_logo()
        st.title("Navigation")
        page = st.radio(
            "Select a page:",
            ["Home", "Code Editor", "Dataset Explorer", "Visualizations", "Model Metrics"]
        )

        # HF Dataset search
        st.sidebar.markdown("---")
        st.sidebar.subheader("Dataset Quick Search")
        dataset_name = st.sidebar.text_input("Enter a HuggingFace dataset name")
        if dataset_name and st.sidebar.button("Load Dataset"):
            # Stash the name; the Dataset Explorer page reads it from session state.
            st.session_state.dataset_name = dataset_name
            if page != "Dataset Explorer":
                st.sidebar.info("Dataset loaded! Go to Dataset Explorer to view it.")

        st.sidebar.markdown("---")
        st.sidebar.markdown("""
        <div style="font-size: 0.8em; color: #666; text-align: center;">
            <p>Built with ❤️ using</p>
            <p>Streamlit & HuggingFace</p>
            <p style="font-size: 0.9em; margin-top: 5px;">© 2025 Python Explorer</p>
        </div>
        """, unsafe_allow_html=True)

    # Initialize session state for dataset
    if 'dataset_name' not in st.session_state:
        st.session_state.dataset_name = None

    # Default code shown in the Code Editor the first time it is opened.
    if 'code_content' not in st.session_state:
        st.session_state.code_content = """# Sample Python code
from datasets import load_dataset
import pandas as pd
import matplotlib.pyplot as plt

# Load a dataset from Hugging Face
dataset = load_dataset("glue", "sst2", split="train")
df = pd.DataFrame(dataset)

# Display the first few rows
print(df.head())

# Simple analysis
print(f"Number of examples: {len(df)}")
print(f"Columns: {df.columns}")

# Visualize class distribution
plt.figure(figsize=(8, 5))
df['label'].value_counts().plot(kind='bar')
plt.title('Class Distribution')
plt.xlabel('Class')
plt.ylabel('Count')
plt.tight_layout()
plt.show()
"""

    # Page content — each renderer comes from the components package.
    if page == "Home":
        render_home()
    elif page == "Code Editor":
        render_code_editor()
    elif page == "Dataset Explorer":
        render_dataset_explorer()
    elif page == "Visualizations":
        render_visualization()
    elif page == "Model Metrics":
        render_model_metrics()
|
93 |
+
|
94 |
+
def render_home():
    """Render the landing page: centered logo (or a plain title when the image
    is missing), an intro banner, four feature cards, and a getting-started box.

    All rich content is raw HTML passed through st.markdown with
    unsafe_allow_html=True, styled to match the Hugging Face look.
    """
    # Display header image instead of using a title
    from PIL import Image
    import os

    # Path to the logo image in the center of the page
    center_logo_path = "assets/python_huggingface_logo.png"

    # Check if the logo exists and display it
    if os.path.exists(center_logo_path):
        # Middle column of a 1:2:1 layout keeps the image visually centered.
        center_col1, center_col2, center_col3 = st.columns([1, 2, 1])
        with center_col2:
            image = Image.open(center_logo_path)
            # Resize image to 25% of original dimensions
            width, height = image.size
            resized_image = image.resize((width//4, height//4))
            st.image(resized_image, use_container_width=True)
    else:
        st.title("Python & HuggingFace Explorer")

    # Introduction with improved styling
    st.markdown("""
    <div style="background-color: #FFFFFF; padding: 20px; border-radius: 10px; margin-bottom: 20px; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);">
        <h2 style="color: #2196F3; text-align: center;">Welcome to the Explorer!</h2>
        <p style="font-size: 1.1em; line-height: 1.6;">This interactive platform brings together the power of Python and the HuggingFace ecosystem.
        Write and execute code, explore datasets from the HuggingFace Hub, create beautiful visualizations,
        and analyze model performance metrics - all in one seamless environment.</p>
    </div>
    """, unsafe_allow_html=True)

    # Feature cards — two columns, two cards each.
    col1, col2 = st.columns(2)

    with col1:
        st.markdown("""
        <div style="background-color: #FFFFFF; padding: 20px; border-radius: 10px; height: 200px;">
            <h3 style="color: #2196F3;">💻 Code Editor</h3>
            <p>Write, edit, and execute Python code with syntax highlighting. See your results instantly and experiment with different scripts.</p>
            <p>Features include:</p>
            <ul>
                <li>Syntax highlighting</li>
                <li>Code execution</li>
                <li>Output display</li>
            </ul>
        </div>
        """, unsafe_allow_html=True)

        st.markdown("""
        <div style="background-color: #FFFFFF; padding: 20px; border-radius: 10px; margin-top: 20px; height: 200px;">
            <h3 style="color: #2196F3;">📊 Visualizations</h3>
            <p>Create and customize visualizations from your datasets. Explore data through charts, graphs, and interactive plots.</p>
            <p>Visualization types:</p>
            <ul>
                <li>Bar charts & histograms</li>
                <li>Scatter plots</li>
                <li>Line charts</li>
                <li>And more!</li>
            </ul>
        </div>
        """, unsafe_allow_html=True)

    with col2:
        st.markdown("""
        <div style="background-color: #FFFFFF; padding: 20px; border-radius: 10px; height: 200px;">
            <h3 style="color: #2196F3;">🗃️ Dataset Explorer</h3>
            <p>Browse and analyze datasets from the HuggingFace Hub. Filter, sort, and examine data with ease.</p>
            <p>Explorer features:</p>
            <ul>
                <li>Dataset previews</li>
                <li>Basic statistics</li>
                <li>Filtering options</li>
                <li>Data exports</li>
            </ul>
        </div>
        """, unsafe_allow_html=True)

        st.markdown("""
        <div style="background-color: #FFFFFF; padding: 20px; border-radius: 10px; margin-top: 20px; height: 200px;">
            <h3 style="color: #2196F3;">📈 Model Metrics</h3>
            <p>Analyze model performance with detailed metrics and comparisons. Understand how your models perform on different datasets.</p>
            <p>Metrics available:</p>
            <ul>
                <li>Accuracy, precision, recall</li>
                <li>Confusion matrices</li>
                <li>Performance comparisons</li>
                <li>Custom metric calculations</li>
            </ul>
        </div>
        """, unsafe_allow_html=True)

    # Getting started section
    st.markdown("""
    <div style="background-color: #FFFFFF; padding: 20px; border-radius: 10px; margin-top: 20px;">
        <h3 style="color: #2196F3;">Getting Started</h3>
        <p>To begin exploring, select a page from the sidebar navigation. You can:</p>
        <ol>
            <li>Write and test Python code in the <b>Code Editor</b></li>
            <li>Search for and explore datasets in the <b>Dataset Explorer</b></li>
            <li>Create visualizations in the <b>Visualizations</b> section</li>
            <li>Analyze model performance in the <b>Model Metrics</b> page</li>
        </ol>
        <p>Ready to dive in? Select a page from the sidebar to get started!</p>
    </div>
    """, unsafe_allow_html=True)
|
198 |
+
|
199 |
+
# Script entry point: launch the Streamlit app.
if __name__ == "__main__":
    main()
|
generated-icon.png
ADDED
![]() |
Git LFS Details
|
pyproject.toml
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[project]
|
2 |
+
name = "repl-nix-workspace"
|
3 |
+
version = "0.1.0"
|
4 |
+
description = "Add your description here"
|
5 |
+
requires-python = ">=3.11"
|
6 |
+
dependencies = [
|
7 |
+
"datasets>=3.3.2",
|
8 |
+
"matplotlib>=3.10.1",
|
9 |
+
"numpy>=2.2.3",
|
10 |
+
"pandas>=2.2.3",
|
11 |
+
"pillow>=11.1.0",
|
12 |
+
"plotly>=6.0.0",
|
13 |
+
"scikit-learn>=1.6.1",
|
14 |
+
"seaborn>=0.13.2",
|
15 |
+
"streamlit>=1.42.2",
|
16 |
+
"transformers>=4.49.0",
|
17 |
+
]
|
replit.nix
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{pkgs}: {
|
2 |
+
deps = [
|
3 |
+
pkgs.zlib
|
4 |
+
pkgs.openjpeg
|
5 |
+
pkgs.libxcrypt
|
6 |
+
pkgs.libwebp
|
7 |
+
pkgs.libtiff
|
8 |
+
pkgs.libjpeg
|
9 |
+
pkgs.libimagequant
|
10 |
+
pkgs.lcms2
|
11 |
+
pkgs.tk
|
12 |
+
pkgs.tcl
|
13 |
+
pkgs.qhull
|
14 |
+
pkgs.pkg-config
|
15 |
+
pkgs.gtk3
|
16 |
+
pkgs.gobject-introspection
|
17 |
+
pkgs.ghostscript
|
18 |
+
pkgs.freetype
|
19 |
+
pkgs.ffmpeg-full
|
20 |
+
pkgs.cairo
|
21 |
+
pkgs.glibcLocales
|
22 |
+
];
|
23 |
+
}
|
security_scanner.py
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import requests
|
3 |
+
import os
|
4 |
+
import json
|
5 |
+
from typing import Dict, Any, Optional
|
6 |
+
|
7 |
+
def scan_code_for_security(
    code: str,
    api_key: Optional[str] = None
) -> Dict[str, Any]:
    """
    Scan code for security vulnerabilities using the CodePal Security Scanner API.

    Args:
        code: The code to scan as a string
        api_key: Your CodePal API key (falls back to the CODEPAL_API_KEY
            environment variable)

    Returns:
        Dict containing the parsed JSON API response

    Raises:
        ValueError: If API key is neither passed nor found in the environment
        requests.RequestException: If the API request fails
    """
    # Get API key from parameter or environment
    api_key = api_key or os.environ.get('CODEPAL_API_KEY')

    if not api_key:
        raise ValueError(
            "API key is required. Either pass it as a parameter or set "
            "the CODEPAL_API_KEY environment variable."
        )

    # API endpoint and headers
    url = "https://api.codepal.ai/v1/security-code-scanner/query"
    headers = {
        "Authorization": f"Bearer {api_key}"
    }

    # Multipart form data: the API expects the source as a form field named
    # 'code' (filename=None means "send as plain field, not a file upload").
    files = {
        'code': (None, code)
    }

    try:
        # Make the API request. A timeout prevents hanging forever on an
        # unresponsive endpoint (requests has no default timeout).
        response = requests.post(url, headers=headers, files=files, timeout=30)
        response.raise_for_status()  # Raise exception for non-2xx status codes

        return response.json()
    except requests.RequestException as e:
        # Bug fix: the original read the local variable `response` here, which
        # is unbound when requests.post() itself raises (connection error,
        # timeout) — producing an UnboundLocalError that masked the real
        # failure. The exception's attached response is the safe way in.
        print(f"Error scanning code: {e}")
        if e.response is not None:
            print(f"Response content: {e.response.text}")
        raise
|
56 |
+
|
57 |
+
# Example usage: scan a deliberately vulnerable snippet (shell execution of
# raw user input) and pretty-print the scanner's JSON verdict.
if __name__ == "__main__":
    sample_code = """
import os

def run_command(user_input):
    os.system(user_input)

run_command("ls")
"""

    # For testing, replace this with your actual API key
    # or set the CODEPAL_API_KEY environment variable
    try:
        result = scan_code_for_security(sample_code)
        print(json.dumps(result, indent=2))
    except Exception as e:
        # Catches both the missing-key ValueError and network/API failures.
        print(f"Failed to scan code: {e}")
|
utils.py
ADDED
@@ -0,0 +1,300 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import os
|
4 |
+
import base64
|
5 |
+
from pathlib import Path
|
6 |
+
import matplotlib.pyplot as plt
|
7 |
+
import seaborn as sns
|
8 |
+
import numpy as np
|
9 |
+
from datasets import load_dataset
|
10 |
+
|
11 |
+
def load_css():
    """Inject the app's custom stylesheet into the current Streamlit page.

    Reads styles/custom.css (relative to the working directory) and embeds it
    inline in a <style> tag. Raises FileNotFoundError if the file is absent.
    """
    # Explicit encoding: without it open() uses the platform locale default,
    # which breaks on Windows if the stylesheet contains non-ASCII characters.
    with open('styles/custom.css', encoding='utf-8') as f:
        st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
|
15 |
+
|
16 |
+
def create_logo():
    """Show the project logo in the sidebar, or a styled text banner when the
    image file is not bundled with the deployment."""
    import os
    from PIL import Image

    logo_path = "assets/python_huggingface_logo.png"

    if not os.path.exists(logo_path):
        # Fallback to text if image is not found
        st.markdown(
            """
            <div style="display: flex; justify-content: center; margin-bottom: 20px;">
                <h2 style="color: #2196F3;">Python & HuggingFace Explorer</h2>
            </div>
            """,
            unsafe_allow_html=True
        )
        return

    # Display the logo image at a fixed width
    st.image(Image.open(logo_path), width=200)
|
39 |
+
|
40 |
+
def get_dataset_info(dataset_name):
    """Load a HuggingFace dataset and summarize it.

    Returns a tuple (info, data) where info is a dict with the dataset name,
    example count, feature names and a sample row, and data is the loaded
    split. Returns (None, None) on any failure. Progress and errors are
    reported through Streamlit widgets, so this must run inside an app.
    """
    if not dataset_name or not isinstance(dataset_name, str):
        st.error("Invalid dataset name")
        return None, None

    try:
        # Attempt to load the dataset with default configuration
        st.info(f"Loading dataset: {dataset_name}...")

        try:
            # First try to load the dataset with streaming=False for better compatibility
            dataset = load_dataset(dataset_name, streaming=False)
            # Get the first split (DatasetDict preserves insertion order)
            first_split = next(iter(dataset.keys()))
            data = dataset[first_split]
        except Exception as e:
            st.warning(f"Couldn't load dataset with default configuration: {str(e)}. Trying specific splits...")
            # If that fails, try loading with specific splits, in order of
            # likelihood; give up only after the last one fails.
            for split_name in ["train", "test", "validation"]:
                try:
                    st.info(f"Trying to load '{split_name}' split...")
                    data = load_dataset(dataset_name, split=split_name, streaming=False)
                    break
                except Exception as split_error:
                    if split_name == "validation":  # Last attempt
                        st.error(f"Failed to load dataset with any standard split: {str(split_error)}")
                        return None, None
                    continue

        # Get basic info
        info = {
            "Dataset": dataset_name,
            "Number of examples": len(data),
            "Features": list(data.features.keys()),
            "Sample": data[0] if len(data) > 0 else None
        }

        st.success(f"Successfully loaded dataset with {info['Number of examples']} examples")
        return info, data
    except Exception as e:
        st.error(f"Error loading dataset: {str(e)}")
        # Give the user a more actionable hint for the common failure modes.
        if "Connection error" in str(e) or "timeout" in str(e).lower():
            st.warning("Network issue detected. Please check your internet connection and try again.")
        elif "not found" in str(e).lower():
            st.warning(f"Dataset '{dataset_name}' not found. Please check the dataset name and try again.")
        return None, None
|
87 |
+
|
88 |
+
def run_code(code):
    """Execute user-supplied Python code and capture what it produces.

    Returns a dict with:
        "output":  captured stdout; anything written to stderr is appended
                   under a "--- Warnings/Errors ---" header,
        "error":   rejection or exception message, empty on success,
        "figures": matplotlib Figure objects created during execution.

    NOTE(review): the substring blacklist below is trivial to bypass and
    exec() of untrusted input is inherently unsafe — this is best-effort
    screening for a demo app, not a sandbox.
    """
    import io
    import time
    from contextlib import redirect_stdout, redirect_stderr

    # Create StringIO objects to capture stdout and stderr
    stdout_capture = io.StringIO()
    stderr_capture = io.StringIO()

    # Dictionary for storing results
    results = {
        "output": "",
        "error": "",
        "figures": []
    }

    # Safety check - limit code size
    if len(code) > 100000:
        results["error"] = "Code submission too large. Please reduce the size."
        return results

    # Basic security check - this is not comprehensive
    dangerous_imports = ['os.system', 'subprocess', 'eval(', 'shutil.rmtree', 'open(', 'with open']
    for dangerous_import in dangerous_imports:
        if dangerous_import in code:
            results["error"] = f"Potential security risk: {dangerous_import} is not allowed."
            return results

    # Capture current figures to avoid including existing ones
    initial_figs = plt.get_fignums()

    # Execution "timeout". NOTE(review): this is only checked *after* exec()
    # returns, so it reports overruns but cannot interrupt a runaway script.
    MAX_EXECUTION_TIME = 30  # seconds
    start_time = time.time()

    try:
        # Create a restricted globals dictionary for the executed code
        safe_globals = {
            'plt': plt,
            'pd': pd,
            'np': np,
            'sns': sns,
            'print': print,
            '__builtins__': __builtins__,
        }

        # Add common data science libraries when they are installed
        for module_name in ['datasets', 'transformers', 'sklearn', 'math']:
            try:
                module = __import__(module_name)
                safe_globals[module_name] = module
            except ImportError:
                pass  # Module not available

        # Redirect stdout and stderr while the user code runs
        with redirect_stdout(stdout_capture), redirect_stderr(stderr_capture):
            exec(code, safe_globals)

            if time.time() - start_time > MAX_EXECUTION_TIME:
                raise TimeoutError("Code execution exceeded maximum allowed time.")

        # Get the captured output
        results["output"] = stdout_capture.getvalue()

        # Append anything the code wrote to stderr (warnings, tracebacks)
        stderr_output = stderr_capture.getvalue()
        if stderr_output:
            if results["output"]:
                results["output"] += "\n\n--- Warnings/Errors ---\n" + stderr_output
            else:
                results["output"] = "--- Warnings/Errors ---\n" + stderr_output

        # Capture only the figures created by this execution
        final_figs = plt.get_fignums()
        new_figs = set(final_figs) - set(initial_figs)

        for fig_num in new_figs:
            fig = plt.figure(fig_num)
            results["figures"].append(fig)

    except Exception as e:
        # Capture the error (includes the post-hoc TimeoutError above)
        results["error"] = f"{type(e).__name__}: {str(e)}"

    return results
|
176 |
+
|
177 |
+
def get_dataset_preview(data, max_rows=10):
    """Build a small pandas DataFrame from the head of a HuggingFace dataset.

    Takes the first `max_rows` rows via slicing; returns None (after surfacing
    a Streamlit error) when the data cannot be converted.
    """
    try:
        return pd.DataFrame(data[:max_rows])
    except Exception as e:
        st.error(f"Error converting dataset to DataFrame: {str(e)}")
        return None
|
186 |
+
|
187 |
+
def generate_basic_stats(data):
    """Generate basic per-column statistics for a dataset.

    Numeric columns get mean/median/std/min/max and a missing-value count;
    string/object columns get a unique-value count, the five most common
    values (or a placeholder when there are 100+ distinct values), and a
    missing-value count. Other dtypes get an empty entry.

    Returns a dict mapping column name -> stats dict, or None (after showing
    a Streamlit error) when the data cannot be converted to a DataFrame.
    """
    try:
        # Convert to pandas DataFrame
        df = pd.DataFrame(data)
        # (Removed unused `column_types = df.dtypes` from the original.)

        stats = {}

        for col in df.columns:
            col_stats = {}

            # Check if column is numeric
            if pd.api.types.is_numeric_dtype(df[col]):
                col_stats["mean"] = df[col].mean()
                col_stats["median"] = df[col].median()
                col_stats["std"] = df[col].std()
                col_stats["min"] = df[col].min()
                col_stats["max"] = df[col].max()
                col_stats["missing"] = df[col].isna().sum()
            # Check if column is string/object
            elif pd.api.types.is_string_dtype(df[col]) or pd.api.types.is_object_dtype(df[col]):
                col_stats["unique_values"] = df[col].nunique()
                # Cap the most-common listing so huge-cardinality columns
                # don't produce an unwieldy dict.
                col_stats["most_common"] = df[col].value_counts().head(5).to_dict() if df[col].nunique() < 100 else "Too many unique values"
                col_stats["missing"] = df[col].isna().sum()

            stats[col] = col_stats

        return stats
    except Exception as e:
        st.error(f"Error generating statistics: {str(e)}")
        return None
|
222 |
+
|
223 |
+
def create_visualization(data, viz_type, x_col=None, y_col=None, hue_col=None):
    """Draw a seaborn chart of `data` on a fresh matplotlib figure.

    Supported viz_type values: "Bar Chart", "Line Chart", "Scatter Plot",
    "Box Plot" (all require x_col and y_col), "Histogram" and "Count Plot"
    (require x_col only). Returns the Figure, or None when the required
    columns are missing (a Streamlit warning is shown) or drawing fails
    (a Streamlit error is shown).
    """
    try:
        frame = pd.DataFrame(data)
        fig, axis = plt.subplots(figsize=(10, 6))

        # Chart families sharing a call shape, keyed by viz_type:
        # plotter + the exact warning to show when columns are missing.
        xy_charts = {
            "Bar Chart": (sns.barplot, "Bar charts require both X and Y columns."),
            "Line Chart": (sns.lineplot, "Line charts require both X and Y columns."),
            "Scatter Plot": (sns.scatterplot, "Scatter plots require both X and Y columns."),
            "Box Plot": (sns.boxplot, "Box plots require both X and Y columns."),
        }

        if viz_type in xy_charts:
            plotter, missing_msg = xy_charts[viz_type]
            if not (x_col and y_col):
                st.warning(missing_msg)
                return None
            plotter(x=x_col, y=y_col, hue=hue_col, data=frame, ax=axis)
        elif viz_type == "Histogram":
            if not x_col:
                st.warning("Histograms require an X column.")
                return None
            sns.histplot(frame[x_col], ax=axis)
        elif viz_type == "Count Plot":
            if not x_col:
                st.warning("Count plots require an X column.")
                return None
            sns.countplot(x=x_col, hue=hue_col, data=frame, ax=axis)
        # Unknown viz_type: fall through and return the (empty) figure,
        # matching the original behavior.

        # Title and axis labels from whichever columns were supplied.
        plt.title(f"{viz_type} of {y_col if y_col else ''} vs {x_col if x_col else ''}")
        plt.xlabel(x_col if x_col else "")
        plt.ylabel(y_col if y_col else "")
        plt.tight_layout()

        return fig

    except Exception as e:
        st.error(f"Error creating visualization: {str(e)}")
        return None
|
283 |
+
|
284 |
+
def get_popular_datasets(category=None, limit=10):
    """Return up to `limit` well-known HuggingFace dataset names.

    When `category` names one of the known groups ("Text", "Image", "Audio",
    "Multimodal"), only that group is consulted; any other (or no) category
    yields the groups flattened in declaration order.
    """
    popular_datasets = {
        "Text": ["glue", "imdb", "squad", "wikitext", "ag_news"],
        "Image": ["cifar10", "cifar100", "mnist", "fashion_mnist", "coco"],
        "Audio": ["common_voice", "librispeech_asr", "voxpopuli", "voxceleb", "audiofolder"],
        "Multimodal": ["conceptual_captions", "flickr8k", "hateful_memes", "nlvr", "vqa"],
    }

    if category and category in popular_datasets:
        return popular_datasets[category][:limit]

    # Unknown or missing category: flatten every group, then truncate.
    flattened = [name for group in popular_datasets.values() for name in group]
    return flattened[:limit]
|
uv.lock
ADDED
The diff for this file is too large to render.
See raw diff
|
|