"""Gradio demo: sentiment and category classification of customer NPS comments.

The script loads a demo NPS dataset, labels every comment with a sentiment
(DistilBERT SST-2) and a user-defined category (FLAN-T5), and renders summary
charts and KPIs in a Gradio Blocks UI.
"""

import os

import gradio as gr
import pandas as pd
import plotly.express as px
import spaces
from datasets import load_dataset
from huggingface_hub import login
from transformers import pipeline

# Maximum number of categories a user may define in the UI.
MAX_CATEGORIES = 5
# Text shown in the category status area when no category is defined.
NO_CATEGORIES_TEXT = "**Categories:**\n- None"
# Columns rendered in the classification result table.
DISPLAY_COLUMNS = [
    'customer_id',
    'customer_comment',
    'comment_sentiment',
    'comment_category',
    'customer_nps',
    'customer_segment',
]
# Columns that must exist before visualizations can be produced.
REQUIRED_COLUMNS = [
    'comment_sentiment',
    'comment_category',
    'customer_nps',
    'customer_segment',
]

# Load dataset
ds = load_dataset('ZennyKenny/demo_customer_nps')
df = pd.DataFrame(ds['train'])

# Login using the API key stored as an environment variable. Skip the call
# when the variable is unset: login(token=None) falls back to an interactive
# prompt, which cannot be answered in a headless deployment.
hf_api_key = os.getenv("API_KEY")
if hf_api_key:
    login(token=hf_api_key)

# Initialize model pipelines
classifier = pipeline(
    "text-classification",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)
generator = pipeline("text2text-generation", model="google/flan-t5-base")


def _match_category(answer, categories):
    """Map a free-text model answer onto one of the configured categories.

    Matching is case-insensitive: an exact match wins, then the first
    category whose name appears inside the answer. When nothing matches,
    the stripped raw answer is returned unchanged.
    """
    cleaned = answer.strip()
    folded = cleaned.casefold()
    for category in categories:
        if category.casefold() == folded:
            return category
    for category in categories:
        if category.casefold() in folded:
            return category
    return cleaned


def classify_comments(categories):
    """Classify every comment in the module-level ``df``.

    Adds the columns ``comment_sentiment`` and ``comment_category`` to ``df``
    in place.

    Args:
        categories: List of category names the generator may choose from.

    Returns:
        An HTML table of the classified rows, or an HTML error message when
        no categories were supplied.
    """
    if not categories:
        return (
            "<p><strong>Error:</strong> Add at least one category before "
            "classifying comments.</p>"
        )

    # The category list is the same for every prompt, so build it once.
    category_str = ', '.join(categories)
    sentiments = []
    assigned_categories = []

    # Debugging output
    print("Classifying comments...")

    # Missing comments become empty strings so the pipelines always get text.
    comments = df['customer_comment'].fillna('').astype(str)
    for comment in comments:
        # truncation=True keeps over-long comments within the model's
        # maximum input length instead of raising.
        sentiment = classifier(comment, truncation=True)[0]['label']

        prompt = (
            f"What category best describes this comment? '{comment}' "
            f"Please answer using only the name of the category: {category_str}."
        )
        raw_category = generator(prompt, max_length=30)[0]['generated_text']

        sentiments.append(sentiment)
        assigned_categories.append(_match_category(raw_category, categories))

    df['comment_sentiment'] = sentiments
    df['comment_category'] = assigned_categories

    # Debugging output
    print(df.head())
    print(df['comment_sentiment'].value_counts())
    print(df['comment_category'].value_counts())

    return df[DISPLAY_COLUMNS].to_html(index=False)


def visualize_output():
    """Build charts and KPI text from the classified module-level ``df``.

    Works on a local copy, so the shared DataFrame is never modified.

    Returns:
        A 5-tuple ``(sentiment_pie, category_pie, stacked_bar, kpi_markdown,
        sentiment_by_segment_pie)``. On error the figures are ``None`` and
        the fourth element carries the error message.
    """
    # Check if DataFrame is empty
    if df.empty:
        return (
            None, None, None,
            "Error: DataFrame is empty. Please check the data or classification step.",
            None,
        )

    # Check for required columns
    if not all(col in df.columns for col in REQUIRED_COLUMNS):
        return (
            None, None, None,
            "Error: Required columns are missing. Please classify comments first.",
            None,
        )

    # Copy so that type coercion and row dropping stay local to this call.
    data = df.copy()

    # Explicitly convert data types
    data['comment_sentiment'] = data['comment_sentiment'].astype(str)
    data['comment_category'] = data['comment_category'].astype(str)
    data['customer_nps'] = pd.to_numeric(data['customer_nps'], errors='coerce')
    data['customer_segment'] = data['customer_segment'].astype(str)

    # Drop NaN values (in practice: rows whose NPS could not be parsed, since
    # the other columns were just converted to strings).
    data = data.dropna(subset=REQUIRED_COLUMNS)
    if data.empty:
        return (
            None, None, None,
            "Error: DataFrame is empty. Please check the data or classification step.",
            None,
        )

    # Debugging output
    print(data.head())
    print(data['comment_sentiment'].value_counts())
    print(data['comment_category'].value_counts())

    # Pie Chart of Sentiment
    sentiment_counts = data['comment_sentiment'].value_counts()
    sentiment_pie = px.pie(
        values=sentiment_counts.values,
        names=sentiment_counts.index,
        title="Sentiment Distribution",
    )

    # Pie Chart of Comment Categories
    category_counts = data['comment_category'].value_counts()
    category_pie = px.pie(
        values=category_counts.values,
        names=category_counts.index,
        title="Comment Category Distribution",
    )

    # Stacked Bar Chart of Sentiment by Category; fill_value=0 avoids NaN for
    # category/sentiment pairs that never occur.
    sentiment_by_category = (
        data.groupby(['comment_category', 'comment_sentiment'])
        .size()
        .unstack(fill_value=0)
    )
    stacked_bar = px.bar(
        sentiment_by_category,
        barmode='stack',
        title="Sentiment by Comment Category",
        labels={
            'value': 'Count',
            'comment_category': 'Category',
            'comment_sentiment': 'Sentiment',
        },
    )

    # KPI Visualizations
    avg_nps = data['customer_nps'].mean()
    avg_nps_positive = data.loc[
        data['comment_sentiment'] == 'POSITIVE', 'customer_nps'
    ].mean()
    avg_nps_negative = data.loc[
        data['comment_sentiment'] == 'NEGATIVE', 'customer_nps'
    ].mean()
    avg_nps_by_category = (
        data.groupby('comment_category')['customer_nps'].mean().reset_index()
    )
    avg_nps_by_segment = (
        data.groupby('customer_segment')['customer_nps'].mean().reset_index()
    )

    # Assemble the markdown from unindented lines; blank lines separate the
    # tables from the surrounding text.
    kpi_visualization = "\n".join([
        "**Average NPS Scores:**",
        f"- Overall: {avg_nps:.2f}",
        f"- Positive Sentiment: {avg_nps_positive:.2f}",
        f"- Negative Sentiment: {avg_nps_negative:.2f}",
        "",
        "**Average NPS by Category:**",
        "",
        avg_nps_by_category.to_markdown(index=False),
        "",
        "**Average NPS by Segment:**",
        "",
        avg_nps_by_segment.to_markdown(index=False),
        "",
    ])

    # Pie Chart of Sentiment by Customer Segment. px.pie needs explicit
    # names/values, so use long-form counts with one sector per
    # (segment, sentiment) pair.
    sentiment_by_segment = (
        data.groupby(['customer_segment', 'comment_sentiment'])
        .size()
        .reset_index(name='count')
    )
    sentiment_by_segment['label'] = (
        sentiment_by_segment['customer_segment']
        + ' - '
        + sentiment_by_segment['comment_sentiment']
    )
    sentiment_by_segment_pie = px.pie(
        sentiment_by_segment,
        names='label',
        values='count',
        title="Sentiment by Customer Segment",
    )

    return sentiment_pie, category_pie, stacked_bar, kpi_visualization, sentiment_by_segment_pie


def _format_categories(categories):
    """Render the category list as the markdown shown in the status area."""
    if not categories:
        return NO_CATEGORIES_TEXT
    return "**Categories:**\n" + "\n".join(f"- {cat}" for cat in categories)


def add_category(categories, new_category):
    """Append a category to the session's list.

    Blank names, duplicates, and additions beyond ``MAX_CATEGORIES`` are
    ignored. A new list is returned rather than mutating the input.

    Returns:
        ``(updated_categories, "", status_markdown)``; the empty string
        clears the input textbox.
    """
    name = (new_category or "").strip()
    updated = list(categories or [])
    if name and name not in updated and len(updated) < MAX_CATEGORIES:
        updated.append(name)
    return updated, "", _format_categories(updated)


def reset_categories():
    """Clear all categories and reset the status text."""
    return [], NO_CATEGORIES_TEXT


# Gradio Interface
with gr.Blocks() as nps:
    categories = gr.State([])

    with gr.Row():
        category_input = gr.Textbox(label="New Category", placeholder="Enter category name")
        add_category_btn = gr.Button("Add Category")
        reset_btn = gr.Button("Reset Categories")
    category_status = gr.Markdown(NO_CATEGORIES_TEXT)

    # NOTE: the upload widget and the template button are not wired to any
    # handler in this script; classification always uses the demo dataset.
    uploaded_file = gr.File(label="Upload CSV", type="filepath")
    template_btn = gr.Button("Use Template")

    gr.Markdown("# NPS Comment Categorization")
    classify_btn = gr.Button("Classify Comments")
    output = gr.HTML()

    visualize_btn = gr.Button("Visualize Output")
    sentiment_pie = gr.Plot(label="Sentiment Distribution")
    category_pie = gr.Plot(label="Comment Category Distribution")
    stacked_bar = gr.Plot(label="Sentiment by Comment Category")
    kpi_visualization = gr.Markdown()
    sentiment_by_segment_pie = gr.Plot(label="Sentiment by Customer Segment")

    add_category_btn.click(
        fn=add_category,
        inputs=[categories, category_input],
        outputs=[categories, category_input, category_status],
    )
    reset_btn.click(fn=reset_categories, outputs=[categories, category_status])
    classify_btn.click(fn=classify_comments, inputs=categories, outputs=output)
    visualize_btn.click(
        fn=visualize_output,
        outputs=[
            sentiment_pie,
            category_pie,
            stacked_bar,
            kpi_visualization,
            sentiment_by_segment_pie,
        ],
    )

nps.launch(share=True)