Spaces:
Running
on
Zero
Running
on
Zero
import gradio as gr | |
from transformers import pipeline | |
import pandas as pd | |
import spaces | |
import plotly.express as px | |
# Load the demo NPS dataset from the Hugging Face Hub into a DataFrame that
# the classification and visualization functions share as a module global.
from datasets import load_dataset
ds = load_dataset('ZennyKenny/demo_customer_nps')
df = pd.DataFrame(ds['train'])

# Authenticate with the Hub and initialize the model pipelines.
from huggingface_hub import login
import os

# The access token is read from the API_KEY environment variable. Only log in
# when it is actually set: login(token=None) falls back to an interactive
# prompt, which cannot be answered when the app runs unattended.
hf_api_key = os.getenv("API_KEY")
if hf_api_key:
    login(token=hf_api_key)

# Sentiment model (labels its input POSITIVE / NEGATIVE).
classifier = pipeline("text-classification", model="distilbert/distilbert-base-uncased-finetuned-sst-2-english")
# Instruction-following model used to pick a category for each comment.
generator = pipeline("text2text-generation", model="google/flan-t5-base")
# Function to classify customer comments | |
def classify_comments(categories):
    """Assign a sentiment label and a category to every customer comment.

    Each value of the global ``df['customer_comment']`` column is passed to
    the sentiment ``classifier`` and to the text ``generator``, which is
    prompted to answer with one of ``categories``. The results are written to
    the ``comment_sentiment`` and ``comment_category`` columns of the global
    ``df``.

    Args:
        categories: Category names the generator is asked to choose from.

    Returns:
        An HTML table of the classified comments, or an HTML error paragraph
        when ``categories`` is empty.
    """
    global df  # Results are stored on the shared module-level DataFrame

    # Without categories the prompt would end in an empty list and the
    # generated "category" would be meaningless, so stop early.
    if not categories:
        return "<p><strong>Error:</strong> Please add at least one category before classifying comments.</p>"

    # Debugging output
    print("Classifying comments...")

    # The category list does not change per comment, so build it once.
    category_str = ', '.join(categories)

    sentiments = []
    assigned_categories = []
    for comment in df['customer_comment']:
        # Missing comments (None/NaN) are treated as empty text so a single
        # blank row cannot abort the whole run.
        comment = "" if pd.isna(comment) else str(comment)

        # Classify sentiment; truncation keeps over-long comments within the
        # model's maximum input length.
        sentiment = classifier(comment, truncation=True)[0]['label']

        # Generate category
        prompt = f"What category best describes this comment? '{comment}' Please answer using only the name of the category: {category_str}."
        category = generator(prompt, max_length=30)[0]['generated_text'].strip()

        sentiments.append(sentiment)
        assigned_categories.append(category)

    df['comment_sentiment'] = sentiments
    df['comment_category'] = assigned_categories

    # Debugging output
    print(df.head())
    print(df['comment_sentiment'].value_counts())
    print(df['comment_category'].value_counts())

    return df[['customer_id', 'customer_comment', 'comment_sentiment', 'comment_category', 'customer_nps', 'customer_segment']].to_html(index=False)
# Function to generate visualizations | |
def visualize_output():
    """Build the charts and the NPS summary for the classified comments.

    Reads the global ``df`` (which must already contain the
    ``comment_sentiment`` and ``comment_category`` columns) and works on a
    cleaned copy, so calling this function does not change the shared data.

    Returns:
        A 5-tuple ``(sentiment_pie, category_pie, stacked_bar, kpi_markdown,
        sentiment_by_segment_chart)``. When the data is empty or columns are
        missing, the four figure slots are ``None`` and the fourth element is
        an error message.
    """
    required_columns = ['comment_sentiment', 'comment_category', 'customer_nps', 'customer_segment']
    empty_error = "Error: DataFrame is empty. Please check the data or classification step."

    # Check if DataFrame is empty
    if df.empty:
        return None, None, None, empty_error, None

    # Check for required columns
    if not all(col in df.columns for col in required_columns):
        return None, None, None, "Error: Required columns are missing. Please classify comments first.", None

    # Work on a copy so repeated visualizations never alter or shrink the
    # global DataFrame.
    data = df.copy()
    data['customer_nps'] = pd.to_numeric(data['customer_nps'], errors='coerce')

    # Drop incomplete rows BEFORE casting to str: casting first would turn
    # missing values into the literal text 'nan' and hide them from dropna.
    data = data.dropna(subset=required_columns)
    if data.empty:
        return None, None, None, empty_error, None
    for col in ('comment_sentiment', 'comment_category', 'customer_segment'):
        data[col] = data[col].astype(str)

    # Debugging output
    print(data.head())
    print(data['comment_sentiment'].value_counts())
    print(data['comment_category'].value_counts())

    # Pie Chart of Sentiment
    sentiment_counts = data['comment_sentiment'].value_counts()
    sentiment_pie = px.pie(
        values=sentiment_counts.values,
        names=sentiment_counts.index,
        title="Sentiment Distribution",
    )

    # Pie Chart of Comment Categories
    category_counts = data['comment_category'].value_counts()
    category_pie = px.pie(
        values=category_counts.values,
        names=category_counts.index,
        title="Comment Category Distribution",
    )

    # Stacked Bar Chart of Sentiment by Category; absent category/sentiment
    # combinations are counted as 0 instead of being left as NaN.
    sentiment_by_category = (
        data.groupby(['comment_category', 'comment_sentiment']).size().unstack(fill_value=0)
    )
    stacked_bar = px.bar(
        sentiment_by_category,
        barmode='stack',
        title="Sentiment by Comment Category",
        labels={'value': 'Count', 'comment_category': 'Category', 'comment_sentiment': 'Sentiment'},
    )

    # KPI Visualizations
    def _fmt(value):
        # A mean over zero rows is NaN; show that as "N/A" rather than "nan".
        return "N/A" if pd.isna(value) else f"{value:.2f}"

    avg_nps = data['customer_nps'].mean()
    avg_nps_positive = data[data['comment_sentiment'] == 'POSITIVE']['customer_nps'].mean()
    avg_nps_negative = data[data['comment_sentiment'] == 'NEGATIVE']['customer_nps'].mean()
    avg_nps_by_category = data.groupby('comment_category')['customer_nps'].mean().reset_index()
    avg_nps_by_segment = data.groupby('customer_segment')['customer_nps'].mean().reset_index()

    # Assemble the markdown line by line so no source indentation leaks into
    # the text; blank lines keep each table a separate markdown block.
    kpi_visualization = "\n".join([
        "**Average NPS Scores:**",
        f"- Overall: {_fmt(avg_nps)}",
        f"- Positive Sentiment: {_fmt(avg_nps_positive)}",
        f"- Negative Sentiment: {_fmt(avg_nps_negative)}",
        "",
        "**Average NPS by Category:**",
        "",
        avg_nps_by_category.to_markdown(index=False),
        "",
        "**Average NPS by Segment:**",
        "",
        avg_nps_by_segment.to_markdown(index=False),
    ])

    # Sentiment by Customer Segment. px.pie needs explicit names/values and
    # cannot draw a segment x sentiment table, so the breakdown is shown as a
    # two-level sunburst (segment -> sentiment) built from long-form counts.
    segment_sentiment_counts = (
        data.groupby(['customer_segment', 'comment_sentiment']).size().reset_index(name='count')
    )
    sentiment_by_segment_pie = px.sunburst(
        segment_sentiment_counts,
        path=['customer_segment', 'comment_sentiment'],
        values='count',
        title="Sentiment by Customer Segment",
    )

    return sentiment_pie, category_pie, stacked_bar, kpi_visualization, sentiment_by_segment_pie
# Gradio Interface | |
with gr.Blocks() as nps:
    # List of user-defined category names, kept in Gradio state.
    categories = gr.State([])

    def format_category_status(current):
        """Return the markdown shown under the category controls."""
        items = [f"- {cat}" for cat in current] or ["- None"]
        return "**Categories:**\n" + "\n".join(items)

    def add_category(categories, new_category):
        """Add a non-blank category name, up to a maximum of five.

        Returns the updated list, an empty string that clears the input box,
        and the refreshed status markdown. A new list is returned instead of
        mutating the incoming state value.
        """
        name = (new_category or "").strip()
        updated = list(categories)
        if name and len(updated) < 5:
            updated.append(name)
        return updated, "", format_category_status(updated)

    def reset_categories():
        """Clear all categories and restore the initial status text."""
        return [], "**Categories:**\n- None"

    with gr.Row():
        category_input = gr.Textbox(label="New Category", placeholder="Enter category name")
        add_category_btn = gr.Button("Add Category")
        reset_btn = gr.Button("Reset Categories")
    category_status = gr.Markdown("**Categories:**\n- None")

    # NOTE(review): no event handler is attached to the upload box or the
    # template button in this block, so they currently have no effect.
    uploaded_file = gr.File(label="Upload CSV", type="filepath")
    template_btn = gr.Button("Use Template")

    gr.Markdown("# NPS Comment Categorization")
    classify_btn = gr.Button("Classify Comments")
    output = gr.HTML()

    visualize_btn = gr.Button("Visualize Output")
    sentiment_pie = gr.Plot(label="Sentiment Distribution")
    category_pie = gr.Plot(label="Comment Category Distribution")
    stacked_bar = gr.Plot(label="Sentiment by Comment Category")
    kpi_visualization = gr.Markdown()
    sentiment_by_segment_pie = gr.Plot(label="Sentiment by Customer Segment")

    # Event wiring
    add_category_btn.click(fn=add_category, inputs=[categories, category_input], outputs=[categories, category_input, category_status])
    reset_btn.click(fn=reset_categories, outputs=[categories, category_status])
    classify_btn.click(fn=classify_comments, inputs=categories, outputs=output)
    visualize_btn.click(fn=visualize_output, outputs=[sentiment_pie, category_pie, stacked_bar, kpi_visualization, sentiment_by_segment_pie])

nps.launch(share=True)