# Hugging Face Space page header (extraction residue): "Spaces: Running on Zero"
import gradio as gr | |
from transformers import pipeline | |
import pandas as pd | |
import spaces | |
import plotly.express as px | |
# Load the demo dataset and expose its 'train' split as the module-level
# DataFrame that the classification and visualization callbacks work on.
from datasets import load_dataset

ds = load_dataset('ZennyKenny/demo_customer_nps')
df = pd.DataFrame(ds['train'])

# Initialize model pipelines
from huggingface_hub import login
import os

# Log in using the API key stored as an environment variable. The call is
# skipped when the variable is unset: login(token=None) falls back to an
# interactive prompt, which cannot be answered in a headless deployment.
hf_api_key = os.getenv("API_KEY")
if hf_api_key:
    login(token=hf_api_key)

# Sentiment model; callers read the 'label' field of its first result.
classifier = pipeline("text-classification", model="distilbert/distilbert-base-uncased-finetuned-sst-2-english")
# Text-to-text model prompted to name the best-fitting category.
generator = pipeline("text2text-generation", model="google/flan-t5-base")
# Function to classify customer comments
def classify_comments(categories):
    """Tag every comment in the global DataFrame with a sentiment and a category.

    For each value of ``df['customer_comment']`` the sentiment ``classifier``
    supplies a label and the ``generator`` is prompted to pick one of
    *categories*. The results are stored in the ``comment_sentiment`` and
    ``comment_category`` columns of the global ``df``.

    Args:
        categories: Category names offered to the generator model.

    Returns:
        An HTML table (string) of the classified rows, or an error message
        when *categories* is empty.
    """
    global df  # Columns are added to the shared module-level DataFrame

    # Without categories the prompt would ask the model to choose from
    # nothing, so bail out before running any model.
    if not categories:
        return "Error: Please add at least one category before classifying."

    # The category list is identical for every comment; build it once.
    category_str = ', '.join(categories)

    # Uploaded CSVs can contain empty cells (NaN) or numeric cells; the
    # pipelines are given plain text in every case.
    comments = df['customer_comment'].fillna('').astype(str)

    sentiments = []
    assigned_categories = []
    for comment in comments:
        # Classify sentiment
        sentiments.append(classifier(comment)[0]['label'])

        # Generate category
        prompt = f"What category best describes this comment? '{comment}' Please answer using only the name of the category: {category_str}."
        assigned_categories.append(generator(prompt, max_length=30)[0]['generated_text'])

    df['comment_sentiment'] = sentiments
    df['comment_category'] = assigned_categories

    display_columns = [
        'customer_id',
        'customer_comment',
        'comment_sentiment',
        'comment_category',
        'customer_nps',
        'customer_segment',
    ]
    return df[display_columns].to_html(index=False)
def _count_pie(series, title, label):
    """Return a pie chart of the value counts of *series*.

    *label* names the slices in the hover text (e.g. "Sentiment").
    """
    counts = series.value_counts()
    fig = px.pie(values=counts.values, names=counts.index, title=title)
    # The explicit hovertemplate replaces the hover text Plotly Express
    # would otherwise generate, so no hover_data/labels are passed above.
    fig.update_traces(
        textinfo='percent+label',
        hovertemplate=label + ": %{label}<br>Count: %{value}<br>Percentage: %{percent}",
    )
    return fig


# Function to generate visualizations
def visualize_output():
    """Build the charts and KPI summary for the classified comments.

    Reads the global ``df`` and requires the ``comment_sentiment`` and
    ``comment_category`` columns to be present.

    Returns:
        A 5-tuple ``(sentiment_pie, category_pie, stacked_bar, kpi_markdown,
        sentiment_by_segment_chart)``. When the required columns are missing,
        the four chart slots are ``None`` and the Markdown slot carries an
        error message.
    """
    # Ensure the required columns exist
    if 'comment_sentiment' not in df.columns or 'comment_category' not in df.columns:
        # Return 5 values (None for plots and an error message for markdown)
        return None, None, None, "Error: Please classify comments before visualizing.", None

    # Pie charts of sentiment and of comment categories
    sentiment_pie = _count_pie(df['comment_sentiment'], "Sentiment Distribution", "Sentiment")
    category_pie = _count_pie(df['comment_category'], "Comment Category Distribution", "Category")

    # Stacked Bar Chart of Sentiment by Category. fill_value=0 keeps
    # category/sentiment pairs that never occur as 0 instead of NaN.
    sentiment_by_category = (
        df.groupby(['comment_category', 'comment_sentiment'])
        .size()
        .unstack(fill_value=0)
    )
    stacked_bar = px.bar(
        sentiment_by_category,
        barmode='stack',
        title="Sentiment by Comment Category",
        labels={'value': 'Count', 'comment_category': 'Category', 'comment_sentiment': 'Sentiment'},
    )

    # KPI Visualizations
    avg_nps = df['customer_nps'].mean()
    avg_nps_positive = df[df['comment_sentiment'] == 'POSITIVE']['customer_nps'].mean()
    avg_nps_negative = df[df['comment_sentiment'] == 'NEGATIVE']['customer_nps'].mean()
    avg_nps_by_category = df.groupby('comment_category')['customer_nps'].mean().reset_index()
    avg_nps_by_segment = df.groupby('customer_segment')['customer_nps'].mean().reset_index()

    # Blank lines separate the sections so each heading and table starts its
    # own Markdown block instead of continuing the preceding list item.
    kpi_visualization = "\n".join([
        "**Average NPS Scores:**",
        "",
        f"- Overall: {avg_nps:.2f}",
        f"- Positive Sentiment: {avg_nps_positive:.2f}",
        f"- Negative Sentiment: {avg_nps_negative:.2f}",
        "",
        "**Average NPS by Category:**",
        "",
        avg_nps_by_category.to_markdown(index=False),
        "",
        "**Average NPS by Segment:**",
        "",
        avg_nps_by_segment.to_markdown(index=False),
        "",
    ])

    # Sentiment by Customer Segment. px.pie needs explicit names/values and
    # cannot draw a wide segment-by-sentiment table, so the counts are kept
    # in long form and drawn as a two-level pie (sunburst): segments on the
    # inner ring, sentiments on the outer ring.
    segment_counts = (
        df.groupby(['customer_segment', 'comment_sentiment'])
        .size()
        .reset_index(name='count')
    )
    segment_counts = segment_counts[segment_counts['count'] > 0]
    sentiment_by_segment_pie = px.sunburst(
        segment_counts,
        path=['customer_segment', 'comment_sentiment'],
        values='count',
        title="Sentiment by Customer Segment",
        labels={'count': 'Count', 'customer_segment': 'Segment', 'comment_sentiment': 'Sentiment'},
    )

    return sentiment_pie, category_pie, stacked_bar, kpi_visualization, sentiment_by_segment_pie
# Gradio Interface
with gr.Blocks() as nps:
    # State to store categories
    categories = gr.State([])

    def _format_categories(names):
        """Render the category list as the Markdown shown in the status area."""
        if not names:
            return "**Categories:**\n- None"
        return "**Categories:**\n" + "\n".join(f"- {cat}" for cat in names)

    # Function to add a category
    def add_category(categories, new_category):
        """Append a non-blank category (up to 5 in total) and clear the input box.

        Returns the updated list, the new textbox value, and the status Markdown.
        """
        name = (new_category or "").strip()
        if name and len(categories) < 5:  # Limit to 5 categories
            # Build a new list instead of mutating the state value passed in.
            categories = categories + [name]
        return categories, "", _format_categories(categories)

    # Function to reset categories
    def reset_categories():
        """Clear the category list and its status Markdown."""
        return [], _format_categories([])

    # UI for adding categories
    with gr.Row():
        category_input = gr.Textbox(label="New Category", placeholder="Enter category name")
        add_category_btn = gr.Button("Add Category")
        reset_btn = gr.Button("Reset Categories")
    category_status = gr.Markdown("**Categories:**\n- None")

    # File upload and template buttons
    uploaded_file = gr.File(label="Upload CSV", type="filepath")
    template_btn = gr.Button("Use Template")

    gr.Markdown("# NPS Comment Categorization")

    # Classify button
    classify_btn = gr.Button("Classify Comments")
    output = gr.HTML()

    # Visualize button
    visualize_btn = gr.Button("Visualize Output")
    sentiment_pie = gr.Plot(label="Sentiment Distribution")
    category_pie = gr.Plot(label="Comment Category Distribution")
    stacked_bar = gr.Plot(label="Sentiment by Comment Category")
    kpi_visualization = gr.Markdown()
    sentiment_by_segment_pie = gr.Plot(label="Sentiment by Customer Segment")

    # Function to load data from uploaded CSV
    def load_data(file):
        """Replace the global DataFrame with the contents of an uploaded CSV.

        Args:
            file: Value of the upload component. With ``type="filepath"`` this
                is a path string, or ``None`` when the upload is cleared.

        Returns:
            A status or error message for display.
        """
        global df  # Ensure we're modifying the global DataFrame
        if file is None:
            return "No file uploaded."

        # The upload component is configured with type="filepath", so the
        # value is a str path (it has no .seek()/.name). Objects exposing
        # .name are still accepted in case a file wrapper is passed instead.
        path = file if isinstance(file, str) else file.name
        if not path.lower().endswith('.csv'):
            return "Error: Uploaded file is not a CSV."

        try:
            custom_df = pd.read_csv(path, encoding='utf-8')
        except (UnicodeDecodeError, pd.errors.ParserError, pd.errors.EmptyDataError):
            return "Error: Uploaded CSV could not be read."

        # Check for required columns
        required_columns = ['customer_id', 'customer_comment', 'customer_nps', 'customer_segment']
        if any(col not in custom_df.columns for col in required_columns):
            return f"Error: Uploaded CSV must contain the following columns: {', '.join(required_columns)}"

        df = custom_df
        return "Custom CSV loaded successfully!"

    # Function to use template categories
    def use_template():
        """Return a predefined category list and its status Markdown."""
        template_categories = ["Product Experience", "Customer Support", "Price of Service", "Other"]
        return template_categories, _format_categories(template_categories)

    # Event handlers
    add_category_btn.click(
        fn=add_category,
        inputs=[categories, category_input],
        outputs=[categories, category_input, category_status],
    )
    reset_btn.click(
        fn=reset_categories,
        outputs=[categories, category_status],
    )
    uploaded_file.change(
        fn=load_data,
        inputs=uploaded_file,
        outputs=output,
    )
    template_btn.click(
        fn=use_template,
        outputs=[categories, category_status],
    )
    classify_btn.click(
        fn=classify_comments,
        inputs=categories,
        outputs=output,
    )
    visualize_btn.click(
        fn=visualize_output,
        outputs=[sentiment_pie, category_pie, stacked_bar, kpi_visualization, sentiment_by_segment_pie],
    )

nps.launch(share=True)