Spaces:
Running
on
Zero
Running
on
Zero
File size: 6,713 Bytes
cbcd78b 1ea874c be195b7 8045f6a 4530b74 be195b7 cbcd78b be195b7 1ea874c be195b7 1ea874c be195b7 ec6871c 67df231 962079a 3fac692 67df231 3b1d1cd cbcd78b 67df231 0a5100e 67df231 9d03f28 3fac692 67df231 3fac692 3b1d1cd 3fac692 67df231 3b1d1cd 33c57aa cbcd78b 8045f6a ec6871c 8045f6a 3b1d1cd a6a991c 8045f6a 3b1d1cd 8045f6a 3b1d1cd 8045f6a 33c57aa 8045f6a 33c57aa 8045f6a 33c57aa 3b1d1cd 33c57aa 8045f6a cbcd78b 67df231 9d03f28 67df231 3b1d1cd 67df231 51f4bc0 511d4e8 ac13c62 511d4e8 9d32e7a ac13c62 c00c984 80cfb3b be195b7 67df231 1ea874c be195b7 8045f6a 33c57aa 8045f6a 3b1d1cd cbcd78b 3b1d1cd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 |
import gradio as gr
from transformers import pipeline
import pandas as pd
import spaces
import plotly.express as px
# Load the demo NPS dataset and keep its train split as a pandas DataFrame.
# `df` is the module-level table the classification and visualisation
# functions below read from (and write classification results to).
from datasets import load_dataset

ds = load_dataset('ZennyKenny/demo_customer_nps')
df = pd.DataFrame(ds['train'])

# Initialize model pipelines
from huggingface_hub import login
import os

# The Hub access token is read from the API_KEY environment variable.
# Only log in when it is actually set: login(token=None) falls back to an
# interactive prompt, which cannot be answered when the app runs unattended.
hf_api_key = os.getenv("API_KEY")
if hf_api_key:
    login(token=hf_api_key)
else:
    print("API_KEY is not set; skipping Hugging Face Hub login.")

# Sentiment model: its 'label' output is stored per comment (the
# visualisation code compares it against 'POSITIVE' / 'NEGATIVE').
classifier = pipeline(
    "text-classification",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)
# Text-to-text model that is prompted to name a category for each comment.
generator = pipeline("text2text-generation", model="google/flan-t5-base")
@spaces.GPU
def classify_comments(categories):
    """Label every customer comment with a sentiment and a category.

    Runs the module-level ``classifier`` and ``generator`` pipelines over
    ``df['customer_comment']`` and stores the results in two columns of the
    global ``df``: ``comment_sentiment`` and ``comment_category``.

    Args:
        categories: List of category names the generator is asked to choose
            from.

    Returns:
        str: An HTML table of the classified comments, or a short HTML
        message when ``categories`` is empty.
    """
    global df  # results are written back to the shared module-level DataFrame

    # Without any category the prompt offers the model nothing to choose
    # from, so skip the (slow) model loop and tell the user what is missing.
    if not categories:
        return "<p>Please add at least one category before classifying comments.</p>"

    # The list of choices is the same for every comment; build it once.
    category_str = ', '.join(categories)

    sentiments = []
    assigned_categories = []

    # Debugging output
    print("Classifying comments...")

    for comment in df['customer_comment']:
        # Classify sentiment
        sentiment = classifier(comment)[0]['label']

        # Generate category
        prompt = f"What category best describes this comment? '{comment}' Please answer using only the name of the category: {category_str}."
        category = generator(prompt, max_length=30)[0]['generated_text']

        sentiments.append(sentiment)
        # Strip stray whitespace so identical answers are counted together.
        assigned_categories.append(category.strip())

    df['comment_sentiment'] = sentiments
    df['comment_category'] = assigned_categories

    # Debugging output
    print(df.head())
    print(df['comment_sentiment'].value_counts())
    print(df['comment_category'].value_counts())

    display_columns = [
        'customer_id',
        'customer_comment',
        'comment_sentiment',
        'comment_category',
        'customer_nps',
        'customer_segment',
    ]
    return df[display_columns].to_html(index=False)
# NOTE(review): nothing in this function calls a model; confirm whether the
# GPU decorator is actually needed here.
@spaces.GPU
def visualize_output():
    """Build the charts and KPI summary for the classified comments.

    Reads the module-level ``df`` (which must already contain the
    ``comment_sentiment`` and ``comment_category`` columns) and works on a
    cleaned copy, so the shared DataFrame is left untouched.

    Returns:
        tuple: ``(sentiment_pie, category_pie, stacked_bar, kpi_markdown,
        sentiment_by_segment_chart)``. When the data is empty or the required
        columns are missing, every figure is ``None`` and the fourth element
        is an error message.
    """
    empty_error = "Error: DataFrame is empty. Please check the data or classification step."

    # Check if DataFrame is empty
    if df.empty:
        return None, None, None, empty_error, None

    # Check for required columns
    required_columns = ['comment_sentiment', 'comment_category', 'customer_nps', 'customer_segment']
    if not all(col in df.columns for col in required_columns):
        return None, None, None, "Error: Required columns are missing. Please classify comments first.", None

    # Clean a private copy: visualising must not rewrite or shrink the shared
    # DataFrame that later classification runs depend on.
    data = df.copy()
    data['customer_nps'] = pd.to_numeric(data['customer_nps'], errors='coerce')

    # Drop incomplete rows BEFORE casting to str; astype(str) would turn
    # missing values into the literal text 'nan', which dropna cannot see.
    data = data.dropna(subset=required_columns)
    for text_column in ('comment_sentiment', 'comment_category', 'customer_segment'):
        data[text_column] = data[text_column].astype(str)

    # Cleaning may have removed every row.
    if data.empty:
        return None, None, None, empty_error, None

    # Debugging output
    print(data.head())
    print(data['comment_sentiment'].value_counts())
    print(data['comment_category'].value_counts())

    # Pie Chart of Sentiment
    sentiment_counts = data['comment_sentiment'].value_counts()
    sentiment_pie = px.pie(
        values=sentiment_counts.values,
        names=sentiment_counts.index,
        title="Sentiment Distribution",
    )

    # Pie Chart of Comment Categories
    category_counts = data['comment_category'].value_counts()
    category_pie = px.pie(
        values=category_counts.values,
        names=category_counts.index,
        title="Comment Category Distribution",
    )

    # Stacked Bar Chart of Sentiment by Category. fill_value=0 gives
    # category/sentiment pairs that never occur a count of 0 instead of NaN.
    sentiment_by_category = (
        data.groupby(['comment_category', 'comment_sentiment'])
        .size()
        .unstack(fill_value=0)
    )
    stacked_bar = px.bar(
        sentiment_by_category,
        barmode='stack',
        title="Sentiment by Comment Category",
        labels={'value': 'Count', 'comment_category': 'Category', 'comment_sentiment': 'Sentiment'},
    )

    # KPI Visualizations
    avg_nps = data['customer_nps'].mean()
    avg_nps_positive = data[data['comment_sentiment'] == 'POSITIVE']['customer_nps'].mean()
    avg_nps_negative = data[data['comment_sentiment'] == 'NEGATIVE']['customer_nps'].mean()
    avg_nps_by_category = data.groupby('comment_category')['customer_nps'].mean().reset_index()
    avg_nps_by_segment = data.groupby('customer_segment')['customer_nps'].mean().reset_index()

    # Built line by line (no leading indentation, blank lines between the
    # sections) so the list and the tables render as separate Markdown blocks.
    kpi_visualization = "\n".join([
        "**Average NPS Scores:**",
        "",
        f"- Overall: {avg_nps:.2f}",
        f"- Positive Sentiment: {avg_nps_positive:.2f}",
        f"- Negative Sentiment: {avg_nps_negative:.2f}",
        "",
        "**Average NPS by Category:**",
        "",
        avg_nps_by_category.to_markdown(index=False),
        "",
        "**Average NPS by Segment:**",
        "",
        avg_nps_by_segment.to_markdown(index=False),
        "",
    ])

    # Sentiment by Customer Segment. px.pie has no wide-form mode, so an
    # unstacked segment x sentiment table passed without names/values gives
    # it nothing to plot. Use long-form counts in a two-level sunburst
    # (segment ring, then sentiment ring) instead.
    sentiment_by_segment = (
        data.groupby(['customer_segment', 'comment_sentiment'])
        .size()
        .reset_index(name='count')
    )
    sentiment_by_segment_pie = px.sunburst(
        sentiment_by_segment,
        path=['customer_segment', 'comment_sentiment'],
        values='count',
        title="Sentiment by Customer Segment",
    )

    return sentiment_pie, category_pie, stacked_bar, kpi_visualization, sentiment_by_segment_pie
# Gradio Interface
# NOTE(review): the nesting below (what sits inside the Row) was reconstructed
# from a flattened copy of the file; confirm it matches the intended layout.
with gr.Blocks() as nps:
    # State component holding the list of category names entered so far.
    categories = gr.State([])

    def _format_categories(names):
        """Render the category list as Markdown, showing "- None" when empty."""
        bullets = [f"- {name}" for name in names] or ["- None"]
        return "**Categories:**\n" + "\n".join(bullets)

    def add_category(categories, new_category):
        """Append a non-blank category name, keeping at most five.

        Args:
            categories: Current list of category names.
            new_category: Text typed into the "New Category" box.

        Returns:
            tuple: ``(updated list, "" to clear the textbox, Markdown status)``.
            Blank names, and names offered once five categories exist, leave
            the list unchanged.
        """
        name = (new_category or "").strip()
        if name and len(categories) < 5:
            # Build a new list rather than mutating the incoming state value.
            categories = categories + [name]
        return categories, "", _format_categories(categories)

    def reset_categories():
        """Clear the category list and reset the status text."""
        return [], _format_categories([])

    with gr.Row():
        category_input = gr.Textbox(label="New Category", placeholder="Enter category name")
        add_category_btn = gr.Button("Add Category")
        reset_btn = gr.Button("Reset Categories")
        category_status = gr.Markdown("**Categories:**\n- None")

    # NOTE(review): no event handler in the visible code uses the upload box
    # or the template button.
    uploaded_file = gr.File(label="Upload CSV", type="filepath")
    template_btn = gr.Button("Use Template")

    gr.Markdown("# NPS Comment Categorization")

    classify_btn = gr.Button("Classify Comments")
    output = gr.HTML()

    visualize_btn = gr.Button("Visualize Output")
    sentiment_pie = gr.Plot(label="Sentiment Distribution")
    category_pie = gr.Plot(label="Comment Category Distribution")
    stacked_bar = gr.Plot(label="Sentiment by Comment Category")
    kpi_visualization = gr.Markdown()
    sentiment_by_segment_pie = gr.Plot(label="Sentiment by Customer Segment")

    # Event wiring: each button feeds its handler's return values into the
    # listed output components, in order.
    add_category_btn.click(
        fn=add_category,
        inputs=[categories, category_input],
        outputs=[categories, category_input, category_status],
    )
    reset_btn.click(fn=reset_categories, outputs=[categories, category_status])
    classify_btn.click(fn=classify_comments, inputs=categories, outputs=output)
    visualize_btn.click(
        fn=visualize_output,
        outputs=[sentiment_pie, category_pie, stacked_bar, kpi_visualization, sentiment_by_segment_pie],
    )

nps.launch(share=True)
|