Spaces:

ZennyKenny
/

comment_classification

Running on Zero

App Files Files Community

comment_classification / app.py

ZennyKenny

revert

8d2cc8a verified 10 days ago

raw

history blame

8 kB

	import gradio as gr
	from transformers import pipeline
	import pandas as pd
	import spaces
	import plotly.express as px

	# Load dataset
	from datasets import load_dataset
	# Default demo data; `df` is the module-level DataFrame that the handlers below read and update.
	ds = load_dataset('ZennyKenny/demo_customer_nps')
	df = pd.DataFrame(ds['train'])

	# Initialize model pipeline
	from huggingface_hub import login
	import os

	# Login using the API key stored as an environment variable
	hf_api_key = os.getenv("API_KEY")
	# Only authenticate when a key is actually configured: calling login() with
	# token=None would not use the environment variable at all.
	if hf_api_key:
	    login(token=hf_api_key)

	# Sentiment classifier (its 'label' output is used) and text2text generator used for category assignment.
	classifier = pipeline("text-classification", model="distilbert/distilbert-base-uncased-finetuned-sst-2-english")
	generator = pipeline("text2text-generation", model="google/flan-t5-base")

	# Function to classify customer comments
	@spaces.GPU
	def classify_comments(categories):
	    """Label every comment in the module-level ``df`` with a sentiment and a category.

	    Each value of ``df['customer_comment']`` is passed to ``classifier`` for a
	    sentiment label and to ``generator`` with a prompt listing ``categories``.
	    The results are stored in the ``comment_sentiment`` and
	    ``comment_category`` columns of ``df``.

	    Args:
	        categories: List of category names offered to the generator.

	    Returns:
	        An HTML table (string) of the annotated rows, or an error message
	        string when no categories have been provided.
	    """
	    if not categories:
	        # Without categories the prompt would offer the model no choices.
	        return "Error: Please add at least one category before classifying."

	    # The category list is identical for every comment, so build it once.
	    category_str = ', '.join(categories)

	    sentiments = []
	    assigned_categories = []
	    # Missing comments may be read as NaN; the pipelines expect text input.
	    for comment in df['customer_comment'].fillna('').astype(str):
	        # Classify sentiment
	        sentiments.append(classifier(comment)[0]['label'])
	        # Generate category
	        prompt = f"What category best describes this comment? '{comment}' Please answer using only the name of the category: {category_str}."
	        assigned_categories.append(generator(prompt, max_length=30)[0]['generated_text'])

	    # Column assignment updates the module-level DataFrame in place.
	    df['comment_sentiment'] = sentiments
	    df['comment_category'] = assigned_categories
	    return df[['customer_id', 'customer_comment', 'comment_sentiment', 'comment_category', 'customer_nps', 'customer_segment']].to_html(index=False)

	# Function to generate visualizations
	def visualize_output():
	    """Build the charts and KPI summary for the classified comments in ``df``.

	    Returns:
	        A 5-tuple ``(sentiment_pie, category_pie, stacked_bar,
	        kpi_visualization, sentiment_by_segment_pie)``. The fourth item is a
	        Markdown string; the others are Plotly figures. If ``df`` has not been
	        classified yet, the figures are ``None`` and the fourth item is an
	        error message.
	    """
	    # Ensure the required columns exist
	    if 'comment_sentiment' not in df.columns or 'comment_category' not in df.columns:
	        # Return 5 values (None for plots and an error message for markdown)
	        return None, None, None, "Error: Please classify comments before visualizing.", None

	    # Pie Chart of Sentiment
	    sentiment_counts = df['comment_sentiment'].value_counts()
	    sentiment_pie = px.pie(
	        values=sentiment_counts.values,
	        names=sentiment_counts.index,
	        title="Sentiment Distribution",
	        hover_data=[sentiment_counts.values],
	        labels={'value': 'Count', 'names': 'Sentiment'}
	    )
	    sentiment_pie.update_traces(textinfo='percent+label', hovertemplate="Sentiment: %{label}<br>Count: %{value}<br>Percentage: %{percent}")

	    # Pie Chart of Comment Categories
	    category_counts = df['comment_category'].value_counts()
	    category_pie = px.pie(
	        values=category_counts.values,
	        names=category_counts.index,
	        title="Comment Category Distribution",
	        hover_data=[category_counts.values],
	        labels={'value': 'Count', 'names': 'Category'}
	    )
	    category_pie.update_traces(textinfo='percent+label', hovertemplate="Category: %{label}<br>Count: %{value}<br>Percentage: %{percent}")

	    # Stacked Bar Chart of Sentiment by Category
	    # fill_value=0 so a category with no comments of some sentiment gets a zero count, not NaN.
	    sentiment_by_category = df.groupby(['comment_category', 'comment_sentiment']).size().unstack(fill_value=0)
	    stacked_bar = px.bar(
	        sentiment_by_category,
	        barmode='stack',
	        title="Sentiment by Comment Category",
	        labels={'value': 'Count', 'comment_category': 'Category', 'comment_sentiment': 'Sentiment'}
	    )

	    # KPI Visualizations
	    avg_nps = df['customer_nps'].mean()
	    avg_nps_positive = df[df['comment_sentiment'] == 'POSITIVE']['customer_nps'].mean()
	    avg_nps_negative = df[df['comment_sentiment'] == 'NEGATIVE']['customer_nps'].mean()
	    avg_nps_by_category = df.groupby('comment_category')['customer_nps'].mean().reset_index()
	    avg_nps_by_segment = df.groupby('customer_segment')['customer_nps'].mean().reset_index()

	    # Assemble the Markdown line by line so its content does not depend on
	    # source indentation; blank lines separate the list and each table.
	    kpi_visualization = "\n".join([
	        "Average NPS Scores:",
	        f"- Overall: {avg_nps:.2f}",
	        f"- Positive Sentiment: {avg_nps_positive:.2f}",
	        f"- Negative Sentiment: {avg_nps_negative:.2f}",
	        "",
	        "Average NPS by Category:",
	        "",
	        avg_nps_by_category.to_markdown(index=False),
	        "",
	        "Average NPS by Segment:",
	        "",
	        avg_nps_by_segment.to_markdown(index=False),
	    ])

	    # Pie Chart of Sentiment by Customer Segment
	    # px.pie needs explicit names/values, so use one long-form row per
	    # (segment, sentiment) pair with its count and a combined slice label.
	    sentiment_by_segment = (
	        df.groupby(['customer_segment', 'comment_sentiment'])
	        .size()
	        .reset_index(name='count')
	    )
	    sentiment_by_segment['segment_sentiment'] = (
	        sentiment_by_segment['customer_segment'].astype(str)
	        + ' / '
	        + sentiment_by_segment['comment_sentiment'].astype(str)
	    )
	    sentiment_by_segment_pie = px.pie(
	        sentiment_by_segment,
	        names='segment_sentiment',
	        values='count',
	        title="Sentiment by Customer Segment",
	        labels={'count': 'Count', 'segment_sentiment': 'Segment / Sentiment'}
	    )

	    return sentiment_pie, category_pie, stacked_bar, kpi_visualization, sentiment_by_segment_pie

	# Gradio Interface
	# The Blocks app is bound to `nps`, which is launched at the end of the file.
	# NOTE(review): indentation was lost in this copy; the statements from here through
	# the event handlers are presumably nested under this `with` — confirm.
	with gr.Blocks() as nps:
	# State to store categories
	# Starts as an empty list; it is passed to and returned from the category handlers.
	categories = gr.State([])

	# Function to add a category
	def add_category(categories, new_category):
	    """Append a trimmed category name to the list, up to a maximum of five.

	    Args:
	        categories: Current list of category names.
	        new_category: Text entered by the user; blank or ``None`` is ignored.

	    Returns:
	        A 3-tuple of the updated category list (a new list; the input is not
	        mutated), an empty string, and a Markdown summary of the categories.
	    """
	    cleaned = (new_category or "").strip()
	    # Work on a copy so the caller's list is never modified in place.
	    updated = list(categories or [])
	    if cleaned and len(updated) < 5:  # Limit to 5 categories
	        updated.append(cleaned)
	    # Show "- None" for an empty list, matching the reset status text.
	    bullets = "\n".join(f"- {cat}" for cat in updated) if updated else "- None"
	    return updated, "", "Categories:\n" + bullets

	# Function to reset categories
	def reset_categories():
	    """Return an empty category list together with the "no categories" status text."""
	    cleared = []
	    status = "Categories:\n- None"
	    return cleared, status

	# UI for adding categories
	# NOTE(review): indentation was lost in this copy, so which of the following
	# components sit inside this Row cannot be determined here — confirm against the original layout.
	with gr.Row():
	# Text entry for a new category name; the add handler returns a value for it as well.
	category_input = gr.Textbox(label="New Category", placeholder="Enter category name")
	add_category_btn = gr.Button("Add Category")
	reset_btn = gr.Button("Reset Categories")
	# Markdown summary of the current categories; starts with the "- None" placeholder.
	category_status = gr.Markdown("Categories:\n- None")

	# File upload and template buttons
	# The upload is requested with type="filepath"; load_data receives this component's value when it changes.
	uploaded_file = gr.File(label="Upload CSV", type="filepath")
	template_btn = gr.Button("Use Template")
	gr.Markdown("# NPS Comment Categorization")

	# Classify button
	classify_btn = gr.Button("Classify Comments")
	# HTML area that receives both the classification table and the CSV load status message.
	output = gr.HTML()

	# Visualize button
	visualize_btn = gr.Button("Visualize Output")
	# Output components, in the same order as the tuple returned by visualize_output.
	sentiment_pie = gr.Plot(label="Sentiment Distribution")
	category_pie = gr.Plot(label="Comment Category Distribution")
	stacked_bar = gr.Plot(label="Sentiment by Comment Category")
	kpi_visualization = gr.Markdown()
	sentiment_by_segment_pie = gr.Plot(label="Sentiment by Customer Segment")

	# Function to load data from uploaded CSV
	def load_data(file):
	    """Replace the module-level ``df`` with the contents of an uploaded CSV.

	    Args:
	        file: Path to the uploaded file as a string (what a file component
	            declared with ``type="filepath"`` supplies), an object exposing
	            the path as ``.name``, or ``None`` when nothing is uploaded.

	    Returns:
	        A status message string. ``df`` is only rebound when the file is a
	        readable CSV containing all required columns.
	    """
	    global df  # Ensure we're modifying the global DataFrame
	    if file is None:
	        return "No file uploaded."

	    # A path string has no seek()/name; accept either a path or a file-like wrapper.
	    path = file if isinstance(file, str) else file.name
	    if not path.endswith('.csv'):
	        return "Error: Uploaded file is not a CSV."

	    try:
	        custom_df = pd.read_csv(path, encoding='utf-8')
	    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
	        # Report unreadable files as a status message, like the other failure paths.
	        return f"Error: Could not read the uploaded CSV ({exc})."

	    # Check for required columns
	    required_columns = ['customer_id', 'customer_comment', 'customer_nps', 'customer_segment']
	    if not all(col in custom_df.columns for col in required_columns):
	        return f"Error: Uploaded CSV must contain the following columns: {', '.join(required_columns)}"

	    df = custom_df
	    return "Custom CSV loaded successfully!"

	# Function to use template categories
	def use_template():
	    """Return the built-in template category list and its Markdown status text."""
	    template_categories = [
	        "Product Experience",
	        "Customer Support",
	        "Price of Service",
	        "Other",
	    ]
	    bullet_lines = [f"- {name}" for name in template_categories]
	    status = "Categories:\n" + "\n".join(bullet_lines)
	    return template_categories, status

	# Event handlers
	# Each listener names its callback first, then the components it reads and writes.
	add_category_btn.click(add_category, inputs=[categories, category_input], outputs=[categories, category_input, category_status])
	reset_btn.click(reset_categories, outputs=[categories, category_status])
	# The load status message is written to the same HTML output as the classification table.
	uploaded_file.change(load_data, inputs=uploaded_file, outputs=output)
	template_btn.click(use_template, outputs=[categories, category_status])
	classify_btn.click(classify_comments, inputs=categories, outputs=output)
	visualize_btn.click(visualize_output, outputs=[sentiment_pie, category_pie, stacked_bar, kpi_visualization, sentiment_by_segment_pie])

	# Start the app.
	nps.launch(share=True)