Spaces:

ZennyKenny
/

comment_classification

Running on Zero

App Files Files Community

comment_classification / app.py

ZennyKenny

Update app.py

3b1d1cd verified 10 days ago

raw

history blame

6.69 kB

	import gradio as gr
	from transformers import pipeline
	import pandas as pd
	import spaces
	import plotly.express as px

	# Load the demo NPS dataset from the Hub and expose its "train" split as the
	# module-level DataFrame that the classification and visualization callbacks share.
	from datasets import load_dataset
	ds = load_dataset('ZennyKenny/demo_customer_nps')
	df = pd.DataFrame(ds['train'])

	# Initialize model pipeline
	from huggingface_hub import login
	import os

	# Log in with the API key stored in the API_KEY environment variable.
	# Calling login() without a token makes huggingface_hub fall back to an
	# interactive prompt, which cannot be answered on a headless server, so the
	# login is skipped entirely when no key is configured.
	hf_api_key = os.getenv("API_KEY")
	if hf_api_key:
	    login(token=hf_api_key)
	else:
	    print("API_KEY is not set; skipping Hugging Face login.")

	# Sentiment classifier and text-to-text generator used by classify_comments().
	classifier = pipeline("text-classification", model="distilbert/distilbert-base-uncased-finetuned-sst-2-english")
	generator = pipeline("text2text-generation", model="google/flan-t5-base")

	# Function to classify customer comments
	def classify_comments(categories):
	    """Label every customer comment with a sentiment and a category.

	    Each value in the global ``df['customer_comment']`` column is passed to the
	    module-level ``classifier`` pipeline (sentiment label) and to the
	    ``generator`` pipeline, which is prompted to pick one of ``categories``.

	    Args:
	        categories: List of category names offered to the generator.

	    Returns:
	        An HTML table (string) with the id, comment, sentiment, category, NPS
	        and segment columns, or an HTML error paragraph when ``categories``
	        is empty.

	    Side effects:
	        Adds/overwrites the ``comment_sentiment`` and ``comment_category``
	        columns on the global ``df`` and prints debugging output.
	    """
	    global df  # Results are stored as new columns on the shared DataFrame

	    # Without categories the prompt would end in "...the category: ." and the
	    # generator would be run on every row for nothing.
	    if not categories:
	        return "<p>Error: Please add at least one category before classifying comments.</p>"

	    # Debugging output
	    print("Classifying comments...")

	    # The category list is identical for every comment, so build it once.
	    category_str = ', '.join(categories)

	    # Missing comments become empty strings so both pipelines always get text;
	    # the original column is left untouched for the HTML output.
	    comments = df['customer_comment'].fillna('').astype(str)

	    sentiments = []
	    assigned_categories = []
	    for comment in comments:
	        # Classify sentiment
	        sentiments.append(classifier(comment)[0]['label'])
	        # Generate category
	        prompt = f"What category best describes this comment? '{comment}' Please answer using only the name of the category: {category_str}."
	        generated = generator(prompt, max_length=30)[0]['generated_text']
	        # Strip stray whitespace so identical answers group together later.
	        assigned_categories.append(generated.strip())

	    df['comment_sentiment'] = sentiments
	    df['comment_category'] = assigned_categories

	    # Debugging output
	    print(df.head())
	    print(df['comment_sentiment'].value_counts())
	    print(df['comment_category'].value_counts())

	    return df[['customer_id', 'customer_comment', 'comment_sentiment', 'comment_category', 'customer_nps', 'customer_segment']].to_html(index=False)

	# Function to generate visualizations
	def visualize_output():
	    """Build the charts and KPI summary from the classified comments.

	    Reads the module-level ``df`` but never modifies it: all type coercion and
	    row dropping happens on a private copy, so repeated clicks (or a later
	    re-classification) always start from the full data set.

	    Returns:
	        A 5-tuple ``(sentiment_pie, category_pie, stacked_bar,
	        kpi_visualization, sentiment_by_segment_pie)`` where the plots are
	        Plotly figures and ``kpi_visualization`` is a Markdown string. When
	        the data is empty or not yet classified, the tuple is
	        ``(None, None, None, <error message>, None)``.
	    """
	    required_columns = ['comment_sentiment', 'comment_category', 'customer_nps', 'customer_segment']
	    empty_error = "Error: DataFrame is empty. Please check the data or classification step."

	    # Check if DataFrame is empty
	    if df.empty:
	        return None, None, None, empty_error, None

	    # Check for required columns
	    if not all(col in df.columns for col in required_columns):
	        return None, None, None, "Error: Required columns are missing. Please classify comments first.", None

	    # Coerce NPS to numbers on a new frame (assign() does not touch the global).
	    data = df.assign(customer_nps=pd.to_numeric(df['customer_nps'], errors='coerce'))

	    # Drop incomplete rows BEFORE casting to str; casting first would turn
	    # missing values into ordinary strings that dropna() can no longer see.
	    data = data.dropna(subset=required_columns)
	    data = data.astype({
	        'comment_sentiment': str,
	        'comment_category': str,
	        'customer_segment': str,
	    })

	    # Cleaning may have removed every row.
	    if data.empty:
	        return None, None, None, empty_error, None

	    # Debugging output
	    print(data.head())
	    print(data['comment_sentiment'].value_counts())
	    print(data['comment_category'].value_counts())

	    # Pie Chart of Sentiment
	    sentiment_counts = data['comment_sentiment'].value_counts()
	    sentiment_pie = px.pie(
	        values=sentiment_counts.values,
	        names=sentiment_counts.index,
	        title="Sentiment Distribution"
	    )

	    # Pie Chart of Comment Categories
	    category_counts = data['comment_category'].value_counts()
	    category_pie = px.pie(
	        values=category_counts.values,
	        names=category_counts.index,
	        title="Comment Category Distribution"
	    )

	    # Stacked Bar Chart of Sentiment by Category.
	    # fill_value=0 makes absent category/sentiment pairs explicit zero counts
	    # instead of NaN cells.
	    sentiment_by_category = (
	        data.groupby(['comment_category', 'comment_sentiment'])
	        .size()
	        .unstack(fill_value=0)
	    )
	    stacked_bar = px.bar(
	        sentiment_by_category,
	        barmode='stack',
	        title="Sentiment by Comment Category",
	        labels={'value': 'Count', 'comment_category': 'Category', 'comment_sentiment': 'Sentiment'}
	    )

	    # KPI Visualizations (means are NaN when a sentiment label has no rows)
	    avg_nps = data['customer_nps'].mean()
	    avg_nps_positive = data.loc[data['comment_sentiment'] == 'POSITIVE', 'customer_nps'].mean()
	    avg_nps_negative = data.loc[data['comment_sentiment'] == 'NEGATIVE', 'customer_nps'].mean()
	    avg_nps_by_category = data.groupby('comment_category')['customer_nps'].mean().reset_index()
	    avg_nps_by_segment = data.groupby('customer_segment')['customer_nps'].mean().reset_index()

	    kpi_visualization = f"""
	Average NPS Scores:
	- Overall: {avg_nps:.2f}
	- Positive Sentiment: {avg_nps_positive:.2f}
	- Negative Sentiment: {avg_nps_negative:.2f}
	Average NPS by Category:
	{avg_nps_by_category.to_markdown(index=False)}
	Average NPS by Segment:
	{avg_nps_by_segment.to_markdown(index=False)}
	"""

	    # Pie Chart of Sentiment by Customer Segment.
	    # px.pie needs explicit names/values, so the counts are kept in long form
	    # and one pie is drawn per segment via facet_col.
	    sentiment_by_segment = (
	        data.groupby(['customer_segment', 'comment_sentiment'])
	        .size()
	        .reset_index(name='count')
	    )
	    sentiment_by_segment_pie = px.pie(
	        sentiment_by_segment,
	        names='comment_sentiment',
	        values='count',
	        facet_col='customer_segment',
	        title="Sentiment by Customer Segment"
	    )

	    return sentiment_pie, category_pie, stacked_bar, kpi_visualization, sentiment_by_segment_pie

	# Gradio Interface
	with gr.Blocks() as nps:
	    # Per-session list of user-defined category names.
	    categories = gr.State([])

	    # Upper bound on how many categories a user may define.
	    MAX_CATEGORIES = 5

	    def format_categories(current):
	        """Render the category list as the Markdown shown in the status area."""
	        if not current:
	            return "Categories:\n- None"
	        return "Categories:\n" + "\n".join(f"- {cat}" for cat in current)

	    def add_category(categories, new_category):
	        """Append a non-blank category (up to MAX_CATEGORIES) to the session list.

	        Returns the updated list, an empty string that clears the input box,
	        and the refreshed status Markdown. A new list is returned rather than
	        mutating the incoming state object in place.
	        """
	        cleaned = (new_category or "").strip()
	        if cleaned and len(categories) < MAX_CATEGORIES:
	            categories = categories + [cleaned]
	        return categories, "", format_categories(categories)

	    def reset_categories():
	        """Clear the session's category list and reset the status text."""
	        return [], format_categories([])

	    with gr.Row():
	        category_input = gr.Textbox(label="New Category", placeholder="Enter category name")
	        add_category_btn = gr.Button("Add Category")
	        reset_btn = gr.Button("Reset Categories")
	        category_status = gr.Markdown("Categories:\n- None")

	    # NOTE(review): no event handler is attached to the upload widget or the
	    # template button in the visible code - confirm whether wiring is missing.
	    uploaded_file = gr.File(label="Upload CSV", type="filepath")
	    template_btn = gr.Button("Use Template")
	    gr.Markdown("# NPS Comment Categorization")

	    classify_btn = gr.Button("Classify Comments")
	    output = gr.HTML()

	    visualize_btn = gr.Button("Visualize Output")
	    sentiment_pie = gr.Plot(label="Sentiment Distribution")
	    category_pie = gr.Plot(label="Comment Category Distribution")
	    stacked_bar = gr.Plot(label="Sentiment by Comment Category")
	    kpi_visualization = gr.Markdown()
	    sentiment_by_segment_pie = gr.Plot(label="Sentiment by Customer Segment")

	    # Event wiring: each button feeds its callback's return values to the listed outputs.
	    add_category_btn.click(fn=add_category, inputs=[categories, category_input], outputs=[categories, category_input, category_status])
	    reset_btn.click(fn=reset_categories, outputs=[categories, category_status])
	    classify_btn.click(fn=classify_comments, inputs=categories, outputs=output)
	    visualize_btn.click(fn=visualize_output, outputs=[sentiment_pie, category_pie, stacked_bar, kpi_visualization, sentiment_by_segment_pie])

	nps.launch(share=True)