import os

import gradio as gr
import pandas as pd
import plotly.express as px
import spaces
from datasets import load_dataset
from huggingface_hub import login
from transformers import pipeline

# Load the demo customer NPS dataset
ds = load_dataset('ZennyKenny/demo_customer_nps')
df = pd.DataFrame(ds['train'])

# Login using the API key stored as an environment variable
hf_api_key = os.getenv("API_KEY")
login(token=hf_api_key)

# Initialize model pipelines: sentiment classification and category generation
classifier = pipeline("text-classification", model="distilbert/distilbert-base-uncased-finetuned-sst-2-english")
generator = pipeline("text2text-generation", model="google/flan-t5-base")
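
# Expected pipeline output formats (standard transformers behavior; scores illustrative):
#   classifier("Great support!")        -> [{'label': 'POSITIVE', 'score': 0.99}]
#   generator("some prompt", max_length=30) -> [{'generated_text': '...'}]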

# Function to classify customer comments
@spaces.GPU
def classify_comments(categories):
    global df  # Ensure we're modifying the global DataFrame
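    # Note: df is module-level state, so all user sessions share (and mutate)
    # the same DataFrame; fine for a single-user demo, not for concurrent use.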
    sentiments = []
    assigned_categories = []

    # Debugging output
    print("Classifying comments...")

    for comment in df['customer_comment']:
        # Classify sentiment
        sentiment = classifier(comment)[0]['label']
        # Generate category
        category_str = ', '.join(categories)
        prompt = f"What category best describes this comment? '{comment}' Please answer using only the name of the category: {category_str}."
        category = generator(prompt, max_length=30)[0]['generated_text']
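        # Note: flan-t5 output is unconstrained free text, so the generated category
        # may not exactly match one of the supplied names; a stricter approach would
        # map the output back to the closest provided category.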
        assigned_categories.append(category)
        sentiments.append(sentiment)
    
    df['comment_sentiment'] = sentiments
    df['comment_category'] = assigned_categories
    
    # Debugging output
    print(df.head())  
    print(df['comment_sentiment'].value_counts())  
    print(df['comment_category'].value_counts())  

    return df[['customer_id', 'customer_comment', 'comment_sentiment', 'comment_category', 'customer_nps', 'customer_segment']].to_html(index=False)
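
# Hypothetical standalone call (outside the Gradio UI), using the demo dataset
# loaded above; the category names here are illustrative:
#   html_table = classify_comments(["Pricing", "Support", "Shipping"])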

# Function to generate visualizations
@spaces.GPU
def visualize_output():
    global df

    # Check if DataFrame is empty
    if df.empty:
        return None, None, None, "Error: DataFrame is empty. Please check the data or classification step.", None

    # Check for required columns
    required_columns = ['comment_sentiment', 'comment_category', 'customer_nps', 'customer_segment']
    if not all(col in df.columns for col in required_columns):
        return None, None, None, "Error: Required columns are missing. Please classify comments first.", None

    # Explicitly convert data types
    df['comment_sentiment'] = df['comment_sentiment'].astype(str)
    df['comment_category'] = df['comment_category'].astype(str)
    df['customer_nps'] = pd.to_numeric(df['customer_nps'], errors='coerce')
    df['customer_segment'] = df['customer_segment'].astype(str)

    # Drop NaN values
    df = df.dropna(subset=['comment_sentiment', 'comment_category', 'customer_nps', 'customer_segment'])
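    # Because df is declared global above, this dropna rebinds the shared DataFrame,
    # so rows dropped here stay dropped for later calls as well.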

    # Debugging output
    print(df.head())  
    print(df['comment_sentiment'].value_counts())  
    print(df['comment_category'].value_counts())  

    # Pie Chart of Sentiment
    sentiment_counts = df['comment_sentiment'].value_counts()
    sentiment_pie = px.pie(
        values=sentiment_counts.values,
        names=sentiment_counts.index,
        title="Sentiment Distribution"
    )

    # Pie Chart of Comment Categories
    category_counts = df['comment_category'].value_counts()
    category_pie = px.pie(
        values=category_counts.values,
        names=category_counts.index,
        title="Comment Category Distribution"
    )

    # Stacked Bar Chart of Sentiment by Category
    sentiment_by_category = df.groupby(['comment_category', 'comment_sentiment']).size().unstack()
    stacked_bar = px.bar(
        sentiment_by_category,
        barmode='stack',
        title="Sentiment by Comment Category",
        labels={'value': 'Count', 'comment_category': 'Category', 'comment_sentiment': 'Sentiment'}
    )

    # KPI Visualizations
    avg_nps = df['customer_nps'].mean()
    avg_nps_positive = df[df['comment_sentiment'] == 'POSITIVE']['customer_nps'].mean()
    avg_nps_negative = df[df['comment_sentiment'] == 'NEGATIVE']['customer_nps'].mean()
    avg_nps_by_category = df.groupby('comment_category')['customer_nps'].mean().reset_index()
    avg_nps_by_segment = df.groupby('customer_segment')['customer_nps'].mean().reset_index()
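    # If no comments carry a given sentiment label, the corresponding mean above is
    # NaN and renders as "nan" in the markdown summary below.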

    # Markdown KPI summary (DataFrame.to_markdown requires the tabulate package);
    # blank lines before the tables are needed for them to render as tables
    kpi_visualization = f"""
**Average NPS Scores:**
- Overall: {avg_nps:.2f}
- Positive Sentiment: {avg_nps_positive:.2f}
- Negative Sentiment: {avg_nps_negative:.2f}

**Average NPS by Category:**
{avg_nps_by_category.to_markdown(index=False)}

**Average NPS by Segment:**
{avg_nps_by_segment.to_markdown(index=False)}
"""

    # Pie Chart of Sentiment by Customer Segment
    # px.pie needs long-form names/values, so build a combined "segment / sentiment" label
    sentiment_by_segment = df.groupby(['customer_segment', 'comment_sentiment']).size().reset_index(name='count')
    sentiment_by_segment['label'] = sentiment_by_segment['customer_segment'] + ' / ' + sentiment_by_segment['comment_sentiment']
    sentiment_by_segment_pie = px.pie(
        sentiment_by_segment,
        values='count',
        names='label',
        title="Sentiment by Customer Segment"
    )

    return sentiment_pie, category_pie, stacked_bar, kpi_visualization, sentiment_by_segment_pie

# Gradio Interface
with gr.Blocks() as nps:
    categories = gr.State([])

    def add_category(categories, new_category):
        # Add a non-empty category (up to five) and refresh the status display
        if new_category.strip() != "" and len(categories) < 5:
            categories.append(new_category.strip())
        category_list = "\n".join([f"- {cat}" for cat in categories]) or "- None"
        return categories, "", "**Categories:**\n" + category_list

    def reset_categories():
        return [], "**Categories:**\n- None"

    with gr.Row():
        category_input = gr.Textbox(label="New Category", placeholder="Enter category name")
        add_category_btn = gr.Button("Add Category")
        reset_btn = gr.Button("Reset Categories")
        category_status = gr.Markdown("**Categories:**\n- None")

    # Upload controls (not yet wired to any handler; the demo dataset loaded at the
    # top of the file is what gets classified) and the page title
    uploaded_file = gr.File(label="Upload CSV", type="filepath")
    template_btn = gr.Button("Use Template")
    gr.Markdown("# NPS Comment Categorization")

    classify_btn = gr.Button("Classify Comments")
    output = gr.HTML()

    visualize_btn = gr.Button("Visualize Output")
    sentiment_pie = gr.Plot(label="Sentiment Distribution")
    category_pie = gr.Plot(label="Comment Category Distribution")
    stacked_bar = gr.Plot(label="Sentiment by Comment Category")
    kpi_visualization = gr.Markdown()
    sentiment_by_segment_pie = gr.Plot(label="Sentiment by Customer Segment")

    add_category_btn.click(fn=add_category, inputs=[categories, category_input], outputs=[categories, category_input, category_status])
    reset_btn.click(fn=reset_categories, outputs=[categories, category_status])
    classify_btn.click(fn=classify_comments, inputs=categories, outputs=output)
    visualize_btn.click(fn=visualize_output, outputs=[sentiment_pie, category_pie, stacked_bar, kpi_visualization, sentiment_by_segment_pie])
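    # classify_comments receives the State list of categories; visualize_output
    # takes no inputs and reads the module-level DataFrame populated above.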

nps.launch(share=True)
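# Note: when this runs inside a Hugging Face Space, share=True should be unnecessary
# (the Space already hosts the app); it mainly matters when launching locally.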