File size: 6,843 Bytes
cbcd78b
1ea874c
be195b7
 
8045f6a
4530b74
be195b7
 
 
 
cbcd78b
be195b7
 
 
1ea874c
be195b7
 
 
 
 
 
1ea874c
be195b7
 
67df231
3fac692
67df231
cbcd78b
67df231
0a5100e
67df231
 
9d03f28
3fac692
67df231
3fac692
 
67df231
3fac692
cbcd78b
8045f6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cbcd78b
 
67df231
 
9d03f28
67df231
 
 
 
51f4bc0
511d4e8
ac13c62
 
 
511d4e8
67df231
9d32e7a
 
 
ac13c62
c00c984
80cfb3b
67df231
be195b7
 
 
67df231
 
1ea874c
 
be195b7
8045f6a
 
 
 
 
 
 
67df231
be195b7
 
 
 
 
 
 
8045f6a
 
 
 
be195b7
 
 
 
 
 
67df231
be195b7
67df231
51f4bc0
67df231
 
 
 
 
c00c984
67df231
ac13c62
 
 
67df231
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8045f6a
 
 
 
cbcd78b
511d4e8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
import gradio as gr
from transformers import pipeline
import pandas as pd
import spaces
import plotly.express as px

# Load dataset
# The train split becomes the module-level DataFrame `df` that the
# classification, visualization and CSV-upload callbacks read and replace.
from datasets import load_dataset
ds = load_dataset('ZennyKenny/demo_customer_nps')
df = pd.DataFrame(ds['train'])

# Initialize model pipeline
from huggingface_hub import login
import os

# Login using the API key stored as an environment variable.
# Only call login() when a token is actually configured: with token=None the
# function falls back to an interactive prompt, which cannot be answered when
# the app runs headless.
hf_api_key = os.getenv("API_KEY")
if hf_api_key:
    login(token=hf_api_key)

# Sentiment labels come from the SST-2 classifier; free-text category
# assignment is produced by the FLAN-T5 text2text model.
classifier = pipeline("text-classification", model="distilbert/distilbert-base-uncased-finetuned-sst-2-english")
generator = pipeline("text2text-generation", model="google/flan-t5-base")

# Function to classify customer comments
@spaces.GPU
def classify_comments(categories):
    """Label every comment in the global ``df`` with a sentiment and a category.

    Args:
        categories: Iterable of category names offered to the generator model.

    Returns:
        An HTML table (str) with the ``customer_comment``,
        ``comment_sentiment`` and ``comment_category`` columns.

    Side effects:
        Adds/overwrites the ``comment_sentiment`` and ``comment_category``
        columns on the module-level ``df``.
    """
    # The list of candidate categories is identical for every comment, so it
    # is joined once instead of once per row.
    category_str = ', '.join(categories)

    sentiments = []
    assigned_categories = []
    for raw_comment in df['customer_comment']:
        # Uploaded CSVs can contain empty (NaN) or numeric cells; the
        # pipelines are fed text, so normalise every cell to a string first.
        comment = "" if pd.isna(raw_comment) else str(raw_comment)

        # Classify sentiment
        sentiments.append(classifier(comment)[0]['label'])

        # Generate category
        prompt = f"What category best describes this comment? '{comment}' Please answer using only the name of the category: {category_str}."
        assigned_categories.append(generator(prompt, max_length=30)[0]['generated_text'])

    df['comment_sentiment'] = sentiments
    df['comment_category'] = assigned_categories
    return df[['customer_comment', 'comment_sentiment', 'comment_category']].to_html(index=False)

# Function to generate visualizations
def visualize_output():
    """Build the charts and KPI summary for the classified comments.

    Reads the module-level ``df``, which must already contain the
    ``comment_sentiment`` and ``comment_category`` columns written by
    ``classify_comments`` as well as ``customer_nps``.

    Returns:
        A 4-tuple ``(sentiment_pie, category_pie, stacked_bar, kpi_markdown)``:
        three Plotly figures followed by a Markdown string.

    Raises:
        gr.Error: If the comments have not been classified yet.
    """
    # Without this guard the first column lookup fails with a bare KeyError,
    # which tells the user nothing about what to do next.
    if not {'comment_sentiment', 'comment_category'}.issubset(df.columns):
        raise gr.Error("Please classify the comments before visualizing the output.")

    # Pie Chart of Sentiment
    sentiment_counts = df['comment_sentiment'].value_counts()
    sentiment_pie = px.pie(
        values=sentiment_counts.values,
        names=sentiment_counts.index,
        title="Sentiment Distribution",
        hover_data=[sentiment_counts.values],
        labels={'value': 'Count', 'names': 'Sentiment'}
    )
    sentiment_pie.update_traces(textinfo='percent+label', hovertemplate="Sentiment: %{label}<br>Count: %{value}<br>Percentage: %{percent}")

    # Pie Chart of Comment Categories
    category_counts = df['comment_category'].value_counts()
    category_pie = px.pie(
        values=category_counts.values,
        names=category_counts.index,
        title="Comment Category Distribution",
        hover_data=[category_counts.values],
        labels={'value': 'Count', 'names': 'Category'}
    )
    category_pie.update_traces(textinfo='percent+label', hovertemplate="Category: %{label}<br>Count: %{value}<br>Percentage: %{percent}")

    # Stacked Bar Chart of Sentiment by Category
    # fill_value=0 records category/sentiment pairs that never occur as a
    # zero count instead of NaN.
    sentiment_by_category = df.groupby(['comment_category', 'comment_sentiment']).size().unstack(fill_value=0)
    stacked_bar = px.bar(
        sentiment_by_category,
        barmode='stack',
        title="Sentiment by Comment Category",
        labels={'value': 'Count', 'comment_category': 'Category', 'comment_sentiment': 'Sentiment'}
    )

    # KPI Visualizations
    avg_nps = df['customer_nps'].mean()
    avg_nps_positive = df[df['comment_sentiment'] == 'POSITIVE']['customer_nps'].mean()
    avg_nps_negative = df[df['comment_sentiment'] == 'NEGATIVE']['customer_nps'].mean()
    avg_nps_by_category = df.groupby('comment_category')['customer_nps'].mean().reset_index()

    # Assemble the Markdown line by line with no leading indentation.
    # Previously every line except the table's continuation rows carried
    # four leading spaces, which Markdown renders as a code block and which
    # split the table header from its body. The table also needs a blank
    # line before it to be recognised as a table.
    kpi_visualization = "\n".join([
        "**Average NPS Scores:**",
        f"- Overall: {avg_nps:.2f}",
        f"- Positive Sentiment: {avg_nps_positive:.2f}",
        f"- Negative Sentiment: {avg_nps_negative:.2f}",
        "",
        "**Average NPS by Category:**",
        "",
        avg_nps_by_category.to_markdown(index=False),
    ])

    return sentiment_pie, category_pie, stacked_bar, kpi_visualization

# Gradio Interface
# Components and callbacks defined inside this context belong to the `nps`
# Blocks app that is launched at the bottom of the file.
with gr.Blocks() as nps:
    # State to store categories
    # Starts as an empty list; it is the input/output of the add, reset and
    # template callbacks and the input of the classify callback.
    categories = gr.State([])

    # Function to add a category
    def add_category(categories, new_category):
        """Add a user-entered category and rebuild the category list display.

        Args:
            categories: Current list of category names.
            new_category: Text typed by the user; surrounding whitespace is
                ignored and ``None`` is treated as empty.

        Returns:
            A 3-tuple ``(updated_categories, "", status_markdown)``. The
            empty string clears the input textbox. The category is only
            added when it is non-blank and fewer than 5 are already stored.
        """
        # Copy first so the list object handed in by the caller is never
        # mutated in place.
        updated = list(categories)
        name = (new_category or "").strip()
        if name and len(updated) < 5:  # Limit to 5 categories
            updated.append(name)

        # Show the same "- None" placeholder used by the reset callback and
        # the initial UI when there is nothing to list.
        bullets = "\n".join(f"- {cat}" for cat in updated) if updated else "- None"
        return updated, "", "**Categories:**\n" + bullets

    # Function to reset categories
    def reset_categories():
        """Clear the category selection.

        Returns:
            A 2-tuple of a new empty list and the Markdown text shown when
            no category is selected.
        """
        cleared = []
        placeholder = "**Categories:**\n- None"
        return cleared, placeholder

    # UI for adding categories
    # One row holding the text input, the add/reset buttons and a Markdown
    # area listing the categories chosen so far.
    with gr.Row():
        category_input = gr.Textbox(label="New Category", placeholder="Enter category name")
        add_category_btn = gr.Button("Add Category")
        reset_btn = gr.Button("Reset Categories")
        category_status = gr.Markdown("**Categories:**\n- None")

    # File upload and template buttons
    # type="filepath": the upload is handed to callbacks as a path on disk.
    uploaded_file = gr.File(label="Upload CSV", type="filepath")
    template_btn = gr.Button("Use Template")
    gr.Markdown("# NPS Comment Categorization")

    # Classify button
    # `output` shows the classification table and also the CSV-upload status
    # message, since both callbacks write to it.
    classify_btn = gr.Button("Classify Comments")
    output = gr.HTML()

    # Visualize button
    # The three plots and the Markdown block receive, in this order, the four
    # values returned by visualize_output().
    visualize_btn = gr.Button("Visualize Output")
    sentiment_pie = gr.Plot(label="Sentiment Distribution")
    category_pie = gr.Plot(label="Comment Category Distribution")
    stacked_bar = gr.Plot(label="Sentiment by Comment Category")
    kpi_visualization = gr.Markdown()

    # Function to load data from uploaded CSV
    def load_data(file):
        """Replace the global ``df`` with the contents of an uploaded CSV.

        Args:
            file: The upload, given as a filesystem path (``str`` or
                path-like), as an object exposing that path as ``.name``,
                or ``None`` when nothing is uploaded.

        Returns:
            A status message (str) describing success or the reason the
            file was rejected. ``df`` is only replaced on success.
        """
        global df

        if file is None:
            return "No file uploaded."

        # The upload component is declared with type="filepath", so the value
        # is a plain path string; calling .seek()/.name on it would raise
        # AttributeError. Objects carrying the path in `.name` are accepted
        # too so either form works.
        if isinstance(file, (str, os.PathLike)):
            path = os.fspath(file)
        else:
            path = getattr(file, "name", None)

        if not isinstance(path, str) or not path.lower().endswith('.csv'):
            return "Error: Uploaded file is not a CSV."

        try:
            custom_df = pd.read_csv(path, encoding='utf-8')
        except (OSError, ValueError) as exc:
            # ValueError also covers pandas' parser/empty-data errors and
            # UnicodeDecodeError; report them instead of crashing the callback.
            return f"Error: Could not read the uploaded CSV ({exc})."

        # Check for required columns
        required_columns = ['customer_comment', 'customer_nps']
        if any(col not in custom_df.columns for col in required_columns):
            return f"Error: Uploaded CSV must contain the following columns: {', '.join(required_columns)}"

        df = custom_df
        return "Custom CSV loaded successfully!"

    # Function to use template categories
    def use_template():
        """Return the built-in category set and its Markdown listing.

        Returns:
            A 2-tuple ``(categories, status_markdown)`` where ``categories``
            is the list of four template names and ``status_markdown`` lists
            them as bullet points under a bold heading.
        """
        preset = [
            "Product Experience",
            "Customer Support",
            "Price of Service",
            "Other",
        ]
        bullet_lines = ["- " + name for name in preset]
        listing = "**Categories:**\n" + "\n".join(bullet_lines)
        return preset, listing

    # Event handlers
    # Add: reads the current state and textbox, writes back the new state,
    # the (cleared) textbox and the category listing.
    add_category_btn.click(
        fn=add_category,
        inputs=[categories, category_input],
        outputs=[categories, category_input, category_status]
    )
    # Reset: takes no inputs; overwrites the state and the category listing.
    reset_btn.click(
        fn=reset_categories,
        outputs=[categories, category_status]
    )
    # Upload: runs whenever the file component's value changes and shows the
    # returned status message in the shared HTML output.
    uploaded_file.change(
        fn=load_data,
        inputs=uploaded_file,
        outputs=output
    )
    # Template: takes no inputs; overwrites the state and the category listing.
    template_btn.click(
        fn=use_template,
        outputs=[categories, category_status]
    )
    # Classify: passes the selected categories and renders the returned HTML
    # table in the same output used for upload status messages.
    classify_btn.click(
        fn=classify_comments,
        inputs=categories,
        outputs=output
    )
    # Visualize: the four return values map positionally onto these outputs.
    visualize_btn.click(
        fn=visualize_output,
        outputs=[sentiment_pie, category_pie, stacked_bar, kpi_visualization]
    )

# Start the app as soon as the module is executed. share=True requests a
# public share link in addition to the local server.
# NOTE(review): confirm a share link is wanted in the deployment environment.
nps.launch(share=True)