File size: 3,104 Bytes
cbcd78b
 
 
ee68faf
cbcd78b
a32f02f
4530b74
52d8051
 
cbcd78b
a32f02f
4530b74
 
 
 
 
 
 
3fac692
3ae1eb6
cbcd78b
 
0180738
 
cbcd78b
3fac692
 
cbcd78b
 
0a5100e
9d03f28
 
 
3fac692
 
 
 
 
 
cbcd78b
 
 
9d03f28
 
 
 
 
 
 
 
7d44e84
ea5a489
cbcd78b
 
 
 
ea5a489
 
63651ec
848f580
 
1f0cb35
7d44e84
848f580
 
 
ea5a489
 
 
 
 
 
 
 
 
 
9d03f28
cbcd78b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import gradio as gr
from transformers import pipeline
import pandas as pd
import spaces

import os

from datasets import load_dataset
from huggingface_hub import login

# Load the template dataset. `ds` keeps the loaded dataset around and `df` is
# the module-level table that the rest of the app reads (and may replace).
ds = load_dataset('ZennyKenny/demo_customer_nps')
df = pd.DataFrame(ds['train'])

# Log in using the API key stored as an environment variable.
# Only attempt the login when the variable is actually set: calling
# login(token=None) falls back to an interactive prompt, which cannot be
# answered when the app runs headless.
hf_api_key = os.getenv("API_KEY")
if hf_api_key:
    login(token=hf_api_key)

# Initialize model pipelines: one for sentiment labels, one for free-text
# category answers.
classifier = pipeline("text-classification", model="distilbert/distilbert-base-uncased-finetuned-sst-2-english")
generator = pipeline("text2text-generation", model="google/flan-t5-base")

# Upper bound on the number of category textboxes. Components created inside
# an event handler are not added to the rendered page, so every box is built
# up front and "Add Category" merely reveals the next hidden one.
MAX_CATEGORIES = 10


# Function to classify customer comments
# https://huggingface.co/docs/hub/spaces-zerogpu
@spaces.GPU
def classify_comments(*category_values):
    """Label every comment in the active dataset with a sentiment and a category.

    Args:
        *category_values: Current text of each category textbox, as passed by
            the "Classify Comments" click event. Blank or non-string entries
            are ignored.

    Returns:
        An HTML table with the columns ``customer_comment``,
        ``comment_sentiment`` and ``comment_category``, or an error message
        string when no category was supplied.
    """
    category_list = [
        value.strip()
        for value in category_values
        if isinstance(value, str) and value.strip()
    ]
    if not category_list:
        return "Error: Please enter at least one category."
    # The category list is the same for every comment, so build it once.
    category_str = ', '.join(category_list)

    sentiments = []
    categories = []
    for comment in df['customer_comment']:
        # Uploaded CSVs may contain empty cells (NaN) or numeric values; the
        # pipelines expect text.
        if not isinstance(comment, str):
            comment = "" if pd.isna(comment) else str(comment)
        sentiment = classifier(comment)[0]['label']
        prompt = f"What category best describes this comment? '{comment}' Please answer using only the name of the category: {category_str}."
        category = generator(prompt, max_length=30)[0]['generated_text']
        categories.append(category)
        sentiments.append(sentiment)
    df['comment_sentiment'] = sentiments
    df['comment_category'] = categories
    return df[['customer_comment', 'comment_sentiment', 'comment_category']].to_html(index=False)


# Gradio Interface
with gr.Blocks() as nps:
    # Pre-built category inputs; only the first one starts out visible.
    category_boxes = [
        gr.Textbox(label="Category", placeholder="Enter a category", visible=(index == 0))
        for index in range(MAX_CATEGORIES)
    ]
    # Number of category boxes currently shown to this session.
    visible_count = gr.State(1)
    add_category_btn = gr.Button("Add Category")
    uploaded_file = gr.File(label="Upload CSV", type="filepath")
    template_btn = gr.Button("Use Template")
    gr.Markdown("# NPS Comment Categorization")
    classify_btn = gr.Button("Classify Comments")
    output = gr.HTML()

    def add_category(count):
        """Reveal one more category textbox, up to MAX_CATEGORIES.

        Args:
            count: Number of category boxes currently visible.

        Returns:
            A list holding the new visible count followed by one visibility
            update per category textbox.
        """
        count = min(count + 1, MAX_CATEGORIES)
        updates = [gr.update(visible=(index < count)) for index in range(MAX_CATEGORIES)]
        return [count] + updates

    def load_data(file):
        """Replace the active dataset with an uploaded CSV.

        Args:
            file: Value delivered by the upload component. With
                ``type="filepath"`` this is a path string; an object exposing
                a ``name`` attribute is also accepted. ``None`` means the
                upload was cleared.

        Returns:
            A status message describing success or the reason for rejection.
        """
        global df
        if file is None:
            return "No file uploaded."
        # Accept either a plain path or a file-like wrapper carrying `.name`.
        path = str(getattr(file, 'name', file))
        if not path.lower().endswith('.csv'):
            return "Error: Uploaded file is not a CSV."
        try:
            custom_df = pd.read_csv(path, encoding='utf-8')
        except (OSError, UnicodeDecodeError, pd.errors.ParserError, pd.errors.EmptyDataError) as err:
            return f"Error: Could not read the uploaded CSV ({err})"
        if 'customer_comment' not in custom_df.columns:
            return "Error: Uploaded CSV must contain a column named 'customer_comment'"
        df = custom_df
        return "Custom CSV loaded successfully!"

    def use_template():
        """Switch the active dataset back to the bundled template data."""
        global df
        # Rebuild from the loaded dataset so a previous upload is discarded.
        df = pd.DataFrame(ds['train'])
        return "Using Template Dataset"

    # Event wiring happens only after every component above exists.
    add_category_btn.click(fn=add_category, inputs=visible_count, outputs=[visible_count] + category_boxes)
    uploaded_file.change(fn=load_data, inputs=uploaded_file, outputs=output)
    template_btn.click(fn=use_template, outputs=output)
    # Each textbox value arrives as one positional argument.
    classify_btn.click(fn=classify_comments, inputs=category_boxes, outputs=output)

nps.launch()