Spaces:

lunadebruyne
/

EmotioNL

Running

App Files Files Community

lunadebruyne committed on May 3, 2023

Commit

18810b9

1 Parent(s): 005ef83

Create app.py

Browse files

Files changed (1) hide show

app.py +257 -0

app.py ADDED Viewed

	@@ -0,0 +1,257 @@

+import gradio as gr
+import torch
+import numpy as np
+import pickle
+import pandas as pd
+from tqdm import tqdm
+import altair as alt
+import matplotlib.pyplot as plt
+from datetime import date, timedelta
+from transformers import AutoTokenizer, AutoConfig, AutoModel, AutoModelForSequenceClassification
+def inference_sentence(text):
+    tokenizer = AutoTokenizer.from_pretrained(inference_modelpath)
+    model = AutoModelForSequenceClassification.from_pretrained(inference_modelpath)
+    for text in tqdm([text]):
+        inputs = tokenizer(text, return_tensors="pt")
+    with torch.no_grad(): # run model
+        logits = model(**inputs).logits
+        predicted_class_id = logits.argmax().item()
+    output = model.config.id2label[predicted_class_id]
+    return "Predicted emotion:\n" + output
+def freq(file_output):
+    f = open(file_output, 'r')
+    data = f.read().split("\n")
+    f.close()
+    data = [line.split(",") for line in data[1:-1]]
+    freq_dict = {}
+    for line in data:
+    	if line[1] not in freq_dict.keys():
+    		freq_dict[line[1]] = 1
+    	else:
+    		freq_dict[line[1]] += 1
+    simple = pd.DataFrame({
+    	'Emotion category': ['neutral', 'anger', 'fear', 'joy', 'love', 'sadness'],
+    	'Frequency': [freq_dict['0'], freq_dict['1'], freq_dict['2'], freq_dict['3'], freq_dict['4'], freq_dict['5']]})
+    domain = ['neutral', 'anger', 'fear', 'joy', 'love', 'sadness']
+    range_ = ['#999999', '#b22222', '#663399', '#ffcc00', '#db7093', '#6495ed']
+    n = max(simple['Frequency'])
+    plot = alt.Chart(simple).mark_bar().encode(
+    x=alt.X("Emotion category", sort=['neutral', 'anger', 'fear', 'joy', 'love', 'sadness']),
+    y=alt.Y("Frequency", axis=alt.Axis(grid=False), scale=alt.Scale(domain=[0, (n + 9) // 10 * 10])),
+    color=alt.Color("Emotion category", scale=alt.Scale(domain=domain, range=range_), legend=None),
+    tooltip=['Emotion category', 'Frequency']).properties(
+    width=600).configure_axis(
+    grid=False).interactive()
+    return plot
+def dist(file_output):
+	f = open(file_output, 'r')
+	data = f.read().split("\n")
+	f.close()
+	data = [line.split(",") for line in data[1:-1]]
+	freq_dict = {}
+	mapping_dict = {'0': 'neutral', '1': 'anger', '2': 'fear', '3': 'joy', '4': 'love', '5': 'sadness'}
+	for line in data:
+		dat = str(date(int(line[0][:4]), int(line[0][4:6]), int(line[0][6:8])))
+		if dat not in freq_dict.keys():
+			freq_dict[dat] = {}
+			if mapping_dict[line[1]] not in freq_dict[dat].keys():
+				freq_dict[dat][mapping_dict[line[1]]] = 1
+			else:
+				freq_dict[dat][mapping_dict[line[1]]] += 1
+		else:
+			if mapping_dict[line[1]] not in freq_dict[dat].keys():
+				freq_dict[dat][mapping_dict[line[1]]] = 1
+			else:
+				freq_dict[dat][mapping_dict[line[1]]] += 1
+	start_date = date(int(data[0][0][:4]), int(data[0][0][4:6]), int(data[0][0][6:8]))
+	end_date = date(int(data[-1][0][:4]), int(data[-1][0][4:6]), int(data[-1][0][6:8]))
+	delta = end_date - start_date   # returns timedelta
+	date_range = [str(start_date + timedelta(days=i)) for i in range(delta.days + 1)]
+	dates = [dat for dat in date_range for i in range(6)]
+	frequency = [freq_dict[dat][emotion] if (dat in freq_dict.keys() and emotion in freq_dict[dat].keys()) else 0 for dat in date_range for emotion in ['neutral', 'anger', 'fear', 'joy', 'love', 'sadness']]
+	categories = [emotion for dat in date_range for emotion in ['neutral', 'anger', 'fear', 'joy', 'love', 'sadness']]
+	data = pd.DataFrame({
+		'Date': dates,
+		'Frequency': frequency,
+		'Emotion category': categories})
+	domain = ['neutral', 'anger', 'fear', 'joy', 'love', 'sadness']
+	range_ = ['#999999', '#b22222', '#663399', '#ffcc00', '#db7093', '#6495ed']
+	n = max(data['Frequency'])
+	highlight = alt.selection(
+	type='single', on='mouseover', fields=["Emotion category"], nearest=True)
+	base = alt.Chart(data).encode(
+	x ="Date:T",
+	y=alt.Y("Frequency", scale=alt.Scale(domain=[0, (n + 9) // 10 * 10])),
+	color=alt.Color("Emotion category", scale=alt.Scale(domain=domain, range=range_), legend=alt.Legend(orient='bottom', direction='horizontal')))
+	points = base.mark_circle().encode(
+		opacity=alt.value(0),
+		tooltip=[
+			alt.Tooltip('Emotion category', title='Emotion category'),
+			alt.Tooltip('Date:T', title='Date'),
+			alt.Tooltip('Frequency', title='Frequency')
+		]).add_selection(highlight)
+	lines = base.mark_line().encode(
+		size=alt.condition(~highlight, alt.value(1), alt.value(3)))
+	plot = (points + lines).properties(width=600, height=350).interactive()
+	return plot
+def showcase(dataset):
+    # predictions file
+    if dataset == "The Voice of Holland":
+        file_output = "output/predictions_tvoh.txt"
+    elif dataset == "Floodings":
+        file_output = "output/predictions_floodings.txt"
+    elif dataset == "COVID-19":
+        file_output = "output/predictions_covid.txt"
+    elif dataset == "Childcare Benefits":
+        file_output = "output/predictions_toeslagen.txt"
+    # freq bar plot
+    freq_output = freq(file_output)
+    # dist plot
+    dist_output = dist(file_output)
+    # peaks
+    if dataset == "The Voice of Holland":
+        peaks_output = pickle.load(open('output/peaks_tvoh.p', 'rb'))
+    elif dataset == "Floodings":
+        peaks_output = pickle.load(open('output/peaks_floodings.p', 'rb'))
+    elif dataset == "COVID-19":
+        peaks_output = pickle.load(open('output/peaks_covid.p', 'rb'))
+    elif dataset == "Childcare Benefits":
+        peaks_output = pickle.load(open('output/peaks_toeslagen.p', 'rb'))
+    # topics
+    if dataset == "The Voice of Holland":
+        topics_output = pickle.load(open('output/topics_tvoh.p', 'rb'))
+    elif dataset == "Floodings":
+        topics_output = pickle.load(open('output/topics_floodings.p', 'rb'))
+    elif dataset == "COVID-19":
+        topics_output = pickle.load(open('output/topics_covid.p', 'rb'))
+    elif dataset == "Childcare Benefits":
+        topics_output = pickle.load(open('output/topics_toeslagen.p', 'rb'))
+    return gr.update(visible=True), gr.update(value=file_output, visible=True), gr.update(value=freq_output,visible=True), gr.update(value=dist_output,visible=True), gr.update(value=peaks_output,visible=True), gr.update(value=topics_output,visible=True)
+inference_modelpath = "model/checkpoint-128"
+with gr.Blocks() as demo:
+    with gr.Column(scale=1, min_width=50):
+        gr.Markdown("""
+                    """)
+    with gr.Column(scale=5):
+        gr.Markdown("""
+                <div style="text-align: center"><h1>EmotioNL: A framework for Dutch emotion detection</h1></div>
+                <div style="display: block;margin-left: auto;margin-right: auto;width: 60%;"><img alt="EmotioNL logo" src="https://users.ugent.be/~lundbruy/EmotioNL.png" width="100%"></div>
+                <div style="display: block;margin-left: auto;margin-right: auto;width: 75%;">This demo was made to demonstrate the EmotioNL model, a transformer-based classification model that analyses emotions in Dutch texts. The model uses <a href="https://github.com/iPieter/RobBERT">RobBERT</a>, which was further fine-tuned on the <a href="https://lt3.ugent.be/resources/emotionl/">EmotioNL dataset</a>. The resulting model is a classifier that, given a sentence, predicts one of the following emotion categories: <i>anger</i>, <i>fear</i>, <i>joy</i>, <i>love</i>, <i>sadness</i> or <i>neutral</i>. The demo can be used either in <b>sentence mode</b>, which allows you to enter a sentence for which an emotion will be predicted; or in <b>dataset mode</b>, which allows you to upload a dataset or see the full functionality with example data.</div>
+                """)
+        with gr.Tab("Sentence"):
+            gr.Markdown("""
+                        """)
+            with gr.Row():
+                with gr.Column():
+                    input = gr.Textbox(
+                            label="Enter a sentence",
+                            value="Jaaah! Volgende vakantie Barcelona en na het zomerseizoen naar de Algarve",
+                            lines=1)
+                    send_btn = gr.Button("Send")
+                output = gr.Textbox()
+            send_btn.click(fn=inference_sentence, inputs=input, outputs=output)
+        with gr.Tab("Showcase"):
+            with gr.Row():
+                with gr.Column():
+                    gr.Markdown("""
+                                **<font size="4">Run the demo on the data of a specific crisis case</font>**
+                                Select the desired dataset and click the button to run the demo.
+                                """)
+                with gr.Column():
+                    gr.Markdown("""
+                                """)
+                with gr.Column():
+                    gr.Markdown("""
+                                **<font size="4">Output</font>**
+                                After having clicked on the run button, scroll down to see the output (running may take a while):
+                                """)
+            with gr.Row():
+                with gr.Column():
+                    # demo1_btn = gr.Button("The Voice of Holland", variant="primary")
+                    # demo2_btn = gr.Button("Floodings", variant="primary")
+                    # demo3_btn = gr.Button("COVID-19", variant="primary")
+                    # demo4_btn = gr.Button("Childcare Benefits", variant="primary")
+                    dataset = gr.Dropdown(["The Voice of Holland", "Floodings", "COVID-19", "Childcare Benefits"], show_label=False)
+                    run_btn = gr.Button("Run", variant="primary")
+                with gr.Column():
+                    gr.Markdown("""
+                                **The Voice of Holland:** 18,502 tweets about a scandal about sexual misconduct in the Dutch reality TV singing competition 'The Voice of Holland'.
+                                **Floodings:** 9,923 tweets about the floodings that affected Belgium and the Netherlands in the Summer of 2021.
+                                **COVID-19:** 609,206 tweets about the COVID-19 pandemic, posted in the first eight months of the crisis.
+                                **Chilcare Benefits:** 66,961 tweets about the political scandal concerning false allegations of fraud regarding childcare allowance in the Netherlands.
+                                """)
+                with gr.Column():
+                    gr.Markdown("""
+                                **Predictions:** file with the predicted emotion label for each instance in the dataset.
+                                **Emotion frequencies:** bar plot with the prediction frequencies of each emotion category (anger, fear, joy, love, sadness or neutral).
+                                **Emotion distribution over time:** line plot that visualises the frequency of predicted emotions over time for each emotion category.
+                                **Peaks:** step graph that only shows the significant fluctuations (upwards and downwards) in emotion frequencies over time.
+                                **Topics:** a bar plot that shows the emotion distribution for different topics in the dataset. Topics are extracted using [BERTopic](https://maartengr.github.io/BERTopic/index.html).
+                                """)
+            with gr.Row():
+                gr.Markdown("""
+                            ___
+                            """)
+            with gr.Row():
+                with gr.Column():
+                    output_markdown = gr.Markdown("""
+                            **<font size="4">Output</font>**
+                            """, visible=False)
+                    message = gr.Textbox(label="Message", visible=False)
+                    output_file = gr.File(label="Predictions", visible=False)
+                    output_plot = gr.Plot(show_label=False, visible=False).style(container=True)
+                    output_dist = gr.Plot(show_label=False, visible=False)
+                    output_peaks = gr.Plot(show_label=False, visible=False)
+                    output_topics = gr.Plot(show_label=False, visible=False)
+            run_btn.click(fn=showcase, inputs=[dataset], outputs=[output_markdown, output_file, output_plot, output_dist, output_peaks, output_topics])
+        with gr.Row():
+            with gr.Column():
+                gr.Markdown("""
+                    <font size="2">Both this demo and the dataset have been created by [LT3](https://lt3.ugent.be/), the Language and Translation Technology Team of Ghent University. The EmotioNL project has been carried out with support from the Research Foundation – Flanders (FWO). For any questions, please contact [email protected].</font>
+                    <div style="display: grid;grid-template-columns:150px auto;"> <img style="margin-right: 1em" alt="LT3 logo" src="https://lt3.ugent.be/static/images/logo_v2_single.png" width="136" height="58"> <img style="margin-right: 1em" alt="FWO logo" src="https://www.fwo.be/images/logo_desktop.png" height="58"></div>
+                    """)
+    with gr.Column(scale=1, min_width=50):
+        gr.Markdown("""
+                    """)
+demo.launch()