EmotioNL / app.py
lunadebruyne's picture
Update app.py
30512d3
raw
history blame
11 kB
import gradio as gr
import torch
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
from transformers import AutoTokenizer, AutoConfig, AutoModel, AutoModelForSequenceClassification
"""
description_sentence = "<h3>Demo EmotioNL</h3>\nThis demo allows you to analyse the emotion in a sentence."
description_dataset = "<h3>Demo EmotioNL</h3>\nThis demo allows you to analyse the emotions in a dataset.\nThe data should be in tsv-format with two named columns: the first column (id) should contain the sentence IDs, and the second column (text) should contain the actual texts. Optionally, there is a third column named 'date', which specifies the date associated with the text (e.g., tweet date). This column is necessary when the options 'emotion distribution over time' and 'peaks' are selected."
inference_modelpath = "model/checkpoint-128"
def inference_sentence(text):
tokenizer = AutoTokenizer.from_pretrained(inference_modelpath)
model = AutoModelForSequenceClassification.from_pretrained(inference_modelpath)
for text in tqdm([text]):
inputs = tokenizer(text, return_tensors="pt")
with torch.no_grad(): # run model
logits = model(**inputs).logits
predicted_class_id = logits.argmax().item()
output = model.config.id2label[predicted_class_id]
return output
def frequencies(preds):
preds_dict = {"neutral": 0, "anger": 0, "fear": 0, "joy": 0, "love": 0, "sadness": 0}
for pred in preds:
preds_dict[pred] = preds_dict[pred] + 1
bars = list(preds_dict.keys())
height = list(preds_dict.values())
x_pos = np.arange(len(bars))
plt.bar(x_pos, height, color=['lightgrey', 'firebrick', 'rebeccapurple', 'orange', 'palevioletred', 'cornflowerblue'])
plt.xticks(x_pos, bars)
return plt
def inference_dataset(file_object, option_list):
tokenizer = AutoTokenizer.from_pretrained(inference_modelpath)
model = AutoModelForSequenceClassification.from_pretrained(inference_modelpath)
data_path = open(file_object.name, 'r')
df = pd.read_csv(data_path, delimiter='\t', header=0, names=['id', 'text'])
ids = df["id"].tolist()
texts = df["text"].tolist()
preds = []
for text in tqdm(texts): # progressbar
inputs = tokenizer(text, return_tensors="pt")
with torch.no_grad(): # run model
logits = model(**inputs).logits
predicted_class_id = logits.argmax().item()
prediction = model.config.id2label[predicted_class_id]
preds.append(prediction)
predictions_content = list(zip(ids, texts, preds))
# write predictions to file
output = "output.txt"
f = open(output, 'w')
f.write("id\ttext\tprediction\n")
for line in predictions_content:
f.write(str(line[0]) + '\t' + str(line[1]) + '\t' + str(line[2]) + '\n')
output1 = output
output2 = output3 = output4 = output5 = "This option was not selected."
if "emotion frequencies" in option_list:
output2 = frequencies(preds)
else:
output2 = None
if "emotion distribution over time" in option_list:
output3 = "This option was selected."
if "peaks" in option_list:
output4 = "This option was selected."
if "topics" in option_list:
output5 = "This option was selected."
return [output1, output2, output3, output4, output5]
iface_sentence = gr.Interface(
fn=inference_sentence,
description = description_sentence,
inputs = gr.Textbox(
label="Enter a sentence",
lines=1),
outputs="text")
inputs = [gr.File(
label="Upload a dataset"),
gr.CheckboxGroup(
["emotion frequencies", "emotion distribution over time", "peaks", "topics"],
label = "Select options")]
outputs = [gr.File(),
gr.Plot(label="Emotion frequencies"),
gr.Textbox(label="Emotion distribution over time"),
gr.Textbox(label="Peaks"),
gr.Textbox(label="Topics")]
iface_dataset = gr.Interface(
fn = inference_dataset,
description = description_dataset,
inputs=inputs,
outputs = outputs)
iface = gr.TabbedInterface([iface_sentence, iface_dataset], ["Sentence", "Dataset"])
iface.queue().launch()
"""
def inference_sentence(text: str) -> str:
    """Return a placeholder message echoing the sentence to be processed.

    Args:
        text: The sentence entered by the user.

    Returns:
        A fixed header line followed by ``text`` on the next line.
    """
    header = "This sentence will be processed:\n"
    # Plain concatenation (not formatting) so non-string input still fails loudly.
    return header + text
"""
def inference_dataset1(input_file):
output = "output.txt"
f = open(output, 'w')
f.write("The predictions come here.")
f.close()
return gr.update(value=output, visible=True), gr.update(visible=True)
def inference_dataset2(input_file, input_checks):
if "emotion frequencies" in input_checks:
preds_dict = {"neutral": 10, "anger": 15, "fear": 2, "joy": 30, "love": 5, "sadness": 8}
bars = list(preds_dict.keys())
height = list(preds_dict.values())
x_pos = np.arange(len(bars))
plt.bar(x_pos, height, color=['lightgrey', 'firebrick', 'rebeccapurple', 'orange', 'palevioletred', 'cornflowerblue'])
plt.xticks(x_pos, bars)
return gr.update(value=plt, visible=True)
else:
return None
def inference_dataset3(input_file, input_checks):
if "emotion distribution over time" in input_checks:
return gr.update(value="This option was selected.", visible=True)
else:
return gr.update(value="This option was not selected.")
def inference_dataset4(input_file, input_checks):
if "peaks" in input_checks:
return gr.update(value="This option was selected.", visible=True)
else:
return gr.update(value="This option was not selected.")
def inference_dataset5(input_file, input_checks):
if "topics" in input_checks:
return gr.update(value="This option was selected.", visible=True)
else:
return gr.update(value="This option was not selected.")
"""
def file(input_file, input_checks=None):
    """Write the placeholder predictions file and reveal one follow-up button.

    Args:
        input_file: The uploaded dataset. Currently unused; the written
            content is a fixed placeholder.
        input_checks: Collection of selected option labels. ``None`` or an
            empty collection means no option was selected.

    Returns:
        A tuple of five ``gr.update`` objects, in the order expected by the
        click handler's outputs: the predictions file (set to the written
        path and made visible), followed by the visibility of the
        "frequencies", "distribution over time", "peaks" and "topics"
        buttons. Only the button of the first selected option (in that
        fixed order) is made visible; all others are hidden.
    """
    output = "output.txt"
    # Context manager guarantees the handle is closed even if the write fails.
    with open(output, "w", encoding="utf-8") as f:
        f.write("The predictions come here.")

    # Fixed priority order; it mirrors the order of the button outputs.
    button_options = (
        "emotion frequencies",
        "emotion distribution over time",
        "peaks",
        "topics",
    )
    selected = input_checks or ()
    first_selected = next(
        (option for option in button_options if option in selected), None
    )

    return (
        gr.update(value=output, visible=True),
        *(gr.update(visible=(option == first_selected)) for option in button_options),
    )
# Build the two-tab demo UI: one tab for a single sentence, one for a dataset.
with gr.Blocks() as demo:
    with gr.Tab("Sentence"):
        gr.Markdown(
            "\n"
            "# Demo EmotioNL\n"
            "This demo allows you to analyse the emotion in a sentence.\n"
        )
        with gr.Row():
            with gr.Column():
                # Named `sentence_input` so the builtin `input` is not shadowed.
                sentence_input = gr.Textbox(
                    label="Enter a sentence",
                    lines=1)
                send_btn = gr.Button("Send")
            output = gr.Textbox()
        send_btn.click(fn=inference_sentence, inputs=sentence_input, outputs=output)

    with gr.Tab("Dataset"):
        gr.Markdown(
            "\n"
            "# Demo EmotioNL\n"
            "This demo allows you to analyse the emotions in a dataset. The data should be in tsv-format with two named columns: the first column (id) should contain the sentence IDs, and the second column (text) should contain the actual texts. Optionally, there is a third column named 'date', which specifies the date associated with the text (e.g., tweet date). This column is necessary when the options 'emotion distribution over time' and 'peaks' are selected.\n"
        )
        with gr.Row():
            with gr.Column():
                input_file = gr.File(
                    label="Upload a dataset")
                input_checks = gr.CheckboxGroup(
                    ["emotion frequencies", "emotion distribution over time", "peaks", "topics"],
                    label="Select options")
                send_btn = gr.Button("Send")
            with gr.Column():
                # Everything starts hidden; handlers reveal components step by step.
                output_file = gr.File(label="Predictions", visible=False)
                next_button_freq = gr.Button("Next_button freq", visible=False)
                output_plot = gr.Plot(label="Emotion frequencies", visible=False)
                next_button_dist = gr.Button("Next_button dist", visible=False)
                output_dist = gr.Textbox(label="Emotion distribution over time", visible=False)
                next_button_peaks = gr.Button("Next_button peaks", visible=False)
                output_peaks = gr.Textbox(label="Peaks", visible=False)
                next_button_topics = gr.Button("Next_button topics", visible=False)
                output_topics = gr.Textbox(label="Topics", visible=False)

        # `file` takes both the uploaded file and the selected options, so both
        # components must be passed as inputs (one component per parameter).
        #send_btn.click(fn=inference_dataset1, inputs=input_file, outputs=[output_file,next_button_freq]) # next_button_freq becomes visible
        send_btn.click(
            fn=file,
            inputs=[input_file, input_checks],
            outputs=[output_file, next_button_freq, next_button_dist, next_button_peaks, next_button_topics])

        # Disabled draft of the chained "next button" wiring, kept for reference.
        # NOTE(review): `input_checks` is a component at this point, not the list
        # of selected values, so these membership tests presumably have to move
        # into event handlers; the draft also uses names that are not defined
        # here (freq_freq, freq_dist, freq_peaks, next_peaks, output_).
        #
        # if "emotion frequencies" in input_checks:
        #     next_button_freq.click(fn=freq_freq, inputs=[output_file,input_checks], outputs=[output_plot,next_button_dist]) # next_button_dist becomes visible
        # else:
        #     if "emotion distribution over time" in input_checks:
        #         next_button_freq.click(fn=freq_dist, inputs=[output_file,input_checks], outputs=[output_dist,next_peaks]) # nex_button_peaks becomes visible
        #     else:
        #         if "peaks" in input_checks:
        #             next_button_freq.click(fn=freq_peaks, inputs=[output_file,input_checks], outputs=[output_,next_peaks]) # nex_button_peaks becomes visible

        #output_file.change(fn=inference_dataset2, inputs=[input_file,input_checks], outputs=output_plot).then(fn=inference_dataset3, inputs=[input_file,input_checks], outputs=output_dist)
        #output_plot.change(fn=inference_dataset3, inputs=[input_file,input_checks], outputs=output_dist)
        #output_dist.change(fn=inference_dataset4, inputs=[input_file,input_checks], outputs=output_peaks)
        #output_peaks.change(fn=inference_dataset5, inputs=[input_file,input_checks], outputs=output_topics)

demo.launch()