Spaces:
Running
Running
File size: 20,679 Bytes
6c96578 ade73e6 6c96578 5b6eeaf 6c96578 fff91af 6c96578 c02823d 6c96578 c02823d b2a109a bc251bf b2a109a 767781f b2a109a c02823d b2a109a c02823d f8a3f83 653e1d1 f8a3f83 b0f937c e974c2a fbababd b0f937c c820969 7ac98e2 e974c2a c820969 30512d3 c820969 3afb663 c820969 3afb663 c7ec0f1 3afb663 c7ec0f1 bc251bf 8f08788 7ac98e2 e974c2a 7ac98e2 8f08788 7ac98e2 8f08788 f3aef14 8f08788 f3aef14 8f08788 e974c2a 8f08788 dcfd073 fff91af 946844d fff91af 946844d dcfd073 49443a1 a19e7ce 946844d 71f6b58 49443a1 1eea227 49443a1 dcfd073 e974c2a 0186e6d dcfd073 0e3d8df dcfd073 0e3d8df c7ec0f1 f5ee1f4 01608b1 1742f92 e974c2a 26daae1 c7ec0f1 ade73e6 da1cbbc c02823d 443c823 c02823d 5d12990 4aaf98d 5d12990 c02823d 011cb7f c02823d 84d8036 3891a69 84d8036 4aaf98d c02823d 5d12990 b59d991 5481b27 b59d991 71f6b58 3b63119 53b74c4 8034ef9 dcfd073 bc251bf 53b74c4 dcfd073 bc251bf 53b74c4 dcfd073 bc251bf f5ee1f4 dcfd073 bc251bf da1cbbc bc251bf 2c4f796 53b74c4 2c4f796 7ec489a bc251bf 31fe7d7 c02823d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 |
import pickle
from collections import Counter
from datetime import date, timedelta
from functools import lru_cache

import altair as alt
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from tqdm import tqdm
from transformers import AutoTokenizer, AutoConfig, AutoModel, AutoModelForSequenceClassification
"""
description_sentence = "<h3>Demo EmotioNL</h3>\nThis demo allows you to analyse the emotion in a sentence."
description_dataset = "<h3>Demo EmotioNL</h3>\nThis demo allows you to analyse the emotions in a dataset.\nThe data should be in tsv-format with two named columns: the first column (id) should contain the sentence IDs, and the second column (text) should contain the actual texts. Optionally, there is a third column named 'date', which specifies the date associated with the text (e.g., tweet date). This column is necessary when the options 'emotion distribution over time' and 'peaks' are selected."
inference_modelpath = "model/checkpoint-128"
def inference_sentence(text):
tokenizer = AutoTokenizer.from_pretrained(inference_modelpath)
model = AutoModelForSequenceClassification.from_pretrained(inference_modelpath)
for text in tqdm([text]):
inputs = tokenizer(text, return_tensors="pt")
with torch.no_grad(): # run model
logits = model(**inputs).logits
predicted_class_id = logits.argmax().item()
output = model.config.id2label[predicted_class_id]
return output
def frequencies(preds):
preds_dict = {"neutral": 0, "anger": 0, "fear": 0, "joy": 0, "love": 0, "sadness": 0}
for pred in preds:
preds_dict[pred] = preds_dict[pred] + 1
bars = list(preds_dict.keys())
height = list(preds_dict.values())
x_pos = np.arange(len(bars))
plt.bar(x_pos, height, color=['lightgrey', 'firebrick', 'rebeccapurple', 'orange', 'palevioletred', 'cornflowerblue'])
plt.xticks(x_pos, bars)
return plt
def inference_dataset(file_object, option_list):
tokenizer = AutoTokenizer.from_pretrained(inference_modelpath)
model = AutoModelForSequenceClassification.from_pretrained(inference_modelpath)
data_path = open(file_object.name, 'r')
df = pd.read_csv(data_path, delimiter='\t', header=0, names=['id', 'text'])
ids = df["id"].tolist()
texts = df["text"].tolist()
preds = []
for text in tqdm(texts): # progressbar
inputs = tokenizer(text, return_tensors="pt")
with torch.no_grad(): # run model
logits = model(**inputs).logits
predicted_class_id = logits.argmax().item()
prediction = model.config.id2label[predicted_class_id]
preds.append(prediction)
predictions_content = list(zip(ids, texts, preds))
# write predictions to file
output = "output.txt"
f = open(output, 'w')
f.write("id\ttext\tprediction\n")
for line in predictions_content:
f.write(str(line[0]) + '\t' + str(line[1]) + '\t' + str(line[2]) + '\n')
output1 = output
output2 = output3 = output4 = output5 = "This option was not selected."
if "emotion frequencies" in option_list:
output2 = frequencies(preds)
else:
output2 = None
if "emotion distribution over time" in option_list:
output3 = "This option was selected."
if "peaks" in option_list:
output4 = "This option was selected."
if "topics" in option_list:
output5 = "This option was selected."
return [output1, output2, output3, output4, output5]
iface_sentence = gr.Interface(
fn=inference_sentence,
description = description_sentence,
inputs = gr.Textbox(
label="Enter a sentence",
lines=1),
outputs="text")
inputs = [gr.File(
label="Upload a dataset"),
gr.CheckboxGroup(
["emotion frequencies", "emotion distribution over time", "peaks", "topics"],
label = "Select options")]
outputs = [gr.File(),
gr.Plot(label="Emotion frequencies"),
gr.Textbox(label="Emotion distribution over time"),
gr.Textbox(label="Peaks"),
gr.Textbox(label="Topics")]
iface_dataset = gr.Interface(
fn = inference_dataset,
description = description_dataset,
inputs=inputs,
outputs = outputs)
iface = gr.TabbedInterface([iface_sentence, iface_dataset], ["Sentence", "Dataset"])
iface.queue().launch()
"""
inference_modelpath = "model/checkpoint-128"


@lru_cache(maxsize=1)
def _load_inference_model(model_path):
    """Load the tokenizer and classifier stored at *model_path*, once.

    The result is memoized so that repeated predictions reuse the objects
    already in memory instead of reading the checkpoint from disk each time.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    return tokenizer, model


def inference_sentence(text):
    """Predict the emotion label of a single sentence.

    Args:
        text: The sentence to classify.

    Returns:
        The string ``"Predicted emotion:\\n"`` followed by the label that
        ``model.config.id2label`` assigns to the highest-scoring class.
    """
    tokenizer, model = _load_inference_model(inference_modelpath)
    # truncation=True keeps over-long input within the model's maximum length
    # instead of letting the forward pass fail on it.
    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():  # inference only, no gradients needed
        logits = model(**inputs).logits
    predicted_class_id = logits.argmax().item()
    return "Predicted emotion:\n" + model.config.id2label[predicted_class_id]
"""
def inference_sentence(text):
output = "This sentence will be processed:\n" + text
return output
"""
def unavailable(input_file, input_checks):
    """Report that submitting your own dataset is currently switched off.

    Both parameters are accepted so the function fits the submit-button
    wiring, but neither is read.

    Returns:
        A Gradio update that fills the target component with the notice,
        labels it "Oops!" and makes it visible.
    """
    notice = (
        "As we are currently updating this demo, submitting your own data is "
        "unavailable for the moment. However, you can try out the showcase mode π"
    )
    return gr.update(value=notice, label="Oops!", visible=True)
def showcase(input_file):
    """Start showcase mode with the bundled example predictions.

    ``input_file`` is accepted for the button wiring but is not read.

    Returns:
        Six Gradio updates, in order: hide the message box, show the
        predictions file (pointing at the example file), then one update per
        "next" button of which only the first (emotion frequencies) is shown.
    """
    example_path = "showcase/example_predictions.txt"
    hide_message = gr.update(visible=False)
    show_predictions = gr.update(value=example_path, visible=True)
    # Fresh update object per button; only the frequency button is revealed.
    button_updates = [gr.update(visible=shown) for shown in (True, False, False, False)]
    return (hide_message, show_predictions, *button_updates)
def file(input_file, input_checks):
    """Expose the predictions file and reveal the first requested next step.

    The predictions shown are always the bundled example file; ``input_file``
    is not read. Of the selected options, only the first one in pipeline
    order gets its "next" button shown.

    Returns:
        Five Gradio updates: the predictions file, followed by the visibility
        of the frequency, distribution, peaks and topics buttons.
    """
    predictions_path = "showcase/example_predictions.txt"
    pipeline_order = (
        "emotion frequencies",
        "emotion distribution over time",
        "peaks",
        "topics",
    )
    # First option (in pipeline order) that the user ticked, or None.
    first_selected = next(
        (option for option in pipeline_order if option in input_checks), None)
    button_updates = [
        gr.update(visible=(option == first_selected)) for option in pipeline_order]
    return (gr.update(value=predictions_path, visible=True), *button_updates)
def freq(output_file, input_checks):
    """Build the emotion-frequency bar chart and choose the next step to offer.

    Counts are taken from the bundled ``showcase/example_predictions.txt``
    (the file passed in is not parsed): the header line is skipped and the
    second tab-separated field of every remaining row is tallied. Emotions
    that never occur are plotted with a frequency of 0.

    Args:
        output_file: Predictions file object (or a plain path string); only
            its name is inspected, to detect showcase mode. May be None.
        input_checks: Options ticked in the checkbox group. May be None.

    Returns:
        Four Gradio updates: the plot, followed by the visibility of the
        distribution, peaks and topics buttons (at most one is shown).
    """
    emotions = ['neutral', 'anger', 'fear', 'joy', 'love', 'sadness']
    colours = ['#999999', '#b22222', '#663399', '#ffcc00', '#db7093', '#6495ed']

    counts = Counter()
    with open("showcase/example_predictions.txt", 'r') as handle:
        next(handle, None)  # first line is the column header
        for row in handle:
            fields = row.rstrip("\n").split("\t")
            if len(fields) > 1:  # ignore blank or malformed rows
                counts[fields[1]] += 1

    # Counter returns 0 for a missing key, so absent emotions do not raise.
    frequencies = [counts[emotion] for emotion in emotions]
    simple = pd.DataFrame({
        'Emotion category': emotions,
        'Frequency': frequencies})

    # Upper axis limit: highest count rounded up to a multiple of ten (min 10).
    y_max = max((max(frequencies) + 9) // 10 * 10, 10)
    plot = alt.Chart(simple).mark_bar().encode(
        x=alt.X("Emotion category", sort=emotions),
        y=alt.Y("Frequency", axis=alt.Axis(grid=False), scale=alt.Scale(domain=[0, y_max])),
        color=alt.Color("Emotion category", scale=alt.Scale(domain=emotions, range=colours), legend=None),
        tooltip=['Emotion category', 'Frequency']).properties(
        width=600).configure_axis(
        grid=False).interactive()

    selected = input_checks or []
    file_name = output_file if isinstance(output_file, str) else getattr(output_file, "name", "")
    # Showcase mode walks through every step regardless of the ticked options.
    showcase_mode = str(file_name).startswith('/tmp/example_predictions')

    if "emotion distribution over time" in selected or showcase_mode:
        next_step = "dist"
    elif "peaks" in selected:
        next_step = "peaks"
    elif "topics" in selected:
        next_step = "topics"
    else:
        next_step = None

    button_updates = [
        gr.update(visible=(step == next_step)) for step in ("dist", "peaks", "topics")]
    return (gr.update(value=plot, visible=True), *button_updates)
def _parse_showcase_date(text):
    """Turn a ``day/month/year`` string into a ``date``, adding 2000 to the year."""
    day, month, year = (int(part) for part in text.split("/")[:3])
    return date(2000 + year, month, day)


def dist(output_file, input_checks):
    """Build the emotion-over-time line chart and choose the next step to offer.

    Rows are read from the bundled ``showcase/data.txt`` (the file passed in
    is not parsed): the header line is skipped, the first tab-separated field
    is the date and the second the emotion. Every day between the earliest
    and latest date gets one data point per emotion, 0 where nothing was seen.

    Args:
        output_file: Predictions file object (or a plain path string); only
            its name is inspected, to detect showcase mode. May be None.
        input_checks: Options ticked in the checkbox group. May be None.

    Returns:
        Three Gradio updates: the plot, followed by the visibility of the
        peaks and topics buttons (at most one is shown).

    Raises:
        ValueError: If the data file contains no usable rows.
    """
    emotions = ['neutral', 'anger', 'fear', 'joy', 'love', 'sadness']
    colours = ['#999999', '#b22222', '#663399', '#ffcc00', '#db7093', '#6495ed']

    counts = Counter()  # keyed by (date, emotion)
    with open("showcase/data.txt", 'r') as handle:
        next(handle, None)  # first line is the column header
        for row in handle:
            fields = row.rstrip("\n").split("\t")
            if len(fields) > 1:  # ignore blank or malformed rows
                counts[(_parse_showcase_date(fields[0]), fields[1])] += 1
    if not counts:
        raise ValueError("showcase/data.txt contains no data rows")

    # Use the true earliest/latest date so unsorted rows still give a full range.
    observed_days = [day for day, _ in counts]
    start_date, end_date = min(observed_days), max(observed_days)
    date_range = [
        start_date + timedelta(days=offset)
        for offset in range((end_date - start_date).days + 1)]

    frequency = [counts[(day, emotion)] for day in date_range for emotion in emotions]
    data = pd.DataFrame({
        'Date': [str(day) for day in date_range for _ in emotions],
        'Frequency': frequency,
        'Emotion category': [emotion for _ in date_range for emotion in emotions]})

    # Upper axis limit: highest count rounded up to a multiple of ten (min 10).
    y_max = max((max(frequency) + 9) // 10 * 10, 10)

    highlight = alt.selection(
        type='single', on='mouseover', fields=["Emotion category"], nearest=True)
    base = alt.Chart(data).encode(
        x="Date:T",
        y=alt.Y("Frequency", scale=alt.Scale(domain=[0, y_max])),
        color=alt.Color("Emotion category", scale=alt.Scale(domain=emotions, range=colours), legend=alt.Legend(orient='bottom', direction='horizontal')))
    # Invisible points carry the tooltip and drive the hover selection.
    points = base.mark_circle().encode(
        opacity=alt.value(0),
        tooltip=[
            alt.Tooltip('Emotion category', title='Emotion category'),
            alt.Tooltip('Date:T', title='Date'),
            alt.Tooltip('Frequency', title='Frequency')
        ]).add_selection(highlight)
    # The hovered emotion's line is drawn thicker.
    lines = base.mark_line().encode(
        size=alt.condition(~highlight, alt.value(1), alt.value(3)))
    plot = (points + lines).properties(width=600, height=350).interactive()

    selected = input_checks or []
    file_name = output_file if isinstance(output_file, str) else getattr(output_file, "name", "")
    # Showcase mode walks through every step regardless of the ticked options.
    showcase_mode = str(file_name).startswith('/tmp/example_predictions')

    if "peaks" in selected or showcase_mode:
        next_step = "peaks"
    elif "topics" in selected:
        next_step = "topics"
    else:
        next_step = None

    button_updates = [
        gr.update(visible=(step == next_step)) for step in ("peaks", "topics")]
    return (gr.update(value=plot, visible=True), *button_updates)
def peaks(output_file, input_checks):
    """Show the pre-computed peaks plot and decide whether to offer topics.

    The plot is unpickled from the bundled ``showcase/peaks_covid.p``; the
    file passed in is not parsed.

    Args:
        output_file: Predictions file object (or a plain path string); only
            its name is inspected, to detect showcase mode. May be None.
        input_checks: Options ticked in the checkbox group. May be None.

    Returns:
        Two Gradio updates: the plot, and the visibility of the topics button.
    """
    # NOTE: unpickling can execute arbitrary code. This is only acceptable
    # because the file is an asset shipped with the app, never user input.
    with open('showcase/peaks_covid.p', 'rb') as handle:
        plot = pickle.load(handle)

    selected = input_checks or []
    file_name = output_file if isinstance(output_file, str) else getattr(output_file, "name", "")
    # Showcase mode walks through every step regardless of the ticked options.
    showcase_mode = str(file_name).startswith('/tmp/example_predictions')
    offer_topics = "topics" in selected or showcase_mode

    return gr.update(value=plot, visible=True), gr.update(visible=offer_topics)
def peaks2(output_file, input_checks):
    """Render the hard-coded list of significant peaks as an HTML summary.

    For each emotion the dates are listed with an arrow showing the direction
    of the fluctuation: "up" gets an upward arrow, anything else a downward
    one. A coloured dot per emotion is styled through generated CSS rules.

    Args:
        output_file: Predictions file object (or a plain path string); only
            its name is inspected, to detect showcase mode. May be None.
        input_checks: Options ticked in the checkbox group. May be None.

    Returns:
        Two Gradio updates: the HTML string, and the visibility of the
        topics button.
    """
    colours = {
        "neutral": "#999999",
        "anger": "#b22222",
        "fear": "#663399",
        "joy": "#ffcc00",
        "love": "#db7093",
        "sadness": "#6495ed",
    }
    peaks_by_emotion = {
        "anger": {"9/2/2020": "up", "18/2/2020": "down", "8/3/2020": "up", "20/3/2020": "up", "31/5/2020": "up", "6/6/2020": "up", "19/6/2020": "up", "19/7/2020": "up"},
        "fear": {"8/2/2020": "up", "11/2/2020": "down", "31/5/2020": "down", "12/6/2020": "up", "5/7/2020": "up", "19/7/2020": "up"},
        "joy": {"13/3/2020": "up", "4/4/2020": "up", "19/6/2020": "up", "26/6/2020": "up"},
        "love": {"12/3/2020": "up", "5/5/2020": "down", "26/6/2020": "up", "7/8/2020": "up"},
        "sadness": {"14/2/2020": "up", "3/4/2020": "up", "5/5/2020": "down", "18/5/2020": "down", "30/6/2020": "up", "5/7/2020": "up"},
    }

    # One identical rule per emotion, differing only in class name and colour.
    dot_rule = (
        ".dot_%s {height: 11px;width: 11px;background-color: %s;"
        "border-radius: 50%%;display: inline-block;}")
    dot_css = "".join(dot_rule % (emotion, colour) for emotion, colour in colours.items())

    sections = []
    for emotion, peak_dates in peaks_by_emotion.items():
        listing = ", ".join(
            str(day) + (" (↑)" if direction == "up" else " (↓)")
            for day, direction in peak_dates.items())
        sections.append(
            '<p><span class="dot_%s"></span> %s:</p>'
            '<p class="tab">%s</p>' % (emotion, emotion, listing))

    html = (
        '<html>'
        '<head>'
        '<meta name="viewport" content="width=device-width, initial-scale=1">'
        '<style>' + dot_css + '.tab {padding-left: 1em;}</style>'
        '</head>'
        '<body>'
        '<div>'
        '<p>These significant fluctuations were found:</p>'
        + "".join(sections) +
        '</div>'
        '</body>'
        '</html>'
    )

    selected = input_checks or []
    file_name = output_file if isinstance(output_file, str) else getattr(output_file, "name", "")
    # Showcase mode walks through every step regardless of the ticked options.
    showcase_mode = str(file_name).startswith('/tmp/example_predictions')
    offer_topics = "topics" in selected or showcase_mode

    return gr.update(value=html, visible=True), gr.update(visible=offer_topics)
def topics(output_file, input_checks):
    """Show the pre-computed topics figure.

    The figure is unpickled from the bundled ``showcase/vis_classes_covid.p``
    and resized to 600x400 through its ``update_layout`` method (presumably a
    Plotly figure; confirm against how the pickle was produced). Neither
    parameter is read; they are accepted to fit the button wiring.

    Returns:
        A single Gradio update that sets the figure and makes it visible.
        This is the last step, so no further button is revealed.
    """
    # NOTE: unpickling can execute arbitrary code. This is only acceptable
    # because the file is an asset shipped with the app, never user input.
    with open('showcase/vis_classes_covid.p', 'rb') as handle:
        plot = pickle.load(handle)
    plot.update_layout(width=600, height=400)
    return gr.update(value=plot, visible=True)
# Markdown shown at the top of each tab.
_SENTENCE_TAB_INTRO = """
# Demo EmotioNL
This demo allows you to analyse the emotion in a Dutch sentence.
"""

_DATASET_TAB_INTRO = """
# Demo EmotioNL
This demo allows you to analyse the emotions in a dataset with Dutch sentences.
_! As we are currently updating this demo, submitting your own data is unavailable for the moment ! However, you can try out the showcase mode π_
The data should be in tsv-format with two named columns: the first column (id) should contain the sentence IDs, and the second column (text) should contain the actual texts. Optionally, there is a third column named 'date', which specifies the date associated with the text (e.g., tweet date). This column is necessary when the options 'emotion distribution over time' and 'peaks' are selected.
You can also try out the demo in showcase mode, which uses example data, namely a dataset with tweets about the COVID-19 pandemic.
"""

# Two-tab UI: single-sentence prediction, and a step-by-step dataset analysis
# in which each result reveals the button for the next step.
# NOTE(review): the row/column nesting below is the presumed layout — confirm
# against the deployed app.
with gr.Blocks() as demo:
    with gr.Tab("Sentence"):
        gr.Markdown(_SENTENCE_TAB_INTRO)
        with gr.Row():
            with gr.Column():
                # Named sentence_* so the builtin input() is not shadowed and
                # the dataset tab's send_btn is not a rebinding of this one.
                sentence_input = gr.Textbox(
                    label="Enter a sentence",
                    value="Jaaah! Volgende vakantie Barcelona en na het zomerseizoen naar de Algarve",
                    lines=1)
                sentence_send_btn = gr.Button("Send")
            sentence_output = gr.Textbox()
        sentence_send_btn.click(fn=inference_sentence, inputs=sentence_input, outputs=sentence_output)

    with gr.Tab("Dataset"):
        gr.Markdown(_DATASET_TAB_INTRO)
        with gr.Row():
            with gr.Column():
                input_file = gr.File(
                    label="Upload a dataset")
                input_checks = gr.CheckboxGroup(
                    ["emotion frequencies", "emotion distribution over time", "peaks", "topics"],
                    label="Select options")
                send_btn = gr.Button("Submit data")
                demo_btn = gr.Button("... or showcase with example data")
            with gr.Column():
                # Everything here starts hidden and is revealed step by step.
                message = gr.Textbox(label="Message", visible=False)
                output_file = gr.File(label="Predictions", visible=False)
                next_button_freq = gr.Button("Show emotion frequencies", visible=False)
                output_plot = gr.Plot(show_label=False, visible=False).style(container=True)
                next_button_dist = gr.Button("Show emotion distribution over time", visible=False)
                output_dist = gr.Plot(show_label=False, visible=False)
                next_button_peaks = gr.Button("Show peaks", visible=False)
                output_peaks = gr.Plot(visible=False)
                next_button_topics = gr.Button("Show topics", visible=False)
                output_topics = gr.Plot(show_label=False, visible=False)

        # Wiring for real uploads, disabled while "Submit data" is routed to
        # the unavailable() notice below:
        #send_btn.click(fn=file, inputs=[input_file,input_checks], outputs=[output_file,next_button_freq,next_button_dist,next_button_peaks,next_button_topics])
        next_button_freq.click(fn=freq, inputs=[output_file, input_checks], outputs=[output_plot, next_button_dist, next_button_peaks, next_button_topics])
        next_button_dist.click(fn=dist, inputs=[output_file, input_checks], outputs=[output_dist, next_button_peaks, next_button_topics])
        next_button_peaks.click(fn=peaks, inputs=[output_file, input_checks], outputs=[output_peaks, next_button_topics])
        next_button_topics.click(fn=topics, inputs=[output_file, input_checks], outputs=output_topics)
        send_btn.click(fn=unavailable, inputs=[input_file, input_checks], outputs=message)
        demo_btn.click(fn=showcase, inputs=[input_file], outputs=[message, output_file, next_button_freq, next_button_dist, next_button_peaks, next_button_topics])

demo.launch()
|