lunadebruyne committed
Commit bd33424 · 1 Parent(s): b760f7b

Update app.py

Files changed (1):
  1. app.py +15 -138
app.py CHANGED
@@ -6,84 +6,24 @@ import pandas as pd
 from tqdm import tqdm
 
 from transformers import AutoTokenizer, AutoConfig, AutoModel, AutoModelForSequenceClassification
-from transformers import TrainingArguments, Trainer
-
-from datasets import load_dataset
 
 
 description_sentence = "<h3>Demo EmotioNL</h3>\nThis demo allows you to analyse the emotion in a sentence."
-description2 = "<h3>Demo EmotioNL</h3>\nThis demo allows you to analyse the emotions in a dataset.\nThe data should be in tsv-format with two named columns: the first column (id) should contain the sentence IDs, and the second column (text) should contain the actual texts. Optionally, there is a third column named 'date', which specifies the date associated with the text (e.g., tweet date). This column is necessary when the options 'emotion distribution over time' and 'peaks' are selected."
+description_dataset = "<h3>Demo EmotioNL</h3>\nThis demo allows you to analyse the emotions in a dataset.\nThe data should be in tsv-format with two named columns: the first column (id) should contain the sentence IDs, and the second column (text) should contain the actual texts. Optionally, there is a third column named 'date', which specifies the date associated with the text (e.g., tweet date). This column is necessary when the options 'emotion distribution over time' and 'peaks' are selected."
 
 inference_modelpath = "model/checkpoint-128"
 
-"""
-output_dir = "model"
-model_config = {
-    "model_weights": "pdelobelle/robbert-v2-dutch-base",
-    "num_labels": 6,
-    "max_length": 128,
-    "device": "cpu"
-}
-
-## Tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained(model_config["model_weights"])
-model = AutoModelForSequenceClassification.from_pretrained(inference_modelpath)
-
-
-# Function for encoding (tokenizing) data
-def encode_data(data):
-    text = data["text"]
-    label = data["label"]
-    encoded_input = tokenizer(
-        text,
-        add_special_tokens=True,
-        max_length= model_config["max_length"],
-        padding= "max_length",
-        return_overflowing_tokens=True,
-        truncation=True
-    )
-    encoded_input["labels"] = label
-    return encoded_input
-
-
-# Test arguments for Trainer
-test_args = TrainingArguments(
-    output_dir = output_dir,
-    do_train = False,
-    do_predict = True,
-    per_device_eval_batch_size = 64,
-    dataloader_drop_last = False
-)
-trainer = Trainer(
-    model = model,
-    args = test_args)
-
-
-def inference_dataset(file_object):
-    input_file = file_object
-    data_paths = {"train": input_file, "inference": input_file}
-    dataset = load_dataset('csv', skiprows=1, data_files=data_paths, column_names = ['id', 'text', 'label'], delimiter='\t')
-    encoded_dataset = dataset.map(encode_data, batched=True)
-    encoded_dataset.set_format("torch")
-    encoded_dataset["inference"] = encoded_dataset["inference"].remove_columns("label")
-    # Run trainer in prediction mode
-    prediction_output = trainer.predict(encoded_dataset["inference"])
-    predictions = prediction_output[0]
-    ids = dataset["inference"]["id"]
-    texts = dataset["inference"]["text"]
-    preds = np.argmax(predictions, axis=1)
-    preds = [model.config.id2label[pred] for pred in preds]
-    predictions_content = list(zip(ids, texts, preds))
-    # write predictions to file
-    output = "output.txt"
-    f = open(output, 'w')
-    f.write("id\ttext\tprediction\n")
-    for line in predictions_content:
-        f.write(str(line[0]) + '\t' + str(line[1]) + '\t' + str(line[2]) + '\n')
-    f.close()
-    return output
-"""
-
+def inference_sentence(text):
+    tokenizer = AutoTokenizer.from_pretrained(inference_modelpath)
+    model = AutoModelForSequenceClassification.from_pretrained(inference_modelpath)
+    for text in tqdm([text]):
+        inputs = tokenizer(text, return_tensors="pt")
+        with torch.no_grad(): # run model
+            logits = model(**inputs).logits
+        predicted_class_id = logits.argmax().item()
+        output = model.config.id2label[predicted_class_id]
+    return output
+
 def inference_dataset(file_object, option_list):
     tokenizer = AutoTokenizer.from_pretrained(inference_modelpath)
     model = AutoModelForSequenceClassification.from_pretrained(inference_modelpath)
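
For reference, a dataset matching the format described in description_dataset would look something like this (an illustrative sample; the ids, Dutch texts and dates are invented, not part of the commit):

id	text	date
1	Wat een heerlijke dag!	2022-03-01
2	Dit maakt me echt boos.	2022-03-02
3	Ik mis je nu al.	2022-03-03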
@@ -119,68 +59,6 @@ def inference_dataset(file_object, option_list):
         output5 = "This option was selected."
     return [output1, output2, output3, output4, output5]
 
-def what_happened(text, file_object, option_list):
-    if file_object:
-        output = "You uploaded a file."
-        #if len(option_list) > 0:
-            #output = output + "\nYou selected these options:\n- " + "\n- ".join(option_list)
-    else:
-        output = "Normally, this demo should analyse the emotions in this text:\n" + text
-        if len(option_list) > 0:
-            output = output + "\nYou can only select options when uploading a dataset."
-    return output
-
-def what_happened1(text):
-    output = "Normally, this demo should analyse the emotions in this text:\n" + text
-    return output
-
-def what_happened2(file_object, option_list):
-    #input_file = open(file_object.name, 'r')
-    #lines = input_file.read()
-    #input_file.close()
-    #output_file = open('output.txt', 'w')
-    #output_file.write(lines)
-    #output_file.close()
-    #output1 = 'output.txt'
-    output1 = inference_dataset(file_object.name)
-    output2 = output3 = output4 = output5 = "This option was not selected."
-    if "emotion frequencies" in option_list:
-        output2 = "This option was selected."
-    if "emotion distribution over time" in option_list:
-        output3 = "This option was selected."
-    if "peaks" in option_list:
-        output4 = "This option was selected."
-    if "topics" in option_list:
-        output5 = "This option was selected."
-    return [output1, output2, output3, output4, output5]
-
-def inference_sentence(text):
-    tokenizer = AutoTokenizer.from_pretrained(inference_modelpath)
-    model = AutoModelForSequenceClassification.from_pretrained(inference_modelpath)
-    for text in tqdm([text]):
-        inputs = tokenizer(text, return_tensors="pt")
-        with torch.no_grad(): # run model
-            logits = model(**inputs).logits
-        predicted_class_id = logits.argmax().item()
-        output = model.config.id2label[predicted_class_id]
-    return output
-
-
-iface0 = gr.Interface(
-    fn=what_happened,
-    inputs=[
-        gr.Textbox(
-            label= "Enter a sentence",
-            lines=1,
-            value="Your name"),
-        gr.File(
-            label="Or upload a dataset"),
-        gr.CheckboxGroup(
-            ["emotion frequencies", "emotion distribution over time", "peaks", "topics"],
-            label = "Select options")
-    ],
-    outputs="text")
-
 iface_sentence = gr.Interface(
     fn=inference_sentence,
     description = description_sentence,
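
The four option strings that drive the dataset tab appear both in the removed prototype above and in the CheckboxGroup passed to inference_dataset below. A minimal sketch of the selection logic they feed (illustrative only; selected stands in for a hypothetical user choice):

options = ["emotion frequencies", "emotion distribution over time", "peaks", "topics"]
selected = ["emotion frequencies", "peaks"]  # hypothetical CheckboxGroup selection
# Mirrors the if-chains in inference_dataset and the removed what_happened2:
flags = ["This option was selected." if option in selected
         else "This option was not selected." for option in options]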
@@ -189,10 +67,9 @@ iface_sentence = gr.Interface(
         lines=1),
     outputs="text")
 
-iface2 = gr.Interface(
-    #fn=what_happened2,
+iface_dataset = gr.Interface(
     fn = inference_dataset,
-    description = description2,
+    description = description_dataset,
     inputs=[
         gr.File(
             label="Upload a dataset"),
@@ -210,6 +87,6 @@ iface2 = gr.Interface(
         gr.Textbox(label="Topics")
     ])
 
-iface = gr.TabbedInterface([iface_sentence, iface2], ["Sentence", "Dataset"])
+iface = gr.TabbedInterface([iface_sentence, iface_dataset], ["Sentence", "Dataset"])
 
 iface.queue().launch()
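
With the Trainer-based path gone, single-sentence prediction now runs the model directly. A minimal sketch of the same steps as the new inference_sentence helper, usable outside Gradio (assumes the checkpoint at model/checkpoint-128 exists locally; the example sentence is invented):

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

inference_modelpath = "model/checkpoint-128"
tokenizer = AutoTokenizer.from_pretrained(inference_modelpath)
model = AutoModelForSequenceClassification.from_pretrained(inference_modelpath)

inputs = tokenizer("Wat een heerlijke dag!", return_tensors="pt")
with torch.no_grad():  # inference only, no gradient tracking
    logits = model(**inputs).logits
print(model.config.id2label[logits.argmax().item()])  # one of the model's emotion labels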