Spaces:

avichr
/

HebEMO_demo

Runtime error

App Files Files Community

avichr committed on Dec 18, 2021

Commit

eb2e6c5

1 Parent(s): 771e9d0

Create HebEMO.py

Browse files

Files changed (1) hide show

HebEMO.py +85 -0

HebEMO.py ADDED Viewed

	@@ -0,0 +1,85 @@

+class HebEMO:
+    def __init__(self, device=0, emotions = ['expectation', 'happy', 'trust', 'fear', 'surprise', 'anger',
+      'sadness', 'disgust']):
+        from transformers import pipeline
+        from tqdm import tqdm
+        self.device = device
+        self.emotions = emotions
+        self.hebemo_models = {}
+        for emo in tqdm(emotions):
+            self.hebemo_models[emo] = pipeline(
+                "sentiment-analysis",
+                model="../hebEMO/"+emo+'_classifier',
+                tokenizer="../heBERT_base_oscar",
+                device = self.device #run on GPU
+            )
+    def hebemo(self, text = None, input_path=False, save_results=False, read_lines=False, plot=False):
+        '''
+        text (str): a text or list of text to analyze
+        input_path(str): the path to the text file (txt file, each row for different instance)
+        returns pandas DataFrame of the analyzed texts and save it to the same dir of the input file
+        '''
+        from pyplutchik import plutchik
+        import matplotlib.pyplot as plt
+        import pandas as pd
+        import time
+        import torch
+        from tqdm import tqdm
+        if text is None and type(input_path) is str:
+            # read the file
+            with open(input_path, encoding='utf8') as p:
+                txt = p.readlines()
+        elif text is not None and (input_path is None or input_path is False):
+            if type(text) is str:
+                if read_lines:
+                    txt = text.split('\n')
+                else:
+                    txt = [text]
+            elif type(text) is list:
+                txt = text
+            else:
+                raise ValueError('text should be text or list of text.')
+        else:
+            raise ValueError('you should provide a text string, list of strings or text path.')
+        # run hebEMO
+        hebEMO_df = pd.DataFrame(txt)
+        for emo in tqdm(self.emotions):
+            x = self.hebemo_models[emo](txt)
+            hebEMO_df = hebEMO_df.join(pd.DataFrame(x).rename(columns = {'label': emo, 'score':'confidence_'+emo}))
+            del x
+            torch.cuda.empty_cache()
+        hebEMO_df = hebEMO_df.applymap(lambda x: 0 if x=='LABEL_0' else 1 if x=='LABEL_1' else x)
+        if save_results is not False:
+            gen_name = str(int(time.time()*1e7))
+            if type(save_results) is str:
+                hebEMO_df.to_csv(save_results+'/'+gen_name+'_heEMOed.csv', encoding='utf8')
+            else:
+                hebEMO_df.to_csv(gen_name+'_heEMOed.csv', encoding='utf8')
+        if plot:
+            hebEMO = pd.DataFrame()
+            for emo in hebEMO_df.columns[1::2]:
+                hebEMO[emo] = abs(hebEMO_df[emo]-(1-hebEMO_df['confidence_'+emo]))
+                hebEMO = hebEMO.rename(columns= {'happy': 'joy', 'expectation':'anticipation'})
+            for i in range(0,1):
+                ax = plutchik(hebEMO.to_dict(orient='records')[i])
+                print(hebEMO_df[0][i])
+                plt.show()
+            return (plt.figure())
+        else:
+            return (hebEMO_df)
+# Module-level instance built with the default arguments; constructing it
+# creates one pipeline per emotion as soon as this file is executed or imported.
+HebEMO_model = HebEMO()