Spaces:

TheDemond
/

Movie-reviews-analyzer

Sleeping

App Files Files Community

TheDemond committed on Nov 4, 2024

Commit

82b4581

verified ·

1 Parent(s): d350a95

Upload 6 files

Browse files

Files changed (6) hide show

Model_define.py +35 -0
Vocabulary.py +71 -0
app.py +50 -0
lstm_model.bin +3 -0
lstm_model_states.pt +3 -0
requirements.txt +3 -0

Model_define.py ADDED Viewed

	@@ -0,0 +1,35 @@

+import torch
+from torch import nn
+class Sentiment_LSTM(nn.Module):
+    def __init__(self, embedding_matrix:torch.Tensor, lstm_hidden_size, lstm_layers, linear_hidden_size):
+        super(Sentiment_LSTM, self).__init__()
+        self.input_size = embedding_matrix.size(-1)
+        self.lstm_hidden_size = lstm_hidden_size
+        self.lstm_layers = lstm_layers
+        self.linear_hidden_size = linear_hidden_size
+        self.embedding_matrix = nn.Embedding.from_pretrained(embedding_matrix, freeze=False)
+        self.lstm = nn.LSTM(input_size=self.input_size, hidden_size=self.lstm_hidden_size,
+                          num_layers=self.lstm_layers, batch_first=True)
+        # We will use only the last hidden state of the last layer for the prediction
+        self.fc = nn.Sequential(nn.Linear(self.lstm_hidden_size, self.linear_hidden_size),
+                                nn.ReLU(), nn.Dropout(0.3))
+        self.classifier = nn.Linear(self.linear_hidden_size, 1)
+    def forward(self, x, device):
+        h_0 = torch.zeros((self.lstm_layers, x.size(0), self.lstm_hidden_size)).to(device)
+        c_0 = torch.zeros((self.lstm_layers, x.size(0), self.lstm_hidden_size)).to(device)
+        embds = self.embedding_matrix(x)
+        all_outputs, (h_final, c_final) = self.lstm(embds, (h_0, c_0))
+        h_final_final_layer = h_final[-1,:,:]
+        fc_out = self.fc(h_final_final_layer)
+        output = self.classifier(h_final_final_layer)
+        return nn.functional.sigmoid(output)

Vocabulary.py ADDED Viewed

	@@ -0,0 +1,71 @@

+import spacy
+from collections import Counter
+class spacy_tokenizer():
+    def __init__(self):
+        self.spacy_eng = spacy.load("en_core_web_sm")
+    def __call__(self, text):
+        return [tok.text.lower() for tok in self.spacy_eng.tokenizer(text)]
+class Vocabulary:
+    def __init__(self, callable_tokenizer=None, max_freq=3, unk=True, sos=False, eos=False):
+        self.sos = sos
+        self.eos = eos
+        self.unk = unk
+        if callable_tokenizer:
+            self.callable_tokenizer = callable_tokenizer
+        else:
+            self.callable_tokenizer = spacy_tokenizer()
+        self.stoi = {"<PAD>": 0}
+        if self.unk:
+            self.stoi['<UNK>'] = len(self.stoi)
+        if self.sos:
+            self.stoi['<SOS>'] = len(self.stoi)
+        if self.eos:
+            self.stoi['<EOS>'] = len(self.stoi)
+    def __len__(self):
+        return len(self.stoi)
+    def get_vocabulary(self):
+        return self.stoi
+    def add_token(self, token_name: str):
+        if token_name not in self.stoi:
+            self.stoi[token_name] = len(self.stoi)
+    def build_vocabulary(self, sentences_list):
+        if type(sentences_list[0]) != str:
+            ## ex: [['eating', 'apples'], ['eating', 'oranges']]
+            sentences_list = [' '.join(sen) for sen in sentences_list]
+        word_counts = Counter()
+        for sentence in sentences_list:
+            tokens = self.callable_tokenizer(sentence)
+            word_counts.update(tokens)
+        # Filter words with mox_freq or more occurrences
+        filtered_words = [word for word, count in word_counts.items() if count >= 3]
+        for word in filtered_words:
+            if word not in self.stoi:
+                self.stoi[word] = len(self.stoi)
+    def get_numerical_tokens(self, text: str):
+        tokens = self.callable_tokenizer(text)
+        # tokens.insert(0, '<SOS>') if self.sos else None
+        # tokens.append('<EOS>') if self.eos else None
+        unk_id = self.stoi.get('<UNK>', None)
+        return [self.stoi.get(word, unk_id) for word in tokens]
+    def __call__(self, text: str):
+        return self.get_numerical_tokens(text)
+    def tokens_to_text(self, tokens_list):
+        keys = list(self.stoi.keys())
+        values = list(self.stoi.values())
+        return ' '.join([keys[values.index(token)] for token in tokens_list])

app.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import torch
+import gradio as gr
+from Vocabulary import spacy_tokenizer
+from Model_define import Sentiment_LSTM
+device = 'gpu' if torch.cuda.is_available() else 'cpu'
+model = torch.load("lstm_model.bin", map_location=device, weights_only=False)
+model_state = torch.load("lstm_model_states.pt", map_location=device, weights_only=False)
+vocab = model_state['vocabulary']
+tokenizer = spacy_tokenizer()
+cls_to_idx = model_state['class_dict']
+idx_to_cls = {value:key for key,value in cls_to_idx.items()}
+def pre_processor(text):
+    tokens = tokenizer(text.lower())
+    unk_id = vocab.get('<UNK>', None)
+    return torch.tensor([vocab.get(word, unk_id) for word in tokens])
+def post_processor(raw_output):
+    label = (raw_output >= 0.5).int()
+    return idx_to_cls[label.item()].capitalize(), round(raw_output.item(), 2)
+@torch.no_grad
+def lunch(raw_input):
+    input = pre_processor(raw_input)
+    output = model(input.unsqueeze(0), device)
+    return post_processor(output)
+custom_css ='.gr-button {background-color: #bf4b04; color: white;}'
+with gr.Blocks(css=custom_css) as demo:
+    with gr.Row():
+        with gr.Column():
+            input_text = gr.Textbox(label='Input a Review or click an Example')
+            gr.Examples(["It is no wonder that the film has such a high rating, it is quite literally breathtaking. What can I say that hasn't said before? Not much, it's the story, the acting, the premise, but most of all, this movie is about how it makes you feel. Sometimes you watch a film, and can't remember it days later, this film loves with you, once you've seen it, you don't forget.",
+                          "This film is nothing but one cliche after another. Having seen many of the 100's of prison films made from the early 30's to the 50's, I was able to pull almost every minute of Shawcrap from one of those films."],
+                        inputs=input_text, label="Examples: ")
+        with gr.Column():
+            class_name = gr.Textbox(label="This review is")
+            confidence = gr.Textbox(label='Confidence')
+            start_btn = gr.Button(value='Submit', elem_classes=["gr-button"])
+    start_btn.click(fn=lunch, inputs=input_text, outputs=[class_name, confidence])
+demo.launch()

lstm_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f1dceea1f78112220fedc531cae7815a0808ef2582d53de8149631590db30227
+size 22590178

lstm_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2b4ef1293501e36bb34ba9d8d12cedbdfd2555e3e070baef5cdba91bce8739b6
+size 1071816

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+torch
+spacy
+gradio