TheDemond committed on
Commit 82b4581 · verified · 1 Parent(s): d350a95

Upload 6 files

Files changed (6)
  1. Model_define.py +35 -0
  2. Vocabulary.py +71 -0
  3. app.py +50 -0
  4. lstm_model.bin +3 -0
  5. lstm_model_states.pt +3 -0
  6. requirements.txt +3 -0
Model_define.py ADDED
@@ -0,0 +1,35 @@
+ import torch
+ from torch import nn
+
+ class Sentiment_LSTM(nn.Module):
+     def __init__(self, embedding_matrix: torch.Tensor, lstm_hidden_size, lstm_layers, linear_hidden_size):
+         super(Sentiment_LSTM, self).__init__()
+
+         self.input_size = embedding_matrix.size(-1)
+         self.lstm_hidden_size = lstm_hidden_size
+         self.lstm_layers = lstm_layers
+         self.linear_hidden_size = linear_hidden_size
+
+         self.embedding_matrix = nn.Embedding.from_pretrained(embedding_matrix, freeze=False)
+
+         self.lstm = nn.LSTM(input_size=self.input_size, hidden_size=self.lstm_hidden_size,
+                             num_layers=self.lstm_layers, batch_first=True)
+
+         # We will use only the last hidden state of the last layer for the prediction
+         self.fc = nn.Sequential(nn.Linear(self.lstm_hidden_size, self.linear_hidden_size),
+                                 nn.ReLU(), nn.Dropout(0.3))
+
+         self.classifier = nn.Linear(self.linear_hidden_size, 1)
+
+     def forward(self, x, device):
+         h_0 = torch.zeros((self.lstm_layers, x.size(0), self.lstm_hidden_size)).to(device)
+         c_0 = torch.zeros((self.lstm_layers, x.size(0), self.lstm_hidden_size)).to(device)
+
+         embds = self.embedding_matrix(x)
+         all_outputs, (h_final, c_final) = self.lstm(embds, (h_0, c_0))
+         h_final_final_layer = h_final[-1, :, :]
+
+         fc_out = self.fc(h_final_final_layer)
+
+         output = self.classifier(fc_out)
+         return torch.sigmoid(output)
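For reference, a minimal sketch of how Sentiment_LSTM might be instantiated and called; the embedding size, hidden sizes, and batch shape below are illustrative assumptions, not values taken from this commit:

import torch
from Model_define import Sentiment_LSTM

# Hypothetical dimensions, for illustration only
vocab_size, embed_dim = 5000, 100
embedding_matrix = torch.randn(vocab_size, embed_dim)

model = Sentiment_LSTM(embedding_matrix,
                       lstm_hidden_size=128,
                       lstm_layers=2,
                       linear_hidden_size=128)

batch = torch.randint(0, vocab_size, (4, 20))  # 4 padded sequences of 20 token ids
probs = model(batch, device='cpu')             # shape (4, 1): sigmoid probabilities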
Vocabulary.py ADDED
@@ -0,0 +1,71 @@
+ import spacy
+ from collections import Counter
+
+ class spacy_tokenizer():
+     def __init__(self):
+         self.spacy_eng = spacy.load("en_core_web_sm")
+
+     def __call__(self, text):
+         return [tok.text.lower() for tok in self.spacy_eng.tokenizer(text)]
+
+
+ class Vocabulary:
+     def __init__(self, callable_tokenizer=None, max_freq=3, unk=True, sos=False, eos=False):
+
+         self.sos = sos
+         self.eos = eos
+         self.unk = unk
+         self.max_freq = max_freq
+         if callable_tokenizer:
+             self.callable_tokenizer = callable_tokenizer
+         else:
+             self.callable_tokenizer = spacy_tokenizer()
+
+         self.stoi = {"<PAD>": 0}
+         if self.unk:
+             self.stoi['<UNK>'] = len(self.stoi)
+         if self.sos:
+             self.stoi['<SOS>'] = len(self.stoi)
+         if self.eos:
+             self.stoi['<EOS>'] = len(self.stoi)
+
+     def __len__(self):
+         return len(self.stoi)
+
+     def get_vocabulary(self):
+         return self.stoi
+
+     def add_token(self, token_name: str):
+         if token_name not in self.stoi:
+             self.stoi[token_name] = len(self.stoi)
+
+     def build_vocabulary(self, sentences_list):
+         if type(sentences_list[0]) != str:
+             ## ex: [['eating', 'apples'], ['eating', 'oranges']]
+             sentences_list = [' '.join(sen) for sen in sentences_list]
+
+         word_counts = Counter()
+         for sentence in sentences_list:
+             tokens = self.callable_tokenizer(sentence)
+             word_counts.update(tokens)
+
+         # Keep only words with max_freq or more occurrences
+         filtered_words = [word for word, count in word_counts.items() if count >= self.max_freq]
+         for word in filtered_words:
+             if word not in self.stoi:
+                 self.stoi[word] = len(self.stoi)
+
+     def get_numerical_tokens(self, text: str):
+         tokens = self.callable_tokenizer(text)
+         # tokens.insert(0, '<SOS>') if self.sos else None
+         # tokens.append('<EOS>') if self.eos else None
+         unk_id = self.stoi.get('<UNK>', None)
+         return [self.stoi.get(word, unk_id) for word in tokens]
+
+     def __call__(self, text: str):
+         return self.get_numerical_tokens(text)
+
+     def tokens_to_text(self, tokens_list):
+         keys = list(self.stoi.keys())
+         values = list(self.stoi.values())
+
+         return ' '.join([keys[values.index(token)] for token in tokens_list])
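A small usage sketch for Vocabulary, assuming the en_core_web_sm spaCy model is installed; the toy corpus and the max_freq value are illustrative, not from the repo:

from Vocabulary import Vocabulary

corpus = ["this movie was great , really great",
          "this movie was terrible",
          "great acting , terrible plot"] * 2  # repeat so tokens reach the frequency cutoff

vocab = Vocabulary(max_freq=2)   # falls back to the spaCy tokenizer by default
vocab.build_vocabulary(corpus)

ids = vocab("this movie was great")  # list of token ids; unseen words map to <UNK>
text = vocab.tokens_to_text(ids)     # maps the ids back to a space-joined string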
app.py ADDED
@@ -0,0 +1,50 @@
+ import torch
+ import gradio as gr
+ from Vocabulary import spacy_tokenizer
+ from Model_define import Sentiment_LSTM
+
+
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+ model = torch.load("lstm_model.bin", map_location=device, weights_only=False)
+ model_state = torch.load("lstm_model_states.pt", map_location=device, weights_only=False)
+
+
+ vocab = model_state['vocabulary']
+ tokenizer = spacy_tokenizer()
+
+ cls_to_idx = model_state['class_dict']
+ idx_to_cls = {value: key for key, value in cls_to_idx.items()}
+
+ def pre_processor(text):
+     tokens = tokenizer(text.lower())
+     unk_id = vocab.get('<UNK>', None)
+     return torch.tensor([vocab.get(word, unk_id) for word in tokens])
+
+ def post_processor(raw_output):
+     label = (raw_output >= 0.5).int()
+     return idx_to_cls[label.item()].capitalize(), round(raw_output.item(), 2)
+
+
+ @torch.no_grad()
+ def lunch(raw_input):
+     input_tensor = pre_processor(raw_input).to(device)
+     output = model(input_tensor.unsqueeze(0), device)
+     return post_processor(output)
+
+ custom_css = '.gr-button {background-color: #bf4b04; color: white;}'
+
+ with gr.Blocks(css=custom_css) as demo:
+     with gr.Row():
+         with gr.Column():
+             input_text = gr.Textbox(label='Input a Review or click an Example')
+             gr.Examples(["It is no wonder that the film has such a high rating, it is quite literally breathtaking. What can I say that hasn't said before? Not much, it's the story, the acting, the premise, but most of all, this movie is about how it makes you feel. Sometimes you watch a film, and can't remember it days later, this film loves with you, once you've seen it, you don't forget.",
+                          "This film is nothing but one cliche after another. Having seen many of the 100's of prison films made from the early 30's to the 50's, I was able to pull almost every minute of Shawcrap from one of those films."],
+                         inputs=input_text, label="Examples: ")
+         with gr.Column():
+             class_name = gr.Textbox(label="This review is")
+             confidence = gr.Textbox(label='Confidence')
+             start_btn = gr.Button(value='Submit', elem_classes=["gr-button"])
+     start_btn.click(fn=lunch, inputs=input_text, outputs=[class_name, confidence])
+
+ demo.launch()
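app.py assumes lstm_model_states.pt holds a dict with a 'vocabulary' token-to-id mapping (something supporting .get, such as the dict returned by Vocabulary.get_vocabulary()) and a 'class_dict' mapping class names to indices. The training code is not part of this commit; a hedged sketch of how such files could be written, with every value below an assumption for illustration, might look like:

import torch
from Vocabulary import Vocabulary
from Model_define import Sentiment_LSTM

# Hypothetical training-side snippet; corpus, sizes, and label mapping are placeholders.
vocab = Vocabulary()
vocab.build_vocabulary(["a tiny illustrative corpus"] * 3)
model = Sentiment_LSTM(torch.randn(len(vocab), 100),
                       lstm_hidden_size=128, lstm_layers=1, linear_hidden_size=128)

torch.save(model, "lstm_model.bin")
torch.save({"vocabulary": vocab.get_vocabulary(),           # token -> id dict; app.py calls .get on it
            "class_dict": {"negative": 0, "positive": 1}},  # assumed label mapping
           "lstm_model_states.pt")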
lstm_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f1dceea1f78112220fedc531cae7815a0808ef2582d53de8149631590db30227
+ size 22590178
lstm_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2b4ef1293501e36bb34ba9d8d12cedbdfd2555e3e070baef5cdba91bce8739b6
+ size 1071816
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ torch
+ spacy
+ gradio
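One note on the dependencies: requirements.txt does not include the en_core_web_sm pipeline that the spacy_tokenizer class in Vocabulary.py loads, so it has to be fetched separately, for example with python -m spacy download en_core_web_sm, or from Python:

import spacy.cli

# Downloads the small English pipeline used by spacy_tokenizer in Vocabulary.py
spacy.cli.download("en_core_web_sm")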