sariaslaso committed on
Commit
21d8cce
·
verified ·
1 Parent(s): 8d39607

Upload 2 files

Browse files
Files changed (2) hide show
  1. MovieClassifier.py +76 -0
  2. gradio_app.py +27 -0
MovieClassifier.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+
4
+ from transformers import AutoTokenizer
5
+ from transformers import AutoModelForSequenceClassification
6
+
7
+
8
class MovieClassifier:
    """Predict a coarse rating for movies from title, summary and genres.

    Wraps a sequence-classification model and its tokenizer, both loaded
    through the ``transformers`` auto classes. The class index predicted by
    the model is reported together with the matching entry of ``ratings``.
    """

    # Label names, indexed by the class id predicted by the model.
    ratings = ['bad', 'average', 'good']

    def __init__(self, model_path):
        """Load the model and the tokenizer.

        Args:
            model_path: Identifier or path handed unchanged to
                ``from_pretrained`` for both the model and the tokenizer.
        """
        self.model = AutoModelForSequenceClassification.from_pretrained(model_path)
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)

    def __preProcessInput(self, titles, summaries, genres):
        """Normalize the raw movie fields into one dict per movie.

        Args:
            titles: List of title strings, ``[title_1, title_2, ...]``.
            summaries: List of summary strings, ``[summary_1, ...]``.
            genres: List of genre lists, ``[[genre_11, genre_12, ...], ...]``.
                An empty (or ``None``) entry means no genres were given.

        Returns:
            A list of dicts with the keys ``'title'``, ``'summary'`` and
            ``'genres'``; the genres are joined into one ``'|'``-separated
            string, or ``'NonGiven'`` when the movie has none.

        Raises:
            ValueError: If the three lists do not have the same length.
        """
        if not (len(titles) == len(summaries) == len(genres)):
            raise ValueError(
                "titles, summaries and genres must have the same length, got "
                f"{len(titles)}, {len(summaries)} and {len(genres)}"
            )

        inputs = []
        for title, summary, movie_genres in zip(titles, summaries, genres):
            inputs.append({
                # split()/join collapses every run of whitespace (including
                # newlines and tabs) into a single space and trims the ends.
                'title': ' '.join(title.split()),
                'summary': ' '.join(summary.split()),
                'genres': '|'.join(movie_genres) if movie_genres else 'NonGiven',
            })

        return inputs

    def __tokenizeInputs(self, inputs):
        """Tokenize the pre-processed movies as text pairs.

        The first segment is ``title + '<SEP>' + summary`` and the second
        segment is the genres string.

        Args:
            inputs: List of dicts as returned by ``__preProcessInput``.

        Returns:
            The tokenizer output, requested as PyTorch tensors.
        """
        # NOTE(review): '<SEP>' is inserted as literal text; confirm it matches
        # the separator that was used when the model was fine-tuned.
        title_mod = [movie['title'] + '<SEP>' + movie['summary'] for movie in inputs]
        genres_list = [movie['genres'] for movie in inputs]

        return self.tokenizer(
            title_mod,
            genres_list,
            padding='max_length',
            truncation=True,
            return_tensors="pt",
        )

    def __modelPredictions(self, model, tokenized_input):
        """Run the model and pick the highest-scoring class per movie.

        Args:
            model: Callable model; it is invoked with the tokenized input
                unpacked as keyword arguments and must return an object
                exposing ``logits``.
            tokenized_input: Mapping produced by ``__tokenizeInputs``.

        Returns:
            A list of plain ``int`` class indices, one per movie.
        """
        # Inference only: skip gradient tracking.
        with torch.no_grad():
            model_output = model(**tokenized_input)

        # Stay in torch for the argmax and convert to Python ints so callers
        # receive ordinary list indices instead of tensor scalars.
        return model_output.logits.argmax(dim=-1).tolist()

    def __predMovieRating(self, predictions):
        """Pair every predicted class index with its rating label.

        Args:
            predictions: Iterable of class indices into ``ratings``.

        Returns:
            A list of ``(class_index, rating_label)`` tuples.
        """
        return [(pred, self.ratings[pred]) for pred in predictions]

    def predict(self, title, summary, genre):
        """Predict the rating of one or more movies.

        Args:
            title: List of movie titles.
            summary: List of movie summaries, aligned with ``title``.
            genre: List of genre lists, aligned with ``title``.

        Returns:
            A list with one ``(class_index, rating_label)`` tuple per movie.

        Raises:
            ValueError: If the three lists do not have the same length.
        """
        movies = self.__preProcessInput(title, summary, genre)
        tokenized_movies = self.__tokenizeInputs(movies)
        predictions = self.__modelPredictions(self.model, tokenized_movies)

        return self.__predMovieRating(predictions)
gradio_app.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+
4
+ from MovieClassifier import MovieClassifier
5
+
6
# Model location handed to MovieClassifier. A local checkpoint path
# ("./models/fine_tuned_DeBERTa_v3/v3") was used here previously.
model_path = "sariaslaso/movies_and_LLMs"

# Built once at import time and reused for every prediction request.
classifier = MovieClassifier(model_path)
10
+
11
def predict_rating(title, summary, genres):
    """Return the predicted rating label for a single movie.

    Args:
        title: Movie title.
        summary: Movie summary.
        genres: Comma-separated genre names; may be empty.

    Returns:
        The rating label reported by ``classifier.predict`` for the movie.
    """
    print(title, summary, genres)

    # "".split(",") yields [""], so blank entries (empty input, trailing or
    # doubled commas) are dropped; a movie without genres then reaches the
    # classifier as an empty list instead of a list holding an empty string.
    stripped = (part.strip() for part in (genres or "").split(","))
    genre_names = [name for name in stripped if name]

    # One movie in, one (class_index, label) tuple out: keep only the label.
    return classifier.predict([title], [summary], [genre_names])[0][1]
17
+
18
# Three text inputs (title, summary, genres) mapped to one text output.
demo = gr.Interface(
    fn=predict_rating,
    inputs=["textbox", "textbox", "textbox"],
    outputs=["textbox"],
    title="Create your highly rated movie!",
    description="Here is a movie-rating predictor. Enter title, summary, and comma-separated genres.",
)

# Start the app; share=True requests a shareable link from Gradio.
demo.launch(share=True)
27
+