Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files- MovieClassifier.py +76 -0
- gradio_app.py +27 -0
MovieClassifier.py
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import torch
|
3 |
+
|
4 |
+
from transformers import AutoTokenizer
|
5 |
+
from transformers import AutoModelForSequenceClassification
|
6 |
+
|
7 |
+
|
8 |
+
class MovieClassifier:
    """Predict a coarse rating ('bad', 'average' or 'good') for movies.

    Wraps a sequence-classification model and its tokenizer, both loaded via
    ``from_pretrained(model_path)``. Each movie is described by a title, a
    summary and a list of genre names.
    """

    # Label names, indexed by the class id selected from the model logits.
    ratings = ['bad', 'average', 'good']

    def __init__(self, model_path):
        """Load the model and the tokenizer from ``model_path``."""
        self.model = AutoModelForSequenceClassification.from_pretrained(model_path)
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)

    def __preProcessInput(self, titles, summaries, genres):
        """Normalize raw movie descriptions into one dict per movie.

        Args:
            titles: list of strings, ``[title_1, title_2, ...]``.
            summaries: list of strings, ``[summary_1, summary_2, ...]``.
            genres: list of genre lists, ``[[genres_1], [genres_2], ...]``
                where each inner list holds genre names as strings.

        Returns:
            A list of dicts with the keys ``'title'``, ``'summary'`` and
            ``'genres'``. Runs of whitespace in titles and summaries are
            collapsed to single spaces; genres are joined with ``'|'``, or
            replaced by ``'NonGiven'`` when no genre is supplied.

        Raises:
            ValueError: if the three input lists differ in length.
        """
        if not (len(titles) == len(summaries) == len(genres)):
            raise ValueError(
                "titles, summaries and genres must have the same length "
                f"(got {len(titles)}, {len(summaries)} and {len(genres)})"
            )

        inputs = []
        for title, summary, movie_genres in zip(titles, summaries, genres):
            # Drop blank entries: splitting an empty string on ',' yields
            # [''], which must count as "no genres" rather than an empty name.
            named_genres = [g for g in (movie_genres or []) if g and g.strip()]

            inputs.append({
                # str.split() without arguments collapses any whitespace run.
                'title': ' '.join(title.split()),
                'summary': ' '.join(summary.split()),
                'genres': '|'.join(named_genres) if named_genres else 'NonGiven',
            })

        return inputs

    def __tokenizeInputs(self, inputs):
        """Tokenize pre-processed movies as (title + summary, genres) pairs.

        The first sequence is ``title + '<SEP>' + summary`` and the second is
        the genre string. Returns the tokenizer output as PyTorch tensors,
        padded to the maximum length and truncated when too long.
        """
        # NOTE(review): '<SEP>' is inserted as literal text; whether the
        # tokenizer treats it as a special token cannot be told from here.
        title_mod = [movie['title'] + '<SEP>' + movie['summary'] for movie in inputs]
        genres_list = [movie['genres'] for movie in inputs]

        return self.tokenizer(
            title_mod,
            genres_list,
            padding='max_length',
            truncation=True,
            return_tensors="pt",
        )

    def __modelPredictions(self, model, tokenized_input):
        """Run ``model`` on the tokenized batch and return class ids.

        Returns:
            A list of plain ``int`` class ids, one per movie, each being the
            index of the largest logit.
        """
        # Inference only: no gradients are needed.
        with torch.no_grad():
            model_output = model(**tokenized_input)

        # Stay in torch (the logits are a tensor) and hand back Python ints.
        return torch.argmax(model_output.logits, dim=-1).tolist()

    def __predMovieRating(self, predictions):
        """Pair every class id with its label from ``ratings``."""
        return [(int(pred), self.ratings[int(pred)]) for pred in predictions]

    def predict(self, title, summary, genre):
        """Predict the rating of one or more movies.

        Args:
            title: list of movie titles.
            summary: list of movie summaries, parallel to ``title``.
            genre: list of genre lists, parallel to ``title``.

        Returns:
            A list of ``(class_id, rating_name)`` tuples, one per movie. An
            empty batch returns an empty list without calling the model.

        Raises:
            ValueError: if the three input lists differ in length.
        """
        movies = self.__preProcessInput(title, summary, genre)
        if not movies:
            return []

        tokenized_movies = self.__tokenizeInputs(movies)
        predictions = self.__modelPredictions(self.model, tokenized_movies)

        return self.__predMovieRating(predictions)
|
gradio_app.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
|
3 |
+
|
4 |
+
from MovieClassifier import MovieClassifier
|
5 |
+
|
6 |
+
# Identifier handed to from_pretrained() for both the model and the tokenizer.
# During development a local checkpoint was used instead:
#     "./models/fine_tuned_DeBERTa_v3/v3"
model_path = "sariaslaso/movies_and_LLMs"

# Built once at import time so that every request reuses the same instance.
classifier = MovieClassifier(model_path)
|
10 |
+
|
11 |
+
def predict_rating(title, summary, genres):
    """Return the predicted rating name for a single movie.

    Args:
        title: movie title as entered by the user.
        summary: movie summary as entered by the user.
        genres: comma-separated genre names; may be empty.

    Returns:
        The rating label produced by ``classifier.predict`` for this movie.
    """
    print(title, summary, genres)

    # "".split(",") yields [''], so blank entries are dropped explicitly.
    # An empty textbox then produces an empty list, which the classifier
    # maps to its 'NonGiven' placeholder.
    genre_list = [name.strip() for name in (genres or "").split(",") if name.strip()]

    # predict() works on batches and returns (class_id, label) tuples;
    # take the label of the only movie in the batch.
    prediction = classifier.predict([title], [summary], [genre_list])[0][1]

    return prediction
|
17 |
+
|
18 |
+
# Web UI: three free-text inputs (title, summary, genres) and one text output.
demo = gr.Interface(
    fn=predict_rating,
    inputs=["textbox", "textbox", "textbox"],
    outputs=["textbox"],
    title="Create your highly rated movie!",
    description="Here is a movie-rating predictor. Enter title, summary, and comma-separated genres.",
)

# Start the app, requesting a public share link.
demo.launch(share=True)
|
27 |
+
|