Spaces:

sariaslaso
/

movies_LLMs_gradio

Sleeping

App Files Files Community

sariaslaso commited on Jul 27, 2024

Commit

21d8cce

verified ·

1 Parent(s): 8d39607

Upload 2 files

Browse files

Files changed (2) hide show

MovieClassifier.py +76 -0
gradio_app.py +27 -0

MovieClassifier.py ADDED Viewed

	@@ -0,0 +1,76 @@

+import numpy as np
+import torch
+from transformers import AutoTokenizer
+from transformers import AutoModelForSequenceClassification
+class MovieClassifier:
+    ratings = ['bad', 'average', 'good']
+    # initialize the model and tokenizer
+    def __init__(self, model_path):
+        self.model = AutoModelForSequenceClassification.from_pretrained(model_path)
+        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
+    def __preProcessInput(self, titles, summaries, genres):
+    # titles: list of strings in the form: [title_1, title_2, ...]
+    # summaries: list of summaries(strings) in the form: [summary_1, summary_2, ...]
+    # genres: list of genres in the form: [[genres_1], [genres_2], ...] with genres_i = "genres_i1", "genres_i2", ...
+        inputs = []
+        for i in range(len(titles)):
+        # normalice spacing in the titles
+            title_i = (' ').join(titles[i].split())
+        # normalice spacing in the summaries
+            summary_i = (' ').join(summaries[i].split())
+            if genres[i] == []:
+                genres_i = 'NonGiven'
+            else:
+            # convert the lists of genres to strings separated by '|'
+                genres_i = '|'.join(genres[i])
+            input_i = {'title': title_i, 'summary': summary_i, 'genres': genres_i}
+            inputs.append(input_i)
+        return inputs
+    def __tokenizeInputs(self, inputs):
+        title_mod = [movie['title'] + '<SEP>' + movie['summary'] for movie in inputs]
+        genres_list = [movie['genres'] for movie in inputs]
+        return self.tokenizer(title_mod, genres_list, padding = 'max_length',
+                         truncation = True,
+                         return_tensors = "pt")
+    def __modelPredictions(self, model, tokenized_input):
+    # generate model predictions using the model logits and tokenized input and determine
+    # the most likely rating using
+        with torch.no_grad():
+            model_output = self.model(**tokenized_input)
+        logits = model_output.logits
+        predictions = np.argmax(logits, axis = -1)
+        return predictions
+    def __predMovieRating(self, predictions):
+        predicted_ratings = []
+        for pred in predictions:
+            predicted_ratings.append((pred, self.ratings[pred]))
+        return predicted_ratings
+    def predict(self, title, summary, genre):
+        movies = self.__preProcessInput(title, summary, genre)
+        tokenized_movies = self.__tokenizeInputs(movies)
+        predictions = self.__modelPredictions(self.model, tokenized_movies)
+        pred_ratings = self.__predMovieRating(predictions)
+        return pred_ratings

gradio_app.py ADDED Viewed

	@@ -0,0 +1,27 @@

+import gradio as gr
+from MovieClassifier import MovieClassifier
+# model_path = "./models/fine_tuned_DeBERTa_v3/v3"
+model_path = "sariaslaso/movies_and_LLMs"
+classifier = MovieClassifier(model_path)
+def predict_rating(title, summary, genres):
+	print(title, summary, genres)
+	genres = [genre.strip() for genre in genres.split(",")]
+	prediction = classifier.predict([title], [summary], [genres])[0][1]
+	return prediction
+demo = gr.Interface(
+	fn = predict_rating,
+	inputs = ["textbox", "textbox", "textbox"],
+	outputs = ["textbox"],
+	title = "Create your highly rated movie!",
+	description = "Here is a movie-rating predictor. Enter title, summary, and comma-separated genres.",
+	)
+demo.launch(share = True)