from typing import Iterable, Sequence

import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification


def _join_labels(labels: Iterable[str], empty: str = "") -> str:
    """Join labels alphabetically with single spaces.

    Sorting makes the output reproducible: iterating a ``set`` of strings
    directly gives an order that can differ from one interpreter run to the
    next. ``empty`` is returned when there are no labels.
    """
    ordered = sorted(labels)
    return " ".join(ordered) if ordered else empty


def evaluate_emotions_from_csv(
    csv_path: str,
    model_path: str,
    output_csv_path: str,
    emotion_columns: Sequence[str],
    device: str = "cuda" if torch.cuda.is_available() else "cpu",
    threshold: float = 0.1,
) -> None:
    """Compare a classifier's predicted emotions with the labels in a CSV file.

    The input CSV must contain a ``text`` column and an ``emotions`` column
    holding whitespace-separated label names. For each row the text is run
    through the model, a sigmoid is applied to every logit, and each label
    whose probability is strictly greater than ``threshold`` counts as
    predicted. A per-row report is written to ``output_csv_path`` and a
    confirmation message is printed.

    Args:
        csv_path: Path of the input CSV file.
        model_path: Name or path handed to ``from_pretrained`` for both the
            tokenizer and the model.
        output_csv_path: Path the report CSV is written to.
        emotion_columns: Label names, in the same order as the model's
            output logits.
        device: Torch device the model and inputs are moved to.
        threshold: Probability a label must exceed to count as predicted.

    Raises:
        ValueError: If ``emotion_columns`` does not have exactly one entry
            per model output label.
        KeyError: If the CSV lacks the ``text`` or ``emotions`` column.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    model.to(device)
    model.eval()

    # zip() below would silently truncate on a length mismatch and attach
    # probabilities to the wrong names, so fail loudly before doing any work.
    num_labels = model.config.num_labels
    if num_labels != len(emotion_columns):
        raise ValueError(
            f"model outputs {num_labels} labels but emotion_columns "
            f"has {len(emotion_columns)} entries"
        )

    data = pd.read_csv(csv_path)

    # Empty cells are read as NaN; treat them as "no labels" / empty text
    # instead of crashing. str.split() with no separator also collapses
    # repeated whitespace, so no empty-string labels are produced.
    true_label_sets = [
        set(labels)
        for labels in data["emotions"].fillna("").astype(str).str.split()
    ]
    texts = data["text"].fillna("").astype(str)

    true_emotions_list = []
    predicted_emotions_list = []
    correctly_identified = []
    incorrectly_identified = []
    undefined = []

    for text, true_emotions in zip(texts, true_label_sets):
        inputs = tokenizer(
            text,
            return_tensors="pt",
            padding="max_length",
            truncation=True,
            max_length=128,
        ).to(device)

        with torch.no_grad():
            logits = model(**inputs).logits

        # Drop only the batch dimension; a bare squeeze() would also collapse
        # the label dimension when the model has a single output.
        probabilities = torch.sigmoid(logits)[0].cpu().numpy()

        predicted_emotions = {
            emotion
            for emotion, prob in zip(emotion_columns, probabilities)
            if prob > threshold
        }

        correct = true_emotions & predicted_emotions
        incorrect = predicted_emotions - true_emotions
        # Labels present in the ground truth that the model did not predict.
        missed = true_emotions - predicted_emotions

        true_emotions_list.append(_join_labels(true_emotions))
        predicted_emotions_list.append(_join_labels(predicted_emotions))
        correctly_identified.append(_join_labels(correct, empty="None"))
        incorrectly_identified.append(_join_labels(incorrect, empty="None"))
        undefined.append(_join_labels(missed, empty="None"))

    results_df = pd.DataFrame({
        "text": data['text'],
        "true emotions": true_emotions_list,
        "predict emotions": predicted_emotions_list,
        "correctly Identified": correctly_identified,
        "incorrectly Identified": incorrectly_identified,
        "undefined": undefined,
    })

    results_df.to_csv(output_csv_path, index=False)
    print(f"Результаты сохранены в {output_csv_path}")


# NOTE(review): everything below runs whenever this module is executed or
# imported. If the function is ever imported from elsewhere, consider moving
# the call under an `if __name__ == "__main__":` guard.
csv_path = "RuBert-tiny2-EmotionsDetected/Dstasets/testEmotionDetected.csv"
model_path = "RuBert-tiny2-EmotionsDetected"
output_csv_path = "RuBert-tiny2-EmotionsDetected/Dstasets/Emotions_detected.csv"

# Order matters: entries are paired positionally with the model's outputs.
emotion_columns = [
    "admiration", "amusement", "anger", "annoyance", "approval", "caring",
    "confusion", "curiosity", "desire", "disappointment", "disapproval",
    "disgust", "embarrassment", "excitement", "fear", "gratitude", "grief",
    "joy", "love", "nervousness", "optimism", "pride", "realization",
    "relief", "remorse", "sadness", "surprise", "neutral",
]

evaluate_emotions_from_csv(csv_path, model_path, output_csv_path, emotion_columns, threshold=0.2)