sanshizhang committed on
Commit
c979986
·
verified ·
1 Parent(s): 694248a

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +16 -11
README.md CHANGED
@@ -1,6 +1,9 @@
1
  ---
2
- license: openrail
 
 
3
  ---
 
4
  验证集准确度: 0.9382193411826961
5
  验证集分类报告:
6
  precision recall f1-score support
@@ -14,28 +17,30 @@ license: openrail
14
  weighted avg 0.94 0.94 0.94 15118
15
 
16
  使用了约 10 万余条数据,训练了一个基金领域的中文情感分析模型。目前测试下来效果尚可:负面类文本经过专人处理,中性类的预测结果可能不够准确。
 
17
  0: 'negative', 1: 'positive', 2: 'neutral'
18
 
19
 
20
- 测试代码如下:
 
21
  import sys
22
  import re
23
  import torch
24
  from transformers import BertTokenizer, BertForSequenceClassification
25
  from torch.nn.functional import softmax
26
 
27
- # 设定使用CPU或CUDA
28
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
29
 
30
- # 载入预先保存的模型和分词器
31
  model = BertForSequenceClassification.from_pretrained('sanshizhang/Chinese-Sentiment-Analysis-Fund-Direction')
32
  tokenizer = BertTokenizer.from_pretrained('sanshizhang/Chinese-Sentiment-Analysis-Fund-Direction')
33
 
34
- # 确保模型在正确的设备上
35
  model = model.to(device)
36
  model.eval() # 把模型设置为评估模式
37
 
38
- # 函数定义:进行预测并返回预测概率
39
  def predict_sentiment(text):
40
  # 编码文本数据
41
  encoding = tokenizer.encode_plus(
@@ -66,18 +71,18 @@ def predict_sentiment(text):
66
  # 返回概率和预测的类别
67
  return probs, torch.argmax(probs, dim=1).cpu().numpy()[0]
68
 
69
- # 从命令行参数获取文本,合并并清理特殊字符
70
  arguments = sys.argv[1:] # 忽略脚本名称
71
  text = ' '.join(arguments) # 合并为单一字符串
72
  text = re.sub(r"[^\u4e00-\u9fff\d.a-zA-Z%+\-。!?,、;:()【】《》“”‘’]", '', text) # 去除特殊字符
73
 
74
- # print(f"传过来的文本是: {text}")
75
- # 进行预测
76
  probabilities, prediction = predict_sentiment(text)
77
 
78
  sentiment_labels = {0: 'negative', 1: 'positive', 2: 'neutral'}
79
 
80
- # 打印出预测的情感及其概率
81
  predicted_sentiment = sentiment_labels[prediction]
82
  print(f"Predicted sentiment: {predicted_sentiment},Probability:{probabilities[0][prediction].item()}")
83
- # print(f"Probability: {probabilities[0][prediction].item()}")
 
1
  ---
2
+ license: apache-2.0
3
+ language:
4
+ - zh
5
  ---
6
+
7
  验证集准确度: 0.9382193411826961
8
  验证集分类报告:
9
  precision recall f1-score support
 
17
  weighted avg 0.94 0.94 0.94 15118
18
 
19
  使用了约 10 万余条数据,训练了一个基金领域的中文情感分析模型。目前测试下来效果尚可:负面类文本经过专人处理,中性类的预测结果可能不够准确。
20
+ # 返回值解释:
21
  0: 'negative', 1: 'positive', 2: 'neutral'
22
 
23
 
24
+
25
+ # 测试代码如下:
26
  import sys
27
  import re
28
  import torch
29
  from transformers import BertTokenizer, BertForSequenceClassification
30
  from torch.nn.functional import softmax
31
 
32
+ #设定使用CPU或CUDA
33
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
34
 
35
+ #载入预先保存的模型和分词器
36
  model = BertForSequenceClassification.from_pretrained('sanshizhang/Chinese-Sentiment-Analysis-Fund-Direction')
37
  tokenizer = BertTokenizer.from_pretrained('sanshizhang/Chinese-Sentiment-Analysis-Fund-Direction')
38
 
39
+ #确保模型在正确的设备上
40
  model = model.to(device)
41
  model.eval() # 把模型设置为评估模式
42
 
43
+ #函数定义:进行预测并返回预测概率
44
  def predict_sentiment(text):
45
  # 编码文本数据
46
  encoding = tokenizer.encode_plus(
 
71
  # 返回概率和预测的类别
72
  return probs, torch.argmax(probs, dim=1).cpu().numpy()[0]
73
 
74
+ #从命令行参数获取文本,合并并清理特殊字符
75
  arguments = sys.argv[1:] # 忽略脚本名称
76
  text = ' '.join(arguments) # 合并为单一字符串
77
  text = re.sub(r"[^\u4e00-\u9fff\d.a-zA-Z%+\-。!?,、;:()【】《》“”‘’]", '', text) # 去除特殊字符
78
 
79
+ #print(f"传过来的文本是: {text}")
80
+ #进行预测
81
  probabilities, prediction = predict_sentiment(text)
82
 
83
  sentiment_labels = {0: 'negative', 1: 'positive', 2: 'neutral'}
84
 
85
+ #打印出预测的情感及其概率
86
  predicted_sentiment = sentiment_labels[prediction]
87
  print(f"Predicted sentiment: {predicted_sentiment},Probability:{probabilities[0][prediction].item()}")
88
+ #print(f"Probability: {probabilities[0][prediction].item()}")