ysenarath committed (verified)
Commit: d7a9c37 · Parent(s): 4cc43f8

Upload tokenizer
Files changed (6):
  1. README.md +9 -9
  2. merges.txt +0 -0
  3. special_tokens_map.json +15 -0
  4. tokenizer.json +0 -0
  5. tokenizer_config.json +58 -0
  6. vocab.json +0 -0
README.md CHANGED
@@ -15,8 +15,8 @@ model-index:
 - name: roberta-base-sentiment140
   results:
   - task:
-      name: Text Classification
       type: text-classification
+      name: Text Classification
     dataset:
       name: sentiment140
       type: sentiment140
@@ -24,18 +24,18 @@ model-index:
       split: train
       args: sentiment140
     metrics:
-    - name: Accuracy
-      type: accuracy
+    - type: accuracy
       value: 0.883
-    - name: Precision
-      type: precision
+      name: Accuracy
+    - type: precision
       value: 0.8801652892561983
-    - name: Recall
-      type: recall
+      name: Precision
+    - type: recall
       value: 0.8783505154639175
-    - name: F1
-      type: f1
+      name: Recall
+    - type: f1
       value: 0.8792569659442725
+      name: F1
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
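The README change only reorders the name/type keys inside each model-index metric entry; the reported values are unchanged. For context, a minimal sketch of how metric values of this kind (accuracy, precision, recall, F1) are typically computed for a binary sentiment task. The label arrays below are made up for illustration; the actual values in the card come from the Trainer's evaluation on sentiment140.

# Illustrative only: the gold labels and predictions here are hypothetical.
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

y_true = [0, 1, 1, 0, 1, 0, 1, 1]   # hypothetical gold labels (0 = negative, 1 = positive)
y_pred = [0, 1, 0, 0, 1, 0, 1, 1]   # hypothetical model predictions

print("accuracy :", accuracy_score(y_true, y_pred))
print("precision:", precision_score(y_true, y_pred))
print("recall   :", recall_score(y_true, y_pred))
print("f1       :", f1_score(y_true, y_pred))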
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
+{
+  "bos_token": "<s>",
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "unk_token": "<unk>"
+}
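A minimal sketch of loading the uploaded tokenizer and checking the special tokens declared in special_tokens_map.json. The repository id below is an assumption based on the committer name and the model name in the README; substitute the actual repo id if it differs.

from transformers import AutoTokenizer

# Assumed repo id (committer + model name); adjust as needed.
tokenizer = AutoTokenizer.from_pretrained("ysenarath/roberta-base-sentiment140")

print(tokenizer.bos_token, tokenizer.eos_token)   # expected: <s> </s>
print(tokenizer.cls_token, tokenizer.sep_token)   # expected: <s> </s>
print(tokenizer.pad_token, tokenizer.unk_token)   # expected: <pad> <unk>
print(tokenizer.mask_token)                       # expected: <mask>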
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50264": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "mask_token": "<mask>",
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "RobertaTokenizer",
+  "trim_offsets": true,
+  "unk_token": "<unk>"
+}
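A rough illustration of how the settings in tokenizer_config.json (RobertaTokenizer, model_max_length of 512, sequences wrapped in <s> ... </s>) show up at encode time. The repo id is the same assumption as above, and the sample text is arbitrary.

from transformers import AutoTokenizer

# Assumed repo id, as above.
tokenizer = AutoTokenizer.from_pretrained("ysenarath/roberta-base-sentiment140")

enc = tokenizer("I love this!", truncation=True, max_length=tokenizer.model_max_length)
print(enc["input_ids"])                               # ids for <s> ... </s>
print(tokenizer.convert_ids_to_tokens(enc["input_ids"]))  # sequence wrapped in <s> and </s>
print(tokenizer.model_max_length)                     # 512, from tokenizer_config.json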
vocab.json ADDED
The diff for this file is too large to render. See raw diff
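
A short sketch, for orientation: vocab.json and merges.txt together hold the byte-level BPE data used by RobertaTokenizer, so they are too large to render inline. This just inspects the loaded vocabulary and tokenization, again using the assumed repo id.

from transformers import AutoTokenizer

# Assumed repo id, as above.
tokenizer = AutoTokenizer.from_pretrained("ysenarath/roberta-base-sentiment140")

vocab = tokenizer.get_vocab()                  # contents of vocab.json plus added tokens
print(len(vocab))                              # vocabulary size (~50k for a RoBERTa-base tokenizer)
print(tokenizer.tokenize("Uploading a tokenizer"))  # BPE pieces produced via merges.txt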