ysenarath committed (verified)
Commit: d7a9c37 · Parent(s): 4cc43f8

Upload tokenizer
Files changed (6):
  1. README.md +9 -9
  2. merges.txt +0 -0
  3. special_tokens_map.json +15 -0
  4. tokenizer.json +0 -0
  5. tokenizer_config.json +58 -0
  6. vocab.json +0 -0
README.md CHANGED
@@ -15,8 +15,8 @@ model-index:
 - name: roberta-base-sentiment140
   results:
   - task:
-      name: Text Classification
       type: text-classification
+      name: Text Classification
     dataset:
       name: sentiment140
       type: sentiment140
@@ -24,18 +24,18 @@ model-index:
       split: train
       args: sentiment140
     metrics:
-    - name: Accuracy
-      type: accuracy
+    - type: accuracy
       value: 0.883
-    - name: Precision
-      type: precision
+      name: Accuracy
+    - type: precision
       value: 0.8801652892561983
-    - name: Recall
-      type: recall
+      name: Precision
+    - type: recall
       value: 0.8783505154639175
-    - name: F1
-      type: f1
+      name: Recall
+    - type: f1
       value: 0.8792569659442725
+      name: F1
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
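The README change only reorders the name/type keys inside each model-index metric entry; the reported values are unchanged. For context, a minimal sketch of how metric values of this kind (accuracy, precision, recall, F1) are typically computed for a binary sentiment task. The label arrays below are made up for illustration; the actual values in the card come from the Trainer's evaluation on sentiment140.

# Illustrative only: the gold labels and predictions here are hypothetical.
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

y_true = [0, 1, 1, 0, 1, 0, 1, 1]   # hypothetical gold labels (0 = negative, 1 = positive)
y_pred = [0, 1, 0, 0, 1, 0, 1, 1]   # hypothetical model predictions

print("accuracy :", accuracy_score(y_true, y_pred))
print("precision:", precision_score(y_true, y_pred))
print("recall   :", recall_score(y_true, y_pred))
print("f1       :", f1_score(y_true, y_pred))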
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
+{
+  "bos_token": "<s>",
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "unk_token": "<unk>"
+}
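A minimal sketch of loading the uploaded tokenizer and checking the special tokens declared in special_tokens_map.json. The repository id below is an assumption based on the committer name and the model name in the README; substitute the actual repo id if it differs.

from transformers import AutoTokenizer

# Assumed repo id (committer + model name); adjust as needed.
tokenizer = AutoTokenizer.from_pretrained("ysenarath/roberta-base-sentiment140")

print(tokenizer.bos_token, tokenizer.eos_token)   # expected: <s> </s>
print(tokenizer.cls_token, tokenizer.sep_token)   # expected: <s> </s>
print(tokenizer.pad_token, tokenizer.unk_token)   # expected: <pad> <unk>
print(tokenizer.mask_token)                       # expected: <mask>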
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50264": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "mask_token": "<mask>",
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "RobertaTokenizer",
+  "trim_offsets": true,
+  "unk_token": "<unk>"
+}
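A rough illustration of how the settings in tokenizer_config.json (RobertaTokenizer, model_max_length of 512, sequences wrapped in <s> ... </s>) show up at encode time. The repo id is the same assumption as above, and the sample text is arbitrary.

from transformers import AutoTokenizer

# Assumed repo id, as above.
tokenizer = AutoTokenizer.from_pretrained("ysenarath/roberta-base-sentiment140")

enc = tokenizer("I love this!", truncation=True, max_length=tokenizer.model_max_length)
print(enc["input_ids"])                               # ids for <s> ... </s>
print(tokenizer.convert_ids_to_tokens(enc["input_ids"]))  # sequence wrapped in <s> and </s>
print(tokenizer.model_max_length)                     # 512, from tokenizer_config.json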
vocab.json ADDED
The diff for this file is too large to render. See raw diff
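
A short sketch, for orientation: vocab.json and merges.txt together hold the byte-level BPE data used by RobertaTokenizer, so they are too large to render inline. This just inspects the loaded vocabulary and tokenization, again using the assumed repo id.

from transformers import AutoTokenizer

# Assumed repo id, as above.
tokenizer = AutoTokenizer.from_pretrained("ysenarath/roberta-base-sentiment140")

vocab = tokenizer.get_vocab()                  # contents of vocab.json plus added tokens
print(len(vocab))                              # vocabulary size (~50k for a RoBERTa-base tokenizer)
print(tokenizer.tokenize("Uploading a tokenizer"))  # BPE pieces produced via merges.txt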