JUNPYO99
/

LMFlavorGraph-tokenizer

SEO committed on Dec 27, 2024

Commit

198c391

1 Parent(s): 308080e

Upload tokenizer files

Files changed (5) hide show

id2binary_mask.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

id2csp_feature.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aaf54f0b425d9040877d72a5c1b4c5ec93bb9f64b3ca8ad9dcfcc014acdd0960
-size 33699822

 version https://git-lfs.github.com/spec/v1
+oid sha256:2c8c714a8aead7afcac70003f45428b7f9d405bc4caee937b27c7d8f5516de77
+size 33695410

tokenizer.json CHANGED Viewed

@@ -4,7 +4,7 @@
   "padding": null,
   "added_tokens": [
     {
-      "id": 0,
       "content": "[PAD]",
       "single_word": false,
       "lstrip": false,
@@ -13,13 +13,22 @@
       "special": true
     },
     {
-      "id": 1,
       "content": "[UNK]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
       "normalized": false,
       "special": true
     }
   ],
   "normalizer": null,

   "padding": null,
   "added_tokens": [
     {
+      "id": 8284,
       "content": "[PAD]",
       "single_word": false,
       "lstrip": false,
       "special": true
     },
     {
+      "id": 8285,
       "content": "[UNK]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
       "normalized": false,
       "special": true
+    },
+    {
+      "id": 8286,
+      "content": "[MASK]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
     }
   ],
   "normalizer": null,

tokenizer_config.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"model": "wordlevel", "vocab_size": 8286, "unk_token": "[UNK]", "special_tokens": ["[PAD]", "[UNK]"]}


1	+ {"model": "wordlevel", "vocab_size": 8287, "unk_token": "[UNK]", "special_tokens": ["[PAD]", "[UNK]"]}

vocab.json CHANGED Viewed

The diff for this file is too large to render. See raw diff