{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": false, "trim_offsets": true, "use_regex": true }, "post_processor": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": false, "use_regex": true }, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": "", "end_of_word_suffix": "", "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "<|endoftext|>": 0, "!": 1, "\"": 2, "#": 3, "$": 4, "%": 5, "&": 6, "'": 7, "(": 8, ")": 9, "*": 10, "+": 11, ",": 12, "-": 13, ".": 14, "/": 15, "0": 16, "1": 17, "2": 18, "3": 19, "4": 20, "5": 21, "6": 22, "7": 23, "8": 24, "9": 25, ":": 26, ";": 27, "<": 28, "=": 29, ">": 30, "?": 31, "@": 32, "A": 33, "B": 34, "C": 35, "D": 36, "E": 37, "F": 38, "G": 39, "H": 40, "I": 41, "J": 42, "K": 43, "L": 44, "M": 45, "N": 46, "O": 47, "P": 48, "Q": 49, "R": 50, "S": 51, "T": 52, "U": 53, "V": 54, "W": 55, "X": 56, "Y": 57, "Z": 58, "[": 59, "\\": 60, "]": 61, "^": 62, "_": 63, "`": 64, "a": 65, "b": 66, "c": 67, "d": 68, "e": 69, "f": 70, "g": 71, "h": 72, "i": 73, "j": 74, "k": 75, "l": 76, "m": 77, "n": 78, "o": 79, "p": 80, "q": 81, "r": 82, "s": 83, "t": 84, "u": 85, "v": 86, "w": 87, "x": 88, "y": 89, "z": 90, "{": 91, "|": 92, "}": 93, "~": 94, "¡": 95, "¢": 96, "£": 97, "¤": 98, "¥": 99, "¦": 100, "§": 101, "¨": 102, "©": 103, "ª": 104, "«": 105, "¬": 106, "®": 107, "¯": 108, "°": 109, "±": 110, "²": 111, "³": 112, "´": 113, "µ": 114, "¶": 115, "·": 116, "¸": 117, "¹": 118, "º": 119, "»": 120, "¼": 121, "½": 122, "¾": 123, "¿": 124, "À": 125, "Á": 126, "Â": 127, "Ã": 128, "Ä": 129, "Å": 130, "Æ": 131, "Ç": 132, "È": 133, "É": 134, "Ê": 135, "Ë": 136, "Ì": 137, "Í": 138, "Î": 139, "Ï": 140, "Ð": 141, "Ñ": 142, "Ò": 143, "Ó": 144, "Ô": 145, "Õ": 146, "Ö": 147, "×": 148, "Ø": 149, "Ù": 150, "Ú": 151, "Û": 152, "Ü": 153, "Ý": 154, "Þ": 155, "ß": 156, "à": 157, "á": 158, "â": 159, "ã": 160, "ä": 161, "å": 162, "æ": 163, "ç": 164, "è": 165, "é": 166, "ê": 167, "ë": 168, "ì": 169, "í": 170, "î": 171, "ï": 172, "ð": 173, "ñ": 174, "ò": 175, "ó": 176, "ô": 177, "õ": 178, "ö": 179, "÷": 180, "ø": 181, "ù": 182, "ú": 183, "û": 184, "ü": 185, "ý": 186, "þ": 187, "ÿ": 188, "Ā": 189, "ā": 190, "Ă": 191, "ă": 192, "Ą": 193, "ą": 194, "Ć": 195, "ć": 196, "Ĉ": 197, "ĉ": 198, "Ċ": 199, "ċ": 200, "Č": 201, "č": 202, "Ď": 203, "ď": 204, "Đ": 205, "đ": 206, "Ē": 207, "ē": 208, "Ĕ": 209, "ĕ": 210, "Ė": 211, "ė": 212, "Ę": 213, "ę": 214, "Ě": 215, "ě": 216, "Ĝ": 217, "ĝ": 218, "Ğ": 219, "ğ": 220, "Ġ": 221, "ġ": 222, "Ģ": 223, "ģ": 224, "Ĥ": 225, "ĥ": 226, "Ħ": 227, "ħ": 228, "Ĩ": 229, "ĩ": 230, "Ī": 231, "ī": 232, "Ĭ": 233, "ĭ": 234, "Į": 235, "į": 236, "İ": 237, "ı": 238, "IJ": 239, "ij": 240, "Ĵ": 241, "ĵ": 242, "Ķ": 243, "ķ": 244, "ĸ": 245, "Ĺ": 246, "ĺ": 247, "Ļ": 248, "ļ": 249, "Ľ": 250, "ľ": 251, "Ŀ": 252, "ŀ": 253, "Ł": 254, "ł": 255, "Ń": 256, "aa": 257, "en": 258, "an": 259, "Ġp": 260, "ga": 261, "th": 262, "la": 263, "dh": 264, "ku": 265, "ra": 266, "na": 267, "di": 268, "in": 269, "ir": 270, "Ġm": 271, "Ġs": 272, "Ġe": 273, "dhu": 274, "er": 275, "Ġpan": 276, "kku": 277, "um": 278, "een": 279, "am": 280, "da": 281, "ka": 282, "naa": 283, "dha": 284, "ch": 285, "on": 286, "eenga": 287, "iru": 288, "es": 289, "or": 290, "Ġen": 291, "thu": 292, "pa": 293, "Ġiru": 294, "nga": 295, "al": 296, "du": 297, "ti": 298, "ah": 299, "il": 300, "oo": 301, "ar": 302, "ty": 303, "ki": 304, "Ġn": 305, "Ġmu": 306, "Ġb": 307, "Ġth": 308, "Ġv": 309, "om": 310, "Ġa": 311, "ee": 312, "ma": 313, "tt": 314, "Ġc": 315, "Ġk": 316, "yum": 317, "li": 318, "Ġf": 319, "Ġenna": 320, "yumaa": 321, "ppa": 322, "re": 323, "va": 324, "Ġh": 325, "Ġaa": 326, "kk": 327, "nu": 328, "Ġpanna": 329, "oda": 330, "ram": 331, "el": 332, "Ġnaa": 333, "aan": 334, "ya": 335, "Ġpa": 336, "Ġpo": 337, "Ġmudi": 338, "ppadi": 339, "lan": 340, "à®": 341, "lo": 342, "amil": 343, "Ġirukk": 344, "Ġmudiyumaa": 345, "ea": 346, "kka": 347, "lu": 348, "Ġka": 349, "end": 350, "radhu": 351, "nai": 352, "idi": 353, "un": 354, "tim": 355, "ru": 356, "unga": 357, "van": 358, "ing": 359, "Ġeppadi": 360, "st": 361, "ur": 362, "ve": 363, "yaa": 364, "Ġt": 365, "Ġsaa": 366, "eri": 367, "Ġpann": 368, "Ġneenga": 369, "lla": 370, "sh": 371, "Ġw": 372, "Ġre": 373, "ent": 374, "tha": 375, "Ġev": 376, "est": 377, "time": 378, "gu": 379, "ig": 380, "ri": 381, "Ġin": 382, "aanga": 383, "Ġpidi": 384, "si": 385, "Ġse": 386, "Ġirukku": 387, "Ġevlo": 388, "ce": 389, "com": 390, "ht": 391, "le": 392, "se": 393, "ani": 394, "Ġpaa": 395, "day": 396, "lukku": 397, "Tamil": 398, "laa": 399 }, "merges": [ [ "a", "a" ], [ "e", "n" ], [ "a", "n" ], [ "Ġ", "p" ], [ "g", "a" ], [ "t", "h" ], [ "l", "a" ], [ "d", "h" ], [ "k", "u" ], [ "r", "a" ], [ "n", "a" ], [ "d", "i" ], [ "i", "n" ], [ "i", "r" ], [ "Ġ", "m" ], [ "Ġ", "s" ], [ "Ġ", "e" ], [ "dh", "u" ], [ "e", "r" ], [ "Ġp", "an" ], [ "k", "ku" ], [ "u", "m" ], [ "e", "en" ], [ "a", "m" ], [ "d", "a" ], [ "k", "a" ], [ "n", "aa" ], [ "dh", "a" ], [ "c", "h" ], [ "o", "n" ], [ "een", "ga" ], [ "ir", "u" ], [ "e", "s" ], [ "o", "r" ], [ "Ġ", "en" ], [ "th", "u" ], [ "p", "a" ], [ "Ġ", "iru" ], [ "n", "ga" ], [ "a", "l" ], [ "d", "u" ], [ "t", "i" ], [ "a", "h" ], [ "i", "l" ], [ "o", "o" ], [ "a", "r" ], [ "t", "y" ], [ "k", "i" ], [ "Ġ", "n" ], [ "Ġm", "u" ], [ "Ġ", "b" ], [ "Ġ", "th" ], [ "Ġ", "v" ], [ "o", "m" ], [ "Ġ", "a" ], [ "e", "e" ], [ "m", "a" ], [ "t", "t" ], [ "Ġ", "c" ], [ "Ġ", "k" ], [ "y", "um" ], [ "l", "i" ], [ "Ġ", "f" ], [ "Ġen", "na" ], [ "yum", "aa" ], [ "p", "pa" ], [ "r", "e" ], [ "v", "a" ], [ "Ġ", "h" ], [ "Ġ", "aa" ], [ "k", "k" ], [ "n", "u" ], [ "Ġpan", "na" ], [ "o", "da" ], [ "ra", "m" ], [ "e", "l" ], [ "Ġ", "naa" ], [ "aa", "n" ], [ "y", "a" ], [ "Ġp", "a" ], [ "Ġp", "o" ], [ "Ġmu", "di" ], [ "ppa", "di" ], [ "l", "an" ], [ "à", "®" ], [ "l", "o" ], [ "am", "il" ], [ "Ġiru", "kk" ], [ "Ġmudi", "yumaa" ], [ "e", "a" ], [ "k", "ka" ], [ "l", "u" ], [ "Ġ", "ka" ], [ "en", "d" ], [ "ra", "dhu" ], [ "na", "i" ], [ "i", "di" ], [ "u", "n" ], [ "ti", "m" ], [ "r", "u" ], [ "u", "nga" ], [ "v", "an" ], [ "in", "g" ], [ "Ġe", "ppadi" ], [ "s", "t" ], [ "u", "r" ], [ "v", "e" ], [ "y", "aa" ], [ "Ġ", "t" ], [ "Ġs", "aa" ], [ "er", "i" ], [ "Ġpan", "n" ], [ "Ġn", "eenga" ], [ "l", "la" ], [ "s", "h" ], [ "Ġ", "w" ], [ "Ġ", "re" ], [ "en", "t" ], [ "th", "a" ], [ "Ġe", "v" ], [ "es", "t" ], [ "tim", "e" ], [ "g", "u" ], [ "i", "g" ], [ "r", "i" ], [ "Ġ", "in" ], [ "aa", "nga" ], [ "Ġp", "idi" ], [ "s", "i" ], [ "Ġs", "e" ], [ "Ġiru", "kku" ], [ "Ġev", "lo" ], [ "c", "e" ], [ "c", "om" ], [ "h", "t" ], [ "l", "e" ], [ "s", "e" ], [ "an", "i" ], [ "Ġp", "aa" ], [ "da", "y" ], [ "lu", "kku" ], [ "T", "amil" ], [ "l", "aa" ] ] } }