diff --git "a/tokenizer_config.json" "b/tokenizer_config.json" --- "a/tokenizer_config.json" +++ "b/tokenizer_config.json" @@ -39,6 +39,4974 @@ "rstrip": false, "single_word": false, "special": true + }, + "30000": { + "content": "▁regia", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30001": { + "content": "▁nepe", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30002": { + "content": "▁chacra", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30003": { + "content": "▁guzm", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30004": { + "content": "▁tallarines", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30005": { + "content": "uc", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30006": { + "content": "▁reynoso!!!", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30007": { + "content": "ć", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30008": { + "content": "▁choclo", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30009": { + "content": "▁mamadera", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30010": { + "content": "ampe", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30011": { + "content": "▁argolla", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30012": { + "content": "®", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30013": { + "content": "▁trucha", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30014": { + "content": "▁roche", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30015": { + "content": "▁sarta", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30016": { + "content": "▁maleteros", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30017": { + "content": "▁cachetada", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30018": { + "content": "érez", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30019": { + "content": "▁tallar", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30020": { + "content": "▁peruanos,", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30021": { + "content": "▁huevadas", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30022": { + "content": "inando", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30023": { + "content": "▁viaj", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30024": { + "content": "▁picarones", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30025": { + "content": "▁cuto", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30026": { + "content": "▁chevere", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30027": { + "content": "▁mostro", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30028": { + "content": "▁mrd", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30029": { + "content": "▁jau", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30030": { + "content": "▁fernan", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30031": { + "content": "\"\"\"\"", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30032": { + "content": "▁fuj", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30033": { + "content": "leg", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30034": { + "content": "inera", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30035": { + "content": "▁fior", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30036": { + "content": "▁habi", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30037": { + "content": "▁philli", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30038": { + "content": "▁vacar", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30039": { + "content": "▁pitucos", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30040": { + "content": "▁piu", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30041": { + "content": "▁tobi", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30042": { + "content": "▁pim", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30043": { + "content": "▁chac", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30044": { + "content": "▁huevón", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30045": { + "content": "trados", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30046": { + "content": "▁yala", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30047": { + "content": "no*", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30048": { + "content": "▁ají", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30049": { + "content": "iod", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30050": { + "content": "▁yid", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30051": { + "content": "▁keik", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30052": { + "content": "▁sobrado", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30053": { + "content": "▁pij", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30054": { + "content": "▁brasa", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30055": { + "content": "▁soli", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30056": { + "content": "▁perulima", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30057": { + "content": "▁cebiche", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30058": { + "content": "úpi", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30059": { + "content": "ián", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30060": { + "content": "ficial)", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30061": { + "content": "▁movadef", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30062": { + "content": "▁tombos", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30063": { + "content": "▁tongo", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30064": { + "content": "ø", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30065": { + "content": "▁trab", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30066": { + "content": "lava", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30067": { + "content": "▁huér", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30068": { + "content": "itay", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30069": { + "content": "omen", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30070": { + "content": "▁cacharro", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30071": { + "content": "▁huay", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30072": { + "content": "▁ceviche", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30073": { + "content": "▁huaman", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30074": { + "content": "▁vacancia", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30075": { + "content": "ávez", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30076": { + "content": "▁ptmr", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30077": { + "content": "ajar", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30078": { + "content": "▁juer", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30079": { + "content": "<", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30080": { + "content": "period", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30081": { + "content": "▁tmr", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30082": { + "content": "œ", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30083": { + "content": "cuen", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30084": { + "content": "▁uste", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30085": { + "content": "▁cochino", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30086": { + "content": "▁sudado", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30087": { + "content": "▁chela", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30088": { + "content": "↯", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30089": { + "content": "ë", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30090": { + "content": "▁chelas", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30091": { + "content": "▁erika", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30092": { + "content": "▁turry", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30093": { + "content": "▁pamela", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30094": { + "content": "▁pirañas", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30095": { + "content": "▁tamales", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30096": { + "content": "crim", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30097": { + "content": "▁huari", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30098": { + "content": "amba", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30099": { + "content": "▁perul", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30100": { + "content": "͡", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30101": { + "content": "▁sentim", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30102": { + "content": "▁pichanga", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30103": { + "content": "pipi", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30104": { + "content": "▁crim", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30105": { + "content": "▁serran", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30106": { + "content": "▁mundi", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30107": { + "content": "erme", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30108": { + "content": "▁lisuras", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30109": { + "content": "▁colom", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30110": { + "content": "▁porky", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30111": { + "content": "pañ", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30112": { + "content": "▁abuel", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30113": { + "content": "▁recom", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30114": { + "content": "ápi", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30115": { + "content": "▁trome", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30116": { + "content": "▁jora", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30117": { + "content": "▁xf", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30118": { + "content": "íble", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30119": { + "content": "▁calzon", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30120": { + "content": "▁gentita", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30121": { + "content": "▁repúbl", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30122": { + "content": "ami", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30123": { + "content": "▁pacham", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30124": { + "content": "▁anticuchos", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30125": { + "content": "▁pollerita", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30126": { + "content": "▁chim", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30127": { + "content": "▁cham", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30128": { + "content": "truc", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30129": { + "content": "ome", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30130": { + "content": "▁cuarenten", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30131": { + "content": "▁diab", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30132": { + "content": "ximo", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30133": { + "content": "checo", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30134": { + "content": "▁pueb", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30135": { + "content": "▁irrespon", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30136": { + "content": "▁jh", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30137": { + "content": "▁chicla", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30138": { + "content": "▁asqu", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30139": { + "content": "▁camote", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30140": { + "content": "▁mayim", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30141": { + "content": "▁term", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30142": { + "content": "rac", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30143": { + "content": "imen", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30144": { + "content": "repente", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30145": { + "content": "▁chiclayo", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30146": { + "content": "▁chihuan", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30147": { + "content": "▁chicharrones", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30148": { + "content": "▁jape", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30149": { + "content": "▁aluc", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30150": { + "content": "▁chifa", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30151": { + "content": "▁mostrito", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30152": { + "content": "▁marciano", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30153": { + "content": "enry", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30154": { + "content": "▁campeonar", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30155": { + "content": "oana", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30156": { + "content": "tras,", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30157": { + "content": "ave", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30158": { + "content": "ᴥ", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30159": { + "content": "▁dich", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30160": { + "content": "▁palte", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30161": { + "content": "▁alexan", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30162": { + "content": "arones", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30163": { + "content": "▁jauja", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30164": { + "content": "abel", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30165": { + "content": "▁malcri", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30166": { + "content": "▁terruco", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30167": { + "content": "viamente", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30168": { + "content": "▁malograr", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30169": { + "content": "▁callej", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30170": { + "content": "▁copello", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30171": { + "content": "▁bacán", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30172": { + "content": "ectacular", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30173": { + "content": "▁mondon", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30174": { + "content": "▁maños", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30175": { + "content": "▁fujira", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30176": { + "content": "▁calientito", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30177": { + "content": "▁motota", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30178": { + "content": "▁terru", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30179": { + "content": "▁llam", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30180": { + "content": "▁alejan", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30181": { + "content": "▁huariques", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30182": { + "content": "▁lapo", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30183": { + "content": "▁quinua", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30184": { + "content": "▁envidi", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30185": { + "content": "▁bellido", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30186": { + "content": "amar", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30187": { + "content": "ecía", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30188": { + "content": "▁huanca", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30189": { + "content": "▁chib", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30190": { + "content": "▁espinoza", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30191": { + "content": "▁fujimontesin", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30192": { + "content": "▁sopl", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30193": { + "content": "▁trom", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30194": { + "content": "▁chicha", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30195": { + "content": "\"\"\"", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30196": { + "content": "▁pía", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30197": { + "content": "▁arequi", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30198": { + "content": "▁señito", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30199": { + "content": "@us", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30200": { + "content": "▁adolesc", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30201": { + "content": "▁cuellos", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30202": { + "content": "▁ollu", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30203": { + "content": "▁cerrón", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30204": { + "content": "▁serenazgo", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30205": { + "content": "▁pulpines", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30206": { + "content": "▁cabros", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30207": { + "content": "▁gareca", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30208": { + "content": "oc", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30209": { + "content": ">", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30210": { + "content": "√", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30211": { + "content": "cham", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30212": { + "content": "▁herm", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30213": { + "content": "erzo", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30214": { + "content": "▁elim", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30215": { + "content": "▁catara", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30216": { + "content": "▁batán", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30217": { + "content": "▁olluquito", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30218": { + "content": "enaz", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30219": { + "content": "▁benji", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30220": { + "content": "▁pitu", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30221": { + "content": "▁chuparse", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30222": { + "content": "ŕ", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30223": { + "content": "noso", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30224": { + "content": "▁mafi", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30225": { + "content": "ores,", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30226": { + "content": "▁chicharr", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30227": { + "content": "▁chimbote", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30228": { + "content": "vade", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30229": { + "content": "\"\".", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30230": { + "content": "ajes", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30231": { + "content": "▁willax", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30232": { + "content": "▁frito", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30233": { + "content": "mendo", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30234": { + "content": "ace", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30235": { + "content": "parta", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30236": { + "content": "▁\"\"\"\"", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30237": { + "content": "▁esfu", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30238": { + "content": "tarle", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30239": { + "content": "▁castillo.", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30240": { + "content": "clus", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30241": { + "content": "▁gastón", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30242": { + "content": "▁ricooo", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30243": { + "content": "▁gastronom", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30244": { + "content": "ė", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30245": { + "content": "▁palta", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30246": { + "content": "trito", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30247": { + "content": "▁cebich", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30248": { + "content": "▁serenaz", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30249": { + "content": "▁pulpin", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30250": { + "content": "▁pab", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30251": { + "content": "∆", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30252": { + "content": "▁molle", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30253": { + "content": "▁chaufa", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30254": { + "content": "pobre", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30255": { + "content": "▁asquer", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30256": { + "content": "▁pollo,", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30257": { + "content": "▁manyas", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30258": { + "content": "ilar", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30259": { + "content": "bern", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30260": { + "content": "eg", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30261": { + "content": "▁fujim", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30262": { + "content": "▁truj", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30263": { + "content": "▁serru", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30264": { + "content": "▁mayimbu", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30265": { + "content": "▁quispe", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30266": { + "content": "ow", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30267": { + "content": "▁chimbo", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30268": { + "content": "▁huara", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30269": { + "content": "▁saltado", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30270": { + "content": "istal", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30271": { + "content": "▁lim", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30272": { + "content": "▁núm", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30273": { + "content": "▁pacheco", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30274": { + "content": "undir", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30275": { + "content": "▁jebe", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30276": { + "content": "htag", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30277": { + "content": "ň", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30278": { + "content": "av", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30279": { + "content": "ajo", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30280": { + "content": "▁magaly", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30281": { + "content": "xto", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30282": { + "content": "▁@us", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30283": { + "content": "ásitos", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30284": { + "content": "útil", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30285": { + "content": "\"\",", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30286": { + "content": "ásico", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30287": { + "content": "▁combinado", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30288": { + "content": "paña", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30289": { + "content": "▁chancho", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30290": { + "content": "▁salv", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30291": { + "content": "▁peralta", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30292": { + "content": "▁colera", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30293": { + "content": "▁reynoso!!!largo", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30294": { + "content": "▁riqu", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30295": { + "content": "▁pachamanca", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30296": { + "content": "▁pituca", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30297": { + "content": "▁huacho", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30298": { + "content": "▁chibolo", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30299": { + "content": "úper", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30300": { + "content": "▁coleg", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30301": { + "content": "poco", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30302": { + "content": "ő", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30303": { + "content": "▁gare", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30304": { + "content": "▁yidda", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30305": { + "content": "oles", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30306": { + "content": "▁phillip", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30307": { + "content": "▁asno", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30308": { + "content": "unto", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30309": { + "content": "mac", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30310": { + "content": "grim", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30311": { + "content": "▁muni", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30312": { + "content": "▁fisc", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30313": { + "content": "▁pueblo...", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30314": { + "content": "▁canchita", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30315": { + "content": "¢", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30316": { + "content": "▁minim", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30317": { + "content": "͜", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30318": { + "content": "▁idi", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30319": { + "content": "uario", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30320": { + "content": "▁hum", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30321": { + "content": "lesc", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30322": { + "content": "▁willa", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30323": { + "content": "indr", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30324": { + "content": "▁callao", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30325": { + "content": "▁pincho", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30326": { + "content": "▁monse", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30327": { + "content": "entado", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30328": { + "content": "▁huan", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30329": { + "content": "icho", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30330": { + "content": "▁úl", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30331": { + "content": "▁oe,", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30332": { + "content": "▁mache", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30333": { + "content": "▁mermelera", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30334": { + "content": "▁conchu", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30335": { + "content": "▁lime", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30336": { + "content": "▁buenazo", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30337": { + "content": "aja", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30338": { + "content": "▁mermel", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30339": { + "content": "▁pichi", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30340": { + "content": "misitay", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30341": { + "content": "pello", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30342": { + "content": "▁oport", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30343": { + "content": "▁cachar", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30344": { + "content": "cima", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30345": { + "content": "▁mermeleros", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30346": { + "content": "▁humala", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30347": { + "content": "▁churro", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30348": { + "content": "▁gonzales", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30349": { + "content": "▁terrucos", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30350": { + "content": "▁huas", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30351": { + "content": "ī", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30352": { + "content": "▁fiorella", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30353": { + "content": "▁choc", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30354": { + "content": "▁argol", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30355": { + "content": "▁represen", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30356": { + "content": "teri", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30357": { + "content": "▁inoc", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30358": { + "content": "▁gastron", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30359": { + "content": "▁chala", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30360": { + "content": "ື", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30361": { + "content": "clana", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30362": { + "content": "▁chue", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30363": { + "content": "▁chauf", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30364": { + "content": "▁mela", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30365": { + "content": "▁viej", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30366": { + "content": "▁castillo,", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30367": { + "content": "cona", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30368": { + "content": "▁causita", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30369": { + "content": "▁moch", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30370": { + "content": "roz", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30371": { + "content": "▁prosor", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30372": { + "content": "cuente", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30373": { + "content": "patrio", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30374": { + "content": "yay", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30375": { + "content": "▁ensala", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30376": { + "content": "▁beto,", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30377": { + "content": "xima", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30378": { + "content": "▁chola", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30379": { + "content": "▁chuecona", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30380": { + "content": "ego", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30381": { + "content": "aman", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30382": { + "content": "▁cajamar", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30383": { + "content": "▁pinga", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30384": { + "content": "▁garc", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30385": { + "content": "pencer", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30386": { + "content": "▁canguro", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30387": { + "content": "inci", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30388": { + "content": "ʋ", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30389": { + "content": "▁spencer", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30390": { + "content": "▁momen", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30391": { + "content": "▁entrev", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30392": { + "content": "▁palom", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30393": { + "content": "▁zam", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30394": { + "content": "▁\"\"el", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30395": { + "content": "▁pezu", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30396": { + "content": "pente", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30397": { + "content": "▁huancayo", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30398": { + "content": "uentes", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30399": { + "content": "tau", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30400": { + "content": "▁catarata", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30401": { + "content": "▁económ", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30402": { + "content": "▁telo", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30403": { + "content": "▁muchís", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30404": { + "content": "▁anticu", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30405": { + "content": "▁kather", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30406": { + "content": "▁bravazo", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30407": { + "content": "▁bac", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30408": { + "content": "▁mayra", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30409": { + "content": "▁coim", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30410": { + "content": "▁huevas", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30411": { + "content": "▁gase", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30412": { + "content": "▁\"\"la", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30413": { + "content": "▁bicenten", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30414": { + "content": "▁conmi", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30415": { + "content": "▁ayacu", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30416": { + "content": "▁cuy", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30417": { + "content": "▁encuen", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30418": { + "content": "xic", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30419": { + "content": "▁manol", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30420": { + "content": "▁transformers", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30421": { + "content": "▁piura", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30422": { + "content": "▁xoana", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30423": { + "content": "ama", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30424": { + "content": "largo", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30425": { + "content": "▁conar", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30426": { + "content": "▁huérf", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30427": { + "content": "▁fujimontes", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30428": { + "content": "▁tiem", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30429": { + "content": "▁habr", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30430": { + "content": "▁hicis", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30431": { + "content": "▁(oficial)", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30432": { + "content": "pina", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30433": { + "content": "avor", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30434": { + "content": "▁csmr", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30435": { + "content": "⏯", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30436": { + "content": "mej", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30437": { + "content": "▁sanco", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30438": { + "content": "▁reynoso", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30439": { + "content": "▁chupamedias", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30440": { + "content": "▁delici", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30441": { + "content": "3r", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30442": { + "content": "▁sobrin", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30443": { + "content": "▁transformer", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30444": { + "content": "▁coimas", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30445": { + "content": "▁juerga", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30446": { + "content": "▁tumbes", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30447": { + "content": "▁ayay", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30448": { + "content": "▁jui", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30449": { + "content": "▁mosta", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30450": { + "content": "▁floro", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30451": { + "content": "▁osores", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30452": { + "content": "▁urra", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30453": { + "content": "▁merm", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30454": { + "content": "▁munici", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30455": { + "content": "▁alucina", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30456": { + "content": "▁soro", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30457": { + "content": "▁argen", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30458": { + "content": "gual", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30459": { + "content": "imar", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30460": { + "content": "▁arequipe", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30461": { + "content": "▁tranquil", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30462": { + "content": "▁riquísimo", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30463": { + "content": "erro", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30464": { + "content": "▁pituco", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30465": { + "content": "▁fujimorista", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30466": { + "content": "erm", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30467": { + "content": "ō", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30468": { + "content": "oma", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30469": { + "content": "dova", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30470": { + "content": "▁sigu", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30471": { + "content": "▁serenos", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30472": { + "content": "▁yapa", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30473": { + "content": "▁conchas", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30474": { + "content": "irra", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30475": { + "content": "ą", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30476": { + "content": "▁ridí", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30477": { + "content": "▁vla", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30478": { + "content": "▁reportajes", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30479": { + "content": "▁trabaj", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30480": { + "content": "chul", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30481": { + "content": "vadef", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30482": { + "content": "lag", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30483": { + "content": "▁cuesti", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30484": { + "content": "aje", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30485": { + "content": "▁enri", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30486": { + "content": "▁quisi", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30487": { + "content": "▁buenazos", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30488": { + "content": "▁rocoto", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30489": { + "content": "▁aldo", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30490": { + "content": "dras", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30491": { + "content": "⃢", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30492": { + "content": "▁chinito", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30493": { + "content": "ag", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30494": { + "content": "▁jato", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30495": { + "content": "▁participen", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30496": { + "content": "▁cojudos", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30497": { + "content": "▁ica", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30498": { + "content": "▁famil", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30499": { + "content": "▁chicharrón", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30500": { + "content": "arr", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30501": { + "content": "▁chambe", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30502": { + "content": "▁betito", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30503": { + "content": "▁riv", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30504": { + "content": "▁sinverguenza", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30505": { + "content": "gaba", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30506": { + "content": "▁chancay", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30507": { + "content": "▁eslava", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30508": { + "content": "▁malog", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30509": { + "content": "▁urraca", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30510": { + "content": "▁chacal", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30511": { + "content": "▁pisco", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30512": { + "content": "▁coju", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30513": { + "content": "▁dij", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30514": { + "content": "▁cavi", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30515": { + "content": "▁besti", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30516": { + "content": "▁frau", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30517": { + "content": "▁renzo", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30518": { + "content": "celente", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30519": { + "content": "uen", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30520": { + "content": "vecho", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30521": { + "content": "ontes", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30522": { + "content": "tauro", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30523": { + "content": "▁fujimoristas", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30524": { + "content": "▁palomino", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30525": { + "content": "▁hom", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30526": { + "content": "vamente", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30527": { + "content": "▁manolito", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30528": { + "content": "▁medioc", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30529": { + "content": "▁chilindr", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30530": { + "content": "▁huar", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30531": { + "content": "▁peruano,", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30532": { + "content": "▁misias", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30533": { + "content": "▁alimen", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30534": { + "content": "carra", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30535": { + "content": "▁chanca", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30536": { + "content": "▁sender", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30537": { + "content": "▁antauro", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30538": { + "content": "sabilidad", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30539": { + "content": "▁poto", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30540": { + "content": "▁acuña", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30541": { + "content": "▁tacna", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30542": { + "content": "▁bataclana", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30543": { + "content": "▁árbi", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30544": { + "content": "fía", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30545": { + "content": "▁aji", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30546": { + "content": "▁roco", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30547": { + "content": "▁alegr", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30548": { + "content": "gusto", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30549": { + "content": "▁fujimorismo", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30550": { + "content": "▁fujirra", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30551": { + "content": "▁arequipa", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30552": { + "content": "huan", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30553": { + "content": "lva", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30554": { + "content": "▁warmisitay", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30555": { + "content": "▁acom", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30556": { + "content": "brero", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30557": { + "content": "▁calata", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30558": { + "content": "ám", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30559": { + "content": "cim", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30560": { + "content": "▁*no*", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30561": { + "content": "▁téc", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30562": { + "content": "▁fijas", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30563": { + "content": "▁ampay", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30564": { + "content": "▁pane", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30565": { + "content": "ß", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30566": { + "content": "▁chibol", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30567": { + "content": "bién", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30568": { + "content": "▁\"\"no", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30569": { + "content": "tros,", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30570": { + "content": "▁senderistas", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30571": { + "content": "▁cevich", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30572": { + "content": "▁calato", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30573": { + "content": "omo", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30574": { + "content": "▁gritó", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30575": { + "content": "cioso", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30576": { + "content": "▁merino", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30577": { + "content": "▁sendero", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30578": { + "content": "▁lágrim", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30579": { + "content": "▁gaston", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30580": { + "content": "lau", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30581": { + "content": "▁ceb", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30582": { + "content": "▁delincuen", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30583": { + "content": "ź", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30584": { + "content": "▁caviares", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30585": { + "content": "▁terc", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30586": { + "content": "▁chibolos", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30587": { + "content": "▁destr", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30588": { + "content": "orge", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30589": { + "content": "▁soroche", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30590": { + "content": "inoche", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30591": { + "content": "óm", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30592": { + "content": "rim", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30593": { + "content": "▁montesinos", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30594": { + "content": "▁taper", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30595": { + "content": "▁fujimori,", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30596": { + "content": "▁paj", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30597": { + "content": "▁enem", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30598": { + "content": "▁malcriado", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30599": { + "content": "▁pam", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30600": { + "content": "▁víc", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30601": { + "content": "⃟", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30602": { + "content": "▁betofilo", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30603": { + "content": "▁carame", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30604": { + "content": "▁chévere", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30605": { + "content": "▁\"\"\"", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30606": { + "content": "▁pavos", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30607": { + "content": "entable", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30608": { + "content": "▁fregado", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30609": { + "content": "▁rosque", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30610": { + "content": "▁atorrante", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30611": { + "content": "▁mondongo", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30612": { + "content": "▁poller", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30613": { + "content": "▁pepa", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30614": { + "content": "ectacu", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30615": { + "content": "anga", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30616": { + "content": "trado", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30617": { + "content": "▁cusco", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30618": { + "content": "▁anteri", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30619": { + "content": "▁machete", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "30620": { + "content": "▁mermelero", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false } }, "bos_token": "", @@ -46,9 +5014,16 @@ "cls_token": "", "eos_token": "", "mask_token": "", + "max_length": 128, "model_max_length": 1000000000000000019884624838656, + "pad_to_multiple_of": null, "pad_token": "", + "pad_token_type_id": 0, + "padding_side": "right", "sep_token": "", + "stride": 0, "tokenizer_class": "PreTrainedTokenizerFast", + "truncation_side": "right", + "truncation_strategy": "longest_first", "unk_token": "" }