m2m_translation_v / tokenizer_config.json
Ragab167's picture
Upload tokenizer
9d085e1 verified
{
"added_tokens_decoder": {
"0": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<pad>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"3": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128004": {
"content": "__af__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128005": {
"content": "__am__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128006": {
"content": "__ar__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128007": {
"content": "__ast__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128008": {
"content": "__az__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128009": {
"content": "__ba__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128010": {
"content": "__be__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128011": {
"content": "__bg__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128012": {
"content": "__bn__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128013": {
"content": "__br__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128014": {
"content": "__bs__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128015": {
"content": "__ca__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128016": {
"content": "__ceb__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128017": {
"content": "__cs__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128018": {
"content": "__cy__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128019": {
"content": "__da__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128020": {
"content": "__de__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128021": {
"content": "__el__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128022": {
"content": "__en__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128023": {
"content": "__es__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128024": {
"content": "__et__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128025": {
"content": "__fa__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128026": {
"content": "__ff__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128027": {
"content": "__fi__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128028": {
"content": "__fr__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128029": {
"content": "__fy__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128030": {
"content": "__ga__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128031": {
"content": "__gd__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128032": {
"content": "__gl__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128033": {
"content": "__gu__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128034": {
"content": "__ha__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128035": {
"content": "__he__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128036": {
"content": "__hi__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128037": {
"content": "__hr__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128038": {
"content": "__ht__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128039": {
"content": "__hu__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128040": {
"content": "__hy__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128041": {
"content": "__id__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128042": {
"content": "__ig__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128043": {
"content": "__ilo__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128044": {
"content": "__is__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128045": {
"content": "__it__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128046": {
"content": "__ja__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128047": {
"content": "__jv__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128048": {
"content": "__ka__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128049": {
"content": "__kk__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128050": {
"content": "__km__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128051": {
"content": "__kn__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128052": {
"content": "__ko__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128053": {
"content": "__lb__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128054": {
"content": "__lg__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128055": {
"content": "__ln__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128056": {
"content": "__lo__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128057": {
"content": "__lt__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128058": {
"content": "__lv__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128059": {
"content": "__mg__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128060": {
"content": "__mk__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128061": {
"content": "__ml__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128062": {
"content": "__mn__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128063": {
"content": "__mr__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128064": {
"content": "__ms__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128065": {
"content": "__my__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128066": {
"content": "__ne__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128067": {
"content": "__nl__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128068": {
"content": "__no__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128069": {
"content": "__ns__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128070": {
"content": "__oc__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128071": {
"content": "__or__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128072": {
"content": "__pa__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128073": {
"content": "__pl__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128074": {
"content": "__ps__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128075": {
"content": "__pt__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128076": {
"content": "__ro__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128077": {
"content": "__ru__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128078": {
"content": "__sd__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128079": {
"content": "__si__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128080": {
"content": "__sk__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128081": {
"content": "__sl__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128082": {
"content": "__so__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128083": {
"content": "__sq__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128084": {
"content": "__sr__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128085": {
"content": "__ss__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128086": {
"content": "__su__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128087": {
"content": "__sv__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128088": {
"content": "__sw__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128089": {
"content": "__ta__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128090": {
"content": "__th__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128091": {
"content": "__tl__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128092": {
"content": "__tn__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128093": {
"content": "__tr__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128094": {
"content": "__uk__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128095": {
"content": "__ur__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128096": {
"content": "__uz__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128097": {
"content": "__vi__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128098": {
"content": "__wo__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128099": {
"content": "__xh__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128100": {
"content": "__yi__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128101": {
"content": "__yo__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128102": {
"content": "__zh__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128103": {
"content": "__zu__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [
"__af__",
"__am__",
"__ar__",
"__ast__",
"__az__",
"__ba__",
"__be__",
"__bg__",
"__bn__",
"__br__",
"__bs__",
"__ca__",
"__ceb__",
"__cs__",
"__cy__",
"__da__",
"__de__",
"__el__",
"__en__",
"__es__",
"__et__",
"__fa__",
"__ff__",
"__fi__",
"__fr__",
"__fy__",
"__ga__",
"__gd__",
"__gl__",
"__gu__",
"__ha__",
"__he__",
"__hi__",
"__hr__",
"__ht__",
"__hu__",
"__hy__",
"__id__",
"__ig__",
"__ilo__",
"__is__",
"__it__",
"__ja__",
"__jv__",
"__ka__",
"__kk__",
"__km__",
"__kn__",
"__ko__",
"__lb__",
"__lg__",
"__ln__",
"__lo__",
"__lt__",
"__lv__",
"__mg__",
"__mk__",
"__ml__",
"__mn__",
"__mr__",
"__ms__",
"__my__",
"__ne__",
"__nl__",
"__no__",
"__ns__",
"__oc__",
"__or__",
"__pa__",
"__pl__",
"__ps__",
"__pt__",
"__ro__",
"__ru__",
"__sd__",
"__si__",
"__sk__",
"__sl__",
"__so__",
"__sq__",
"__sr__",
"__ss__",
"__su__",
"__sv__",
"__sw__",
"__ta__",
"__th__",
"__tl__",
"__tn__",
"__tr__",
"__uk__",
"__ur__",
"__uz__",
"__vi__",
"__wo__",
"__xh__",
"__yi__",
"__yo__",
"__zh__",
"__zu__"
],
"bos_token": "<s>",
"clean_up_tokenization_spaces": true,
"eos_token": "</s>",
"language_codes": "m2m100",
"model_max_length": 1024,
"num_madeup_words": 8,
"pad_token": "<pad>",
"sep_token": "</s>",
"sp_model_kwargs": {},
"src_lang": "ar",
"tgt_lang": "en",
"tokenizer_class": "M2M100Tokenizer",
"unk_token": "<unk>"
}