|
{ |
|
"version": "1.0", |
|
"truncation": null, |
|
"padding": null, |
|
"added_tokens": [ |
|
{ |
|
"id": 0, |
|
"content": "<unk>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 1, |
|
"content": "<bos>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 2, |
|
"content": "</eos>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 3, |
|
"content": "<mask>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
} |
|
], |
|
"normalizer": null, |
|
"pre_tokenizer": null, |
|
"post_processor": { |
|
"type": "TemplateProcessing", |
|
"single": [ |
|
{ |
|
"SpecialToken": { |
|
"id": "<bos>", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"Sequence": { |
|
"id": "A", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"SpecialToken": { |
|
"id": "</eos>", |
|
"type_id": 0 |
|
} |
|
} |
|
], |
|
"pair": [ |
|
{ |
|
"Sequence": { |
|
"id": "A", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"Sequence": { |
|
"id": "B", |
|
"type_id": 1 |
|
} |
|
} |
|
], |
|
"special_tokens": { |
|
"<bos>": { |
|
"id": "<bos>", |
|
"ids": [ |
|
1 |
|
], |
|
"tokens": [ |
|
"<bos>" |
|
] |
|
}, |
|
"</eos>": { |
|
"id": "</eos>", |
|
"ids": [ |
|
2 |
|
], |
|
"tokens": [ |
|
"</eos>" |
|
] |
|
} |
|
} |
|
}, |
|
"decoder": null, |
|
"model": { |
|
"type": "BPE", |
|
"dropout": null, |
|
"unk_token": "<unk>", |
|
"continuing_subword_prefix": null, |
|
"end_of_word_suffix": null, |
|
"fuse_unk": false, |
|
"byte_fallback": false, |
|
"ignore_merges": false, |
|
"vocab": { |
|
"<unk>": 0, |
|
"<bos>": 1, |
|
"</eos>": 2, |
|
"<mask>": 3, |
|
" ": 4, |
|
|
|
"ᵝ": 4, |
|
|
|
"!": 5, |
|
"\"": 6, |
|
"(": 7, |
|
"{": 7, |
|
"[": 7, |
|
")": 8, |
|
"}": 8, |
|
"]": 8, |
|
",": 9, |
|
"-": 10, |
|
".": 11, |
|
"1": 12, |
|
":": 13, |
|
";": 14, |
|
"?": 15, |
|
"a": 16, |
|
"ä": 16, |
|
"ɒ": 16, |
|
"b": 17, |
|
"c": 18, |
|
"d": 19, |
|
"e": 20, |
|
"f": 21, |
|
"h": 22, |
|
"i": 23, |
|
"ĩ": 23, |
|
"j": 24, |
|
"k": 25, |
|
"l": 26, |
|
"m": 27, |
|
"n": 28, |
|
"ɴ": 28, |
|
"ɲ": 28, |
|
"o": 29, |
|
"̞": 29, |
|
"p": 30, |
|
"ɸ": 30, |
|
"q": 31, |
|
"r": 32, |
|
"ɽ": 32, |
|
"ʁ": 32, |
|
"s": 33, |
|
"t": 34, |
|
"u": 35, |
|
"ø": 35, |
|
"œ": 35, |
|
"y": 35, |
|
"ɣ": 35, |
|
"ũ": 35, |
|
"v": 36, |
|
"w": 37, |
|
"ʍ": 37, |
|
"x": 38, |
|
"z": 39, |
|
"¡": 40, |
|
"«": 41, |
|
"»": 42, |
|
"¿": 43, |
|
"æ": 44, |
|
"ç": 45, |
|
"ð": 46, |
|
"ŋ": 47, |
|
"ɐ": 48, |
|
"ɑ": 49, |
|
"ɔ": 50, |
|
"ɕ": 51, |
|
"ə": 52, |
|
"ɚ": 53, |
|
"ɛ": 54, |
|
"ɜ": 55, |
|
"ɟ": 56, |
|
"ɡ": 57, |
|
"ɪ": 58, |
|
"ɬ": 59, |
|
"ɯ": 60, |
|
"ɹ": 61, |
|
"ɾ": 62, |
|
"ʃ": 63, |
|
"ʈ": 64, |
|
"ʊ": 65, |
|
"ʋ": 66, |
|
"ʌ": 67, |
|
"ʑ": 68, |
|
"ʒ": 69, |
|
"ʔ": 70, |
|
"ʲ": 71, |
|
"ˈ": 72, |
|
"ˌ": 73, |
|
"ː": 74, |
|
"̃": 75, |
|
"̩": 76, |
|
"θ": 77, |
|
"ᵻ": 78, |
|
"—": 79, |
|
"“": 80, |
|
"”": 81, |
|
"…": 82, |
|
"ˈɛ": 83, |
|
"iː": 84, |
|
"aɪ": 85, |
|
"nd": 86, |
|
"ˈɪ": 87, |
|
"eɪ": 88, |
|
"ˈæ": 89, |
|
"ðə": 90, |
|
"oʊ": 91, |
|
"ɑː": 92, |
|
"ˈeɪ": 93, |
|
"ən": 94, |
|
"uː": 95, |
|
"ˈʌ": 96, |
|
"ˈaɪ": 97, |
|
"st": 98, |
|
"ˈɔ": 99, |
|
"ˈoʊ": 100, |
|
"ˈiː": 101, |
|
"ˈɑː": 102, |
|
"ænd": 103, |
|
"ːɹ": 104, |
|
"ɪŋ": 105, |
|
"ɜː": 106, |
|
"ɪn": 107, |
|
"tə": 108, |
|
"ʌv": 109, |
|
"aʊ": 110, |
|
"əl": 111, |
|
"ˈuː": 112, |
|
"tʃ": 113, |
|
"ɪz": 114, |
|
"ˈɜː": 115, |
|
"ˌʌ": 116, |
|
"æt": 117, |
|
"dʒ": 118, |
|
"ˈɔː": 119, |
|
"ɪt": 120, |
|
"ˈaʊ": 121, |
|
"ɚɹ": 122, |
|
"ˈɛn": 123, |
|
"wʌ": 124, |
|
"li": 125, |
|
"hiː": 126, |
|
"ˌɛ": 127, |
|
"wɪ": 128, |
|
"wʌz": 129, |
|
"ðæt": 130, |
|
"juː": 131, |
|
"oːɹ": 132, |
|
"ðɪ": 133, |
|
"sˈɛ": 134, |
|
"ˌɪ": 135, |
|
"ˈɑːɹ": 136, |
|
"nt": 137, |
|
"ˈʊ": 138, |
|
"ənt": 139, |
|
"hɪz": 140, |
|
"ˌɑː": 141, |
|
"hæ": 142, |
|
"ɔːɹ": 143, |
|
"ˈɛɹ": 144, |
|
"wɪð": 145, |
|
"ᵻd": 146, |
|
"ˈoːɹ": 147, |
|
"pɹ": 148, |
|
"ˈɔːl": 149, |
|
"mˌ": 150, |
|
"ʃən": 151, |
|
"kt": 152, |
|
"ˌoʊ": 153, |
|
"ˈɔːɹ": 154, |
|
"fɹ": 155, |
|
"æz": 156, |
|
"ˌʌt": 157, |
|
"ʃiː": 158, |
|
"ˈɛl": 159, |
|
"ˌaʊ": 160, |
|
"ˈʌn": 161, |
|
"əs": 162, |
|
"hɜː": 163, |
|
"lˈaɪ": 164, |
|
"ˈæn": 165, |
|
"ˈɪɹ": 166, |
|
"ʊd": 167, |
|
"ɹᵻ": 168, |
|
"ld": 169, |
|
"bˌʌt": 170, |
|
"ks": 171, |
|
"nˈoʊ": 172, |
|
"hæd": 173, |
|
"ɾɚ": 174, |
|
"ɛɹ": 175, |
|
"ˈɪŋ": 176, |
|
"ɡɹ": 177, |
|
"nˌɑː": 178, |
|
"ɔn": 179, |
|
"vɚ": 180, |
|
"maɪ": 181, |
|
"fɔːɹ": 182, |
|
"ðɚ": 183, |
|
"tʊ": 184, |
|
"ðɛɹ": 185, |
|
"nˌɑːt": 186, |
|
"ˈʌm": 187, |
|
"tɹ": 188, |
|
"sˈiː": 189, |
|
"ʌvðə": 190, |
|
"mˈɪ": 191, |
|
"hˈæ": 192, |
|
"ˌɪm": 193, |
|
"lˈeɪ": 194, |
|
"ɪk": 195, |
|
"sp": 196, |
|
"hˌɪm": 197, |
|
"ɐn": 198, |
|
"ðeɪ": 199, |
|
"lˈɪ": 200, |
|
"ɾi": 201, |
|
"lˈɛ": 202, |
|
"bɹ": 203, |
|
"kɹ": 204, |
|
"lˈæ": 205, |
|
"ˈɪl": 206, |
|
"jˈuː": 207, |
|
"ʌm": 208, |
|
"mˌiː": 209, |
|
"bᵻ": 210, |
|
"wˈʌn": 211, |
|
"ˌɪn": 212, |
|
"ˈɪn": 213, |
|
"ˈoʊn": 214, |
|
"sˈɛd": 215, |
|
"biː": 216, |
|
"ˈɛd": 217, |
|
"ˈaɪt": 218, |
|
"baɪ": 219, |
|
"fɹʌm": 220, |
|
"ɪs": 221, |
|
"ɚz": 222, |
|
"ðɪs": 223, |
|
"əns": 224, |
|
"bəl": 225, |
|
"ɪf": 226, |
|
"ɪnðə": 227, |
|
"əm": 228, |
|
"ᵻz": 229, |
|
"ˌuː": 230, |
|
"wˈeɪ": 231, |
|
"ft": 232, |
|
"wiː": 233, |
|
"stɹ": 234, |
|
"lˈiː": 235, |
|
"iːz": 236, |
|
"pt": 237, |
|
"jʊ": 238, |
|
"ɚd": 239, |
|
"ˌaɪ": 240, |
|
"kw": 241, |
|
"ˌɔn": 242, |
|
"ˈaɪd": 243, |
|
"ɪm": 244, |
|
"ˈʌst": 245, |
|
"ˈoʊld": 246, |
|
"ts": 247, |
|
"ˌɪtʃ": 248, |
|
"sˌoʊ": 249, |
|
"dˈɪ": 250, |
|
"ɑːɹ": 251, |
|
"hɐ": 252, |
|
"sˈeɪ": 253, |
|
"ɾᵻd": 254, |
|
"wˌɪtʃ": 255 |
|
}, |
|
"merges": [ |
|
"ˈ ɛ", |
|
"i ː", |
|
"a ɪ", |
|
"n d", |
|
"ˈ ɪ", |
|
"e ɪ", |
|
"ˈ æ", |
|
"ð ə", |
|
"o ʊ", |
|
"ɑ ː", |
|
"ˈ eɪ", |
|
"ə n", |
|
"u ː", |
|
"ˈ ʌ", |
|
"ˈ aɪ", |
|
"s t", |
|
"ˈ ɔ", |
|
"ˈ oʊ", |
|
"ˈ iː", |
|
"ˈ ɑː", |
|
"æ nd", |
|
"ː ɹ", |
|
"ɪ ŋ", |
|
"ɜ ː", |
|
"ɪ n", |
|
"t ə", |
|
"ʌ v", |
|
"a ʊ", |
|
"ə l", |
|
"ˈ uː", |
|
"t ʃ", |
|
"ɪ z", |
|
"ˈ ɜː", |
|
"ˌ ʌ", |
|
"æ t", |
|
"d ʒ", |
|
"ˈɔ ː", |
|
"ɪ t", |
|
"ˈ aʊ", |
|
"ɚ ɹ", |
|
"ˈɛ n", |
|
"w ʌ", |
|
"l i", |
|
"h iː", |
|
"ˌ ɛ", |
|
"w ɪ", |
|
"wʌ z", |
|
"ð æt", |
|
"j uː", |
|
"o ːɹ", |
|
"ð ɪ", |
|
"s ˈɛ", |
|
"ˌ ɪ", |
|
"ˈɑː ɹ", |
|
"n t", |
|
"ˈ ʊ", |
|
"ən t", |
|
"h ɪz", |
|
"ˌ ɑː", |
|
"h æ", |
|
"ɔ ːɹ", |
|
"ˈɛ ɹ", |
|
"wɪ ð", |
|
"ᵻ d", |
|
"ˈ oːɹ", |
|
"p ɹ", |
|
"ˈɔː l", |
|
"m ˌ", |
|
"ʃ ən", |
|
"k t", |
|
"ˌ oʊ", |
|
"ˈɔ ːɹ", |
|
"f ɹ", |
|
"æ z", |
|
"ˌʌ t", |
|
"ʃ iː", |
|
"ˈɛ l", |
|
"ˌ aʊ", |
|
"ˈʌ n", |
|
"ə s", |
|
"h ɜː", |
|
"l ˈaɪ", |
|
"ˈæ n", |
|
"ˈɪ ɹ", |
|
"ʊ d", |
|
"ɹ ᵻ", |
|
"l d", |
|
"b ˌʌt", |
|
"k s", |
|
"n ˈoʊ", |
|
"hæ d", |
|
"ɾ ɚ", |
|
"ɛ ɹ", |
|
"ˈɪ ŋ", |
|
"ɡ ɹ", |
|
"n ˌɑː", |
|
"ɔ n", |
|
"v ɚ", |
|
"m aɪ", |
|
"f ɔːɹ", |
|
"ð ɚ", |
|
"t ʊ", |
|
"ð ɛɹ", |
|
"nˌɑː t", |
|
"ˈʌ m", |
|
"t ɹ", |
|
"s ˈiː", |
|
"ʌv ðə", |
|
"m ˈɪ", |
|
"h ˈæ", |
|
"ˌɪ m", |
|
"l ˈeɪ", |
|
"ɪ k", |
|
"s p", |
|
"h ˌɪm", |
|
"ɐ n", |
|
"ð eɪ", |
|
"l ˈɪ", |
|
"ɾ i", |
|
"l ˈɛ", |
|
"b ɹ", |
|
"k ɹ", |
|
"l ˈæ", |
|
"ˈɪ l", |
|
"j ˈuː", |
|
"ʌ m", |
|
"mˌ iː", |
|
"b ᵻ", |
|
"w ˈʌn", |
|
"ˌ ɪn", |
|
"ˈɪ n", |
|
"ˈoʊ n", |
|
"sˈɛ d", |
|
"b iː", |
|
"ˈɛ d", |
|
"ˈaɪ t", |
|
"b aɪ", |
|
"fɹ ʌm", |
|
"ɪ s", |
|
"ɚ z", |
|
"ðɪ s", |
|
"ən s", |
|
"b əl", |
|
"ɪ f", |
|
"ɪn ðə", |
|
"ə m", |
|
"ᵻ z", |
|
"ˌ uː", |
|
"w ˈeɪ", |
|
"f t", |
|
"w iː", |
|
"st ɹ", |
|
"l ˈiː", |
|
"iː z", |
|
"p t", |
|
"j ʊ", |
|
"ɚ d", |
|
"ˌ aɪ", |
|
"k w", |
|
"ˌ ɔn", |
|
"ˈaɪ d", |
|
"ɪ m", |
|
"ˈʌ st", |
|
"ˈoʊ ld", |
|
"t s", |
|
"ˌɪ tʃ", |
|
"s ˌoʊ", |
|
"d ˈɪ", |
|
"ɑː ɹ", |
|
"h ɐ", |
|
"s ˈeɪ", |
|
"ɾ ᵻd", |
|
"w ˌɪtʃ" |
|
] |
|
} |
|
} |