tamasheq-99 / vocab.json
ad019el's picture
Upload tokenizer
b28b38c
raw
history blame
1.13 kB
{
":": 59,
"J": 76,
"[PAD]": 92,
"[UNK]": 91,
"a": 21,
"b": 86,
"c": 53,
"d": 8,
"e": 29,
"f": 41,
"g": 11,
"h": 54,
"i": 7,
"j": 25,
"k": 60,
"l": 75,
"m": 89,
"n": 0,
"o": 67,
"p": 3,
"q": 47,
"r": 1,
"s": 69,
"t": 46,
"u": 44,
"v": 56,
"w": 82,
"y": 35,
"z": 43,
"|": 64,
"°": 40,
"à": 84,
"ä": 83,
"å": 23,
"è": 18,
"ì": 37,
"ò": 78,
"ö": 17,
"ù": 85,
"ą": 22,
"č": 80,
"ė": 28,
"ę": 24,
"į": 72,
"š": 65,
"ū": 61,
"ų": 45,
"ž": 73,
"அ": 42,
"ஆ": 66,
"இ": 62,
"உ": 70,
"ஊ": 16,
"எ": 63,
"ஏ": 51,
"ஐ": 10,
"ஒ": 32,
"ஓ": 49,
"க": 48,
"ங": 26,
"ச": 87,
"ஜ": 88,
"ஞ": 79,
"ட": 31,
"ண": 74,
"த": 20,
"ந": 36,
"ன": 34,
"ப": 9,
"ம": 6,
"ய": 15,
"ர": 52,
"ற": 57,
"ல": 55,
"ள": 12,
"ழ": 5,
"வ": 50,
"ஷ": 38,
"ஸ": 77,
"ா": 58,
"ி": 14,
"ீ": 30,
"ு": 4,
"ூ": 19,
"ெ": 71,
"ே": 33,
"ை": 81,
"ொ": 90,
"ோ": 13,
"்": 27,
"’": 39,
"„": 2,
"…": 68
}