W2V2-bert-Malasar / vocab.json
leenag's picture
Upload tokenizer
c47a3d8 verified
raw
history blame
654 Bytes
{
"(": 1,
")": 2,
"[PAD]": 50,
"[UNK]": 49,
"|": 0,
"அ": 3,
"ஆ": 4,
"இ": 5,
"ஈ": 6,
"உ": 7,
"ஊ": 8,
"எ": 9,
"ஏ": 10,
"ஐ": 11,
"ஒ": 12,
"ஓ": 13,
"க": 14,
"ங": 15,
"ச": 16,
"ஜ": 17,
"ஞ": 18,
"ட": 19,
"ண": 20,
"த": 21,
"ந": 22,
"ன": 23,
"ப": 24,
"ம": 25,
"ய": 26,
"ர": 27,
"ற": 28,
"ல": 29,
"ள": 30,
"ழ": 31,
"வ": 32,
"ஷ": 33,
"ஸ": 34,
"ா": 35,
"ி": 36,
"ீ": 37,
"ு": 38,
"ூ": 39,
"ெ": 40,
"ே": 41,
"ை": 42,
"ொ": 43,
"ோ": 44,
"்": 45,
"‍": 46,
"’": 47,
"": 48
}