mms-MGB3 / vocab.json
herwoww's picture
Upload tokenizer
1e8a052 verified
raw
history blame
No virus
1.28 kB
{
"ara": {
"#": 1,
"'": 2,
"(": 3,
")": 4,
"-": 5,
".": 6,
"A": 7,
"C": 8,
"D": 9,
"E": 10,
"F": 11,
"G": 12,
"H": 13,
"I": 14,
"L": 15,
"M": 16,
"N": 17,
"O": 18,
"P": 19,
"R": 20,
"S": 21,
"T": 22,
"U": 23,
"V": 24,
"Y": 25,
"[PAD]": 93,
"[UNK]": 92,
"_": 26,
"a": 27,
"b": 28,
"c": 29,
"d": 30,
"e": 31,
"f": 32,
"g": 33,
"h": 34,
"i": 35,
"k": 36,
"l": 37,
"m": 38,
"n": 39,
"o": 40,
"p": 41,
"r": 42,
"s": 43,
"t": 44,
"u": 45,
"v": 46,
"w": 47,
"y": 48,
"z": 49,
"|": 0,
"،": 50,
"؟": 51,
"ء": 52,
"آ": 53,
"أ": 54,
"ؤ": 55,
"إ": 56,
"ئ": 57,
"ا": 58,
"ب": 59,
"ة": 60,
"ت": 61,
"ث": 62,
"ج": 63,
"ح": 64,
"خ": 65,
"د": 66,
"ذ": 67,
"ر": 68,
"ز": 69,
"س": 70,
"ش": 71,
"ص": 72,
"ض": 73,
"ط": 74,
"ظ": 75,
"ع": 76,
"غ": 77,
"ـ": 78,
"ف": 79,
"ق": 80,
"ك": 81,
"ل": 82,
"م": 83,
"ن": 84,
"ه": 85,
"و": 86,
"ى": 87,
"ي": 88,
"ً": 89,
"ُ": 90,
"ّ": 91
}
}