ad019el committed on
Commit
0252ab3
1 Parent(s): 25be0ac

Upload tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +44 -62
vocab.json CHANGED
@@ -1,63 +1,45 @@
1
  {
2
- "!": 0,
3
- "\"": 26,
4
- ",": 47,
5
- ".": 28,
6
- ":": 41,
7
- "[PAD]": 60,
8
- "[UNK]": 59,
9
- "|": 4,
10
- " ": 21,
11
- "،": 16,
12
- "؟": 56,
13
- "ء": 30,
14
- "آ": 37,
15
- "أ": 38,
16
- "ؤ": 5,
17
- "إ": 49,
18
- "ئ": 40,
19
- "ا": 22,
20
- "ب": 34,
21
- "ة": 17,
22
- "ت": 24,
23
- "ث": 19,
24
- "ج": 6,
25
- "ح": 27,
26
- "خ": 44,
27
- "د": 9,
28
- "ذ": 54,
29
- "ر": 14,
30
- "ز": 43,
31
- "س": 55,
32
- "ش": 29,
33
- "ص": 2,
34
- "ض": 50,
35
- "ط": 31,
36
- "ظ": 57,
37
- "ع": 46,
38
- "غ": 53,
39
- "ـ": 25,
40
- "ف": 48,
41
- "ق": 39,
42
- "ك": 1,
43
- "ل": 11,
44
- "م": 52,
45
- "ن": 8,
46
- "ه": 3,
47
- "و": 51,
48
- "ى": 35,
49
- "ي": 42,
50
- "ً": 12,
51
- "ٌ": 32,
52
- "ٍ": 45,
53
- "َ": 10,
54
- "ُ": 20,
55
- "ِ": 58,
56
- "ّ": 23,
57
- "ْ": 15,
58
- "ٱ": 36,
59
- "ڤ": 18,
60
- "ک": 7,
61
- "ی": 13,
62
- "—": 33
63
- }
 
1
  {
2
+ "[PAD]": 42,
3
+ "[UNK]": 41,
4
+ "|": 8,
5
+ "ء": 36,
6
+ "آ": 25,
7
+ "أ": 9,
8
+ "ؤ": 27,
9
+ "إ": 24,
10
+ "ئ": 14,
11
+ "ا": 17,
12
+ "ب": 30,
13
+ "ة": 38,
14
+ "ت": 0,
15
+ "ث": 35,
16
+ "ج": 12,
17
+ "ح": 33,
18
+ "خ": 29,
19
+ "د": 18,
20
+ "ذ": 6,
21
+ "ر": 32,
22
+ "ز": 1,
23
+ "س": 11,
24
+ "ش": 13,
25
+ "ص": 3,
26
+ "ض": 7,
27
+ "ط": 2,
28
+ "ظ": 15,
29
+ "ع": 5,
30
+ "غ": 20,
31
+ "ف": 34,
32
+ "ق": 16,
33
+ "ك": 40,
34
+ "ل": 26,
35
+ "م": 23,
36
+ "ن": 4,
37
+ "ه": 10,
38
+ "و": 31,
39
+ "ى": 22,
40
+ "ي": 39,
41
+ "ٱ": 21,
42
+ "ڤ": 19,
43
+ "ک": 28,
44
+ "ی": 37
45
+ }