ad019el committed on
Commit
a3938b0
1 Parent(s): 4855bcb

Upload tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +62 -69
vocab.json CHANGED
@@ -1,71 +1,64 @@
1
  {
2
- "!": 66,
3
- "\"": 52,
4
- ",": 12,
5
- ".": 21,
6
- ":": 33,
7
- "[PAD]": 68,
8
- "[UNK]": 67,
9
- "a": 14,
10
- "d": 58,
11
- "h": 46,
12
- "i": 65,
13
- "n": 41,
14
- "p": 5,
15
- "s": 9,
16
- "v": 56,
17
- "|": 6,
18
- " ": 57,
19
- "،": 40,
20
- "؟": 24,
21
- "ء": 59,
22
- "آ": 63,
23
- "أ": 13,
24
- "ؤ": 1,
25
- "إ": 43,
26
- "ئ": 23,
27
- "ا": 60,
28
- "ب": 27,
29
- "ة": 35,
30
- "ت": 29,
31
- "ث": 45,
32
- "ج": 44,
33
- "ح": 15,
34
- "خ": 22,
35
- "د": 4,
36
- "ذ": 50,
37
- "ر": 53,
38
- "ز": 34,
39
- "س": 42,
40
- "ش": 26,
41
- "ص": 38,
42
- "ض": 17,
43
- "ط": 36,
44
- "ظ": 7,
45
- "ع": 39,
46
- "غ": 48,
47
- "ـ": 8,
48
- "ف": 54,
49
- "ق": 2,
50
- "ك": 18,
51
- "ل": 51,
52
- "م": 11,
53
- "ن": 47,
54
- "ه": 49,
55
- "و": 64,
56
- "ى": 0,
57
- "ي": 16,
58
- "ً": 28,
59
- "ٌ": 61,
60
- "ٍ": 19,
61
- "َ": 55,
62
- "ُ": 30,
63
- "ِ": 3,
64
- "ّ": 10,
65
- "ْ": 62,
66
- "چ": 32,
67
- "ڤ": 37,
68
- "ک": 25,
69
- "ی": 20,
70
- "—": 31
71
  }
 
1
  {
2
+ "!": 0,
3
+ "\"": 26,
4
+ ",": 48,
5
+ ".": 28,
6
+ ":": 41,
7
+ "[PAD]": 61,
8
+ "[UNK]": 60,
9
+ "p": 47,
10
+ "|": 4,
11
+ " ": 21,
12
+ "،": 16,
13
+ "؟": 57,
14
+ "ء": 30,
15
+ "آ": 37,
16
+ "أ": 38,
17
+ "ؤ": 5,
18
+ "إ": 50,
19
+ "ئ": 40,
20
+ "ا": 22,
21
+ "ب": 34,
22
+ "ة": 17,
23
+ "ت": 24,
24
+ "ث": 19,
25
+ "ج": 6,
26
+ "ح": 27,
27
+ "خ": 44,
28
+ "د": 9,
29
+ "ذ": 55,
30
+ "ر": 14,
31
+ "ز": 43,
32
+ "س": 56,
33
+ "ش": 29,
34
+ "ص": 2,
35
+ "ض": 51,
36
+ "ط": 31,
37
+ "ظ": 58,
38
+ "ع": 46,
39
+ "غ": 54,
40
+ "ـ": 25,
41
+ "ف": 49,
42
+ "ق": 39,
43
+ "ك": 1,
44
+ "ل": 11,
45
+ "م": 53,
46
+ "ن": 8,
47
+ "ه": 3,
48
+ "و": 52,
49
+ "ى": 35,
50
+ "ي": 42,
51
+ "ً": 12,
52
+ "ٌ": 32,
53
+ "ٍ": 45,
54
+ "َ": 10,
55
+ "ُ": 20,
56
+ "ِ": 59,
57
+ "ّ": 23,
58
+ "ْ": 15,
59
+ "ٱ": 36,
60
+ "ڤ": 18,
61
+ "ک": 7,
62
+ "ی": 13,
63
+ "": 33
 
 
 
 
 
 
 
64
  }