ad019el commited on
Commit
03bb054
1 Parent(s): a3938b0

Upload tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +14 -15
vocab.json CHANGED
@@ -1,21 +1,20 @@
1
  {
2
  "!": 0,
3
  "\"": 26,
4
- ",": 48,
5
  ".": 28,
6
  ":": 41,
7
- "[PAD]": 61,
8
- "[UNK]": 60,
9
- "p": 47,
10
  "|": 4,
11
  " ": 21,
12
  "،": 16,
13
- "؟": 57,
14
  "ء": 30,
15
  "آ": 37,
16
  "أ": 38,
17
  "ؤ": 5,
18
- "إ": 50,
19
  "ئ": 40,
20
  "ا": 22,
21
  "ب": 34,
@@ -26,26 +25,26 @@
26
  "ح": 27,
27
  "خ": 44,
28
  "د": 9,
29
- "ذ": 55,
30
  "ر": 14,
31
  "ز": 43,
32
- "س": 56,
33
  "ش": 29,
34
  "ص": 2,
35
- "ض": 51,
36
  "ط": 31,
37
- "ظ": 58,
38
  "ع": 46,
39
- "غ": 54,
40
  "ـ": 25,
41
- "ف": 49,
42
  "ق": 39,
43
  "ك": 1,
44
  "ل": 11,
45
- "م": 53,
46
  "ن": 8,
47
  "ه": 3,
48
- "و": 52,
49
  "ى": 35,
50
  "ي": 42,
51
  "ً": 12,
@@ -53,7 +52,7 @@
53
  "ٍ": 45,
54
  "َ": 10,
55
  "ُ": 20,
56
- "ِ": 59,
57
  "ّ": 23,
58
  "ْ": 15,
59
  "ٱ": 36,
 
1
  {
2
  "!": 0,
3
  "\"": 26,
4
+ ",": 47,
5
  ".": 28,
6
  ":": 41,
7
+ "[PAD]": 60,
8
+ "[UNK]": 59,
 
9
  "|": 4,
10
  " ": 21,
11
  "،": 16,
12
+ "؟": 56,
13
  "ء": 30,
14
  "آ": 37,
15
  "أ": 38,
16
  "ؤ": 5,
17
+ "إ": 49,
18
  "ئ": 40,
19
  "ا": 22,
20
  "ب": 34,
 
25
  "ح": 27,
26
  "خ": 44,
27
  "د": 9,
28
+ "ذ": 54,
29
  "ر": 14,
30
  "ز": 43,
31
+ "س": 55,
32
  "ش": 29,
33
  "ص": 2,
34
+ "ض": 50,
35
  "ط": 31,
36
+ "ظ": 57,
37
  "ع": 46,
38
+ "غ": 53,
39
  "ـ": 25,
40
+ "ف": 48,
41
  "ق": 39,
42
  "ك": 1,
43
  "ل": 11,
44
+ "م": 52,
45
  "ن": 8,
46
  "ه": 3,
47
+ "و": 51,
48
  "ى": 35,
49
  "ي": 42,
50
  "ً": 12,
 
52
  "ٍ": 45,
53
  "َ": 10,
54
  "ُ": 20,
55
+ "ِ": 58,
56
  "ّ": 23,
57
  "ْ": 15,
58
  "ٱ": 36,