hadiqa123 committed on
Commit
d387f35
1 Parent(s): bd939c4

add tokenizer

Browse files
Files changed (2) hide show
  1. added_tokens.json +2 -2
  2. vocab.json +15 -44
added_tokens.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "</s>": 47,
3
- "<s>": 46
4
  }
 
1
  {
2
+ "</s>": 18,
3
+ "<s>": 17
4
  }
vocab.json CHANGED
@@ -1,48 +1,19 @@
1
  {
2
- "[PAD]": 45,
3
- "[UNK]": 44,
4
  "|": 0,
5
- "ء": 1,
6
  "ا": 2,
7
- "ب": 3,
8
- "ت": 4,
9
- "ث": 5,
10
- "ج": 6,
11
- "ح": 7,
12
- "خ": 8,
13
- "د": 9,
14
- "ذ": 10,
15
- "ر": 11,
16
- "ز": 12,
17
- "س": 13,
18
- "ش": 14,
19
- "ص": 15,
20
- "ض": 16,
21
- "ط": 17,
22
- "ظ": 18,
23
- "ع": 19,
24
- "غ": 20,
25
- "ف": 21,
26
- "ق": 22,
27
- "ل": 23,
28
- "م": 24,
29
- "ن": 25,
30
- "و": 26,
31
- "ى": 27,
32
- "ي": 28,
33
- "ً": 29,
34
- "ٓ": 30,
35
- "ٹ": 31,
36
- "پ": 32,
37
- "چ": 33,
38
- "ڈ": 34,
39
- "ڑ": 35,
40
- "ژ": 36,
41
- "ک": 37,
42
- "گ": 38,
43
- "ں": 39,
44
- "ھ": 40,
45
- "ہ": 41,
46
- "ی": 42,
47
- "ے": 43
48
  }
 
1
  {
2
+ "[PAD]": 16,
3
+ "[UNK]": 15,
4
  "|": 0,
 
5
  "ا": 2,
6
+ "ب": 14,
7
+ "ت": 13,
8
+ "د": 1,
9
+ "ز": 7,
10
+ "س": 9,
11
+ "ل": 11,
12
+ "م": 12,
13
+ "ڑ": 8,
14
+ "ک": 5,
15
+ "ھ": 6,
16
+ "ہ": 4,
17
+ "ی": 3,
18
+ "ے": 10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }