ad019el committed on
Commit
4f96dd3
1 Parent(s): 12c18d1

Upload tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +85 -28
vocab.json CHANGED
@@ -1,30 +1,87 @@
1
  {
2
- "'": 11,
3
- ":": 0,
4
- "[PAD]": 27,
5
- "[UNK]": 26,
6
- "a": 1,
7
- "b": 19,
8
- "d": 8,
9
- "e": 3,
10
- "f": 9,
11
- "g": 4,
12
- "h": 2,
13
- "i": 10,
14
- "j": 5,
15
- "k": 14,
16
- "l": 6,
17
- "m": 21,
18
- "n": 25,
19
- "p": 15,
20
- "q": 16,
21
- "r": 24,
22
- "s": 23,
23
- "t": 18,
24
- "v": 12,
25
- "w": 17,
26
- "y": 20,
27
- "z": 7,
28
- "|": 13,
29
- "ٱ": 22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  }
 
1
  {
2
+ "!": 19,
3
+ "\"": 73,
4
+ "'": 23,
5
+ "(": 50,
6
+ ")": 37,
7
+ ",": 24,
8
+ "-": 20,
9
+ ".": 42,
10
+ ":": 67,
11
+ ";": 38,
12
+ "?": 4,
13
+ "A": 16,
14
+ "B": 17,
15
+ "C": 58,
16
+ "D": 12,
17
+ "E": 70,
18
+ "F": 10,
19
+ "G": 60,
20
+ "H": 34,
21
+ "I": 77,
22
+ "J": 8,
23
+ "K": 11,
24
+ "L": 31,
25
+ "M": 65,
26
+ "N": 48,
27
+ "O": 69,
28
+ "P": 36,
29
+ "R": 41,
30
+ "S": 49,
31
+ "T": 1,
32
+ "U": 68,
33
+ "V": 64,
34
+ "Z": 79,
35
+ "[PAD]": 84,
36
+ "[UNK]": 83,
37
+ "a": 66,
38
+ "b": 63,
39
+ "c": 15,
40
+ "d": 71,
41
+ "e": 76,
42
+ "f": 25,
43
+ "g": 27,
44
+ "h": 35,
45
+ "i": 6,
46
+ "j": 81,
47
+ "k": 57,
48
+ "l": 39,
49
+ "m": 46,
50
+ "n": 33,
51
+ "o": 47,
52
+ "p": 29,
53
+ "q": 2,
54
+ "r": 56,
55
+ "s": 7,
56
+ "t": 51,
57
+ "u": 54,
58
+ "v": 28,
59
+ "w": 72,
60
+ "y": 32,
61
+ "z": 5,
62
+ "|": 18,
63
+ "«": 61,
64
+ "é": 9,
65
+ "Ĉ": 82,
66
+ "ĉ": 55,
67
+ "Ĝ": 78,
68
+ "ĝ": 40,
69
+ "Ĥ": 59,
70
+ "ĥ": 21,
71
+ "Ĵ": 75,
72
+ "ĵ": 14,
73
+ "Ŝ": 22,
74
+ "ŝ": 30,
75
+ "Ŭ": 80,
76
+ "ŭ": 13,
77
+ "ٱ": 52,
78
+ "–": 53,
79
+ "—": 43,
80
+ "‘": 3,
81
+ "’": 26,
82
+ "“": 74,
83
+ "”": 44,
84
+ "„": 45,
85
+ "fi": 0,
86
+ "fl": 62
87
  }