ad019el committed on
Commit
b478b17
1 Parent(s): c69af5b

Upload tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +95 -91
vocab.json CHANGED
@@ -1,95 +1,99 @@
1
  {
2
  ":": 59,
3
- "J": 76,
4
- "[PAD]": 92,
5
- "[UNK]": 91,
6
- "a": 21,
7
- "b": 86,
8
- "c": 53,
9
- "d": 8,
10
- "e": 29,
11
- "f": 41,
12
- "g": 11,
13
- "h": 54,
14
- "i": 7,
15
- "j": 25,
16
- "k": 60,
17
- "l": 75,
18
- "m": 89,
19
- "n": 0,
20
- "o": 67,
21
- "p": 3,
22
- "q": 47,
23
- "r": 1,
24
- "s": 69,
25
- "t": 46,
26
- "u": 44,
27
- "v": 56,
28
- "w": 82,
29
- "y": 35,
30
- "z": 43,
31
- "|": 64,
32
- "°": 40,
33
- "à": 84,
34
- "ä": 83,
35
- "å": 23,
36
- "è": 18,
37
- "ì": 37,
38
- "ò": 78,
39
- "ö": 17,
40
- "ù": 85,
41
- "ą": 22,
42
- "č": 80,
43
- "ė": 28,
44
- "ę": 24,
45
- "į": 72,
46
- "š": 65,
47
- "ū": 61,
48
- "ų": 45,
49
- "ž": 73,
 
50
  "அ": 42,
51
- "ஆ": 66,
52
- "இ": 62,
53
- "": 70,
54
- "": 16,
55
- "": 63,
56
- "": 51,
57
- "": 10,
58
- "": 32,
59
- "": 49,
60
- "": 48,
61
- "": 26,
62
- "": 87,
63
- "": 88,
64
- "": 79,
65
- "": 31,
66
- "": 74,
67
- "": 20,
68
- "": 36,
69
- "": 34,
70
- "": 9,
71
- "": 6,
72
- "": 15,
73
- "": 52,
74
- "": 57,
75
- "": 55,
76
- "": 12,
77
- "": 5,
78
- "": 50,
79
- "": 38,
80
- "": 77,
81
- "": 58,
82
- "ி": 14,
83
- "": 30,
84
- "": 4,
85
- "": 19,
86
- "": 71,
87
- "": 33,
88
- "": 81,
89
- "": 90,
90
- "": 13,
91
- "": 27,
92
- "": 39,
93
- "": 2,
94
- "": 68
 
 
 
95
  }
 
1
  {
2
  ":": 59,
3
+ "J": 12,
4
+ "[PAD]": 96,
5
+ "[UNK]": 95,
6
+ "a": 74,
7
+ "b": 45,
8
+ "c": 60,
9
+ "d": 5,
10
+ "e": 46,
11
+ "f": 56,
12
+ "g": 84,
13
+ "h": 29,
14
+ "i": 72,
15
+ "j": 70,
16
+ "k": 82,
17
+ "l": 67,
18
+ "m": 4,
19
+ "n": 17,
20
+ "o": 94,
21
+ "p": 50,
22
+ "q": 65,
23
+ "r": 90,
24
+ "s": 68,
25
+ "t": 24,
26
+ "u": 23,
27
+ "v": 14,
28
+ "w": 37,
29
+ "x": 49,
30
+ "y": 25,
31
+ "z": 73,
32
+ "|": 43,
33
+ "°": 38,
34
+ "à": 13,
35
+ "ä": 31,
36
+ "å": 86,
37
+ "è": 78,
38
+ "ì": 62,
39
+ "ò": 1,
40
+ "ö": 6,
41
+ "ù": 81,
42
+ "ą": 66,
43
+ "č": 89,
44
+ "ė": 54,
45
+ "ę": 44,
46
+ "į": 0,
47
+ "š": 51,
48
+ "ū": 47,
49
+ "ų": 3,
50
+ "ž": 61,
51
  "அ": 42,
52
+ "ஆ": 27,
53
+ "இ": 52,
54
+ "": 30,
55
+ "": 77,
56
+ "": 8,
57
+ "": 35,
58
+ "": 85,
59
+ "": 41,
60
+ "": 91,
61
+ "": 48,
62
+ "": 34,
63
+ "": 93,
64
+ "": 21,
65
+ "": 40,
66
+ "": 57,
67
+ "": 88,
68
+ "": 15,
69
+ "": 64,
70
+ "": 26,
71
+ "": 18,
72
+ "": 11,
73
+ "": 53,
74
+ "": 83,
75
+ "": 2,
76
+ "": 76,
77
+ "": 39,
78
+ "": 55,
79
+ "": 92,
80
+ "": 63,
81
+ "": 28,
82
+ "": 19,
83
+ "": 20,
84
+ "": 32,
85
+ "ி": 9,
86
+ "": 58,
87
+ "": 87,
88
+ "": 69,
89
+ "": 16,
90
+ "": 7,
91
+ "": 36,
92
+ "": 10,
93
+ "": 75,
94
+ "": 33,
95
+ "": 80,
96
+ "’": 79,
97
+ "„": 22,
98
+ "…": 71
99
  }