nbaden committed on
Commit
5eae543
1 Parent(s): c9ecdab

Upload tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +43 -43
vocab.json CHANGED
@@ -1,51 +1,51 @@
1
  {
2
  "[PAD]": 48,
3
  "[UNK]": 47,
4
- "c": 41,
5
- "i": 44,
6
- "o": 8,
7
- "y": 24,
8
- "|": 30,
9
- "«": 29,
10
- "»": 22,
11
- "ç": 9,
12
- "а": 33,
13
  "б": 0,
14
- "в": 34,
15
- "г": 5,
16
- "д": 31,
17
- "е": 46,
18
- "ж": 19,
19
- "з": 21,
20
- "и": 7,
21
- "й": 26,
22
- "к": 20,
23
  "л": 35,
24
- "м": 4,
25
- "н": 18,
26
- "о": 23,
27
- "п": 37,
28
- "р": 10,
29
- "с": 1,
30
- "т": 25,
31
- "у": 40,
32
- "ф": 6,
33
  "х": 32,
34
- "ц": 13,
35
- "ч": 27,
36
- "ш": 38,
37
- "щ": 42,
38
- "ъ": 2,
39
  "ы": 45,
40
- "ь": 28,
41
- "э": 3,
42
- "ю": 16,
43
- "я": 15,
44
- "і": 43,
45
- "ј": 17,
46
- "ҕ": 36,
47
- "ҥ": 12,
48
- "ү": 11,
49
- "һ": 14,
50
- "ө": 39
51
  }
 
1
  {
2
  "[PAD]": 48,
3
  "[UNK]": 47,
4
+ "c": 24,
5
+ "i": 15,
6
+ "o": 30,
7
+ "y": 6,
8
+ "|": 23,
9
+ "«": 16,
10
+ "»": 19,
11
+ "ç": 22,
12
+ "а": 31,
13
  "б": 0,
14
+ "в": 13,
15
+ "г": 12,
16
+ "д": 17,
17
+ "е": 38,
18
+ "ж": 14,
19
+ "з": 8,
20
+ "и": 28,
21
+ "й": 42,
22
+ "к": 27,
23
  "л": 35,
24
+ "м": 20,
25
+ "н": 25,
26
+ "о": 10,
27
+ "п": 33,
28
+ "р": 29,
29
+ "с": 26,
30
+ "т": 11,
31
+ "у": 18,
32
+ "ф": 3,
33
  "х": 32,
34
+ "ц": 39,
35
+ "ч": 40,
36
+ "ш": 4,
37
+ "щ": 21,
38
+ "ъ": 9,
39
  "ы": 45,
40
+ "ь": 34,
41
+ "э": 1,
42
+ "ю": 5,
43
+ "я": 36,
44
+ "і": 46,
45
+ "ј": 37,
46
+ "ҕ": 44,
47
+ "ҥ": 43,
48
+ "ү": 41,
49
+ "һ": 7,
50
+ "ө": 2
51
  }