Bluecast committed on
Commit
1a87f8d
1 Parent(s): b411059

Upload tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +70 -70
vocab.json CHANGED
@@ -1,76 +1,76 @@
1
  {
2
- "'": 62,
3
  "[PAD]": 73,
4
  "[UNK]": 72,
5
- "|": 70,
6
- "ം": 64,
7
- "ഃ": 15,
8
- "അ": 61,
9
- "ആ": 31,
10
- "ഇ": 42,
11
- "ഈ": 9,
12
- "ഉ": 68,
13
- "ഊ": 7,
14
- "എ": 41,
15
- "ഏ": 63,
16
- "ഐ": 26,
17
- "ഒ": 17,
18
- "ഓ": 46,
19
- "ഔ": 47,
20
- "ക": 53,
21
- "ഖ": 36,
22
- "ഗ": 13,
23
- "ഘ": 2,
24
- "ങ": 22,
25
- "ച": 25,
26
- "ഛ": 0,
27
- "ജ": 71,
28
- "ഞ": 39,
29
- "ട": 3,
30
- "ഠ": 55,
31
- "ഡ": 6,
32
- "ഢ": 45,
33
- "ണ": 57,
34
- "ത": 16,
35
- "ഥ": 20,
36
- "ദ": 59,
37
- "ധ": 43,
38
- "ന": 44,
39
- "പ": 66,
40
- "ഫ": 5,
41
- "ബ": 27,
42
- "ഭ": 4,
43
- "മ": 11,
44
- "യ": 38,
45
- "ര": 56,
46
- "റ": 18,
47
  "ല": 37,
48
- "ള": 33,
49
- "ഴ": 14,
50
- "വ": 40,
51
  "ശ": 8,
52
- "ഷ": 1,
53
- "സ": 54,
54
- "ഹ": 19,
55
- "ാ": 65,
56
- "ി": 12,
57
- "ീ": 58,
58
- "ു": 30,
59
- "ൂ": 49,
60
- "ൃ": 69,
61
- "െ": 67,
62
- "േ": 51,
63
- "ൈ": 32,
64
- "ൊ": 24,
65
- "ോ": 10,
66
- "ൌ": 23,
67
- "്": 29,
68
- "ൗ": 28,
69
- "ൺ": 34,
70
- "ൻ": 21,
71
- "ർ": 60,
72
- "ൽ": 50,
73
- "ൾ": 35,
74
- "ൿ": 48,
75
- "’": 52
76
  }
 
1
  {
2
+ "'": 59,
3
  "[PAD]": 73,
4
  "[UNK]": 72,
5
+ "|": 25,
6
+ "ം": 39,
7
+ "ഃ": 57,
8
+ "അ": 67,
9
+ "ആ": 18,
10
+ "ഇ": 3,
11
+ "ഈ": 48,
12
+ "ഉ": 46,
13
+ "ഊ": 28,
14
+ "എ": 53,
15
+ "ഏ": 43,
16
+ "ഐ": 47,
17
+ "ഒ": 58,
18
+ "ഓ": 10,
19
+ "ഔ": 62,
20
+ "ക": 1,
21
+ "ഖ": 19,
22
+ "ഗ": 69,
23
+ "ഘ": 50,
24
+ "ങ": 64,
25
+ "ച": 12,
26
+ "ഛ": 6,
27
+ "ജ": 14,
28
+ "ഞ": 24,
29
+ "ട": 7,
30
+ "ഠ": 49,
31
+ "ഡ": 13,
32
+ "ഢ": 41,
33
+ "ണ": 29,
34
+ "ത": 70,
35
+ "ഥ": 4,
36
+ "ദ": 21,
37
+ "ധ": 9,
38
+ "ന": 45,
39
+ "പ": 31,
40
+ "ഫ": 17,
41
+ "ബ": 51,
42
+ "ഭ": 15,
43
+ "മ": 23,
44
+ "യ": 22,
45
+ "ര": 26,
46
+ "റ": 42,
47
  "ല": 37,
48
+ "ള": 16,
49
+ "ഴ": 35,
50
+ "വ": 34,
51
  "ശ": 8,
52
+ "ഷ": 63,
53
+ "സ": 61,
54
+ "ഹ": 52,
55
+ "ാ": 38,
56
+ "ി": 65,
57
+ "ീ": 40,
58
+ "ു": 11,
59
+ "ൂ": 68,
60
+ "ൃ": 36,
61
+ "െ": 44,
62
+ "േ": 0,
63
+ "ൈ": 54,
64
+ "ൊ": 32,
65
+ "ോ": 66,
66
+ "ൌ": 56,
67
+ "്": 33,
68
+ "ൗ": 71,
69
+ "ൺ": 5,
70
+ "ൻ": 55,
71
+ "ർ": 2,
72
+ "ൽ": 27,
73
+ "ൾ": 60,
74
+ "ൿ": 20,
75
+ "’": 30
76
  }