Kyudan committed on
Commit
d9b4809
1 Parent(s): 44370a1

Upload tokenizer.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenizer.json +189 -0
tokenizer.json ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "[UNK]",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "[CLS]",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 2,
26
+ "content": "[SEP]",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 3,
35
+ "content": "[PAD]",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ },
42
+ {
43
+ "id": 4,
44
+ "content": "[MASK]",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
+ }
51
+ ],
52
+ "normalizer": null,
53
+ "pre_tokenizer": {
54
+ "type": "Whitespace"
55
+ },
56
+ "post_processor": null,
57
+ "decoder": null,
58
+ "model": {
59
+ "type": "BPE",
60
+ "dropout": null,
61
+ "unk_token": "[UNK]",
62
+ "continuing_subword_prefix": null,
63
+ "end_of_word_suffix": null,
64
+ "fuse_unk": false,
65
+ "byte_fallback": false,
66
+ "ignore_merges": false,
67
+ "vocab": {
68
+ "[UNK]": 0,
69
+ "[CLS]": 1,
70
+ "[SEP]": 2,
71
+ "[PAD]": 3,
72
+ "[MASK]": 4,
73
+ "(": 5,
74
+ ")": 6,
75
+ "+": 7,
76
+ "-": 8,
77
+ ".": 9,
78
+ "/": 10,
79
+ "0": 11,
80
+ "1": 12,
81
+ "4": 13,
82
+ "5": 14,
83
+ "8": 15,
84
+ "<": 16,
85
+ "=": 17,
86
+ "C": 18,
87
+ "F": 19,
88
+ "\\": 20,
89
+ "^": 21,
90
+ "_": 22,
91
+ "a": 23,
92
+ "c": 24,
93
+ "d": 25,
94
+ "f": 26,
95
+ "h": 27,
96
+ "i": 28,
97
+ "m": 29,
98
+ "q": 30,
99
+ "r": 31,
100
+ "s": 32,
101
+ "t": 33,
102
+ "u": 34,
103
+ "x": 35,
104
+ "{": 36,
105
+ "}": 37,
106
+ "~": 38,
107
+ "}}": 39,
108
+ "C_": 40,
109
+ "^{": 41,
110
+ "{\\": 42,
111
+ "}+": 43,
112
+ "ac": 44,
113
+ "fr": 45,
114
+ "frac": 46,
115
+ ")=": 47,
116
+ "10": 48,
117
+ "=-": 49,
118
+ "ad": 50,
119
+ "at": 51,
120
+ "hr": 52,
121
+ "if": 53,
122
+ "mat": 54,
123
+ "qr": 55,
124
+ "qu": 56,
125
+ "sqr": 57,
126
+ "}\\": 58,
127
+ "}}}+": 59,
128
+ "^{-": 60,
129
+ "hrm": 61,
130
+ "mathrm": 62,
131
+ "quad": 63,
132
+ "sqrt": 64,
133
+ "/-": 65,
134
+ "4x": 66,
135
+ "{-": 67,
136
+ "}{": 68,
137
+ "}=-": 69,
138
+ "~}": 70,
139
+ "~}}": 71,
140
+ "}}{-": 72,
141
+ ")=\\": 73,
142
+ ")={\\": 74,
143
+ "10x": 75,
144
+ "=-{\\": 76,
145
+ "}=-\\": 77
146
+ },
147
+ "merges": [
148
+ "} }",
149
+ "C _",
150
+ "^ {",
151
+ "{ \\",
152
+ "} +",
153
+ "a c",
154
+ "f r",
155
+ "fr ac",
156
+ ") =",
157
+ "1 0",
158
+ "= -",
159
+ "a d",
160
+ "a t",
161
+ "h r",
162
+ "i f",
163
+ "m at",
164
+ "q r",
165
+ "q u",
166
+ "s qr",
167
+ "} \\",
168
+ "}} }+",
169
+ "^{ -",
170
+ "hr m",
171
+ "mat hrm",
172
+ "qu ad",
173
+ "sqr t",
174
+ "/ -",
175
+ "4 x",
176
+ "{ -",
177
+ "} {",
178
+ "} =-",
179
+ "~ }",
180
+ "~ }}",
181
+ "}} {-",
182
+ ")= \\",
183
+ ")= {\\",
184
+ "10 x",
185
+ "=- {\\",
186
+ "}=- \\"
187
+ ]
188
+ }
189
+ }