hamedjahantigh committed on
Commit
5da4ade
1 Parent(s): 448c578

Upload tokenizer

Browse files
Files changed (3) hide show
  1. tokenizer.json +13 -13
  2. tokenizer_config.json +5 -5
  3. vocab.json +0 -0
tokenizer.json CHANGED
@@ -5,7 +5,7 @@
5
  "added_tokens": [
6
  {
7
  "id": 0,
8
- "content": "[PAD]",
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
@@ -14,7 +14,7 @@
14
  },
15
  {
16
  "id": 1,
17
- "content": "[CLS]",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
@@ -23,7 +23,7 @@
23
  },
24
  {
25
  "id": 2,
26
- "content": "[SEP]",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
@@ -32,7 +32,7 @@
32
  },
33
  {
34
  "id": 3,
35
- "content": "[MASK]",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
@@ -41,7 +41,7 @@
41
  },
42
  {
43
  "id": 4,
44
- "content": "[UNK]",
45
  "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
@@ -156,7 +156,7 @@
156
  "[CLS]": {
157
  "id": "[CLS]",
158
  "ids": [
159
- 1
160
  ],
161
  "tokens": [
162
  "[CLS]"
@@ -165,7 +165,7 @@
165
  "[SEP]": {
166
  "id": "[SEP]",
167
  "ids": [
168
- 2
169
  ],
170
  "tokens": [
171
  "[SEP]"
@@ -177,17 +177,17 @@
177
  "model": {
178
  "type": "BPE",
179
  "dropout": null,
180
- "unk_token": "[PAD]",
181
  "continuing_subword_prefix": null,
182
  "end_of_word_suffix": null,
183
  "fuse_unk": false,
184
  "byte_fallback": false,
185
  "vocab": {
186
- "[PAD]": 0,
187
- "[CLS]": 1,
188
- "[SEP]": 2,
189
- "[MASK]": 3,
190
- "[UNK]": 4,
191
  "#": 5,
192
  "-": 6,
193
  ".": 7,
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
8
+ "content": "[UNK]",
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
 
14
  },
15
  {
16
  "id": 1,
17
+ "content": "[SEP]",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
 
23
  },
24
  {
25
  "id": 2,
26
+ "content": "[MASK]",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
 
32
  },
33
  {
34
  "id": 3,
35
+ "content": "[CLS]",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
 
41
  },
42
  {
43
  "id": 4,
44
+ "content": "[PAD]",
45
  "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
 
156
  "[CLS]": {
157
  "id": "[CLS]",
158
  "ids": [
159
+ 3
160
  ],
161
  "tokens": [
162
  "[CLS]"
 
165
  "[SEP]": {
166
  "id": "[SEP]",
167
  "ids": [
168
+ 1
169
  ],
170
  "tokens": [
171
  "[SEP]"
 
177
  "model": {
178
  "type": "BPE",
179
  "dropout": null,
180
+ "unk_token": "[UNK]",
181
  "continuing_subword_prefix": null,
182
  "end_of_word_suffix": null,
183
  "fuse_unk": false,
184
  "byte_fallback": false,
185
  "vocab": {
186
+ "[UNK]": 0,
187
+ "[SEP]": 1,
188
+ "[MASK]": 2,
189
+ "[CLS]": 3,
190
+ "[PAD]": 4,
191
  "#": 5,
192
  "-": 6,
193
  ".": 7,
tokenizer_config.json CHANGED
@@ -2,7 +2,7 @@
2
  "add_prefix_space": false,
3
  "added_tokens_decoder": {
4
  "0": {
5
- "content": "[PAD]",
6
  "lstrip": false,
7
  "normalized": false,
8
  "rstrip": false,
@@ -10,7 +10,7 @@
10
  "special": true
11
  },
12
  "1": {
13
- "content": "[CLS]",
14
  "lstrip": false,
15
  "normalized": false,
16
  "rstrip": false,
@@ -18,7 +18,7 @@
18
  "special": true
19
  },
20
  "2": {
21
- "content": "[SEP]",
22
  "lstrip": false,
23
  "normalized": false,
24
  "rstrip": false,
@@ -26,7 +26,7 @@
26
  "special": true
27
  },
28
  "3": {
29
- "content": "[MASK]",
30
  "lstrip": false,
31
  "normalized": false,
32
  "rstrip": false,
@@ -34,7 +34,7 @@
34
  "special": true
35
  },
36
  "4": {
37
- "content": "[UNK]",
38
  "lstrip": false,
39
  "normalized": false,
40
  "rstrip": false,
 
2
  "add_prefix_space": false,
3
  "added_tokens_decoder": {
4
  "0": {
5
+ "content": "[UNK]",
6
  "lstrip": false,
7
  "normalized": false,
8
  "rstrip": false,
 
10
  "special": true
11
  },
12
  "1": {
13
+ "content": "[SEP]",
14
  "lstrip": false,
15
  "normalized": false,
16
  "rstrip": false,
 
18
  "special": true
19
  },
20
  "2": {
21
+ "content": "[MASK]",
22
  "lstrip": false,
23
  "normalized": false,
24
  "rstrip": false,
 
26
  "special": true
27
  },
28
  "3": {
29
+ "content": "[CLS]",
30
  "lstrip": false,
31
  "normalized": false,
32
  "rstrip": false,
 
34
  "special": true
35
  },
36
  "4": {
37
+ "content": "[PAD]",
38
  "lstrip": false,
39
  "normalized": false,
40
  "rstrip": false,
vocab.json CHANGED
The diff for this file is too large to render. See raw diff