karthikqnq committed on
Commit
ab0b188
1 Parent(s): 3a9d5b0

Upload tokenizer

merges.txt CHANGED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json CHANGED
@@ -20,13 +20,7 @@
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": {
-    "content": "<pad>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
+  "pad_token": "<|endoftext|>",
   "sep_token": {
     "content": "<sep>",
     "lstrip": false,
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -41,7 +41,7 @@
   "errors": "replace",
   "mask_token": "<mask>",
   "model_max_length": 1024,
-  "pad_token": "<pad>",
+  "pad_token": "<|endoftext|>",
   "sep_token": "<sep>",
   "tokenizer_class": "GPT2Tokenizer",
   "unk_token": "<|endoftext|>"
vocab.json CHANGED
The diff for this file is too large to render. See raw diff