Upload tokenizer
- tokenizer.json +6 -1
- tokenizer_config.json +2 -0
tokenizer.json
CHANGED
@@ -23,7 +23,12 @@
     "use_regex": true
   },
   "post_processor": null,
-  "decoder":
+  "decoder": {
+    "type": "ByteLevel",
+    "add_prefix_space": true,
+    "trim_offsets": true,
+    "use_regex": true
+  },
   "model": {
     "type": "Unigram",
     "unk_id": 0,
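The new "decoder" block wires a ByteLevel decoder into the serialized tokenizer. Below is a minimal sketch of the effect, assuming the updated tokenizer.json is saved locally and loaded with the tokenizers library (the sample string is only illustrative): without this block, decode() leaves byte-level markers such as "Ġ" in the output; with it, they are mapped back to ordinary spaces and newlines.

from tokenizers import Tokenizer

# Load the serialized tokenizer; the "decoder" block above becomes a
# tokenizers.decoders.ByteLevel instance on the loaded object.
tok = Tokenizer.from_file("tokenizer.json")
print(type(tok.decoder))

# Round-trip an example string: encode to byte-level tokens, then decode.
enc = tok.encode("def hello_world():")
print(enc.tokens)           # byte-level tokens; spaces show up as "Ġ"
print(tok.decode(enc.ids))  # readable text again, byte markers resolved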
tokenizer_config.json
CHANGED
@@ -1,4 +1,6 @@
 {
+  "name_or_path": "cakiki/bigcode_tokenizer",
+  "special_tokens_map_file": "/home/christopher/.cache/huggingface/hub/models--cakiki--bigcode_tokenizer/snapshots/e96afb14b6c9d15112592b7c41cde4953fcfc189/special_tokens_map.json",
   "tokenizer_class": "PreTrainedTokenizerFast",
   "unk_token": "[UNK]",
   "vocab_size": 70000
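The two added fields record where the tokenizer came from and which cached file held its special-tokens map; the other fields drive loading in transformers. A minimal sketch of that loading path, assuming the files are fetched from the cakiki/bigcode_tokenizer repo named in "name_or_path":

from transformers import AutoTokenizer

# "tokenizer_class" tells AutoTokenizer which wrapper to instantiate
# around tokenizer.json; here that is PreTrainedTokenizerFast.
tok = AutoTokenizer.from_pretrained("cakiki/bigcode_tokenizer")
print(tok.__class__.__name__)  # PreTrainedTokenizerFast
print(tok.unk_token)           # "[UNK]", from unk_token above
print(len(tok))                # vocabulary size; the config records 70000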