cakiki committed on
Commit
6625ce4
1 Parent(s): e96afb1

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +6 -1
  2. tokenizer_config.json +2 -0
tokenizer.json CHANGED
@@ -23,7 +23,12 @@
23
  "use_regex": true
24
  },
25
  "post_processor": null,
26
- "decoder": null,
 
 
 
 
 
27
  "model": {
28
  "type": "Unigram",
29
  "unk_id": 0,
 
23
  "use_regex": true
24
  },
25
  "post_processor": null,
26
+ "decoder": {
27
+ "type": "ByteLevel",
28
+ "add_prefix_space": true,
29
+ "trim_offsets": true,
30
+ "use_regex": true
31
+ },
32
  "model": {
33
  "type": "Unigram",
34
  "unk_id": 0,
tokenizer_config.json CHANGED
@@ -1,4 +1,6 @@
1
  {
 
 
2
  "tokenizer_class": "PreTrainedTokenizerFast",
3
  "unk_token": "[UNK]",
4
  "vocab_size": 70000
 
1
  {
2
+ "name_or_path": "cakiki/bigcode_tokenizer",
3
+ "special_tokens_map_file": "/home/christopher/.cache/huggingface/hub/models--cakiki--bigcode_tokenizer/snapshots/e96afb14b6c9d15112592b7c41cde4953fcfc189/special_tokens_map.json",
4
  "tokenizer_class": "PreTrainedTokenizerFast",
5
  "unk_token": "[UNK]",
6
  "vocab_size": 70000