Thimira committed on
Commit
cc011da
1 Parent(s): c471dbb

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +1 -2
  2. tokenizer_config.json +0 -2
tokenizer.json CHANGED
@@ -27,7 +27,7 @@
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
30
- "normalized": true,
31
  "special": true
32
  }
33
  ],
@@ -134,7 +134,6 @@
134
  "end_of_word_suffix": null,
135
  "fuse_unk": true,
136
  "byte_fallback": true,
137
- "ignore_merges": false,
138
  "vocab": {
139
  "<unk>": 0,
140
  "<s>": 1,
 
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
30
+ "normalized": false,
31
  "special": true
32
  }
33
  ],
 
134
  "end_of_word_suffix": null,
135
  "fuse_unk": true,
136
  "byte_fallback": true,
 
137
  "vocab": {
138
  "<unk>": 0,
139
  "<s>": 1,
tokenizer_config.json CHANGED
@@ -1,6 +1,4 @@
1
  {
2
- "add_bos_token": true,
3
- "add_eos_token": false,
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
 
1
  {
 
 
2
  "added_tokens_decoder": {
3
  "0": {
4
  "content": "<unk>",