Upload tokenizer
Browse files- tokenizer_config.json +4 -33
tokenizer_config.json
CHANGED
@@ -12976,43 +12976,14 @@
|
|
12976 |
"<|nocaptions|>",
|
12977 |
"<|notimestamps|>"
|
12978 |
],
|
12979 |
-
"bos_token": {
|
12980 |
-
"__type": "AddedToken",
|
12981 |
-
"content": "<|endoftext|>",
|
12982 |
-
"lstrip": false,
|
12983 |
-
"normalized": true,
|
12984 |
-
"rstrip": false,
|
12985 |
-
"single_word": false
|
12986 |
-
},
|
12987 |
"clean_up_tokenization_spaces": true,
|
12988 |
-
"eos_token": {
|
12989 |
-
"__type": "AddedToken",
|
12990 |
-
"content": "<|endoftext|>",
|
12991 |
-
"lstrip": false,
|
12992 |
-
"normalized": true,
|
12993 |
-
"rstrip": false,
|
12994 |
-
"single_word": false
|
12995 |
-
},
|
12996 |
"errors": "replace",
|
12997 |
"model_max_length": 1024,
|
12998 |
-
"pad_token": {
|
12999 |
-
"__type": "AddedToken",
|
13000 |
-
"content": "<|endoftext|>",
|
13001 |
-
"lstrip": false,
|
13002 |
-
"normalized": true,
|
13003 |
-
"rstrip": false,
|
13004 |
-
"single_word": false
|
13005 |
-
},
|
13006 |
"processor_class": "WhisperProcessor",
|
13007 |
"return_attention_mask": false,
|
13008 |
-
"special_tokens_map_file": "/mnt/Disk2/huggingface/hub/models--openai--whisper-medium/snapshots/abdf7c39ab9d0397620ccaea8974cc764cd0953e/special_tokens_map.json",
|
13009 |
"tokenizer_class": "WhisperTokenizer",
|
13010 |
-
"unk_token": {
|
13011 |
-
"__type": "AddedToken",
|
13012 |
-
"content": "<|endoftext|>",
|
13013 |
-
"lstrip": false,
|
13014 |
-
"normalized": true,
|
13015 |
-
"rstrip": false,
|
13016 |
-
"single_word": false
|
13017 |
-
}
|
13018 |
}
|
|
|
12976 |
"<|nocaptions|>",
|
12977 |
"<|notimestamps|>"
|
12978 |
],
|
12979 |
+
"bos_token": "<|endoftext|>",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12980 |
"clean_up_tokenization_spaces": true,
|
12981 |
+
"eos_token": "<|endoftext|>",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12982 |
"errors": "replace",
|
12983 |
"model_max_length": 1024,
|
12984 |
+
"pad_token": "<|endoftext|>",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12985 |
"processor_class": "WhisperProcessor",
|
12986 |
"return_attention_mask": false,
|
|
|
12987 |
"tokenizer_class": "WhisperTokenizer",
|
12988 |
+
"unk_token": "<|endoftext|>"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12989 |
}
|