KevinGeng committed on
Commit
70959a3
1 Parent(s): e94723a

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +4 -33
tokenizer_config.json CHANGED
@@ -12976,43 +12976,14 @@
12976
  "<|nocaptions|>",
12977
  "<|notimestamps|>"
12978
  ],
12979
- "bos_token": {
12980
- "__type": "AddedToken",
12981
- "content": "<|endoftext|>",
12982
- "lstrip": false,
12983
- "normalized": true,
12984
- "rstrip": false,
12985
- "single_word": false
12986
- },
12987
  "clean_up_tokenization_spaces": true,
12988
- "eos_token": {
12989
- "__type": "AddedToken",
12990
- "content": "<|endoftext|>",
12991
- "lstrip": false,
12992
- "normalized": true,
12993
- "rstrip": false,
12994
- "single_word": false
12995
- },
12996
  "errors": "replace",
12997
  "model_max_length": 1024,
12998
- "pad_token": {
12999
- "__type": "AddedToken",
13000
- "content": "<|endoftext|>",
13001
- "lstrip": false,
13002
- "normalized": true,
13003
- "rstrip": false,
13004
- "single_word": false
13005
- },
13006
  "processor_class": "WhisperProcessor",
13007
  "return_attention_mask": false,
13008
- "special_tokens_map_file": "/mnt/Disk2/huggingface/hub/models--openai--whisper-medium/snapshots/abdf7c39ab9d0397620ccaea8974cc764cd0953e/special_tokens_map.json",
13009
  "tokenizer_class": "WhisperTokenizer",
13010
- "unk_token": {
13011
- "__type": "AddedToken",
13012
- "content": "<|endoftext|>",
13013
- "lstrip": false,
13014
- "normalized": true,
13015
- "rstrip": false,
13016
- "single_word": false
13017
- }
13018
  }
 
12976
  "<|nocaptions|>",
12977
  "<|notimestamps|>"
12978
  ],
12979
+ "bos_token": "<|endoftext|>",
 
 
 
 
 
 
 
12980
  "clean_up_tokenization_spaces": true,
12981
+ "eos_token": "<|endoftext|>",
 
 
 
 
 
 
 
12982
  "errors": "replace",
12983
  "model_max_length": 1024,
12984
+ "pad_token": "<|endoftext|>",
 
 
 
 
 
 
 
12985
  "processor_class": "WhisperProcessor",
12986
  "return_attention_mask": false,
 
12987
  "tokenizer_class": "WhisperTokenizer",
12988
+ "unk_token": "<|endoftext|>"
 
 
 
 
 
 
 
12989
  }