internlm-7b / tokenizer_config.json
Matt
Re-add custom tokenizer
f2847d8
raw
history blame
433 Bytes
{
  "add_bos_token": true,
  "add_eos_token": false,
  "auto_map": {
    "AutoTokenizer": [
      "tokenization_internlm.InternLMTokenizer",
      null
    ]
  },
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": false,
  "eos_token": "</s>",
  "use_fast": false,
  "legacy": true,
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": "</s>",
  "tokenizer_class": "InternLMTokenizer",
  "unk_token": "<unk>"
}