sentencepiece_ja / tokenizer_config.json
{
  "bos_token": "<BOS>",
  "eos_token": "<EOS>",
  "mask_token": "<MASK>",
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": "<PAD>",
  "unk_token": "<UNK>",
  "clean_up_tokenization_spaces": true,
  "tokenizer_class": "SentencePieceJA",
  "auto_map": {
    "AutoTokenizer": ["", "sentencepiece_ja.SentencePieceJA"]
  }
}
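
The "auto_map" entry tells AutoTokenizer to resolve the custom SentencePieceJA class from sentencepiece_ja.py in the same repository; the two-element list follows the Transformers convention of [slow tokenizer class, fast tokenizer class], with the slow slot left empty here. Because the class lives in repo code rather than in the transformers library, loading it requires trust_remote_code=True. Below is a minimal loading sketch; the repo id "if001/sentencepiece_ja" is an assumption inferred from the path above, and the sample text is purely illustrative.

# Minimal sketch of loading this tokenizer via the auto_map entry.
# Assumes the Hub repo id is "if001/sentencepiece_ja" (not confirmed by the
# config itself) and that sentencepiece_ja.py defining SentencePieceJA is
# present in that repository.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "if001/sentencepiece_ja",
    trust_remote_code=True,  # required: SentencePieceJA is custom repo code
)

# Round-trip a sample string through the tokenizer.
ids = tokenizer("こんにちは、世界")["input_ids"]
print(ids)
print(tokenizer.decode(ids))

The special-token strings above (<BOS>, <EOS>, <PAD>, <UNK>, <MASK>) come straight from this config, so attributes such as tokenizer.pad_token will reflect them after loading.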