deberta-v3-base-DIALOCONAN-WIKI-CLS / tokenizer_config.json
xoyeop's picture
Training in progress, epoch 1
81db6d3 verified
raw
history blame
12.5 kB
{
"added_tokens_decoder": {
"0": {
"content": "[PAD]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "[CLS]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "[SEP]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"3": {
"content": "[UNK]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": true
},
"17894": {
"content": "cock",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"40535": {
"content": "hell",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"45389": {
"content": "dick",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46925": {
"content": "arse",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"54152": {
"content": "fuck",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"55324": {
"content": "damn",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"81293": {
"content": "crap",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"97295": {
"content": "dyke",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"110592": {
"content": "bitch",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"112806": {
"content": "fucking",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128000": {
"content": "[MASK]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128001": {
"content": "arsehead",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128002": {
"content": "arsehole",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128003": {
"content": "ass",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128004": {
"content": "asshole",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128005": {
"content": "bastard",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128006": {
"content": "bloody",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128007": {
"content": "bollocks",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128008": {
"content": "brotherfucker",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128009": {
"content": "bugger",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128010": {
"content": "bullshit",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128011": {
"content": "child-fucker",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128012": {
"content": "Christ on a bike",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128013": {
"content": "Christ on a cracker",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128014": {
"content": "cocksucker",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128015": {
"content": "cunt",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128016": {
"content": "dammit",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128017": {
"content": "damned",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128018": {
"content": "damn it",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128019": {
"content": "dickhead",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128020": {
"content": "dumbass",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128021": {
"content": "fatherfucker",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128022": {
"content": "frigger",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128023": {
"content": "fucker",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128024": {
"content": "goddamn",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128025": {
"content": "goddamned",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128026": {
"content": "godsdamn",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128027": {
"content": "holy shit",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128028": {
"content": "horseshit",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128029": {
"content": "jackass",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128030": {
"content": "Jesus Christ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128031": {
"content": "Jesus fuck",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128032": {
"content": "Jesus H. Christ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128033": {
"content": "Jesus Harold Christ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128034": {
"content": "Jesus, Mary and Joseph",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128035": {
"content": "Jesus wept",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128036": {
"content": "kike",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128037": {
"content": "motherfucker",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128038": {
"content": "nigga",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128039": {
"content": "nigra",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128040": {
"content": "pigfucker",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128041": {
"content": "piss",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128042": {
"content": "prick",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128043": {
"content": "pussy",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128044": {
"content": "shit",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128045": {
"content": "shit ass",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128046": {
"content": "shite",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128047": {
"content": "sisterfucker",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128048": {
"content": "slut",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128049": {
"content": "son of a bitch",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128050": {
"content": "son of a whore",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128051": {
"content": "spastic",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128052": {
"content": "sweet Jesus",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128053": {
"content": "twat",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"128054": {
"content": "wanker",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
}
},
"bos_token": "[CLS]",
"clean_up_tokenization_spaces": true,
"cls_token": "[CLS]",
"do_lower_case": false,
"eos_token": "[SEP]",
"mask_token": "[MASK]",
"model_max_length": 1000000000000000019884624838656,
"pad_token": "[PAD]",
"sep_token": "[SEP]",
"sp_model_kwargs": {},
"split_by_punct": false,
"tokenizer_class": "DebertaV2Tokenizer",
"unk_token": "[UNK]",
"vocab_type": "spm"
}