{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "[MASK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "BertNormalizer", "clean_text": true, "handle_chinese_chars": true, "strip_accents": null, "lowercase": true }, "pre_tokenizer": { "type": "BertPreTokenizer" }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "[CLS]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 0 } } ], "pair": [ { "SpecialToken": { "id": "[CLS]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 1 } } ], "special_tokens": { "[CLS]": { "id": "[CLS]", "ids": [ 2 ], "tokens": [ "[CLS]" ] }, "[SEP]": { "id": "[SEP]", "ids": [ 3 ], "tokens": [ "[SEP]" ] } } }, "decoder": { "type": "WordPiece", "prefix": "##", "cleanup": true }, "model": { "type": "WordPiece", "unk_token": "[UNK]", "continuing_subword_prefix": "##", "max_input_chars_per_word": 100, "vocab": { "[PAD]": 0, "[UNK]": 1, "[CLS]": 2, "[SEP]": 3, "[MASK]": 4, "!": 5, ",": 6, ".": 7, "?": 8, "a": 9, "b": 10, "c": 11, "d": 12, "e": 13, "f": 14, "g": 15, "h": 16, "i": 17, "k": 18, "l": 19, "m": 20, "n": 21, "o": 22, "p": 23, "q": 24, "r": 25, "s": 26, "t": 27, "u": 28, "v": 29, "w": 30, "x": 31, "y": 32, "z": 33, "##l": 34, "##e": 35, "##p": 36, "##i": 37, "##s": 38, "##t": 39, "##r": 40, "##f": 41, "##w": 42, "##a": 43, "##o": 44, "##u": 45, "##n": 46, "##d": 47, "##g": 48, "##h": 49, "##v": 50, "##k": 51, "##m": 52, "##z": 53, "##y": 54, "##c": 55, "##b": 56, "##x": 57, "th": 58, "##ou": 59, "##re": 60, "the": 61, "##nd": 62, "##is": 63, "##es": 64, "##er": 65, "my": 66, "##or": 67, "##ve": 68, "ha": 69, "##ll": 70, "##it": 71, "to": 72, "##nt": 73, "and": 74, "no": 75, "##ed": 76, "mo": 77, "##st": 78, "##at": 79, "in": 80, "thou": 81, "##ea": 82, "##in": 83, "##me": 84, "co": 85, "of": 86, "##ir": 87, "wh": 88, "##el": 89, "##on": 90, "not": 91, "is": 92, "wi": 93, "##ee": 94, "as": 95, "##ld": 96, "##th": 97, "##ra": 98, "most": 99, "for": 100, "##ri": 101, "will": 102, "be": 103, "ca": 104, "me": 105, "so": 106, "sh": 107, "##la": 108, "##en": 109, "##se": 110, "##ro": 111, "##ch": 112, "thy": 113, "##est": 114, "have": 115, "what": 116, "are": 117, "do": 118, "it": 119, "li": 120, "sp": 121, "you": 122, "##il": 123, "##ty": 124, "##ar": 125, "##ay": 126, "##ke": 127, "this": 128, "thee": 129, "##ear": 130, "come": 131, "##irit": 132, "spirit": 133, "ch": 134, "go": 135, "ho": 136, "his": 137, "lo": 138, "##le": 139, "##ly": 140, "##ut": 141, "##ith": 142, "more": 143, "##ment": 144, "all": 145, "ba": 146, "but": 147, "de": 148, "st": 149, "see": 150, "we": 151, "wor": 152, "with": 153, "##ic": 154, "##wn": 155, "##an": 156, "##ake": 157, "##ul": 158, "##gh": 159, "##mp": 160, "##ber": 161, "that": 162, "##ould": 163, "##ist": 164, "now": 165, "##ing": 166, "##eep": 167, "am": 168, "ari": 169, "ex": 170, "he": 171, "po": 172, "per": 173, "re": 174, "sou": 175, "ser": 176, "sla": 177, "say": 178, "tis": 179, "wa": 180, "##et": 181, "##ement": 182, "##id": 183, "##for": 184, "##ful": 185, "##od": 186, "##un": 187, "##ure": 188, "##nst": 189, "##ge": 190, "##ves": 191, "##ms": 192, "##med": 193, "##ct": 194, "##ous": 195, "##our": 196, "they": 197, "##ish": 198, "##ess": 199, "##nter": 200, "##eas": 201, "##rom": 202, "liber": 203, "char": 204, "ariel": 205, "serv": 206, "slave": 207, "liberty": 208, "ad": 209, "br": 210, "bo": 211, "by": 212, "bes": 213, "bra": 214, "bear": 215, "du": 216, "en": 217, "ear": 218, "fre": 219, "fir": 220, "fri": 221, "fly": 222, "fet": 223, "from": 224, "gre": 225, "gra": 226, "hon": 227, "kn": 228, "king": 229, "make": 230, "mist": 231, "ne": 232, "ou": 233, "on": 234, "own": 235, "pl": 236, "qu": 237, "rel": 238, "sl": 239, "sw": 240, "sa": 241, "su": 242, "sen": 243, "vi": 244, "would": 245, "wish": 246, "##li": 247, "##em": 248, "##ere": 249, "##end": 250, "##ip": 251, "##ion": 252, "##igh": 253, "##ss": 254, "##ses": 255, "##te": 256, "##tun": 257, "##fe": 258, "##wer": 259, "##and": 260, "##all": 261, "##ant": 262, "##ow": 263, "##ore": 264, "##ue": 265, "##us": 266, "##ver": 267, "##vil": 268, "##ck": 269, "##cit": 270, "##ban": 271, "##ress": 272, "them": 273, "there": 274, "##nds": 275, "##ven": 276, "has": 277, "hast": 278, "hath": 279, "##ease": 280, "cont": 281, "comp": 282, "##elf": 283, "##one": 284, "forth": 285, "canst": 286, "cali": 287, "mere": 288, "ship": 289, "shall": 290, "##lain": 291, "like": 292, "your": 293, "good": 294, "how": 295, "love": 296, "##lete": 297, "bad": 298, "devil": 299 } } }