{ | |
"version": "1.0", | |
"truncation": null, | |
"padding": null, | |
"added_tokens": [ | |
{ | |
"id": 0, | |
"content": "[PAD]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 1, | |
"content": "[UNK]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 2, | |
"content": "[CLS]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 3, | |
"content": "[SEP]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 4, | |
"content": "[MASK]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 500, | |
"content": ".", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": true, | |
"special": false | |
}, | |
{ | |
"id": 501, | |
"content": ",", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": true, | |
"special": false | |
}, | |
{ | |
"id": 502, | |
"content": "!", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": true, | |
"special": false | |
}, | |
{ | |
"id": 503, | |
"content": "?", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": true, | |
"special": false | |
}, | |
{ | |
"id": 504, | |
"content": "-", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": true, | |
"special": false | |
}, | |
{ | |
"id": 505, | |
"content": ":", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": true, | |
"special": false | |
}, | |
{ | |
"id": 506, | |
"content": ";", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": true, | |
"special": false | |
}, | |
{ | |
"id": 507, | |
"content": "/", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": true, | |
"special": false | |
}, | |
{ | |
"id": 508, | |
"content": "(", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": true, | |
"special": false | |
}, | |
{ | |
"id": 509, | |
"content": ")", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": true, | |
"special": false | |
}, | |
{ | |
"id": 510, | |
"content": "'", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": true, | |
"special": false | |
}, | |
{ | |
"id": 511, | |
"content": "\"", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": true, | |
"special": false | |
}, | |
{ | |
"id": 512, | |
"content": "...", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": true, | |
"special": false | |
}, | |
{ | |
"id": 513, | |
"content": "0", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": true, | |
"special": false | |
}, | |
{ | |
"id": 514, | |
"content": "1", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": true, | |
"special": false | |
}, | |
{ | |
"id": 515, | |
"content": "2", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": true, | |
"special": false | |
}, | |
{ | |
"id": 516, | |
"content": "3", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": true, | |
"special": false | |
}, | |
{ | |
"id": 517, | |
"content": "4", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": true, | |
"special": false | |
}, | |
{ | |
"id": 518, | |
"content": "5", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": true, | |
"special": false | |
}, | |
{ | |
"id": 519, | |
"content": "6", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": true, | |
"special": false | |
}, | |
{ | |
"id": 520, | |
"content": "7", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": true, | |
"special": false | |
}, | |
{ | |
"id": 521, | |
"content": "8", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": true, | |
"special": false | |
}, | |
{ | |
"id": 522, | |
"content": "9", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": true, | |
"special": false | |
} | |
], | |
"normalizer": { | |
"type": "BertNormalizer", | |
"clean_text": true, | |
"handle_chinese_chars": true, | |
"strip_accents": null, | |
"lowercase": false | |
}, | |
"pre_tokenizer": { | |
"type": "BertPreTokenizer" | |
}, | |
"post_processor": { | |
"type": "TemplateProcessing", | |
"single": [ | |
{ | |
"SpecialToken": { | |
"id": "[CLS]", | |
"type_id": 0 | |
} | |
}, | |
{ | |
"Sequence": { | |
"id": "A", | |
"type_id": 0 | |
} | |
}, | |
{ | |
"SpecialToken": { | |
"id": "[SEP]", | |
"type_id": 0 | |
} | |
} | |
], | |
"pair": [ | |
{ | |
"SpecialToken": { | |
"id": "[CLS]", | |
"type_id": 0 | |
} | |
}, | |
{ | |
"Sequence": { | |
"id": "A", | |
"type_id": 0 | |
} | |
}, | |
{ | |
"SpecialToken": { | |
"id": "[SEP]", | |
"type_id": 0 | |
} | |
}, | |
{ | |
"Sequence": { | |
"id": "B", | |
"type_id": 1 | |
} | |
}, | |
{ | |
"SpecialToken": { | |
"id": "[SEP]", | |
"type_id": 1 | |
} | |
} | |
], | |
"special_tokens": { | |
"[CLS]": { | |
"id": "[CLS]", | |
"ids": [ | |
2 | |
], | |
"tokens": [ | |
"[CLS]" | |
] | |
}, | |
"[SEP]": { | |
"id": "[SEP]", | |
"ids": [ | |
3 | |
], | |
"tokens": [ | |
"[SEP]" | |
] | |
} | |
} | |
}, | |
"decoder": { | |
"type": "WordPiece", | |
"prefix": "##", | |
"cleanup": true | |
}, | |
"model": { | |
"type": "WordPiece", | |
"unk_token": "[UNK]", | |
"continuing_subword_prefix": "##", | |
"max_input_chars_per_word": 100, | |
"vocab": { | |
"[PAD]": 0, | |
"[UNK]": 1, | |
"[CLS]": 2, | |
"[SEP]": 3, | |
"[MASK]": 4, | |
"": 5, | |
"ஂ": 6, | |
"ஃ": 7, | |
"அ": 8, | |
"ஆ": 9, | |
"இ": 10, | |
"ஈ": 11, | |
"உ": 12, | |
"ஊ": 13, | |
"": 14, | |
"எ": 15, | |
"ஏ": 16, | |
"ஐ": 17, | |
"": 18, | |
"ஒ": 19, | |
"ஓ": 20, | |
"ஔ": 21, | |
"க": 22, | |
"": 23, | |
"": 24, | |
"ங": 25, | |
"ச": 26, | |
"ஜ": 27, | |
"ஞ": 28, | |
"ட": 29, | |
"": 30, | |
"": 31, | |
"": 32, | |
"ண": 33, | |
"த": 34, | |
"": 35, | |
"": 36, | |
"": 37, | |
"ந": 38, | |
"ன": 39, | |
"ப": 40, | |
"": 41, | |
"": 42, | |
"": 43, | |
"ம": 44, | |
"ய": 45, | |
"ர": 46, | |
"ற": 47, | |
"ல": 48, | |
"ள": 49, | |
"ழ": 50, | |
"வ": 51, | |
"ஶ": 52, | |
"ஷ": 53, | |
"ஸ": 54, | |
"ஹ": 55, | |
"": 56, | |
"ா": 57, | |
"ி": 58, | |
"ீ": 59, | |
"ு": 60, | |
"ூ": 61, | |
"": 62, | |
"ெ": 63, | |
"ே": 64, | |
"ை": 65, | |
"": 66, | |
"ொ": 67, | |
"ோ": 68, | |
"ௌ": 69, | |
"்": 70, | |
"ௐ": 71, | |
"ௗ": 72, | |
"": 73, | |
"௦": 74, | |
"௧": 75, | |
"௨": 76, | |
"௩": 77, | |
"௪": 78, | |
"௫": 79, | |
"௬": 80, | |
"௭": 81, | |
"௮": 82, | |
"௯": 83, | |
"௰": 84, | |
"௱": 85, | |
"௲": 86, | |
"௳": 87, | |
"௴": 88, | |
"௵": 89, | |
"௶": 90, | |
"௷": 91, | |
"௸": 92, | |
"௹": 93, | |
"௺": 94, | |
"": 95, | |
"": 96, | |
"##ல": 97, | |
"##ை": 98, | |
"##க": 99, | |
"##்": 100, | |
"##ு": 101, | |
"##த": 102, | |
"##ர": 103, | |
"##வ": 104, | |
"##ே": 105, | |
"##ற": 106, | |
"##ம": 107, | |
"##ப": 108, | |
"##ன": 109, | |
"##ட": 110, | |
"##ி": 111, | |
"##ா": 112, | |
"##ச": 113, | |
"##ூ": 114, | |
"##ழ": 115, | |
"##ந": 116, | |
"##ோ": 117, | |
"##ொ": 118, | |
"##ெ": 119, | |
"##ள": 120, | |
"##ங": 121, | |
"##ய": 122, | |
"##ஞ": 123, | |
"##ண": 124, | |
"##ஸ": 125, | |
"##ஜ": 126, | |
"##ஷ": 127, | |
"##ீ": 128, | |
"##ஹ": 129, | |
"##உ": 130, | |
"##ஃ": 131, | |
"##அ": 132, | |
"##ஓ": 133, | |
"##எ": 134, | |
"##ஆ": 135, | |
"##ஊ": 136, | |
"##இ": 137, | |
"##ௌ": 138, | |
"##ஏ": 139, | |
"##ஒ": 140, | |
"##ஐ": 141, | |
"##௫": 142, | |
"##ஶ": 143, | |
"##௯": 144, | |
"##": 145, | |
"##ஈ": 146, | |
"##": 147, | |
"##ஔ": 148, | |
"##௦": 149, | |
"##௧": 150, | |
"##௰": 151, | |
"##௪": 152, | |
"##ஂ": 153, | |
"##௱": 154, | |
"##ௗ": 155, | |
"##௬": 156, | |
"##": 157, | |
"##": 158, | |
"##௩": 159, | |
"##": 160, | |
"##ௐ": 161, | |
"##௲": 162, | |
"##௭": 163, | |
"##": 164, | |
"##௮": 165, | |
"##௨": 166, | |
"##": 167, | |
"##௵": 168, | |
"##": 169, | |
"##": 170, | |
"##௶": 171, | |
"##": 172, | |
"##௹": 173, | |
"##௸": 174, | |
"##": 175, | |
"##௴": 176, | |
"##": 177, | |
"##௳": 178, | |
"##": 179, | |
"##": 180, | |
"##": 181, | |
"##": 182, | |
"##": 183, | |
"##": 184, | |
"##்க": 185, | |
"##்த": 186, | |
"##ம்": 187, | |
"##ன்": 188, | |
"##ல்": 189, | |
"##க்க": 190, | |
"##்ட": 191, | |
"##ப்": 192, | |
"##த்த": 193, | |
"##ள்": 194, | |
"##ும்": 195, | |
"##ர்": 196, | |
"##ிய": 197, | |
"##ப்ப": 198, | |
"##ரு": 199, | |
"##ந்த": 200, | |
"##ட்ட": 201, | |
"##து": 202, | |
"##ில்": 203, | |
"##ங்க": 204, | |
"##ைய": 205, | |
"##ற்": 206, | |
"##ின்": 207, | |
"##ாக": 208, | |
"##று": 209, | |
"##ிர": 210, | |
"##டு": 211, | |
"##ிக": 212, | |
"##ண்ட": 213, | |
"##்ச": 214, | |
"##க்கு": 215, | |
"##ர்க": 216, | |
"##ிற": 217, | |
"##ில": 218, | |
"மு": 219, | |
"##ான": 220, | |
"##த்து": 221, | |
"செ": 222, | |
"என்": 223, | |
"##டி": 224, | |
"வி": 225, | |
"##லை": 226, | |
"##ற்ற": 227, | |
"##ள்ள": 228, | |
"##ார": 229, | |
"##தி": 230, | |
"##ார்": 231, | |
"##ப்பு": 232, | |
"##ிரு": 233, | |
"##வு": 234, | |
"##ட்டு": 235, | |
"##ல்ல": 236, | |
"##ரி": 237, | |
"##வி": 238, | |
"##க்": 239, | |
"கு": 240, | |
"##ான்": 241, | |
"##ந்து": 242, | |
"##ால்": 243, | |
"##ளை": 244, | |
"##ய்": 245, | |
"##ச்ச": 246, | |
"கொ": 247, | |
"##த்": 248, | |
"போ": 249, | |
"இரு": 250, | |
"##னை": 251, | |
"அவ": 252, | |
"கா": 253, | |
"##ர்கள்": 254, | |
"##ங்கள்": 255, | |
"பெ": 256, | |
"##ண்": 257, | |
"##ம்ப": 258, | |
"##றி": 259, | |
"##ஸ்": 260, | |
"##ாத": 261, | |
"##மி": 262, | |
"பு": 263, | |
"##கள்": 264, | |
"##கு": 265, | |
"##ாவ": 266, | |
"##மை": 267, | |
"##ளு": 268, | |
"வே": 269, | |
"ஒரு": 270, | |
"##க்கும்": 271, | |
"##ின": 272, | |
"##ழு": 273, | |
"பா": 274, | |
"அத": 275, | |
"தொ": 276, | |
"இந்த": 277, | |
"வெ": 278, | |
"##ண்டு": 279, | |
"##ாம்": 280, | |
"வா": 281, | |
"##ற்க": 282, | |
"##த்தில்": 283, | |
"##டை": 284, | |
"##ன்ன": 285, | |
"செய": 286, | |
"##ன்ற": 287, | |
"##ழ்": 288, | |
"##மா": 289, | |
"##ிக்க": 290, | |
"##டிய": 291, | |
"நா": 292, | |
"மா": 293, | |
"##ச்": 294, | |
"##ரை": 295, | |
"##ரா": 296, | |
"##வா": 297, | |
"##ரிய": 298, | |
"##தை": 299, | |
"##ையில்": 300, | |
"##ட்": 301, | |
"##ளி": 302, | |
"கூ": 303, | |
"பொ": 304, | |
"##வே": 305, | |
"சு": 306, | |
"##ால": 307, | |
"##்த்த": 308, | |
"தமி": 309, | |
"மே": 310, | |
"என": 311, | |
"##றை": 312, | |
"தே": 313, | |
"சொ": 314, | |
"பிர": 315, | |
"##ங்கள": 316, | |
"##வை": 317, | |
"##ாம": 318, | |
"சி": 319, | |
"##ப்பட்ட": 320, | |
"##ற்ப": 321, | |
"##ையும்": 322, | |
"##மாக": 323, | |
"நி": 324, | |
"##மு": 325, | |
"##ண்ண": 326, | |
"பே": 327, | |
"##த்தை": 328, | |
"##கிற": 329, | |
"##திய": 330, | |
"##ளுக்கு": 331, | |
"தெ": 332, | |
"என்று": 333, | |
"##ட்ச": 334, | |
"கோ": 335, | |
"நீ": 336, | |
"செய்த": 337, | |
"##ிகள்": 338, | |
"##வர்": 339, | |
"##னி": 340, | |
"##மான": 341, | |
"##பு": 342, | |
"என்ற": 343, | |
"##வும்": 344, | |
"##சு": 345, | |
"##ன்று": 346, | |
"##டுத்த": 347, | |
"##னு": 348, | |
"##கள": 349, | |
"##டன்": 350, | |
"மற்ற": 351, | |
"##லி": 352, | |
"##்கள்": 353, | |
"##ர்கள": 354, | |
"உள்ள": 355, | |
"##ரும்": 356, | |
"பகு": 357, | |
"##சி": 358, | |
"##ற்று": 359, | |
"##ப்பட": 360, | |
"##ாள": 361, | |
"அர": 362, | |
"செய்": 363, | |
"பி": 364, | |
"இத": 365, | |
"##வத": 366, | |
"##ணி": 367, | |
"##வில்": 368, | |
"##ின்ற": 369, | |
"##ழி": 370, | |
"##ாய": 371, | |
"கே": 372, | |
"##க்கிற": 373, | |
"என்ப": 374, | |
"##ேன்": 375, | |
"நட": 376, | |
"து": 377, | |
"கி": 378, | |
"##்கு": 379, | |
"##சிய": 380, | |
"##னர்": 381, | |
"திரு": 382, | |
"##ஞ்ச": 383, | |
"மற்றும்": 384, | |
"##டைய": 385, | |
"##ண்டும்": 386, | |
"##ிக்": 387, | |
"தொட": 388, | |
"வை": 389, | |
"##பா": 390, | |
"முத": 391, | |
"##கம்": 392, | |
"##டம்": 393, | |
"மூ": 394, | |
"##ங்கு": 395, | |
"##லா": 396, | |
"கரு": 397, | |
"சே": 398, | |
"##ியா": 399, | |
"பய": 400, | |
"செய்ய": 401, | |
"வீ": 402, | |
"பல": 403, | |
"ஆக": 404, | |
"##மைய": 405, | |
"வரு": 406, | |
"##வர": 407, | |
"##ட்டி": 408, | |
"மீ": 409, | |
"##களை": 410, | |
"##னால்": 411, | |
"##வ்": 412, | |
"##ப்பா": 413, | |
"##ளிய": 414, | |
"இது": 415, | |
"எழு": 416, | |
"இருந்த": 417, | |
"பகுப்பு": 418, | |
"அறி": 419, | |
"தி": 420, | |
"தமிழ்": 421, | |
"##ப்பி": 422, | |
"நில": 423, | |
"##மே": 424, | |
"அந்த": 425, | |
"##ும்ப": 426, | |
"வர": 427, | |
"பதி": 428, | |
"##ப்போ": 429, | |
"##க்கிய": 430, | |
"நே": 431, | |
"தலை": 432, | |
"தமிழ": 433, | |
"##கை": 434, | |
"##ணை": 435, | |
"##லாம்": 436, | |
"பார": 437, | |
"##த்தின்": 438, | |
"##விய": 439, | |
"வழ": 440, | |
"##ிருந்த": 441, | |
"##டுத்து": 442, | |
"கொண்ட": 443, | |
"##ர்க்க": 444, | |
"##ம்பர்": 445, | |
"குறி": 446, | |
"##ையை": 447, | |
"கட": 448, | |
"என்ன": 449, | |
"##ூர்": 450, | |
"அமை": 451, | |
"##ற்கு": 452, | |
"##ரம்": 453, | |
"##லு": 454, | |
"##ன்ப": 455, | |
"##நா": 456, | |
"##கிறது": 457, | |
"##ிலும்": 458, | |
"தீ": 459, | |
"##ழை": 460, | |
"##க்கள்": 461, | |
"##வது": 462, | |
"##லம்": 463, | |
"##ங்களை": 464, | |
"##ார்கள்": 465, | |
"வெளிய": 466, | |
"இய": 467, | |
"##ிகள": 468, | |
"இர": 469, | |
"##ற்றி": 470, | |
"##யர்": 471, | |
"##ணம்": 472, | |
"சம": 473, | |
"##ங்கில": 474, | |
"சா": 475, | |
"##த்திய": 476, | |
"சொல்ல": 477, | |
"##க்கி": 478, | |
"அதிக": 479, | |
"வேண்டும்": 480, | |
"##ாது": 481, | |
"##னா": 482, | |
"பத": 483, | |
"நான்": 484, | |
"அல்ல": 485, | |
"இல்": 486, | |
"பின்": 487, | |
"இல": 488, | |
"##ரோ": 489, | |
"##பி": 490, | |
"சிற": 491, | |
"திர": 492, | |
"##க்கம்": 493, | |
"##ஸ்ட": 494, | |
"வந்த": 495, | |
"##போ": 496, | |
"##ிற்கு": 497, | |
"##டிக்க": 498, | |
"பிற": 499 | |
} | |
} | |
} |