{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "[MASK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 500, "content": ".", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 501, "content": ",", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 502, "content": "!", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 503, "content": "?", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 504, "content": "-", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 505, "content": ":", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 506, "content": ";", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 507, "content": "/", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 508, "content": "(", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 509, "content": ")", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 510, "content": "'", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 511, "content": "\"", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 512, "content": "...", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 513, "content": "0", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 514, "content": "1", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 515, "content": "2", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 516, "content": "3", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 517, "content": "4", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 518, "content": "5", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 519, "content": "6", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 520, "content": "7", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 521, "content": "8", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 522, "content": "9", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false } ], "normalizer": { "type": "BertNormalizer", "clean_text": true, "handle_chinese_chars": true, "strip_accents": null, "lowercase": false }, "pre_tokenizer": { "type": "BertPreTokenizer" }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "[CLS]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 0 } } ], "pair": [ { "SpecialToken": { "id": "[CLS]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 1 } } ], "special_tokens": { "[CLS]": { "id": "[CLS]", "ids": [ 2 ], "tokens": [ "[CLS]" ] }, "[SEP]": { "id": "[SEP]", "ids": [ 3 ], "tokens": [ "[SEP]" ] } } }, "decoder": { "type": "WordPiece", "prefix": "##", "cleanup": true }, "model": { "type": "WordPiece", "unk_token": "[UNK]", "continuing_subword_prefix": "##", "max_input_chars_per_word": 100, "vocab": { "[PAD]": 0, "[UNK]": 1, "[CLS]": 2, "[SEP]": 3, "[MASK]": 4, "஁": 5, "ஂ": 6, "ஃ": 7, "அ": 8, "ஆ": 9, "இ": 10, "ஈ": 11, "உ": 12, "ஊ": 13, "஋": 14, "எ": 15, "ஏ": 16, "ஐ": 17, "஑": 18, "ஒ": 19, "ஓ": 20, "ஔ": 21, "க": 22, "஖": 23, "஗": 24, "ங": 25, "ச": 26, "ஜ": 27, "ஞ": 28, "ட": 29, "஠": 30, "஡": 31, "஢": 32, "ண": 33, "த": 34, "஥": 35, "஦": 36, "஧": 37, "ந": 38, "ன": 39, "ப": 40, "஫": 41, "஬": 42, "஭": 43, "ம": 44, "ய": 45, "ர": 46, "ற": 47, "ல": 48, "ள": 49, "ழ": 50, "வ": 51, "ஶ": 52, "ஷ": 53, "ஸ": 54, "ஹ": 55, "஽": 56, "ா": 57, "ி": 58, "ீ": 59, "ு": 60, "ூ": 61, "௃": 62, "ெ": 63, "ே": 64, "ை": 65, "௉": 66, "ொ": 67, "ோ": 68, "ௌ": 69, "்": 70, "ௐ": 71, "ௗ": 72, "௠": 73, "௦": 74, "௧": 75, "௨": 76, "௩": 77, "௪": 78, "௫": 79, "௬": 80, "௭": 81, "௮": 82, "௯": 83, "௰": 84, "௱": 85, "௲": 86, "௳": 87, "௴": 88, "௵": 89, "௶": 90, "௷": 91, "௸": 92, "௹": 93, "௺": 94, "௼": 95, "௿": 96, "##ல": 97, "##ை": 98, "##க": 99, "##்": 100, "##ு": 101, "##த": 102, "##ர": 103, "##வ": 104, "##ே": 105, "##ற": 106, "##ம": 107, "##ப": 108, "##ன": 109, "##ட": 110, "##ி": 111, "##ா": 112, "##ச": 113, "##ூ": 114, "##ழ": 115, "##ந": 116, "##ோ": 117, "##ொ": 118, "##ெ": 119, "##ள": 120, "##ங": 121, "##ய": 122, "##ஞ": 123, "##ண": 124, "##ஸ": 125, "##ஜ": 126, "##ஷ": 127, "##ீ": 128, "##ஹ": 129, "##உ": 130, "##ஃ": 131, "##அ": 132, "##ஓ": 133, "##எ": 134, "##ஆ": 135, "##ஊ": 136, "##இ": 137, "##ௌ": 138, "##ஏ": 139, "##ஒ": 140, "##ஐ": 141, "##௫": 142, "##ஶ": 143, "##௯": 144, "##஑": 145, "##ஈ": 146, "##஢": 147, "##ஔ": 148, "##௦": 149, "##௧": 150, "##௰": 151, "##௪": 152, "##ஂ": 153, "##௱": 154, "##ௗ": 155, "##௬": 156, "##஡": 157, "##஭": 158, "##௩": 159, "##௿": 160, "##ௐ": 161, "##௲": 162, "##௭": 163, "##஧": 164, "##௮": 165, "##௨": 166, "##௃": 167, "##௵": 168, "##஦": 169, "##஬": 170, "##௶": 171, "##஽": 172, "##௹": 173, "##௸": 174, "##஖": 175, "##௴": 176, "##௉": 177, "##௳": 178, "##஫": 179, "##௠": 180, "##஠": 181, "##௼": 182, "##஁": 183, "##஥": 184, "##்க": 185, "##்த": 186, "##ம்": 187, "##ன்": 188, "##ல்": 189, "##க்க": 190, "##்ட": 191, "##ப்": 192, "##த்த": 193, "##ள்": 194, "##ும்": 195, "##ர்": 196, "##ிய": 197, "##ப்ப": 198, "##ரு": 199, "##ந்த": 200, "##ட்ட": 201, "##து": 202, "##ில்": 203, "##ங்க": 204, "##ைய": 205, "##ற்": 206, "##ின்": 207, "##ாக": 208, "##று": 209, "##ிர": 210, "##டு": 211, "##ிக": 212, "##ண்ட": 213, "##்ச": 214, "##க்கு": 215, "##ர்க": 216, "##ிற": 217, "##ில": 218, "மு": 219, "##ான": 220, "##த்து": 221, "செ": 222, "என்": 223, "##டி": 224, "வி": 225, "##லை": 226, "##ற்ற": 227, "##ள்ள": 228, "##ார": 229, "##தி": 230, "##ார்": 231, "##ப்பு": 232, "##ிரு": 233, "##வு": 234, "##ட்டு": 235, "##ல்ல": 236, "##ரி": 237, "##வி": 238, "##க்": 239, "கு": 240, "##ான்": 241, "##ந்து": 242, "##ால்": 243, "##ளை": 244, "##ய்": 245, "##ச்ச": 246, "கொ": 247, "##த்": 248, "போ": 249, "இரு": 250, "##னை": 251, "அவ": 252, "கா": 253, "##ர்கள்": 254, "##ங்கள்": 255, "பெ": 256, "##ண்": 257, "##ம்ப": 258, "##றி": 259, "##ஸ்": 260, "##ாத": 261, "##மி": 262, "பு": 263, "##கள்": 264, "##கு": 265, "##ாவ": 266, "##மை": 267, "##ளு": 268, "வே": 269, "ஒரு": 270, "##க்கும்": 271, "##ின": 272, "##ழு": 273, "பா": 274, "அத": 275, "தொ": 276, "இந்த": 277, "வெ": 278, "##ண்டு": 279, "##ாம்": 280, "வா": 281, "##ற்க": 282, "##த்தில்": 283, "##டை": 284, "##ன்ன": 285, "செய": 286, "##ன்ற": 287, "##ழ்": 288, "##மா": 289, "##ிக்க": 290, "##டிய": 291, "நா": 292, "மா": 293, "##ச்": 294, "##ரை": 295, "##ரா": 296, "##வா": 297, "##ரிய": 298, "##தை": 299, "##ையில்": 300, "##ட்": 301, "##ளி": 302, "கூ": 303, "பொ": 304, "##வே": 305, "சு": 306, "##ால": 307, "##்த்த": 308, "தமி": 309, "மே": 310, "என": 311, "##றை": 312, "தே": 313, "சொ": 314, "பிர": 315, "##ங்கள": 316, "##வை": 317, "##ாம": 318, "சி": 319, "##ப்பட்ட": 320, "##ற்ப": 321, "##ையும்": 322, "##மாக": 323, "நி": 324, "##மு": 325, "##ண்ண": 326, "பே": 327, "##த்தை": 328, "##கிற": 329, "##திய": 330, "##ளுக்கு": 331, "தெ": 332, "என்று": 333, "##ட்ச": 334, "கோ": 335, "நீ": 336, "செய்த": 337, "##ிகள்": 338, "##வர்": 339, "##னி": 340, "##மான": 341, "##பு": 342, "என்ற": 343, "##வும்": 344, "##சு": 345, "##ன்று": 346, "##டுத்த": 347, "##னு": 348, "##கள": 349, "##டன்": 350, "மற்ற": 351, "##லி": 352, "##்கள்": 353, "##ர்கள": 354, "உள்ள": 355, "##ரும்": 356, "பகு": 357, "##சி": 358, "##ற்று": 359, "##ப்பட": 360, "##ாள": 361, "அர": 362, "செய்": 363, "பி": 364, "இத": 365, "##வத": 366, "##ணி": 367, "##வில்": 368, "##ின்ற": 369, "##ழி": 370, "##ாய": 371, "கே": 372, "##க்கிற": 373, "என்ப": 374, "##ேன்": 375, "நட": 376, "து": 377, "கி": 378, "##்கு": 379, "##சிய": 380, "##னர்": 381, "திரு": 382, "##ஞ்ச": 383, "மற்றும்": 384, "##டைய": 385, "##ண்டும்": 386, "##ிக்": 387, "தொட": 388, "வை": 389, "##பா": 390, "முத": 391, "##கம்": 392, "##டம்": 393, "மூ": 394, "##ங்கு": 395, "##லா": 396, "கரு": 397, "சே": 398, "##ியா": 399, "பய": 400, "செய்ய": 401, "வீ": 402, "பல": 403, "ஆக": 404, "##மைய": 405, "வரு": 406, "##வர": 407, "##ட்டி": 408, "மீ": 409, "##களை": 410, "##னால்": 411, "##வ்": 412, "##ப்பா": 413, "##ளிய": 414, "இது": 415, "எழு": 416, "இருந்த": 417, "பகுப்பு": 418, "அறி": 419, "தி": 420, "தமிழ்": 421, "##ப்பி": 422, "நில": 423, "##மே": 424, "அந்த": 425, "##ும்ப": 426, "வர": 427, "பதி": 428, "##ப்போ": 429, "##க்கிய": 430, "நே": 431, "தலை": 432, "தமிழ": 433, "##கை": 434, "##ணை": 435, "##லாம்": 436, "பார": 437, "##த்தின்": 438, "##விய": 439, "வழ": 440, "##ிருந்த": 441, "##டுத்து": 442, "கொண்ட": 443, "##ர்க்க": 444, "##ம்பர்": 445, "குறி": 446, "##ையை": 447, "கட": 448, "என்ன": 449, "##ூர்": 450, "அமை": 451, "##ற்கு": 452, "##ரம்": 453, "##லு": 454, "##ன்ப": 455, "##நா": 456, "##கிறது": 457, "##ிலும்": 458, "தீ": 459, "##ழை": 460, "##க்கள்": 461, "##வது": 462, "##லம்": 463, "##ங்களை": 464, "##ார்கள்": 465, "வெளிய": 466, "இய": 467, "##ிகள": 468, "இர": 469, "##ற்றி": 470, "##யர்": 471, "##ணம்": 472, "சம": 473, "##ங்கில": 474, "சா": 475, "##த்திய": 476, "சொல்ல": 477, "##க்கி": 478, "அதிக": 479, "வேண்டும்": 480, "##ாது": 481, "##னா": 482, "பத": 483, "நான்": 484, "அல்ல": 485, "இல்": 486, "பின்": 487, "இல": 488, "##ரோ": 489, "##பி": 490, "சிற": 491, "திர": 492, "##க்கம்": 493, "##ஸ்ட": 494, "வந்த": 495, "##போ": 496, "##ிற்கு": 497, "##டிக்க": 498, "பிற": 499 } } }