Jungwonchang's picture
Upload tokenizer
b364491
raw
history blame
11.5 kB
{
"kor": {
"[PAD]": 722,
"[UNK]": 721,
"k": 1,
"|": 0,
"’": 2,
"가": 3,
"각": 4,
"간": 5,
"갈": 6,
"감": 7,
"갑": 8,
"값": 9,
"갔": 10,
"강": 11,
"같": 12,
"갚": 13,
"갛": 14,
"개": 15,
"객": 16,
"거": 17,
"걱": 18,
"건": 19,
"걷": 20,
"걸": 21,
"검": 22,
"겁": 23,
"것": 24,
"게": 25,
"겠": 26,
"겨": 27,
"격": 28,
"견": 29,
"결": 30,
"겼": 31,
"경": 32,
"곁": 33,
"계": 34,
"고": 35,
"곡": 36,
"곤": 37,
"골": 38,
"곰": 39,
"곱": 40,
"곳": 41,
"공": 42,
"과": 43,
"관": 44,
"광": 45,
"괜": 46,
"괴": 47,
"굉": 48,
"교": 49,
"구": 50,
"국": 51,
"군": 52,
"굳": 53,
"굴": 54,
"권": 55,
"귀": 56,
"그": 57,
"근": 58,
"글": 59,
"금": 60,
"급": 61,
"긋": 62,
"기": 63,
"긴": 64,
"길": 65,
"김": 66,
"깊": 67,
"까": 68,
"깐": 69,
"깔": 70,
"깨": 71,
"꺼": 72,
"껄": 73,
"껑": 74,
"께": 75,
"꼈": 76,
"꼬": 77,
"꼭": 78,
"꼰": 79,
"꽉": 80,
"꾀": 81,
"꾸": 82,
"꾹": 83,
"꿈": 84,
"끄": 85,
"끈": 86,
"끊": 87,
"끌": 88,
"끓": 89,
"끔": 90,
"끝": 91,
"끼": 92,
"나": 93,
"낙": 94,
"난": 95,
"날": 96,
"남": 97,
"납": 98,
"났": 99,
"낮": 100,
"낯": 101,
"내": 102,
"냄": 103,
"냐": 104,
"냥": 105,
"너": 106,
"넉": 107,
"널": 108,
"넘": 109,
"네": 110,
"녀": 111,
"녁": 112,
"년": 113,
"녕": 114,
"노": 115,
"논": 116,
"놀": 117,
"놈": 118,
"농": 119,
"놓": 120,
"놔": 121,
"누": 122,
"눈": 123,
"눕": 124,
"느": 125,
"는": 126,
"늘": 127,
"늙": 128,
"능": 129,
"늦": 130,
"니": 131,
"닌": 132,
"님": 133,
"다": 134,
"닦": 135,
"단": 136,
"닫": 137,
"달": 138,
"닭": 139,
"닮": 140,
"담": 141,
"답": 142,
"닷": 143,
"당": 144,
"닿": 145,
"대": 146,
"댓": 147,
"더": 148,
"덕": 149,
"던": 150,
"덤": 151,
"덥": 152,
"덧": 153,
"덮": 154,
"데": 155,
"덴": 156,
"도": 157,
"독": 158,
"돈": 159,
"돌": 160,
"동": 161,
"됐": 162,
"되": 163,
"된": 164,
"될": 165,
"됨": 166,
"두": 167,
"둑": 168,
"둘": 169,
"둣": 170,
"둥": 171,
"뒤": 172,
"드": 173,
"득": 174,
"든": 175,
"듣": 176,
"들": 177,
"듬": 178,
"듯": 179,
"등": 180,
"디": 181,
"딘": 182,
"딪": 183,
"따": 184,
"딱": 185,
"딴": 186,
"땀": 187,
"땅": 188,
"때": 189,
"떠": 190,
"떡": 191,
"떤": 192,
"떨": 193,
"떻": 194,
"떼": 195,
"또": 196,
"똑": 197,
"똥": 198,
"뚜": 199,
"뚝": 200,
"뛰": 201,
"뜨": 202,
"뜩": 203,
"뜻": 204,
"띠": 205,
"띤": 206,
"라": 207,
"락": 208,
"란": 209,
"람": 210,
"랍": 211,
"랐": 212,
"랑": 213,
"랗": 214,
"래": 215,
"랫": 216,
"랬": 217,
"랴": 218,
"략": 219,
"러": 220,
"럭": 221,
"런": 222,
"럴": 223,
"럼": 224,
"렀": 225,
"렁": 226,
"렇": 227,
"레": 228,
"렌": 229,
"려": 230,
"력": 231,
"련": 232,
"렸": 233,
"령": 234,
"례": 235,
"로": 236,
"록": 237,
"론": 238,
"롭": 239,
"료": 240,
"룡": 241,
"루": 242,
"룩": 243,
"룽": 244,
"률": 245,
"르": 246,
"른": 247,
"를": 248,
"름": 249,
"릅": 250,
"릇": 251,
"릉": 252,
"리": 253,
"린": 254,
"릴": 255,
"림": 256,
"립": 257,
"릿": 258,
"마": 259,
"막": 260,
"만": 261,
"많": 262,
"말": 263,
"맘": 264,
"망": 265,
"맞": 266,
"맡": 267,
"매": 268,
"맹": 269,
"머": 270,
"먹": 271,
"먼": 272,
"멀": 273,
"멈": 274,
"멍": 275,
"메": 276,
"며": 277,
"면": 278,
"명": 279,
"몇": 280,
"모": 281,
"목": 282,
"몰": 283,
"몸": 284,
"몹": 285,
"못": 286,
"무": 287,
"묵": 288,
"문": 289,
"물": 290,
"뭇": 291,
"뭉": 292,
"뭐": 293,
"뭘": 294,
"므": 295,
"미": 296,
"민": 297,
"믿": 298,
"밀": 299,
"밑": 300,
"바": 301,
"박": 302,
"밖": 303,
"반": 304,
"받": 305,
"발": 306,
"밝": 307,
"밤": 308,
"밥": 309,
"방": 310,
"밭": 311,
"배": 312,
"백": 313,
"뱀": 314,
"버": 315,
"벅": 316,
"번": 317,
"벌": 318,
"법": 319,
"베": 320,
"벼": 321,
"벽": 322,
"변": 323,
"별": 324,
"볍": 325,
"병": 326,
"볕": 327,
"보": 328,
"복": 329,
"본": 330,
"볼": 331,
"봐": 332,
"봤": 333,
"부": 334,
"분": 335,
"불": 336,
"붉": 337,
"붐": 338,
"붙": 339,
"비": 340,
"빈": 341,
"빌": 342,
"빚": 343,
"빛": 344,
"빠": 345,
"빨": 346,
"빼": 347,
"뺄": 348,
"뺨": 349,
"뻘": 350,
"뼈": 351,
"뼉": 352,
"뽑": 353,
"뾰": 354,
"뿌": 355,
"뿐": 356,
"사": 357,
"산": 358,
"살": 359,
"삼": 360,
"삿": 361,
"상": 362,
"새": 363,
"색": 364,
"샘": 365,
"생": 366,
"서": 367,
"석": 368,
"섞": 369,
"선": 370,
"설": 371,
"섭": 372,
"섯": 373,
"섰": 374,
"성": 375,
"세": 376,
"센": 377,
"셔": 378,
"셨": 379,
"소": 380,
"속": 381,
"손": 382,
"솟": 383,
"송": 384,
"쇼": 385,
"수": 386,
"숙": 387,
"순": 388,
"술": 389,
"숨": 390,
"쉬": 391,
"쉴": 392,
"스": 393,
"슨": 394,
"슬": 395,
"슴": 396,
"습": 397,
"슷": 398,
"승": 399,
"시": 400,
"식": 401,
"신": 402,
"실": 403,
"싫": 404,
"심": 405,
"십": 406,
"싶": 407,
"싸": 408,
"싹": 409,
"쌀": 410,
"쌍": 411,
"쌓": 412,
"써": 413,
"썼": 414,
"쎄": 415,
"쏟": 416,
"쏴": 417,
"쑥": 418,
"쓰": 419,
"쓴": 420,
"쓸": 421,
"씀": 422,
"씨": 423,
"씩": 424,
"씬": 425,
"아": 426,
"악": 427,
"안": 428,
"앉": 429,
"않": 430,
"알": 431,
"암": 432,
"았": 433,
"앙": 434,
"앞": 435,
"애": 436,
"앤": 437,
"야": 438,
"약": 439,
"양": 440,
"얕": 441,
"어": 442,
"억": 443,
"언": 444,
"얹": 445,
"얻": 446,
"얼": 447,
"엄": 448,
"업": 449,
"없": 450,
"엇": 451,
"었": 452,
"엉": 453,
"엌": 454,
"엎": 455,
"에": 456,
"엔": 457,
"여": 458,
"역": 459,
"연": 460,
"열": 461,
"염": 462,
"였": 463,
"영": 464,
"옆": 465,
"예": 466,
"오": 467,
"옥": 468,
"온": 469,
"올": 470,
"옮": 471,
"옵": 472,
"옷": 473,
"와": 474,
"왔": 475,
"왜": 476,
"외": 477,
"요": 478,
"욕": 479,
"용": 480,
"우": 481,
"욱": 482,
"운": 483,
"울": 484,
"움": 485,
"웃": 486,
"워": 487,
"원": 488,
"월": 489,
"웠": 490,
"웬": 491,
"위": 492,
"유": 493,
"육": 494,
"윤": 495,
"으": 496,
"은": 497,
"을": 498,
"음": 499,
"읍": 500,
"의": 501,
"이": 502,
"인": 503,
"일": 504,
"읽": 505,
"잃": 506,
"임": 507,
"입": 508,
"잇": 509,
"있": 510,
"잊": 511,
"자": 512,
"작": 513,
"잔": 514,
"잘": 515,
"잠": 516,
"잡": 517,
"장": 518,
"재": 519,
"쟁": 520,
"저": 521,
"적": 522,
"전": 523,
"절": 524,
"젊": 525,
"점": 526,
"정": 527,
"제": 528,
"젯": 529,
"져": 530,
"졌": 531,
"조": 532,
"좀": 533,
"좁": 534,
"종": 535,
"좋": 536,
"죄": 537,
"죠": 538,
"주": 539,
"죽": 540,
"준": 541,
"줄": 542,
"줌": 543,
"중": 544,
"쥐": 545,
"즈": 546,
"즉": 547,
"즐": 548,
"증": 549,
"지": 550,
"직": 551,
"진": 552,
"질": 553,
"짐": 554,
"집": 555,
"짓": 556,
"징": 557,
"짖": 558,
"짜": 559,
"짝": 560,
"째": 561,
"쩍": 562,
"쪼": 563,
"쪽": 564,
"쫓": 565,
"쭉": 566,
"찌": 567,
"찔": 568,
"찢": 569,
"차": 570,
"착": 571,
"찬": 572,
"찮": 573,
"찰": 574,
"참": 575,
"창": 576,
"찾": 577,
"채": 578,
"책": 579,
"챘": 580,
"챙": 581,
"처": 582,
"척": 583,
"천": 584,
"철": 585,
"첨": 586,
"첫": 587,
"청": 588,
"체": 589,
"쳐": 590,
"쳤": 591,
"초": 592,
"총": 593,
"최": 594,
"추": 595,
"축": 596,
"춘": 597,
"출": 598,
"충": 599,
"취": 600,
"츠": 601,
"층": 602,
"치": 603,
"친": 604,
"칠": 605,
"침": 606,
"칭": 607,
"칵": 608,
"칼": 609,
"커": 610,
"컨": 611,
"컬": 612,
"켓": 613,
"켜": 614,
"켰": 615,
"코": 616,
"콘": 617,
"쾌": 618,
"쿵": 619,
"퀴": 620,
"크": 621,
"큰": 622,
"큼": 623,
"키": 624,
"킬": 625,
"타": 626,
"탄": 627,
"탓": 628,
"탕": 629,
"태": 630,
"택": 631,
"터": 632,
"턱": 633,
"털": 634,
"테": 635,
"텐": 636,
"토": 637,
"통": 638,
"퇴": 639,
"투": 640,
"툭": 641,
"튀": 642,
"트": 643,
"특": 644,
"튿": 645,
"틀": 646,
"틈": 647,
"티": 648,
"파": 649,
"판": 650,
"팔": 651,
"팠": 652,
"팡": 653,
"패": 654,
"퍼": 655,
"편": 656,
"폈": 657,
"평": 658,
"포": 659,
"폭": 660,
"표": 661,
"푹": 662,
"풀": 663,
"품": 664,
"프": 665,
"픈": 666,
"픔": 667,
"피": 668,
"필": 669,
"핏": 670,
"하": 671,
"학": 672,
"한": 673,
"할": 674,
"함": 675,
"합": 676,
"해": 677,
"핵": 678,
"햇": 679,
"했": 680,
"행": 681,
"향": 682,
"허": 683,
"헌": 684,
"헤": 685,
"혀": 686,
"혁": 687,
"현": 688,
"형": 689,
"혜": 690,
"호": 691,
"혹": 692,
"혼": 693,
"홍": 694,
"화": 695,
"확": 696,
"환": 697,
"활": 698,
"황": 699,
"회": 700,
"횡": 701,
"효": 702,
"후": 703,
"훈": 704,
"훌": 705,
"훔": 706,
"훤": 707,
"훨": 708,
"휘": 709,
"휙": 710,
"흉": 711,
"흔": 712,
"흘": 713,
"흙": 714,
"흥": 715,
"흩": 716,
"희": 717,
"히": 718,
"힌": 719,
"힘": 720
}
}