TeeA committed on
Commit
e49220a
1 Parent(s): a54a8b0

Training done

Browse files
added_tokens.json CHANGED
@@ -1,44 +1,131 @@
1
  {
2
- "</s_benh_vien>": 59529,
3
- "</s_chieu>": 59559,
4
- "</s_dia_chi>": 59547,
5
- "</s_doi_tuong>": 59543,
6
- "</s_drug>": 59549,
7
- "</s_gioi_tinh>": 59539,
8
- "</s_khoa_dieu_tri>": 59527,
9
- "</s_lieu_thuoc>": 59551,
10
- "</s_ma_doi_tuong>": 59545,
11
- "</s_ma_y_te>": 59541,
12
- "</s_sang>": 59555,
13
- "</s_so_luong>": 59553,
14
- "</s_so_vao_vien>": 59531,
15
- "</s_so_y_te>": 59525,
16
- "</s_ten_benh_nhan>": 59535,
17
- "</s_toa_thuoc>": 59533,
18
- "</s_trua>": 59557,
19
- "</s_tuoi>": 59537,
20
- "<ocr-toa-thuoc>": 59560,
21
- "<s_benh_vien>": 59528,
22
- "<s_chieu>": 59558,
23
- "<s_dia_chi>": 59546,
24
- "<s_doi_tuong>": 59542,
25
- "<s_drug>": 59548,
26
- "<s_gioi_tinh>": 59538,
27
  "<s_iitcdip>": 57523,
28
- "<s_khoa_dieu_tri>": 59526,
29
- "<s_lieu_thuoc>": 59550,
30
- "<s_ma_doi_tuong>": 59544,
31
- "<s_ma_y_te>": 59540,
32
- "<s_sang>": 59554,
33
- "<s_so_luong>": 59552,
34
- "<s_so_vao_vien>": 59530,
35
- "<s_so_y_te>": 59524,
36
  "<s_synthdog>": 57524,
37
- "<s_ten_benh_nhan>": 59534,
38
- "<s_toa_thuoc>": 59532,
39
- "<s_trua>": 59556,
40
- "<s_tuoi>": 59536,
41
  "<sep/>": 57522,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  "a!": 58437,
43
  "a)": 58675,
44
  "a,": 57925,
@@ -1176,6 +1263,135 @@
1176
  "yễn": 58653,
1177
  "yện": 57958,
1178
  "yệt": 57774,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1179
  "à!": 58201,
1180
  "à)": 58710,
1181
  "à,": 57961,
@@ -1442,6 +1658,12 @@
1442
  "ýp": 58975,
1443
  "ýt": 58031,
1444
  "ýu": 59022,
 
 
 
 
 
 
1445
  "ă,Ă": 57899,
1446
  "ăc": 58625,
1447
  "ăccoóc": 57900,
@@ -1477,6 +1699,12 @@
1477
  "ăngđa": 59069,
1478
  "ăngđôlin": 59070,
1479
  "ăảm": 59338,
 
 
 
 
 
 
1480
  "ĩ<SUP>o": 58694,
1481
  "ĩa": 57683,
1482
  "ĩa)": 59350,
@@ -1486,6 +1714,13 @@
1486
  "ĩnh": 57750,
1487
  "ĩu": 58242,
1488
  "ĩữ": 59235,
 
 
 
 
 
 
 
1489
  "ũ,": 58002,
1490
  "ũa": 57955,
1491
  "ũa;": 58505,
@@ -1494,6 +1729,13 @@
1494
  "ũn": 58447,
1495
  "ũy": 58289,
1496
  "ũy,": 59068,
 
 
 
 
 
 
 
1497
  "ơ": 57620,
1498
  "ơcanh": 59055,
1499
  "ơcua": 59507,
@@ -1521,6 +1763,12 @@
1521
  "ơu": 58325,
1522
  "ơì": 57866,
1523
  "ơí": 58520,
 
 
 
 
 
 
1524
  "ư": 57674,
1525
  "ư,": 59160,
1526
  "ưa": 57840,
@@ -1590,6 +1838,19 @@
1590
  "ượp": 58491,
1591
  "ượt": 57842,
1592
  "ượu": 58090,
 
 
 
 
 
 
 
 
 
 
 
 
 
1593
  "ạ,": 58458,
1594
  "ạ;": 59282,
1595
  "ạc": 57584,
@@ -1615,6 +1876,16 @@
1615
  "ạy": 57795,
1616
  "ạị": 59220,
1617
  "ạồm": 58415,
 
 
 
 
 
 
 
 
 
 
1618
  "ả": 57575,
1619
  "ả)": 58936,
1620
  "ả,": 58650,
@@ -1635,6 +1906,13 @@
1635
  "ảy": 57928,
1636
  "ảy,": 59458,
1637
  "ảẳm": 59344,
 
 
 
 
 
 
 
1638
  "ấc": 57743,
1639
  "ấm": 57823,
1640
  "ấm,": 59123,
@@ -1647,6 +1925,10 @@
1647
  "ất;": 58696,
1648
  "ấu": 57844,
1649
  "ấy": 57662,
 
 
 
 
1650
  "ầm": 57974,
1651
  "ầm,": 59162,
1652
  "ần": 57631,
@@ -1656,16 +1938,31 @@
1656
  "ầu,": 58314,
1657
  "ầy": 57865,
1658
  "ầy;": 58604,
 
 
 
 
1659
  "ẩm": 57832,
1660
  "ẩn": 57803,
1661
  "ẩng": 58417,
1662
  "ẩu": 57530,
1663
  "ẩy": 58063,
 
 
 
 
1664
  "ẫm": 57911,
1665
  "ẫn": 57862,
1666
  "ẫng": 58323,
1667
  "ẫu": 57985,
1668
  "ẫy": 58199,
 
 
 
 
 
 
 
1669
  "ậc": 58037,
1670
  "ậm": 58007,
1671
  "ậm,": 58269,
@@ -1678,6 +1975,11 @@
1678
  "ậu": 57582,
1679
  "ậu,": 57977,
1680
  "ậy": 57912,
 
 
 
 
 
1681
  "ắc": 57641,
1682
  "ắm": 57864,
1683
  "ắm)": 58676,
@@ -1691,21 +1993,33 @@
1691
  "ắt": 57663,
1692
  "ắt,": 58512,
1693
  "ắác": 59337,
 
 
1694
  "ằm,": 59105,
1695
  "ằn": 58020,
1696
  "ằng": 57578,
1697
  "ằng,": 59430,
1698
  "ằng;": 58645,
 
 
1699
  "ẳm": 58421,
1700
  "ẳn": 58035,
1701
  "ẳnc": 59188,
1702
  "ẳng": 57860,
1703
  "ẳón": 58394,
 
 
 
1704
  "ẵm": 57908,
1705
  "ẵn": 58078,
1706
  "ẵng": 58176,
1707
  "ẵp": 59342,
1708
  "ẵắn": 59340,
 
 
 
 
 
1709
  "ặc": 57847,
1710
  "ặc,": 58584,
1711
  "ặm": 58114,
@@ -1716,6 +2030,12 @@
1716
  "ặt": 57868,
1717
  "ặt,": 58833,
1718
  "ặạt": 59339,
 
 
 
 
 
 
1719
  "ẹ": 57804,
1720
  "ẹc": 58642,
1721
  "ẹm": 58721,
@@ -1723,6 +2043,11 @@
1723
  "ẹo": 58133,
1724
  "ẹp": 57857,
1725
  "ẹt": 57835,
 
 
 
 
 
1726
  "ẻ": 57719,
1727
  "ẻ,": 59408,
1728
  "ẻm": 58210,
@@ -1730,12 +2055,26 @@
1730
  "ẻng": 58334,
1731
  "ẻo": 57913,
1732
  "ẻo,": 58357,
 
 
 
 
 
 
1733
  "ẽ": 58066,
1734
  "ẽ,": 58960,
1735
  "ẽm": 58119,
1736
  "ẽn": 57834,
1737
  "ẽo": 57979,
1738
  "ẽê": 59346,
 
 
 
 
 
 
 
 
1739
  "ế": 57708,
1740
  "ế,": 58662,
1741
  "ếc": 58229,
@@ -1753,6 +2092,11 @@
1753
  "ết,": 58524,
1754
  "ếu": 57643,
1755
  "ếu,": 58248,
 
 
 
 
 
1756
  "ề": 57848,
1757
  "ề,": 58459,
1758
  "ềm": 58186,
@@ -1763,6 +2107,11 @@
1763
  "ều": 57687,
1764
  "ều,": 57937,
1765
  "ều;": 58668,
 
 
 
 
 
1766
  "ể": 57799,
1767
  "ể,": 58568,
1768
  "ểm": 57610,
@@ -1771,6 +2120,10 @@
1771
  "ểng": 58889,
1772
  "ểnh": 58282,
1773
  "ểu": 57604,
 
 
 
 
1774
  "ễ": 57749,
1775
  "ễ,": 59152,
1776
  "ễm": 57684,
@@ -1779,6 +2132,14 @@
1779
  "ễnh": 58122,
1780
  "ễnh;": 58944,
1781
  "ễu": 58154,
 
 
 
 
 
 
 
 
1782
  "ệ": 57775,
1783
  "ệ;": 58951,
1784
  "ệc": 58024,
@@ -1793,6 +2154,12 @@
1793
  "ệt": 57589,
1794
  "ệt;": 58941,
1795
  "ệu": 57715,
 
 
 
 
 
 
1796
  "ỉ": 57627,
1797
  "ỉ,": 59157,
1798
  "ỉa": 57935,
@@ -1801,6 +2168,14 @@
1801
  "ỉn": 58303,
1802
  "ỉnh": 58156,
1803
  "ỉu": 58041,
 
 
 
 
 
 
 
 
1804
  "ị": 57721,
1805
  "ị,": 59482,
1806
  "ịa": 57556,
@@ -1814,6 +2189,16 @@
1814
  "ịt;": 58321,
1815
  "ịu": 57936,
1816
  "ịệp": 59360,
 
 
 
 
 
 
 
 
 
 
1817
  "ọ,": 59163,
1818
  "ọa": 57767,
1819
  "ọc": 57553,
@@ -1830,6 +2215,15 @@
1830
  "ọp": 58222,
1831
  "ọt": 57854,
1832
  "ọt,": 58945,
 
 
 
 
 
 
 
 
 
1833
  "ỏ": 57614,
1834
  "ỏ,": 59032,
1835
  "ỏa": 58214,
@@ -1841,6 +2235,13 @@
1841
  "ỏn": 58260,
1842
  "ỏng": 57565,
1843
  "ỏng,": 58368,
 
 
 
 
 
 
 
1844
  "ố": 57686,
1845
  "ốc": 57690,
1846
  "ốc,": 58315,
@@ -1855,6 +2256,12 @@
1855
  "ốp": 58134,
1856
  "ốt": 57817,
1857
  "ốt,": 58347,
 
 
 
 
 
 
1858
  "ồ": 57813,
1859
  "ồ,": 58756,
1860
  "ồi": 57736,
@@ -1865,6 +2272,13 @@
1865
  "ồng": 57705,
1866
  "ồng,": 58445,
1867
  "ồí": 58730,
 
 
 
 
 
 
 
1868
  "ổ": 57771,
1869
  "ổ,": 58033,
1870
  "ổ;": 58542,
@@ -1875,12 +2289,23 @@
1875
  "ổng": 57962,
1876
  "ổng,": 58360,
1877
  "ổí": 58578,
 
 
 
 
 
1878
  "ỗ": 57824,
1879
  "ỗ,": 57946,
1880
  "ỗi": 57597,
1881
  "ỗm": 58457,
1882
  "ỗn": 58142,
1883
  "ỗng": 57963,
 
 
 
 
 
 
1884
  "ộ": 57541,
1885
  "ộc": 57606,
1886
  "ộc)": 58606,
@@ -1897,6 +2322,16 @@
1897
  "ột": 57618,
1898
  "ột)": 57595,
1899
  "ột,": 58312,
 
 
 
 
 
 
 
 
 
 
1900
  "ớ": 57839,
1901
  "ớc": 57757,
1902
  "ớc,": 57952,
@@ -1912,6 +2347,16 @@
1912
  "ớu": 58327,
1913
  "ớì": 57969,
1914
  "ớỉ": 58638,
 
 
 
 
 
 
 
 
 
 
1915
  "ờ": 57672,
1916
  "ờ,": 57812,
1917
  "ời": 57667,
@@ -1929,6 +2374,12 @@
1929
  "ờì": 58713,
1930
  "ờí": 58620,
1931
  "ờõ": 59141,
 
 
 
 
 
 
1932
  "ở": 57918,
1933
  "ở,": 57950,
1934
  "ởi": 57992,
@@ -1938,6 +2389,14 @@
1938
  "ởng": 57747,
1939
  "ởu": 58059,
1940
  "ởì": 58488,
 
 
 
 
 
 
 
 
1941
  "ỡ": 57819,
1942
  "ỡi": 58079,
1943
  "ỡm": 58895,
@@ -1947,6 +2406,15 @@
1947
  "ỡì": 58392,
1948
  "ỡí": 58110,
1949
  "ỡỉ": 58419,
 
 
 
 
 
 
 
 
 
1950
  "ợ": 57724,
1951
  "ợ,": 58313,
1952
  "ợc": 57981,
@@ -1958,6 +2426,16 @@
1958
  "ợp": 58097,
1959
  "ợt": 57841,
1960
  "ợu": 58089,
 
 
 
 
 
 
 
 
 
 
1961
  "ụ;": 58448,
1962
  "ụa": 57855,
1963
  "ụa,": 59146,
@@ -1972,6 +2450,15 @@
1972
  "ụt": 57836,
1973
  "ụt,": 58873,
1974
  "ụy": 58157,
 
 
 
 
 
 
 
 
 
1975
  "ủ": 57755,
1976
  "ủ,": 58624,
1977
  "ủ;": 58695,
@@ -1984,6 +2471,12 @@
1984
  "ủo": 58997,
1985
  "ủy": 58224,
1986
  "ủó": 58998,
 
 
 
 
 
 
1987
  "ứ": 57837,
1988
  "ứ!": 58903,
1989
  "ứa": 58017,
@@ -1993,6 +2486,14 @@
1993
  "ứng,": 59124,
1994
  "ứt": 57973,
1995
  "ứu": 58268,
 
 
 
 
 
 
 
 
1996
  "ừ": 57780,
1997
  "ừ!": 58902,
1998
  "ừ,": 59115,
@@ -2003,12 +2504,18 @@
2003
  "ừng": 57592,
2004
  "ừng,": 57947,
2005
  "ừu": 58146,
 
 
 
2006
  "ử": 57675,
2007
  "ửa": 57909,
2008
  "ửa,": 58446,
2009
  "ửi": 57993,
2010
  "ửng": 58006,
2011
  "ửu": 58127,
 
 
 
2012
  "ữ": 57682,
2013
  "ữa": 57920,
2014
  "ữa,": 58562,
@@ -2016,6 +2523,12 @@
2016
  "ững": 58218,
2017
  "ữu": 57678,
2018
  "ữu,": 59154,
 
 
 
 
 
 
2019
  "ự": 57711,
2020
  "ự,": 59355,
2021
  "ự;": 58649,
@@ -2028,6 +2541,12 @@
2028
  "ựu,": 58506,
2029
  "ỳ": 57546,
2030
  "ỳnh": 57571,
 
 
 
 
 
 
2031
  "ỵ": 57548,
2032
  "ỵa": 58797,
2033
  "ỵch": 58901,
@@ -2035,6 +2554,7 @@
2035
  "ỵp": 59305,
2036
  "ỵt": 58015,
2037
  "ỵu": 58976,
 
2038
  "ỷ": 57630,
2039
  "ỷnh": 58973,
2040
  "ỷu": 58803
 
1
  {
2
+ "</s_benh_vien>": 60049,
3
+ "</s_chieu>": 60079,
4
+ "</s_dia_chi>": 60067,
5
+ "</s_doi_tuong>": 60063,
6
+ "</s_drug>": 60069,
7
+ "</s_gioi_tinh>": 60059,
8
+ "</s_khoa_dieu_tri>": 60047,
9
+ "</s_lieu_thuoc>": 60071,
10
+ "</s_ma_doi_tuong>": 60065,
11
+ "</s_ma_y_te>": 60061,
12
+ "</s_sang>": 60075,
13
+ "</s_so_luong>": 60073,
14
+ "</s_so_vao_vien>": 60051,
15
+ "</s_so_y_te>": 60045,
16
+ "</s_ten_benh_nhan>": 60055,
17
+ "</s_toa_thuoc>": 60053,
18
+ "</s_trua>": 60077,
19
+ "</s_tuoi>": 60057,
20
+ "<ocr-toa-thuoc>": 60080,
21
+ "<s_benh_vien>": 60048,
22
+ "<s_chieu>": 60078,
23
+ "<s_dia_chi>": 60066,
24
+ "<s_doi_tuong>": 60062,
25
+ "<s_drug>": 60068,
26
+ "<s_gioi_tinh>": 60058,
27
  "<s_iitcdip>": 57523,
28
+ "<s_khoa_dieu_tri>": 60046,
29
+ "<s_lieu_thuoc>": 60070,
30
+ "<s_ma_doi_tuong>": 60064,
31
+ "<s_ma_y_te>": 60060,
32
+ "<s_sang>": 60074,
33
+ "<s_so_luong>": 60072,
34
+ "<s_so_vao_vien>": 60050,
35
+ "<s_so_y_te>": 60044,
36
  "<s_synthdog>": 57524,
37
+ "<s_ten_benh_nhan>": 60054,
38
+ "<s_toa_thuoc>": 60052,
39
+ "<s_trua>": 60076,
40
+ "<s_tuoi>": 60056,
41
  "<sep/>": 57522,
42
+ "A!": 59911,
43
+ "A)": 59952,
44
+ "A,": 59733,
45
+ "A;": 59925,
46
+ "AY": 59679,
47
+ "E,": 59847,
48
+ "I!": 59898,
49
+ "I)": 60025,
50
+ "I,": 59737,
51
+ "I3": 59936,
52
+ "I;": 59905,
53
+ "IÀ": 59766,
54
+ "IÁ": 59715,
55
+ "IÃ": 59819,
56
+ "IÉ": 59969,
57
+ "IÊ": 59858,
58
+ "IÒ": 59835,
59
+ "IÓ": 59699,
60
+ "IÔ": 59971,
61
+ "IÚ": 59974,
62
+ "IÝ": 60034,
63
+ "IŨ": 59707,
64
+ "IƠ": 59769,
65
+ "IẠ": 59806,
66
+ "IẢ": 59572,
67
+ "IẺ": 59876,
68
+ "IỀ": 59964,
69
+ "IỌ": 59970,
70
+ "IỎ": 59797,
71
+ "IỒ": 59972,
72
+ "IỖ": 59767,
73
+ "IỘ": 59973,
74
+ "IỜ": 59623,
75
+ "IỞ": 59966,
76
+ "IỪ": 59975,
77
+ "IỮ": 59710,
78
+ "O,": 59738,
79
+ "O;": 59932,
80
+ "OE": 59908,
81
+ "O]": 60043,
82
+ "OÀ": 59639,
83
+ "OÁ": 59610,
84
+ "OÃ": 59823,
85
+ "OÈ": 59812,
86
+ "OÉ": 59799,
87
+ "OẠ": 59650,
88
+ "OẢ": 59772,
89
+ "OẸ": 59801,
90
+ "OẺ": 59989,
91
+ "OẼ": 60039,
92
+ "U,": 59742,
93
+ "U;": 59871,
94
+ "UY": 59593,
95
+ "UÀ": 59778,
96
+ "UÁ": 59853,
97
+ "UÈ": 59899,
98
+ "UÉ": 59961,
99
+ "UÊ": 59698,
100
+ "UÌ": 59907,
101
+ "UÍ": 59897,
102
+ "UÝ": 59647,
103
+ "UĨ": 59930,
104
+ "UƠ": 59852,
105
+ "UẠ": 59909,
106
+ "UẢ": 59582,
107
+ "UẺ": 59629,
108
+ "UẼ": 60026,
109
+ "UẾ": 59826,
110
+ "UỀ": 59951,
111
+ "UỂ": 60038,
112
+ "UỆ": 59665,
113
+ "UỈ": 59585,
114
+ "UỊ": 59948,
115
+ "UỌ": 60027,
116
+ "UỚ": 60001,
117
+ "UỜ": 59991,
118
+ "UỞ": 59855,
119
+ "UỲ": 59537,
120
+ "UỴ": 59538,
121
+ "UỶ": 59586,
122
+ "UỸ": 59885,
123
+ "Y!": 60020,
124
+ "Y)": 59996,
125
+ "Y,": 59732,
126
+ "Y;": 59937,
127
+ "YD": 59621,
128
+ "YÔ": 60003,
129
  "a!": 58437,
130
  "a)": 58675,
131
  "a,": 57925,
 
1263
  "yễn": 58653,
1264
  "yện": 57958,
1265
  "yệt": 57774,
1266
+ "À!": 59856,
1267
+ "À)": 59957,
1268
+ "À,": 59751,
1269
+ "À;": 59943,
1270
+ "ÀM": 59685,
1271
+ "ÀN": 59532,
1272
+ "ÀO": 59551,
1273
+ "ÀU": 59672,
1274
+ "ÀY": 59548,
1275
+ "Á,": 59671,
1276
+ "ÁC": 59529,
1277
+ "ÁI": 59562,
1278
+ "ÁN": 59533,
1279
+ "ÁO": 59564,
1280
+ "ÁP": 59625,
1281
+ "ÁT": 59630,
1282
+ "ÁU": 59763,
1283
+ "ÁY": 59719,
1284
+ "Â": 59791,
1285
+ "Â,": 59790,
1286
+ "ÂM": 59567,
1287
+ "ÂN": 59560,
1288
+ "ÂP": 59887,
1289
+ "ÂU": 59554,
1290
+ "ÂY": 59622,
1291
+ "Ã": 59690,
1292
+ "Ã)": 59955,
1293
+ "ÃI": 59642,
1294
+ "ÃM": 59824,
1295
+ "ÃN": 59583,
1296
+ "ÃO": 59640,
1297
+ "ÃY": 59810,
1298
+ "È": 59600,
1299
+ "È)": 59848,
1300
+ "ÈM": 59859,
1301
+ "ÈN": 59759,
1302
+ "ÈO": 59658,
1303
+ "ÈY": 59906,
1304
+ "ÈÕ": 59999,
1305
+ "ÉC": 59857,
1306
+ "ÉM": 59765,
1307
+ "ÉN": 59700,
1308
+ "ÉO": 59546,
1309
+ "ÉP": 59668,
1310
+ "ÉT": 59687,
1311
+ "Ê": 59618,
1312
+ "Ê,": 59910,
1313
+ "ÊH": 59886,
1314
+ "ÊM": 59649,
1315
+ "ÊN": 59558,
1316
+ "ÊP": 59931,
1317
+ "ÊT": 59869,
1318
+ "ÊU": 59632,
1319
+ "ÊY": 59920,
1320
+ "ÊÔ": 59983,
1321
+ "Ì)": 59954,
1322
+ "Ì,": 60012,
1323
+ "Ì3": 60042,
1324
+ "ÌA": 59680,
1325
+ "ÌI": 60033,
1326
+ "ÌM": 59808,
1327
+ "ÌN": 59638,
1328
+ "ÌU": 59796,
1329
+ "Í'": 59953,
1330
+ "Í)": 59965,
1331
+ "Í,": 59977,
1332
+ "ÍA": 59813,
1333
+ "ÍC": 60032,
1334
+ "ÍM": 59831,
1335
+ "ÍN": 59746,
1336
+ "ÍP": 59870,
1337
+ "ÍT": 59540,
1338
+ "ÍU": 59827,
1339
+ "Ò": 59681,
1340
+ "Ò)": 59995,
1341
+ "Ò,": 60018,
1342
+ "Ò;": 59994,
1343
+ "ÒA": 59664,
1344
+ "ÒE": 59811,
1345
+ "ÒI": 59747,
1346
+ "ÒM": 59878,
1347
+ "ÒN": 59729,
1348
+ "Ó,": 59782,
1349
+ "Ó;": 59888,
1350
+ "ÓA": 59675,
1351
+ "ÓC": 59721,
1352
+ "ÓE": 59798,
1353
+ "ÓI": 59725,
1354
+ "ÓM": 59849,
1355
+ "ÓN": 59851,
1356
+ "ÓP": 59818,
1357
+ "ÓT": 59683,
1358
+ "Ô": 59576,
1359
+ "Ô!": 60021,
1360
+ "Ô,": 60019,
1361
+ "ÔC": 59984,
1362
+ "ÔI": 59557,
1363
+ "ÔM": 59549,
1364
+ "ÔN": 59525,
1365
+ "ÔR": 59935,
1366
+ "ÔT": 59866,
1367
+ "ÔỂ": 60009,
1368
+ "Õ": 59660,
1369
+ "ÕA": 59822,
1370
+ "ÕI": 59758,
1371
+ "ÕM": 59743,
1372
+ "ÕN": 59814,
1373
+ "Ù": 59535,
1374
+ "Ù;": 59986,
1375
+ "ÙA": 59786,
1376
+ "ÙI": 59736,
1377
+ "ÙM": 59776,
1378
+ "ÙN": 59662,
1379
+ "ÙY": 59868,
1380
+ "Ú,": 60015,
1381
+ "ÚA": 59626,
1382
+ "ÚC": 59545,
1383
+ "ÚI": 59682,
1384
+ "ÚM": 59861,
1385
+ "ÚN": 59874,
1386
+ "ÚP": 59802,
1387
+ "ÚT": 59718,
1388
+ "ÚY": 59928,
1389
+ "Ý": 59588,
1390
+ "Ý,": 59938,
1391
+ "ÝN": 60016,
1392
+ "ÝP": 59992,
1393
+ "ÝT": 59787,
1394
+ "ÝU": 60000,
1395
  "à!": 58201,
1396
  "à)": 58710,
1397
  "à,": 57961,
 
1658
  "ýp": 58975,
1659
  "ýt": 58031,
1660
  "ýu": 59022,
1661
+ "Ă": 59720,
1662
+ "ĂC": 59941,
1663
+ "ĂH": 59921,
1664
+ "ĂK": 59942,
1665
+ "ĂM": 59704,
1666
+ "ĂN": 59606,
1667
  "ă,Ă": 57899,
1668
  "ăc": 58625,
1669
  "ăccoóc": 57900,
 
1699
  "ăngđa": 59069,
1700
  "ăngđôlin": 59070,
1701
  "ăảm": 59338,
1702
+ "Ĩ": 59615,
1703
+ "ĨA": 59612,
1704
+ "ĨM": 59976,
1705
+ "ĨN": 59919,
1706
+ "ĨU": 59872,
1707
+ "ĨỮ": 60023,
1708
  "ĩ<SUP>o": 58694,
1709
  "ĩa": 57683,
1710
  "ĩa)": 59350,
 
1714
  "ĩnh": 57750,
1715
  "ĩu": 58242,
1716
  "ĩữ": 59235,
1717
+ "Ũ": 59654,
1718
+ "Ũ,": 59775,
1719
+ "ŨA": 59750,
1720
+ "ŨI": 59773,
1721
+ "ŨM": 59889,
1722
+ "ŨN": 59912,
1723
+ "ŨY": 59884,
1724
  "ũ,": 58002,
1725
  "ũa": 57955,
1726
  "ũa;": 58505,
 
1729
  "ũn": 58447,
1730
  "ũy": 58289,
1731
  "ũy,": 59068,
1732
+ "Ơ": 59579,
1733
+ "ƠI": 59526,
1734
+ "ƠM": 59661,
1735
+ "ƠN": 59536,
1736
+ "ƠU": 59895,
1737
+ "ƠÌ": 59713,
1738
+ "ƠÍ": 59927,
1739
  "ơ": 57620,
1740
  "ơcanh": 59055,
1741
  "ơcua": 59507,
 
1763
  "ơu": 58325,
1764
  "ơì": 57866,
1765
  "ơí": 58520,
1766
+ "Ư": 59604,
1767
+ "Ư,": 60013,
1768
+ "ƯA": 59692,
1769
+ "ƯN": 59923,
1770
+ "ƯU": 59547,
1771
+ "ƯỜ": 60008,
1772
  "ư": 57674,
1773
  "ư,": 59160,
1774
  "ưa": 57840,
 
1838
  "ượp": 58491,
1839
  "ượt": 57842,
1840
  "ượu": 58090,
1841
+ "Ạ": 59561,
1842
+ "Ạ,": 59915,
1843
+ "Ạ;": 60028,
1844
+ "ẠC": 59556,
1845
+ "ẠI": 59573,
1846
+ "ẠM": 59581,
1847
+ "ẠN": 59717,
1848
+ "ẠO": 59568,
1849
+ "ẠP": 59768,
1850
+ "ẠT": 59659,
1851
+ "ẠU": 59845,
1852
+ "ẠY": 59663,
1853
+ "ẠỊ": 60022,
1854
  "ạ,": 58458,
1855
  "ạ;": 59282,
1856
  "ạc": 57584,
 
1876
  "ạy": 57795,
1877
  "ạị": 59220,
1878
  "ạồm": 58415,
1879
+ "Ả": 59550,
1880
+ "Ả)": 59985,
1881
+ "Ả,": 59947,
1882
+ "Ả;": 59902,
1883
+ "ẢI": 59605,
1884
+ "ẢM": 59566,
1885
+ "ẢN": 59793,
1886
+ "ẢO": 59594,
1887
+ "ẢU": 59846,
1888
+ "ẢY": 59735,
1889
  "ả": 57575,
1890
  "ả)": 58936,
1891
  "ả,": 58650,
 
1906
  "ảy": 57928,
1907
  "ảy,": 59458,
1908
  "ảẳm": 59344,
1909
+ "ẤC": 59637,
1910
+ "ẤM": 59676,
1911
+ "ẤN": 59635,
1912
+ "ẤP": 59534,
1913
+ "ẤT": 59655,
1914
+ "ẤU": 59694,
1915
+ "ẤY": 59596,
1916
  "ấc": 57743,
1917
  "ấm": 57823,
1918
  "ấm,": 59123,
 
1925
  "ất;": 58696,
1926
  "ấu": 57844,
1927
  "ấy": 57662,
1928
+ "ẦM": 59757,
1929
+ "ẦN": 59587,
1930
+ "ẦU": 59552,
1931
+ "ẦY": 59712,
1932
  "ầm": 57974,
1933
  "ầm,": 59162,
1934
  "ần": 57631,
 
1938
  "ầu,": 58314,
1939
  "ầy": 57865,
1940
  "ầy;": 58604,
1941
+ "ẨM": 59684,
1942
+ "ẨN": 59667,
1943
+ "ẨU": 59527,
1944
+ "ẨY": 59805,
1945
  "ẩm": 57832,
1946
  "ẩn": 57803,
1947
  "ẩng": 58417,
1948
  "ẩu": 57530,
1949
  "ẩy": 58063,
1950
+ "ẪM": 59726,
1951
+ "ẪN": 59709,
1952
+ "ẪU": 59764,
1953
+ "ẪY": 59854,
1954
  "ẫm": 57911,
1955
  "ẫn": 57862,
1956
  "ẫng": 58323,
1957
  "ẫu": 57985,
1958
  "ẫy": 58199,
1959
+ "ẬC": 59792,
1960
+ "ẬM": 59777,
1961
+ "ẬN": 59590,
1962
+ "ẬP": 59599,
1963
+ "ẬT": 59524,
1964
+ "ẬU": 59555,
1965
+ "ẬY": 59727,
1966
  "ậc": 58037,
1967
  "ậm": 58007,
1968
  "ậm,": 58269,
 
1975
  "ậu": 57582,
1976
  "ậu,": 57977,
1977
  "ậy": 57912,
1978
+ "ẮC": 59589,
1979
+ "ẮM": 59711,
1980
+ "ẮN": 59706,
1981
+ "ẮP": 59722,
1982
+ "ẮT": 59597,
1983
  "ắc": 57641,
1984
  "ắm": 57864,
1985
  "ắm)": 58676,
 
1993
  "ắt": 57663,
1994
  "ắt,": 58512,
1995
  "ắác": 59337,
1996
+ "ẰM": 59734,
1997
+ "ẰN": 59781,
1998
  "ằm,": 59105,
1999
  "ằn": 58020,
2000
  "ằng": 57578,
2001
  "ằng,": 59430,
2002
  "ằng;": 58645,
2003
+ "ẲM": 59904,
2004
+ "ẲN": 59789,
2005
  "ẳm": 58421,
2006
  "ẳn": 58035,
2007
  "ẳnc": 59188,
2008
  "ẳng": 57860,
2009
  "ẳón": 58394,
2010
+ "ẴM": 59723,
2011
+ "ẴN": 59816,
2012
+ "ẴP": 60030,
2013
  "ẵm": 57908,
2014
  "ẵn": 58078,
2015
  "ẵng": 58176,
2016
  "ẵp": 59342,
2017
  "ẵắn": 59340,
2018
+ "ẶC": 59696,
2019
+ "ẶM": 59830,
2020
+ "ẶN": 59701,
2021
+ "ẶP": 59678,
2022
+ "ẶT": 59714,
2023
  "ặc": 57847,
2024
  "ặc,": 58584,
2025
  "ặm": 58114,
 
2030
  "ặt": 57868,
2031
  "ặt,": 58833,
2032
  "ặạt": 59339,
2033
+ "ẸC": 59945,
2034
+ "ẸM": 59959,
2035
+ "ẸN": 59739,
2036
+ "ẸO": 59836,
2037
+ "ẸP": 59705,
2038
+ "ẸT": 59688,
2039
  "ẹ": 57804,
2040
  "ẹc": 58642,
2041
  "ẹm": 58721,
 
2043
  "ẹo": 58133,
2044
  "ẹp": 57857,
2045
  "ẹt": 57835,
2046
+ "Ẻ": 59628,
2047
+ "Ẻ,": 60037,
2048
+ "ẺM": 59860,
2049
+ "ẺN": 59880,
2050
+ "ẺO": 59728,
2051
  "ẻ": 57719,
2052
  "ẻ,": 59408,
2053
  "ẻm": 58210,
 
2055
  "ẻng": 58334,
2056
  "ẻo": 57913,
2057
  "ẻo,": 58357,
2058
+ "Ẽ": 59807,
2059
+ "Ẽ,": 59990,
2060
+ "ẼM": 59832,
2061
+ "ẼN": 59686,
2062
+ "ẼO": 59760,
2063
+ "ẼÊ": 60031,
2064
  "ẽ": 58066,
2065
  "ẽ,": 58960,
2066
  "ẽm": 58119,
2067
  "ẽn": 57834,
2068
  "ẽo": 57979,
2069
  "ẽê": 59346,
2070
+ "Ế": 59624,
2071
+ "Ế,": 59950,
2072
+ "ẾC": 59867,
2073
+ "ẾM": 59803,
2074
+ "ẾN": 59541,
2075
+ "ẾP": 59619,
2076
+ "ẾT": 59595,
2077
+ "ẾU": 59591,
2078
  "ế": 57708,
2079
  "ế,": 58662,
2080
  "ếc": 58229,
 
2092
  "ết,": 58524,
2093
  "ếu": 57643,
2094
  "ếu,": 58248,
2095
+ "Ề": 59697,
2096
+ "Ề,": 59916,
2097
+ "ỀM": 59850,
2098
+ "ỀN": 59620,
2099
+ "ỀU": 59616,
2100
  "ề": 57848,
2101
  "ề,": 58459,
2102
  "ềm": 58186,
 
2107
  "ều": 57687,
2108
  "ều,": 57937,
2109
  "ều;": 58668,
2110
+ "Ể": 59666,
2111
+ "Ể,": 59933,
2112
+ "ỂM": 59574,
2113
+ "ỂN": 59716,
2114
+ "ỂU": 59569,
2115
  "ể": 57799,
2116
  "ể,": 58568,
2117
  "ểm": 57610,
 
2120
  "ểng": 58889,
2121
  "ểnh": 58282,
2122
  "ểu": 57604,
2123
+ "Ễ,": 60010,
2124
+ "ỄM": 59613,
2125
+ "ỄN": 59653,
2126
+ "ỄU": 59843,
2127
  "ễ": 57749,
2128
  "ễ,": 59152,
2129
  "ễm": 57684,
 
2132
  "ễnh": 58122,
2133
  "ễnh;": 58944,
2134
  "ễu": 58154,
2135
+ "Ệ": 59656,
2136
+ "Ệ;": 59987,
2137
+ "ỆC": 59783,
2138
+ "ỆM": 59580,
2139
+ "ỆN": 59553,
2140
+ "ỆP": 59530,
2141
+ "ỆT": 59559,
2142
+ "ỆU": 59627,
2143
  "ệ": 57775,
2144
  "ệ;": 58951,
2145
  "ệc": 58024,
 
2154
  "ệt": 57589,
2155
  "ệt;": 58941,
2156
  "ệu": 57715,
2157
+ "Ỉ": 59584,
2158
+ "Ỉ,": 60011,
2159
+ "ỈA": 59740,
2160
+ "ỈM": 59839,
2161
+ "ỈN": 59893,
2162
+ "ỈU": 59795,
2163
  "ỉ": 57627,
2164
  "ỉ,": 59157,
2165
  "ỉa": 57935,
 
2168
  "ỉn": 58303,
2169
  "ỉnh": 58156,
2170
  "ỉu": 58041,
2171
+ "Ị": 59631,
2172
+ "Ị,": 60041,
2173
+ "ỊA": 59543,
2174
+ "ỊM": 59982,
2175
+ "ỊN": 59761,
2176
+ "ỊP": 59774,
2177
+ "ỊT": 59708,
2178
+ "ỊU": 59741,
2179
  "ị": 57721,
2180
  "ị,": 59482,
2181
  "ịa": 57556,
 
2189
  "ịt;": 58321,
2190
  "ịu": 57936,
2191
  "ịệp": 59360,
2192
+ "Ọ": 59648,
2193
+ "Ọ,": 60014,
2194
+ "ỌA": 59651,
2195
+ "ỌC": 59542,
2196
+ "ỌE": 59800,
2197
+ "ỌI": 59815,
2198
+ "ỌM": 59926,
2199
+ "ỌN": 59744,
2200
+ "ỌP": 59863,
2201
+ "ỌT": 59702,
2202
  "ọ,": 59163,
2203
  "ọa": 57767,
2204
  "ọc": 57553,
 
2215
  "ọp": 58222,
2216
  "ọt": 57854,
2217
  "ọt,": 58945,
2218
+ "Ỏ": 59575,
2219
+ "Ỏ,": 60002,
2220
+ "ỎA": 59862,
2221
+ "ỎC": 59949,
2222
+ "ỎE": 59988,
2223
+ "ỎI": 59531,
2224
+ "ỎL": 59963,
2225
+ "ỎM": 59873,
2226
+ "ỎN": 59879,
2227
  "ỏ": 57614,
2228
  "ỏ,": 59032,
2229
  "ỏa": 58214,
 
2235
  "ỏn": 58260,
2236
  "ỏng": 57565,
2237
  "ỏng,": 58368,
2238
+ "Ố": 59614,
2239
+ "ỐC": 59617,
2240
+ "ỐI": 59565,
2241
+ "ỐM": 59834,
2242
+ "ỐN": 59598,
2243
+ "ỐP": 59837,
2244
+ "ỐT": 59673,
2245
  "ố": 57686,
2246
  "ốc": 57690,
2247
  "ốc,": 58315,
 
2256
  "ốp": 58134,
2257
  "ốt": 57817,
2258
  "ốt,": 58347,
2259
+ "Ồ": 59670,
2260
+ "Ồ,": 59962,
2261
+ "ỒI": 59634,
2262
+ "ỒM": 59577,
2263
+ "ỒN": 59794,
2264
+ "ỒÍ": 59960,
2265
  "ồ": 57813,
2266
  "ồ,": 58756,
2267
  "ồi": 57736,
 
2272
  "ồng": 57705,
2273
  "ồng,": 58445,
2274
  "ồí": 58730,
2275
+ "Ổ,": 59788,
2276
+ "Ổ;": 59929,
2277
+ "ỔC": 59922,
2278
+ "ỔI": 59695,
2279
+ "ỔM": 59875,
2280
+ "ỔN": 59644,
2281
+ "ỔÍ": 59934,
2282
  "ổ": 57771,
2283
  "ổ,": 58033,
2284
  "ổ;": 58542,
 
2289
  "ổng": 57962,
2290
  "ổng,": 58360,
2291
  "ổí": 58578,
2292
+ "Ỗ": 59677,
2293
+ "Ỗ,": 59748,
2294
+ "ỖI": 59563,
2295
+ "ỖM": 59914,
2296
+ "ỖN": 59840,
2297
  "ỗ": 57824,
2298
  "ỗ,": 57946,
2299
  "ỗi": 57597,
2300
  "ỗm": 58457,
2301
  "ỗn": 58142,
2302
  "ỗng": 57963,
2303
+ "ỘC": 59570,
2304
+ "ỘI": 59608,
2305
+ "ỘM": 59785,
2306
+ "ỘN": 59745,
2307
+ "ỘP": 59864,
2308
+ "ỘT": 59578,
2309
  "ộ": 57541,
2310
  "ộc": 57606,
2311
  "ộc)": 58606,
 
2322
  "ột": 57618,
2323
  "ột)": 57595,
2324
  "ột,": 58312,
2325
+ "Ớ": 59691,
2326
+ "ỚC": 59646,
2327
+ "ỚI": 59528,
2328
+ "ỚM": 59829,
2329
+ "ỚN": 59784,
2330
+ "ỚP": 59755,
2331
+ "ỚT": 59730,
2332
+ "ỚU": 59896,
2333
+ "ỚÌ": 59754,
2334
+ "ỚỈ": 59944,
2335
  "ớ": 57839,
2336
  "ớc": 57757,
2337
  "ớc,": 57952,
 
2347
  "ớu": 58327,
2348
  "ớì": 57969,
2349
  "ớỉ": 58638,
2350
+ "Ờ": 59603,
2351
+ "Ờ,": 59669,
2352
+ "ỜI": 59601,
2353
+ "ỜL": 60036,
2354
+ "ỜM": 59841,
2355
+ "ỜN": 59820,
2356
+ "ỜU": 60040,
2357
+ "ỜÌ": 59958,
2358
+ "ỜÍ": 59939,
2359
+ "ỜÕ": 60007,
2360
  "ờ": 57672,
2361
  "ờ,": 57812,
2362
  "ời": 57667,
 
2374
  "ờì": 58713,
2375
  "ờí": 58620,
2376
  "ờõ": 59141,
2377
+ "Ở,": 59749,
2378
+ "ỞI": 59770,
2379
+ "ỞM": 59918,
2380
+ "ỞN": 59917,
2381
+ "ỞU": 59804,
2382
+ "ỞÌ": 59924,
2383
  "ở": 57918,
2384
  "ở,": 57950,
2385
  "ởi": 57992,
 
2389
  "ởng": 57747,
2390
  "ởu": 58059,
2391
  "ởì": 58488,
2392
+ "Ỡ": 59674,
2393
+ "ỠI": 59817,
2394
+ "ỠM": 59978,
2395
+ "ỠN": 59883,
2396
+ "ỠU": 60006,
2397
+ "ỠÌ": 59900,
2398
+ "ỠÍ": 59828,
2399
+ "ỠỈ": 59903,
2400
  "ỡ": 57819,
2401
  "ỡi": 58079,
2402
  "ỡm": 58895,
 
2406
  "ỡì": 58392,
2407
  "ỡí": 58110,
2408
  "ỡỉ": 58419,
2409
+ "Ợ": 59633,
2410
+ "Ợ,": 59894,
2411
+ "ỢC": 59762,
2412
+ "ỢI": 59641,
2413
+ "ỢM": 59838,
2414
+ "ỢN": 59602,
2415
+ "ỢP": 59825,
2416
+ "ỢT": 59693,
2417
+ "ỢU": 59821,
2418
  "ợ": 57724,
2419
  "ợ,": 58313,
2420
  "ợc": 57981,
 
2426
  "ợp": 58097,
2427
  "ợt": 57841,
2428
  "ợu": 58089,
2429
+ "Ụ": 59645,
2430
+ "Ụ;": 59913,
2431
+ "ỤA": 59703,
2432
+ "ỤC": 59544,
2433
+ "ỤI": 59592,
2434
+ "ỤM": 59809,
2435
+ "ỤN": 59881,
2436
+ "ỤP": 59890,
2437
+ "ỤT": 59689,
2438
+ "ỤY": 59844,
2439
  "ụ;": 58448,
2440
  "ụa": 57855,
2441
  "ụa,": 59146,
 
2450
  "ụt": 57836,
2451
  "ụt,": 58873,
2452
  "ụy": 58157,
2453
+ "Ủ,": 59940,
2454
+ "Ủ;": 59956,
2455
+ "ỦA": 59753,
2456
+ "ỦI": 59652,
2457
+ "ỦM": 59891,
2458
+ "ỦN": 59892,
2459
+ "ỦO": 59997,
2460
+ "ỦY": 59865,
2461
+ "ỦÓ": 59998,
2462
  "ủ": 57755,
2463
  "ủ,": 58624,
2464
  "ủ;": 58695,
 
2471
  "ủo": 58997,
2472
  "ủy": 58224,
2473
  "ủó": 58998,
2474
+ "Ứ!": 59980,
2475
+ "ỨA": 59780,
2476
+ "ỨC": 59571,
2477
+ "ỨN": 60004,
2478
+ "ỨT": 59756,
2479
+ "ỨU": 59882,
2480
  "ứ": 57837,
2481
  "ứ!": 58903,
2482
  "ứa": 58017,
 
2486
  "ứng,": 59124,
2487
  "ứt": 57973,
2488
  "ứu": 58268,
2489
+ "Ừ": 59657,
2490
+ "Ừ!": 59979,
2491
+ "Ừ,": 60005,
2492
+ "Ừ;": 59901,
2493
+ "ỪA": 59752,
2494
+ "ỪM": 59981,
2495
+ "ỪN": 60024,
2496
+ "ỪU": 59842,
2497
  "ừ": 57780,
2498
  "ừ!": 58902,
2499
  "ừ,": 59115,
 
2504
  "ừng": 57592,
2505
  "ừng,": 57947,
2506
  "ừu": 58146,
2507
+ "ỬA": 59724,
2508
+ "ỬI": 59771,
2509
+ "ỬU": 59833,
2510
  "ử": 57675,
2511
  "ửa": 57909,
2512
  "ửa,": 58446,
2513
  "ửi": 57993,
2514
  "ửng": 58006,
2515
  "ửu": 58127,
2516
+ "Ữ": 59611,
2517
+ "ỮA": 59731,
2518
+ "ỮU": 59607,
2519
  "ữ": 57682,
2520
  "ữa": 57920,
2521
  "ữa,": 58562,
 
2523
  "ững": 58218,
2524
  "ữu": 57678,
2525
  "ữu,": 59154,
2526
+ "Ự,": 60035,
2527
+ "Ự;": 59946,
2528
+ "ỰA": 59877,
2529
+ "ỰC": 59609,
2530
+ "ỰT": 59643,
2531
+ "ỰU": 59636,
2532
  "ự": 57711,
2533
  "ự,": 59355,
2534
  "ự;": 58649,
 
2541
  "ựu,": 58506,
2542
  "ỳ": 57546,
2543
  "ỳnh": 57571,
2544
+ "Ỵ": 59539,
2545
+ "ỴA": 59967,
2546
+ "ỴN": 60017,
2547
+ "ỴP": 60029,
2548
+ "ỴT": 59779,
2549
+ "ỴU": 59993,
2550
  "ỵ": 57548,
2551
  "ỵa": 58797,
2552
  "ỵch": 58901,
 
2554
  "ỵp": 59305,
2555
  "ỵt": 58015,
2556
  "ỵu": 58976,
2557
+ "ỶU": 59968,
2558
  "ỷ": 57630,
2559
  "ỷnh": 58973,
2560
  "ỷu": 58803
preprocessor_config.json CHANGED
@@ -1,22 +1,4 @@
1
  {
2
- "_valid_processor_keys": [
3
- "images",
4
- "do_resize",
5
- "size",
6
- "resample",
7
- "do_thumbnail",
8
- "do_align_long_axis",
9
- "do_pad",
10
- "random_padding",
11
- "do_rescale",
12
- "rescale_factor",
13
- "do_normalize",
14
- "image_mean",
15
- "image_std",
16
- "return_tensors",
17
- "data_format",
18
- "input_data_format"
19
- ],
20
  "do_align_long_axis": false,
21
  "do_normalize": true,
22
  "do_pad": true,
@@ -37,8 +19,8 @@
37
  "processor_class": "DonutProcessor",
38
  "resample": 2,
39
  "rescale_factor": 0.00392156862745098,
40
- "size": {
41
- "height": 1280,
42
- "width": 960
43
- }
44
  }
 
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "do_align_long_axis": false,
3
  "do_normalize": true,
4
  "do_pad": true,
 
19
  "processor_class": "DonutProcessor",
20
  "resample": 2,
21
  "rescale_factor": 0.00392156862745098,
22
+ "size": [
23
+ 960,
24
+ 1280
25
+ ]
26
  }
special_tokens_map.json CHANGED
@@ -3,27 +3,9 @@
3
  "<s_iitcdip>",
4
  "<s_synthdog>"
5
  ],
6
- "bos_token": {
7
- "content": "<s>",
8
- "lstrip": false,
9
- "normalized": false,
10
- "rstrip": false,
11
- "single_word": false
12
- },
13
- "cls_token": {
14
- "content": "<s>",
15
- "lstrip": false,
16
- "normalized": false,
17
- "rstrip": false,
18
- "single_word": false
19
- },
20
- "eos_token": {
21
- "content": "</s>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false
26
- },
27
  "mask_token": {
28
  "content": "<mask>",
29
  "lstrip": true,
@@ -31,25 +13,7 @@
31
  "rstrip": false,
32
  "single_word": false
33
  },
34
- "pad_token": {
35
- "content": "<pad>",
36
- "lstrip": false,
37
- "normalized": false,
38
- "rstrip": false,
39
- "single_word": false
40
- },
41
- "sep_token": {
42
- "content": "</s>",
43
- "lstrip": false,
44
- "normalized": false,
45
- "rstrip": false,
46
- "single_word": false
47
- },
48
- "unk_token": {
49
- "content": "<unk>",
50
- "lstrip": false,
51
- "normalized": false,
52
- "rstrip": false,
53
- "single_word": false
54
- }
55
  }
 
3
  "<s_iitcdip>",
4
  "<s_synthdog>"
5
  ],
6
+ "bos_token": "<s>",
7
+ "cls_token": "<s>",
8
+ "eos_token": "</s>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  "mask_token": {
10
  "content": "<mask>",
11
  "lstrip": true,
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "pad_token": "<pad>",
17
+ "sep_token": "</s>",
18
+ "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff