wanderer2k1 committed on
Commit
c2a588a
1 Parent(s): ca3b649

Upload tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<extra_id_0>": 36095,
3
+ "<extra_id_10>": 36085,
4
+ "<extra_id_11>": 36084,
5
+ "<extra_id_12>": 36083,
6
+ "<extra_id_13>": 36082,
7
+ "<extra_id_14>": 36081,
8
+ "<extra_id_15>": 36080,
9
+ "<extra_id_16>": 36079,
10
+ "<extra_id_17>": 36078,
11
+ "<extra_id_18>": 36077,
12
+ "<extra_id_19>": 36076,
13
+ "<extra_id_1>": 36094,
14
+ "<extra_id_20>": 36075,
15
+ "<extra_id_21>": 36074,
16
+ "<extra_id_22>": 36073,
17
+ "<extra_id_23>": 36072,
18
+ "<extra_id_24>": 36071,
19
+ "<extra_id_25>": 36070,
20
+ "<extra_id_26>": 36069,
21
+ "<extra_id_27>": 36068,
22
+ "<extra_id_28>": 36067,
23
+ "<extra_id_29>": 36066,
24
+ "<extra_id_2>": 36093,
25
+ "<extra_id_30>": 36065,
26
+ "<extra_id_31>": 36064,
27
+ "<extra_id_32>": 36063,
28
+ "<extra_id_33>": 36062,
29
+ "<extra_id_34>": 36061,
30
+ "<extra_id_35>": 36060,
31
+ "<extra_id_36>": 36059,
32
+ "<extra_id_37>": 36058,
33
+ "<extra_id_38>": 36057,
34
+ "<extra_id_39>": 36056,
35
+ "<extra_id_3>": 36092,
36
+ "<extra_id_40>": 36055,
37
+ "<extra_id_41>": 36054,
38
+ "<extra_id_42>": 36053,
39
+ "<extra_id_43>": 36052,
40
+ "<extra_id_44>": 36051,
41
+ "<extra_id_45>": 36050,
42
+ "<extra_id_46>": 36049,
43
+ "<extra_id_47>": 36048,
44
+ "<extra_id_48>": 36047,
45
+ "<extra_id_49>": 36046,
46
+ "<extra_id_4>": 36091,
47
+ "<extra_id_50>": 36045,
48
+ "<extra_id_51>": 36044,
49
+ "<extra_id_52>": 36043,
50
+ "<extra_id_53>": 36042,
51
+ "<extra_id_54>": 36041,
52
+ "<extra_id_55>": 36040,
53
+ "<extra_id_56>": 36039,
54
+ "<extra_id_57>": 36038,
55
+ "<extra_id_58>": 36037,
56
+ "<extra_id_59>": 36036,
57
+ "<extra_id_5>": 36090,
58
+ "<extra_id_60>": 36035,
59
+ "<extra_id_61>": 36034,
60
+ "<extra_id_62>": 36033,
61
+ "<extra_id_63>": 36032,
62
+ "<extra_id_64>": 36031,
63
+ "<extra_id_65>": 36030,
64
+ "<extra_id_66>": 36029,
65
+ "<extra_id_67>": 36028,
66
+ "<extra_id_68>": 36027,
67
+ "<extra_id_69>": 36026,
68
+ "<extra_id_6>": 36089,
69
+ "<extra_id_70>": 36025,
70
+ "<extra_id_71>": 36024,
71
+ "<extra_id_72>": 36023,
72
+ "<extra_id_73>": 36022,
73
+ "<extra_id_74>": 36021,
74
+ "<extra_id_75>": 36020,
75
+ "<extra_id_76>": 36019,
76
+ "<extra_id_77>": 36018,
77
+ "<extra_id_78>": 36017,
78
+ "<extra_id_79>": 36016,
79
+ "<extra_id_7>": 36088,
80
+ "<extra_id_80>": 36015,
81
+ "<extra_id_81>": 36014,
82
+ "<extra_id_82>": 36013,
83
+ "<extra_id_83>": 36012,
84
+ "<extra_id_84>": 36011,
85
+ "<extra_id_85>": 36010,
86
+ "<extra_id_86>": 36009,
87
+ "<extra_id_87>": 36008,
88
+ "<extra_id_88>": 36007,
89
+ "<extra_id_89>": 36006,
90
+ "<extra_id_8>": 36087,
91
+ "<extra_id_90>": 36005,
92
+ "<extra_id_91>": 36004,
93
+ "<extra_id_92>": 36003,
94
+ "<extra_id_93>": 36002,
95
+ "<extra_id_94>": 36001,
96
+ "<extra_id_95>": 36000,
97
+ "<extra_id_9>": 36086
98
+ }
special_tokens_map.json CHANGED
@@ -97,7 +97,25 @@
97
  "<extra_id_94>",
98
  "<extra_id_95>"
99
  ],
100
- "eos_token": "</s>",
101
- "pad_token": "<pad>",
102
- "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  }
 
97
  "<extra_id_94>",
98
  "<extra_id_95>"
99
  ],
100
+ "eos_token": {
101
+ "content": "</s>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false
106
+ },
107
+ "pad_token": {
108
+ "content": "<pad>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false
113
+ },
114
+ "unk_token": {
115
+ "content": "<unk>",
116
+ "lstrip": false,
117
+ "normalized": false,
118
+ "rstrip": false,
119
+ "single_word": false
120
+ }
121
  }
tokenizer_config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "added_tokens_decoder": {
3
  "0": {
4
  "content": "<pad>",
@@ -894,6 +895,7 @@
894
  "clean_up_tokenization_spaces": true,
895
  "eos_token": "</s>",
896
  "extra_ids": 96,
 
897
  "model_max_length": 1000000000000000019884624838656,
898
  "pad_token": "<pad>",
899
  "sp_model_kwargs": {},
 
1
  {
2
+ "add_prefix_space": true,
3
  "added_tokens_decoder": {
4
  "0": {
5
  "content": "<pad>",
 
895
  "clean_up_tokenization_spaces": true,
896
  "eos_token": "</s>",
897
  "extra_ids": 96,
898
+ "legacy": true,
899
  "model_max_length": 1000000000000000019884624838656,
900
  "pad_token": "<pad>",
901
  "sp_model_kwargs": {},