Isaak Carter Augustus committed on
Commit
03c16d6
1 Parent(s): 159000c

Upload tokenizer

Browse files
added_tokens.json CHANGED
@@ -1,25 +1,26 @@
1
  {
2
- "<|assistant|>": 32006,
3
- "<|context|>": 32014,
4
- "<|current_states|>": 32013,
5
  "<|endoftext|>": 32001,
6
- "<|function_call|>": 32007,
7
- "<|function_response|>": 32008,
8
  "<|functions|>": 32002,
9
- "<|gökdeniz|>": 32003,
10
- "<|home_state|>": 32012,
11
- "<|image|>": 32009,
12
- "<|josie|>": 32005,
13
- "<|long_term_memory|>": 32010,
14
- "<|short_term_memory|>": 32011,
15
  "<|startoftext|>": 32000,
16
- "<|user|>": 32004,
17
- "Gökdeniz": 32016,
18
- "Gökdeniz Gülmez": 32015,
19
- "Gülmez": 32017,
20
- "J.O.S.I.E.": 32019,
21
- "JOSIE": 32018,
22
- "Josie": 32020,
23
- "Just an Outstandingly Smart and Intelligent Entity": 32022,
24
- "josie": 32021
 
25
  }
 
1
  {
2
+ "<|assistant|>": 32007,
3
+ "<|context|>": 32015,
4
+ "<|current_states|>": 32014,
5
  "<|endoftext|>": 32001,
6
+ "<|function_call|>": 32008,
7
+ "<|function_response|>": 32009,
8
  "<|functions|>": 32002,
9
+ "<|gökdeniz|>": 32004,
10
+ "<|home_state|>": 32013,
11
+ "<|image|>": 32010,
12
+ "<|josie|>": 32006,
13
+ "<|long_term_memory|>": 32011,
14
+ "<|short_term_memory|>": 32012,
15
  "<|startoftext|>": 32000,
16
+ "<|system|>": 32003,
17
+ "<|user|>": 32005,
18
+ "Gökdeniz": 32017,
19
+ "Gökdeniz Gülmez": 32016,
20
+ "Gülmez": 32018,
21
+ "J.O.S.I.E.": 32020,
22
+ "JOSIE": 32019,
23
+ "Josie": 32021,
24
+ "Just an Outstandingly Smart and Intelligent Entity": 32023,
25
+ "josie": 32022
26
  }
special_tokens_map.json CHANGED
@@ -7,6 +7,13 @@
7
  "rstrip": false,
8
  "single_word": false
9
  },
 
 
 
 
 
 
 
10
  {
11
  "content": "<|gökdeniz|>",
12
  "lstrip": false,
 
7
  "rstrip": false,
8
  "single_word": false
9
  },
10
+ {
11
+ "content": "<|system|>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
  {
18
  "content": "<|gökdeniz|>",
19
  "lstrip": false,
tokenizer.json CHANGED
@@ -59,7 +59,7 @@
59
  },
60
  {
61
  "id": 32003,
62
- "content": "<|gökdeniz|>",
63
  "single_word": false,
64
  "lstrip": false,
65
  "rstrip": false,
@@ -68,7 +68,7 @@
68
  },
69
  {
70
  "id": 32004,
71
- "content": "<|user|>",
72
  "single_word": false,
73
  "lstrip": false,
74
  "rstrip": false,
@@ -77,7 +77,7 @@
77
  },
78
  {
79
  "id": 32005,
80
- "content": "<|josie|>",
81
  "single_word": false,
82
  "lstrip": false,
83
  "rstrip": false,
@@ -86,7 +86,7 @@
86
  },
87
  {
88
  "id": 32006,
89
- "content": "<|assistant|>",
90
  "single_word": false,
91
  "lstrip": false,
92
  "rstrip": false,
@@ -95,7 +95,7 @@
95
  },
96
  {
97
  "id": 32007,
98
- "content": "<|function_call|>",
99
  "single_word": false,
100
  "lstrip": false,
101
  "rstrip": false,
@@ -104,7 +104,7 @@
104
  },
105
  {
106
  "id": 32008,
107
- "content": "<|function_response|>",
108
  "single_word": false,
109
  "lstrip": false,
110
  "rstrip": false,
@@ -113,7 +113,7 @@
113
  },
114
  {
115
  "id": 32009,
116
- "content": "<|image|>",
117
  "single_word": false,
118
  "lstrip": false,
119
  "rstrip": false,
@@ -122,7 +122,7 @@
122
  },
123
  {
124
  "id": 32010,
125
- "content": "<|long_term_memory|>",
126
  "single_word": false,
127
  "lstrip": false,
128
  "rstrip": false,
@@ -131,7 +131,7 @@
131
  },
132
  {
133
  "id": 32011,
134
- "content": "<|short_term_memory|>",
135
  "single_word": false,
136
  "lstrip": false,
137
  "rstrip": false,
@@ -140,7 +140,7 @@
140
  },
141
  {
142
  "id": 32012,
143
- "content": "<|home_state|>",
144
  "single_word": false,
145
  "lstrip": false,
146
  "rstrip": false,
@@ -149,7 +149,7 @@
149
  },
150
  {
151
  "id": 32013,
152
- "content": "<|current_states|>",
153
  "single_word": false,
154
  "lstrip": false,
155
  "rstrip": false,
@@ -158,7 +158,7 @@
158
  },
159
  {
160
  "id": 32014,
161
- "content": "<|context|>",
162
  "single_word": false,
163
  "lstrip": false,
164
  "rstrip": false,
@@ -167,6 +167,15 @@
167
  },
168
  {
169
  "id": 32015,
 
 
 
 
 
 
 
 
 
170
  "content": "Gökdeniz Gülmez",
171
  "single_word": false,
172
  "lstrip": false,
@@ -175,7 +184,7 @@
175
  "special": false
176
  },
177
  {
178
- "id": 32016,
179
  "content": "Gökdeniz",
180
  "single_word": false,
181
  "lstrip": false,
@@ -184,7 +193,7 @@
184
  "special": false
185
  },
186
  {
187
- "id": 32017,
188
  "content": "Gülmez",
189
  "single_word": false,
190
  "lstrip": false,
@@ -193,7 +202,7 @@
193
  "special": false
194
  },
195
  {
196
- "id": 32018,
197
  "content": "JOSIE",
198
  "single_word": false,
199
  "lstrip": false,
@@ -202,7 +211,7 @@
202
  "special": false
203
  },
204
  {
205
- "id": 32019,
206
  "content": "J.O.S.I.E.",
207
  "single_word": false,
208
  "lstrip": false,
@@ -211,7 +220,7 @@
211
  "special": false
212
  },
213
  {
214
- "id": 32020,
215
  "content": "Josie",
216
  "single_word": false,
217
  "lstrip": false,
@@ -220,7 +229,7 @@
220
  "special": false
221
  },
222
  {
223
- "id": 32021,
224
  "content": "josie",
225
  "single_word": false,
226
  "lstrip": false,
@@ -229,7 +238,7 @@
229
  "special": false
230
  },
231
  {
232
- "id": 32022,
233
  "content": "Just an Outstandingly Smart and Intelligent Entity",
234
  "single_word": false,
235
  "lstrip": false,
 
59
  },
60
  {
61
  "id": 32003,
62
+ "content": "<|system|>",
63
  "single_word": false,
64
  "lstrip": false,
65
  "rstrip": false,
 
68
  },
69
  {
70
  "id": 32004,
71
+ "content": "<|gökdeniz|>",
72
  "single_word": false,
73
  "lstrip": false,
74
  "rstrip": false,
 
77
  },
78
  {
79
  "id": 32005,
80
+ "content": "<|user|>",
81
  "single_word": false,
82
  "lstrip": false,
83
  "rstrip": false,
 
86
  },
87
  {
88
  "id": 32006,
89
+ "content": "<|josie|>",
90
  "single_word": false,
91
  "lstrip": false,
92
  "rstrip": false,
 
95
  },
96
  {
97
  "id": 32007,
98
+ "content": "<|assistant|>",
99
  "single_word": false,
100
  "lstrip": false,
101
  "rstrip": false,
 
104
  },
105
  {
106
  "id": 32008,
107
+ "content": "<|function_call|>",
108
  "single_word": false,
109
  "lstrip": false,
110
  "rstrip": false,
 
113
  },
114
  {
115
  "id": 32009,
116
+ "content": "<|function_response|>",
117
  "single_word": false,
118
  "lstrip": false,
119
  "rstrip": false,
 
122
  },
123
  {
124
  "id": 32010,
125
+ "content": "<|image|>",
126
  "single_word": false,
127
  "lstrip": false,
128
  "rstrip": false,
 
131
  },
132
  {
133
  "id": 32011,
134
+ "content": "<|long_term_memory|>",
135
  "single_word": false,
136
  "lstrip": false,
137
  "rstrip": false,
 
140
  },
141
  {
142
  "id": 32012,
143
+ "content": "<|short_term_memory|>",
144
  "single_word": false,
145
  "lstrip": false,
146
  "rstrip": false,
 
149
  },
150
  {
151
  "id": 32013,
152
+ "content": "<|home_state|>",
153
  "single_word": false,
154
  "lstrip": false,
155
  "rstrip": false,
 
158
  },
159
  {
160
  "id": 32014,
161
+ "content": "<|current_states|>",
162
  "single_word": false,
163
  "lstrip": false,
164
  "rstrip": false,
 
167
  },
168
  {
169
  "id": 32015,
170
+ "content": "<|context|>",
171
+ "single_word": false,
172
+ "lstrip": false,
173
+ "rstrip": false,
174
+ "normalized": false,
175
+ "special": true
176
+ },
177
+ {
178
+ "id": 32016,
179
  "content": "Gökdeniz Gülmez",
180
  "single_word": false,
181
  "lstrip": false,
 
184
  "special": false
185
  },
186
  {
187
+ "id": 32017,
188
  "content": "Gökdeniz",
189
  "single_word": false,
190
  "lstrip": false,
 
193
  "special": false
194
  },
195
  {
196
+ "id": 32018,
197
  "content": "Gülmez",
198
  "single_word": false,
199
  "lstrip": false,
 
202
  "special": false
203
  },
204
  {
205
+ "id": 32019,
206
  "content": "JOSIE",
207
  "single_word": false,
208
  "lstrip": false,
 
211
  "special": false
212
  },
213
  {
214
+ "id": 32020,
215
  "content": "J.O.S.I.E.",
216
  "single_word": false,
217
  "lstrip": false,
 
220
  "special": false
221
  },
222
  {
223
+ "id": 32021,
224
  "content": "Josie",
225
  "single_word": false,
226
  "lstrip": false,
 
229
  "special": false
230
  },
231
  {
232
+ "id": 32022,
233
  "content": "josie",
234
  "single_word": false,
235
  "lstrip": false,
 
238
  "special": false
239
  },
240
  {
241
+ "id": 32023,
242
  "content": "Just an Outstandingly Smart and Intelligent Entity",
243
  "single_word": false,
244
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -51,7 +51,7 @@
51
  "special": true
52
  },
53
  "32003": {
54
- "content": "<|gökdeniz|>",
55
  "lstrip": false,
56
  "normalized": false,
57
  "rstrip": false,
@@ -59,7 +59,7 @@
59
  "special": true
60
  },
61
  "32004": {
62
- "content": "<|user|>",
63
  "lstrip": false,
64
  "normalized": false,
65
  "rstrip": false,
@@ -67,7 +67,7 @@
67
  "special": true
68
  },
69
  "32005": {
70
- "content": "<|josie|>",
71
  "lstrip": false,
72
  "normalized": false,
73
  "rstrip": false,
@@ -75,7 +75,7 @@
75
  "special": true
76
  },
77
  "32006": {
78
- "content": "<|assistant|>",
79
  "lstrip": false,
80
  "normalized": false,
81
  "rstrip": false,
@@ -83,7 +83,7 @@
83
  "special": true
84
  },
85
  "32007": {
86
- "content": "<|function_call|>",
87
  "lstrip": false,
88
  "normalized": false,
89
  "rstrip": false,
@@ -91,7 +91,7 @@
91
  "special": true
92
  },
93
  "32008": {
94
- "content": "<|function_response|>",
95
  "lstrip": false,
96
  "normalized": false,
97
  "rstrip": false,
@@ -99,7 +99,7 @@
99
  "special": true
100
  },
101
  "32009": {
102
- "content": "<|image|>",
103
  "lstrip": false,
104
  "normalized": false,
105
  "rstrip": false,
@@ -107,7 +107,7 @@
107
  "special": true
108
  },
109
  "32010": {
110
- "content": "<|long_term_memory|>",
111
  "lstrip": false,
112
  "normalized": false,
113
  "rstrip": false,
@@ -115,7 +115,7 @@
115
  "special": true
116
  },
117
  "32011": {
118
- "content": "<|short_term_memory|>",
119
  "lstrip": false,
120
  "normalized": false,
121
  "rstrip": false,
@@ -123,7 +123,7 @@
123
  "special": true
124
  },
125
  "32012": {
126
- "content": "<|home_state|>",
127
  "lstrip": false,
128
  "normalized": false,
129
  "rstrip": false,
@@ -131,7 +131,7 @@
131
  "special": true
132
  },
133
  "32013": {
134
- "content": "<|current_states|>",
135
  "lstrip": false,
136
  "normalized": false,
137
  "rstrip": false,
@@ -139,7 +139,7 @@
139
  "special": true
140
  },
141
  "32014": {
142
- "content": "<|context|>",
143
  "lstrip": false,
144
  "normalized": false,
145
  "rstrip": false,
@@ -147,6 +147,14 @@
147
  "special": true
148
  },
149
  "32015": {
 
 
 
 
 
 
 
 
150
  "content": "Gökdeniz Gülmez",
151
  "lstrip": false,
152
  "normalized": true,
@@ -154,7 +162,7 @@
154
  "single_word": false,
155
  "special": false
156
  },
157
- "32016": {
158
  "content": "Gökdeniz",
159
  "lstrip": false,
160
  "normalized": true,
@@ -162,7 +170,7 @@
162
  "single_word": false,
163
  "special": false
164
  },
165
- "32017": {
166
  "content": "Gülmez",
167
  "lstrip": false,
168
  "normalized": true,
@@ -170,7 +178,7 @@
170
  "single_word": false,
171
  "special": false
172
  },
173
- "32018": {
174
  "content": "JOSIE",
175
  "lstrip": false,
176
  "normalized": true,
@@ -178,7 +186,7 @@
178
  "single_word": false,
179
  "special": false
180
  },
181
- "32019": {
182
  "content": "J.O.S.I.E.",
183
  "lstrip": false,
184
  "normalized": true,
@@ -186,7 +194,7 @@
186
  "single_word": false,
187
  "special": false
188
  },
189
- "32020": {
190
  "content": "Josie",
191
  "lstrip": false,
192
  "normalized": true,
@@ -194,7 +202,7 @@
194
  "single_word": false,
195
  "special": false
196
  },
197
- "32021": {
198
  "content": "josie",
199
  "lstrip": false,
200
  "normalized": true,
@@ -202,7 +210,7 @@
202
  "single_word": false,
203
  "special": false
204
  },
205
- "32022": {
206
  "content": "Just an Outstandingly Smart and Intelligent Entity",
207
  "lstrip": false,
208
  "normalized": true,
@@ -213,6 +221,7 @@
213
  },
214
  "additional_special_tokens": [
215
  "<|functions|>",
 
216
  "<|gökdeniz|>",
217
  "<|user|>",
218
  "<|josie|>",
 
51
  "special": true
52
  },
53
  "32003": {
54
+ "content": "<|system|>",
55
  "lstrip": false,
56
  "normalized": false,
57
  "rstrip": false,
 
59
  "special": true
60
  },
61
  "32004": {
62
+ "content": "<|gökdeniz|>",
63
  "lstrip": false,
64
  "normalized": false,
65
  "rstrip": false,
 
67
  "special": true
68
  },
69
  "32005": {
70
+ "content": "<|user|>",
71
  "lstrip": false,
72
  "normalized": false,
73
  "rstrip": false,
 
75
  "special": true
76
  },
77
  "32006": {
78
+ "content": "<|josie|>",
79
  "lstrip": false,
80
  "normalized": false,
81
  "rstrip": false,
 
83
  "special": true
84
  },
85
  "32007": {
86
+ "content": "<|assistant|>",
87
  "lstrip": false,
88
  "normalized": false,
89
  "rstrip": false,
 
91
  "special": true
92
  },
93
  "32008": {
94
+ "content": "<|function_call|>",
95
  "lstrip": false,
96
  "normalized": false,
97
  "rstrip": false,
 
99
  "special": true
100
  },
101
  "32009": {
102
+ "content": "<|function_response|>",
103
  "lstrip": false,
104
  "normalized": false,
105
  "rstrip": false,
 
107
  "special": true
108
  },
109
  "32010": {
110
+ "content": "<|image|>",
111
  "lstrip": false,
112
  "normalized": false,
113
  "rstrip": false,
 
115
  "special": true
116
  },
117
  "32011": {
118
+ "content": "<|long_term_memory|>",
119
  "lstrip": false,
120
  "normalized": false,
121
  "rstrip": false,
 
123
  "special": true
124
  },
125
  "32012": {
126
+ "content": "<|short_term_memory|>",
127
  "lstrip": false,
128
  "normalized": false,
129
  "rstrip": false,
 
131
  "special": true
132
  },
133
  "32013": {
134
+ "content": "<|home_state|>",
135
  "lstrip": false,
136
  "normalized": false,
137
  "rstrip": false,
 
139
  "special": true
140
  },
141
  "32014": {
142
+ "content": "<|current_states|>",
143
  "lstrip": false,
144
  "normalized": false,
145
  "rstrip": false,
 
147
  "special": true
148
  },
149
  "32015": {
150
+ "content": "<|context|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": true
156
+ },
157
+ "32016": {
158
  "content": "Gökdeniz Gülmez",
159
  "lstrip": false,
160
  "normalized": true,
 
162
  "single_word": false,
163
  "special": false
164
  },
165
+ "32017": {
166
  "content": "Gökdeniz",
167
  "lstrip": false,
168
  "normalized": true,
 
170
  "single_word": false,
171
  "special": false
172
  },
173
+ "32018": {
174
  "content": "Gülmez",
175
  "lstrip": false,
176
  "normalized": true,
 
178
  "single_word": false,
179
  "special": false
180
  },
181
+ "32019": {
182
  "content": "JOSIE",
183
  "lstrip": false,
184
  "normalized": true,
 
186
  "single_word": false,
187
  "special": false
188
  },
189
+ "32020": {
190
  "content": "J.O.S.I.E.",
191
  "lstrip": false,
192
  "normalized": true,
 
194
  "single_word": false,
195
  "special": false
196
  },
197
+ "32021": {
198
  "content": "Josie",
199
  "lstrip": false,
200
  "normalized": true,
 
202
  "single_word": false,
203
  "special": false
204
  },
205
+ "32022": {
206
  "content": "josie",
207
  "lstrip": false,
208
  "normalized": true,
 
210
  "single_word": false,
211
  "special": false
212
  },
213
+ "32023": {
214
  "content": "Just an Outstandingly Smart and Intelligent Entity",
215
  "lstrip": false,
216
  "normalized": true,
 
221
  },
222
  "additional_special_tokens": [
223
  "<|functions|>",
224
+ "<|system|>",
225
  "<|gökdeniz|>",
226
  "<|user|>",
227
  "<|josie|>",