👷♀️ Fix he -> iw change.
Browse files - tokenizer.json +8 -20
tokenizer.json
CHANGED
@@ -203,7 +203,7 @@
|
|
203 |
},
|
204 |
{
|
205 |
"id": 50279,
|
206 |
-
"content": "<|he|>",
|
207 |
"single_word": false,
|
208 |
"lstrip": false,
|
209 |
"rstrip": false,
|
@@ -14546,30 +14546,18 @@
|
|
14546 |
"special_tokens": {
|
14547 |
"<|endoftext|>": {
|
14548 |
"id": "<|endoftext|>",
|
14549 |
-
"ids": [
|
14550 |
-
|
14551 |
-
],
|
14552 |
-
"tokens": [
|
14553 |
-
"<|endoftext|>"
|
14554 |
-
]
|
14555 |
},
|
14556 |
"<|notimestamps|>": {
|
14557 |
"id": "<|notimestamps|>",
|
14558 |
-
"ids": [
|
14559 |
-
|
14560 |
-
],
|
14561 |
-
"tokens": [
|
14562 |
-
"<|notimestamps|>"
|
14563 |
-
]
|
14564 |
},
|
14565 |
"<|startoftranscript|>": {
|
14566 |
"id": "<|startoftranscript|>",
|
14567 |
-
"ids": [
|
14568 |
-
|
14569 |
-
],
|
14570 |
-
"tokens": [
|
14571 |
-
"<|startoftranscript|>"
|
14572 |
-
]
|
14573 |
}
|
14574 |
}
|
14575 |
},
|
@@ -114849,4 +114837,4 @@
|
|
114849 |
"åľ º"
|
114850 |
]
|
114851 |
}
|
114852 |
-
}
|
|
|
203 |
},
|
204 |
{
|
205 |
"id": 50279,
|
206 |
+
"content": "<|iw|>",
|
207 |
"single_word": false,
|
208 |
"lstrip": false,
|
209 |
"rstrip": false,
|
|
|
14546 |
"special_tokens": {
|
14547 |
"<|endoftext|>": {
|
14548 |
"id": "<|endoftext|>",
|
14549 |
+
"ids": [50257],
|
14550 |
+
"tokens": ["<|endoftext|>"]
|
|
|
|
|
|
|
|
|
14551 |
},
|
14552 |
"<|notimestamps|>": {
|
14553 |
"id": "<|notimestamps|>",
|
14554 |
+
"ids": [50363],
|
14555 |
+
"tokens": ["<|notimestamps|>"]
|
|
|
|
|
|
|
|
|
14556 |
},
|
14557 |
"<|startoftranscript|>": {
|
14558 |
"id": "<|startoftranscript|>",
|
14559 |
+
"ids": [50258],
|
14560 |
+
"tokens": ["<|startoftranscript|>"]
|
|
|
|
|
|
|
|
|
14561 |
}
|
14562 |
}
|
14563 |
},
|
|
|
114837 |
"åľ º"
|
114838 |
]
|
114839 |
}
|
114840 |
+
}
|