トークナイザーに全角記号・数字のNFKC正規化を追加
#2
by
misdelivery
- opened
- tokenizer.json +3 -0
tokenizer.json
CHANGED
@@ -124,6 +124,9 @@
|
|
124 |
"normalizer": {
|
125 |
"type": "Sequence",
|
126 |
"normalizers": [
|
|
|
|
|
|
|
127 |
{
|
128 |
"type": "Replace",
|
129 |
"pattern": {
|
|
|
124 |
"normalizer": {
|
125 |
"type": "Sequence",
|
126 |
"normalizers": [
|
127 |
+
{
|
128 |
+
"type": "NFKC"
|
129 |
+
},
|
130 |
{
|
131 |
"type": "Replace",
|
132 |
"pattern": {
|