トークナイザーに全角記号・数字のNFKC正規化を追加

#2
Files changed (1) hide show
  1. tokenizer.json +3 -0
tokenizer.json CHANGED
@@ -124,6 +124,9 @@
124
  "normalizer": {
125
  "type": "Sequence",
126
  "normalizers": [
 
 
 
127
  {
128
  "type": "Replace",
129
  "pattern": {
 
124
  "normalizer": {
125
  "type": "Sequence",
126
  "normalizers": [
127
+ {
128
+ "type": "NFKC"
129
+ },
130
  {
131
  "type": "Replace",
132
  "pattern": {