diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..26e1df01739c84f19bdf2f045c48fa31ea6799b0
--- /dev/null
+++ b/app.py
@@ -0,0 +1,178 @@
+"""app.py
+streamlit demo of yomikata"""
+import pandas as pd
+import spacy
+import streamlit as st
+from speach import ttlig
+
+from yomikata import utils
+from yomikata.dictionary import Dictionary
+from yomikata.utils import parse_furigana
+from pathlib import Path
+
+@st.cache_data
+def add_border(html: str) -> str:
+ """Insert an HTML fragment into the ``WRAPPER`` template.
+
+ Newlines in ``html`` are replaced by single spaces before the fragment is
+ substituted into the template's only ``{}`` placeholder. Results are
+ memoized by ``st.cache_data``.
+ """
+ # NOTE(review): as written the template holds only the placeholder between
+ # two newlines, so no border markup is added; the surrounding HTML seems to
+ # be missing from this copy of the file - confirm.
+ WRAPPER = """
{}
"""
+ html = html.replace("\n", " ")
+ return WRAPPER.format(html)
+
+
+def get_random_sentence():
+ from config.config import TEST_DATA_DIR
+
+ df = pd.read_csv(Path(TEST_DATA_DIR, "test_optimized_strict_heteronyms.csv"))
+ return df.sample(1).iloc[0].sentence
+
+@st.cache_data
+def get_dbert_prediction_and_heteronym_list(text):
+ from yomikata.dbert import dBert
+
+ reader = dBert()
+ return reader.furigana(text), reader.heteronyms
+
+@st.cache_data
+def get_stats():
+ from config import config
+ from yomikata.utils import load_dict
+ stats = load_dict(Path(config.STORES_DIR, "dbert/training_performance.json"))
+
+ global_accuracy = stats['test']['accuracy']
+
+ stats = stats['test']['heteronym_performance']
+ heteronyms = stats.keys()
+
+ accuracy = [stats[heteronym]['accuracy'] for heteronym in heteronyms]
+
+ readings = [ "、".join(["{reading} ({correct}/{n})".format(reading=reading, correct=stats[heteronym]['readings'][reading]['found'][reading], n=stats[heteronym]['readings'][reading]['n']) for reading in stats[heteronym]['readings'].keys() if (stats[heteronym]['readings'][reading]['found'][reading] !=0 or reading != '')]) for heteronym in heteronyms ]
+
+ #if reading != ''
+
+ df = pd.DataFrame({'heteronym': heteronyms, 'accuracy': accuracy, 'readings': readings} )
+
+ df = df[df['readings'].str.contains('、')]
+
+ df['readings'] = df['readings'].str.replace('', 'Other')
+
+ df = df.rename(columns={'readings':'readings (test corr./total)'})
+
+ df= df.sort_values('accuracy', ascending=False, ignore_index=True)
+
+ df.index += 1
+
+ return global_accuracy, df
+
+
+@st.cache_data
+def furigana_to_spacy(text_with_furigana):
+ tokens = parse_furigana(text_with_furigana)
+ ents = []
+ output_text = ""
+ heteronym_count = 0
+ for token in tokens.groups:
+ if isinstance(token, ttlig.RubyFrag):
+ if heteronym_count != 0:
+ output_text += ", "
+
+ ents.append(
+ {
+ "start": len(output_text),
+ "end": len(output_text) + len(token.text),
+ "label": token.furi,
+ }
+ )
+
+ output_text += token.text
+ heteronym_count += 1
+ else:
+ pass
+ return {
+ "text": output_text,
+ "ents": ents,
+ "title": None,
+ }
+
+
+st.title("Yomikata: Disambiguate Japanese Heteronyms with a BERT model")
+
+# Input text box
+st.markdown("Input a Japanese sentence:")
+
+# Seed the example sentence once per session; the Randomize button further
+# down in this script overwrites this session-state entry.
+if "default_sentence" not in st.session_state:
+ st.session_state.default_sentence = "え、{人間/にんげん}というものかい? {人間/にんげん}というものは{角/つの}の{生/は}えない、{生白/なまじろ}い{顔/かお}や{手足/てあし}をした、{何/なん}ともいわれず{気味/きみ}の{悪/わる}いものだよ。"
+
+# The stored sentence is passed through utils.remove_furigana before it is used
+# as the initial text. The widget label is collapsed because the markdown line
+# above already shows the same prompt.
+input_text = st.text_area(
+ "Input a Japanese sentence:",
+ utils.remove_furigana(st.session_state.default_sentence),
+ label_visibility="collapsed",
+)
+
+# Yomikata prediction
+dbert_prediction, heteronyms = get_dbert_prediction_and_heteronym_list(input_text)
+
+# spacy-style output for the predictions
+colors = ["#85DCDF", "#DF85DC", "#DCDF85", "#85ABDF"]
+spacy_dict = furigana_to_spacy(dbert_prediction)
+# Give each distinct reading label a color, cycling through the palette when
+# there are more labels than colors.
+label_colors = {
+ reading: colors[i % len(colors)]
+ for i, reading in enumerate(set([item["label"] for item in spacy_dict["ents"]]))
+}
+# manual=True: displacy renders from the prepared dict rather than a spaCy Doc.
+html = spacy.displacy.render(
+ spacy_dict, style="ent", manual=True, options={"colors": label_colors}
+)
+
+# Show the entity view only when at least one entity was produced.
+if len(spacy_dict["ents"]) > 0:
+ st.markdown("**Yomikata** found and disambiguated the following heteronyms:")
+ st.write(
+ f"{add_border(html)}",
+ unsafe_allow_html=True,
+ )
+else:
+ st.markdown("**Yomikata** found no heteronyms in the input text.")
+
+# Dictionary + Yomikata prediction
+st.markdown("**Yomikata** can be coupled with a dictionary to get full furigana:")
+dictionary = st.radio(
+ "It can be coupled with a dictionary",
+ ("sudachi", "unidic", "ipadic", "juman"),
+ horizontal=True,
+ label_visibility="collapsed",
+)
+
+dictreader = Dictionary(dictionary)
+dictionary_prediction = dictreader.furigana(dbert_prediction)
+html = parse_furigana(dictionary_prediction).to_html()
+st.write(
+ f"{add_border(html)}",
+ unsafe_allow_html=True,
+)
+
+# Dictionary alone prediction
+if len(spacy_dict["ents"]) > 0:
+ dictionary_prediction = dictreader.furigana(utils.remove_furigana(input_text))
+ html = parse_furigana(dictionary_prediction).to_html()
+ st.markdown("Without **Yomikata** disambiguation, the dictionary would yield:")
+ st.write(
+ f"{add_border(html)}",
+ unsafe_allow_html=True,
+ )
+
+# Randomize button
+# On click, store a freshly sampled sentence and rerun the script so the text
+# area earlier in the script is rebuilt with it as its initial value.
+if st.button("🎲 Randomize the input sentence"):
+ st.session_state.default_sentence = get_random_sentence()
+ st.experimental_rerun()
+
+# Stats section
+global_accuracy, stats_df = get_stats()
+
+# The heading reports the number of rows in the stats table and the global
+# accuracy formatted as a whole-number percentage.
+st.subheader(f"{len(stats_df)} heteronyms supported, with a global accuracy of {global_accuracy:.0%}")
+
+st.dataframe(stats_df)
+
+# Hide the footer
+# NOTE(review): the string below is whitespace-only as written, so this call
+# injects no markup; the style rules it was meant to carry seem to be missing
+# from this copy of the file - confirm.
+hide_streamlit_style = """
+
+ """
+st.markdown(hide_streamlit_style, unsafe_allow_html=True)
diff --git a/config/__pycache__/config.cpython-310.pyc b/config/__pycache__/config.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6329ca04a2a94fe88d9ebfd281de4c5125723bae
Binary files /dev/null and b/config/__pycache__/config.cpython-310.pyc differ
diff --git a/config/config.py b/config/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..b292d64eacc579153068782f588e88bebfb6e197
--- /dev/null
+++ b/config/config.py
@@ -0,0 +1,102 @@
+# config.py
+
+import json
+import logging.config
+import sys
+from pathlib import Path
+
+import mlflow
+from rich.logging import RichHandler
+
+# Base and Config Directories
+BASE_DIR = Path(__file__).parent.parent.absolute()
+CONFIG_DIR = Path(BASE_DIR, "config")
+
+# Data Directories
+RAW_DATA_DIR = Path(BASE_DIR, "raw_data")
+SENTENCE_DATA_DIR = Path(BASE_DIR, "sentence_data")
+TRAIN_DATA_DIR = Path(SENTENCE_DATA_DIR, "train")
+VAL_DATA_DIR = Path(SENTENCE_DATA_DIR, "val")
+TEST_DATA_DIR = Path(SENTENCE_DATA_DIR, "test")
+READING_DATA_DIR = Path(BASE_DIR, "reading_data")
+
+# Logs Directory
+LOGS_DIR = Path(BASE_DIR, "logs")
+
+# Model Storage Directory
+STORES_DIR = Path(BASE_DIR, "stores")
+RUN_REGISTRY = Path(STORES_DIR, "runs")
+
+# Create dirs
+RAW_DATA_DIR.mkdir(parents=True, exist_ok=True)
+SENTENCE_DATA_DIR.mkdir(parents=True, exist_ok=True)
+TRAIN_DATA_DIR.mkdir(parents=True, exist_ok=True)
+VAL_DATA_DIR.mkdir(parents=True, exist_ok=True)
+TEST_DATA_DIR.mkdir(parents=True, exist_ok=True)
+READING_DATA_DIR.mkdir(parents=True, exist_ok=True)
+LOGS_DIR.mkdir(parents=True, exist_ok=True)
+STORES_DIR.mkdir(parents=True, exist_ok=True)
+RUN_REGISTRY.mkdir(parents=True, exist_ok=True)
+
+# Special tokens reserved
+ASCII_SPACE_TOKEN = "\U0000FFFF" # this is used to replace the usual space characters before sending text to mecab, because mecab uses the usual space to separate words.
+
+# Seed
+SEED = 1271297
+
+# Training parameters
+TRAIN_SIZE = 0.7
+VAL_SIZE = 0.15
+TEST_SIZE = 0.15
+assert TRAIN_SIZE + VAL_SIZE + TEST_SIZE == 1
+
+# Heteronym list
+with open(Path(CONFIG_DIR, "heteronyms.json")) as fp:
+ HETERONYMS = json.load(fp)
+
+# MLFlow model registry
+mlflow.set_tracking_uri("file://" + str(RUN_REGISTRY.absolute()))
+
+# Logger
+# Root logger configuration: a console handler plus two rotating log files
+# (info.log and error.log) inside LOGS_DIR.
+logging_config = {
+ "version": 1,
+ "disable_existing_loggers": False,
+ "formatters": {
+ "minimal": {"format": "%(message)s"},
+ "detailed": {
+ "format": "%(levelname)s %(asctime)s [%(name)s:%(filename)s:%(funcName)s:%(lineno)d]\n%(message)s\n"
+ },
+ },
+ "handlers": {
+ "console": {
+ "class": "logging.StreamHandler",
+ "stream": sys.stdout,
+ "formatter": "minimal",
+ "level": logging.DEBUG,
+ },
+ "info": {
+ "class": "logging.handlers.RotatingFileHandler",
+ "filename": Path(LOGS_DIR, "info.log"),
+ "maxBytes": 10485760, # 10 MiB (10 * 1024 * 1024 bytes)
+ "backupCount": 10,
+ "formatter": "detailed",
+ "level": logging.INFO,
+ },
+ "error": {
+ "class": "logging.handlers.RotatingFileHandler",
+ "filename": Path(LOGS_DIR, "error.log"),
+ "maxBytes": 10485760, # 10 MiB (10 * 1024 * 1024 bytes)
+ "backupCount": 10,
+ "formatter": "detailed",
+ "level": logging.ERROR,
+ },
+ },
+ "root": {
+ "handlers": ["console", "info", "error"],
+ "level": logging.INFO,
+ "propagate": True,
+ },
+}
+logging.config.dictConfig(logging_config)
+logger = logging.getLogger()
+# Replace the first root handler with a RichHandler.
+# NOTE(review): presumably index 0 is the "console" handler listed first
+# above - confirm that dictConfig attaches handlers in list order.
+logger.handlers[0] = RichHandler(markup=True)
diff --git a/config/dbert-train-args.json b/config/dbert-train-args.json
new file mode 100644
index 0000000000000000000000000000000000000000..7d8f2f5d1d89f0c4cac79050cbca1a5e34136df8
--- /dev/null
+++ b/config/dbert-train-args.json
@@ -0,0 +1,21 @@
+{
+ "model": "dBert",
+ "dataset": "optimized_strict_heteronyms",
+ "experiment": "train-dBert",
+ "run": "test",
+ "num_train_epochs": 10,
+ "evaluation_strategy": "steps",
+ "eval_steps": 300,
+ "logging_strategy": "steps",
+ "logging_steps": 300,
+ "save_strategy": "steps",
+ "save_steps": 300,
+ "learning_rate": 2e-5,
+ "per_device_train_batch_size": 128,
+ "per_device_eval_batch_size": 128,
+ "load_best_model_at_end": true,
+ "metric_for_best_model": "loss",
+ "weight_decay": 0.01,
+ "save_total_limit": 5,
+ "report_to": "mlflow"
+}
\ No newline at end of file
diff --git a/config/heteronyms.json b/config/heteronyms.json
new file mode 100644
index 0000000000000000000000000000000000000000..16f3883ab3e1246d985bebacd1ed7a6c75de1d93
--- /dev/null
+++ b/config/heteronyms.json
@@ -0,0 +1,559 @@
+{
+ "表": {
+ "ひょう": 3349,
+ "おもて": 3034,
+ "あらわ": 2474,
+ "あら": 731
+ },
+ "角": {
+ "かく": 4360,
+ "かど": 2303,
+ "つの": 372,
+ "すみ": 70
+ },
+ "大分": {
+ "おおいた": 3358,
+ "だいぶ": 797,
+ "だいぶん": 97
+ },
+ "国立": {
+ "こくりつ": 19256,
+ "くにたち": 246
+ },
+ "人気": {
+ "にんき": 7383,
+ "ひとけ": 149,
+ "じんき": 44
+ },
+ "市場": {
+ "しじょう": 85107,
+ "いちば": 781
+ },
+ "気質": {
+ "きしつ": 1108,
+ "かたぎ": 398
+ },
+ "上方": {
+ "かみがた": 1411,
+ "じょうほう": 656
+ },
+ "上手": {
+ "じょうず": 8065,
+ "うま": 706,
+ "かみて": 150,
+ "うわて": 57
+ },
+ "下手": {
+ "へた": 849,
+ "したて": 128,
+ "べた": 121,
+ "しもて": 50
+ },
+ "仮名": {
+ "かな": 1407,
+ "がな": 129,
+ "かめい": 115
+ },
+ "礼拝": {
+ "れいはい": 841,
+ "らいはい": 62
+ },
+ "遺言": {
+ "ゆいごん": 3152,
+ "いげん": 67,
+ "いごん": 57
+ },
+ "口腔": {
+ "こうこう": 6475,
+ "こうくう": 5577
+ },
+ "骨": {
+ "ほね": 10697,
+ "こつ": 5870
+ },
+ "一途": {
+ "いちず": 576,
+ "いっと": 139
+ },
+ "一言": {
+ "ひとこと": 2567,
+ "いちげん": 133,
+ "いちごん": 106
+ },
+ "最中": {
+ "さいちゅう": 520,
+ "さなか": 43
+ },
+ "一目": {
+ "ひとめ": 1596,
+ "いちもく": 210
+ },
+ "係": {
+ "かか": 14218,
+ "かかわ": 9804,
+ "がかり": 234,
+ "かかり": 227
+ },
+ "足跡": {
+ "あしあと": 2626,
+ "そくせき": 1862
+ },
+ "今日": {
+ "きょう": 17624,
+ "こんにち": 6772
+ },
+ "明日": {
+ "あす": 9824,
+ "あした": 6606,
+ "みょうにち": 66
+ },
+ "生物": {
+ "せいぶつ": 26088,
+ "いきもの": 55
+ },
+ "変化": {
+ "へんか": 87895,
+ "へんげ": 337
+ },
+ "大事": {
+ "だいじ": 5293,
+ "おおごと": 54
+ },
+ "大家": {
+ "たいか": 586,
+ "おおや": 238,
+ "たいけ": 79
+ },
+ "心中": {
+ "しんじゅう": 1541,
+ "しんちゅう": 250,
+ "しんぢゅう": 127
+ },
+ "一行": {
+ "いっこう": 1112,
+ "いちぎょう": 95
+ },
+ "一時": {
+ "いちじ": 2649,
+ "いっとき": 381,
+ "いちどき": 47
+ },
+ "一方": {
+ "いっぽう": 5327,
+ "ひとかた": 112,
+ "いちほう": 42
+ },
+ "一夜": {
+ "いちや": 1148,
+ "ひとよ": 82
+ },
+ "下野": {
+ "しもつけ": 530,
+ "げや": 104,
+ "しもの": 57
+ },
+ "花弁": {
+ "かべん": 213,
+ "はなびら": 58
+ },
+ "玩具": {
+ "がんぐ": 1354,
+ "おもちゃ": 238
+ },
+ "強力": {
+ "きょうりょく": 2319,
+ "ごうりき": 51
+ },
+ "金色": {
+ "きんいろ": 942,
+ "こんじき": 484
+ },
+ "経緯": {
+ "けいい": 7659,
+ "いきさつ": 56
+ },
+ "故郷": {
+ "こきょう": 3840,
+ "ふるさと": 506,
+ "くに": 122
+ },
+ "紅葉": {
+ "こうよう": 856,
+ "もみじ": 339
+ },
+ "根本": {
+ "こんぽん": 2872,
+ "ねもと": 262
+ },
+ "山陰": {
+ "さんいん": 2094,
+ "やまかげ": 51
+ },
+ "上下": {
+ "じょうげ": 1549,
+ "うえした": 97
+ },
+ "身体": {
+ "しんたい": 20301,
+ "からだ": 3375
+ },
+ "水面": {
+ "すいめん": 1387,
+ "みなも": 91
+ },
+ "世論": {
+ "よろん": 4554,
+ "せろん": 1934
+ },
+ "清水": {
+ "しみず": 4114,
+ "きよみず": 98
+ },
+ "大手": {
+ "おおて": 6695,
+ "おおで": 119
+ },
+ "大人": {
+ "おとな": 11037,
+ "たいじん": 113,
+ "うし": 59
+ },
+ "大勢": {
+ "おおぜい": 1290,
+ "たいせい": 398
+ },
+ "中間": {
+ "ちゅうかん": 17669,
+ "ちゅうげん": 144
+ },
+ "日向": {
+ "ひゅうが": 800,
+ "ひなた": 318
+ },
+ "夫婦": {
+ "ふうふ": 9165,
+ "めおと": 354
+ },
+ "牧場": {
+ "ぼくじょう": 1913,
+ "まきば": 159
+ },
+ "末期": {
+ "まっき": 3569,
+ "まつご": 78
+ },
+ "利益": {
+ "りえき": 13434,
+ "りやく": 209
+ },
+ "一味": {
+ "いちみ": 442,
+ "ひとあじ": 60
+ },
+ "魚": {
+ "さかな": 5857,
+ "うお": 1706,
+ "ぎょ": 413,
+ "ざかな": 50
+ },
+ "施行": {
+ "しこう": 18724,
+ "せこう": 70
+ },
+ "施工": {
+ "せこう": 25734,
+ "しこう": 48,
+ "せこ": 43
+ },
+ "転生": {
+ "てんせい": 911,
+ "てんしょう": 175
+ },
+ "博士": {
+ "はくし": 17017,
+ "はかせ": 2462
+ },
+ "眼鏡": {
+ "めがね": 2040,
+ "がんきょう": 102
+ },
+ "文字": {
+ "もじ": 9583,
+ "もんじ": 633
+ },
+ "文書": {
+ "ぶんしょ": 15094,
+ "もんじょ": 5879,
+ "もんしょ": 51
+ },
+ "現世": {
+ "げんせい": 192,
+ "げんせ": 125
+ },
+ "日中": {
+ "にっちゅう": 12478,
+ "にちじゅう": 117
+ },
+ "夜中": {
+ "よなか": 723,
+ "やちゅう": 106
+ },
+ "二人": {
+ "ふたり": 22151,
+ "ににん": 256
+ },
+ "見物": {
+ "けんぶつ": 1832,
+ "みもの": 61
+ },
+ "清浄": {
+ "せいじょう": 800,
+ "しょうじょう": 46
+ },
+ "谷間": {
+ "たにま": 1089,
+ "たにあい": 67
+ },
+ "追従": {
+ "ついじゅう": 1000,
+ "ついしょう": 73
+ },
+ "墓石": {
+ "はかいし": 323,
+ "ぼせき": 257
+ },
+ "漢書": {
+ "かんじょ": 171,
+ "かんしょ": 66,
+ "からぶみ": 47
+ },
+ "作法": {
+ "さほう": 3905,
+ "さくほう": 427
+ },
+ "半月": {
+ "はんつき": 388,
+ "はんげつ": 85
+ },
+ "黒子": {
+ "ほくろ": 200,
+ "くろこ": 183
+ },
+ "競売": {
+ "けいばい": 937,
+ "きょうばい": 332
+ },
+ "開眼": {
+ "かいげん": 338,
+ "かいがん": 144
+ },
+ "求道": {
+ "きゅうどう": 379,
+ "ぐどう": 81
+ },
+ "施業": {
+ "せぎょう": 602,
+ "しぎょう": 264
+ },
+ "借家": {
+ "しゃっか": 505,
+ "しゃくや": 394
+ },
+ "法衣": {
+ "ころも": 115,
+ "ほうえ": 87
+ },
+ "昨日": {
+ "きのう": 2670,
+ "さくじつ": 713
+ },
+ "風車": {
+ "ふうしゃ": 1133,
+ "かざぐるま": 678
+ },
+ "寒気": {
+ "かんき": 153,
+ "さむけ": 79
+ },
+ "背筋": {
+ "せすじ": 177,
+ "はいきん": 43
+ },
+ "逆手": {
+ "さかて": 169,
+ "ぎゃくて": 116
+ },
+ "生花": {
+ "いけばな": 283,
+ "せいか": 91
+ },
+ "白髪": {
+ "しらが": 313,
+ "はくはつ": 113
+ },
+ "一月": {
+ "ひとつき": 301,
+ "いちがつ": 282
+ },
+ "一寸": {
+ "ちょっと": 1481,
+ "いっすん": 111
+ },
+ "一声": {
+ "ひとこえ": 253,
+ "いっせい": 109
+ },
+ "一日": {
+ "いちにち": 1711,
+ "ついたち": 866,
+ "いちじつ": 41
+ },
+ "一分": {
+ "いちぶん": 75,
+ "いちぶ": 62
+ },
+ "一文": {
+ "いちもん": 86,
+ "いちぶん": 48
+ },
+ "何時": {
+ "いつ": 1248,
+ "なんじ": 159,
+ "なんどき": 63
+ },
+ "何分": {
+ "なにぶん": 379,
+ "なんぷん": 51
+ },
+ "気骨": {
+ "きこつ": 140,
+ "きぼね": 67
+ },
+ "銀杏": {
+ "いちょう": 322,
+ "ぎんなん": 85
+ },
+ "細々": {
+ "こまごま": 88,
+ "ほそぼそ": 67
+ },
+ "細目": {
+ "さいもく": 962,
+ "ほそめ": 123
+ },
+ "疾風": {
+ "しっぷう": 544,
+ "はやて": 94,
+ "かぜ": 68
+ },
+ "菖蒲": {
+ "しょうぶ": 165,
+ "あやめ": 65
+ },
+ "船底": {
+ "せんてい": 246,
+ "ふなぞこ": 80
+ },
+ "相乗": {
+ "そうじょう": 732,
+ "あいの": 89
+ },
+ "造作": {
+ "ぞうさ": 188,
+ "ぞうさく": 65
+ },
+ "頭数": {
+ "あたまかず": 168,
+ "とうすう": 119
+ },
+ "二重": {
+ "にじゅう": 5418,
+ "ふたえ": 65
+ },
+ "日暮": {
+ "ひぐ": 403,
+ "ひぐれ": 97,
+ "ひぐらし": 81
+ },
+ "梅雨": {
+ "つゆ": 471,
+ "ばいう": 284
+ },
+ "風穴": {
+ "かざあな": 300,
+ "ふうけつ": 68
+ },
+ "分別": {
+ "ふんべつ": 1280,
+ "ぶんべつ": 635
+ },
+ "夜話": {
+ "やわ": 2153,
+ "よばなし": 52
+ },
+ "野兎": {
+ "やと": 176,
+ "のうさぎ": 43
+ },
+ "冷水": {
+ "れいすい": 189,
+ "ひやみず": 153
+ },
+ "連中": {
+ "れんじゅう": 853,
+ "れんちゅう": 691
+ },
+ "飛沫": {
+ "ひまつ": 223,
+ "しぶき": 96
+ },
+ "翡翠": {
+ "ひすい": 177,
+ "かわせみ": 94
+ },
+ "一昨日": {
+ "おととい": 208,
+ "いっさくじつ": 71
+ },
+ "一昨年": {
+ "おととし": 72,
+ "いっさくねん": 59
+ },
+ "十八番": {
+ "じゅうはちばん": 212,
+ "おはこ": 41
+ },
+ "明後日": {
+ "あさって": 186,
+ "みょうごにち": 60
+ },
+ "石綿": {
+ "いしわた": 1702,
+ "せきめん": 360
+ },
+ "公文": {
+ "こうぶん": 196,
+ "くもん": 46
+ },
+ "読本": {
+ "どくほん": 12176,
+ "とくほん": 2414,
+ "よみほん": 121
+ },
+ "古本": {
+ "ふるほん": 550,
+ "こほん": 109
+ },
+ "町家": {
+ "まちや": 655,
+ "ちょうか": 216
+ },
+ "米": {
+ "べい": 17392,
+ "こめ": 9021,
+ "まい": 2829,
+ "よね": 620,
+ "ごめ": 164,
+ "めーとる": 112
+ }
+}
diff --git a/config/heteronyms_Sato2022.json b/config/heteronyms_Sato2022.json
new file mode 100644
index 0000000000000000000000000000000000000000..6af226b561ddff73a90188e88a16cebfaf0c61e6
--- /dev/null
+++ b/config/heteronyms_Sato2022.json
@@ -0,0 +1,211 @@
+{
+ "heteronyms_in_bert": {
+ "表": 2,
+ "角": 4,
+ "大分": 2,
+ "国立": 2,
+ "人気": 3,
+ "市場": 2,
+ "気質": 2,
+ "役所": 2,
+ "上方": 2,
+ "上手": 3,
+ "下手": 3,
+ "人事": 2,
+ "金星": 2,
+ "仮名": 2,
+ "内面": 2,
+ "礼拝": 2,
+ "遺言": 3,
+ "口腔": 2,
+ "後世": 2,
+ "骨": 2,
+ "一途": 2,
+ "一言": 3,
+ "最中": 3,
+ "一目": 2,
+ "係": 3,
+ "足跡": 2,
+ "今日": 2,
+ "明日": 3,
+ "生物": 3,
+ "変化": 2,
+ "大事": 2,
+ "水車": 2,
+ "一見": 2,
+ "一端": 2,
+ "大家": 3,
+ "心中": 2,
+ "書物": 2,
+ "一角": 2,
+ "一行": 3,
+ "一時": 3,
+ "一定": 2,
+ "一方": 2,
+ "一夜": 2,
+ "下野": 3,
+ "化学": 2,
+ "火口": 2,
+ "花弁": 2,
+ "玩具": 2,
+ "強力": 3,
+ "金色": 2,
+ "経緯": 2,
+ "故郷": 2,
+ "紅葉": 2,
+ "行方": 3,
+ "根本": 2,
+ "左右": 3,
+ "山陰": 2,
+ "十分": 2,
+ "上下": 5,
+ "身体": 2,
+ "水面": 2,
+ "世論": 2,
+ "清水": 3,
+ "大手": 2,
+ "大人": 4,
+ "大勢": 3,
+ "中間": 5,
+ "日向": 42,
+ "日時": 3,
+ "夫婦": 2,
+ "牧場": 2,
+ "末期": 2,
+ "利益": 2,
+ "工夫": 2,
+ "一味": 2,
+ "魚": 3,
+ "区分": 2,
+ "施行": 4,
+ "施工": 2,
+ "転生": 2,
+ "博士": 2,
+ "法華": 2,
+ "真面目": 3,
+ "眼鏡": 2,
+ "文字": 2,
+ "文書": 3,
+ "律令": 2,
+ "現世": 2,
+ "日中": 2,
+ "夜中": 3,
+ "前世": 2,
+ "二人": 2,
+ "立像": 2
+ },
+ "heteronyms_not_in_bert": {
+ "教化": 3,
+ "見物": 2,
+ "清浄": 2,
+ "谷間": 2,
+ "追従": 2,
+ "墓石": 2,
+ "大文字": 2,
+ "漢書": 2,
+ "作法": 2,
+ "兵法": 2,
+ "大人気": 2,
+ "半月": 2,
+ "黒子": 2,
+ "外面": 2,
+ "競売": 2,
+ "開眼": 2,
+ "求道": 2,
+ "血脈": 2,
+ "施業": 2,
+ "借家": 2,
+ "頭蓋骨": 2,
+ "法衣": 2,
+ "昨日": 2,
+ "氷柱": 2,
+ "風車": 2,
+ "寒気": 2,
+ "背筋": 2,
+ "逆手": 2,
+ "色紙": 2,
+ "生花": 3,
+ "白髪": 2,
+ "貼付": 2,
+ "一回": 2,
+ "一期": 2,
+ "一月": 3,
+ "一所": 2,
+ "一寸": 2,
+ "一声": 2,
+ "一石": 2,
+ "一日": 4,
+ "一分": 3,
+ "一文": 3,
+ "一片": 3,
+ "何時": 3,
+ "何分": 2,
+ "火煙": 2,
+ "火傷": 2,
+ "火床": 3,
+ "火先": 2,
+ "火筒": 2,
+ "芥子": 3,
+ "気骨": 2,
+ "銀杏": 3,
+ "元金": 2,
+ "五分": 2,
+ "後々": 2,
+ "後生": 2,
+ "御供": 4,
+ "細々": 3,
+ "細目": 2,
+ "三位": 2,
+ "疾風": 3,
+ "菖蒲": 2,
+ "世人": 2,
+ "世路": 2,
+ "船底": 2,
+ "早急": 2,
+ "相乗": 2,
+ "造作": 2,
+ "他言": 2,
+ "東雲": 2,
+ "頭数": 2,
+ "二重": 2,
+ "日供": 2,
+ "日次": 4,
+ "日暮": 3,
+ "日来": 3,
+ "梅雨": 2,
+ "風穴": 2,
+ "仏語": 3,
+ "分別": 2,
+ "面子": 2,
+ "木目": 2,
+ "目下": 2,
+ "夜直": 2,
+ "夜来": 2,
+ "夜話": 2,
+ "野兎": 2,
+ "野馬": 3,
+ "野分": 2,
+ "野辺": 2,
+ "野面": 3,
+ "野立": 3,
+ "冷水": 2,
+ "連中": 2,
+ "飛沫": 2,
+ "翡翠": 2,
+ "餃子": 2,
+ "一足": 2,
+ "意気地": 2,
+ "一昨日": 3,
+ "一昨年": 2,
+ "十八番": 2,
+ "十六夜": 2,
+ "明後日": 2,
+ "石綿": 2,
+ "公文": 2,
+ "読本": 3,
+ "仏国": 3,
+ "古本": 2,
+ "町家": 2,
+ "遊行": 2
+ }
+}
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000000000000000000000000000000000000..ccd4efceef22cda88e02c15b6f6e2e2974d488f3
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,65 @@
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "yomikata"
+version = "0.0.1"
+authors = [{name="Sam Passaglia"}]
+description = "Japanese kanji disambiguation"
+readme = "README.md"
+requires-python = ">=3.8"
+classifiers = [
+ "Programming Language :: Python :: 3",
+ "Operating System :: OS Independent",
+ "License :: OSI Approved :: MIT License"
+]
+dynamic = ["dependencies"]
+
+[project.urls]
+"Homepage" = "https://github.com/passaglia/yomikata"
+"Demo" = "https://huggingface.co/spaces/passaglia/yomikata"
+"Bug Tracker" = "https://github.com/passaglia/yomikata/issues"
+
+[tool.setuptools.dynamic]
+dependencies = {file = ["requirements.txt"]}
+
+[tool.setuptools]
+packages = ["yomikata", "config"]
+
+[tool.flake8]
+exclude = "venv"
+ignore = ["E203","E501", "W503", "E226"]
+max-line-length = 79
+# E501: Line too long
+# W503: Line break occurred before binary operator
+# E226: Missing white space around arithmetic operator
+# E203: whitespace before ':' ()
+
+# iSort
+[tool.isort]
+profile = "black"
+line_length = 79
+multi_line_output = 3
+include_trailing_comma = true
+virtual_env = "venv"
+
+# Black formatting
+[tool.black]
+line-length = 100
+include = '\.pyi?$'
+exclude = '''
+ /(
+ .eggs # exclude a few common directories
+ | .git # in the root of the project
+ | .hg
+ | .mypy_cache
+ | .tox
+ | venv
+ | _build
+ | buck-out
+ | build
+ | dist
+ )/
+ '''
+
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..87b4d53649d86e1be5df8553662ebb429f896df9
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,25 @@
+numpy==1.24.0
+pandas==1.5.2
+pretty-errors==1.2.25
+fugashi==1.2.1
+ipadic==1.0.0
+jumandic==1.0.0
+jaconv==0.3
+fugashi[unidic] #python -m unidic download
+sudachidict_full
+scikit-learn==1.2.0
+speach==0.1a15.post1
+torch==1.13.1
+transformers==4.25.1
+datasets==2.7.1
+pynvml==11.4.1
+sentencepiece==0.1.97
+typer==0.7.0
+rich==12.6.0
+unidic-lite
+japanize_matplotlib
+mlflow-skinny==2.1.1
+streamlit==1.18.1
+black
+flake8
+isort
diff --git a/robot_reading.png b/robot_reading.png
new file mode 100644
index 0000000000000000000000000000000000000000..abed71a5e613b3eddc351008b3381ec5071a4adc
Binary files /dev/null and b/robot_reading.png differ
diff --git a/stores/dbert/added_tokens.json b/stores/dbert/added_tokens.json
new file mode 100644
index 0000000000000000000000000000000000000000..97e5eda617319554c3efedf230f24cf526ae9213
--- /dev/null
+++ b/stores/dbert/added_tokens.json
@@ -0,0 +1,64 @@
+{
+ "一分": 32813,
+ "一声": 32824,
+ "一寸": 32779,
+ "一文": 32798,
+ "一日": 32791,
+ "一昨年": 32825,
+ "一昨日": 32822,
+ "一月": 32783,
+ "二重": 32782,
+ "何分": 32772,
+ "何時": 32773,
+ "作法": 32816,
+ "借家": 32819,
+ "公文": 32780,
+ "冷水": 32796,
+ "分別": 32827,
+ "十八番": 32810,
+ "半月": 32801,
+ "古本": 32805,
+ "墓石": 32814,
+ "夜話": 32806,
+ "大文字": 32774,
+ "寒気": 32804,
+ "施業": 32775,
+ "日暮": 32786,
+ "明後日": 32808,
+ "昨日": 32788,
+ "梅雨": 32803,
+ "気骨": 32777,
+ "求道": 32784,
+ "法衣": 32821,
+ "清浄": 32785,
+ "漢書": 32776,
+ "生花": 32811,
+ "町家": 32797,
+ "疾風": 32789,
+ "白髪": 32794,
+ "相乗": 32809,
+ "石綿": 32781,
+ "競売": 32799,
+ "細々": 32769,
+ "細目": 32815,
+ "翡翠": 32826,
+ "背筋": 32823,
+ "船底": 32812,
+ "菖蒲": 32820,
+ "見物": 32829,
+ "読本": 32795,
+ "谷間": 32800,
+ "追従": 32828,
+ "逆手": 32778,
+ "造作": 32818,
+ "連中": 32770,
+ "野兎": 32807,
+ "銀杏": 32768,
+ "開眼": 32790,
+ "頭数": 32792,
+ "頭蓋骨": 32817,
+ "風穴": 32802,
+ "風車": 32793,
+ "飛沫": 32787,
+ "黒子": 32771
+}
diff --git a/stores/dbert/config.json b/stores/dbert/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..666d97d3e5dec4bbf1b3c94501577c24aa7cdbd9
--- /dev/null
+++ b/stores/dbert/config.json
@@ -0,0 +1,634 @@
+{
+ "_name_or_path": "cl-tohoku/bert-base-japanese-v2",
+ "architectures": [
+ "BertForTokenClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "id2label": {
+ "0": "LABEL_0",
+ "1": "LABEL_1",
+ "2": "LABEL_2",
+ "3": "LABEL_3",
+ "4": "LABEL_4",
+ "5": "LABEL_5",
+ "6": "LABEL_6",
+ "7": "LABEL_7",
+ "8": "LABEL_8",
+ "9": "LABEL_9",
+ "10": "LABEL_10",
+ "11": "LABEL_11",
+ "12": "LABEL_12",
+ "13": "LABEL_13",
+ "14": "LABEL_14",
+ "15": "LABEL_15",
+ "16": "LABEL_16",
+ "17": "LABEL_17",
+ "18": "LABEL_18",
+ "19": "LABEL_19",
+ "20": "LABEL_20",
+ "21": "LABEL_21",
+ "22": "LABEL_22",
+ "23": "LABEL_23",
+ "24": "LABEL_24",
+ "25": "LABEL_25",
+ "26": "LABEL_26",
+ "27": "LABEL_27",
+ "28": "LABEL_28",
+ "29": "LABEL_29",
+ "30": "LABEL_30",
+ "31": "LABEL_31",
+ "32": "LABEL_32",
+ "33": "LABEL_33",
+ "34": "LABEL_34",
+ "35": "LABEL_35",
+ "36": "LABEL_36",
+ "37": "LABEL_37",
+ "38": "LABEL_38",
+ "39": "LABEL_39",
+ "40": "LABEL_40",
+ "41": "LABEL_41",
+ "42": "LABEL_42",
+ "43": "LABEL_43",
+ "44": "LABEL_44",
+ "45": "LABEL_45",
+ "46": "LABEL_46",
+ "47": "LABEL_47",
+ "48": "LABEL_48",
+ "49": "LABEL_49",
+ "50": "LABEL_50",
+ "51": "LABEL_51",
+ "52": "LABEL_52",
+ "53": "LABEL_53",
+ "54": "LABEL_54",
+ "55": "LABEL_55",
+ "56": "LABEL_56",
+ "57": "LABEL_57",
+ "58": "LABEL_58",
+ "59": "LABEL_59",
+ "60": "LABEL_60",
+ "61": "LABEL_61",
+ "62": "LABEL_62",
+ "63": "LABEL_63",
+ "64": "LABEL_64",
+ "65": "LABEL_65",
+ "66": "LABEL_66",
+ "67": "LABEL_67",
+ "68": "LABEL_68",
+ "69": "LABEL_69",
+ "70": "LABEL_70",
+ "71": "LABEL_71",
+ "72": "LABEL_72",
+ "73": "LABEL_73",
+ "74": "LABEL_74",
+ "75": "LABEL_75",
+ "76": "LABEL_76",
+ "77": "LABEL_77",
+ "78": "LABEL_78",
+ "79": "LABEL_79",
+ "80": "LABEL_80",
+ "81": "LABEL_81",
+ "82": "LABEL_82",
+ "83": "LABEL_83",
+ "84": "LABEL_84",
+ "85": "LABEL_85",
+ "86": "LABEL_86",
+ "87": "LABEL_87",
+ "88": "LABEL_88",
+ "89": "LABEL_89",
+ "90": "LABEL_90",
+ "91": "LABEL_91",
+ "92": "LABEL_92",
+ "93": "LABEL_93",
+ "94": "LABEL_94",
+ "95": "LABEL_95",
+ "96": "LABEL_96",
+ "97": "LABEL_97",
+ "98": "LABEL_98",
+ "99": "LABEL_99",
+ "100": "LABEL_100",
+ "101": "LABEL_101",
+ "102": "LABEL_102",
+ "103": "LABEL_103",
+ "104": "LABEL_104",
+ "105": "LABEL_105",
+ "106": "LABEL_106",
+ "107": "LABEL_107",
+ "108": "LABEL_108",
+ "109": "LABEL_109",
+ "110": "LABEL_110",
+ "111": "LABEL_111",
+ "112": "LABEL_112",
+ "113": "LABEL_113",
+ "114": "LABEL_114",
+ "115": "LABEL_115",
+ "116": "LABEL_116",
+ "117": "LABEL_117",
+ "118": "LABEL_118",
+ "119": "LABEL_119",
+ "120": "LABEL_120",
+ "121": "LABEL_121",
+ "122": "LABEL_122",
+ "123": "LABEL_123",
+ "124": "LABEL_124",
+ "125": "LABEL_125",
+ "126": "LABEL_126",
+ "127": "LABEL_127",
+ "128": "LABEL_128",
+ "129": "LABEL_129",
+ "130": "LABEL_130",
+ "131": "LABEL_131",
+ "132": "LABEL_132",
+ "133": "LABEL_133",
+ "134": "LABEL_134",
+ "135": "LABEL_135",
+ "136": "LABEL_136",
+ "137": "LABEL_137",
+ "138": "LABEL_138",
+ "139": "LABEL_139",
+ "140": "LABEL_140",
+ "141": "LABEL_141",
+ "142": "LABEL_142",
+ "143": "LABEL_143",
+ "144": "LABEL_144",
+ "145": "LABEL_145",
+ "146": "LABEL_146",
+ "147": "LABEL_147",
+ "148": "LABEL_148",
+ "149": "LABEL_149",
+ "150": "LABEL_150",
+ "151": "LABEL_151",
+ "152": "LABEL_152",
+ "153": "LABEL_153",
+ "154": "LABEL_154",
+ "155": "LABEL_155",
+ "156": "LABEL_156",
+ "157": "LABEL_157",
+ "158": "LABEL_158",
+ "159": "LABEL_159",
+ "160": "LABEL_160",
+ "161": "LABEL_161",
+ "162": "LABEL_162",
+ "163": "LABEL_163",
+ "164": "LABEL_164",
+ "165": "LABEL_165",
+ "166": "LABEL_166",
+ "167": "LABEL_167",
+ "168": "LABEL_168",
+ "169": "LABEL_169",
+ "170": "LABEL_170",
+ "171": "LABEL_171",
+ "172": "LABEL_172",
+ "173": "LABEL_173",
+ "174": "LABEL_174",
+ "175": "LABEL_175",
+ "176": "LABEL_176",
+ "177": "LABEL_177",
+ "178": "LABEL_178",
+ "179": "LABEL_179",
+ "180": "LABEL_180",
+ "181": "LABEL_181",
+ "182": "LABEL_182",
+ "183": "LABEL_183",
+ "184": "LABEL_184",
+ "185": "LABEL_185",
+ "186": "LABEL_186",
+ "187": "LABEL_187",
+ "188": "LABEL_188",
+ "189": "LABEL_189",
+ "190": "LABEL_190",
+ "191": "LABEL_191",
+ "192": "LABEL_192",
+ "193": "LABEL_193",
+ "194": "LABEL_194",
+ "195": "LABEL_195",
+ "196": "LABEL_196",
+ "197": "LABEL_197",
+ "198": "LABEL_198",
+ "199": "LABEL_199",
+ "200": "LABEL_200",
+ "201": "LABEL_201",
+ "202": "LABEL_202",
+ "203": "LABEL_203",
+ "204": "LABEL_204",
+ "205": "LABEL_205",
+ "206": "LABEL_206",
+ "207": "LABEL_207",
+ "208": "LABEL_208",
+ "209": "LABEL_209",
+ "210": "LABEL_210",
+ "211": "LABEL_211",
+ "212": "LABEL_212",
+ "213": "LABEL_213",
+ "214": "LABEL_214",
+ "215": "LABEL_215",
+ "216": "LABEL_216",
+ "217": "LABEL_217",
+ "218": "LABEL_218",
+ "219": "LABEL_219",
+ "220": "LABEL_220",
+ "221": "LABEL_221",
+ "222": "LABEL_222",
+ "223": "LABEL_223",
+ "224": "LABEL_224",
+ "225": "LABEL_225",
+ "226": "LABEL_226",
+ "227": "LABEL_227",
+ "228": "LABEL_228",
+ "229": "LABEL_229",
+ "230": "LABEL_230",
+ "231": "LABEL_231",
+ "232": "LABEL_232",
+ "233": "LABEL_233",
+ "234": "LABEL_234",
+ "235": "LABEL_235",
+ "236": "LABEL_236",
+ "237": "LABEL_237",
+ "238": "LABEL_238",
+ "239": "LABEL_239",
+ "240": "LABEL_240",
+ "241": "LABEL_241",
+ "242": "LABEL_242",
+ "243": "LABEL_243",
+ "244": "LABEL_244",
+ "245": "LABEL_245",
+ "246": "LABEL_246",
+ "247": "LABEL_247",
+ "248": "LABEL_248",
+ "249": "LABEL_249",
+ "250": "LABEL_250",
+ "251": "LABEL_251",
+ "252": "LABEL_252",
+ "253": "LABEL_253",
+ "254": "LABEL_254",
+ "255": "LABEL_255",
+ "256": "LABEL_256",
+ "257": "LABEL_257",
+ "258": "LABEL_258",
+ "259": "LABEL_259",
+ "260": "LABEL_260",
+ "261": "LABEL_261",
+ "262": "LABEL_262",
+ "263": "LABEL_263",
+ "264": "LABEL_264",
+ "265": "LABEL_265",
+ "266": "LABEL_266",
+ "267": "LABEL_267",
+ "268": "LABEL_268",
+ "269": "LABEL_269",
+ "270": "LABEL_270",
+ "271": "LABEL_271",
+ "272": "LABEL_272",
+ "273": "LABEL_273",
+ "274": "LABEL_274",
+ "275": "LABEL_275",
+ "276": "LABEL_276",
+ "277": "LABEL_277",
+ "278": "LABEL_278",
+ "279": "LABEL_279",
+ "280": "LABEL_280",
+ "281": "LABEL_281",
+ "282": "LABEL_282",
+ "283": "LABEL_283",
+ "284": "LABEL_284",
+ "285": "LABEL_285",
+ "286": "LABEL_286",
+ "287": "LABEL_287",
+ "288": "LABEL_288",
+ "289": "LABEL_289",
+ "290": "LABEL_290",
+ "291": "LABEL_291",
+ "292": "LABEL_292",
+ "293": "LABEL_293",
+ "294": "LABEL_294",
+ "295": "LABEL_295",
+ "296": "LABEL_296",
+ "297": "LABEL_297",
+ "298": "LABEL_298",
+ "299": "LABEL_299",
+ "300": "LABEL_300",
+ "301": "LABEL_301"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1,
+ "LABEL_10": 10,
+ "LABEL_100": 100,
+ "LABEL_101": 101,
+ "LABEL_102": 102,
+ "LABEL_103": 103,
+ "LABEL_104": 104,
+ "LABEL_105": 105,
+ "LABEL_106": 106,
+ "LABEL_107": 107,
+ "LABEL_108": 108,
+ "LABEL_109": 109,
+ "LABEL_11": 11,
+ "LABEL_110": 110,
+ "LABEL_111": 111,
+ "LABEL_112": 112,
+ "LABEL_113": 113,
+ "LABEL_114": 114,
+ "LABEL_115": 115,
+ "LABEL_116": 116,
+ "LABEL_117": 117,
+ "LABEL_118": 118,
+ "LABEL_119": 119,
+ "LABEL_12": 12,
+ "LABEL_120": 120,
+ "LABEL_121": 121,
+ "LABEL_122": 122,
+ "LABEL_123": 123,
+ "LABEL_124": 124,
+ "LABEL_125": 125,
+ "LABEL_126": 126,
+ "LABEL_127": 127,
+ "LABEL_128": 128,
+ "LABEL_129": 129,
+ "LABEL_13": 13,
+ "LABEL_130": 130,
+ "LABEL_131": 131,
+ "LABEL_132": 132,
+ "LABEL_133": 133,
+ "LABEL_134": 134,
+ "LABEL_135": 135,
+ "LABEL_136": 136,
+ "LABEL_137": 137,
+ "LABEL_138": 138,
+ "LABEL_139": 139,
+ "LABEL_14": 14,
+ "LABEL_140": 140,
+ "LABEL_141": 141,
+ "LABEL_142": 142,
+ "LABEL_143": 143,
+ "LABEL_144": 144,
+ "LABEL_145": 145,
+ "LABEL_146": 146,
+ "LABEL_147": 147,
+ "LABEL_148": 148,
+ "LABEL_149": 149,
+ "LABEL_15": 15,
+ "LABEL_150": 150,
+ "LABEL_151": 151,
+ "LABEL_152": 152,
+ "LABEL_153": 153,
+ "LABEL_154": 154,
+ "LABEL_155": 155,
+ "LABEL_156": 156,
+ "LABEL_157": 157,
+ "LABEL_158": 158,
+ "LABEL_159": 159,
+ "LABEL_16": 16,
+ "LABEL_160": 160,
+ "LABEL_161": 161,
+ "LABEL_162": 162,
+ "LABEL_163": 163,
+ "LABEL_164": 164,
+ "LABEL_165": 165,
+ "LABEL_166": 166,
+ "LABEL_167": 167,
+ "LABEL_168": 168,
+ "LABEL_169": 169,
+ "LABEL_17": 17,
+ "LABEL_170": 170,
+ "LABEL_171": 171,
+ "LABEL_172": 172,
+ "LABEL_173": 173,
+ "LABEL_174": 174,
+ "LABEL_175": 175,
+ "LABEL_176": 176,
+ "LABEL_177": 177,
+ "LABEL_178": 178,
+ "LABEL_179": 179,
+ "LABEL_18": 18,
+ "LABEL_180": 180,
+ "LABEL_181": 181,
+ "LABEL_182": 182,
+ "LABEL_183": 183,
+ "LABEL_184": 184,
+ "LABEL_185": 185,
+ "LABEL_186": 186,
+ "LABEL_187": 187,
+ "LABEL_188": 188,
+ "LABEL_189": 189,
+ "LABEL_19": 19,
+ "LABEL_190": 190,
+ "LABEL_191": 191,
+ "LABEL_192": 192,
+ "LABEL_193": 193,
+ "LABEL_194": 194,
+ "LABEL_195": 195,
+ "LABEL_196": 196,
+ "LABEL_197": 197,
+ "LABEL_198": 198,
+ "LABEL_199": 199,
+ "LABEL_2": 2,
+ "LABEL_20": 20,
+ "LABEL_200": 200,
+ "LABEL_201": 201,
+ "LABEL_202": 202,
+ "LABEL_203": 203,
+ "LABEL_204": 204,
+ "LABEL_205": 205,
+ "LABEL_206": 206,
+ "LABEL_207": 207,
+ "LABEL_208": 208,
+ "LABEL_209": 209,
+ "LABEL_21": 21,
+ "LABEL_210": 210,
+ "LABEL_211": 211,
+ "LABEL_212": 212,
+ "LABEL_213": 213,
+ "LABEL_214": 214,
+ "LABEL_215": 215,
+ "LABEL_216": 216,
+ "LABEL_217": 217,
+ "LABEL_218": 218,
+ "LABEL_219": 219,
+ "LABEL_22": 22,
+ "LABEL_220": 220,
+ "LABEL_221": 221,
+ "LABEL_222": 222,
+ "LABEL_223": 223,
+ "LABEL_224": 224,
+ "LABEL_225": 225,
+ "LABEL_226": 226,
+ "LABEL_227": 227,
+ "LABEL_228": 228,
+ "LABEL_229": 229,
+ "LABEL_23": 23,
+ "LABEL_230": 230,
+ "LABEL_231": 231,
+ "LABEL_232": 232,
+ "LABEL_233": 233,
+ "LABEL_234": 234,
+ "LABEL_235": 235,
+ "LABEL_236": 236,
+ "LABEL_237": 237,
+ "LABEL_238": 238,
+ "LABEL_239": 239,
+ "LABEL_24": 24,
+ "LABEL_240": 240,
+ "LABEL_241": 241,
+ "LABEL_242": 242,
+ "LABEL_243": 243,
+ "LABEL_244": 244,
+ "LABEL_245": 245,
+ "LABEL_246": 246,
+ "LABEL_247": 247,
+ "LABEL_248": 248,
+ "LABEL_249": 249,
+ "LABEL_25": 25,
+ "LABEL_250": 250,
+ "LABEL_251": 251,
+ "LABEL_252": 252,
+ "LABEL_253": 253,
+ "LABEL_254": 254,
+ "LABEL_255": 255,
+ "LABEL_256": 256,
+ "LABEL_257": 257,
+ "LABEL_258": 258,
+ "LABEL_259": 259,
+ "LABEL_26": 26,
+ "LABEL_260": 260,
+ "LABEL_261": 261,
+ "LABEL_262": 262,
+ "LABEL_263": 263,
+ "LABEL_264": 264,
+ "LABEL_265": 265,
+ "LABEL_266": 266,
+ "LABEL_267": 267,
+ "LABEL_268": 268,
+ "LABEL_269": 269,
+ "LABEL_27": 27,
+ "LABEL_270": 270,
+ "LABEL_271": 271,
+ "LABEL_272": 272,
+ "LABEL_273": 273,
+ "LABEL_274": 274,
+ "LABEL_275": 275,
+ "LABEL_276": 276,
+ "LABEL_277": 277,
+ "LABEL_278": 278,
+ "LABEL_279": 279,
+ "LABEL_28": 28,
+ "LABEL_280": 280,
+ "LABEL_281": 281,
+ "LABEL_282": 282,
+ "LABEL_283": 283,
+ "LABEL_284": 284,
+ "LABEL_285": 285,
+ "LABEL_286": 286,
+ "LABEL_287": 287,
+ "LABEL_288": 288,
+ "LABEL_289": 289,
+ "LABEL_29": 29,
+ "LABEL_290": 290,
+ "LABEL_291": 291,
+ "LABEL_292": 292,
+ "LABEL_293": 293,
+ "LABEL_294": 294,
+ "LABEL_295": 295,
+ "LABEL_296": 296,
+ "LABEL_297": 297,
+ "LABEL_298": 298,
+ "LABEL_299": 299,
+ "LABEL_3": 3,
+ "LABEL_30": 30,
+ "LABEL_300": 300,
+ "LABEL_301": 301,
+ "LABEL_31": 31,
+ "LABEL_32": 32,
+ "LABEL_33": 33,
+ "LABEL_34": 34,
+ "LABEL_35": 35,
+ "LABEL_36": 36,
+ "LABEL_37": 37,
+ "LABEL_38": 38,
+ "LABEL_39": 39,
+ "LABEL_4": 4,
+ "LABEL_40": 40,
+ "LABEL_41": 41,
+ "LABEL_42": 42,
+ "LABEL_43": 43,
+ "LABEL_44": 44,
+ "LABEL_45": 45,
+ "LABEL_46": 46,
+ "LABEL_47": 47,
+ "LABEL_48": 48,
+ "LABEL_49": 49,
+ "LABEL_5": 5,
+ "LABEL_50": 50,
+ "LABEL_51": 51,
+ "LABEL_52": 52,
+ "LABEL_53": 53,
+ "LABEL_54": 54,
+ "LABEL_55": 55,
+ "LABEL_56": 56,
+ "LABEL_57": 57,
+ "LABEL_58": 58,
+ "LABEL_59": 59,
+ "LABEL_6": 6,
+ "LABEL_60": 60,
+ "LABEL_61": 61,
+ "LABEL_62": 62,
+ "LABEL_63": 63,
+ "LABEL_64": 64,
+ "LABEL_65": 65,
+ "LABEL_66": 66,
+ "LABEL_67": 67,
+ "LABEL_68": 68,
+ "LABEL_69": 69,
+ "LABEL_7": 7,
+ "LABEL_70": 70,
+ "LABEL_71": 71,
+ "LABEL_72": 72,
+ "LABEL_73": 73,
+ "LABEL_74": 74,
+ "LABEL_75": 75,
+ "LABEL_76": 76,
+ "LABEL_77": 77,
+ "LABEL_78": 78,
+ "LABEL_79": 79,
+ "LABEL_8": 8,
+ "LABEL_80": 80,
+ "LABEL_81": 81,
+ "LABEL_82": 82,
+ "LABEL_83": 83,
+ "LABEL_84": 84,
+ "LABEL_85": 85,
+ "LABEL_86": 86,
+ "LABEL_87": 87,
+ "LABEL_88": 88,
+ "LABEL_89": 89,
+ "LABEL_9": 9,
+ "LABEL_90": 90,
+ "LABEL_91": 91,
+ "LABEL_92": 92,
+ "LABEL_93": 93,
+ "LABEL_94": 94,
+ "LABEL_95": 95,
+ "LABEL_96": 96,
+ "LABEL_97": 97,
+ "LABEL_98": 98,
+ "LABEL_99": 99
+ },
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "tokenizer_class": "BertJapaneseTokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.25.1",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 32830
+}
diff --git a/stores/dbert/heteronyms.json b/stores/dbert/heteronyms.json
new file mode 100644
index 0000000000000000000000000000000000000000..5c57b1909cd25604c6038ee2c077fb0ca9f277ed
--- /dev/null
+++ b/stores/dbert/heteronyms.json
@@ -0,0 +1,567 @@
+{
+ "表": {
+ "ひょう": 3349,
+ "おもて": 3034,
+ "あらわ": 2474,
+ "あら": 731
+ },
+ "角": {
+ "かく": 4360,
+ "かど": 2303,
+ "つの": 372,
+ "すみ": 70
+ },
+ "大分": {
+ "おおいた": 3358,
+ "だいぶ": 797,
+ "だいぶん": 97
+ },
+ "国立": {
+ "こくりつ": 19256,
+ "くにたち": 246
+ },
+ "人気": {
+ "にんき": 7383,
+ "ひとけ": 149,
+ "じんき": 44
+ },
+ "市場": {
+ "しじょう": 85107,
+ "いちば": 781
+ },
+ "気質": {
+ "きしつ": 1108,
+ "かたぎ": 398
+ },
+ "上方": {
+ "かみがた": 1411,
+ "じょうほう": 656
+ },
+ "上手": {
+ "じょうず": 8065,
+ "うま": 706,
+ "かみて": 150,
+ "うわて": 57
+ },
+ "下手": {
+ "へた": 849,
+ "したて": 128,
+ "べた": 121,
+ "しもて": 50
+ },
+ "仮名": {
+ "かな": 1407,
+ "がな": 129,
+ "かめい": 115
+ },
+ "礼拝": {
+ "れいはい": 841,
+ "らいはい": 62
+ },
+ "遺言": {
+ "ゆいごん": 3152,
+ "いげん": 67,
+ "いごん": 57
+ },
+ "口腔": {
+ "こうこう": 6475,
+ "こうくう": 5577
+ },
+ "骨": {
+ "ほね": 10697,
+ "こつ": 5870
+ },
+ "一途": {
+ "いちず": 576,
+ "いっと": 139
+ },
+ "一言": {
+ "ひとこと": 2567,
+ "いちげん": 133,
+ "いちごん": 106
+ },
+ "最中": {
+ "さいちゅう": 520,
+ "さなか": 43
+ },
+ "一目": {
+ "ひとめ": 1596,
+ "いちもく": 210
+ },
+ "係": {
+ "かか": 14218,
+ "かかわ": 9804,
+ "がかり": 234,
+ "かかり": 227
+ },
+ "足跡": {
+ "あしあと": 2626,
+ "そくせき": 1862
+ },
+ "今日": {
+ "きょう": 17624,
+ "こんにち": 6772
+ },
+ "明日": {
+ "あす": 9824,
+ "あした": 6606,
+ "みょうにち": 66
+ },
+ "生物": {
+ "せいぶつ": 26088,
+ "いきもの": 55
+ },
+ "変化": {
+ "へんか": 87895,
+ "へんげ": 337
+ },
+ "大事": {
+ "だいじ": 5293,
+ "おおごと": 54
+ },
+ "大家": {
+ "たいか": 586,
+ "おおや": 238,
+ "たいけ": 79
+ },
+ "心中": {
+ "しんじゅう": 1541,
+ "しんちゅう": 250,
+ "しんぢゅう": 127
+ },
+ "一行": {
+ "いっこう": 1112,
+ "いちぎょう": 95
+ },
+ "一時": {
+ "いちじ": 2649,
+ "いっとき": 381,
+ "いちどき": 47
+ },
+ "一方": {
+ "いっぽう": 5327,
+ "ひとかた": 112,
+ "いちほう": 42
+ },
+ "一夜": {
+ "いちや": 1148,
+ "ひとよ": 82
+ },
+ "下野": {
+ "しもつけ": 530,
+ "げや": 104,
+ "しもの": 57
+ },
+ "花弁": {
+ "かべん": 213,
+ "はなびら": 58
+ },
+ "玩具": {
+ "がんぐ": 1354,
+ "おもちゃ": 238
+ },
+ "強力": {
+ "きょうりょく": 2319,
+ "ごうりき": 51
+ },
+ "金色": {
+ "きんいろ": 942,
+ "こんじき": 484
+ },
+ "経緯": {
+ "けいい": 7659,
+ "いきさつ": 56
+ },
+ "故郷": {
+ "こきょう": 3840,
+ "ふるさと": 506,
+ "くに": 122
+ },
+ "紅葉": {
+ "こうよう": 856,
+ "もみじ": 339
+ },
+ "根本": {
+ "こんぽん": 2872,
+ "ねもと": 262
+ },
+ "山陰": {
+ "さんいん": 2094,
+ "やまかげ": 51
+ },
+ "上下": {
+ "じょうげ": 1549,
+ "うえした": 97
+ },
+ "身体": {
+ "しんたい": 20301,
+ "からだ": 3375
+ },
+ "水面": {
+ "すいめん": 1387,
+ "みなも": 91
+ },
+ "世論": {
+ "よろん": 4554,
+ "せろん": 1934
+ },
+ "清水": {
+ "しみず": 4114,
+ "きよみず": 98
+ },
+ "大手": {
+ "おおて": 6695,
+ "おおで": 119
+ },
+ "大人": {
+ "おとな": 11037,
+ "たいじん": 113,
+ "うし": 59
+ },
+ "大勢": {
+ "おおぜい": 1290,
+ "たいせい": 398
+ },
+ "中間": {
+ "ちゅうかん": 17669,
+ "ちゅうげん": 144
+ },
+ "日向": {
+ "ひゅうが": 800,
+ "ひなた": 318
+ },
+ "夫婦": {
+ "ふうふ": 9165,
+ "めおと": 354
+ },
+ "牧場": {
+ "ぼくじょう": 1913,
+ "まきば": 159
+ },
+ "末期": {
+ "まっき": 3569,
+ "まつご": 78
+ },
+ "利益": {
+ "りえき": 13434,
+ "りやく": 209
+ },
+ "一味": {
+ "いちみ": 442,
+ "ひとあじ": 60
+ },
+ "魚": {
+ "さかな": 5857,
+ "うお": 1706,
+ "ぎょ": 413,
+ "ざかな": 50
+ },
+ "施行": {
+ "しこう": 18724,
+ "せこう": 70
+ },
+ "施工": {
+ "せこう": 25734,
+ "しこう": 48,
+ "せこ": 43
+ },
+ "転生": {
+ "てんせい": 911,
+ "てんしょう": 175
+ },
+ "博士": {
+ "はくし": 17017,
+ "はかせ": 2462
+ },
+ "眼鏡": {
+ "めがね": 2040,
+ "がんきょう": 102
+ },
+ "文字": {
+ "もじ": 9583,
+ "もんじ": 633
+ },
+ "文書": {
+ "ぶんしょ": 15094,
+ "もんじょ": 5879,
+ "もんしょ": 51
+ },
+ "現世": {
+ "げんせい": 192,
+ "げんせ": 125
+ },
+ "日中": {
+ "にっちゅう": 12478,
+ "にちじゅう": 117
+ },
+ "夜中": {
+ "よなか": 723,
+ "やちゅう": 106
+ },
+ "二人": {
+ "ふたり": 22151,
+ "ににん": 256
+ },
+ "見物": {
+ "けんぶつ": 1832,
+ "みもの": 61
+ },
+ "清浄": {
+ "せいじょう": 800,
+ "しょうじょう": 46
+ },
+ "谷間": {
+ "たにま": 1089,
+ "たにあい": 67
+ },
+ "追従": {
+ "ついじゅう": 1000,
+ "ついしょう": 73
+ },
+ "墓石": {
+ "はかいし": 323,
+ "ぼせき": 257
+ },
+ "大文字": {
+ "おおもじ": 65,
+ "だいもんじ": 46
+ },
+ "漢書": {
+ "かんじょ": 171,
+ "かんしょ": 66,
+ "からぶみ": 47
+ },
+ "作法": {
+ "さほう": 3905,
+ "さくほう": 427
+ },
+ "半月": {
+ "はんつき": 388,
+ "はんげつ": 85
+ },
+ "黒子": {
+ "ほくろ": 200,
+ "くろこ": 183
+ },
+ "競売": {
+ "けいばい": 937,
+ "きょうばい": 332
+ },
+ "開眼": {
+ "かいげん": 338,
+ "かいがん": 144
+ },
+ "求道": {
+ "きゅうどう": 379,
+ "ぐどう": 81
+ },
+ "施業": {
+ "せぎょう": 602,
+ "しぎょう": 264
+ },
+ "借家": {
+ "しゃっか": 505,
+ "しゃくや": 394
+ },
+ "頭蓋骨": {
+ "ずがいこつ": 377,
+ "とうがいこつ": 187
+ },
+ "法衣": {
+ "ころも": 115,
+ "ほうえ": 87
+ },
+ "昨日": {
+ "きのう": 2670,
+ "さくじつ": 713
+ },
+ "風車": {
+ "ふうしゃ": 1133,
+ "かざぐるま": 678
+ },
+ "寒気": {
+ "かんき": 153,
+ "さむけ": 79
+ },
+ "背筋": {
+ "せすじ": 177,
+ "はいきん": 43
+ },
+ "逆手": {
+ "さかて": 169,
+ "ぎゃくて": 116
+ },
+ "生花": {
+ "いけばな": 283,
+ "せいか": 91
+ },
+ "白髪": {
+ "しらが": 313,
+ "はくはつ": 113
+ },
+ "一月": {
+ "ひとつき": 301,
+ "いちがつ": 282
+ },
+ "一寸": {
+ "ちょっと": 1481,
+ "いっすん": 111
+ },
+ "一声": {
+ "ひとこえ": 253,
+ "いっせい": 109
+ },
+ "一日": {
+ "いちにち": 1711,
+ "ついたち": 866,
+ "いちじつ": 41
+ },
+ "一分": {
+ "いちぶん": 75,
+ "いちぶ": 62
+ },
+ "一文": {
+ "いちもん": 86,
+ "いちぶん": 48
+ },
+ "何時": {
+ "いつ": 1248,
+ "なんじ": 159,
+ "なんどき": 63
+ },
+ "何分": {
+ "なにぶん": 379,
+ "なんぷん": 51
+ },
+ "気骨": {
+ "きこつ": 140,
+ "きぼね": 67
+ },
+ "銀杏": {
+ "いちょう": 322,
+ "ぎんなん": 85
+ },
+ "細々": {
+ "こまごま": 88,
+ "ほそぼそ": 67
+ },
+ "細目": {
+ "さいもく": 962,
+ "ほそめ": 123
+ },
+ "疾風": {
+ "しっぷう": 544,
+ "はやて": 94,
+ "かぜ": 68
+ },
+ "菖蒲": {
+ "しょうぶ": 165,
+ "あやめ": 65
+ },
+ "船底": {
+ "せんてい": 246,
+ "ふなぞこ": 80
+ },
+ "相乗": {
+ "そうじょう": 732,
+ "あいの": 89
+ },
+ "造作": {
+ "ぞうさ": 188,
+ "ぞうさく": 65
+ },
+ "頭数": {
+ "あたまかず": 168,
+ "とうすう": 119
+ },
+ "二重": {
+ "にじゅう": 5418,
+ "ふたえ": 65
+ },
+ "日暮": {
+ "ひぐ": 403,
+ "ひぐれ": 97,
+ "ひぐらし": 81
+ },
+ "梅雨": {
+ "つゆ": 471,
+ "ばいう": 284
+ },
+ "風穴": {
+ "かざあな": 300,
+ "ふうけつ": 68
+ },
+ "分別": {
+ "ふんべつ": 1280,
+ "ぶんべつ": 635
+ },
+ "夜話": {
+ "やわ": 2153,
+ "よばなし": 52
+ },
+ "野兎": {
+ "やと": 176,
+ "のうさぎ": 43
+ },
+ "冷水": {
+ "れいすい": 189,
+ "ひやみず": 153
+ },
+ "連中": {
+ "れんじゅう": 853,
+ "れんちゅう": 691
+ },
+ "飛沫": {
+ "ひまつ": 223,
+ "しぶき": 96
+ },
+ "翡翠": {
+ "ひすい": 177,
+ "かわせみ": 94
+ },
+ "一昨日": {
+ "おととい": 208,
+ "いっさくじつ": 71
+ },
+ "一昨年": {
+ "おととし": 72,
+ "いっさくねん": 59
+ },
+ "十八番": {
+ "じゅうはちばん": 212,
+ "おはこ": 41
+ },
+ "明後日": {
+ "あさって": 186,
+ "みょうごにち": 60
+ },
+ "石綿": {
+ "いしわた": 1702,
+ "せきめん": 360
+ },
+ "公文": {
+ "こうぶん": 196,
+ "くもん": 46
+ },
+ "読本": {
+ "どくほん": 12176,
+ "とくほん": 2414,
+ "よみほん": 121
+ },
+ "古本": {
+ "ふるほん": 550,
+ "こほん": 109
+ },
+ "町家": {
+ "まちや": 655,
+ "ちょうか": 216
+ },
+ "米": {
+ "べい": 17392,
+ "こめ": 9021,
+ "まい": 2829,
+ "よね": 620,
+ "ごめ": 164,
+ "めーとる": 112
+ }
+}
diff --git a/stores/dbert/label_encoder.json b/stores/dbert/label_encoder.json
new file mode 100644
index 0000000000000000000000000000000000000000..e2a1eded19e70edfaca784be045df15af2358d09
--- /dev/null
+++ b/stores/dbert/label_encoder.json
@@ -0,0 +1,306 @@
+{
+ "class_to_index": {
+ "": 0,
+ "\u4e00\u5206:\u3044\u3061\u3076": 1,
+ "\u4e00\u5206:\u3044\u3061\u3076\u3093": 2,
+ "\u4e00\u5473:\u3044\u3061\u307f": 3,
+ "\u4e00\u5473:\u3072\u3068\u3042\u3058": 4,
+ "\u4e00\u58f0:\u3044\u3063\u305b\u3044": 5,
+ "\u4e00\u58f0:\u3072\u3068\u3053\u3048": 6,
+ "\u4e00\u591c:\u3044\u3061\u3084": 7,
+ "\u4e00\u591c:\u3072\u3068\u3088": 8,
+ "\u4e00\u5bf8:\u3044\u3063\u3059\u3093": 9,
+ "\u4e00\u5bf8:\u3061\u3087\u3063\u3068": 10,
+ "\u4e00\u6587:\u3044\u3061\u3076\u3093": 11,
+ "\u4e00\u6587:\u3044\u3061\u3082\u3093": 12,
+ "\u4e00\u65b9:\u3044\u3061\u307b\u3046": 13,
+ "\u4e00\u65b9:\u3044\u3063\u307d\u3046": 14,
+ "\u4e00\u65b9:\u3072\u3068\u304b\u305f": 15,
+ "\u4e00\u65e5:\u3044\u3061\u3058\u3064": 16,
+ "\u4e00\u65e5:\u3044\u3061\u306b\u3061": 17,
+ "\u4e00\u65e5:\u3064\u3044\u305f\u3061": 18,
+ "\u4e00\u6628\u5e74:\u3044\u3063\u3055\u304f\u306d\u3093": 19,
+ "\u4e00\u6628\u5e74:\u304a\u3068\u3068\u3057": 20,
+ "\u4e00\u6628\u65e5:\u3044\u3063\u3055\u304f\u3058\u3064": 21,
+ "\u4e00\u6628\u65e5:\u304a\u3068\u3068\u3044": 22,
+ "\u4e00\u6642:\u3044\u3061\u3058": 23,
+ "\u4e00\u6642:\u3044\u3061\u3069\u304d": 24,
+ "\u4e00\u6642:\u3044\u3063\u3068\u304d": 25,
+ "\u4e00\u6708:\u3044\u3061\u304c\u3064": 26,
+ "\u4e00\u6708:\u3072\u3068\u3064\u304d": 27,
+ "\u4e00\u76ee:\u3044\u3061\u3082\u304f": 28,
+ "\u4e00\u76ee:\u3072\u3068\u3081": 29,
+ "\u4e00\u884c:\u3044\u3061\u304e\u3087\u3046": 30,
+ "\u4e00\u884c:\u3044\u3063\u3053\u3046": 31,
+ "\u4e00\u8a00:\u3044\u3061\u3052\u3093": 32,
+ "\u4e00\u8a00:\u3044\u3061\u3054\u3093": 33,
+ "\u4e00\u8a00:\u3072\u3068\u3053\u3068": 34,
+ "\u4e00\u9014:\u3044\u3061\u305a": 35,
+ "\u4e00\u9014:\u3044\u3063\u3068": 36,
+ "\u4e0a\u4e0b:\u3046\u3048\u3057\u305f": 37,
+ "\u4e0a\u4e0b:\u3058\u3087\u3046\u3052": 38,
+ "\u4e0a\u624b:\u3046\u307e": 39,
+ "\u4e0a\u624b:\u3046\u308f\u3066": 40,
+ "\u4e0a\u624b:\u304b\u307f\u3066": 41,
+ "\u4e0a\u624b:\u3058\u3087\u3046\u305a": 42,
+ "\u4e0a\u65b9:\u304b\u307f\u304c\u305f": 43,
+ "\u4e0a\u65b9:\u3058\u3087\u3046\u307b\u3046": 44,
+ "\u4e0b\u624b:\u3057\u305f\u3066": 45,
+ "\u4e0b\u624b:\u3057\u3082\u3066": 46,
+ "\u4e0b\u624b:\u3078\u305f": 47,
+ "\u4e0b\u624b:\u3079\u305f": 48,
+ "\u4e0b\u91ce:\u3052\u3084": 49,
+ "\u4e0b\u91ce:\u3057\u3082\u3064\u3051": 50,
+ "\u4e0b\u91ce:\u3057\u3082\u306e": 51,
+ "\u4e16\u8ad6:\u305b\u308d\u3093": 52,
+ "\u4e16\u8ad6:\u3088\u308d\u3093": 53,
+ "\u4e2d\u9593:\u3061\u3085\u3046\u304b\u3093": 54,
+ "\u4e2d\u9593:\u3061\u3085\u3046\u3052\u3093": 55,
+ "\u4e8c\u4eba:\u306b\u306b\u3093": 56,
+ "\u4e8c\u4eba:\u3075\u305f\u308a": 57,
+ "\u4e8c\u91cd:\u306b\u3058\u3085\u3046": 58,
+ "\u4e8c\u91cd:\u3075\u305f\u3048": 59,
+ "\u4eba\u6c17:\u3058\u3093\u304d": 60,
+ "\u4eba\u6c17:\u306b\u3093\u304d": 61,
+ "\u4eba\u6c17:\u3072\u3068\u3051": 62,
+ "\u4eca\u65e5:\u304d\u3087\u3046": 63,
+ "\u4eca\u65e5:\u3053\u3093\u306b\u3061": 64,
+ "\u4eee\u540d:\u304b\u306a": 65,
+ "\u4eee\u540d:\u304b\u3081\u3044": 66,
+ "\u4eee\u540d:\u304c\u306a": 67,
+ "\u4f55\u5206:\u306a\u306b\u3076\u3093": 68,
+ "\u4f55\u5206:\u306a\u3093\u3077\u3093": 69,
+ "\u4f55\u6642:\u3044\u3064": 70,
+ "\u4f55\u6642:\u306a\u3093\u3058": 71,
+ "\u4f55\u6642:\u306a\u3093\u3069\u304d": 72,
+ "\u4f5c\u6cd5:\u3055\u304f\u307b\u3046": 73,
+ "\u4f5c\u6cd5:\u3055\u307b\u3046": 74,
+ "\u4fc2:\u304b\u304b": 75,
+ "\u4fc2:\u304b\u304b\u308a": 76,
+ "\u4fc2:\u304b\u304b\u308f": 77,
+ "\u4fc2:\u304c\u304b\u308a": 78,
+ "\u501f\u5bb6:\u3057\u3083\u304f\u3084": 79,
+ "\u501f\u5bb6:\u3057\u3083\u3063\u304b": 80,
+ "\u516c\u6587:\u304f\u3082\u3093": 81,
+ "\u516c\u6587:\u3053\u3046\u3076\u3093": 82,
+ "\u51b7\u6c34:\u3072\u3084\u307f\u305a": 83,
+ "\u51b7\u6c34:\u308c\u3044\u3059\u3044": 84,
+ "\u5206\u5225:\u3075\u3093\u3079\u3064": 85,
+ "\u5206\u5225:\u3076\u3093\u3079\u3064": 86,
+ "\u5229\u76ca:\u308a\u3048\u304d": 87,
+ "\u5229\u76ca:\u308a\u3084\u304f": 88,
+ "\u5341\u516b\u756a:\u304a\u306f\u3053": 89,
+ "\u5341\u516b\u756a:\u3058\u3085\u3046\u306f\u3061\u3070\u3093": 90,
+ "\u534a\u6708:\u306f\u3093\u3052\u3064": 91,
+ "\u534a\u6708:\u306f\u3093\u3064\u304d": 92,
+ "\u535a\u58eb:\u306f\u304b\u305b": 93,
+ "\u535a\u58eb:\u306f\u304f\u3057": 94,
+ "\u53e3\u8154:\u3053\u3046\u304f\u3046": 95,
+ "\u53e3\u8154:\u3053\u3046\u3053\u3046": 96,
+ "\u53e4\u672c:\u3053\u307b\u3093": 97,
+ "\u53e4\u672c:\u3075\u308b\u307b\u3093": 98,
+ "\u56fd\u7acb:\u304f\u306b\u305f\u3061": 99,
+ "\u56fd\u7acb:\u3053\u304f\u308a\u3064": 100,
+ "\u5893\u77f3:\u306f\u304b\u3044\u3057": 101,
+ "\u5893\u77f3:\u307c\u305b\u304d": 102,
+ "\u5909\u5316:\u3078\u3093\u304b": 103,
+ "\u5909\u5316:\u3078\u3093\u3052": 104,
+ "\u591c\u4e2d:\u3084\u3061\u3085\u3046": 105,
+ "\u591c\u4e2d:\u3088\u306a\u304b": 106,
+ "\u591c\u8a71:\u3084\u308f": 107,
+ "\u591c\u8a71:\u3088\u3070\u306a\u3057": 108,
+ "\u5927\u4e8b:\u304a\u304a\u3054\u3068": 109,
+ "\u5927\u4e8b:\u3060\u3044\u3058": 110,
+ "\u5927\u4eba:\u3046\u3057": 111,
+ "\u5927\u4eba:\u304a\u3068\u306a": 112,
+ "\u5927\u4eba:\u305f\u3044\u3058\u3093": 113,
+ "\u5927\u5206:\u304a\u304a\u3044\u305f": 114,
+ "\u5927\u5206:\u3060\u3044\u3076": 115,
+ "\u5927\u5206:\u3060\u3044\u3076\u3093": 116,
+ "\u5927\u52e2:\u304a\u304a\u305c\u3044": 117,
+ "\u5927\u52e2:\u305f\u3044\u305b\u3044": 118,
+ "\u5927\u5bb6:\u304a\u304a\u3084": 119,
+ "\u5927\u5bb6:\u305f\u3044\u304b": 120,
+ "\u5927\u5bb6:\u305f\u3044\u3051": 121,
+ "\u5927\u624b:\u304a\u304a\u3066": 122,
+ "\u5927\u624b:\u304a\u304a\u3067": 123,
+ "\u5927\u6587\u5b57:\u304a\u304a\u3082\u3058": 124,
+ "\u5927\u6587\u5b57:\u3060\u3044\u3082\u3093\u3058": 125,
+ "\u592b\u5a66:\u3075\u3046\u3075": 126,
+ "\u592b\u5a66:\u3081\u304a\u3068": 127,
+ "\u5bd2\u6c17:\u304b\u3093\u304d": 128,
+ "\u5bd2\u6c17:\u3055\u3080\u3051": 129,
+ "\u5c71\u9670:\u3055\u3093\u3044\u3093": 130,
+ "\u5c71\u9670:\u3084\u307e\u304b\u3052": 131,
+ "\u5e02\u5834:\u3044\u3061\u3070": 132,
+ "\u5e02\u5834:\u3057\u3058\u3087\u3046": 133,
+ "\u5f37\u529b:\u304d\u3087\u3046\u308a\u3087\u304f": 134,
+ "\u5f37\u529b:\u3054\u3046\u308a\u304d": 135,
+ "\u5fc3\u4e2d:\u3057\u3093\u3058\u3085\u3046": 136,
+ "\u5fc3\u4e2d:\u3057\u3093\u3061\u3085\u3046": 137,
+ "\u5fc3\u4e2d:\u3057\u3093\u3062\u3085\u3046": 138,
+ "\u6545\u90f7:\u304f\u306b": 139,
+ "\u6545\u90f7:\u3053\u304d\u3087\u3046": 140,
+ "\u6545\u90f7:\u3075\u308b\u3055\u3068": 141,
+ "\u6587\u5b57:\u3082\u3058": 142,
+ "\u6587\u5b57:\u3082\u3093\u3058": 143,
+ "\u6587\u66f8:\u3076\u3093\u3057\u3087": 144,
+ "\u6587\u66f8:\u3082\u3093\u3057\u3087": 145,
+ "\u6587\u66f8:\u3082\u3093\u3058\u3087": 146,
+ "\u65bd\u5de5:\u3057\u3053\u3046": 147,
+ "\u65bd\u5de5:\u305b\u3053": 148,
+ "\u65bd\u5de5:\u305b\u3053\u3046": 149,
+ "\u65bd\u696d:\u3057\u304e\u3087\u3046": 150,
+ "\u65bd\u696d:\u305b\u304e\u3087\u3046": 151,
+ "\u65bd\u884c:\u3057\u3053\u3046": 152,
+ "\u65bd\u884c:\u305b\u3053\u3046": 153,
+ "\u65e5\u4e2d:\u306b\u3061\u3058\u3085\u3046": 154,
+ "\u65e5\u4e2d:\u306b\u3063\u3061\u3085\u3046": 155,
+ "\u65e5\u5411:\u3072\u306a\u305f": 156,
+ "\u65e5\u5411:\u3072\u3085\u3046\u304c": 157,
+ "\u65e5\u66ae:\u3072\u3050": 158,
+ "\u65e5\u66ae:\u3072\u3050\u3089\u3057": 159,
+ "\u65e5\u66ae:\u3072\u3050\u308c": 160,
+ "\u660e\u5f8c\u65e5:\u3042\u3055\u3063\u3066": 161,
+ "\u660e\u5f8c\u65e5:\u307f\u3087\u3046\u3054\u306b\u3061": 162,
+ "\u660e\u65e5:\u3042\u3057\u305f": 163,
+ "\u660e\u65e5:\u3042\u3059": 164,
+ "\u660e\u65e5:\u307f\u3087\u3046\u306b\u3061": 165,
+ "\u6628\u65e5:\u304d\u306e\u3046": 166,
+ "\u6628\u65e5:\u3055\u304f\u3058\u3064": 167,
+ "\u6700\u4e2d:\u3055\u3044\u3061\u3085\u3046": 168,
+ "\u6700\u4e2d:\u3055\u306a\u304b": 169,
+ "\u672b\u671f:\u307e\u3063\u304d": 170,
+ "\u672b\u671f:\u307e\u3064\u3054": 171,
+ "\u6839\u672c:\u3053\u3093\u307d\u3093": 172,
+ "\u6839\u672c:\u306d\u3082\u3068": 173,
+ "\u6885\u96e8:\u3064\u3086": 174,
+ "\u6885\u96e8:\u3070\u3044\u3046": 175,
+ "\u6c17\u8cea:\u304b\u305f\u304e": 176,
+ "\u6c17\u8cea:\u304d\u3057\u3064": 177,
+ "\u6c17\u9aa8:\u304d\u3053\u3064": 178,
+ "\u6c17\u9aa8:\u304d\u307c\u306d": 179,
+ "\u6c34\u9762:\u3059\u3044\u3081\u3093": 180,
+ "\u6c34\u9762:\u307f\u306a\u3082": 181,
+ "\u6c42\u9053:\u304d\u3085\u3046\u3069\u3046": 182,
+ "\u6c42\u9053:\u3050\u3069\u3046": 183,
+ "\u6cd5\u8863:\u3053\u308d\u3082": 184,
+ "\u6cd5\u8863:\u307b\u3046\u3048": 185,
+ "\u6e05\u6c34:\u304d\u3088\u307f\u305a": 186,
+ "\u6e05\u6c34:\u3057\u307f\u305a": 187,
+ "\u6e05\u6d44:\u3057\u3087\u3046\u3058\u3087\u3046": 188,
+ "\u6e05\u6d44:\u305b\u3044\u3058\u3087\u3046": 189,
+ "\u6f22\u66f8:\u304b\u3089\u3076\u307f": 190,
+ "\u6f22\u66f8:\u304b\u3093\u3057\u3087": 191,
+ "\u6f22\u66f8:\u304b\u3093\u3058\u3087": 192,
+ "\u7267\u5834:\u307c\u304f\u3058\u3087\u3046": 193,
+ "\u7267\u5834:\u307e\u304d\u3070": 194,
+ "\u73a9\u5177:\u304a\u3082\u3061\u3083": 195,
+ "\u73a9\u5177:\u304c\u3093\u3050": 196,
+ "\u73fe\u4e16:\u3052\u3093\u305b": 197,
+ "\u73fe\u4e16:\u3052\u3093\u305b\u3044": 198,
+ "\u751f\u7269:\u3044\u304d\u3082\u306e": 199,
+ "\u751f\u7269:\u305b\u3044\u3076\u3064": 200,
+ "\u751f\u82b1:\u3044\u3051\u3070\u306a": 201,
+ "\u751f\u82b1:\u305b\u3044\u304b": 202,
+ "\u753a\u5bb6:\u3061\u3087\u3046\u304b": 203,
+ "\u753a\u5bb6:\u307e\u3061\u3084": 204,
+ "\u75be\u98a8:\u304b\u305c": 205,
+ "\u75be\u98a8:\u3057\u3063\u3077\u3046": 206,
+ "\u75be\u98a8:\u306f\u3084\u3066": 207,
+ "\u767d\u9aea:\u3057\u3089\u304c": 208,
+ "\u767d\u9aea:\u306f\u304f\u306f\u3064": 209,
+ "\u76f8\u4e57:\u3042\u3044\u306e": 210,
+ "\u76f8\u4e57:\u305d\u3046\u3058\u3087\u3046": 211,
+ "\u773c\u93e1:\u304c\u3093\u304d\u3087\u3046": 212,
+ "\u773c\u93e1:\u3081\u304c\u306d": 213,
+ "\u77f3\u7dbf:\u3044\u3057\u308f\u305f": 214,
+ "\u77f3\u7dbf:\u305b\u304d\u3081\u3093": 215,
+ "\u793c\u62dd:\u3089\u3044\u306f\u3044": 216,
+ "\u793c\u62dd:\u308c\u3044\u306f\u3044": 217,
+ "\u7af6\u58f2:\u304d\u3087\u3046\u3070\u3044": 218,
+ "\u7af6\u58f2:\u3051\u3044\u3070\u3044": 219,
+ "\u7c73:\u3053\u3081": 220,
+ "\u7c73:\u3054\u3081": 221,
+ "\u7c73:\u3079\u3044": 222,
+ "\u7c73:\u307e\u3044": 223,
+ "\u7c73:\u3081\u30fc\u3068\u308b": 224,
+ "\u7c73:\u3088\u306d": 225,
+ "\u7d05\u8449:\u3053\u3046\u3088\u3046": 226,
+ "\u7d05\u8449:\u3082\u307f\u3058": 227,
+ "\u7d30\u3005:\u3053\u307e\u3054\u307e": 228,
+ "\u7d30\u3005:\u307b\u305d\u307c\u305d": 229,
+ "\u7d30\u76ee:\u3055\u3044\u3082\u304f": 230,
+ "\u7d30\u76ee:\u307b\u305d\u3081": 231,
+ "\u7d4c\u7def:\u3044\u304d\u3055\u3064": 232,
+ "\u7d4c\u7def:\u3051\u3044\u3044": 233,
+ "\u7fe1\u7fe0:\u304b\u308f\u305b\u307f": 234,
+ "\u7fe1\u7fe0:\u3072\u3059\u3044": 235,
+ "\u80cc\u7b4b:\u305b\u3059\u3058": 236,
+ "\u80cc\u7b4b:\u306f\u3044\u304d\u3093": 237,
+ "\u8239\u5e95:\u305b\u3093\u3066\u3044": 238,
+ "\u8239\u5e95:\u3075\u306a\u305e\u3053": 239,
+ "\u82b1\u5f01:\u304b\u3079\u3093": 240,
+ "\u82b1\u5f01:\u306f\u306a\u3073\u3089": 241,
+ "\u83d6\u84b2:\u3042\u3084\u3081": 242,
+ "\u83d6\u84b2:\u3057\u3087\u3046\u3076": 243,
+ "\u8868:\u3042\u3089": 244,
+ "\u8868:\u3042\u3089\u308f": 245,
+ "\u8868:\u304a\u3082\u3066": 246,
+ "\u8868:\u3072\u3087\u3046": 247,
+ "\u898b\u7269:\u3051\u3093\u3076\u3064": 248,
+ "\u898b\u7269:\u307f\u3082\u306e": 249,
+ "\u89d2:\u304b\u304f": 250,
+ "\u89d2:\u304b\u3069": 251,
+ "\u89d2:\u3059\u307f": 252,
+ "\u89d2:\u3064\u306e": 253,
+ "\u8aad\u672c:\u3068\u304f\u307b\u3093": 254,
+ "\u8aad\u672c:\u3069\u304f\u307b\u3093": 255,
+ "\u8aad\u672c:\u3088\u307f\u307b\u3093": 256,
+ "\u8c37\u9593:\u305f\u306b\u3042\u3044": 257,
+ "\u8c37\u9593:\u305f\u306b\u307e": 258,
+ "\u8db3\u8de1:\u3042\u3057\u3042\u3068": 259,
+ "\u8db3\u8de1:\u305d\u304f\u305b\u304d": 260,
+ "\u8eab\u4f53:\u304b\u3089\u3060": 261,
+ "\u8eab\u4f53:\u3057\u3093\u305f\u3044": 262,
+ "\u8ee2\u751f:\u3066\u3093\u3057\u3087\u3046": 263,
+ "\u8ee2\u751f:\u3066\u3093\u305b\u3044": 264,
+ "\u8ffd\u5f93:\u3064\u3044\u3057\u3087\u3046": 265,
+ "\u8ffd\u5f93:\u3064\u3044\u3058\u3085\u3046": 266,
+ "\u9006\u624b:\u304e\u3083\u304f\u3066": 267,
+ "\u9006\u624b:\u3055\u304b\u3066": 268,
+ "\u9020\u4f5c:\u305e\u3046\u3055": 269,
+ "\u9020\u4f5c:\u305e\u3046\u3055\u304f": 270,
+ "\u9023\u4e2d:\u308c\u3093\u3058\u3085\u3046": 271,
+ "\u9023\u4e2d:\u308c\u3093\u3061\u3085\u3046": 272,
+ "\u907a\u8a00:\u3044\u3052\u3093": 273,
+ "\u907a\u8a00:\u3044\u3054\u3093": 274,
+ "\u907a\u8a00:\u3086\u3044\u3054\u3093": 275,
+ "\u91ce\u514e:\u306e\u3046\u3055\u304e": 276,
+ "\u91ce\u514e:\u3084\u3068": 277,
+ "\u91d1\u8272:\u304d\u3093\u3044\u308d": 278,
+ "\u91d1\u8272:\u3053\u3093\u3058\u304d": 279,
+ "\u9280\u674f:\u3044\u3061\u3087\u3046": 280,
+ "\u9280\u674f:\u304e\u3093\u306a\u3093": 281,
+ "\u958b\u773c:\u304b\u3044\u304c\u3093": 282,
+ "\u958b\u773c:\u304b\u3044\u3052\u3093": 283,
+ "\u982d\u6570:\u3042\u305f\u307e\u304b\u305a": 284,
+ "\u982d\u6570:\u3068\u3046\u3059\u3046": 285,
+ "\u982d\u84cb\u9aa8:\u305a\u304c\u3044\u3053\u3064": 286,
+ "\u982d\u84cb\u9aa8:\u3068\u3046\u304c\u3044\u3053\u3064": 287,
+ "\u98a8\u7a74:\u304b\u3056\u3042\u306a": 288,
+ "\u98a8\u7a74:\u3075\u3046\u3051\u3064": 289,
+ "\u98a8\u8eca:\u304b\u3056\u3050\u308b\u307e": 290,
+ "\u98a8\u8eca:\u3075\u3046\u3057\u3083": 291,
+ "\u98db\u6cab:\u3057\u3076\u304d": 292,
+ "\u98db\u6cab:\u3072\u307e\u3064": 293,
+ "\u9aa8:\u3053\u3064": 294,
+ "\u9aa8:\u307b\u306d": 295,
+ "\u9b5a:\u3046\u304a": 296,
+ "\u9b5a:\u304e\u3087": 297,
+ "\u9b5a:\u3055\u304b\u306a": 298,
+ "\u9b5a:\u3056\u304b\u306a": 299,
+ "\u9ed2\u5b50:\u304f\u308d\u3053": 300,
+ "\u9ed2\u5b50:\u307b\u304f\u308d": 301
+ }
+}
\ No newline at end of file
diff --git a/stores/dbert/pytorch_model.bin b/stores/dbert/pytorch_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..15d531babb135ffcfce9604ad68616343d804f0f
--- /dev/null
+++ b/stores/dbert/pytorch_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da84a3d4be38191f4485086a8b4c7013a2ab33cf2c7d20df6c3fdfe0092041af
+size 443657837
diff --git a/stores/dbert/special_tokens_map.json b/stores/dbert/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..a8b3208c2884c4efb86e49300fdd3dc877220cdf
--- /dev/null
+++ b/stores/dbert/special_tokens_map.json
@@ -0,0 +1,7 @@
+{
+ "cls_token": "[CLS]",
+ "mask_token": "[MASK]",
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "unk_token": "[UNK]"
+}
diff --git a/stores/dbert/tokenizer_config.json b/stores/dbert/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..916a28b1f1725315989ce9fecf9182c806488b3a
--- /dev/null
+++ b/stores/dbert/tokenizer_config.json
@@ -0,0 +1,22 @@
+{
+ "cls_token": "[CLS]",
+ "do_lower_case": false,
+ "do_subword_tokenize": true,
+ "do_word_tokenize": true,
+ "jumanpp_kwargs": null,
+ "mask_token": "[MASK]",
+ "mecab_kwargs": {
+ "mecab_dic": "unidic_lite"
+ },
+ "model_max_length": 1000000000000000019884624838656,
+ "name_or_path": "cl-tohoku/bert-base-japanese-v2",
+ "never_split": null,
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "special_tokens_map_file": null,
+ "subword_tokenizer_type": "wordpiece",
+ "sudachi_kwargs": null,
+ "tokenizer_class": "BertJapaneseTokenizer",
+ "unk_token": "[UNK]",
+ "word_tokenizer_type": "mecab"
+}
diff --git a/stores/dbert/training_args.bin b/stores/dbert/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b64705d218c4aa84bba08bda1b3c213f00c76d5a
--- /dev/null
+++ b/stores/dbert/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aee0041b2ad4b019fea4db1f8aabd34b0081878cae2c17395657331db1adbb70
+size 3579
diff --git a/stores/dbert/training_performance.json b/stores/dbert/training_performance.json
new file mode 100644
index 0000000000000000000000000000000000000000..677251472a4da4d4bf2c652cbaf8d3e4c9fd4428
--- /dev/null
+++ b/stores/dbert/training_performance.json
@@ -0,0 +1,14618 @@
+{
+ "train": {
+ "metrics": {
+ "test_loss": 0.10660427063703537,
+ "test_runtime": 166.1669,
+ "test_samples_per_second": 601.805,
+ "test_steps_per_second": 4.706
+ },
+ "accuracy": 0.957,
+ "heteronym_performance": {
+ "表": {
+ "n": 3776,
+ "readings": {
+ "ひょう": {
+ "n": 3185,
+ "found": {
+ "ひょう": 3157,
+ "おもて": 24,
+ "あらわ": 0,
+ "あら": 4,
+ "": 0
+ },
+ "accuracy": 0.991
+ },
+ "おもて": {
+ "n": 566,
+ "found": {
+ "ひょう": 5,
+ "おもて": 561,
+ "あらわ": 0,
+ "あら": 0,
+ "": 0
+ },
+ "accuracy": 0.991
+ },
+ "あらわ": {
+ "n": 0,
+ "found": {
+ "ひょう": 0,
+ "おもて": 0,
+ "あらわ": 0,
+ "あら": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ },
+ "あら": {
+ "n": 11,
+ "found": {
+ "ひょう": 0,
+ "おもて": 0,
+ "あらわ": 0,
+ "あら": 11,
+ "": 0
+ },
+ "accuracy": 1.0
+ },
+ "": {
+ "n": 14,
+ "found": {
+ "ひょう": 6,
+ "おもて": 1,
+ "あらわ": 0,
+ "あら": 0,
+ "": 7
+ },
+ "accuracy": 0.5
+ }
+ },
+ "accuracy": 0.989
+ },
+ "角": {
+ "n": 1262,
+ "readings": {
+ "かく": {
+ "n": 1008,
+ "found": {
+ "かく": 1004,
+ "かど": 1,
+ "つの": 3,
+ "すみ": 0,
+ "": 0
+ },
+ "accuracy": 0.996
+ },
+ "かど": {
+ "n": 192,
+ "found": {
+ "かく": 15,
+ "かど": 163,
+ "つの": 14,
+ "すみ": 0,
+ "": 0
+ },
+ "accuracy": 0.849
+ },
+ "つの": {
+ "n": 54,
+ "found": {
+ "かく": 1,
+ "かど": 0,
+ "つの": 53,
+ "すみ": 0,
+ "": 0
+ },
+ "accuracy": 0.981
+ },
+ "すみ": {
+ "n": 5,
+ "found": {
+ "かく": 3,
+ "かど": 1,
+ "つの": 1,
+ "すみ": 0,
+ "": 0
+ },
+ "accuracy": 0.0
+ },
+ "": {
+ "n": 3,
+ "found": {
+ "かく": 1,
+ "かど": 0,
+ "つの": 0,
+ "すみ": 0,
+ "": 2
+ },
+ "accuracy": 0.667
+ }
+ },
+ "accuracy": 0.968
+ },
+ "大分": {
+ "n": 579,
+ "readings": {
+ "おおいた": {
+ "n": 434,
+ "found": {
+ "おおいた": 434,
+ "だいぶ": 0,
+ "だいぶん": 0,
+ "": 0
+ },
+ "accuracy": 1.0
+ },
+ "だいぶ": {
+ "n": 132,
+ "found": {
+ "おおいた": 0,
+ "だいぶ": 132,
+ "だいぶん": 0,
+ "": 0
+ },
+ "accuracy": 1.0
+ },
+ "だいぶん": {
+ "n": 13,
+ "found": {
+ "おおいた": 0,
+ "だいぶ": 13,
+ "だいぶん": 0,
+ "": 0
+ },
+ "accuracy": 0.0
+ },
+ "": {
+ "n": 0,
+ "found": {
+ "おおいた": 0,
+ "だいぶ": 0,
+ "だいぶん": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ }
+ },
+ "accuracy": 0.978
+ },
+ "国立": {
+ "n": 1269,
+ "readings": {
+ "こくりつ": {
+ "n": 1229,
+ "found": {
+ "こくりつ": 1225,
+ "くにたち": 4,
+ "": 0
+ },
+ "accuracy": 0.997
+ },
+ "くにたち": {
+ "n": 40,
+ "found": {
+ "こくりつ": 2,
+ "くにたち": 38,
+ "": 0
+ },
+ "accuracy": 0.95
+ },
+ "": {
+ "n": 0,
+ "found": {
+ "こくりつ": 0,
+ "くにたち": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ }
+ },
+ "accuracy": 0.995
+ },
+ "人気": {
+ "n": 1224,
+ "readings": {
+ "にんき": {
+ "n": 1196,
+ "found": {
+ "にんき": 1195,
+ "ひとけ": 1,
+ "じんき": 0,
+ "": 0
+ },
+ "accuracy": 0.999
+ },
+ "ひとけ": {
+ "n": 21,
+ "found": {
+ "にんき": 1,
+ "ひとけ": 20,
+ "じんき": 0,
+ "": 0
+ },
+ "accuracy": 0.952
+ },
+ "じんき": {
+ "n": 6,
+ "found": {
+ "にんき": 1,
+ "ひとけ": 1,
+ "じんき": 4,
+ "": 0
+ },
+ "accuracy": 0.667
+ },
+ "": {
+ "n": 1,
+ "found": {
+ "にんき": 0,
+ "ひとけ": 1,
+ "じんき": 0,
+ "": 0
+ },
+ "accuracy": 0.0
+ }
+ },
+ "accuracy": 0.996
+ },
+ "市場": {
+ "n": 9207,
+ "readings": {
+ "しじょう": {
+ "n": 9045,
+ "found": {
+ "しじょう": 9034,
+ "いちば": 11,
+ "": 0
+ },
+ "accuracy": 0.999
+ },
+ "いちば": {
+ "n": 162,
+ "found": {
+ "しじょう": 16,
+ "いちば": 146,
+ "": 0
+ },
+ "accuracy": 0.901
+ },
+ "": {
+ "n": 0,
+ "found": {
+ "しじょう": 0,
+ "いちば": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ }
+ },
+ "accuracy": 0.997
+ },
+ "気質": {
+ "n": 285,
+ "readings": {
+ "きしつ": {
+ "n": 210,
+ "found": {
+ "きしつ": 188,
+ "かたぎ": 22,
+ "": 0
+ },
+ "accuracy": 0.895
+ },
+ "かたぎ": {
+ "n": 65,
+ "found": {
+ "きしつ": 15,
+ "かたぎ": 50,
+ "": 0
+ },
+ "accuracy": 0.769
+ },
+ "": {
+ "n": 10,
+ "found": {
+ "きしつ": 10,
+ "かたぎ": 0,
+ "": 0
+ },
+ "accuracy": 0.0
+ }
+ },
+ "accuracy": 0.835
+ },
+ "上方": {
+ "n": 298,
+ "readings": {
+ "かみがた": {
+ "n": 226,
+ "found": {
+ "かみがた": 224,
+ "じょうほう": 2,
+ "": 0
+ },
+ "accuracy": 0.991
+ },
+ "じょうほう": {
+ "n": 72,
+ "found": {
+ "かみがた": 17,
+ "じょうほう": 55,
+ "": 0
+ },
+ "accuracy": 0.764
+ },
+ "": {
+ "n": 0,
+ "found": {
+ "かみがた": 0,
+ "じょうほう": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ }
+ },
+ "accuracy": 0.936
+ },
+ "上手": {
+ "n": 1265,
+ "readings": {
+ "じょうず": {
+ "n": 1221,
+ "found": {
+ "じょうず": 1219,
+ "うま": 0,
+ "かみて": 2,
+ "うわて": 0,
+ "": 0
+ },
+ "accuracy": 0.998
+ },
+ "うま": {
+ "n": 10,
+ "found": {
+ "じょうず": 1,
+ "うま": 9,
+ "かみて": 0,
+ "うわて": 0,
+ "": 0
+ },
+ "accuracy": 0.9
+ },
+ "かみて": {
+ "n": 22,
+ "found": {
+ "じょうず": 12,
+ "うま": 0,
+ "かみて": 10,
+ "うわて": 0,
+ "": 0
+ },
+ "accuracy": 0.455
+ },
+ "うわて": {
+ "n": 12,
+ "found": {
+ "じょうず": 11,
+ "うま": 0,
+ "かみて": 0,
+ "うわて": 1,
+ "": 0
+ },
+ "accuracy": 0.083
+ },
+ "": {
+ "n": 0,
+ "found": {
+ "じょうず": 0,
+ "うま": 0,
+ "かみて": 0,
+ "うわて": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ }
+ },
+ "accuracy": 0.979
+ },
+ "下手": {
+ "n": 195,
+ "readings": {
+ "へた": {
+ "n": 129,
+ "found": {
+ "へた": 129,
+ "したて": 0,
+ "べた": 0,
+ "しもて": 0,
+ "": 0
+ },
+ "accuracy": 1.0
+ },
+ "したて": {
+ "n": 24,
+ "found": {
+ "へた": 7,
+ "したて": 14,
+ "べた": 2,
+ "しもて": 1,
+ "": 0
+ },
+ "accuracy": 0.583
+ },
+ "べた": {
+ "n": 26,
+ "found": {
+ "へた": 0,
+ "したて": 0,
+ "べた": 26,
+ "しもて": 0,
+ "": 0
+ },
+ "accuracy": 1.0
+ },
+ "しもて": {
+ "n": 6,
+ "found": {
+ "へた": 0,
+ "したて": 0,
+ "べた": 0,
+ "しもて": 6,
+ "": 0
+ },
+ "accuracy": 1.0
+ },
+ "": {
+ "n": 10,
+ "found": {
+ "へた": 0,
+ "したて": 0,
+ "べた": 0,
+ "しもて": 0,
+ "": 10
+ },
+ "accuracy": 1.0
+ }
+ },
+ "accuracy": 0.949
+ },
+ "仮名": {
+ "n": 348,
+ "readings": {
+ "かな": {
+ "n": 312,
+ "found": {
+ "かな": 305,
+ "がな": 5,
+ "かめい": 2,
+ "": 0
+ },
+ "accuracy": 0.978
+ },
+ "がな": {
+ "n": 15,
+ "found": {
+ "かな": 2,
+ "がな": 13,
+ "かめい": 0,
+ "": 0
+ },
+ "accuracy": 0.867
+ },
+ "かめい": {
+ "n": 20,
+ "found": {
+ "かな": 1,
+ "がな": 0,
+ "かめい": 19,
+ "": 0
+ },
+ "accuracy": 0.95
+ },
+ "": {
+ "n": 1,
+ "found": {
+ "かな": 1,
+ "がな": 0,
+ "かめい": 0,
+ "": 0
+ },
+ "accuracy": 0.0
+ }
+ },
+ "accuracy": 0.968
+ },
+ "礼拝": {
+ "n": 165,
+ "readings": {
+ "れいはい": {
+ "n": 152,
+ "found": {
+ "れいはい": 148,
+ "らいはい": 4,
+ "": 0
+ },
+ "accuracy": 0.974
+ },
+ "らいはい": {
+ "n": 13,
+ "found": {
+ "れいはい": 4,
+ "らいはい": 9,
+ "": 0
+ },
+ "accuracy": 0.692
+ },
+ "": {
+ "n": 0,
+ "found": {
+ "れいはい": 0,
+ "らいはい": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ }
+ },
+ "accuracy": 0.952
+ },
+ "遺言": {
+ "n": 562,
+ "readings": {
+ "ゆいごん": {
+ "n": 541,
+ "found": {
+ "ゆいごん": 539,
+ "いげん": 1,
+ "いごん": 1,
+ "": 0
+ },
+ "accuracy": 0.996
+ },
+ "いげん": {
+ "n": 5,
+ "found": {
+ "ゆいごん": 2,
+ "いげん": 3,
+ "いごん": 0,
+ "": 0
+ },
+ "accuracy": 0.6
+ },
+ "いごん": {
+ "n": 12,
+ "found": {
+ "ゆいごん": 9,
+ "いげん": 0,
+ "いごん": 3,
+ "": 0
+ },
+ "accuracy": 0.25
+ },
+ "": {
+ "n": 4,
+ "found": {
+ "ゆいごん": 0,
+ "いげん": 0,
+ "いごん": 0,
+ "": 4
+ },
+ "accuracy": 1.0
+ }
+ },
+ "accuracy": 0.977
+ },
+ "口腔": {
+ "n": 2148,
+ "readings": {
+ "こうこう": {
+ "n": 1198,
+ "found": {
+ "こうこう": 961,
+ "こうくう": 237,
+ "": 0
+ },
+ "accuracy": 0.802
+ },
+ "こうくう": {
+ "n": 949,
+ "found": {
+ "こうこう": 98,
+ "こうくう": 851,
+ "": 0
+ },
+ "accuracy": 0.897
+ },
+ "": {
+ "n": 1,
+ "found": {
+ "こうこう": 1,
+ "こうくう": 0,
+ "": 0
+ },
+ "accuracy": 0.0
+ }
+ },
+ "accuracy": 0.844
+ },
+ "骨": {
+ "n": 3851,
+ "readings": {
+ "ほね": {
+ "n": 1456,
+ "found": {
+ "ほね": 1197,
+ "こつ": 258,
+ "": 1
+ },
+ "accuracy": 0.822
+ },
+ "こつ": {
+ "n": 2255,
+ "found": {
+ "ほね": 91,
+ "こつ": 2154,
+ "": 10
+ },
+ "accuracy": 0.955
+ },
+ "": {
+ "n": 140,
+ "found": {
+ "ほね": 3,
+ "こつ": 29,
+ "": 108
+ },
+ "accuracy": 0.771
+ }
+ },
+ "accuracy": 0.898
+ },
+ "一途": {
+ "n": 108,
+ "readings": {
+ "いちず": {
+ "n": 81,
+ "found": {
+ "いちず": 76,
+ "いっと": 5,
+ "": 0
+ },
+ "accuracy": 0.938
+ },
+ "いっと": {
+ "n": 27,
+ "found": {
+ "いちず": 3,
+ "いっと": 24,
+ "": 0
+ },
+ "accuracy": 0.889
+ },
+ "": {
+ "n": 0,
+ "found": {
+ "いちず": 0,
+ "いっと": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ }
+ },
+ "accuracy": 0.926
+ },
+ "一言": {
+ "n": 489,
+ "readings": {
+ "ひとこと": {
+ "n": 425,
+ "found": {
+ "ひとこと": 424,
+ "いちげん": 0,
+ "いちごん": 1,
+ "": 0
+ },
+ "accuracy": 0.998
+ },
+ "いちげん": {
+ "n": 21,
+ "found": {
+ "ひとこと": 5,
+ "いちげん": 15,
+ "いちごん": 1,
+ "": 0
+ },
+ "accuracy": 0.714
+ },
+ "いちごん": {
+ "n": 31,
+ "found": {
+ "ひとこと": 22,
+ "いちげん": 1,
+ "いちごん": 8,
+ "": 0
+ },
+ "accuracy": 0.258
+ },
+ "": {
+ "n": 12,
+ "found": {
+ "ひとこと": 10,
+ "いちげん": 0,
+ "いちごん": 0,
+ "": 2
+ },
+ "accuracy": 0.167
+ }
+ },
+ "accuracy": 0.918
+ },
+ "最中": {
+ "n": 94,
+ "readings": {
+ "さいちゅう": {
+ "n": 82,
+ "found": {
+ "さいちゅう": 82,
+ "さなか": 0,
+ "": 0
+ },
+ "accuracy": 1.0
+ },
+ "さなか": {
+ "n": 12,
+ "found": {
+ "さいちゅう": 12,
+ "さなか": 0,
+ "": 0
+ },
+ "accuracy": 0.0
+ },
+ "": {
+ "n": 0,
+ "found": {
+ "さいちゅう": 0,
+ "さなか": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ }
+ },
+ "accuracy": 0.872
+ },
+ "一目": {
+ "n": 318,
+ "readings": {
+ "ひとめ": {
+ "n": 247,
+ "found": {
+ "ひとめ": 247,
+ "いちもく": 0,
+ "": 0
+ },
+ "accuracy": 1.0
+ },
+ "いちもく": {
+ "n": 69,
+ "found": {
+ "ひとめ": 3,
+ "いちもく": 66,
+ "": 0
+ },
+ "accuracy": 0.957
+ },
+ "": {
+ "n": 2,
+ "found": {
+ "ひとめ": 2,
+ "いちもく": 0,
+ "": 0
+ },
+ "accuracy": 0.0
+ }
+ },
+ "accuracy": 0.984
+ },
+ "係": {
+ "n": 275,
+ "readings": {
+ "かか": {
+ "n": 21,
+ "found": {
+ "かか": 21,
+ "かかわ": 0,
+ "がかり": 0,
+ "かかり": 0,
+ "": 0
+ },
+ "accuracy": 1.0
+ },
+ "かかわ": {
+ "n": 1,
+ "found": {
+ "かか": 1,
+ "かかわ": 0,
+ "がかり": 0,
+ "かかり": 0,
+ "": 0
+ },
+ "accuracy": 0.0
+ },
+ "がかり": {
+ "n": 192,
+ "found": {
+ "かか": 0,
+ "かかわ": 0,
+ "がかり": 191,
+ "かかり": 1,
+ "": 0
+ },
+ "accuracy": 0.995
+ },
+ "かかり": {
+ "n": 29,
+ "found": {
+ "かか": 0,
+ "かかわ": 0,
+ "がかり": 1,
+ "かかり": 27,
+ "": 1
+ },
+ "accuracy": 0.931
+ },
+ "": {
+ "n": 32,
+ "found": {
+ "かか": 0,
+ "かかわ": 0,
+ "がかり": 1,
+ "かかり": 0,
+ "": 31
+ },
+ "accuracy": 0.969
+ }
+ },
+ "accuracy": 0.982
+ },
+ "足跡": {
+ "n": 714,
+ "readings": {
+ "あしあと": {
+ "n": 423,
+ "found": {
+ "あしあと": 322,
+ "そくせき": 101,
+ "": 0
+ },
+ "accuracy": 0.761
+ },
+ "そくせき": {
+ "n": 291,
+ "found": {
+ "あしあと": 85,
+ "そくせき": 206,
+ "": 0
+ },
+ "accuracy": 0.708
+ },
+ "": {
+ "n": 0,
+ "found": {
+ "あしあと": 0,
+ "そくせき": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ }
+ },
+ "accuracy": 0.739
+ },
+ "今日": {
+ "n": 3809,
+ "readings": {
+ "きょう": {
+ "n": 2742,
+ "found": {
+ "きょう": 2527,
+ "こんにち": 215,
+ "": 0
+ },
+ "accuracy": 0.922
+ },
+ "こんにち": {
+ "n": 1067,
+ "found": {
+ "きょう": 130,
+ "こんにち": 937,
+ "": 0
+ },
+ "accuracy": 0.878
+ },
+ "": {
+ "n": 0,
+ "found": {
+ "きょう": 0,
+ "こんにち": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ }
+ },
+ "accuracy": 0.909
+ },
+ "明日": {
+ "n": 2588,
+ "readings": {
+ "あす": {
+ "n": 1553,
+ "found": {
+ "あす": 1441,
+ "あした": 112,
+ "みょうにち": 0,
+ "": 0
+ },
+ "accuracy": 0.928
+ },
+ "あした": {
+ "n": 1025,
+ "found": {
+ "あす": 564,
+ "あした": 461,
+ "みょうにち": 0,
+ "": 0
+ },
+ "accuracy": 0.45
+ },
+ "みょうにち": {
+ "n": 6,
+ "found": {
+ "あす": 6,
+ "あした": 0,
+ "みょうにち": 0,
+ "": 0
+ },
+ "accuracy": 0.0
+ },
+ "": {
+ "n": 4,
+ "found": {
+ "あす": 2,
+ "あした": 0,
+ "みょうにち": 0,
+ "": 2
+ },
+ "accuracy": 0.5
+ }
+ },
+ "accuracy": 0.736
+ },
+ "生物": {
+ "n": 3700,
+ "readings": {
+ "せいぶつ": {
+ "n": 3689,
+ "found": {
+ "せいぶつ": 3687,
+ "いきもの": 2,
+ "": 0
+ },
+ "accuracy": 0.999
+ },
+ "いきもの": {
+ "n": 11,
+ "found": {
+ "せいぶつ": 2,
+ "いきもの": 9,
+ "": 0
+ },
+ "accuracy": 0.818
+ },
+ "": {
+ "n": 0,
+ "found": {
+ "せいぶつ": 0,
+ "いきもの": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ }
+ },
+ "accuracy": 0.999
+ },
+ "変化": {
+ "n": 13002,
+ "readings": {
+ "へんか": {
+ "n": 12933,
+ "found": {
+ "へんか": 12932,
+ "へんげ": 1,
+ "": 0
+ },
+ "accuracy": 1.0
+ },
+ "へんげ": {
+ "n": 69,
+ "found": {
+ "へんか": 2,
+ "へんげ": 67,
+ "": 0
+ },
+ "accuracy": 0.971
+ },
+ "": {
+ "n": 0,
+ "found": {
+ "へんか": 0,
+ "へんげ": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ }
+ },
+ "accuracy": 1.0
+ },
+ "大事": {
+ "n": 875,
+ "readings": {
+ "だいじ": {
+ "n": 868,
+ "found": {
+ "だいじ": 868,
+ "おおごと": 0,
+ "": 0
+ },
+ "accuracy": 1.0
+ },
+ "おおごと": {
+ "n": 7,
+ "found": {
+ "だいじ": 7,
+ "おおごと": 0,
+ "": 0
+ },
+ "accuracy": 0.0
+ },
+ "": {
+ "n": 0,
+ "found": {
+ "だいじ": 0,
+ "おおごと": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ }
+ },
+ "accuracy": 0.992
+ },
+ "大家": {
+ "n": 157,
+ "readings": {
+ "たいか": {
+ "n": 90,
+ "found": {
+ "たいか": 84,
+ "おおや": 2,
+ "たいけ": 4,
+ "": 0
+ },
+ "accuracy": 0.933
+ },
+ "おおや": {
+ "n": 50,
+ "found": {
+ "たいか": 1,
+ "おおや": 49,
+ "たいけ": 0,
+ "": 0
+ },
+ "accuracy": 0.98
+ },
+ "たいけ": {
+ "n": 11,
+ "found": {
+ "たいか": 0,
+ "おおや": 0,
+ "たいけ": 11,
+ "": 0
+ },
+ "accuracy": 1.0
+ },
+ "": {
+ "n": 6,
+ "found": {
+ "たいか": 0,
+ "おおや": 0,
+ "たいけ": 0,
+ "": 6
+ },
+ "accuracy": 1.0
+ }
+ },
+ "accuracy": 0.955
+ },
+ "心中": {
+ "n": 323,
+ "readings": {
+ "しんじゅう": {
+ "n": 257,
+ "found": {
+ "しんじゅう": 246,
+ "しんちゅう": 2,
+ "しんぢゅう": 9,
+ "": 0
+ },
+ "accuracy": 0.957
+ },
+ "しんちゅう": {
+ "n": 50,
+ "found": {
+ "しんじゅう": 1,
+ "しんちゅう": 49,
+ "しんぢゅう": 0,
+ "": 0
+ },
+ "accuracy": 0.98
+ },
+ "しんぢゅう": {
+ "n": 16,
+ "found": {
+ "しんじゅう": 5,
+ "しんちゅう": 0,
+ "しんぢゅう": 11,
+ "": 0
+ },
+ "accuracy": 0.688
+ },
+ "": {
+ "n": 0,
+ "found": {
+ "しんじゅう": 0,
+ "しんちゅう": 0,
+ "しんぢゅう": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ }
+ },
+ "accuracy": 0.947
+ },
+ "一行": {
+ "n": 216,
+ "readings": {
+ "いっこう": {
+ "n": 189,
+ "found": {
+ "いっこう": 187,
+ "いちぎょう": 2,
+ "": 0
+ },
+ "accuracy": 0.989
+ },
+ "いちぎょう": {
+ "n": 22,
+ "found": {
+ "いっこう": 1,
+ "いちぎょう": 21,
+ "": 0
+ },
+ "accuracy": 0.955
+ },
+ "": {
+ "n": 5,
+ "found": {
+ "いっこう": 3,
+ "いちぎょう": 0,
+ "": 2
+ },
+ "accuracy": 0.4
+ }
+ },
+ "accuracy": 0.972
+ },
+ "一時": {
+ "n": 446,
+ "readings": {
+ "いちじ": {
+ "n": 379,
+ "found": {
+ "いちじ": 378,
+ "いっとき": 1,
+ "いちどき": 0,
+ "": 0
+ },
+ "accuracy": 0.997
+ },
+ "いっとき": {
+ "n": 45,
+ "found": {
+ "いちじ": 28,
+ "いっとき": 16,
+ "いちどき": 0,
+ "": 1
+ },
+ "accuracy": 0.356
+ },
+ "いちどき": {
+ "n": 9,
+ "found": {
+ "いちじ": 9,
+ "いっとき": 0,
+ "いちどき": 0,
+ "": 0
+ },
+ "accuracy": 0.0
+ },
+ "": {
+ "n": 13,
+ "found": {
+ "いちじ": 0,
+ "いっとき": 0,
+ "いちどき": 0,
+ "": 13
+ },
+ "accuracy": 1.0
+ }
+ },
+ "accuracy": 0.913
+ },
+ "一方": {
+ "n": 877,
+ "readings": {
+ "いっぽう": {
+ "n": 827,
+ "found": {
+ "いっぽう": 827,
+ "ひとかた": 0,
+ "いちほう": 0,
+ "": 0
+ },
+ "accuracy": 1.0
+ },
+ "ひとかた": {
+ "n": 21,
+ "found": {
+ "いっぽう": 0,
+ "ひとかた": 21,
+ "いちほう": 0,
+ "": 0
+ },
+ "accuracy": 1.0
+ },
+ "いちほう": {
+ "n": 28,
+ "found": {
+ "いっぽう": 25,
+ "ひとかた": 0,
+ "いちほう": 3,
+ "": 0
+ },
+ "accuracy": 0.107
+ },
+ "": {
+ "n": 1,
+ "found": {
+ "いっぽう": 0,
+ "ひとかた": 0,
+ "いちほう": 1,
+ "": 0
+ },
+ "accuracy": 0.0
+ }
+ },
+ "accuracy": 0.97
+ },
+ "一夜": {
+ "n": 304,
+ "readings": {
+ "いちや": {
+ "n": 260,
+ "found": {
+ "いちや": 255,
+ "ひとよ": 3,
+ "": 2
+ },
+ "accuracy": 0.981
+ },
+ "ひとよ": {
+ "n": 30,
+ "found": {
+ "いちや": 11,
+ "ひとよ": 14,
+ "": 5
+ },
+ "accuracy": 0.467
+ },
+ "": {
+ "n": 14,
+ "found": {
+ "いちや": 0,
+ "ひとよ": 0,
+ "": 14
+ },
+ "accuracy": 1.0
+ }
+ },
+ "accuracy": 0.931
+ },
+ "下野": {
+ "n": 93,
+ "readings": {
+ "しもつけ": {
+ "n": 60,
+ "found": {
+ "しもつけ": 60,
+ "げや": 0,
+ "しもの": 0,
+ "": 0
+ },
+ "accuracy": 1.0
+ },
+ "げや": {
+ "n": 22,
+ "found": {
+ "しもつけ": 11,
+ "げや": 11,
+ "しもの": 0,
+ "": 0
+ },
+ "accuracy": 0.5
+ },
+ "しもの": {
+ "n": 10,
+ "found": {
+ "しもつけ": 5,
+ "げや": 2,
+ "しもの": 3,
+ "": 0
+ },
+ "accuracy": 0.3
+ },
+ "": {
+ "n": 1,
+ "found": {
+ "しもつけ": 1,
+ "げや": 0,
+ "しもの": 0,
+ "": 0
+ },
+ "accuracy": 0.0
+ }
+ },
+ "accuracy": 0.796
+ },
+ "花弁": {
+ "n": 40,
+ "readings": {
+ "かべん": {
+ "n": 32,
+ "found": {
+ "かべん": 32,
+ "はなびら": 0,
+ "": 0
+ },
+ "accuracy": 1.0
+ },
+ "はなびら": {
+ "n": 8,
+ "found": {
+ "かべん": 8,
+ "はなびら": 0,
+ "": 0
+ },
+ "accuracy": 0.0
+ },
+ "": {
+ "n": 0,
+ "found": {
+ "かべん": 0,
+ "はなびら": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ }
+ },
+ "accuracy": 0.8
+ },
+ "玩具": {
+ "n": 229,
+ "readings": {
+ "がんぐ": {
+ "n": 192,
+ "found": {
+ "がんぐ": 190,
+ "おもちゃ": 2,
+ "": 0
+ },
+ "accuracy": 0.99
+ },
+ "おもちゃ": {
+ "n": 37,
+ "found": {
+ "がんぐ": 1,
+ "おもちゃ": 36,
+ "": 0
+ },
+ "accuracy": 0.973
+ },
+ "": {
+ "n": 0,
+ "found": {
+ "がんぐ": 0,
+ "おもちゃ": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ }
+ },
+ "accuracy": 0.987
+ },
+ "強力": {
+ "n": 403,
+ "readings": {
+ "きょうりょく": {
+ "n": 387,
+ "found": {
+ "きょうりょく": 387,
+ "ごうりき": 0,
+ "": 0
+ },
+ "accuracy": 1.0
+ },
+ "ごうりき": {
+ "n": 16,
+ "found": {
+ "きょうりょく": 0,
+ "ごうりき": 16,
+ "": 0
+ },
+ "accuracy": 1.0
+ },
+ "": {
+ "n": 0,
+ "found": {
+ "きょうりょく": 0,
+ "ごうりき": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ }
+ },
+ "accuracy": 1.0
+ },
+ "金色": {
+ "n": 202,
+ "readings": {
+ "きんいろ": {
+ "n": 123,
+ "found": {
+ "きんいろ": 120,
+ "こんじき": 3,
+ "": 0
+ },
+ "accuracy": 0.976
+ },
+ "こんじき": {
+ "n": 79,
+ "found": {
+ "きんいろ": 14,
+ "こんじき": 65,
+ "": 0
+ },
+ "accuracy": 0.823
+ },
+ "": {
+ "n": 0,
+ "found": {
+ "きんいろ": 0,
+ "こんじき": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ }
+ },
+ "accuracy": 0.916
+ },
+ "経緯": {
+ "n": 1189,
+ "readings": {
+ "けいい": {
+ "n": 1177,
+ "found": {
+ "けいい": 1176,
+ "いきさつ": 1,
+ "": 0
+ },
+ "accuracy": 0.999
+ },
+ "いきさつ": {
+ "n": 12,
+ "found": {
+ "けいい": 0,
+ "いきさつ": 12,
+ "": 0
+ },
+ "accuracy": 1.0
+ },
+ "": {
+ "n": 0,
+ "found": {
+ "けいい": 0,
+ "いきさつ": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ }
+ },
+ "accuracy": 0.999
+ },
+ "故郷": {
+ "n": 704,
+ "readings": {
+ "こきょう": {
+ "n": 600,
+ "found": {
+ "こきょう": 571,
+ "ふるさと": 26,
+ "くに": 3,
+ "": 0
+ },
+ "accuracy": 0.952
+ },
+ "ふるさと": {
+ "n": 79,
+ "found": {
+ "こきょう": 46,
+ "ふるさと": 32,
+ "くに": 1,
+ "": 0
+ },
+ "accuracy": 0.405
+ },
+ "くに": {
+ "n": 25,
+ "found": {
+ "こきょう": 8,
+ "ふるさと": 3,
+ "くに": 14,
+ "": 0
+ },
+ "accuracy": 0.56
+ },
+ "": {
+ "n": 0,
+ "found": {
+ "こきょう": 0,
+ "ふるさと": 0,
+ "くに": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ }
+ },
+ "accuracy": 0.876
+ },
+ "紅葉": {
+ "n": 191,
+ "readings": {
+ "こうよう": {
+ "n": 123,
+ "found": {
+ "こうよう": 107,
+ "もみじ": 16,
+ "": 0
+ },
+ "accuracy": 0.87
+ },
+ "もみじ": {
+ "n": 68,
+ "found": {
+ "こうよう": 4,
+ "もみじ": 64,
+ "": 0
+ },
+ "accuracy": 0.941
+ },
+ "": {
+ "n": 0,
+ "found": {
+ "こうよう": 0,
+ "もみじ": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ }
+ },
+ "accuracy": 0.895
+ },
+ "根本": {
+ "n": 481,
+ "readings": {
+ "こんぽん": {
+ "n": 433,
+ "found": {
+ "こんぽん": 433,
+ "ねもと": 0,
+ "": 0
+ },
+ "accuracy": 1.0
+ },
+ "ねもと": {
+ "n": 47,
+ "found": {
+ "こんぽん": 1,
+ "ねもと": 46,
+ "": 0
+ },
+ "accuracy": 0.979
+ },
+ "": {
+ "n": 1,
+ "found": {
+ "こんぽん": 0,
+ "ねもと": 0,
+ "": 1
+ },
+ "accuracy": 1.0
+ }
+ },
+ "accuracy": 0.998
+ },
+ "山陰": {
+ "n": 290,
+ "readings": {
+ "さんいん": {
+ "n": 279,
+ "found": {
+ "さんいん": 278,
+ "やまかげ": 1,
+ "": 0
+ },
+ "accuracy": 0.996
+ },
+ "やまかげ": {
+ "n": 11,
+ "found": {
+ "さんいん": 1,
+ "やまかげ": 10,
+ "": 0
+ },
+ "accuracy": 0.909
+ },
+ "": {
+ "n": 0,
+ "found": {
+ "さんいん": 0,
+ "やまかげ": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ }
+ },
+ "accuracy": 0.993
+ },
+ "上下": {
+ "n": 265,
+ "readings": {
+ "じょうげ": {
+ "n": 227,
+ "found": {
+ "じょうげ": 222,
+ "うえした": 0,
+ "": 5
+ },
+ "accuracy": 0.978
+ },
+ "うえした": {
+ "n": 20,
+ "found": {
+ "じょうげ": 8,
+ "うえした": 12,
+ "": 0
+ },
+ "accuracy": 0.6
+ },
+ "": {
+ "n": 18,
+ "found": {
+ "じょうげ": 3,
+ "うえした": 0,
+ "": 15
+ },
+ "accuracy": 0.833
+ }
+ },
+ "accuracy": 0.94
+ },
+ "身体": {
+ "n": 3183,
+ "readings": {
+ "しんたい": {
+ "n": 2652,
+ "found": {
+ "しんたい": 2647,
+ "からだ": 5,
+ "": 0
+ },
+ "accuracy": 0.998
+ },
+ "からだ": {
+ "n": 531,
+ "found": {
+ "しんたい": 38,
+ "からだ": 493,
+ "": 0
+ },
+ "accuracy": 0.928
+ },
+ "": {
+ "n": 0,
+ "found": {
+ "しんたい": 0,
+ "からだ": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ }
+ },
+ "accuracy": 0.986
+ },
+ "水面": {
+ "n": 273,
+ "readings": {
+ "すいめん": {
+ "n": 255,
+ "found": {
+ "すいめん": 255,
+ "みなも": 0,
+ "": 0
+ },
+ "accuracy": 1.0
+ },
+ "みなも": {
+ "n": 18,
+ "found": {
+ "すいめん": 17,
+ "みなも": 1,
+ "": 0
+ },
+ "accuracy": 0.056
+ },
+ "": {
+ "n": 0,
+ "found": {
+ "すいめん": 0,
+ "みなも": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ }
+ },
+ "accuracy": 0.938
+ },
+ "世論": {
+ "n": 308,
+ "readings": {
+ "よろん": {
+ "n": 177,
+ "found": {
+ "よろん": 163,
+ "せろん": 14,
+ "": 0
+ },
+ "accuracy": 0.921
+ },
+ "せろん": {
+ "n": 131,
+ "found": {
+ "よろん": 60,
+ "せろん": 71,
+ "": 0
+ },
+ "accuracy": 0.542
+ },
+ "": {
+ "n": 0,
+ "found": {
+ "よろん": 0,
+ "せろん": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ }
+ },
+ "accuracy": 0.76
+ },
+ "清水": {
+ "n": 506,
+ "readings": {
+ "しみず": {
+ "n": 485,
+ "found": {
+ "しみず": 473,
+ "きよみず": 12,
+ "": 0
+ },
+ "accuracy": 0.975
+ },
+ "きよみず": {
+ "n": 19,
+ "found": {
+ "しみず": 5,
+ "きよみず": 14,
+ "": 0
+ },
+ "accuracy": 0.737
+ },
+ "": {
+ "n": 2,
+ "found": {
+ "しみず": 2,
+ "きよみず": 0,
+ "": 0
+ },
+ "accuracy": 0.0
+ }
+ },
+ "accuracy": 0.962
+ },
+ "大手": {
+ "n": 1091,
+ "readings": {
+ "おおて": {
+ "n": 1076,
+ "found": {
+ "おおて": 1073,
+ "おおで": 3,
+ "": 0
+ },
+ "accuracy": 0.997
+ },
+ "おおで": {
+ "n": 15,
+ "found": {
+ "おおて": 0,
+ "おおで": 15,
+ "": 0
+ },
+ "accuracy": 1.0
+ },
+ "": {
+ "n": 0,
+ "found": {
+ "おおて": 0,
+ "おおで": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ }
+ },
+ "accuracy": 0.997
+ },
+ "大人": {
+ "n": 1718,
+ "readings": {
+ "おとな": {
+ "n": 1681,
+ "found": {
+ "おとな": 1680,
+ "たいじん": 1,
+ "うし": 0,
+ "": 0
+ },
+ "accuracy": 0.999
+ },
+ "たいじん": {
+ "n": 21,
+ "found": {
+ "おとな": 4,
+ "たいじん": 17,
+ "うし": 0,
+ "": 0
+ },
+ "accuracy": 0.81
+ },
+ "うし": {
+ "n": 15,
+ "found": {
+ "おとな": 0,
+ "たいじん": 3,
+ "うし": 12,
+ "": 0
+ },
+ "accuracy": 0.8
+ },
+ "": {
+ "n": 1,
+ "found": {
+ "おとな": 0,
+ "たいじん": 0,
+ "うし": 0,
+ "": 1
+ },
+ "accuracy": 1.0
+ }
+ },
+ "accuracy": 0.995
+ },
+ "大勢": {
+ "n": 270,
+ "readings": {
+ "おおぜい": {
+ "n": 202,
+ "found": {
+ "おおぜい": 201,
+ "たいせい": 1,
+ "": 0
+ },
+ "accuracy": 0.995
+ },
+ "たいせい": {
+ "n": 67,
+ "found": {
+ "おおぜい": 2,
+ "たいせい": 65,
+ "": 0
+ },
+ "accuracy": 0.97
+ },
+ "": {
+ "n": 1,
+ "found": {
+ "おおぜい": 0,
+ "たいせい": 0,
+ "": 1
+ },
+ "accuracy": 1.0
+ }
+ },
+ "accuracy": 0.989
+ },
+ "中間": {
+ "n": 1872,
+ "readings": {
+ "ちゅうかん": {
+ "n": 1822,
+ "found": {
+ "ちゅうかん": 1821,
+ "ちゅうげん": 0,
+ "": 1
+ },
+ "accuracy": 0.999
+ },
+ "ちゅうげん": {
+ "n": 23,
+ "found": {
+ "ちゅうかん": 3,
+ "ちゅうげん": 20,
+ "": 0
+ },
+ "accuracy": 0.87
+ },
+ "": {
+ "n": 27,
+ "found": {
+ "ちゅうかん": 1,
+ "ちゅうげん": 0,
+ "": 26
+ },
+ "accuracy": 0.963
+ }
+ },
+ "accuracy": 0.997
+ },
+ "日向": {
+ "n": 171,
+ "readings": {
+ "ひゅうが": {
+ "n": 125,
+ "found": {
+ "ひゅうが": 122,
+ "ひなた": 3,
+ "": 0
+ },
+ "accuracy": 0.976
+ },
+ "ひなた": {
+ "n": 45,
+ "found": {
+ "ひゅうが": 2,
+ "ひなた": 43,
+ "": 0
+ },
+ "accuracy": 0.956
+ },
+ "": {
+ "n": 1,
+ "found": {
+ "ひゅうが": 1,
+ "ひなた": 0,
+ "": 0
+ },
+ "accuracy": 0.0
+ }
+ },
+ "accuracy": 0.965
+ },
+ "夫婦": {
+ "n": 1477,
+ "readings": {
+ "ふうふ": {
+ "n": 1392,
+ "found": {
+ "ふうふ": 1385,
+ "めおと": 7,
+ "": 0
+ },
+ "accuracy": 0.995
+ },
+ "めおと": {
+ "n": 83,
+ "found": {
+ "ふうふ": 12,
+ "めおと": 71,
+ "": 0
+ },
+ "accuracy": 0.855
+ },
+ "": {
+ "n": 2,
+ "found": {
+ "ふうふ": 2,
+ "めおと": 0,
+ "": 0
+ },
+ "accuracy": 0.0
+ }
+ },
+ "accuracy": 0.986
+ },
+ "牧場": {
+ "n": 301,
+ "readings": {
+ "ぼくじょう": {
+ "n": 275,
+ "found": {
+ "ぼくじょう": 273,
+ "まきば": 2,
+ "": 0
+ },
+ "accuracy": 0.993
+ },
+ "まきば": {
+ "n": 26,
+ "found": {
+ "ぼくじょう": 6,
+ "まきば": 20,
+ "": 0
+ },
+ "accuracy": 0.769
+ },
+ "": {
+ "n": 0,
+ "found": {
+ "ぼくじょう": 0,
+ "まきば": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ }
+ },
+ "accuracy": 0.973
+ },
+ "末期": {
+ "n": 492,
+ "readings": {
+ "まっき": {
+ "n": 484,
+ "found": {
+ "まっき": 483,
+ "まつご": 1,
+ "": 0
+ },
+ "accuracy": 0.998
+ },
+ "まつご": {
+ "n": 8,
+ "found": {
+ "まっき": 0,
+ "まつご": 8,
+ "": 0
+ },
+ "accuracy": 1.0
+ },
+ "": {
+ "n": 0,
+ "found": {
+ "まっき": 0,
+ "まつご": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ }
+ },
+ "accuracy": 0.998
+ },
+ "利益": {
+ "n": 1915,
+ "readings": {
+ "りえき": {
+ "n": 1897,
+ "found": {
+ "りえき": 1895,
+ "りやく": 2,
+ "": 0
+ },
+ "accuracy": 0.999
+ },
+ "りやく": {
+ "n": 18,
+ "found": {
+ "りえき": 0,
+ "りやく": 18,
+ "": 0
+ },
+ "accuracy": 1.0
+ },
+ "": {
+ "n": 0,
+ "found": {
+ "りえき": 0,
+ "りやく": 0,
+ "": 0
+ },
+ "accuracy": NaN
+ }
+ },
+ "accuracy": 0.999
+ },
+ "一味": {
+ "n": 82,
+ "readings": {
+ "いちみ": {
+ "n": 72,
+ "found": {
+ "いちみ": 70,
+ "ひとあじ": 2,
+ "": 0
+ },
+ "accuracy": 0.972
+ },
+ "ひとあじ": {
+ "n": 10,
+ "found": {
+ "いちみ": 1,
+ "ひとあじ": 9,
+ "": 0
+ },
+ "accuracy": 0.9
+ },
+ "