yomikata-demo / config /config.py
Sam Passaglia
initial commit
9aba307
raw
history blame
3.05 kB
# config.py
import json
import logging.config
import sys
from pathlib import Path
import mlflow
from rich.logging import RichHandler
# Project root (the parent of the directory containing this file) and the
# directory holding configuration files.
BASE_DIR = Path(__file__).parent.parent.absolute()
CONFIG_DIR = BASE_DIR / "config"

# Data directories
RAW_DATA_DIR = BASE_DIR / "raw_data"
SENTENCE_DATA_DIR = BASE_DIR / "sentence_data"
TRAIN_DATA_DIR = SENTENCE_DATA_DIR / "train"
VAL_DATA_DIR = SENTENCE_DATA_DIR / "val"
TEST_DATA_DIR = SENTENCE_DATA_DIR / "test"
READING_DATA_DIR = BASE_DIR / "reading_data"

# Logs directory
LOGS_DIR = BASE_DIR / "logs"

# Model storage directories
STORES_DIR = BASE_DIR / "stores"
RUN_REGISTRY = STORES_DIR / "runs"

# Create every working directory at import time. CONFIG_DIR is not created
# here; only the data, log and store directories are.
for _required_dir in (
    RAW_DATA_DIR,
    SENTENCE_DATA_DIR,
    TRAIN_DATA_DIR,
    VAL_DATA_DIR,
    TEST_DATA_DIR,
    READING_DATA_DIR,
    LOGS_DIR,
    STORES_DIR,
    RUN_REGISTRY,
):
    _required_dir.mkdir(parents=True, exist_ok=True)
del _required_dir  # keep the loop variable out of the module namespace
# Reserved special tokens
# mecab uses the usual space to separate words, so the usual space characters
# are replaced with this token before text is sent to mecab.
ASCII_SPACE_TOKEN = "\U0000FFFF"

# Random seed
SEED = 1_271_297
# Training parameters: train / validation / test split sizes, which must sum to 1.
TRAIN_SIZE = 0.7
VAL_SIZE = 0.15
TEST_SIZE = 0.15
# Compare with a tolerance instead of `== 1`: binary floating point makes exact
# equality fragile (e.g. 0.7 + 0.2 + 0.1 evaluates to 0.9999999999999999), so a
# perfectly valid split could otherwise abort the import.
assert abs(TRAIN_SIZE + VAL_SIZE + TEST_SIZE - 1.0) < 1e-9, (
    "TRAIN_SIZE, VAL_SIZE and TEST_SIZE must sum to 1"
)
# Heteronym list
# JSON text is UTF-8 by specification; pass the encoding explicitly so the read
# does not depend on the platform's locale default encoding.
with open(Path(CONFIG_DIR, "heteronyms.json"), encoding="utf-8") as fp:
    HETERONYMS = json.load(fp)
# MLFlow model registry
# Path.as_uri() builds a well-formed file URI on every platform (drive letters,
# percent-encoding of reserved characters), unlike concatenating "file://"
# with the raw path string.
mlflow.set_tracking_uri(RUN_REGISTRY.absolute().as_uri())
# Logger
# The root logger writes to three handlers: a Rich console handler, a rotating
# info.log (INFO and above) and a rotating error.log (ERROR and above).
logging_config = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {
        "minimal": {"format": "%(message)s"},
        "detailed": {
            "format": "%(levelname)s %(asctime)s [%(name)s:%(filename)s:%(funcName)s:%(lineno)d]\n%(message)s\n"
        },
    },
    "handlers": {
        # Declare the Rich handler directly instead of building a plain
        # StreamHandler and overwriting logger.handlers[0] afterwards. No level
        # or formatter is set, matching a bare RichHandler(markup=True).
        "console": {
            "()": RichHandler,
            "markup": True,
        },
        "info": {
            "class": "logging.handlers.RotatingFileHandler",
            "filename": Path(LOGS_DIR, "info.log"),
            "maxBytes": 10 * 1024 * 1024,  # 10 MiB (10485760 bytes)
            "backupCount": 10,
            "formatter": "detailed",
            "level": logging.INFO,
        },
        "error": {
            "class": "logging.handlers.RotatingFileHandler",
            "filename": Path(LOGS_DIR, "error.log"),
            "maxBytes": 10 * 1024 * 1024,  # 10 MiB (10485760 bytes)
            "backupCount": 10,
            "formatter": "detailed",
            "level": logging.ERROR,
        },
    },
    "root": {
        "handlers": ["console", "info", "error"],
        "level": logging.INFO,
        # NOTE: dictConfig does not apply "propagate" to the root logger; the
        # key is kept only so the dictionary's contents stay unchanged.
        "propagate": True,
    },
}
logging.config.dictConfig(logging_config)
logger = logging.getLogger()