Spaces:
Build error
Build error
# config.py
import json
import logging.config
import math
import sys
from pathlib import Path

import mlflow
from rich.logging import RichHandler
# Project root (two levels above this file) and the configuration folder
BASE_DIR = Path(__file__).parent.parent.absolute()
CONFIG_DIR = BASE_DIR / "config"

# Data folders
RAW_DATA_DIR = BASE_DIR / "raw_data"
SENTENCE_DATA_DIR = BASE_DIR / "sentence_data"
TRAIN_DATA_DIR = SENTENCE_DATA_DIR / "train"
VAL_DATA_DIR = SENTENCE_DATA_DIR / "val"
TEST_DATA_DIR = SENTENCE_DATA_DIR / "test"
READING_DATA_DIR = BASE_DIR / "reading_data"

# Log folder
LOGS_DIR = BASE_DIR / "logs"

# Model storage folder and the run registry inside it
STORES_DIR = BASE_DIR / "stores"
RUN_REGISTRY = STORES_DIR / "runs"

# Create every folder above except CONFIG_DIR (which is only read from);
# already-existing folders are left untouched.
for _directory in (
    RAW_DATA_DIR,
    SENTENCE_DATA_DIR,
    TRAIN_DATA_DIR,
    VAL_DATA_DIR,
    TEST_DATA_DIR,
    READING_DATA_DIR,
    LOGS_DIR,
    STORES_DIR,
    RUN_REGISTRY,
):
    _directory.mkdir(parents=True, exist_ok=True)
# Special tokens reserved
# Replaces the usual space characters before text is sent to mecab, because
# mecab uses the usual space to separate words.
ASCII_SPACE_TOKEN = "\U0000FFFF"

# Seed
SEED = 1271297

# Training parameters: the three split sizes must add up to 1.
TRAIN_SIZE = 0.7
VAL_SIZE = 0.15
TEST_SIZE = 0.15

# Compare with a tolerance and raise explicitly: exact float equality is
# fragile if the fractions are ever changed, and a bare `assert` is removed
# when Python runs with -O.
if not math.isclose(TRAIN_SIZE + VAL_SIZE + TEST_SIZE, 1.0):
    raise ValueError(
        "TRAIN_SIZE, VAL_SIZE and TEST_SIZE must sum to 1, got "
        f"{TRAIN_SIZE + VAL_SIZE + TEST_SIZE}"
    )
# Heteronym list
# Read explicitly as UTF-8 so that loading does not depend on the platform's
# default text encoding.
with open(Path(CONFIG_DIR, "heteronyms.json"), encoding="utf-8") as fp:
    HETERONYMS = json.load(fp)
# MLFlow model registry
# Path.as_uri() produces a well-formed file URI (handles drive letters and
# percent-escapes special characters) instead of concatenating "file://" with
# the raw path string.
mlflow.set_tracking_uri(RUN_REGISTRY.absolute().as_uri())
# Logger
_LOG_FILE_MAX_BYTES = 10485760  # 10 MiB (10 * 1024 * 1024) per file before rotation
_LOG_FILE_BACKUPS = 10


def _rotating_file_handler(log_name, level):
    """Return the dictConfig entry for a size-rotated log file inside LOGS_DIR."""
    return {
        "class": "logging.handlers.RotatingFileHandler",
        "filename": Path(LOGS_DIR, log_name),
        "maxBytes": _LOG_FILE_MAX_BYTES,
        "backupCount": _LOG_FILE_BACKUPS,
        "formatter": "detailed",
        "level": level,
    }


_formatters = {
    "minimal": {"format": "%(message)s"},
    "detailed": {
        "format": "%(levelname)s %(asctime)s [%(name)s:%(filename)s:%(funcName)s:%(lineno)d]\n%(message)s\n"
    },
}

_handlers = {
    # Plain console output on stdout.
    "console": {
        "class": "logging.StreamHandler",
        "stream": sys.stdout,
        "formatter": "minimal",
        "level": logging.DEBUG,
    },
    # INFO and above go to info.log; ERROR and above also go to error.log.
    "info": _rotating_file_handler("info.log", logging.INFO),
    "error": _rotating_file_handler("error.log", logging.ERROR),
}

logging_config = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": _formatters,
    "handlers": _handlers,
    "root": {
        "handlers": ["console", "info", "error"],
        "level": logging.INFO,
        "propagate": True,
    },
}

logging.config.dictConfig(logging_config)

# Swap the first handler on the root logger for a RichHandler with markup
# enabled. This relies on "console" being listed first in root["handlers"].
logger = logging.getLogger()
logger.handlers[0] = RichHandler(markup=True)