Spaces:

passaglia
/

yomikata-demo

Build error

File size: 2,941 Bytes

9aba307

# config.py

import json
import logging.config
import sys
from pathlib import Path

from rich.logging import RichHandler

# Base directory (the parent of the directory containing this file) and the
# configuration directory beneath it.
BASE_DIR = Path(__file__).parent.parent.absolute()
CONFIG_DIR = BASE_DIR / "config"

# Data directories; train/val/test live inside the sentence data directory.
RAW_DATA_DIR = BASE_DIR / "raw_data"
SENTENCE_DATA_DIR = BASE_DIR / "sentence_data"
TRAIN_DATA_DIR = SENTENCE_DATA_DIR / "train"
VAL_DATA_DIR = SENTENCE_DATA_DIR / "val"
TEST_DATA_DIR = SENTENCE_DATA_DIR / "test"
READING_DATA_DIR = BASE_DIR / "reading_data"

# Logs directory
LOGS_DIR = BASE_DIR / "logs"

# Model storage directory and the run registry nested inside it
STORES_DIR = BASE_DIR / "stores"
RUN_REGISTRY = STORES_DIR / "runs"

# Create every data, log and storage directory at import time.
# CONFIG_DIR is not created here.
for _required_dir in (
    RAW_DATA_DIR,
    SENTENCE_DATA_DIR,
    TRAIN_DATA_DIR,
    VAL_DATA_DIR,
    TEST_DATA_DIR,
    READING_DATA_DIR,
    LOGS_DIR,
    STORES_DIR,
    RUN_REGISTRY,
):
    _required_dir.mkdir(parents=True, exist_ok=True)

# Special tokens reserved
# Replaces the usual space characters before text is sent to mecab, because
# mecab uses the usual space to separate words.
ASCII_SPACE_TOKEN = "\U0000FFFF"

# Seed
SEED = 1271297

# Training parameters
TRAIN_SIZE = 0.7
VAL_SIZE = 0.15
TEST_SIZE = 0.15
# The three sizes must sum to one. Compare within a tolerance instead of with
# `==`: with binary floats an exact comparison depends on the particular
# values chosen (e.g. 0.7 + 0.2 + 0.1 evaluates to 0.9999999999999999).
assert abs(TRAIN_SIZE + VAL_SIZE + TEST_SIZE - 1.0) < 1e-9, (
    "TRAIN_SIZE, VAL_SIZE and TEST_SIZE must sum to 1"
)

# Heteronym list, loaded at import time from heteronyms.json in CONFIG_DIR.
# The encoding is pinned to UTF-8 so that decoding the JSON file does not
# depend on the platform's locale default encoding.
with open(Path(CONFIG_DIR, "heteronyms.json"), encoding="utf-8") as fp:
    HETERONYMS = json.load(fp)

# Logger
# Settings shared by both rotating log files. Each file rolls over at
# 10485760 bytes (10 MiB) and up to 10 backups are kept.
_rotating_file_defaults = {
    "class": "logging.handlers.RotatingFileHandler",
    "maxBytes": 10 * 1024 * 1024,
    "backupCount": 10,
    "formatter": "detailed",
}

_log_formatters = {
    # Message text only.
    "minimal": {"format": "%(message)s"},
    # Level, timestamp and call site on one line, the message on the next.
    "detailed": {
        "format": "%(levelname)s %(asctime)s [%(name)s:%(filename)s:%(funcName)s:%(lineno)d]\n%(message)s\n"
    },
}

_log_handlers = {
    # Plain stdout handler.
    "console": {
        "class": "logging.StreamHandler",
        "stream": sys.stdout,
        "formatter": "minimal",
        "level": logging.DEBUG,
    },
    # INFO and above go to logs/info.log.
    "info": {
        **_rotating_file_defaults,
        "filename": LOGS_DIR / "info.log",
        "level": logging.INFO,
    },
    # ERROR and above go to logs/error.log.
    "error": {
        **_rotating_file_defaults,
        "filename": LOGS_DIR / "error.log",
        "level": logging.ERROR,
    },
}

logging_config = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": _log_formatters,
    "handlers": _log_handlers,
    "root": {
        "handlers": ["console", "info", "error"],
        "level": logging.INFO,
        "propagate": True,
    },
}

logging.config.dictConfig(logging_config)
logger = logging.getLogger()
# Replace the root logger's first handler with a Rich handler that has markup
# enabled. NOTE(review): this relies on "console" being installed first, as it
# is listed first under "root" above.
logger.handlers[0] = RichHandler(markup=True)