|
import argparse |
|
import os |
|
import torch |
|
import pytorch_lightning as ptl |
|
from pytorch_lightning.loggers import TensorBoardLogger |
|
|
|
from detector.data import FontDataModule |
|
from detector.model import * |
|
from utils import get_current_tag |
|
|
|
|
|
def _normalize_precision(value: str) -> str:
    """Return *value* with the legacy spelling ``heighest`` mapped to ``highest``.

    Earlier versions of this script listed the misspelled ``heighest`` as a
    valid ``--tensor-core`` choice. The value is later handed to
    ``torch.set_float32_matmul_precision``, whose documented settings are
    ``highest``, ``high`` and ``medium``, so the misspelling is translated
    here to keep existing command lines working.
    """
    return "highest" if value == "heighest" else value


# Command-line interface of the training script.
parser = argparse.ArgumentParser()

# Hardware / batching.
parser.add_argument(
    "-d",
    "--devices",
    nargs="*",
    type=int,
    default=[0],
    help="GPU devices to use (default: [0])",
)
parser.add_argument(
    "-b",
    "--single-batch-size",
    type=int,
    default=64,
    help="Batch size of single device (default: 64)",
)

# Checkpoint to resume from and model selection.
parser.add_argument(
    "-c",
    "--checkpoint",
    type=str,
    default=None,
    help="Trainer checkpoint path (default: None)",
)
parser.add_argument(
    "-m",
    "--model",
    type=str,
    default="resnet18",
    choices=["resnet18", "resnet34", "resnet50", "resnet101", "deepfont"],
    help="Model to use (default: resnet18)",
)
parser.add_argument(
    "-p",
    "--pretrained",
    action="store_true",
    help="Use pretrained model for ResNet (default: False)",
)

# Data handling.
parser.add_argument(
    "-i",
    "--crop-roi-bbox",
    action="store_true",
    help="Crop ROI bounding box (default: False)",
)
parser.add_argument(
    "-a",
    "--augmentation",
    type=str,
    default=None,
    choices=["v1", "v2", "v3"],
    help="Augmentation strategy to use (default: None)",
)
parser.add_argument(
    "-l",
    "--lr",
    type=float,
    default=0.0001,
    help="Learning rate (default: 0.0001)",
)
parser.add_argument(
    "-s",
    "--datasets",
    nargs="*",
    type=str,
    default=["./dataset/font_img"],
    help="Datasets paths, separated by space (default: ['./dataset/font_img'])",
)
parser.add_argument(
    "-n",
    "--model-name",
    type=str,
    default=None,
    help="Model name (default: current tag)",
)
parser.add_argument(
    "-f",
    "--font-classification-only",
    action="store_true",
    help="Font classification only (default: False)",
)
parser.add_argument(
    "-z",
    "--size",
    type=int,
    default=512,
    help="Model feature image input size (default: 512)",
)
parser.add_argument(
    "-t",
    "--tensor-core",
    # The converter runs before the ``choices`` check, so the legacy spelling
    # is accepted and stored as ``highest``.
    type=_normalize_precision,
    choices=["medium", "high", "highest"],
    default="high",
    help="Tensor core precision (default: high)",
)
parser.add_argument(
    "-r",
    "--preserve-aspect-ratio-by-random-crop",
    action="store_true",
    help="Preserve aspect ratio (default: False)",
)
|
|
|
# Parse the command line; the rest of the script reads its settings from
# ``args``.
args = parser.parse_args()

# Forward the requested float32 matmul precision to PyTorch.
torch.set_float32_matmul_precision(args.tensor_core)

devices = args.devices
# ``--devices`` is declared with nargs="*", so a bare ``-d`` produces an empty
# list. Reject it with a usage error instead of dividing by zero below.
if not devices:
    parser.error("--devices requires at least one device index")
num_device = len(devices)
single_batch_size = args.single_batch_size

# os.cpu_count() returns None when the CPU count cannot be determined; fall
# back to 0 workers in that case (the same value the Windows branch uses).
total_num_workers = os.cpu_count() or 0
# Split the available CPUs evenly between the selected devices.
single_device_num_workers = total_num_workers // num_device

# Publish the requested input size on the shared ``config`` object.
# NOTE(review): ``config`` is not imported by name in this file; presumably it
# arrives through ``from detector.model import *`` - confirm.
config.INPUT_SIZE = args.size

# On Windows the per-device worker count is forced to 0.
if os.name == "nt":
    single_device_num_workers = 0

# Optimizer settings; ``b1``/``b2`` are later passed together as ``betas``.
lr = args.lr
b1 = 0.9
b2 = 0.999

# Loss weights handed to the detector.
lambda_font = 2.0
lambda_direction = 0.5
lambda_regression = 1.0

# Shared by the data module and the regressor models.
regression_use_tanh = False

# Schedule lengths, in epochs.
num_warmup_epochs = 5
num_epochs = 100

log_every_n_steps = 100
|
|
|
# Each dataset root contributes its ``train``, ``val`` and ``test``
# sub-directory path; the resulting lists are passed as ``train_paths``,
# ``val_paths`` and ``test_paths``.
split_paths = {
    f"{split}_paths": [os.path.join(root, split) for root in args.datasets]
    for split in ("train", "val", "test")
}

# Only the training split is shuffled.
data_module = FontDataModule(
    **split_paths,
    batch_size=single_batch_size,
    num_workers=single_device_num_workers,
    pin_memory=True,
    train_shuffle=True,
    val_shuffle=False,
    test_shuffle=False,
    regression_use_tanh=regression_use_tanh,
    train_transforms=args.augmentation,
    crop_roi_bbox=args.crop_roi_bbox,
    preserve_aspect_ratio_by_random_crop=args.preserve_aspect_ratio_by_random_crop,
)

# Total and warm-up iteration counts: the per-epoch training iteration count
# reported by the data module (queried separately for each), scaled by the
# respective number of epochs.
total_epoch_iters = data_module.get_train_num_iter(num_device)
num_iters = total_epoch_iters * num_epochs
warmup_epoch_iters = data_module.get_train_num_iter(num_device)
num_warmup_iter = warmup_epoch_iters * num_warmup_epochs
|
|
|
# An explicit --model-name wins; otherwise the current tag names the run.
if args.model_name is not None:
    model_name = args.model_name
else:
    model_name = get_current_tag()

# TensorBoard logs go under ``<cwd>/tensorboard/<model_name>``.
logger_unconditioned = TensorBoardLogger(
    save_dir=os.getcwd(),
    name="tensorboard",
    version=model_name,
)

# A single device uses Lightning's automatic strategy; several devices use DDP.
if num_device == 1:
    strategy = "auto"
else:
    strategy = "ddp"

# Trainer options collected in one place before construction.
trainer_options = {
    "accelerator": "gpu",
    "devices": devices,
    "strategy": strategy,
    "max_epochs": num_epochs,
    "logger": logger_unconditioned,
    "log_every_n_steps": log_every_n_steps,
    "enable_checkpointing": True,
    "deterministic": True,
}
trainer = ptl.Trainer(**trainer_options)
|
|
|
# The ResNet variants are constructed with the same arguments, so the class
# is looked up by the --model value instead of repeating one branch per name.
resnet_regressors = {
    "resnet18": ResNet18Regressor,
    "resnet34": ResNet34Regressor,
    "resnet50": ResNet50Regressor,
    "resnet101": ResNet101Regressor,
}

if args.model in resnet_regressors:
    model = resnet_regressors[args.model](
        pretrained=args.pretrained, regression_use_tanh=regression_use_tanh
    )
elif args.model == "deepfont":
    # The DeepFont baseline is only built for one option combination. These
    # are user-input checks, so report them as usage errors (exit status 2)
    # rather than with ``assert``, which is skipped under ``python -O``.
    if args.pretrained:
        parser.error("--pretrained cannot be combined with --model deepfont")
    if args.size != 105:
        parser.error("--model deepfont requires --size 105")
    if not args.font_classification_only:
        parser.error("--model deepfont requires --font-classification-only")
    model = DeepFontBaseline()
else:
    # Defensive: only reachable if a --model choice is added without a
    # matching branch here.
    raise NotImplementedError()

# Compile the model on PyTorch 2.0+ when running on a POSIX system.
# NOTE(review): this relies on ``torch.__version__`` comparing by version
# semantics rather than as a plain string - confirm for the minimum supported
# PyTorch release.
if torch.__version__ >= "2.0" and os.name == "posix":
    model = torch.compile(model)
|
|
|
# Wrap the network in the Lightning module together with the loss weights,
# optimizer settings and schedule lengths defined earlier in the script.
detector_options = {
    "model": model,
    "lambda_font": lambda_font,
    "lambda_direction": lambda_direction,
    "lambda_regression": lambda_regression,
    "font_classification_only": args.font_classification_only,
    "lr": lr,
    "betas": (b1, b2),
    "num_warmup_iters": num_warmup_iter,
    "num_iters": num_iters,
    "num_epochs": num_epochs,
}
detector = FontDetector(**detector_options)

# Train (passing --checkpoint as ``ckpt_path``), then run the test loop with
# the same data module.
trainer.fit(
    detector,
    datamodule=data_module,
    ckpt_path=args.checkpoint,
)
trainer.test(
    detector,
    datamodule=data_module,
)
|
|