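# MMPretrain config dump for MAE-style masked-image-modeling pre-training of
# a 'MAELLaMA' large (patch 16) backbone on ImageNet-1k: 800 epochs, AMP,
# AdamW with a cosine LR schedule (cf. the run name in work_dir).
#
# Reference batch size for linear LR scaling; only applied if auto-scale-lr
# is enabled when launching the run.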
auto_scale_lr = dict(base_batch_size=4096)
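# Standard ImageNet mean/std normalization with BGR->RGB conversion;
# 'SelfSupDataPreprocessor' is the self-supervised variant of the default
# image preprocessor.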
data_preprocessor = dict(
    mean=[
        123.675,
        116.28,
        103.53,
    ],
    non_blocking=True,
    std=[
        58.395,
        57.12,
        57.375,
    ],
    to_rgb=True,
    type='SelfSupDataPreprocessor')
data_root = '/workdir/ILSVRC2012/'
dataset_type = 'ImageNet'
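# Runtime hooks: checkpoint every epoch (keep only the last 2), log every 20
# iterations, and leave the visualization hook disabled during pre-training.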
default_hooks = dict(
    checkpoint=dict(interval=1, max_keep_ckpts=2, type='CheckpointHook'),
    logger=dict(interval=20, type='LoggerHook'),
    param_scheduler=dict(type='ParamSchedulerHook'),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    timer=dict(type='IterTimerHook'),
    visualization=dict(enable=False, type='VisualizationHook'))
default_scope = 'mmpretrain'
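# Distributed environment: NCCL backend, 'spawn' multiprocessing start
# method, and cuDNN benchmark mode (the input size is fixed at 224x224).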
env_cfg = dict(
    cudnn_benchmark=True,
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='spawn', opencv_num_threads=0))
launcher = 'pytorch'
load_from = None
log_level = 'INFO'
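# MAE model: a 'MAELLaMA' backbone (arch 'l', patch 16; presumably a
# project-specific LLaMA-style encoder) with 75% of patches masked, a
# 512-dim / 8-layer MAE decoder, and an L2 pixel-reconstruction head on
# per-patch normalized targets (norm_pix=True).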
model = dict(
    backbone=dict(arch='l', mask_ratio=0.75, patch_size=16, type='MAELLaMA'),
    head=dict(
        loss=dict(criterion='L2', type='PixelReconstructionLoss'),
        norm_pix=True,
        patch_size=16,
        type='MAEPretrainHead'),
    init_cfg=[
        dict(distribution='uniform', layer='Linear', type='Xavier'),
        dict(bias=0.0, layer='LayerNorm', type='Constant', val=1.0),
    ],
    neck=dict(
        decoder_depth=8,
        decoder_embed_dim=512,
        decoder_num_heads=16,
        embed_dim=1024,
        in_chans=3,
        mlp_ratio=4.0,
        patch_size=16,
        type='MAEPretrainDecoder'),
    type='MAE')
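# AMP optimizer wrapper (dynamic loss scaling) around AdamW. Weight decay is
# disabled for biases, LayerNorms, and the cls/mask/position tokens. The
# lr=0.0024 presumably follows the usual MAE rule of 1.5e-4 * 4096 / 256.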
optim_wrapper = dict(
    loss_scale='dynamic',
    optimizer=dict(
        betas=(
            0.9,
            0.95,
        ), lr=0.0024, type='AdamW', weight_decay=0.05),
    paramwise_cfg=dict(
        custom_keys=dict(
            bias=dict(decay_mult=0.0),
            cls_token=dict(decay_mult=0.0),
            ln=dict(decay_mult=0.0),
            mask_token=dict(decay_mult=0.0),
            pos_embed=dict(decay_mult=0.0))),
    type='AmpOptimWrapper')
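# LR schedule: linear warmup over the first 40 epochs, then cosine annealing
# across the remaining 760 epochs; both phases are converted to
# per-iteration updates.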
param_scheduler = [
    dict(
        begin=0,
        by_epoch=True,
        convert_to_iter_based=True,
        end=40,
        start_factor=1e-09,
        type='LinearLR'),
    dict(
        T_max=760,
        begin=40,
        by_epoch=True,
        convert_to_iter_based=True,
        end=800,
        type='CosineAnnealingLR'),
]
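# Seeded but non-deterministic run with a different seed per rank;
# resume=True auto-resumes from the latest checkpoint found in work_dir;
# 800 pre-training epochs in total.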
randomness = dict(deterministic=False, diff_rank_seed=True, seed=0)
resume = True
train_cfg = dict(max_epochs=800, type='EpochBasedTrainLoop')
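# Per-GPU batch size of 256 with the minimal MAE-style augmentation:
# RandomResizedCrop to 224 (scale range 0.2-1.0, bicubic) plus a 0.5
# probability horizontal flip.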
train_dataloader = dict(
    batch_size=256,
    collate_fn=dict(type='default_collate'),
    dataset=dict(
        data_root='/workdir/ILSVRC2012/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                backend='pillow',
                crop_ratio_range=(
                    0.2,
                    1.0,
                ),
                interpolation='bicubic',
                scale=224,
                type='RandomResizedCrop'),
            dict(prob=0.5, type='RandomFlip'),
            dict(type='PackInputs'),
        ],
        split='train',
        type='ImageNet'),
    num_workers=8,
    persistent_workers=True,
    pin_memory=True,
    sampler=dict(shuffle=True, type='DefaultSampler'))
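# Standalone copy of the training pipeline (identical to dataset.pipeline
# inside train_dataloader above).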
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        backend='pillow',
        crop_ratio_range=(
            0.2,
            1.0,
        ),
        interpolation='bicubic',
        scale=224,
        type='RandomResizedCrop'),
    dict(prob=0.5, type='RandomFlip'),
    dict(type='PackInputs'),
]
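# Local-only visualization backend; its outputs are written under work_dir.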
vis_backends = [
    dict(type='LocalVisBackend'),
]
visualizer = dict(
    type='UniversalVisualizer', vis_backends=[
        dict(type='LocalVisBackend'),
    ])
work_dir = './work_dirs/mae_lama-large-p16_8xb512-amp-coslr-800e_in1k'