# File size: 3,820 Bytes
# 1bef79d
# Reference batch size for learning-rate scaling.
# NOTE(review): 4096 equals 8 x 512, matching the "8xb512" tag in work_dir
# and the per-loader batch_size of 512 — confirm the GPU count at launch.
auto_scale_lr = {'base_batch_size': 4096}

# Input preprocessing: one mean/std entry per colour channel.
# NOTE(review): values look like the usual ImageNet statistics on a 0-255
# scale — confirm against the preprocessor implementation.
data_preprocessor = {
    'type': 'SelfSupDataPreprocessor',
    'mean': [123.675, 116.28, 103.53],
    'std': [58.395, 57.12, 57.375],
    'to_rgb': True,
    'non_blocking': True,
}

# Dataset location on disk and the dataset type name.
data_root = '/workdir/ILSVRC2012/'
dataset_type = 'ImageNet'
# Default hooks, keyed by role. Checkpoints are written at interval 1 with at
# most 3 kept; the logger uses interval 20; visualization is switched off.
default_hooks = {
    'checkpoint': {
        'type': 'CheckpointHook',
        'interval': 1,
        'max_keep_ckpts': 3,
    },
    'logger': {'type': 'LoggerHook', 'interval': 20},
    'param_scheduler': {'type': 'ParamSchedulerHook'},
    'sampler_seed': {'type': 'DistSamplerSeedHook'},
    'timer': {'type': 'IterTimerHook'},
    'visualization': {'type': 'VisualizationHook', 'enable': False},
}

# Scope name under which the `type` strings in this file are looked up
# (presumably the registry scope — confirm against the framework).
default_scope = 'mmpretrain'

# Process/environment settings: cuDNN benchmark flag, distributed backend,
# and multiprocessing options.
env_cfg = {
    'cudnn_benchmark': True,
    'dist_cfg': {'backend': 'nccl'},
    'mp_cfg': {'mp_start_method': 'spawn', 'opencv_num_threads': 0},
}

launcher = 'pytorch'
# No checkpoint path is given here.
load_from = None
log_level = 'INFO'
# Model definition: an 'MAE' wrapper around a backbone, a decoder neck and a
# reconstruction head. All three parts use patch_size 16.
model = {
    'type': 'MAE',
    # Backbone: arch 'b' with a mask ratio of 0.75.
    'backbone': {
        'type': 'MAELLaMA',
        'arch': 'b',
        'patch_size': 16,
        'mask_ratio': 0.75,
    },
    # Decoder: 8 layers, width 512, 16 heads, fed 768-dim embeddings.
    'neck': {
        'type': 'MAEPretrainDecoder',
        'patch_size': 16,
        'in_chans': 3,
        'embed_dim': 768,
        'decoder_embed_dim': 512,
        'decoder_depth': 8,
        'decoder_num_heads': 16,
        'mlp_ratio': 4.0,
    },
    # Head: 'L2' pixel reconstruction loss with norm_pix enabled.
    'head': {
        'type': 'MAEPretrainHead',
        'norm_pix': True,
        'patch_size': 16,
        'loss': {'type': 'PixelReconstructionLoss', 'criterion': 'L2'},
    },
    # Initialisation rules; list order is kept as in the original.
    'init_cfg': [
        {'type': 'Xavier', 'layer': 'Linear', 'distribution': 'uniform'},
        {'type': 'Constant', 'layer': 'LayerNorm', 'val': 1.0, 'bias': 0.0},
    ],
}
# Optimizer wrapper: AdamW under 'AmpOptimWrapper' with dynamic loss scaling.
optim_wrapper = {
    'type': 'AmpOptimWrapper',
    'loss_scale': 'dynamic',
    'optimizer': {
        'type': 'AdamW',
        'lr': 0.0024,
        'betas': (0.9, 0.95),
        'weight_decay': 0.05,
    },
    # Parameters whose names match these keys get a decay multiplier of 0.0
    # (presumably exempting them from weight decay — confirm against the
    # optimizer constructor).
    'paramwise_cfg': {
        'custom_keys': {
            'bias': {'decay_mult': 0.0},
            'cls_token': {'decay_mult': 0.0},
            'ln': {'decay_mult': 0.0},
            'mask_token': {'decay_mult': 0.0},
            'pos_embed': {'decay_mult': 0.0},
        },
    },
}
# Learning-rate schedule in two consecutive phases, both declared in epochs
# and flagged for conversion to iteration-based stepping.
param_scheduler = [
    # Phase 1 (epochs 0-40): LinearLR starting from a factor of 0.0001.
    {
        'type': 'LinearLR',
        'start_factor': 0.0001,
        'by_epoch': True,
        'begin': 0,
        'end': 40,
        'convert_to_iter_based': True,
    },
    # Phase 2 (epochs 40-1600): CosineAnnealingLR; T_max = 1600 - 40 = 1560.
    {
        'type': 'CosineAnnealingLR',
        'T_max': 1560,
        'by_epoch': True,
        'begin': 40,
        'end': 1600,
        'convert_to_iter_based': True,
    },
]

# Seeding: base seed 0, a different seed per rank, non-deterministic mode.
randomness = {'seed': 0, 'diff_rank_seed': True, 'deterministic': False}

# Resume flag is on (load_from elsewhere in this file is None).
resume = True

# Epoch-based training loop with 1600 epochs, matching the schedule's end.
train_cfg = {'type': 'EpochBasedTrainLoop', 'max_epochs': 1600}
# Training dataloader: batches of 512 from the 'train' split, 8 persistent
# workers, pinned memory, shuffled sampling.
train_dataloader = {
    'batch_size': 512,
    'num_workers': 8,
    'persistent_workers': True,
    'pin_memory': True,
    'collate_fn': {'type': 'default_collate'},
    'sampler': {'type': 'DefaultSampler', 'shuffle': True},
    'dataset': {
        'type': 'ImageNet',
        'data_root': '/workdir/ILSVRC2012/',
        'split': 'train',
        # Transform order matters: load -> random resized crop -> flip -> pack.
        'pipeline': [
            {'type': 'LoadImageFromFile'},
            {
                'type': 'RandomResizedCrop',
                'scale': 224,
                'crop_ratio_range': (0.2, 1.0),
                'interpolation': 'bicubic',
                'backend': 'pillow',
            },
            {'type': 'RandomFlip', 'prob': 0.5},
            {'type': 'PackInputs'},
        ],
    },
}
# Standalone copy of the training transforms, applied in list order:
# load the image, take a random resized crop to 224 (crop ratio 0.2-1.0,
# bicubic, pillow backend), flip with probability 0.5, then pack the inputs.
train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {
        'type': 'RandomResizedCrop',
        'scale': 224,
        'crop_ratio_range': (0.2, 1.0),
        'interpolation': 'bicubic',
        'backend': 'pillow',
    },
    {'type': 'RandomFlip', 'prob': 0.5},
    {'type': 'PackInputs'},
]
# Visualisation backends: a single local backend.
vis_backends = [{'type': 'LocalVisBackend'}]

# The visualizer carries its own, separate backend list with the same content.
visualizer = {
    'type': 'UniversalVisualizer',
    'vis_backends': [{'type': 'LocalVisBackend'}],
}

# Output directory for this run.
work_dir = './work_dirs/mae_lama-base-p16_8xb512-amp-coslr-1600e_in1k'
|