# Spaces: Running on A10G  (page-header text captured with this script; not a command)
# GMFlow with refinement

# Number of GPUs used for training (passed as --nproc_per_node to the launcher).
# Set it to the number of GPUs on your machine; this script uses 4.
# Can be trained on 4x 32G V100, 4x 40GB A100, or 8x 16G V100 GPUs.
NUM_GPUS=4
# chairs
# Creates the checkpoint directory, then launches distributed training with
# NUM_GPUS processes on this node (NUM_GPUS must already be set).
# No --stage flag is passed here, so main.py's default stage applies
# (presumably chairs; confirm in main.py).
# stdout and stderr of the training run are merged and appended to train.log
# inside the checkpoint directory, while still being shown on the terminal.
CHECKPOINT_DIR=checkpoints/chairs-gmflow_with_refine && \
mkdir -p "${CHECKPOINT_DIR}" && \
python -m torch.distributed.launch --nproc_per_node="${NUM_GPUS}" --master_port=9989 main.py \
  --launcher pytorch \
  --checkpoint_dir "${CHECKPOINT_DIR}" \
  --batch_size 16 \
  --val_dataset chairs sintel kitti \
  --lr 4e-4 \
  --image_size 384 512 \
  --padding_factor 32 \
  --upsample_factor 4 \
  --num_scales 2 \
  --attn_splits_list 2 8 \
  --corr_radius_list -1 4 \
  --prop_radius_list -1 1 \
  --with_speed_metric \
  --val_freq 10000 \
  --save_ckpt_freq 10000 \
  --num_steps 100000 \
  2>&1 | tee -a "${CHECKPOINT_DIR}/train.log"
# things (our final model is trained for 800K iterations; for an ablation study, you can train for 200K)
# Resumes from the chairs checkpoint saved at step 100000 and trains with
# --stage things. If you shorten --num_steps, remember that the sintel stage
# resumes from step_800000.pth of this stage.
# stdout and stderr of the training run are merged and appended to train.log
# inside the checkpoint directory.
CHECKPOINT_DIR=checkpoints/things-gmflow_with_refine && \
mkdir -p "${CHECKPOINT_DIR}" && \
python -m torch.distributed.launch --nproc_per_node="${NUM_GPUS}" --master_port=9989 main.py \
  --launcher pytorch \
  --checkpoint_dir "${CHECKPOINT_DIR}" \
  --resume checkpoints/chairs-gmflow_with_refine/step_100000.pth \
  --stage things \
  --batch_size 8 \
  --val_dataset things sintel kitti \
  --lr 2e-4 \
  --image_size 384 768 \
  --padding_factor 32 \
  --upsample_factor 4 \
  --num_scales 2 \
  --attn_splits_list 2 8 \
  --corr_radius_list -1 4 \
  --prop_radius_list -1 1 \
  --with_speed_metric \
  --val_freq 40000 \
  --save_ckpt_freq 50000 \
  --num_steps 800000 \
  2>&1 | tee -a "${CHECKPOINT_DIR}/train.log"
# sintel
# Resumes from the things checkpoint saved at step 800000 and trains with
# --stage sintel.
# stdout and stderr of the training run are merged and appended to train.log
# inside the checkpoint directory.
CHECKPOINT_DIR=checkpoints/sintel-gmflow_with_refine && \
mkdir -p "${CHECKPOINT_DIR}" && \
python -m torch.distributed.launch --nproc_per_node="${NUM_GPUS}" --master_port=9989 main.py \
  --launcher pytorch \
  --checkpoint_dir "${CHECKPOINT_DIR}" \
  --resume checkpoints/things-gmflow_with_refine/step_800000.pth \
  --stage sintel \
  --batch_size 8 \
  --val_dataset sintel kitti \
  --lr 2e-4 \
  --image_size 320 896 \
  --padding_factor 32 \
  --upsample_factor 4 \
  --num_scales 2 \
  --attn_splits_list 2 8 \
  --corr_radius_list -1 4 \
  --prop_radius_list -1 1 \
  --with_speed_metric \
  --val_freq 20000 \
  --save_ckpt_freq 20000 \
  --num_steps 200000 \
  2>&1 | tee -a "${CHECKPOINT_DIR}/train.log"
# kitti
# Resumes from the sintel checkpoint saved at step 200000 and trains with
# --stage kitti.
# stdout and stderr of the training run are merged and appended to train.log
# inside the checkpoint directory.
CHECKPOINT_DIR=checkpoints/kitti-gmflow_with_refine && \
mkdir -p "${CHECKPOINT_DIR}" && \
python -m torch.distributed.launch --nproc_per_node="${NUM_GPUS}" --master_port=9989 main.py \
  --launcher pytorch \
  --checkpoint_dir "${CHECKPOINT_DIR}" \
  --resume checkpoints/sintel-gmflow_with_refine/step_200000.pth \
  --stage kitti \
  --batch_size 8 \
  --val_dataset kitti \
  --lr 2e-4 \
  --image_size 320 1152 \
  --padding_factor 32 \
  --upsample_factor 4 \
  --num_scales 2 \
  --attn_splits_list 2 8 \
  --corr_radius_list -1 4 \
  --prop_radius_list -1 1 \
  --with_speed_metric \
  --val_freq 10000 \
  --save_ckpt_freq 10000 \
  --num_steps 100000 \
  2>&1 | tee -a "${CHECKPOINT_DIR}/train.log"
# A final note: if your training is terminated unexpectedly, you can resume from the latest checkpoint.
# An example: resume chairs training
# CHECKPOINT_DIR=checkpoints/chairs-gmflow_with_refine && \
# mkdir -p ${CHECKPOINT_DIR} && \
# python -m torch.distributed.launch --nproc_per_node=${NUM_GPUS} --master_port=9989 main.py \
# --launcher pytorch \
# --checkpoint_dir ${CHECKPOINT_DIR} \
# --resume checkpoints/chairs-gmflow_with_refine/checkpoint_latest.pth \
# --batch_size 16 \
# --val_dataset chairs sintel kitti \
# --lr 4e-4 \
# --image_size 384 512 \
# --padding_factor 32 \
# --upsample_factor 4 \
# --num_scales 2 \
# --attn_splits_list 2 8 \
# --corr_radius_list -1 4 \
# --prop_radius_list -1 1 \
# --with_speed_metric \
# --val_freq 10000 \
# --save_ckpt_freq 10000 \
# --num_steps 100000 \
# 2>&1 | tee -a ${CHECKPOINT_DIR}/train.log