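#!/bin/bash
# Launch script for open_flamingo grounded training on a single node with 6 GPUs.
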
# Webdataset shard patterns: grounded COCO image-text data (--laion_shards) and Pile text data (--pile_shards)
LAION_DATA=/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/karpathy_coco_wds_full_ground/{00000..00066}.tar
PILE_DATA=/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/the_pile/{00000..01925}.tar
# Output directory for checkpoints; keep a copy of this script there for reproducibility
SAVE_DIR=checkpoints_local/debug0922
mkdir -p ${SAVE_DIR}
cp "$0" "${SAVE_DIR}/"
# Use only the local Hugging Face cache (no network downloads of models/tokenizers)
export TRANSFORMERS_OFFLINE=1
torchrun --nnodes=1 --nproc_per_node=6 --master_port=14288 open_flamingo/train/train.py \
--run_name ${SAVE_DIR} \
--vision_encoder_path ViT-L-14 \
--vision_encoder_pretrained datacomp_xl_s13b_b90k \
--lm_path EleutherAI/pythia-1.4b \
--tokenizer_path EleutherAI/pythia-1.4b \
--dataset_resampled \
--laion_shards ${LAION_DATA} \
--pile_shards ${PILE_DATA} \
--batch_size_laion 14 \
--batch_size_pile 2 \
--workers 4 \
--lr_scheduler cosine \
--warmup_steps 200 \
--num_steps 4000 \
--checkpoint_activations \
--delete_previous_checkpoint \
--gradient_accumulation_steps 1 \
--save_interval 100 \
--logging_steps 2 \
--skip_delete_pattern 500 \
--precision amp_fp16 \
--learning_rate 1.0e-5 \
--add_visual_token \
--max-length 960 \
--loss_multiplier_det 0.025 \
--add_box \
--expand \
--use_format_v2 \
--resume_from_checkpoint checkpoints/091701_pythiaS_previsual_fix/checkpoint_20000.pt \
--restart