#!/usr/bin/env bash
#
# Launches open_flamingo/train/train.py through torchrun on a single node
# with 6 processes, resuming from an existing checkpoint.
#
# Side effects:
#   - creates ${SAVE_DIR} if it does not exist
#   - copies this script into ${SAVE_DIR} so the exact launch configuration
#     is kept next to the run's outputs
#
# The meaning of the individual training flags is defined by train.py, which
# is not part of this file; they are passed through unchanged.

set -euo pipefail

# Shard specifications. Bash does not perform brace expansion on the
# right-hand side of an assignment, nor on the result of a variable
# expansion, so the "{a..b}" range reaches train.py as a literal string.
# NOTE(review): presumably the data loader expands the range itself — confirm.
LAION_DATA='/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/karpathy_coco_wds_full_ground/{00000..00066}.tar'
PILE_DATA='/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/the_pile/{00000..01925}.tar'

# Output directory; also passed to train.py as --run_name.
SAVE_DIR=checkpoints_local/debug0922

mkdir -p -- "${SAVE_DIR}"

# Snapshot of the launch script. Best-effort: a failed copy (e.g. when the
# script is fed to the shell on stdin and $0 is not a file) must not prevent
# the training run from starting.
cp -- "$0" "${SAVE_DIR}/" \
  || printf 'warning: could not copy %s into %s\n' "$0" "${SAVE_DIR}" >&2

# NOTE(review): TRANSFORMERS_OFFLINE=1 is the Hugging Face switch for using
# only locally cached files — confirm the model/tokenizer are already cached.
export TRANSFORMERS_OFFLINE=1

# Launcher options for torchrun itself.
launcher_args=(
  --nnodes=1
  --nproc_per_node=6
  --master_port=14288
)

# Arguments for train.py, kept in their original order. Building them as an
# array avoids fragile backslash continuations and keeps every value a
# single word regardless of its content.
train_args=(
  --run_name "${SAVE_DIR}"
  --vision_encoder_path ViT-L-14
  --vision_encoder_pretrained datacomp_xl_s13b_b90k
  --lm_path EleutherAI/pythia-1.4b
  --tokenizer_path EleutherAI/pythia-1.4b
  --dataset_resampled
  --laion_shards "${LAION_DATA}"
  --pile_shards "${PILE_DATA}"
  --batch_size_laion 14
  --batch_size_pile 2
  --workers=4
  --lr_scheduler cosine
  --warmup_steps 200
  --num_steps 4000
  --checkpoint_activations
  --delete_previous_checkpoint
  --gradient_accumulation_steps 1
  --save_interval 100
  --logging_steps 2
  --skip_delete_pattern 500
  --precision amp_fp16
  --learning_rate 1.0e-5
  --add_visual_token
  --max-length 960
  --loss_multiplier_det 0.025
  --add_box
  --expand
  --use_format_v2
  --resume_from_checkpoint checkpoints/091701_pythiaS_previsual_fix/checkpoint_20000.pt
  --restart
)

# Last command: its exit status becomes the exit status of the script.
torchrun "${launcher_args[@]}" open_flamingo/train/train.py "${train_args[@]}"