#!/bin/bash
# Echo each command as it runs so the resolved flags appear in the log.
set -x
lpips_lambda=0.8
image_size=128
image_size_encoder=224
patch_size=14
batch_size=1
num_samples=1
dataset_name=ffhq
DATASET_FLAGS="
--data_dir /mnt/yslan/datasets/cache/lmdb_debug/${dataset_name} \
"
lr=2e-5 # for the improved-diffusion UNet
kl_lambda=0
vit_lr=1e-5 # for the ViT encoder and decoder
encoder_lr=$vit_lr
vit_decoder_lr=$vit_lr
conv_lr=0.0005
triplane_decoder_lr=$conv_lr
super_resolution_lr=$conv_lr
scale_clip_encoding=18.4
triplane_scaling_divider=1
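# Pre-trained FFHQ checkpoint (joint denoising + reconstruction model) to load for sampling.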
CKPT_FLAGS="
--resume_checkpoint checkpoints/ffhq/model_joint_denoise_rec_model1580000.pt \
"
LR_FLAGS="--encoder_lr $encoder_lr \
--vit_decoder_lr $vit_decoder_lr \
--triplane_decoder_lr $triplane_decoder_lr \
--super_resolution_lr $super_resolution_lr \
--lr $lr"
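# Encoder/reconstruction flags: DINOv2 ViT backbone, 224x224 encoder input, 128x128 rendering resolution.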
TRAIN_FLAGS="--iterations 10001 --anneal_lr False \
--batch_size $batch_size --save_interval 10000 \
--image_size_encoder $image_size_encoder \
--image_size $image_size \
--dino_version v2 \
--sr_training False \
--cls_token False \
--weight_decay 0.05 \
--kl_lambda ${kl_lambda} \
--no_dim_up_mlp True \
--uvit_skip_encoder True \
--fg_mse True \
--bg_lamdba 0.01 \
"
# --vae_p 1 \
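# Denoiser UNet architecture: 320 base channels, spatial-transformer blocks with context_dim 768 (presumably the CLIP text-embedding width).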
DDPM_MODEL_FLAGS="
--learn_sigma False \
--num_heads 8 \
--num_res_blocks 2 \
--num_channels 320 \
--attention_resolutions 4,2,1 \
--use_spatial_transformer True \
--transformer_depth 1 \
--context_dim 768 \
"
DIFFUSION_FLAGS="--diffusion_steps 1000 --noise_schedule linear \
--use_kl False \
--use_amp False \
--triplane_scaling_divider ${triplane_scaling_divider} \
--trainer_name vpsde_crossattn \
--mixed_prediction True \
--denoise_in_channels 12 \
--denoise_out_channels 12 \
--diffusion_input_size 32 \
--p_rendering_loss False \
--pred_type v \
--predict_v True \
"
DDIM_FLAGS="
--timestep_respacing ddim250 \
--use_ddim True \
--unconditional_guidance_scale 6.5 \
"
# ControlNet-related flags; not used in this run.
CONTROL_FLAGS="
--train_vae False \
--create_controlnet False \
--control_key img_sr \
"
prompt="a middle aged woman with brown hair, wearing glasses."
logdir="./logs/LSGM/inference/t23d/${dataset_name}/crossattn-v1-ddim250/T23D_test/woman_glass-newcls"
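# Reconstruction (VAE) architecture: ViT-S encoder, ViT-B decoder, 32-channel triplanes (out_chans 96, presumably 3 planes x 32).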
SR_TRAIN_FLAGS_v1_2XC="
--decoder_in_chans 32 \
--out_chans 96 \
--alpha_lambda 1 \
--logdir $logdir \
--arch_encoder vits \
--arch_decoder vitb \
--vit_decoder_wd 0.001 \
--encoder_weight_decay 0.001 \
--color_criterion mse \
--triplane_in_chans 32 \
--decoder_output_dim 32 \
--ae_classname vit.vit_triplane.VAE_LDM_V4_vit3D_v3_conv3D_depth2_xformer_mha_PEinit_2d_sincos_uvit_RodinRollOutConv_4x4_lite_mlp_unshuffle_4XC_final \
"
SR_TRAIN_FLAGS=${SR_TRAIN_FLAGS_v1_2XC}
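# Housekeeping: clear previous runs, recreate the log dir, and archive a copy of this script alongside the outputs.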
NUM_GPUS=1
rm -rf "$logdir"/runs
mkdir -p "$logdir"/
cp "$0" "$logdir"/
export OMP_NUM_THREADS=12
export NCCL_ASYNC_ERROR_HANDLING=1
export CUDA_VISIBLE_DEVICES=6
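# Single-node, single-process launch of the sampling script on the GPU selected above.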
torchrun --nproc_per_node=$NUM_GPUS \
--master_port=0 \
--rdzv_backend=c10d \
--rdzv_endpoint=localhost:33385 \
--nnodes 1 \
scripts/vit_triplane_diffusion_sample.py \
--num_workers 4 \
--depth_lambda 0 \
${TRAIN_FLAGS} \
${SR_TRAIN_FLAGS} \
${DIFFUSION_FLAGS} \
${CONTROL_FLAGS} \
${DDPM_MODEL_FLAGS} \
${DATASET_FLAGS} \
${CKPT_FLAGS} \
${LR_FLAGS} \
--lpips_lambda $lpips_lambda \
--overfitting False \
--load_pretrain_encoder True \
--iterations 5000001 \
--save_interval 10000 \
--eval_interval 2500 \
--decomposed True \
--logdir $logdir \
--cfg ffhq \
--patch_size ${patch_size} \
--eval_batch_size ${batch_size} \
--prompt "$prompt" \
--interval 5 \
--save_img True \
--num_samples ${num_samples} \
--use_train_trajectory False \
--normalize_clip_encoding True \
--scale_clip_encoding ${scale_clip_encoding} \
--overwrite_diff_inp_size 16 \
--use_lmdb True \
${DDIM_FLAGS}