#!/usr/bin/env bash
# Launcher configuration. The variables defined here are read further down in
# this script, where they are turned into command-line flags.

# Trace every command as it is executed.
set -x

# Forwarded as --lpips_lambda.
lpips_lambda=0.8

# Forwarded as --image_size and --image_size_encoder.
image_size=128
image_size_encoder=224

# Forwarded as --patch_size.
patch_size=14

# batch_size feeds both --batch_size and --eval_batch_size;
# num_samples feeds --num_samples.
batch_size=1
num_samples=1

# Selects the dataset sub-directory below and is also part of the log path.
dataset_name=ffhq

# Expanded unquoted where the command is launched, so this must stay a
# whitespace-separated flag string (no spaces inside the path).
DATASET_FLAGS="--data_dir /mnt/yslan/datasets/cache/lmdb_debug/${dataset_name}"
|
|
# Base learning rate (forwarded as --lr) and KL weight (forwarded as
# --kl_lambda).
lr=2e-5
kl_lambda=0

# The encoder and ViT-decoder rates both alias vit_lr, so changing vit_lr
# changes the two of them together.
vit_lr=1e-5
encoder_lr=$vit_lr
vit_decoder_lr=$vit_lr

# The triplane-decoder and super-resolution rates both alias conv_lr.
conv_lr=0.0005
triplane_decoder_lr=$conv_lr
super_resolution_lr=$conv_lr

# Forwarded as --scale_clip_encoding and --triplane_scaling_divider.
scale_clip_encoding=18.4
triplane_scaling_divider=1
|
|
# Checkpoint to resume from. Expanded unquoted at the call site, so it must
# stay a whitespace-separated flag string.
CKPT_FLAGS="--resume_checkpoint checkpoints/ffhq/model_joint_denoise_rec_model1580000.pt"

# Per-component learning rates. Inside double quotes a backslash-newline is a
# line continuation, so the value is a single line of flags.
LR_FLAGS="--encoder_lr $encoder_lr \
--vit_decoder_lr $vit_decoder_lr \
--triplane_decoder_lr $triplane_decoder_lr \
--super_resolution_lr $super_resolution_lr \
--lr $lr"
|
|
# General run flags, expanded unquoted at the call site.
#
# --image_size is listed once here; the original string carried the same
# "--image_size $image_size" pair twice.
# --iterations and --save_interval are given again on the torchrun command
# line further down in this script (there with --iterations 5000001); which
# occurrence takes effect is decided by the argument parser of the Python
# script -- presumably the later one, TODO confirm.
# NOTE(review): "--bg_lamdba" is kept byte-for-byte; presumably it matches the
# option name the sampling script defines -- confirm before renaming.
TRAIN_FLAGS="--iterations 10001 --anneal_lr False \
--batch_size $batch_size --save_interval 10000 \
--image_size_encoder $image_size_encoder \
--image_size $image_size \
--dino_version v2 \
--sr_training False \
--cls_token False \
--weight_decay 0.05 \
--kl_lambda ${kl_lambda} \
--no_dim_up_mlp True \
--uvit_skip_encoder True \
--fg_mse True \
--bg_lamdba 0.01 \
"
| |
|
|
|
|
# Denoiser architecture flags, expanded unquoted at the call site.
# 4,2,1 is written bare: double quotes at that position would terminate the
# surrounding string instead of quoting the value.
DDPM_MODEL_FLAGS="
--learn_sigma False \
--num_heads 8 \
--num_res_blocks 2 \
--num_channels 320 \
--attention_resolutions 4,2,1 \
--use_spatial_transformer True \
--transformer_depth 1 \
--context_dim 768 \
"
|
|
|
|
# Diffusion process / trainer flags, expanded unquoted at the call site.
# triplane_scaling_divider is substituted when this assignment runs, so it
# must already be set at this point in the script.
DIFFUSION_FLAGS="--diffusion_steps 1000 --noise_schedule linear \
--use_kl False \
--use_amp False \
--triplane_scaling_divider ${triplane_scaling_divider} \
--trainer_name vpsde_crossattn \
--mixed_prediction True \
--denoise_in_channels 12 \
--denoise_out_channels 12 \
--diffusion_input_size 32 \
--p_rendering_loss False \
--pred_type v \
--predict_v True \
"
|
|
# Sampler flags, expanded unquoted at the call site.
DDIM_FLAGS="
--timestep_respacing ddim250 \
--use_ddim True \
--unconditional_guidance_scale 6.5 \
"

# Control-related flags, expanded unquoted at the call site.
CONTROL_FLAGS="
--train_vae False \
--create_controlnet False \
--control_key img_sr \
"
|
|
# Text prompt; passed to the sampling script as a single quoted argument.
prompt="a middle aged woman with brown hair, wearing glasses."

# Output directory. dataset_name is substituted when this assignment runs.
logdir="./logs/LSGM/inference/t23d/${dataset_name}/crossattn-v1-ddim250/T23D_test/woman_glass-newcls"

# Autoencoder flag preset, expanded unquoted at the call site. $logdir is
# embedded without quotes, so the log path must not contain whitespace.
SR_TRAIN_FLAGS_v1_2XC="
--decoder_in_chans 32 \
--out_chans 96 \
--alpha_lambda 1 \
--logdir $logdir \
--arch_encoder vits \
--arch_decoder vitb \
--vit_decoder_wd 0.001 \
--encoder_weight_decay 0.001 \
--color_criterion mse \
--triplane_in_chans 32 \
--decoder_output_dim 32 \
--ae_classname vit.vit_triplane.VAE_LDM_V4_vit3D_v3_conv3D_depth2_xformer_mha_PEinit_2d_sincos_uvit_RodinRollOutConv_4x4_lite_mlp_unshuffle_4XC_final \
"

# The preset that is actually handed to the launcher.
SR_TRAIN_FLAGS=${SR_TRAIN_FLAGS_v1_2XC}
|
|
# Number of worker processes; forwarded to torchrun as --nproc_per_node.
NUM_GPUS=1

# Start from a clean "runs" sub-directory. ${logdir:?} aborts the script when
# logdir is unset or empty, so this can never expand to "rm -rf /runs".
rm -rf -- "${logdir:?}/runs"

# Without a log directory there is nowhere to write results, so stop here
# instead of launching the job.
mkdir -p -- "$logdir" || exit 1

# Keep a copy of this launcher next to the outputs. Best-effort: a failed
# copy does not stop the run.
cp -- "$0" "$logdir"/

# Environment for the launched processes.
export OMP_NUM_THREADS=12
export NCCL_ASYNC_ERROR_HANDLING=1
export CUDA_VISIBLE_DEVICES=6
|
|
# Launch scripts/vit_triplane_diffusion_sample.py on a single node via torchrun.
#
# - The *_FLAGS variables hold whitespace-separated flag strings, so they are
#   expanded unquoted on purpose (hence the ShellCheck directive); scalar
#   values are quoted.
# - --rdzv_endpoint uses the underscore spelling, matching the other torchrun
#   options on this command line.
# - --iterations, --save_interval and --logdir also occur inside the flag
#   strings expanded above them; which occurrence wins is decided by the
#   Python script's argument parser -- presumably the later one, TODO confirm.
# - The last line keeps its trailing line continuation exactly as in the
#   original, so any arguments that follow this point still attach to the
#   command.
# shellcheck disable=SC2086
torchrun --nproc_per_node="$NUM_GPUS" \
  --master_port=0 \
  --rdzv_backend=c10d \
  --rdzv_endpoint=localhost:33385 \
  --nnodes 1 \
  scripts/vit_triplane_diffusion_sample.py \
  --num_workers 4 \
  --depth_lambda 0 \
  ${TRAIN_FLAGS} \
  ${SR_TRAIN_FLAGS} \
  ${DIFFUSION_FLAGS} \
  ${CONTROL_FLAGS} \
  ${DDPM_MODEL_FLAGS} \
  ${DATASET_FLAGS} \
  ${CKPT_FLAGS} \
  ${LR_FLAGS} \
  --lpips_lambda "$lpips_lambda" \
  --overfitting False \
  --load_pretrain_encoder True \
  --iterations 5000001 \
  --save_interval 10000 \
  --eval_interval 2500 \
  --decomposed True \
  --logdir "$logdir" \
  --cfg ffhq \
  --patch_size "${patch_size}" \
  --eval_batch_size "${batch_size}" \
  --prompt "$prompt" \
  --interval 5 \
  --save_img True \
  --num_samples "${num_samples}" \
  --use_train_trajectory False \
  --normalize_clip_encoding True \
  --scale_clip_encoding "${scale_clip_encoding}" \
  --overwrite_diff_inp_size 16 \
  --use_lmdb True \
  ${DDIM_FLAGS} \