{
  "adain": true,
  "bottleneck_type": "rq",
  "checkpointing": false,
  "code_shape": [
    32,
    32,
    1
  ],
  "connect_list": [
    "32",
    "64",
    "128",
    "256"
  ],
  "ddconfig": {
    "attn_resolutions": [
      32,
      64,
      128
    ],
    "ch": 64,
    "ch_mult": [
      1,
      2,
      4,
      4,
      8
    ],
    "depths": [
      2,
      2,
      2,
      2,
      2
    ],
    "double_z": false,
    "dropout": 0.0,
    "in_channels": 3,
    "num_frames": 3,
    "num_head": 8,
    "num_heads": [
      8,
      8,
      8,
      8,
      8
    ],
    "num_res_blocks": 1,
    "out_ch": 3,
    "resolution": 512,
    "stages_atten": 4,
    "window_size": [
      5,
      5,
      5
    ],
    "window_sizes": [
      [
        4,
        4
      ],
      [
        4,
        4
      ],
      [
        4,
        4
      ],
      [
        4,
        4
      ],
      [
        4,
        4
      ]
    ],
    "z_channels": 256
  },
  "decay": 0.99,
  "detach_16": true,
  "dim_embd": 512,
  "droprate": 0.0,
  "embed_dim": 512,
  "fix_modules": [
    "quantizer",
    "decoder",
    "conditionnet"
  ],
  "latent_loss_weight": 0.25,
  "latent_shape": [
    32,
    32,
    512
  ],
  "loss_type": "mse",
  "n_embed": 1024,
  "n_head": 8,
  "n_layers": 9,
  "restart_unused_codes": true,
  "shared_codebook": true,
  "tf": 3,
  "type": "PGTFormer",
  "w": 1
}