pgtformer-base / config.json
kepeng's picture
Push model using huggingface_hub.
bc0b7e3 verified
{
"adain": true,
"bottleneck_type": "rq",
"checkpointing": false,
"code_shape": [
32,
32,
1
],
"connect_list": [
"32",
"64",
"128",
"256"
],
"ddconfig": {
"attn_resolutions": [
32,
64,
128
],
"ch": 64,
"ch_mult": [
1,
2,
4,
4,
8
],
"depths": [
2,
2,
2,
2,
2
],
"double_z": false,
"dropout": 0.0,
"in_channels": 3,
"num_frames": 3,
"num_head": 8,
"num_heads": [
8,
8,
8,
8,
8
],
"num_res_blocks": 1,
"out_ch": 3,
"resolution": 512,
"stages_atten": 4,
"window_size": [
5,
5,
5
],
"window_sizes": [
[
4,
4
],
[
4,
4
],
[
4,
4
],
[
4,
4
],
[
4,
4
]
],
"z_channels": 256
},
"decay": 0.99,
"detach_16": true,
"dim_embd": 512,
"droprate": 0.0,
"embed_dim": 512,
"fix_modules": [
"quantizer",
"decoder",
"conditionnet"
],
"latent_loss_weight": 0.25,
"latent_shape": [
32,
32,
512
],
"loss_type": "mse",
"n_embed": 1024,
"n_head": 8,
"n_layers": 9,
"restart_unused_codes": true,
"shared_codebook": true,
"tf": 3,
"type": "PGTFormer",
"w": 1
}