# momo / configs /transmomo.yaml
# Fazhong Liu
# fin
# 7ca9b42
# general options
trainer: TransmomoTrainer
# NOTE(review): K is not described in this file — confirm its meaning in the trainer.
K: 3
# Axis order: horizontal, depth, vertical.
# Anchored so the same value can be aliased by the data options.
rotation_axes: &rotation_axes [0, 0, 1]
# If set to true, the spine is used as the vertical axis.
# Anchored so the same value can be aliased by the autoencoder options.
body_reference: &body_reference true
# model options
# number of body joints
n_joints: 15
# length of a motion sequence
seq_len: 64
# logger options
# NOTE(review): the *_iter values are presumably counted in training
# iterations (compare max_iter) — confirm against the trainer.
snapshot_save_iter: 20000
log_iter: 40
val_iter: 400
val_batches: 10
# optimization options
max_iter: 200000                # maximum number of training iterations
batch_size: 64                  # batch size
weight_decay: 0.0001            # weight decay
beta1: 0.5                      # Adam parameter
beta2: 0.999                    # Adam parameter
init: kaiming                   # initialization [gaussian/kaiming/xavier/orthogonal]
lr: 0.0002                      # initial learning rate
lr_policy: step                 # learning rate scheduler
step_size: 20000                # how often to decay learning rate
gamma: 0.5                      # how much to decay learning rate

# loss weights
trans_gan_w: 2                  # weight of GAN loss
trans_gan_ls_w: 0               # if set > 0, will treat limb-scaled data as "real" data
recon_x_w: 10                   # weight of reconstruction loss
cross_x_w: 4                    # weight of cross reconstruction loss
inv_v_ls_w: 2                   # weight of view invariance loss against limb scale
inv_m_ls_w: 2                   # weight of motion invariance loss against limb scale
inv_b_trans_w: 2                # weight of body invariance loss against rotation
inv_m_trans_w: 2                # weight of motion invariance loss against rotation
triplet_b_w: 10                 # weight of body triplet loss
triplet_v_w: 10                 # weight of view triplet loss
triplet_margin: 0.2             # triplet loss: margin
triplet_neg_range: [0.0, 0.5]   # triplet loss: range of negative examples
# network options
# Autoencoder settings: three ConvEncoder sub-configs (motion, body, view)
# and one decoder, each nested under its own key.
autoencoder:
  cls: Autoencoder3f
  body_reference: *body_reference  # alias of the top-level body_reference value
  motion_encoder:
    cls: ConvEncoder
    channels: [30, 64, 128, 128]
    padding: 3
    kernel_size: 8
    conv_stride: 2
    conv_pool: null
  body_encoder:
    cls: ConvEncoder
    channels: [28, 64, 128, 256]
    padding: 2
    kernel_size: 7
    conv_stride: 1
    conv_pool: AvgPool1d
    global_pool: avg_pool1d
  view_encoder:
    cls: ConvEncoder
    channels: [28, 64, 32, 8]
    padding: 2
    kernel_size: 7
    conv_stride: 1
    conv_pool: MaxPool1d
    global_pool: max_pool1d
  decoder:
    # 392 = 128 + 256 + 8, the last channel counts of the three encoders above.
    # NOTE(review): presumably their codes are concatenated — confirm in the model.
    channels: [392, 256, 128, 45]
    kernel_size: 7
# Discriminator settings: encoder class, GAN type and convolution
# hyper-parameters, nested under the `discriminator` key.
discriminator:
  encoder_cls: ConvEncoder
  gan_type: lsgan
  channels: [30, 64, 96, 128]
  padding: 3
  kernel_size: 8
  conv_stride: 2
  conv_pool: null
# Body discriminator settings: GAN type and channel sizes, nested under
# the `body_discriminator` key.
body_discriminator:
  gan_type: lsgan
  channels: [512, 128, 32]
# data options
# Dataset classes, limb-scale augmentation ranges, loader settings and the
# normalization statistics files, all nested under the `data` key.
data:
  train_cls: MixamoLimbScaleDataset
  eval_cls: MixamoDataset
  global_range: [0.5, 2.0]  # limb scale: range of gamma_g
  local_range: [0.5, 2.0]  # limb scale: range of the gammas
  rotation_axes: *rotation_axes  # alias of the top-level rotation_axes value
  unit: 128
  # NOTE(review): train_dir / test_dir are disabled below — confirm the
  # dataset classes do not require them, or set them for your environment.
  # train_dir: ./data/mixamo/36_800_24/train
  # test_dir: ./data/mixamo/36_800_24/test
  num_workers: 4
  train_meanpose_path: ./data/meanpose_with_view.npy
  train_stdpose_path: ./data/stdpose_with_view.npy
  test_meanpose_path: ./data/meanpose_with_view.npy
  test_stdpose_path: ./data/stdpose_with_view.npy