---
# Training configuration for the TransmomoTrainer.
#
# Layout: top-level scalars hold trainer, logging, optimisation and loss-weight
# settings; `autoencoder`, `discriminator` and `body_discriminator` describe
# the networks; `data` describes datasets and preprocessing.
#
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose line breaks had been collapsed. Repeated keys (`cls`, `channels`,
# `padding`, `kernel_size`, `gan_type`, `rotation_axes`, `body_reference`)
# force the grouping shown here, but confirm against the consuming code that
# `discriminator` / `body_discriminator` are top-level and that every key
# listed under `data` belongs there.

trainer: TransmomoTrainer
K: 3

# Anchors defined here are reused further down via *rotation_axes and
# *body_reference so both places always carry the same value.
rotation_axes: &rotation_axes [0, 0, 1]  # horizontal, depth, vertical
# if set True, will use spine as vertical axis
body_reference: &body_reference true

# model options
n_joints: 15  # number of body joints
seq_len: 64  # length of motion sequence

# logger options
snapshot_save_iter: 20000
log_iter: 40
val_iter: 400
val_batches: 10

# optimization options
max_iter: 200000  # maximum number of training iterations
batch_size: 64  # batch size
weight_decay: 0.0001  # weight decay
beta1: 0.5  # Adam parameter
beta2: 0.999  # Adam parameter
init: kaiming  # initialization [gaussian/kaiming/xavier/orthogonal]
lr: 0.0002  # initial learning rate
lr_policy: step  # learning rate scheduler
step_size: 20000  # how often to decay learning rate
gamma: 0.5  # how much to decay learning rate

# loss weights
trans_gan_w: 2  # weight of GAN loss
trans_gan_ls_w: 0  # if set > 0, will treat limb-scaled data as "real" data
recon_x_w: 10  # weight of reconstruction loss
cross_x_w: 4  # weight of cross reconstruction loss
inv_v_ls_w: 2  # weight of view invariance loss against limb scale
inv_m_ls_w: 2  # weight of motion invariance loss against limb scale
inv_b_trans_w: 2  # weight of body invariance loss against rotation
inv_m_trans_w: 2  # weight of motion invariance loss against rotation

triplet_b_w: 10  # weight of body triplet loss
triplet_v_w: 10  # weight of view triplet loss
triplet_margin: 0.2  # triplet loss: margin
triplet_neg_range: [0.0, 0.5]  # triplet loss: range of negative examples

# network options
autoencoder:
  cls: Autoencoder3f
  body_reference: *body_reference
  motion_encoder:
    cls: ConvEncoder
    channels: [30, 64, 128, 128]
    padding: 3
    kernel_size: 8
    conv_stride: 2
    conv_pool: null
  body_encoder:
    cls: ConvEncoder
    channels: [28, 64, 128, 256]
    padding: 2
    kernel_size: 7
    conv_stride: 1
    conv_pool: AvgPool1d
    global_pool: avg_pool1d
  view_encoder:
    cls: ConvEncoder
    channels: [28, 64, 32, 8]
    padding: 2
    kernel_size: 7
    conv_stride: 1
    conv_pool: MaxPool1d
    global_pool: max_pool1d
  decoder:
    # First entry equals the sum of the last channel of the three encoders
    # above (128 + 256 + 8 = 392); keep them in sync when editing either.
    channels: [392, 256, 128, 45]
    kernel_size: 7

discriminator:
  encoder_cls: ConvEncoder
  gan_type: lsgan
  channels: [30, 64, 96, 128]
  padding: 3
  kernel_size: 8
  conv_stride: 2
  conv_pool: null

body_discriminator:
  gan_type: lsgan
  channels: [512, 128, 32]

# data options
data:
  train_cls: SoloDanceDataset
  eval_cls: MixamoDataset
  global_range: [0.5, 2.0]  # limb scale: range of gamma_g
  local_range: [0.5, 2.0]  # limb scale: range of the gammas
  rotation_axes: *rotation_axes
  unit: 128
  train_dir: ./data/solo_dance/train
  test_dir: ./data/mixamo/36_800_24/test
  num_workers: 4
  # NOTE(review): the train_* statistics point at the same Mixamo files as the
  # test_* ones even though train_dir is the solo_dance set; confirm this is
  # intentional.
  train_meanpose_path: ./data/mixamo/36_800_24/meanpose_with_view.npy
  train_stdpose_path: ./data/mixamo/36_800_24/stdpose_with_view.npy
  test_meanpose_path: ./data/mixamo/36_800_24/meanpose_with_view.npy
  test_stdpose_path: ./data/mixamo/36_800_24/stdpose_with_view.npy