File size: 3,340 Bytes
7ca9b42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# Trainer selection and settings shared with later sections through anchors.
trainer: TransmomoTrainer
# NOTE(review): the meaning of K is not documented in this file; confirm
# against the trainer before changing it.
K: 3
# Rotation axes, listed as (horizontal, depth, vertical).
# Anchored here and aliased as data.rotation_axes.
rotation_axes: &rotation_axes [0, 0, 1]
# If true, the spine is used as the vertical axis.
# Anchored here and aliased as autoencoder.body_reference.
body_reference: &body_reference true

# ---- Model ----
# Number of body joints.
n_joints: 15
# Length of a motion sequence.
seq_len: 64

# ---- Logging, validation and snapshots ----
# NOTE(review): these keys carry no documentation in this file. The names
# suggest iteration intervals (log / validate / save a snapshot) plus a
# count of validation batches; confirm against the trainer.
log_iter: 40
val_iter: 400
val_batches: 10
snapshot_save_iter: 20000

# ---- Optimization ----
# Maximum number of training iterations.
max_iter: 200000
# Batch size.
batch_size: 64

# Initialization scheme; one of gaussian / kaiming / xavier / orthogonal.
init: kaiming

# Adam parameters and weight decay.
beta1: 0.5
beta2: 0.999
weight_decay: 0.0001

# Learning rate: initial value, scheduler, how often to decay, and how
# much to decay.
lr: 0.0002
lr_policy: step
step_size: 20000
gamma: 0.5

# ---- Loss weights ----
# Weight of the GAN loss.
trans_gan_w: 2
# If set > 0, limb-scaled data is treated as "real" data.
trans_gan_ls_w: 0

# Weights of the reconstruction and cross reconstruction losses.
recon_x_w: 10
cross_x_w: 4

# Invariance losses against limb scale: view, then motion.
inv_v_ls_w: 2
inv_m_ls_w: 2
# Invariance losses against rotation: body, then motion.
inv_b_trans_w: 2
inv_m_trans_w: 2

# Triplet losses: body weight, view weight, margin, and the range of
# negative examples.
triplet_b_w: 10
triplet_v_w: 10
triplet_margin: 0.2
triplet_neg_range: [0.0, 0.5]

# ---- Networks ----
# Autoencoder: three encoder sections (motion, body, view) and one decoder.
autoencoder:
  cls: Autoencoder3f
  # Alias of the top-level body_reference setting.
  body_reference: *body_reference

  motion_encoder:
    cls: ConvEncoder
    channels: [30, 64, 128, 128]
    kernel_size: 8
    padding: 3
    conv_stride: 2
    conv_pool: null

  # Same layout as view_encoder below, but with average pooling.
  body_encoder:
    cls: ConvEncoder
    channels: [28, 64, 128, 256]
    kernel_size: 7
    padding: 2
    conv_stride: 1
    conv_pool: AvgPool1d
    global_pool: avg_pool1d

  # Same layout as body_encoder above, but with max pooling.
  view_encoder:
    cls: ConvEncoder
    channels: [28, 64, 32, 8]
    kernel_size: 7
    padding: 2
    conv_stride: 1
    conv_pool: MaxPool1d
    global_pool: max_pool1d

  decoder:
    # NOTE(review): 392 = 128 + 256 + 8, i.e. the sum of the last channel
    # counts of the three encoders above; presumably their outputs are
    # concatenated before decoding. Confirm before editing either side.
    channels: [392, 256, 128, 45]
    kernel_size: 7

# Discriminator. Its kernel_size, padding, conv_stride and conv_pool values
# equal those of autoencoder.motion_encoder; only the channels differ.
discriminator:
  gan_type: lsgan
  encoder_cls: ConvEncoder
  channels: [30, 64, 96, 128]
  kernel_size: 8
  padding: 3
  conv_stride: 2
  conv_pool: null

# Body discriminator.
# NOTE(review): 512 is twice the last channel count of
# autoencoder.body_encoder (256); presumably the two are coupled. Confirm
# before changing either value.
body_discriminator:
  channels: [512, 128, 32]
  gan_type: lsgan

# ---- Data ----
data:
  # Dataset classes and their directories.
  train_cls: SoloDanceDataset
  train_dir: './data/solo_dance/train'
  eval_cls: MixamoDataset
  test_dir: './data/mixamo/36_800_24/test'

  # Limb scale: range of gamma_g, then range of the gammas.
  global_range: [0.5, 2.0]
  local_range: [0.5, 2.0]
  # Alias of the top-level rotation_axes setting.
  rotation_axes: *rotation_axes

  # NOTE(review): `unit` is not documented in this file; confirm its meaning
  # against the dataset classes.
  unit: 128
  num_workers: 4

  # Mean / std pose files.
  # NOTE(review): the train_* paths point at the same Mixamo files as the
  # test_* paths even though train_cls is SoloDanceDataset; confirm this is
  # intended.
  train_meanpose_path: './data/mixamo/36_800_24/meanpose_with_view.npy'
  train_stdpose_path: './data/mixamo/36_800_24/stdpose_with_view.npy'
  test_meanpose_path: './data/mixamo/36_800_24/meanpose_with_view.npy'
  test_stdpose_path: './data/mixamo/36_800_24/stdpose_with_view.npy'