Fazhong Liu committed
Commit: 7ca9b42 (parent: a4660a7)
Commit message: fin

Files changed:
- .gitattributes +35 -35
- .gitignore +1 -0
- README.md +4 -4
- app.py +114 -0
- configs/transmomo.yaml +100 -0
- configs/transmomo_solo_dance.yaml +100 -0
- data/meanpose_with_view.npy +0 -0
- data/mse_description.json +1 -0
- data/stdpose_with_view.npy +0 -0
- lib/__init__.py +0 -0
- lib/__pycache__/__init__.cpython-38.pyc +0 -0
- lib/__pycache__/data.cpython-38.pyc +0 -0
- lib/__pycache__/network.cpython-38.pyc +0 -0
- lib/__pycache__/operation.cpython-38.pyc +0 -0
- lib/data.py +421 -0
- lib/loss.py +57 -0
- lib/network.py +356 -0
- lib/operation.py +219 -0
- lib/trainer.py +298 -0
- lib/util/__init__.py +0 -0
- lib/util/__pycache__/__init__.cpython-37.pyc +0 -0
- lib/util/__pycache__/__init__.cpython-38.pyc +0 -0
- lib/util/__pycache__/general.cpython-37.pyc +0 -0
- lib/util/__pycache__/general.cpython-38.pyc +0 -0
- lib/util/__pycache__/motion.cpython-37.pyc +0 -0
- lib/util/__pycache__/motion.cpython-38.pyc +0 -0
- lib/util/__pycache__/visualization.cpython-37.pyc +0 -0
- lib/util/__pycache__/visualization.cpython-38.pyc +0 -0
- lib/util/general.py +361 -0
- lib/util/global_norm.py +29 -0
- lib/util/motion.py +309 -0
- lib/util/visualization.py +448 -0
- requirements.txt +17 -0
.gitattributes
CHANGED
@@ -1,35 +1,35 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text
+# *.7z filter=lfs diff=lfs merge=lfs -text
+# *.arrow filter=lfs diff=lfs merge=lfs -text
+# *.bin filter=lfs diff=lfs merge=lfs -text
+# *.bz2 filter=lfs diff=lfs merge=lfs -text
+# *.ckpt filter=lfs diff=lfs merge=lfs -text
+# *.ftz filter=lfs diff=lfs merge=lfs -text
+# *.gz filter=lfs diff=lfs merge=lfs -text
+# *.h5 filter=lfs diff=lfs merge=lfs -text
+# *.joblib filter=lfs diff=lfs merge=lfs -text
+# *.lfs.* filter=lfs diff=lfs merge=lfs -text
+# *.mlmodel filter=lfs diff=lfs merge=lfs -text
+# *.model filter=lfs diff=lfs merge=lfs -text
+# *.msgpack filter=lfs diff=lfs merge=lfs -text
+# *.npy filter=lfs diff=lfs merge=lfs -text
+# *.npz filter=lfs diff=lfs merge=lfs -text
+# *.onnx filter=lfs diff=lfs merge=lfs -text
+# *.ot filter=lfs diff=lfs merge=lfs -text
+# *.parquet filter=lfs diff=lfs merge=lfs -text
+# *.pb filter=lfs diff=lfs merge=lfs -text
+# *.pickle filter=lfs diff=lfs merge=lfs -text
+# *.pkl filter=lfs diff=lfs merge=lfs -text
+# *.pt filter=lfs diff=lfs merge=lfs -text
+# *.pth filter=lfs diff=lfs merge=lfs -text
+# *.rar filter=lfs diff=lfs merge=lfs -text
+# *.safetensors filter=lfs diff=lfs merge=lfs -text
+# saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+# *.tar.* filter=lfs diff=lfs merge=lfs -text
+# *.tar filter=lfs diff=lfs merge=lfs -text
+# *.tflite filter=lfs diff=lfs merge=lfs -text
+# *.tgz filter=lfs diff=lfs merge=lfs -text
+# *.wasm filter=lfs diff=lfs merge=lfs -text
+# *.xz filter=lfs diff=lfs merge=lfs -text
+# *.zip filter=lfs diff=lfs merge=lfs -text
+# *.zst filter=lfs diff=lfs merge=lfs -text
+# *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1 @@
+*.pt
README.md
CHANGED
@@ -1,8 +1,8 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: Transmomo
+emoji: 📈
+colorFrom: indigo
+colorTo: red
 sdk: gradio
 sdk_version: 4.24.0
 app_file: app.py
app.py
ADDED
@@ -0,0 +1,114 @@
import time
import shutil
import gradio as gr
import os
import json
import torch
import argparse
import numpy as np
from lib.data import get_meanpose
from lib.network import get_autoencoder
from lib.util.motion import preprocess_mixamo, preprocess_test, postprocess
from lib.util.general import get_config
from lib.operation import rotate_and_maybe_project_world
from itertools import combinations
from lib.util.visualization import motion2video
from PIL import Image

def load_and_preprocess(path, config, mean_pose, std_pose):

    motion3d = np.load(path)

    # length must be multiples of 8 due to the size of convolution
    _, _, T = motion3d.shape
    T = (T // 8) * 8
    motion3d = motion3d[:, :, :T]

    # project to 2d
    motion_proj = motion3d[:, [0, 2], :]

    # reformat for mixamo data
    motion_proj = preprocess_mixamo(motion_proj, unit=1.0)

    # preprocess for network input
    motion_proj, start = preprocess_test(motion_proj, mean_pose, std_pose, config.data.unit)
    motion_proj = motion_proj.reshape((-1, motion_proj.shape[-1]))
    motion_proj = torch.from_numpy(motion_proj).float()

    return motion_proj, start

def handle_motion_generation(npy1, npy2):
    path1 = './data/a.npy'
    path2 = './data/b.npy'
    np.save(path1, npy1)
    np.save(path2, npy2)
    config_path = './configs/transmomo.yaml'  # replace with your config file path
    description_path = "./data/mse_description.json"
    checkpoint_path = './data/autoencoder_00200000.pt'
    out_dir_path = './output'  # replace with your output directory path

    config = get_config(config_path)
    ae = get_autoencoder(config)
    ae.load_state_dict(torch.load(checkpoint_path))
    ae.cuda()
    ae.eval()
    mean_pose, std_pose = get_meanpose("test", config.data)
    # print("loaded model")

    description = json.load(open(description_path))
    chars = list(description.keys())

    os.makedirs(out_dir_path, exist_ok=True)

    # path1 = '/home/fazhong/studio/transmomo.pytorch/data/mixamo/36_800_24/test/PUMPKINHULK_L/Back_Squat/motions/2.npy'
    # path2 = '/home/fazhong/studio/transmomo.pytorch/data/mixamo/36_800_24/test/PUMPKINHULK_L/Golf_Post_Shot/motions/3.npy'
    out_path1 = os.path.join(out_dir_path, "adv.npy")

    x_a, x_a_start = load_and_preprocess(path1, config, mean_pose, std_pose)
    x_b, x_b_start = load_and_preprocess(path2, config, mean_pose, std_pose)

    x_a_batch = x_a.unsqueeze(0).cuda()
    x_b_batch = x_b.unsqueeze(0).cuda()

    x_ab = ae.cross2d(x_a_batch, x_b_batch, x_a_batch)
    x_ab = postprocess(x_ab, mean_pose, std_pose, config.data.unit, start=x_a_start)

    np.save(out_path1, x_ab)
    motion_data = x_ab
    height = 512  # video height
    width = 512  # video width
    save_path = './an.mp4'  # path to save the video
    colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]  # joint colors
    bg_color = (255, 255, 255)  # background color
    fps = 25  # video frame rate

    # print(motion_data.shape)
    # call the function to generate the video
    motion2video(motion_data, height, width, save_path, colors, bg_color=bg_color, transparency=False, fps=fps)
    first_frame_image = Image.open('./an-frames/0000.png')
    return first_frame_image
    # print('hi')

with gr.Blocks() as demo:
    gr.Markdown("Upload two `.npy` files to generate motion and visualize the first frame of the output animation.")

    with gr.Row():
        file1 = gr.File(file_types=[".npy"], label="Upload first .npy file")
        file2 = gr.File(file_types=[".npy"], label="Upload second .npy file")

    with gr.Row():
        generate_btn = gr.Button("Generate Motion")

    output_image = gr.Image(label="First Frame of the Generated Animation")

    generate_btn.click(
        fn=handle_motion_generation,
        inputs=[file1, file2],
        outputs=output_image
    )


if __name__ == "__main__":
    # tsp_page.launch(debug = True)
    demo.launch()
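Note (editorial addition, not part of the commit): load_and_preprocess above reads each uploaded .npy as a (joints, 3, frames) array, truncates the frame count to a multiple of 8, and projects onto the x/z plane before normalization. A minimal sketch of writing a file with that layout, using placeholder dimensions; real inputs must follow the Mixamo joint ordering expected by lib.util.motion.preprocess_mixamo.

    # Sketch only: writes an .npy in the layout load_and_preprocess reads.
    import numpy as np

    joints, frames = 31, 128                           # placeholder sizes, not prescribed by the commit
    motion3d = np.zeros((joints, 3, frames), dtype=np.float32)
    np.save("./data/example_motion.npy", motion3d)     # hypothetical path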
configs/transmomo.yaml
ADDED
@@ -0,0 +1,100 @@
trainer: TransmomoTrainer
K: 3
rotation_axes: &rotation_axes [0, 0, 1] # horizontal, depth, vertical
body_reference: &body_reference True # if set True, will use spine as vertical axis

# model options
n_joints: 15 # number of body joints
seq_len: 64 # length of motion sequence

# logger options
snapshot_save_iter: 20000
log_iter: 40
val_iter: 400
val_batches: 10

# optimization options
max_iter: 200000 # maximum number of training iterations
batch_size: 64 # batch size
weight_decay: 0.0001 # weight decay
beta1: 0.5 # Adam parameter
beta2: 0.999 # Adam parameter
init: kaiming # initialization [gaussian/kaiming/xavier/orthogonal]
lr: 0.0002 # initial learning rate
lr_policy: step # learning rate scheduler
step_size: 20000 # how often to decay learning rate
gamma: 0.5 # how much to decay learning rate

trans_gan_w: 2 # weight of GAN loss
trans_gan_ls_w: 0 # if set > 0, will treat limb-scaled data as "real" data
recon_x_w: 10 # weight of reconstruction loss
cross_x_w: 4 # weight of cross reconstruction loss
inv_v_ls_w: 2 # weight of view invariance loss against limb scale
inv_m_ls_w: 2 # weight of motion invariance loss against limb scale
inv_b_trans_w: 2 # weight of body invariance loss against rotation
inv_m_trans_w: 2 # weight of motion invariance loss against rotation

triplet_b_w: 10 # weight of body triplet loss
triplet_v_w: 10 # weight of view triplet loss
triplet_margin: 0.2 # triplet loss: margin
triplet_neg_range: [0.0, 0.5] # triplet loss: range of negative examples

# network options
autoencoder:
  cls: Autoencoder3f
  body_reference: *body_reference
  motion_encoder:
    cls: ConvEncoder
    channels: [30, 64, 128, 128]
    padding: 3
    kernel_size: 8
    conv_stride: 2
    conv_pool: null
  body_encoder:
    cls: ConvEncoder
    channels: [28, 64, 128, 256]
    padding: 2
    kernel_size: 7
    conv_stride: 1
    conv_pool: AvgPool1d
    global_pool: avg_pool1d
  view_encoder:
    cls: ConvEncoder
    channels: [28, 64, 32, 8]
    padding: 2
    kernel_size: 7
    conv_stride: 1
    conv_pool: MaxPool1d
    global_pool: max_pool1d
  decoder:
    channels: [392, 256, 128, 45]
    kernel_size: 7

discriminator:
  encoder_cls: ConvEncoder
  gan_type: lsgan
  channels: [30, 64, 96, 128]
  padding: 3
  kernel_size: 8
  conv_stride: 2
  conv_pool: null

body_discriminator:
  gan_type: lsgan
  channels: [512, 128, 32]

# data options
data:
  train_cls: MixamoLimbScaleDataset
  eval_cls: MixamoDataset
  global_range: [0.5, 2.0] # limb scale: range of gamma_g
  local_range: [0.5, 2.0] # limb scale: range of the gammas
  rotation_axes: *rotation_axes
  unit: 128
  # train_dir: ./data/mixamo/36_800_24/train
  # test_dir: ./data/mixamo/36_800_24/test
  num_workers: 4
  train_meanpose_path: ./data/meanpose_with_view.npy
  train_stdpose_path: ./data/stdpose_with_view.npy
  test_meanpose_path: ./data/meanpose_with_view.npy
  test_stdpose_path: ./data/stdpose_with_view.npy
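Note (editorial addition, not part of the commit): elsewhere in this commit the config is read through lib.util.general.get_config and then treated as an attribute-accessible mapping, and lib.network.get_autoencoder instantiates the class named under autoencoder.cls. A minimal sketch of that wiring:

    # Sketch only: how this YAML is consumed by the rest of the repo.
    from lib.util.general import get_config
    from lib.network import get_autoencoder

    config = get_config("./configs/transmomo.yaml")
    print(config.autoencoder.cls)    # "Autoencoder3f"
    ae = get_autoencoder(config)     # builds the autoencoder described above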
configs/transmomo_solo_dance.yaml
ADDED
@@ -0,0 +1,100 @@
trainer: TransmomoTrainer
K: 3
rotation_axes: &rotation_axes [0, 0, 1] # horizontal, depth, vertical
body_reference: &body_reference True # if set True, will use spine as vertical axis

# model options
n_joints: 15 # number of body joints
seq_len: 64 # length of motion sequence

# logger options
snapshot_save_iter: 20000
log_iter: 40
val_iter: 400
val_batches: 10

# optimization options
max_iter: 200000 # maximum number of training iterations
batch_size: 64 # batch size
weight_decay: 0.0001 # weight decay
beta1: 0.5 # Adam parameter
beta2: 0.999 # Adam parameter
init: kaiming # initialization [gaussian/kaiming/xavier/orthogonal]
lr: 0.0002 # initial learning rate
lr_policy: step # learning rate scheduler
step_size: 20000 # how often to decay learning rate
gamma: 0.5 # how much to decay learning rate

trans_gan_w: 2 # weight of GAN loss
trans_gan_ls_w: 0 # if set > 0, will treat limb-scaled data as "real" data
recon_x_w: 10 # weight of reconstruction loss
cross_x_w: 4 # weight of cross reconstruction loss
inv_v_ls_w: 2 # weight of view invariance loss against limb scale
inv_m_ls_w: 2 # weight of motion invariance loss against limb scale
inv_b_trans_w: 2 # weight of body invariance loss against rotation
inv_m_trans_w: 2 # weight of motion invariance loss against rotation

triplet_b_w: 10 # weight of body triplet loss
triplet_v_w: 10 # weight of view triplet loss
triplet_margin: 0.2 # triplet loss: margin
triplet_neg_range: [0.0, 0.5] # triplet loss: range of negative examples

# network options
autoencoder:
  cls: Autoencoder3f
  body_reference: *body_reference
  motion_encoder:
    cls: ConvEncoder
    channels: [30, 64, 128, 128]
    padding: 3
    kernel_size: 8
    conv_stride: 2
    conv_pool: null
  body_encoder:
    cls: ConvEncoder
    channels: [28, 64, 128, 256]
    padding: 2
    kernel_size: 7
    conv_stride: 1
    conv_pool: AvgPool1d
    global_pool: avg_pool1d
  view_encoder:
    cls: ConvEncoder
    channels: [28, 64, 32, 8]
    padding: 2
    kernel_size: 7
    conv_stride: 1
    conv_pool: MaxPool1d
    global_pool: max_pool1d
  decoder:
    channels: [392, 256, 128, 45]
    kernel_size: 7

discriminator:
  encoder_cls: ConvEncoder
  gan_type: lsgan
  channels: [30, 64, 96, 128]
  padding: 3
  kernel_size: 8
  conv_stride: 2
  conv_pool: null

body_discriminator:
  gan_type: lsgan
  channels: [512, 128, 32]

# data options
data:
  train_cls: SoloDanceDataset
  eval_cls: MixamoDataset
  global_range: [0.5, 2.0] # limb scale: range of gamma_g
  local_range: [0.5, 2.0] # limb scale: range of the gammas
  rotation_axes: *rotation_axes
  unit: 128
  train_dir: ./data/solo_dance/train
  test_dir: ./data/mixamo/36_800_24/test
  num_workers: 4
  train_meanpose_path: ./data/mixamo/36_800_24/meanpose_with_view.npy
  train_stdpose_path: ./data/mixamo/36_800_24/stdpose_with_view.npy
  test_meanpose_path: ./data/mixamo/36_800_24/meanpose_with_view.npy
  test_stdpose_path: ./data/mixamo/36_800_24/stdpose_with_view.npy
data/meanpose_with_view.npy
ADDED
Binary file (488 Bytes)
data/mse_description.json
ADDED
@@ -0,0 +1 @@
{"ANDROMEDA": ["Goalkeeper_Directing_(1)", "Standing_Aim_Idle_02_Looking", "Pilot_Flips_Switches_(1)", "Running_Tired", "Slide_Hip_Hop_Dance", "Military_Signaling_(3)", "Aim_Pistol", "Standing_Idle_(1)", "Drinking_Fountain", "Golf_Putt_Failure", "Standing_Torch_Burn_Webs", "Back_Squat", "Baseball_Pitching", "Dancing_Twerk", "Superhuman_Choke_Lift", "Zombie_Crawl"], "PUMPKINHULK_L": ["Front_Raises", "Rifle_Idle", "Defender", "Talking_Phone_Pacing", "Sitting_Clap_(2)", "Standing_Clap", "Samba_Dancing_(6)", "Golf_Bad_Shot_(1)", "Golf_Putt_Victory", "Grab_Rifle_And_Put_Back", "Salsa_Dancing_(4)", "Military_Signaling_(2)", "Robot_Hip_Hop_Dance", "Speedbag", "Jog_In_Circle", "Looking_Around"], "SPORTY_GRANY": ["Standing_Torch_Idle_04", "Look_Around_(1)", "Happy", "Standing_Torch_Inspect_Downward", "Quarterback_Pass", "Zombie_Stand_Up_(2)", "Struck_In_Head", "Shooting_Gun", "Zombie_Transition", "Hostage_Situation_Idle_-_Hostage", "Jazz_Dancing_(2)", "Samba_Dancing_(5)", "Knocked_Out_(1)", "Being_Electrocuted", "Falling_From_Losing_Balance", "Pulling_A_Rope"], "TY": ["Golf_Pre-Putt_(1)", "Back_Flip_To_Uppercut", "Standing_Torch_Idle_03", "Tonic_Seizure", "Talking_At_Watercooler", "Sitting_Clap", "Shuffling", "Tender_Placement_(1)", "Helping_Out", "Northern_Soul_Spin_Combo", "Golf_Post_Shot", "Salsa_Dancing_(3)", "Sword_And_Shield_Idle_(1)", "Hip_Hop_Dancing_(1)", "Golf_Tee_Up_(1)"]}
data/stdpose_with_view.npy
ADDED
Binary file (488 Bytes)
lib/__init__.py
ADDED
File without changes
lib/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (145 Bytes)
lib/__pycache__/data.cpython-38.pyc
ADDED
Binary file (12.6 kB)
lib/__pycache__/network.cpython-38.pyc
ADDED
Binary file (9.88 kB)
lib/__pycache__/operation.cpython-38.pyc
ADDED
Binary file (6.16 kB)
lib/data.py
ADDED
@@ -0,0 +1,421 @@
import sys, os
thismodule = sys.modules[__name__]

from lib.util.motion import preprocess_mixamo, rotate_motion_3d, limb_scale_motion_2d, normalize_motion, get_change_of_basis, localize_motion, scale_limbs

import torch
import glob
import numpy as np
import random
from torch.utils.data import Dataset, DataLoader
from easydict import EasyDict as edict
from tqdm import tqdm

view_angles = np.array([ i * np.pi / 6 for i in range(-3, 4)])

def get_dataloader(phase, config):

    config.data.batch_size = config.batch_size
    config.data.seq_len = config.seq_len
    dataset_cls_name = config.data.train_cls if phase == 'train' else config.data.eval_cls
    dataset_cls = getattr(thismodule, dataset_cls_name)
    dataset = dataset_cls(phase, config.data)

    dataloader = DataLoader(dataset, shuffle=(phase=='train'),
                            batch_size=config.batch_size,
                            num_workers=(config.data.num_workers if phase == 'train' else 1),
                            worker_init_fn=lambda _: np.random.seed(),
                            drop_last=True)

    return dataloader


class _MixamoDatasetBase(Dataset):
    def __init__(self, phase, config):
        super(_MixamoDatasetBase, self).__init__()

        assert phase in ['train', 'test']
        self.phase = phase
        self.data_root = config.train_dir if phase=='train' else config.test_dir
        self.meanpose_path = config.train_meanpose_path if phase=='train' else config.test_meanpose_path
        self.stdpose_path = config.train_stdpose_path if phase=='train' else config.test_stdpose_path
        self.unit = config.unit
        self.aug = (phase == 'train')
        self.character_names = sorted(os.listdir(self.data_root))

        items = glob.glob(os.path.join(self.data_root, self.character_names[0], '*/motions/*.npy'))
        self.motion_names = ['/'.join(x.split('/')[-3:]) for x in items]

        self.meanpose, self.stdpose = get_meanpose(phase, config)
        self.meanpose = self.meanpose.astype(np.float32)
        self.stdpose = self.stdpose.astype(np.float32)

        if 'preload' in config and config.preload:
            self.preload()
            self.cached = True
        else:
            self.cached = False

    def build_item(self, mot_name, char_name):
        """
        :param mot_name: animation_name/motions/xxx.npy
        :param char_name: character_name
        :return:
        """
        return os.path.join(self.data_root, char_name, mot_name)

    def load_item(self, item):
        if self.cached:
            data = self.cache[item]
        else:
            data = np.load(item)
        return data

    def preload(self):
        print("pre-loading into memory")
        pbar = tqdm(total=len(self))
        self.cache = {}
        for motion_name in self.motion_names:
            for character_name in self.character_names:
                item = self.build_item(motion_name, character_name)
                motion3d = np.load(item)
                self.cache[item] = motion3d
                pbar.update(1)

    @staticmethod
    def gen_aug_params(rotate=False):
        if rotate:
            params = {'ratio': np.random.uniform(0.8, 1.2),
                      'roll': np.random.uniform((-np.pi / 9, -np.pi / 9, -np.pi / 6), (np.pi / 9, np.pi / 9, np.pi / 6))}
        else:
            params = {'ratio': np.random.uniform(0.5, 1.5)}
        return edict(params)

    @staticmethod
    def augmentation(data, params=None):
        """
        :param data: numpy array of size (joints, 3, len_frames)
        :return:
        """
        if params is None:
            return data, params

        # rotate
        if 'roll' in params.keys():
            cx, cy, cz = np.cos(params.roll)
            sx, sy, sz = np.sin(params.roll)
            mat33_x = np.array([
                [1, 0, 0],
                [0, cx, -sx],
                [0, sx, cx]
            ], dtype='float')
            mat33_y = np.array([
                [cy, 0, sy],
                [0, 1, 0],
                [-sy, 0, cy]
            ], dtype='float')
            mat33_z = np.array([
                [cz, -sz, 0],
                [sz, cz, 0],
                [0, 0, 1]
            ], dtype='float')
            data = mat33_x @ mat33_y @ mat33_z @ data

        # scale
        if 'ratio' in params.keys():
            data = data * params.ratio

        return data, params

    def __getitem__(self, index):
        raise NotImplementedError

    def __len__(self):
        return len(self.motion_names) * len(self.character_names)


def get_meanpose(phase, config):

    meanpose_path = config.train_meanpose_path if phase == "train" else config.test_meanpose_path
    stdpose_path = config.train_stdpose_path if phase == "train" else config.test_stdpose_path

    if os.path.exists(meanpose_path) and os.path.exists(stdpose_path):
        meanpose = np.load(meanpose_path)
        stdpose = np.load(stdpose_path)
    else:
        meanpose, stdpose = gen_meanpose(phase, config)
        np.save(meanpose_path, meanpose)
        np.save(stdpose_path, stdpose)
        print("meanpose saved at {}".format(meanpose_path))
        print("stdpose saved at {}".format(stdpose_path))

    if meanpose.shape[-1] == 2:
        mean_x, mean_y = meanpose[:, 0], meanpose[:, 1]
        meanpose = np.stack([mean_x, mean_x, mean_y], axis=1)

    if stdpose.shape[-1] == 2:
        std_x, std_y = stdpose[:, 0], stdpose[:, 1]
        stdpose = np.stack([std_x, std_x, std_y], axis=1)

    return meanpose, stdpose


def gen_meanpose(phase, config, n_samp=20000):

    data_dir = config.train_dir if phase == "train" else config.test_dir
    all_paths = glob.glob(os.path.join(data_dir, '*/*/motions/*.npy'))
    random.shuffle(all_paths)
    all_paths = all_paths[:n_samp]
    all_joints = []

    print("computing meanpose and stdpose")

    for path in tqdm(all_paths):
        motion = np.load(path)
        if motion.shape[1] == 3:
            basis = None
            if sum(config.rotation_axes) > 0:
                x_angles = view_angles if config.rotation_axes[0] else np.array([0])
                z_angles = view_angles if config.rotation_axes[1] else np.array([0])
                y_angles = view_angles if config.rotation_axes[2] else np.array([0])
                x_angles, z_angles, y_angles = np.meshgrid(x_angles, z_angles, y_angles)
                angles = np.stack([x_angles.flatten(), z_angles.flatten(), y_angles.flatten()], axis=1)
                i = np.random.choice(len(angles))
                basis = get_change_of_basis(motion, angles[i])
                motion = preprocess_mixamo(motion)
                motion = rotate_motion_3d(motion, basis)
                motion = localize_motion(motion)
                all_joints.append(motion)
            else:
                motion = preprocess_mixamo(motion)
                motion = rotate_motion_3d(motion, basis)
                motion = localize_motion(motion)
                all_joints.append(motion)
        else:
            motion = motion * 128
            motion_proj = localize_motion(motion)
            all_joints.append(motion_proj)

    all_joints = np.concatenate(all_joints, axis=2)

    meanpose = np.mean(all_joints, axis=2)
    stdpose = np.std(all_joints, axis=2)
    stdpose[np.where(stdpose == 0)] = 1e-9

    return meanpose, stdpose


class MixamoDataset(_MixamoDatasetBase):

    def __init__(self, phase, config):
        super(MixamoDataset, self).__init__(phase, config)
        x_angles = view_angles if config.rotation_axes[0] else np.array([0])
        z_angles = view_angles if config.rotation_axes[1] else np.array([0])
        y_angles = view_angles if config.rotation_axes[2] else np.array([0])
        x_angles, z_angles, y_angles = np.meshgrid(x_angles, z_angles, y_angles)
        angles = np.stack([x_angles.flatten(), z_angles.flatten(), y_angles.flatten()], axis=1)
        self.view_angles = angles

    def preprocessing(self, motion3d, view_angle=None, params=None):
        """
        :param item: filename built from self.build_item
        :return:
        """

        if self.aug: motion3d, params = self.augmentation(motion3d, params)

        basis = None
        if view_angle is not None: basis = get_change_of_basis(motion3d, view_angle)

        motion3d = preprocess_mixamo(motion3d)
        motion3d = rotate_motion_3d(motion3d, basis)
        motion3d = localize_motion(motion3d)
        motion3d = normalize_motion(motion3d, self.meanpose, self.stdpose)

        motion2d = motion3d[:, [0, 2], :]

        motion3d = motion3d.reshape([-1, motion3d.shape[-1]])
        motion2d = motion2d.reshape([-1, motion2d.shape[-1]])

        motion3d = torch.from_numpy(motion3d).float()
        motion2d = torch.from_numpy(motion2d).float()

        return motion3d, motion2d

    def __getitem__(self, index):
        # select two motions
        idx_a, idx_b = np.random.choice(len(self.motion_names), size=2, replace=False)
        mot_a, mot_b = self.motion_names[idx_a], self.motion_names[idx_b]
        # select two characters
        idx_a, idx_b = np.random.choice(len(self.character_names), size=2, replace=False)
        char_a, char_b = self.character_names[idx_a], self.character_names[idx_b]
        idx_a, idx_b = np.random.choice(len(self.view_angles), size=2, replace=False)
        view_a, view_b = self.view_angles[idx_a], self.view_angles[idx_b]

        if self.aug:
            param_a = self.gen_aug_params(rotate=False)
            param_b = self.gen_aug_params(rotate=False)
        else:
            param_a = param_b = None

        item_a = self.load_item(self.build_item(mot_a, char_a))
        item_b = self.load_item(self.build_item(mot_b, char_b))
        item_ab = self.load_item(self.build_item(mot_a, char_b))
        item_ba = self.load_item(self.build_item(mot_b, char_a))

        X_a, x_a = self.preprocessing(item_a, view_a, param_a)
        X_b, x_b = self.preprocessing(item_b, view_b, param_b)

        X_aab, x_aab = self.preprocessing(item_a, view_b, param_a)
        X_bba, x_bba = self.preprocessing(item_b, view_a, param_b)
        X_aba, x_aba = self.preprocessing(item_ab, view_a, param_b)
        X_bab, x_bab = self.preprocessing(item_ba, view_b, param_a)
        X_abb, x_abb = self.preprocessing(item_ab, view_b, param_b)
        X_baa, x_baa = self.preprocessing(item_ba, view_a, param_a)

        return {"X_a": X_a, "X_b": X_b,
                "X_aab": X_aab, "X_bba": X_bba,
                "X_aba": X_aba, "X_bab": X_bab,
                "X_abb": X_abb, "X_baa": X_baa,
                "x_a": x_a, "x_b": x_b,
                "x_aab": x_aab, "x_bba": x_bba,
                "x_aba": x_aba, "x_bab": x_bab,
                "x_abb": x_abb, "x_baa": x_baa,
                "mot_a": mot_a, "mot_b": mot_b,
                "char_a": char_a, "char_b": char_b,
                "view_a": view_a, "view_b": view_b,
                "meanpose": self.meanpose, "stdpose": self.stdpose}


class MixamoLimbScaleDataset(_MixamoDatasetBase):

    def __init__(self, phase, config):
        super(MixamoLimbScaleDataset, self).__init__(phase, config)
        self.global_range = config.global_range
        self.local_range = config.local_range

        x_angles = view_angles if config.rotation_axes[0] else np.array([0])
        z_angles = view_angles if config.rotation_axes[1] else np.array([0])
        y_angles = view_angles if config.rotation_axes[2] else np.array([0])
        x_angles, z_angles, y_angles = np.meshgrid(x_angles, z_angles, y_angles)
        angles = np.stack([x_angles.flatten(), z_angles.flatten(), y_angles.flatten()], axis=1)
        self.view_angles = angles

    def preprocessing(self, motion3d, view_angle=None, params=None):
        if self.aug: motion3d, params = self.augmentation(motion3d, params)

        basis = None
        if view_angle is not None: basis = get_change_of_basis(motion3d, view_angle)

        motion3d = preprocess_mixamo(motion3d)
        motion3d = rotate_motion_3d(motion3d, basis)
        motion2d = motion3d[:, [0, 2], :]
        motion2d_scale = limb_scale_motion_2d(motion2d, self.global_range, self.local_range)

        motion2d = localize_motion(motion2d)
        motion2d_scale = localize_motion(motion2d_scale)

        motion2d = normalize_motion(motion2d, self.meanpose, self.stdpose)
        motion2d_scale = normalize_motion(motion2d_scale, self.meanpose, self.stdpose)

        motion2d = motion2d.reshape([-1, motion2d.shape[-1]])
        motion2d_scale = motion2d_scale.reshape((-1, motion2d_scale.shape[-1]))
        motion2d = torch.from_numpy(motion2d).float()
        motion2d_scale = torch.from_numpy(motion2d_scale).float()

        return motion2d, motion2d_scale

    def __getitem__(self, index):
        # select two motions
        motion_idx = np.random.choice(len(self.motion_names))
        motion = self.motion_names[motion_idx]
        # select two characters
        char_idx = np.random.choice(len(self.character_names))
        character = self.character_names[char_idx]
        view_idx = np.random.choice(len(self.view_angles))
        view = self.view_angles[view_idx]

        if self.aug:
            param = self.gen_aug_params(rotate=True)
        else:
            param = None

        item = self.build_item(motion, character)

        x, x_s = self.preprocessing(self.load_item(item), view, param)

        return {"x": x, "x_s": x_s, "mot": motion, "char": character, "view": view,
                "meanpose": self.meanpose, "stdpose": self.stdpose}


class SoloDanceDataset(Dataset):

    def __init__(self, phase, config):
        super(SoloDanceDataset, self).__init__()
        self.global_range = config.global_range
        self.local_range = config.local_range

        assert phase in ['train', 'test']
        self.data_root = config.train_dir if phase=='train' else config.test_dir
        self.phase = phase
        self.unit = config.unit
        self.meanpose_path = config.train_meanpose_path if phase == 'train' else config.test_meanpose_path
        self.stdpose_path = config.train_stdpose_path if phase == 'train' else config.test_stdpose_path
        self.character_names = sorted(os.listdir(self.data_root))

        self.items = glob.glob(os.path.join(self.data_root, '*/*/motions/*.npy'))
        self.meanpose, self.stdpose = get_meanpose(phase, config)
        self.meanpose = self.meanpose.astype(np.float32)
        self.stdpose = self.stdpose.astype(np.float32)

        if 'preload' in config and config.preload:
            self.preload()
            self.cached = True
        else:
            self.cached = False

    def load_item(self, item):
        if self.cached:
            data = self.cache[item]
        else:
            data = np.load(item)
        return data

    def preload(self):
        print("pre-loading into memory")
        pbar = tqdm(total=len(self))
        self.cache = {}
        for item in self.items:
            motion = np.load(item)
            self.cache[item] = motion
            pbar.update(1)

    def preprocessing(self, motion):

        motion = motion * self.unit

        motion[1, :, :] = (motion[2, :, :] + motion[5, :, :]) / 2
        motion[8, :, :] = (motion[9, :, :] + motion[12, :, :]) / 2

        global_scale = self.global_range[0] + np.random.random() * (self.global_range[1] - self.global_range[0])
        local_scales = self.local_range[0] + np.random.random([8]) * (self.local_range[1] - self.local_range[0])
        motion_scale = scale_limbs(motion, global_scale, local_scales)

        motion = localize_motion(motion)
        motion_scale = localize_motion(motion_scale)
        motion = normalize_motion(motion, self.meanpose, self.stdpose)
        motion_scale = normalize_motion(motion_scale, self.meanpose, self.stdpose)
        motion = motion.reshape((-1, motion.shape[-1]))
        motion_scale = motion_scale.reshape((-1, motion_scale.shape[-1]))
        motion = torch.from_numpy(motion).float()
        motion_scale = torch.from_numpy(motion_scale).float()
        return motion, motion_scale

    def __len__(self):
        return len(self.items)

    def __getitem__(self, index):
        item = self.items[index]
        motion = self.load_item(item)
        x, x_s = self.preprocessing(motion)
        return {"x": x, "x_s": x_s, "meanpose": self.meanpose, "stdpose": self.stdpose}
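Note (editorial addition, not part of the commit): the normalization statistics used throughout lib/data.py come from get_meanpose, which loads the .npy files bundled under ./data (or regenerates them with gen_meanpose when they are missing). A minimal sketch, mirroring the call made in app.py:

    # Sketch only: loading the bundled mean/std pose used for normalization.
    from lib.data import get_meanpose
    from lib.util.general import get_config

    config = get_config("./configs/transmomo.yaml")
    mean_pose, std_pose = get_meanpose("test", config.data)   # reads ./data/*_with_view.npy
    print(mean_pose.shape, std_pose.shape)                    # per-joint statistics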
lib/loss.py
ADDED
@@ -0,0 +1,57 @@
import torch
import torch.nn.functional as F


def kl_loss(code):
    return torch.mean(torch.pow(code, 2))


def pairwise_cosine_similarity(seqs_i, seqs_j):
    # seqs_i, seqs_j: [batch, statics, channel]
    n_statics = seqs_i.size(1)
    seqs_i_exp = seqs_i.unsqueeze(2).repeat(1, 1, n_statics, 1)
    seqs_j_exp = seqs_j.unsqueeze(1).repeat(1, n_statics, 1, 1)
    return F.cosine_similarity(seqs_i_exp, seqs_j_exp, dim=3)


def temporal_pairwise_cosine_similarity(seqs_i, seqs_j):
    # seqs_i, seqs_j: [batch, channel, time]
    seq_len = seqs_i.size(2)
    seqs_i_exp = seqs_i.unsqueeze(3).repeat(1, 1, 1, seq_len)
    seqs_j_exp = seqs_j.unsqueeze(2).repeat(1, 1, seq_len, 1)
    return F.cosine_similarity(seqs_i_exp, seqs_j_exp, dim=1)


def consecutive_cosine_similarity(seqs):
    # seqs: [batch, channel, time]
    seqs_roll = seqs.roll(shifts=1, dims=2)[1:]
    seqs = seqs[:-1]
    return F.cosine_similarity(seqs, seqs_roll)


def triplet_margin_loss(seqs_a, seqs_b, neg_range=(0.0, 0.5), margin=0.2):
    # seqs_a, seqs_b: [batch, channel, time]

    neg_start, neg_end = neg_range
    batch_size, _, seq_len = seqs_a.size()
    n_neg_all = seq_len ** 2
    n_neg = int(round(neg_end * n_neg_all))
    n_neg_discard = int(round(neg_start * n_neg_all))

    batch_size, _, seq_len = seqs_a.size()
    sim_aa = temporal_pairwise_cosine_similarity(seqs_a, seqs_a)
    sim_bb = temporal_pairwise_cosine_similarity(seqs_b, seqs_b)
    sim_ab = temporal_pairwise_cosine_similarity(seqs_a, seqs_b)
    sim_ba = sim_ab.transpose(1, 2)

    diff_ab = (sim_ab - sim_aa).reshape(batch_size, -1)
    diff_ba = (sim_ba - sim_bb).reshape(batch_size, -1)
    diff = torch.cat([diff_ab, diff_ba], dim=0)
    diff, _ = diff.topk(n_neg, dim=-1, sorted=True)
    diff = diff[:, n_neg_discard:]

    loss = diff + margin
    loss = loss.clamp(min=0.)
    loss = loss.mean()

    return loss
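Note (editorial addition, not part of the commit): triplet_margin_loss compares within-sequence and cross-sequence cosine-similarity maps over time and penalizes the hardest negatives inside neg_range. A quick shape check with placeholder sizes:

    # Sketch only: exercises triplet_margin_loss on random data.
    import torch
    from lib.loss import triplet_margin_loss

    seqs_a = torch.randn(4, 128, 16)   # [batch, channel, time], placeholder sizes
    seqs_b = torch.randn(4, 128, 16)
    loss = triplet_margin_loss(seqs_a, seqs_b, neg_range=(0.0, 0.5), margin=0.2)
    print(loss.item())                 # scalar loss value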
lib/network.py
ADDED
@@ -0,0 +1,356 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
thismodule = sys.modules[__name__]
|
3 |
+
|
4 |
+
import torch
|
5 |
+
import torch.nn as nn
|
6 |
+
import torch.nn.functional as F
|
7 |
+
import random
|
8 |
+
import matplotlib.pyplot as plt
|
9 |
+
import numpy as np
|
10 |
+
torch.manual_seed(123)
|
11 |
+
|
12 |
+
|
13 |
+
def get_autoencoder(config):
|
14 |
+
ae_cls = getattr(thismodule, config.autoencoder.cls)
|
15 |
+
return ae_cls(config.autoencoder)
|
16 |
+
|
17 |
+
|
18 |
+
class ConvEncoder(nn.Module):
|
19 |
+
|
20 |
+
@classmethod
|
21 |
+
def build_from_config(cls, config):
|
22 |
+
conv_pool = None if config.conv_pool is None else getattr(nn, config.conv_pool)
|
23 |
+
encoder = cls(config.channels, config.padding, config.kernel_size, config.conv_stride, conv_pool)
|
24 |
+
return encoder
|
25 |
+
|
26 |
+
def __init__(self, channels, padding=3, kernel_size=8, conv_stride=2, conv_pool=None):
|
27 |
+
super(ConvEncoder, self).__init__()
|
28 |
+
|
29 |
+
self.in_channels = channels[0]
|
30 |
+
|
31 |
+
model = []
|
32 |
+
acti = nn.LeakyReLU(0.2)
|
33 |
+
|
34 |
+
nr_layer = len(channels) - 1
|
35 |
+
|
36 |
+
for i in range(nr_layer):
|
37 |
+
if conv_pool is None:
|
38 |
+
model.append(nn.ReflectionPad1d(padding))
|
39 |
+
model.append(nn.Conv1d(channels[i], channels[i+1], kernel_size=kernel_size, stride=conv_stride))
|
40 |
+
model.append(acti)
|
41 |
+
else:
|
42 |
+
model.append(nn.ReflectionPad1d(padding))
|
43 |
+
model.append(nn.Conv1d(channels[i], channels[i+1], kernel_size=kernel_size, stride=conv_stride))
|
44 |
+
model.append(acti)
|
45 |
+
model.append(conv_pool(kernel_size=2, stride=2))
|
46 |
+
|
47 |
+
self.model = nn.Sequential(*model)
|
48 |
+
|
49 |
+
def forward(self, x):
|
50 |
+
x = x[:, :self.in_channels, :]
|
51 |
+
x = self.model(x)
|
52 |
+
return x
|
53 |
+
|
54 |
+
|
55 |
+
class ConvDecoder(nn.Module):
|
56 |
+
|
57 |
+
@classmethod
|
58 |
+
def build_from_config(cls, config):
|
59 |
+
decoder = cls(config.channels, config.kernel_size)
|
60 |
+
return decoder
|
61 |
+
|
62 |
+
def __init__(self, channels, kernel_size=7):
|
63 |
+
super(ConvDecoder, self).__init__()
|
64 |
+
|
65 |
+
model = []
|
66 |
+
pad = (kernel_size - 1) // 2
|
67 |
+
acti = nn.LeakyReLU(0.2)
|
68 |
+
|
69 |
+
for i in range(len(channels) - 1):
|
70 |
+
model.append(nn.Upsample(scale_factor=2, mode='nearest'))
|
71 |
+
model.append(nn.ReflectionPad1d(pad))
|
72 |
+
model.append(nn.Conv1d(channels[i], channels[i + 1],
|
73 |
+
kernel_size=kernel_size, stride=1))
|
74 |
+
if i == 0 or i == 1:
|
75 |
+
model.append(nn.Dropout(p=0.2))
|
76 |
+
if not i == len(channels) - 2:
|
77 |
+
model.append(acti) # whether to add tanh a last?
|
78 |
+
#model.append(nn.Dropout(p=0.2))
|
79 |
+
|
80 |
+
self.model = nn.Sequential(*model)
|
81 |
+
|
82 |
+
def forward(self, x):
|
83 |
+
return self.model(x)
|
84 |
+
|
85 |
+
|
86 |
+
class Discriminator(nn.Module):
|
87 |
+
|
88 |
+
def __init__(self, config):
|
89 |
+
super(Discriminator, self).__init__()
|
90 |
+
self.gan_type = config.gan_type
|
91 |
+
encoder_cls = getattr(thismodule, config.encoder_cls)
|
92 |
+
self.encoder = encoder_cls.build_from_config(config)
|
93 |
+
self.linear = nn.Linear(config.channels[-1], 1)
|
94 |
+
|
95 |
+
def forward(self, seqs):
|
96 |
+
|
97 |
+
code_seq = self.encoder(seqs)
|
98 |
+
logits = self.linear(code_seq.permute(0, 2, 1))
|
99 |
+
return logits
|
100 |
+
|
101 |
+
def calc_dis_loss(self, x_gen, x_real):
|
102 |
+
|
103 |
+
fake_logits = self.forward(x_gen)
|
104 |
+
real_logits = self.forward(x_real)
|
105 |
+
|
106 |
+
if self.gan_type == 'lsgan':
|
107 |
+
loss = torch.mean((fake_logits - 0) ** 2) + torch.mean((real_logits - 1) ** 2)
|
108 |
+
elif self.gan_type == 'nsgan':
|
109 |
+
all0 = torch.zeros_like(fake_logits, requires_grad=False)
|
110 |
+
all1 = torch.ones_like(real_logits, requires_grad=False)
|
111 |
+
loss = torch.mean(F.binary_cross_entropy(F.sigmoid(fake_logits), all0) +
|
112 |
+
F.binary_cross_entropy(F.sigmoid(real_logits), all1))
|
113 |
+
else:
|
114 |
+
raise NotImplementedError
|
115 |
+
|
116 |
+
return loss
|
117 |
+
|
118 |
+
def calc_gen_loss(self, x_gen):
|
119 |
+
|
120 |
+
logits = self.forward(x_gen)
|
121 |
+
if self.gan_type == 'lsgan':
|
122 |
+
loss = torch.mean((logits - 1) ** 2)
|
123 |
+
elif self.gan_type == 'nsgan':
|
124 |
+
all1 = torch.ones_like(logits, requires_grad=False)
|
125 |
+
loss = torch.mean(F.binary_cross_entropy(F.sigmoid(logits), all1))
|
126 |
+
else:
|
127 |
+
raise NotImplementedError
|
128 |
+
|
129 |
+
return loss
|
130 |
+
|
131 |
+
|
132 |
+
class Autoencoder3f(nn.Module):
|
133 |
+
|
134 |
+
def __init__(self, config):
|
135 |
+
super(Autoencoder3f, self).__init__()
|
136 |
+
|
137 |
+
assert config.motion_encoder.channels[-1] + config.body_encoder.channels[-1] + \
|
138 |
+
config.view_encoder.channels[-1] == config.decoder.channels[0]
|
139 |
+
|
140 |
+
self.n_joints = config.decoder.channels[-1] // 3
|
141 |
+
self.body_reference = config.body_reference
|
142 |
+
|
143 |
+
motion_cls = getattr(thismodule, config.motion_encoder.cls)
|
144 |
+
body_cls = getattr(thismodule, config.body_encoder.cls)
|
145 |
+
view_cls = getattr(thismodule, config.view_encoder.cls)
|
146 |
+
|
147 |
+
self.motion_encoder = motion_cls.build_from_config(config.motion_encoder)
|
148 |
+
self.body_encoder = body_cls.build_from_config(config.body_encoder)
|
149 |
+
self.view_encoder = view_cls.build_from_config(config.view_encoder)
|
150 |
+
self.decoder = ConvDecoder.build_from_config(config.decoder)
|
151 |
+
|
152 |
+
self.body_pool = getattr(F, config.body_encoder.global_pool) if config.body_encoder.global_pool is not None else None
|
153 |
+
self.view_pool = getattr(F, config.view_encoder.global_pool) if config.view_encoder.global_pool is not None else None
|
154 |
+
|
155 |
+
def forward(self, seqs):
|
156 |
+
return self.reconstruct(seqs)
|
157 |
+
|
158 |
+
def encode_motion(self, seqs):
|
159 |
+
motion_code_seq = self.motion_encoder(seqs)
|
160 |
+
return motion_code_seq
|
161 |
+
|
162 |
+
def encode_body(self, seqs):
|
163 |
+
body_code_seq = self.body_encoder(seqs)
|
164 |
+
kernel_size = body_code_seq.size(-1)
|
165 |
+
body_code = self.body_pool(body_code_seq, kernel_size) if self.body_pool is not None else body_code_seq
|
166 |
+
return body_code, body_code_seq
|
167 |
+
|
168 |
+
def encode_view(self, seqs):
|
169 |
+
view_code_seq = self.view_encoder(seqs)
|
170 |
+
kernel_size = view_code_seq.size(-1)
|
171 |
+
view_code = self.view_pool(view_code_seq, kernel_size) if self.view_pool is not None else view_code_seq
|
172 |
+
return view_code, view_code_seq
|
173 |
+
|
174 |
+
def decode(self, motion_code, body_code, view_code):
|
175 |
+
if body_code.size(-1) == 1:
|
176 |
+
body_code = body_code.repeat(1, 1, motion_code.shape[-1])
|
177 |
+
if view_code.size(-1) == 1:
|
178 |
+
view_code = view_code.repeat(1, 1, motion_code.shape[-1])
|
179 |
+
complete_code = torch.cat([motion_code, body_code, view_code], dim=1)
|
180 |
+
out = self.decoder(complete_code)
|
181 |
+
return out
|
182 |
+
|
183 |
+
def cross3d(self, x_a, x_b, x_c):
|
184 |
+
motion_a = self.encode_motion(x_a)
|
185 |
+
body_b, _ = self.encode_body(x_b)
|
186 |
+
view_c, _ = self.encode_view(x_c)
|
187 |
+
out = self.decode(motion_a, body_b, view_c)
|
188 |
+
return out
|
189 |
+
|
190 |
+
def cross2d(self, x_a, x_b, x_c):
|
191 |
+
motion_a = self.encode_motion(x_a)
|
192 |
+
body_b, _ = self.encode_body(x_b)
|
193 |
+
view_c, _ = self.encode_view(x_c)
|
194 |
+
out = self.decode(motion_a, body_b, view_c)
|
195 |
+
batch_size, channels, seq_len = out.size()
|
196 |
+
n_joints = channels // 3
|
197 |
+
out = out.view(batch_size, n_joints, 3, seq_len)
|
198 |
+
out = out[:, :, [0, 2], :]
|
199 |
+
out = out.view(batch_size, n_joints * 2, seq_len)
|
200 |
+
return out
|
201 |
+
|
202 |
+
def cross2d_adv(self, x_a, x_b, x_c):
|
203 |
+
x_a.cpu()
|
204 |
+
x_a_shape = x_a.shape
|
205 |
+
print(x_a.shape)
|
206 |
+
#motion_a_org = self.encode_motion(x_a)
|
207 |
+
print(x_a)
|
208 |
+
|
209 |
+
|
210 |
+
# The heatmap image is saved as 'tensor_heatmap.png' in the current directory
|
211 |
+
|
212 |
+
# for i in range(0,119):
|
213 |
+
# x_a[0][11][i]+=1
|
214 |
+
|
215 |
+
#x_a[0][7][60]+=0.01
|
216 |
+
|
217 |
+
#motion_a = self.encode_motion(x_a)
|
218 |
+
# print(motion_a.shape)
|
219 |
+
# print(motion_a[0][0]-motion_a_org[0][0])
|
220 |
+
# res = motion_a[0] - motion_a_org[0]
|
221 |
+
# res = res.cpu().detach().numpy()
|
222 |
+
# # Code for plotting the heatmap
|
223 |
+
# plt.figure(figsize=(15, 10))
|
224 |
+
# plt.imshow(res, cmap='hot', interpolation='nearest')
|
225 |
+
# plt.colorbar()
|
226 |
+
# plt.title('Heatmap of the Tensor')
|
227 |
+
|
228 |
+
# # Save the heatmap to a local file
|
229 |
+
# plt.savefig('/home/fazhong/studio/transmomo.pytorch/tensor_heatmap2.png')
|
230 |
+
# plt.close()
|
231 |
+
|
232 |
+
initial_motion_a = self.encode_motion(x_a) # 计算初始的motion_a
|
233 |
+
|
234 |
+
# 定义一个函数来计算motion的变化量
|
235 |
+
def motion_change(motion_a, initial_motion_a):
|
236 |
+
return (motion_a - initial_motion_a).norm()
|
237 |
+
|
238 |
+
# 设置初始的最大变化量为0
|
239 |
+
max_change = 0
|
240 |
+
|
241 |
+
# 扰动次数,可以根据需要更改
|
242 |
+
num_perturbations = 10000
|
243 |
+
init_a = x_a.clone()
|
244 |
+
for _ in range(num_perturbations):
|
245 |
+
# 复制x_a以避免在原始数据上修改
|
246 |
+
x_a_perturbed = x_a.clone().cpu()
|
247 |
+
|
248 |
+
# 选择要扰动的随机点
|
249 |
+
batch_idx, seq_idx, feature_idx = (torch.randint(0, x_a.size(0), (1,)),
|
250 |
+
torch.randint(0, x_a.size(1), (1,)),
|
251 |
+
torch.randint(0, x_a.size(2), (1,)))
|
252 |
+
|
253 |
+
# 在选定点上加上扰动
|
254 |
+
x_a_perturbed[batch_idx, seq_idx, feature_idx] += 10 * torch.randn(1)
|
255 |
+
|
256 |
+
# 计算扰动后的motion_a
|
257 |
+
perturbed_motion_a = self.encode_motion(x_a_perturbed.to('cuda:0'))
|
258 |
+
|
259 |
+
# 计算变化量
|
260 |
+
change = motion_change(perturbed_motion_a, initial_motion_a)
|
261 |
+
|
262 |
+
# 如果变化量大于之前保存的最大变化量,则更新x_a和最大变化量
|
263 |
+
if change > max_change:
|
264 |
+
x_a = x_a_perturbed
|
265 |
+
max_change = change
|
266 |
+
|
267 |
+
# 最后,x_a将是导致最大motion_a变化的扰动版本
|
268 |
+
# max_change是这个变化量
|
269 |
+
# print(max_change)
|
270 |
+
# print(max_change.shape)
|
271 |
+
|
272 |
+
print(x_a_perturbed - init_a.cpu())
|
273 |
+
motion_a = self.encode_motion(x_a_perturbed.to('cuda:0'))
|
274 |
+
# motion_a = self.encode_motion(x_a.to('cuda:0'))
|
275 |
+
body_b, _ = self.encode_body(x_b)
|
276 |
+
view_c, _ = self.encode_view(x_c)
|
277 |
+
|
278 |
+
out = self.decode(motion_a, body_b, view_c)
|
279 |
+
batch_size, channels, seq_len = out.size()
|
280 |
+
n_joints = channels // 3
|
281 |
+
out = out.view(batch_size, n_joints, 3, seq_len)
|
282 |
+
out = out[:, :, [0, 2], :]
|
283 |
+
out = out.view(batch_size, n_joints * 2, seq_len)
|
284 |
+
return out
|
285 |
+
|
286 |
+
def cross2d_one(self, x_a):
|
287 |
+
motion_a = self.encode_motion(x_a)
|
288 |
+
body_b, _ = self.encode_body(x_a)
|
289 |
+
view_c, _ = self.encode_view(x_a)
|
290 |
+
|
291 |
+
out = self.decode(motion_a, body_b, view_c)
|
292 |
+
batch_size, channels, seq_len = out.size()
|
293 |
+
n_joints = channels // 3
|
294 |
+
out = out.view(batch_size, n_joints, 3, seq_len)
|
295 |
+
out = out[:, :, [0, 2], :]
|
296 |
+
out = out.view(batch_size, n_joints * 2, seq_len)
|
297 |
+
return out
|
298 |
+
|
299 |
+
def adv_cross(self,x_a):
|
300 |
+
motion_a = self.encode_motion(x_a)
|
301 |
+
body_b, _ = self.encode_body(x_a)
|
302 |
+
view_c, _ = self.encode_view(x_a)
|
303 |
+
return motion_a
|
304 |
+
|
305 |
+
def reconstruct3d(self, x):
|
306 |
+
motion_code = self.encode_motion(x)
|
307 |
+
body_code, _ = self.encode_body(x)
|
308 |
+
view_code, _ = self.encode_view(x)
|
309 |
+
out = self.decode(motion_code, body_code, view_code)
|
310 |
+
return out
|
311 |
+
|
312 |
+
def reconstruct2d(self, x):
|
313 |
+
motion_code = self.encode_motion(x)
|
314 |
+
body_code, _ = self.encode_body(x)
|
315 |
+
view_code, _ = self.encode_view(x)
|
316 |
+
out = self.decode(motion_code, body_code, view_code)
|
317 |
+
batch_size, channels, seq_len = out.size()
|
318 |
+
n_joints = channels // 3
|
319 |
+
out = out.view(batch_size, n_joints, 3, seq_len)
|
320 |
+
out = out[:, :, [0, 2], :]
|
321 |
+
out = out.view(batch_size, n_joints * 2, seq_len)
|
322 |
+
return out
|
323 |
+
|
324 |
+
def interpolate(self, x_a, x_b, N):
|
325 |
+
|
326 |
+
step_size = 1. / (N-1)
|
327 |
+
batch_size, _, seq_len = x_a.size()
|
328 |
+
|
329 |
+
motion_a = self.encode_motion(x_a)
|
330 |
+
body_a, body_a_seq = self.encode_body(x_a)
|
331 |
+
view_a, view_a_seq = self.encode_view(x_a)
|
332 |
+
|
333 |
+
motion_b = self.encode_motion(x_b)
|
334 |
+
body_b, body_b_seq = self.encode_body(x_b)
|
335 |
+
view_b, view_b_seq = self.encode_view(x_b)
|
336 |
+
|
337 |
+
batch_out = torch.zeros([batch_size, N, N, 2 * self.n_joints, seq_len])
|
338 |
+
|
339 |
+
for i in range(N):
|
340 |
+
motion_weight = i * step_size
|
341 |
+
for j in range(N):
|
342 |
+
body_weight = j * step_size
|
343 |
+
motion = (1. - motion_weight) * motion_a + motion_weight * motion_b
|
344 |
+
body = (1. - body_weight) * body_a + body_weight * body_b
|
345 |
+
view = (1. - body_weight) * view_a + body_weight * view_b
|
346 |
+
out = self.decode(motion, body, view)
|
347 |
+
batch_size, channels, seq_len = out.size()
|
348 |
+
n_joints = channels // 3
|
349 |
+
out = out.view(batch_size, n_joints, 3, seq_len)
|
350 |
+
out = out[:, :, [0, 2], :]
|
351 |
+
out = out.view(batch_size, n_joints * 2, seq_len)
|
352 |
+
batch_out[:, i, j, :, :] = out
|
353 |
+
|
354 |
+
return batch_out
|
355 |
+
|
356 |
+
|
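The 2D variants above (cross2d, cross2d_one, reconstruct2d) all finish with the same projection step: the decoder output of shape (batch, n_joints * 3, seq_len) is reshaped, the middle coordinate is dropped, and the result is flattened back to (batch, n_joints * 2, seq_len). A minimal sketch of that step on a dummy tensor (15 joints and 64 frames are arbitrary values, not tied to the shipped model):

import torch

def project_xz(out, n_dims=3):
    # out: (batch, n_joints * n_dims, seq_len) -> (batch, n_joints * 2, seq_len)
    batch_size, channels, seq_len = out.size()
    n_joints = channels // n_dims
    out = out.view(batch_size, n_joints, n_dims, seq_len)
    out = out[:, :, [0, 2], :]                      # keep dims 0 and 2, drop dim 1
    return out.reshape(batch_size, n_joints * 2, seq_len)

dummy = torch.randn(4, 45, 64)                      # 15 joints x 3 dims, 64 frames
print(project_xz(dummy).shape)                      # torch.Size([4, 30, 64])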
lib/operation.py
ADDED
@@ -0,0 +1,219 @@
|
1 |
+
import torch
|
2 |
+
import torch.nn.functional as F
|
3 |
+
import numpy as np
|
4 |
+
import imageio
|
5 |
+
from math import pi
|
6 |
+
from tqdm import tqdm
|
7 |
+
from lib.data import get_dataloader, get_meanpose
|
8 |
+
from lib.util.general import get_config
|
9 |
+
from lib.util.visualization import motion2video_np, hex2rgb
|
10 |
+
import os
|
11 |
+
|
12 |
+
eps = 1e-16
|
13 |
+
|
14 |
+
|
15 |
+
def localize_motion_torch(motion):
|
16 |
+
"""
|
17 |
+
:param motion: B x J x D x T
|
18 |
+
:return:
|
19 |
+
"""
|
20 |
+
B, J, D, T = motion.size()
|
21 |
+
|
22 |
+
# subtract centers to local coordinates
|
23 |
+
centers = motion[:, 8:9, :, :] # B x 1 x D x T
|
24 |
+
motion = motion - centers
|
25 |
+
|
26 |
+
# adding velocity
|
27 |
+
translation = centers[:, :, :, 1:] - centers[:, :, :, :-1] # B x 1 x D x (T-1)
|
28 |
+
velocity = F.pad(translation, [1, 0], "constant", 0.) # B x 1 x D x T
|
29 |
+
motion = torch.cat([motion[:, :8], motion[:, 9:], velocity], dim=1)
|
30 |
+
|
31 |
+
return motion
|
32 |
+
|
33 |
+
|
34 |
+
def normalize_motion_torch(motion, meanpose, stdpose):
|
35 |
+
"""
|
36 |
+
:param motion: (B, J, D, T)
|
37 |
+
:param meanpose: (J, D)
|
38 |
+
:param stdpose: (J, D)
|
39 |
+
:return:
|
40 |
+
"""
|
41 |
+
B, J, D, T = motion.size()
|
42 |
+
if D == 2 and meanpose.size(1) == 3:
|
43 |
+
meanpose = meanpose[:, [0, 2]]
|
44 |
+
if D == 2 and stdpose.size(1) == 3:
|
45 |
+
stdpose = stdpose[:, [0, 2]]
|
46 |
+
return (motion - meanpose.view(1, J, D, 1)) / stdpose.view(1, J, D, 1)
|
47 |
+
|
48 |
+
|
49 |
+
def normalize_motion_inv_torch(motion, meanpose, stdpose):
|
50 |
+
"""
|
51 |
+
:param motion: (B, J, D, T)
|
52 |
+
:param meanpose: (J, D)
|
53 |
+
:param stdpose: (J, D)
|
54 |
+
:return:
|
55 |
+
"""
|
56 |
+
B, J, D, T = motion.size()
|
57 |
+
if D == 2 and meanpose.size(1) == 3:
|
58 |
+
meanpose = meanpose[:, [0, 2]]
|
59 |
+
if D == 2 and stdpose.size(1) == 3:
|
60 |
+
stdpose = stdpose[:, [0, 2]]
|
61 |
+
return motion * stdpose.view(1, J, D, 1) + meanpose.view(1, J, D, 1)
|
62 |
+
|
63 |
+
|
64 |
+
def globalize_motion_torch(motion):
|
65 |
+
"""
|
66 |
+
:param motion: B x J x D x T
|
67 |
+
:return:
|
68 |
+
"""
|
69 |
+
B, J, D, T = motion.size()
|
70 |
+
|
71 |
+
motion_inv = torch.zeros_like(motion)
|
72 |
+
motion_inv[:, :8] = motion[:, :8]
|
73 |
+
motion_inv[:, 9:] = motion[:, 8:-1]
|
74 |
+
|
75 |
+
velocity = motion[:, -1:, :, :]
|
76 |
+
centers = torch.zeros_like(velocity)
|
77 |
+
displacement = torch.zeros_like(velocity[:, :, :, 0])
|
78 |
+
|
79 |
+
for t in range(T):
|
80 |
+
displacement += velocity[:, :, :, t]
|
81 |
+
centers[:, :, :, t] = displacement
|
82 |
+
|
83 |
+
motion_inv = motion_inv + centers
|
84 |
+
|
85 |
+
return motion_inv
|
86 |
+
|
87 |
+
|
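The frame loop in globalize_motion_torch simply accumulates the per-frame velocity into absolute center positions; an equivalent vectorized form (a sketch for illustration, not a change to the committed code) is a cumulative sum along the time axis:

import torch

velocity = torch.randn(2, 1, 2, 8)                  # hypothetical (B, 1, D, T) velocity track
centers_loop = torch.zeros_like(velocity)
disp = torch.zeros_like(velocity[..., 0])
for t in range(velocity.size(-1)):
    disp = disp + velocity[..., t]
    centers_loop[..., t] = disp

centers_cumsum = velocity.cumsum(dim=-1)            # same result in one call
assert torch.allclose(centers_loop, centers_cumsum)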
88 |
+
def restore_world_space(motion, meanpose, stdpose, n_joints=15):
|
89 |
+
B, C, T = motion.size()
|
90 |
+
motion = motion.view(B, n_joints, C // n_joints, T)
|
91 |
+
motion = normalize_motion_inv_torch(motion, meanpose, stdpose)
|
92 |
+
motion = globalize_motion_torch(motion)
|
93 |
+
return motion
|
94 |
+
|
95 |
+
|
96 |
+
def convert_to_learning_space(motion, meanpose, stdpose):
|
97 |
+
B, J, D, T = motion.size()
|
98 |
+
motion = localize_motion_torch(motion)
|
99 |
+
motion = normalize_motion_torch(motion, meanpose, stdpose)
|
100 |
+
motion = motion.view(B, J*D, T)
|
101 |
+
return motion
|
102 |
+
|
103 |
+
|
104 |
+
# tensor operations for rotating and projecting 3d skeleton sequence
|
105 |
+
|
106 |
+
def get_body_basis(motion_3d):
|
107 |
+
"""
|
108 |
+
Get the unit basis vectors of the body-centric rectangular coordinate frame for the given 3D motion
|
109 |
+
:param motion_3d: 3D motion from 3D joints positions, shape (B, n_joints, 3, seq_len).
|
110 |
+
:param angles: (K, 3), Rotation angles around each axis.
|
111 |
+
:return: basis unit vectors of the rectangular coordinate frame, shape (B, 3, 3).
|
112 |
+
"""
|
113 |
+
B = motion_3d.size(0)
|
114 |
+
|
115 |
+
# 2 RightArm 5 LeftArm 9 RightUpLeg 12 LeftUpLeg
|
116 |
+
horizontal = (motion_3d[:, 2] - motion_3d[:, 5] + motion_3d[:, 9] - motion_3d[:, 12]) / 2 # [B, 3, seq_len]
|
117 |
+
horizontal = horizontal.mean(dim=-1) # [B, 3]
|
118 |
+
horizontal = horizontal / horizontal.norm(dim=-1).unsqueeze(-1) # [B, 3]
|
119 |
+
|
120 |
+
vector_z = torch.tensor([0., 0., 1.], device=motion_3d.device, dtype=motion_3d.dtype).unsqueeze(0).repeat(B, 1) # [B, 3]
|
121 |
+
vector_y = torch.cross(horizontal, vector_z) # [B, 3]
|
122 |
+
vector_y = vector_y / vector_y.norm(dim=-1).unsqueeze(-1)
|
123 |
+
vector_x = torch.cross(vector_y, vector_z)
|
124 |
+
vectors = torch.stack([vector_x, vector_y, vector_z], dim=2) # [B, 3, 3]
|
125 |
+
|
126 |
+
vectors = vectors.detach()
|
127 |
+
|
128 |
+
return vectors
|
129 |
+
|
130 |
+
|
131 |
+
def rotate_basis_euler(basis_vectors, angles):
|
132 |
+
"""
|
133 |
+
Rotate the basis vectors of the rectangular coordinate frame by the given Euler angles.
|
134 |
+
|
135 |
+
:param basis_vectors: [B, 3, 3]
|
136 |
+
:param angles: [B, K, T, 3] Rotation angles around each axis.
|
137 |
+
:return: [B, K, T, 3, 3]
|
138 |
+
"""
|
139 |
+
B, K, T, _ = angles.size()
|
140 |
+
|
141 |
+
cos, sin = torch.cos(angles), torch.sin(angles)
|
142 |
+
cx, cy, cz = cos[:, :, :, 0], cos[:, :, :, 1], cos[:, :, :, 2] # [B, K, T]
|
143 |
+
sx, sy, sz = sin[:, :, :, 0], sin[:, :, :, 1], sin[:, :, :, 2] # [B, K, T]
|
144 |
+
|
145 |
+
x = basis_vectors[:, 0, :] # [B, 3]
|
146 |
+
o = torch.zeros_like(x[:, 0]) # [B]
|
147 |
+
|
148 |
+
x_cpm_0 = torch.stack([o, -x[:, 2], x[:, 1]], dim=1) # [B, 3]
|
149 |
+
x_cpm_1 = torch.stack([x[:, 2], o, -x[:, 0]], dim=1) # [B, 3]
|
150 |
+
x_cpm_2 = torch.stack([-x[:, 1], x[:, 0], o], dim=1) # [B, 3]
|
151 |
+
x_cpm = torch.stack([x_cpm_0, x_cpm_1, x_cpm_2], dim=1) # [B, 3, 3]
|
152 |
+
x_cpm = x_cpm.unsqueeze(1).unsqueeze(2) # [B, 1, 1, 3, 3]
|
153 |
+
|
154 |
+
x = x.unsqueeze(-1) # [B, 3, 1]
|
155 |
+
xx = torch.matmul(x, x.transpose(-1, -2)).unsqueeze(1).unsqueeze(2) # [B, 1, 1, 3, 3]
|
156 |
+
eye = torch.eye(n=3, dtype=basis_vectors.dtype, device=basis_vectors.device)
|
157 |
+
eye = eye.unsqueeze(0).unsqueeze(0).unsqueeze(0) # [1, 1, 1, 3, 3]
|
158 |
+
mat33_x = cx.unsqueeze(-1).unsqueeze(-1) * eye \
|
159 |
+
+ sx.unsqueeze(-1).unsqueeze(-1) * x_cpm \
|
160 |
+
+ (1. - cx).unsqueeze(-1).unsqueeze(-1) * xx # [B, K, T, 3, 3]
|
161 |
+
|
162 |
+
o = torch.zeros_like(cz)
|
163 |
+
i = torch.ones_like(cz)
|
164 |
+
mat33_z_0 = torch.stack([cz, sz, o], dim=3) # [B, K, T, 3]
|
165 |
+
mat33_z_1 = torch.stack([-sz, cz, o], dim=3) # [B, K, T, 3]
|
166 |
+
mat33_z_2 = torch.stack([o, o, i], dim=3) # [B, K, T, 3]
|
167 |
+
mat33_z = torch.stack([mat33_z_0, mat33_z_1, mat33_z_2], dim=3) # [B, K, T, 3, 3]
|
168 |
+
|
169 |
+
basis_vectors = basis_vectors.unsqueeze(1).unsqueeze(2)
|
170 |
+
basis_vectors = basis_vectors @ mat33_x.transpose(-1, -2) @ mat33_z
|
171 |
+
|
172 |
+
|
173 |
+
return basis_vectors
|
174 |
+
|
175 |
+
|
176 |
+
def change_of_basis(motion_3d, basis_vectors=None, project_2d=False):
|
177 |
+
# motion_3d: (B, n_joints, 3, seq_len)
|
178 |
+
# basis_vectors: (B, K, T, 3, 3)
|
179 |
+
|
180 |
+
if basis_vectors is None:
|
181 |
+
motion_proj = motion_3d[:, :, [0, 2], :] # [B, n_joints, 2, seq_len]
|
182 |
+
else:
|
183 |
+
if project_2d: basis_vectors = basis_vectors[:, :, :, [0, 2], :]
|
184 |
+
_, K, seq_len, _, _ = basis_vectors.size()
|
185 |
+
motion_3d = motion_3d.unsqueeze(1).repeat(1, K, 1, 1, 1)
|
186 |
+
motion_3d = motion_3d.permute([0, 1, 4, 3, 2]) # [B, K, J, 3, T] -> [B, K, T, 3, J]
|
187 |
+
motion_proj = basis_vectors @ motion_3d # [B, K, T, 2, 3] @ [B, K, T, 3, J] -> [B, K, T, 2, J]
|
188 |
+
motion_proj = motion_proj.permute([0, 1, 4, 3, 2]) # [B, K, T, 3, J] -> [B, K, J, 3, T]
|
189 |
+
|
190 |
+
return motion_proj
|
191 |
+
|
192 |
+
|
193 |
+
def rotate_and_maybe_project_world(X, angles=None, body_reference=True, project_2d=False):
|
194 |
+
|
195 |
+
out_dim = 2 if project_2d else 3
|
196 |
+
batch_size, n_joints, _, seq_len = X.size()
|
197 |
+
|
198 |
+
if angles is not None:
|
199 |
+
K = angles.size(1)
|
200 |
+
basis_vectors = get_body_basis(X) if body_reference else \
|
201 |
+
torch.eye(3, device=X.device).unsqueeze(0).repeat(batch_size, 1, 1)
|
202 |
+
basis_vectors = rotate_basis_euler(basis_vectors, angles)
|
203 |
+
X_trans = change_of_basis(X, basis_vectors, project_2d=project_2d)
|
204 |
+
X_trans = X_trans.reshape(batch_size * K, n_joints, out_dim, seq_len)
|
205 |
+
else:
|
206 |
+
X_trans = change_of_basis(X, project_2d=project_2d)
|
207 |
+
X_trans = X_trans.reshape(batch_size, n_joints, out_dim, seq_len)
|
208 |
+
|
209 |
+
return X_trans
|
210 |
+
|
211 |
+
|
212 |
+
|
213 |
+
def rotate_and_maybe_project_learning(X, meanpose, stdpose, angles=None, body_reference=True, project_2d=False):
|
214 |
+
batch_size, channels, seq_len = X.size()
|
215 |
+
n_joints = channels // 3
|
216 |
+
X = restore_world_space(X, meanpose, stdpose, n_joints)
|
217 |
+
X = rotate_and_maybe_project_world(X, angles, body_reference, project_2d)
|
218 |
+
X = convert_to_learning_space(X, meanpose, stdpose)
|
219 |
+
return X
|
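rotate_and_maybe_project_learning is the bridge used throughout training: it restores a normalized (B, J*3, T) sequence to world space, applies K view rotations, and optionally projects to 2D. A shape-only sketch, assuming the repository and its requirements are importable; meanpose/stdpose here are dummy statistics, not the released data files:

import torch
from math import pi
from lib.operation import rotate_and_maybe_project_learning

B, J, T, K = 2, 15, 32, 4
X = torch.randn(B, J * 3, T)                        # learning-space 3D motion
meanpose = torch.zeros(J, 3)                        # dummy mean pose
stdpose = torch.ones(J, 3)                          # dummy std pose
angles = torch.rand(1, K, 1, 3) * pi                # K Euler-angle triples, broadcast over batch and time

x2d = rotate_and_maybe_project_learning(X, meanpose, stdpose, angles=angles,
                                         body_reference=True, project_2d=True)
print(x2d.shape)                                    # (B * K, J * 2, T) = (8, 30, 32)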
lib/trainer.py
ADDED
@@ -0,0 +1,298 @@
|
1 |
+
import os
|
2 |
+
import torch
|
3 |
+
import torch.nn as nn
|
4 |
+
import numpy as np
|
5 |
+
import random
|
6 |
+
import lib.network
|
7 |
+
from lib.loss import *
|
8 |
+
from lib.util.general import weights_init, get_model_list, get_scheduler
|
9 |
+
from lib.network import Discriminator
|
10 |
+
from lib.operation import rotate_and_maybe_project_learning
|
11 |
+
|
12 |
+
class BaseTrainer(nn.Module):
|
13 |
+
|
14 |
+
def __init__(self, config):
|
15 |
+
super(BaseTrainer, self).__init__()
|
16 |
+
|
17 |
+
lr = config.lr
|
18 |
+
autoencoder_cls = getattr(lib.network, config.autoencoder.cls)
|
19 |
+
self.autoencoder = autoencoder_cls(config.autoencoder)
|
20 |
+
self.discriminator = Discriminator(config.discriminator)
|
21 |
+
|
22 |
+
# Setup the optimizers
|
23 |
+
beta1 = config.beta1
|
24 |
+
beta2 = config.beta2
|
25 |
+
dis_params = list(self.discriminator.parameters())
|
26 |
+
ae_params = list(self.autoencoder.parameters())
|
27 |
+
self.dis_opt = torch.optim.Adam([p for p in dis_params if p.requires_grad],
|
28 |
+
lr=lr, betas=(beta1, beta2), weight_decay=config.weight_decay)
|
29 |
+
self.ae_opt = torch.optim.Adam([p for p in ae_params if p.requires_grad],
|
30 |
+
lr=lr, betas=(beta1, beta2), weight_decay=config.weight_decay)
|
31 |
+
self.dis_scheduler = get_scheduler(self.dis_opt, config)
|
32 |
+
self.ae_scheduler = get_scheduler(self.ae_opt, config)
|
33 |
+
|
34 |
+
# Network weight initialization
|
35 |
+
self.apply(weights_init(config.init))
|
36 |
+
self.discriminator.apply(weights_init('gaussian'))
|
37 |
+
|
38 |
+
def forward(self, data):
|
39 |
+
x_a, x_b = data["x_a"], data["x_b"]
|
40 |
+
batch_size = x_a.size(0)
|
41 |
+
self.eval()
|
42 |
+
body_a, body_b = self.sample_body_code(batch_size)
|
43 |
+
motion_a = self.autoencoder.encode_motion(x_a)
|
44 |
+
body_a_enc, _ = self.autoencoder.encode_body(x_a)
|
45 |
+
motion_b = self.autoencoder.encode_motion(x_b)
|
46 |
+
body_b_enc, _ = self.autoencoder.encode_body(x_b)
|
47 |
+
x_ab = self.autoencoder.decode(motion_a, body_b)
|
48 |
+
x_ba = self.autoencoder.decode(motion_b, body_a)
|
49 |
+
self.train()
|
50 |
+
return x_ab, x_ba
|
51 |
+
|
52 |
+
def dis_update(self, data, config):
|
53 |
+
raise NotImplementedError
|
54 |
+
|
55 |
+
def ae_update(self, data, config):
|
56 |
+
raise NotImplementedError
|
57 |
+
|
58 |
+
def recon_criterion(self, input, target):
|
59 |
+
raise NotImplementedError
|
60 |
+
|
61 |
+
def update_learning_rate(self):
|
62 |
+
if self.dis_scheduler is not None:
|
63 |
+
self.dis_scheduler.step()
|
64 |
+
if self.ae_scheduler is not None:
|
65 |
+
self.ae_scheduler.step()
|
66 |
+
|
67 |
+
def resume(self, checkpoint_dir, config):
|
68 |
+
# Load generators
|
69 |
+
last_model_name = get_model_list(checkpoint_dir, "autoencoder")
|
70 |
+
state_dict = torch.load(last_model_name)
|
71 |
+
self.autoencoder.load_state_dict(state_dict)
|
72 |
+
iterations = int(last_model_name[-11:-3])
|
73 |
+
# Load discriminators
|
74 |
+
last_model_name = get_model_list(checkpoint_dir, "discriminator")
|
75 |
+
state_dict = torch.load(last_model_name)
|
76 |
+
self.discriminator.load_state_dict(state_dict)
|
77 |
+
# Load optimizers
|
78 |
+
state_dict = torch.load(os.path.join(checkpoint_dir, 'optimizer.pt'))
|
79 |
+
self.dis_opt.load_state_dict(state_dict['discriminator'])
|
80 |
+
self.ae_opt.load_state_dict(state_dict['autoencoder'])
|
81 |
+
# Reinitialize schedulers
|
82 |
+
self.dis_scheduler = get_scheduler(self.dis_opt, config, iterations)
|
83 |
+
self.ae_scheduler = get_scheduler(self.ae_opt, config, iterations)
|
84 |
+
print('Resume from iteration %d' % iterations)
|
85 |
+
return iterations
|
86 |
+
|
87 |
+
def save(self, snapshot_dir, iterations):
|
88 |
+
# Save generators, discriminators, and optimizers
|
89 |
+
ae_name = os.path.join(snapshot_dir, 'autoencoder_%08d.pt' % (iterations + 1))
|
90 |
+
dis_name = os.path.join(snapshot_dir, 'discriminator_%08d.pt' % (iterations + 1))
|
91 |
+
opt_name = os.path.join(snapshot_dir, 'optimizer.pt')
|
92 |
+
torch.save(self.autoencoder.state_dict(), ae_name)
|
93 |
+
torch.save(self.discriminator.state_dict(), dis_name)
|
94 |
+
torch.save({'autoencoder': self.ae_opt.state_dict(), 'discriminator': self.dis_opt.state_dict()}, opt_name)
|
95 |
+
|
96 |
+
def validate(self, data, config):
|
97 |
+
re_dict = self.evaluate(self.autoencoder, data, config)
|
98 |
+
for key, val in re_dict.items():
|
99 |
+
setattr(self, key, val)
|
100 |
+
|
101 |
+
@staticmethod
|
102 |
+
def recon_criterion(input, target):
|
103 |
+
return torch.mean(torch.abs(input - target))
|
104 |
+
|
105 |
+
@classmethod
|
106 |
+
def evaluate(cls, autoencoder, data, config):
|
107 |
+
autoencoder.eval()
|
108 |
+
x_a, x_b = data["x_a"], data["x_b"]
|
109 |
+
x_aba, x_bab = data["x_aba"], data["x_bab"]
|
110 |
+
batch_size, _, seq_len = x_a.size()
|
111 |
+
|
112 |
+
re_dict = {}
|
113 |
+
|
114 |
+
with torch.no_grad(): # 2D eval
|
115 |
+
|
116 |
+
x_a_recon = autoencoder.reconstruct2d(x_a)
|
117 |
+
x_b_recon = autoencoder.reconstruct2d(x_b)
|
118 |
+
x_aba_recon = autoencoder.cross2d(x_a, x_b, x_a)
|
119 |
+
x_bab_recon = autoencoder.cross2d(x_b, x_a, x_b)
|
120 |
+
|
121 |
+
re_dict['loss_val_recon_x'] = cls.recon_criterion(x_a_recon, x_a) + cls.recon_criterion(x_b_recon, x_b)
|
122 |
+
re_dict['loss_val_cross_body'] = cls.recon_criterion(x_aba_recon, x_aba) + cls.recon_criterion(
|
123 |
+
x_bab_recon, x_bab)
|
124 |
+
re_dict['loss_val_total'] = 0.5 * re_dict['loss_val_recon_x'] + 0.5 * re_dict['loss_val_cross_body']
|
125 |
+
|
126 |
+
autoencoder.train()
|
127 |
+
return re_dict
|
128 |
+
|
129 |
+
|
130 |
+
class TransmomoTrainer(BaseTrainer):
|
131 |
+
|
132 |
+
def __init__(self, config):
|
133 |
+
super(TransmomoTrainer, self).__init__(config)
|
134 |
+
|
135 |
+
self.angle_unit = np.pi / (config.K + 1)
|
136 |
+
view_angles = np.array([i * self.angle_unit for i in range(1, config.K + 1)])
|
137 |
+
x_angles = view_angles if config.rotation_axes[0] else np.array([0])
|
138 |
+
z_angles = view_angles if config.rotation_axes[1] else np.array([0])
|
139 |
+
y_angles = view_angles if config.rotation_axes[2] else np.array([0])
|
140 |
+
x_angles, z_angles, y_angles = np.meshgrid(x_angles, z_angles, y_angles)
|
141 |
+
angles = np.stack([x_angles.flatten(), z_angles.flatten(), y_angles.flatten()], axis=1)
|
142 |
+
self.angles = torch.tensor(angles).float().cuda()
|
143 |
+
self.rotation_axes = torch.tensor(config.rotation_axes).float().cuda()
|
144 |
+
self.rotation_axes_mask = [(_ > 0) for _ in config.rotation_axes]
|
145 |
+
|
146 |
+
def dis_update(self, data, config):
|
147 |
+
|
148 |
+
x_a = data["x"]
|
149 |
+
x_s = data["x_s"] # the limb-scaled version of x_a
|
150 |
+
meanpose = data["meanpose"][0]
|
151 |
+
stdpose = data["stdpose"][0]
|
152 |
+
|
153 |
+
self.dis_opt.zero_grad()
|
154 |
+
|
155 |
+
# encode
|
156 |
+
motion_a = self.autoencoder.encode_motion(x_a)
|
157 |
+
body_a, body_a_seq = self.autoencoder.encode_body(x_a)
|
158 |
+
view_a, view_a_seq = self.autoencoder.encode_view(x_a)
|
159 |
+
|
160 |
+
motion_s = self.autoencoder.encode_motion(x_s)
|
161 |
+
body_s, body_s_seq = self.autoencoder.encode_body(x_s)
|
162 |
+
view_s, view_s_seq = self.autoencoder.encode_view(x_s)
|
163 |
+
|
164 |
+
# decode (reconstruct, transform)
|
165 |
+
inds = random.sample(list(range(self.angles.size(0))), config.K)
|
166 |
+
angles = self.angles[inds].clone().detach() # [K, 3]
|
167 |
+
angles += self.angle_unit * self.rotation_axes * torch.randn([3], device=x_a.device)
|
168 |
+
angles = angles.unsqueeze(0).unsqueeze(2) # [B=1, K, T=1, 3]
|
169 |
+
|
170 |
+
X_a_recon = self.autoencoder.decode(motion_a, body_a, view_a)
|
171 |
+
x_a_trans = rotate_and_maybe_project_learning(X_a_recon, meanpose, stdpose, angles=angles,
|
172 |
+
body_reference=config.autoencoder.body_reference, project_2d=True)
|
173 |
+
|
174 |
+
x_a_exp = x_a.repeat_interleave(config.K, dim=0)
|
175 |
+
|
176 |
+
self.loss_dis_trans = self.discriminator.calc_dis_loss(x_a_trans.detach(), x_a_exp)
|
177 |
+
|
178 |
+
if config.trans_gan_ls_w > 0:
|
179 |
+
X_s_recon = self.autoencoder.decode(motion_s, body_s, view_s)
|
180 |
+
x_s_trans = rotate_and_maybe_project_learning(X_s_recon, meanpose, stdpose, angles=angles,
|
181 |
+
body_reference=config.autoencoder.body_reference, project_2d=True)
|
182 |
+
x_s_exp = x_s.repeat_interleave(config.K, dim=0)
|
183 |
+
self.loss_dis_trans_ls = self.discriminator.calc_dis_loss(x_s_trans.detach(), x_s_exp)
|
184 |
+
else:
|
185 |
+
self.loss_dis_trans_ls = 0
|
186 |
+
|
187 |
+
self.loss_dis_total = config.trans_gan_w * self.loss_dis_trans + \
|
188 |
+
config.trans_gan_ls_w * self.loss_dis_trans_ls
|
189 |
+
|
190 |
+
self.loss_dis_total.backward()
|
191 |
+
self.dis_opt.step()
|
192 |
+
|
193 |
+
def ae_update(self, data, config):
|
194 |
+
|
195 |
+
x_a = data["x"]
|
196 |
+
x_s = data["x_s"]
|
197 |
+
meanpose = data["meanpose"][0]
|
198 |
+
stdpose = data["stdpose"][0]
|
199 |
+
self.ae_opt.zero_grad()
|
200 |
+
|
201 |
+
# encode
|
202 |
+
motion_a = self.autoencoder.encode_motion(x_a)
|
203 |
+
body_a, body_a_seq = self.autoencoder.encode_body(x_a)
|
204 |
+
view_a, view_a_seq = self.autoencoder.encode_view(x_a)
|
205 |
+
|
206 |
+
motion_s = self.autoencoder.encode_motion(x_s)
|
207 |
+
body_s, body_s_seq = self.autoencoder.encode_body(x_s)
|
208 |
+
view_s, view_s_seq = self.autoencoder.encode_view(x_s)
|
209 |
+
|
210 |
+
# invariance loss
|
211 |
+
self.loss_inv_v_ls = self.recon_criterion(view_a, view_s) if config.inv_v_ls_w > 0 else 0
|
212 |
+
self.loss_inv_m_ls = self.recon_criterion(motion_a, motion_s) if config.inv_m_ls_w > 0 else 0
|
213 |
+
|
214 |
+
# body triplet loss
|
215 |
+
if config.triplet_b_w > 0:
|
216 |
+
self.loss_triplet_b = triplet_margin_loss(
|
217 |
+
body_a_seq, body_s_seq,
|
218 |
+
neg_range=config.triplet_neg_range,
|
219 |
+
margin=config.triplet_margin)
|
220 |
+
else:
|
221 |
+
self.loss_triplet_b = 0
|
222 |
+
|
223 |
+
# reconstruction
|
224 |
+
X_a_recon = self.autoencoder.decode(motion_a, body_a, view_a)
|
225 |
+
x_a_recon = rotate_and_maybe_project_learning(X_a_recon, meanpose, stdpose, angles=None,
|
226 |
+
body_reference=config.autoencoder.body_reference, project_2d=True)
|
227 |
+
|
228 |
+
X_s_recon = self.autoencoder.decode(motion_s, body_s, view_s)
|
229 |
+
x_s_recon = rotate_and_maybe_project_learning(X_s_recon, meanpose, stdpose, angles=None,
|
230 |
+
body_reference=config.autoencoder.body_reference, project_2d=True)
|
231 |
+
|
232 |
+
self.loss_recon_x = 0.5 * self.recon_criterion(x_a_recon, x_a) +\
|
233 |
+
0.5 * self.recon_criterion(x_s_recon, x_s)
|
234 |
+
|
235 |
+
# cross reconstruction
|
236 |
+
X_as_recon = self.autoencoder.decode(motion_a, body_s, view_s)
|
237 |
+
x_as_recon = rotate_and_maybe_project_learning(X_as_recon, meanpose, stdpose, angles=None,
|
238 |
+
body_reference=config.autoencoder.body_reference, project_2d=True)
|
239 |
+
|
240 |
+
X_sa_recon = self.autoencoder.decode(motion_s, body_a, view_a)
|
241 |
+
x_sa_recon = rotate_and_maybe_project_learning(X_sa_recon, meanpose, stdpose, angles=None,
|
242 |
+
body_reference=config.autoencoder.body_reference, project_2d=True)
|
243 |
+
|
244 |
+
self.loss_cross_x = 0.5 * self.recon_criterion(x_as_recon, x_s) + 0.5 * self.recon_criterion(x_sa_recon, x_a)
|
245 |
+
|
246 |
+
# apply transformation
|
247 |
+
inds = random.sample(list(range(self.angles.size(0))), config.K)
|
248 |
+
angles = self.angles[inds].clone().detach()
|
249 |
+
angles += self.angle_unit * self.rotation_axes * torch.randn([3], device=x_a.device)
|
250 |
+
angles = angles.unsqueeze(0).unsqueeze(2)
|
251 |
+
|
252 |
+
x_a_trans = rotate_and_maybe_project_learning(X_a_recon, meanpose, stdpose, angles=angles,
|
253 |
+
body_reference=config.autoencoder.body_reference, project_2d=True)
|
254 |
+
x_s_trans = rotate_and_maybe_project_learning(X_s_recon, meanpose, stdpose, angles=angles,
|
255 |
+
body_reference=config.autoencoder.body_reference, project_2d=True)
|
256 |
+
|
257 |
+
# GAN loss
|
258 |
+
self.loss_gan_trans = self.discriminator.calc_gen_loss(x_a_trans)
|
259 |
+
self.loss_gan_trans_ls = self.discriminator.calc_gen_loss(x_s_trans) if config.trans_gan_ls_w > 0 else 0
|
260 |
+
|
261 |
+
# encode again
|
262 |
+
motion_a_trans = self.autoencoder.encode_motion(x_a_trans)
|
263 |
+
body_a_trans, _ = self.autoencoder.encode_body(x_a_trans)
|
264 |
+
view_a_trans, view_a_trans_seq = self.autoencoder.encode_view(x_a_trans)
|
265 |
+
|
266 |
+
motion_s_trans = self.autoencoder.encode_motion(x_s_trans)
|
267 |
+
body_s_trans, _ = self.autoencoder.encode_body(x_s_trans)
|
268 |
+
|
269 |
+
self.loss_inv_m_trans = 0.5 * self.recon_criterion(motion_a_trans, motion_a.repeat_interleave(config.K, dim=0)) + \
|
270 |
+
0.5 * self.recon_criterion(motion_s_trans, motion_s.repeat_interleave(config.K, dim=0))
|
271 |
+
self.loss_inv_b_trans = 0.5 * self.recon_criterion(body_a_trans, body_a.repeat_interleave(config.K, dim=0)) + \
|
272 |
+
0.5 * self.recon_criterion(body_s_trans, body_s.repeat_interleave(config.K, dim=0))
|
273 |
+
|
274 |
+
# view triplet loss
|
275 |
+
if config.triplet_v_w > 0:
|
276 |
+
view_a_seq_exp = view_a_seq.repeat_interleave(config.K, dim=0)
|
277 |
+
self.loss_triplet_v = triplet_margin_loss(
|
278 |
+
view_a_seq_exp, view_a_trans_seq,
|
279 |
+
neg_range=config.triplet_neg_range, margin=config.triplet_margin)
|
280 |
+
else:
|
281 |
+
self.loss_triplet_v = 0
|
282 |
+
|
283 |
+
# add all losses
|
284 |
+
self.loss_total = torch.tensor(0.).float().cuda()
|
285 |
+
self.loss_total += config.recon_x_w * self.loss_recon_x
|
286 |
+
self.loss_total += config.cross_x_w * self.loss_cross_x
|
287 |
+
self.loss_total += config.inv_v_ls_w * self.loss_inv_v_ls
|
288 |
+
self.loss_total += config.inv_m_ls_w * self.loss_inv_m_ls
|
289 |
+
self.loss_total += config.inv_b_trans_w * self.loss_inv_b_trans
|
290 |
+
self.loss_total += config.inv_m_trans_w * self.loss_inv_m_trans
|
291 |
+
self.loss_total += config.trans_gan_w * self.loss_gan_trans
|
292 |
+
self.loss_total += config.trans_gan_ls_w * self.loss_gan_trans_ls
|
293 |
+
self.loss_total += config.triplet_b_w * self.loss_triplet_b
|
294 |
+
self.loss_total += config.triplet_v_w * self.loss_triplet_v
|
295 |
+
|
296 |
+
self.loss_total.backward()
|
297 |
+
self.ae_opt.step()
|
298 |
+
|
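The driver script that consumes TransmomoTrainer is not part of this commit; the loop below is only a hypothetical sketch of how dis_update and ae_update are typically alternated (data_loader, config and n_iter are placeholders):

from lib.util.general import to_gpu

def train(trainer, data_loader, config, n_iter):
    # hypothetical training loop, not part of this commit
    it = 0
    while it < n_iter:
        for data in data_loader:
            data = to_gpu(data)
            trainer.dis_update(data, config)        # discriminator step
            trainer.ae_update(data, config)         # autoencoder / generator step
            trainer.update_learning_rate()
            it += 1
            if it >= n_iter:
                break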
lib/util/__init__.py
ADDED
File without changes
|
lib/util/__pycache__/__init__.cpython-37.pyc
ADDED
Binary file (146 Bytes).
|
|
lib/util/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (150 Bytes).
|
|
lib/util/__pycache__/general.cpython-37.pyc
ADDED
Binary file (13.3 kB).
|
|
lib/util/__pycache__/general.cpython-38.pyc
ADDED
Binary file (13.4 kB).
|
|
lib/util/__pycache__/motion.cpython-37.pyc
ADDED
Binary file (8.05 kB).
|
|
lib/util/__pycache__/motion.cpython-38.pyc
ADDED
Binary file (8.07 kB).
|
|
lib/util/__pycache__/visualization.cpython-37.pyc
ADDED
Binary file (12.6 kB).
|
|
lib/util/__pycache__/visualization.cpython-38.pyc
ADDED
Binary file (12.7 kB).
|
|
lib/util/general.py
ADDED
@@ -0,0 +1,361 @@
|
1 |
+
from PIL import Image
|
2 |
+
import os
|
3 |
+
import json
|
4 |
+
import logging
|
5 |
+
import shutil
|
6 |
+
import csv
|
7 |
+
# from lib.network.munit import Vgg16
|
8 |
+
from torch.autograd import Variable
|
9 |
+
from torch.optim import lr_scheduler
|
10 |
+
from easydict import EasyDict as edict
|
11 |
+
|
12 |
+
import torch
|
13 |
+
import torch.nn as nn
|
14 |
+
import os
|
15 |
+
import math
|
16 |
+
import torchvision.utils as vutils
|
17 |
+
import yaml
|
18 |
+
import numpy as np
|
19 |
+
import torch.nn.init as init
|
20 |
+
import time
|
21 |
+
|
22 |
+
|
23 |
+
def get_config(config_path):
|
24 |
+
with open(config_path, 'r') as stream:
|
25 |
+
config = yaml.load(stream, Loader=yaml.SafeLoader)
|
26 |
+
config = edict(config)
|
27 |
+
_, config_filename = os.path.split(config_path)
|
28 |
+
config_name, _ = os.path.splitext(config_filename)
|
29 |
+
config.name = config_name
|
30 |
+
return config
|
31 |
+
|
32 |
+
class TextLogger:
|
33 |
+
|
34 |
+
def __init__(self, log_path):
|
35 |
+
self.log_path = log_path
|
36 |
+
with open(self.log_path, "w") as f:
|
37 |
+
f.write("")
|
38 |
+
def log(self, log):
|
39 |
+
with open(self.log_path, "a+") as f:
|
40 |
+
f.write(log + "\n")
|
41 |
+
|
42 |
+
def eformat(f, prec):
|
43 |
+
s = "%.*e"%(prec, f)
|
44 |
+
mantissa, exp = s.split('e')
|
45 |
+
# add 1 to digits as 1 is taken by sign +/-
|
46 |
+
return "%se%d"%(mantissa, int(exp))
|
47 |
+
|
48 |
+
|
49 |
+
def __write_images(image_outputs, display_image_num, file_name):
|
50 |
+
image_outputs = [images.expand(-1, 3, -1, -1) for images in image_outputs] # expand gray-scale images to 3 channels
|
51 |
+
image_tensor = torch.cat([images[:display_image_num] for images in image_outputs], 0)
|
52 |
+
image_grid = vutils.make_grid(image_tensor.data, nrow=display_image_num, padding=0, normalize=True)
|
53 |
+
vutils.save_image(image_grid, file_name, nrow=1)
|
54 |
+
|
55 |
+
|
56 |
+
def write_2images(image_outputs, display_image_num, image_directory, postfix):
|
57 |
+
n = len(image_outputs)
|
58 |
+
__write_images(image_outputs[0:n//2], display_image_num, '%s/gen_a2b_%s.jpg' % (image_directory, postfix))
|
59 |
+
__write_images(image_outputs[n//2:n], display_image_num, '%s/gen_b2a_%s.jpg' % (image_directory, postfix))
|
60 |
+
|
61 |
+
|
62 |
+
def write_one_row_html(html_file, iterations, img_filename, all_size):
|
63 |
+
html_file.write("<h3>iteration [%d] (%s)</h3>" % (iterations,img_filename.split('/')[-1]))
|
64 |
+
html_file.write("""
|
65 |
+
<p><a href="%s">
|
66 |
+
<img src="%s" style="width:%dpx">
|
67 |
+
</a><br>
|
68 |
+
<p>
|
69 |
+
""" % (img_filename, img_filename, all_size))
|
70 |
+
return
|
71 |
+
|
72 |
+
|
73 |
+
def write_html(filename, iterations, image_save_iterations, image_directory, all_size=1536):
|
74 |
+
html_file = open(filename, "w")
|
75 |
+
html_file.write('''
|
76 |
+
<!DOCTYPE html>
|
77 |
+
<html>
|
78 |
+
<head>
|
79 |
+
<title>Experiment name = %s</title>
|
80 |
+
<meta http-equiv="refresh" content="30">
|
81 |
+
</head>
|
82 |
+
<body>
|
83 |
+
''' % os.path.basename(filename))
|
84 |
+
html_file.write("<h3>current</h3>")
|
85 |
+
write_one_row_html(html_file, iterations, '%s/gen_a2b_train_current.jpg' % (image_directory), all_size)
|
86 |
+
write_one_row_html(html_file, iterations, '%s/gen_b2a_train_current.jpg' % (image_directory), all_size)
|
87 |
+
for j in range(iterations, image_save_iterations-1, -1):
|
88 |
+
if j % image_save_iterations == 0:
|
89 |
+
write_one_row_html(html_file, j, '%s/gen_a2b_test_%08d.jpg' % (image_directory, j), all_size)
|
90 |
+
write_one_row_html(html_file, j, '%s/gen_b2a_test_%08d.jpg' % (image_directory, j), all_size)
|
91 |
+
write_one_row_html(html_file, j, '%s/gen_a2b_train_%08d.jpg' % (image_directory, j), all_size)
|
92 |
+
write_one_row_html(html_file, j, '%s/gen_b2a_train_%08d.jpg' % (image_directory, j), all_size)
|
93 |
+
html_file.write("</body></html>")
|
94 |
+
html_file.close()
|
95 |
+
|
96 |
+
|
97 |
+
def write_loss(iterations, trainer, train_writer):
|
98 |
+
members = [attr for attr in dir(trainer) \
|
99 |
+
if not callable(getattr(trainer, attr)) and not attr.startswith("__") and ('loss' in attr or 'grad' in attr or 'nwd' in attr)]
|
100 |
+
for m in members:
|
101 |
+
train_writer.add_scalar(m, getattr(trainer, m), iterations + 1)
|
102 |
+
|
103 |
+
|
104 |
+
def slerp(val, low, high):
|
105 |
+
"""
|
106 |
+
original: Animating Rotation with Quaternion Curves, Ken Shoemake
|
107 |
+
https://arxiv.org/abs/1609.04468
|
108 |
+
Code: https://github.com/soumith/dcgan.torch/issues/14, Tom White
|
109 |
+
"""
|
110 |
+
omega = np.arccos(np.dot(low / np.linalg.norm(low), high / np.linalg.norm(high)))
|
111 |
+
so = np.sin(omega)
|
112 |
+
return np.sin((1.0 - val) * omega) / so * low + np.sin(val * omega) / so * high
|
113 |
+
|
114 |
+
|
115 |
+
def get_slerp_interp(nb_latents, nb_interp, z_dim):
|
116 |
+
"""
|
117 |
+
modified from: PyTorch inference for "Progressive Growing of GANs" with CelebA snapshot
|
118 |
+
https://github.com/ptrblck/prog_gans_pytorch_inference
|
119 |
+
"""
|
120 |
+
|
121 |
+
latent_interps = np.empty(shape=(0, z_dim), dtype=np.float32)
|
122 |
+
for _ in range(nb_latents):
|
123 |
+
low = np.random.randn(z_dim)
|
124 |
+
high = np.random.randn(z_dim) # low + np.random.randn(512) * 0.7
|
125 |
+
interp_vals = np.linspace(0, 1, num=nb_interp)
|
126 |
+
latent_interp = np.array([slerp(v, low, high) for v in interp_vals],
|
127 |
+
dtype=np.float32)
|
128 |
+
latent_interps = np.vstack((latent_interps, latent_interp))
|
129 |
+
|
130 |
+
return latent_interps[:, :, np.newaxis, np.newaxis]
|
131 |
+
|
132 |
+
|
133 |
+
# Get model list for resume
|
134 |
+
def get_model_list(dirname, key):
|
135 |
+
if os.path.exists(dirname) is False:
|
136 |
+
return None
|
137 |
+
gen_models = [os.path.join(dirname, f) for f in os.listdir(dirname) if
|
138 |
+
os.path.isfile(os.path.join(dirname, f)) and key in f and ".pt" in f]
|
139 |
+
if not gen_models:
|
140 |
+
return None
|
141 |
+
gen_models.sort()
|
142 |
+
last_model_name = gen_models[-1]
|
143 |
+
return last_model_name
|
144 |
+
|
145 |
+
|
146 |
+
def get_scheduler(optimizer, hyperparameters, iterations=-1):
|
147 |
+
if 'lr_policy' not in hyperparameters or hyperparameters['lr_policy'] == 'constant':
|
148 |
+
scheduler = None # constant scheduler
|
149 |
+
elif hyperparameters['lr_policy'] == 'step':
|
150 |
+
scheduler = lr_scheduler.StepLR(optimizer, step_size=hyperparameters['step_size'],
|
151 |
+
gamma=hyperparameters['gamma'], last_epoch=iterations)
|
152 |
+
else:
|
153 |
+
return NotImplementedError('learning rate policy [%s] is not implemented', hyperparameters['lr_policy'])
|
154 |
+
return scheduler
|
155 |
+
|
156 |
+
|
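For the 'step' policy, get_scheduler only reads three keys from the hyperparameters; a minimal sketch (the optimizer and the values are made up for illustration):

import torch
from lib.util.general import get_scheduler

opt = torch.optim.Adam([torch.nn.Parameter(torch.zeros(1))], lr=1e-3)
hp = {'lr_policy': 'step', 'step_size': 10000, 'gamma': 0.5}
sched = get_scheduler(opt, hp, iterations=-1)       # StepLR halving the lr every 10k steps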
157 |
+
def weights_init(init_type='gaussian'):
|
158 |
+
def init_fun(m):
|
159 |
+
classname = m.__class__.__name__
|
160 |
+
if (classname.find('Conv') == 0 or classname.find('Linear') == 0) and hasattr(m, 'weight'):
|
161 |
+
# print m.__class__.__name__
|
162 |
+
if init_type == 'gaussian':
|
163 |
+
init.normal_(m.weight.data, 0.0, 0.02)
|
164 |
+
elif init_type == 'xavier':
|
165 |
+
init.xavier_normal_(m.weight.data, gain=math.sqrt(2))
|
166 |
+
elif init_type == 'kaiming':
|
167 |
+
init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
|
168 |
+
elif init_type == 'orthogonal':
|
169 |
+
init.orthogonal_(m.weight.data, gain=math.sqrt(2))
|
170 |
+
elif init_type == 'default':
|
171 |
+
pass
|
172 |
+
else:
|
173 |
+
assert 0, "Unsupported initialization: {}".format(init_type)
|
174 |
+
if hasattr(m, 'bias') and m.bias is not None:
|
175 |
+
init.constant_(m.bias.data, 0.0)
|
176 |
+
|
177 |
+
return init_fun
|
178 |
+
|
179 |
+
|
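weights_init returns a closure meant to be passed to nn.Module.apply; for example (the small network here is just an illustration):

import torch.nn as nn
from lib.util.general import weights_init

net = nn.Sequential(nn.Conv1d(30, 64, 3), nn.ReLU(), nn.Conv1d(64, 30, 3))
net.apply(weights_init('kaiming'))                  # Kaiming-normal init for every Conv/Linear layer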
180 |
+
class Timer:
|
181 |
+
def __init__(self, msg):
|
182 |
+
self.msg = msg
|
183 |
+
self.start_time = None
|
184 |
+
|
185 |
+
def __enter__(self):
|
186 |
+
self.start_time = time.time()
|
187 |
+
|
188 |
+
def __exit__(self, exc_type, exc_value, exc_tb):
|
189 |
+
print(self.msg % (time.time() - self.start_time))
|
190 |
+
|
191 |
+
|
192 |
+
class TrainClock(object):
|
193 |
+
def __init__(self):
|
194 |
+
self.epoch = 1
|
195 |
+
self.minibatch = 0
|
196 |
+
self.step = 0
|
197 |
+
|
198 |
+
def tick(self):
|
199 |
+
self.minibatch += 1
|
200 |
+
self.step += 1
|
201 |
+
|
202 |
+
def tock(self):
|
203 |
+
self.epoch += 1
|
204 |
+
self.minibatch = 0
|
205 |
+
|
206 |
+
def make_checkpoint(self):
|
207 |
+
return {
|
208 |
+
'epoch': self.epoch,
|
209 |
+
'minibatch': self.minibatch,
|
210 |
+
'step': self.step
|
211 |
+
}
|
212 |
+
|
213 |
+
def restore_checkpoint(self, clock_dict):
|
214 |
+
self.epoch = clock_dict['epoch']
|
215 |
+
self.minibatch = clock_dict['minibatch']
|
216 |
+
self.step = clock_dict['step']
|
217 |
+
|
218 |
+
|
219 |
+
class Table(object):
|
220 |
+
def __init__(self, filename):
|
221 |
+
'''
|
222 |
+
create a table to record experiment results that can be opened in Excel
|
223 |
+
:param filename: file name, which should end with '.csv'
|
224 |
+
'''
|
225 |
+
assert '.csv' in filename
|
226 |
+
self.filename = filename
|
227 |
+
|
228 |
+
@staticmethod
|
229 |
+
def merge_headers(header1, header2):
|
230 |
+
#return list(set(header1 + header2))
|
231 |
+
if len(header1) > len(header2):
|
232 |
+
return header1
|
233 |
+
else:
|
234 |
+
return header2
|
235 |
+
|
236 |
+
def write(self, ordered_dict):
|
237 |
+
'''
|
238 |
+
write an entry
|
239 |
+
:param ordered_dict: something like {'name':'exp1', 'acc':90.5, 'epoch':50}
|
240 |
+
:return:
|
241 |
+
'''
|
242 |
+
if not os.path.exists(self.filename):
|
243 |
+
headers = list(ordered_dict.keys())
|
244 |
+
prev_rec = None
|
245 |
+
else:
|
246 |
+
with open(self.filename) as f:
|
247 |
+
reader = csv.DictReader(f)
|
248 |
+
headers = reader.fieldnames
|
249 |
+
prev_rec = [row for row in reader]
|
250 |
+
headers = self.merge_headers(headers, list(ordered_dict.keys()))
|
251 |
+
|
252 |
+
with open(self.filename, 'w', newline='') as f:
|
253 |
+
writer = csv.DictWriter(f, headers)
|
254 |
+
writer.writeheader()
|
255 |
+
if prev_rec is not None:
|
256 |
+
writer.writerows(prev_rec)
|
257 |
+
writer.writerow(ordered_dict)
|
258 |
+
|
259 |
+
|
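Table appends one row per call and rewrites the header whenever new columns appear; a small usage sketch (the file name and values are arbitrary):

from lib.util.general import Table

table = Table('results.csv')
table.write({'name': 'exp1', 'acc': 90.5, 'epoch': 50})
table.write({'name': 'exp2', 'acc': 91.2, 'epoch': 50})   # appended under the same header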
260 |
+
class WorklogLogger:
|
261 |
+
def __init__(self, log_file):
|
262 |
+
logging.basicConfig(filename=log_file,
|
263 |
+
level=logging.DEBUG,
|
264 |
+
format='%(asctime)s - %(threadName)s - %(levelname)s - %(message)s')
|
265 |
+
|
266 |
+
self.logger = logging.getLogger()
|
267 |
+
|
268 |
+
def put_line(self, line):
|
269 |
+
self.logger.info(line)
|
270 |
+
|
271 |
+
|
272 |
+
class AverageMeter(object):
|
273 |
+
"""Computes and stores the average and current value"""
|
274 |
+
|
275 |
+
def __init__(self, name):
|
276 |
+
self.name = name
|
277 |
+
self.reset()
|
278 |
+
|
279 |
+
def reset(self):
|
280 |
+
self.val = 0
|
281 |
+
self.avg = 0
|
282 |
+
self.sum = 0
|
283 |
+
self.count = 0
|
284 |
+
|
285 |
+
def update(self, val, n=1):
|
286 |
+
self.val = val
|
287 |
+
self.sum += val * n
|
288 |
+
self.count += n
|
289 |
+
self.avg = self.sum / self.count
|
290 |
+
|
291 |
+
|
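AverageMeter is the usual running-average helper; for example:

from lib.util.general import AverageMeter

loss_meter = AverageMeter('loss')
for v in [0.9, 0.7, 0.5]:
    loss_meter.update(v, n=1)
print(loss_meter.avg)                               # ~0.7, the mean of the three updates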
292 |
+
def save_args(args, save_dir):
|
293 |
+
param_path = os.path.join(save_dir, 'params.json')
|
294 |
+
|
295 |
+
with open(param_path, 'w') as fp:
|
296 |
+
json.dump(args.__dict__, fp, indent=4, sort_keys=True)
|
297 |
+
|
298 |
+
|
299 |
+
def ensure_dir(path):
|
300 |
+
"""
|
301 |
+
create the directory if it does not already exist
|
302 |
+
:param path: directory path to create
|
303 |
+
:return:
|
304 |
+
"""
|
305 |
+
if not os.path.exists(path):
|
306 |
+
os.makedirs(path)
|
307 |
+
|
308 |
+
|
309 |
+
def ensure_dirs(paths):
|
310 |
+
"""
|
311 |
+
create paths by first checking their existence
|
312 |
+
:param paths: list of path
|
313 |
+
:return:
|
314 |
+
"""
|
315 |
+
if isinstance(paths, list) and not isinstance(paths, str):
|
316 |
+
for path in paths:
|
317 |
+
ensure_dir(path)
|
318 |
+
else:
|
319 |
+
ensure_dir(paths)
|
320 |
+
|
321 |
+
|
322 |
+
def remkdir(path):
|
323 |
+
"""
|
324 |
+
if dir exists, remove it and create a new one
|
325 |
+
:param path:
|
326 |
+
:return:
|
327 |
+
"""
|
328 |
+
if os.path.exists(path):
|
329 |
+
shutil.rmtree(path)
|
330 |
+
os.makedirs(path)
|
331 |
+
|
332 |
+
|
333 |
+
def cycle(iterable):
|
334 |
+
while True:
|
335 |
+
for x in iterable:
|
336 |
+
yield x
|
337 |
+
|
338 |
+
|
339 |
+
def save_image(image_numpy, image_path):
|
340 |
+
image_pil = Image.fromarray(image_numpy)
|
341 |
+
image_pil.save(image_path)
|
342 |
+
|
343 |
+
|
344 |
+
def pad_to_16x(x):
|
345 |
+
if x % 16 > 0:
|
346 |
+
return x - x % 16 + 16
|
347 |
+
return x
|
348 |
+
|
349 |
+
|
350 |
+
def pad_to_height(tar_height, img_height, img_width):
|
351 |
+
scale = tar_height / img_height
|
352 |
+
h = pad_to_16x(tar_height)
|
353 |
+
w = pad_to_16x(int(img_width * scale))
|
354 |
+
return h, w, scale
|
355 |
+
|
356 |
+
|
357 |
+
def to_gpu(data):
|
358 |
+
for key, item in data.items():
|
359 |
+
if torch.is_tensor(item):
|
360 |
+
data[key] = item.cuda()
|
361 |
+
return data
|
lib/util/global_norm.py
ADDED
@@ -0,0 +1,29 @@
|
1 |
+
import numpy as np
|
2 |
+
|
3 |
+
def get_box(pose):
|
4 |
+
#input: pose([15,2])
|
5 |
+
return(np.min(pose[:,0]), np.max(pose[:,0]), np.min(pose[:,1]), np.max(pose[:,1]))
|
6 |
+
|
7 |
+
def get_height(pose):
|
8 |
+
#input: pose([15,2])
|
9 |
+
mean_ankle = (pose[14]+pose[11])/2
|
10 |
+
nose = pose[0]
|
11 |
+
return np.linalg.norm(mean_ankle-nose)
|
12 |
+
|
13 |
+
def get_base_mean(pose):
|
14 |
+
#input: pose([15,2])
|
15 |
+
x1, x2, y1, y2 = get_box(pose)
|
16 |
+
return np.array([(x1+x2)/2, y2])
|
17 |
+
|
18 |
+
def global_norm(driving_npy, target_npy):
|
19 |
+
#input: pose([15,2,frame1]), pose([15,2,frame2])
|
20 |
+
target_mean = np.mean(target_npy, axis=2)
|
21 |
+
driving_mean = np.mean(driving_npy, axis=2)
|
22 |
+
k2 = get_height(target_mean)/get_height(driving_mean)
|
23 |
+
target_mean_base = get_base_mean(target_mean)
|
24 |
+
driving_mean_base = get_base_mean(driving_mean)
|
25 |
+
driving_npy_permuted = np.transpose(driving_npy, axes=[2, 0, 1])
|
26 |
+
k = [1, k2]
|
27 |
+
normalized_permuted = (driving_npy_permuted-driving_mean_base)*k+target_mean_base
|
28 |
+
normalized = np.transpose(normalized_permuted, axes=[1,2,0])
|
29 |
+
return normalized # pose([15,2,frame1])
|
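global_norm rescales and re-bases a driving pose sequence so that its body height and base position match a target sequence; a sketch on random stand-in arrays (shapes follow the comments above):

import numpy as np
from lib.util.global_norm import global_norm

driving = np.random.rand(15, 2, 120)                # 15 joints, (x, y), 120 frames
target = np.random.rand(15, 2, 80)
aligned = global_norm(driving, target)
print(aligned.shape)                                # (15, 2, 120), same length as the driving sequence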
lib/util/motion.py
ADDED
@@ -0,0 +1,309 @@
|
1 |
+
from scipy.ndimage import gaussian_filter1d
|
2 |
+
import numpy as np
|
3 |
+
import json
|
4 |
+
import os
|
5 |
+
import torch
|
6 |
+
|
7 |
+
|
8 |
+
def preprocess_test(motion, meanpose, stdpose, unit=128):
|
9 |
+
|
10 |
+
motion = motion * unit
|
11 |
+
|
12 |
+
motion[1, :, :] = (motion[2, :, :] + motion[5, :, :]) / 2
|
13 |
+
motion[8, :, :] = (motion[9, :, :] + motion[12, :, :]) / 2
|
14 |
+
|
15 |
+
start = motion[8, :, 0]
|
16 |
+
|
17 |
+
motion = localize_motion(motion)
|
18 |
+
motion = normalize_motion(motion, meanpose, stdpose)
|
19 |
+
|
20 |
+
return motion, start
|
21 |
+
|
22 |
+
|
23 |
+
def postprocess(motion, meanpose, stdpose, unit=128, start=None):
|
24 |
+
|
25 |
+
motion = motion.detach().cpu().numpy()[0].reshape(-1, 2, motion.shape[-1])
|
26 |
+
motion = normalize_motion_inv(motion, meanpose, stdpose)
|
27 |
+
motion = globalize_motion(motion, start=start)
|
28 |
+
# motion = motion / unit
|
29 |
+
|
30 |
+
return motion
|
31 |
+
|
32 |
+
|
33 |
+
def preprocess_mixamo(motion, unit=128):
|
34 |
+
|
35 |
+
_, D, _ = motion.shape
|
36 |
+
horizontal_dim = 0
|
37 |
+
vertical_dim = D - 1
|
38 |
+
|
39 |
+
motion[1, :, :] = (motion[2, :, :] + motion[5, :, :]) / 2
|
40 |
+
motion[8, :, :] = (motion[9, :, :] + motion[12, :, :]) / 2
|
41 |
+
|
42 |
+
# rotate 180
|
43 |
+
motion[:, horizontal_dim, :] = - motion[:, horizontal_dim, :]
|
44 |
+
motion[:, vertical_dim, :] = - motion[:, vertical_dim, :]
|
45 |
+
|
46 |
+
motion = motion * unit
|
47 |
+
|
48 |
+
return motion
|
49 |
+
|
50 |
+
|
51 |
+
def rotate_motion_3d(motion3d, change_of_basis):
|
52 |
+
|
53 |
+
if change_of_basis is not None: motion3d = change_of_basis @ motion3d
|
54 |
+
|
55 |
+
return motion3d
|
56 |
+
|
57 |
+
|
58 |
+
def limb_scale_motion_2d(motion2d, global_range, local_range):
|
59 |
+
|
60 |
+
global_scale = global_range[0] + np.random.random() * (global_range[1] - global_range[0])
|
61 |
+
local_scales = local_range[0] + np.random.random([8]) * (local_range[1] - local_range[0])
|
62 |
+
motion_scale = scale_limbs(motion2d, global_scale, local_scales)
|
63 |
+
|
64 |
+
return motion_scale
|
65 |
+
|
66 |
+
|
67 |
+
def localize_motion(motion):
|
68 |
+
"""
|
69 |
+
Motion fed into our network is the local motion, i.e. coordinates relative to the hip joint.
|
70 |
+
This function removes global motion of the hip joint, and instead represents global motion with velocity
|
71 |
+
"""
|
72 |
+
|
73 |
+
D = motion.shape[1]
|
74 |
+
|
75 |
+
# subtract centers to local coordinates
|
76 |
+
centers = motion[8, :, :] # N_dim x T
|
77 |
+
motion = motion - centers
|
78 |
+
|
79 |
+
# adding velocity
|
80 |
+
translation = centers[:, 1:] - centers[:, :-1]
|
81 |
+
velocity = np.c_[np.zeros((D, 1)), translation]
|
82 |
+
velocity = velocity.reshape(1, D, -1)
|
83 |
+
motion = np.r_[motion[:8], motion[9:], velocity]
|
84 |
+
# motion_proj = np.r_[motion_proj[:8], motion_proj[9:]]
|
85 |
+
|
86 |
+
return motion
|
87 |
+
|
88 |
+
|
89 |
+
def globalize_motion(motion, start=None, velocity=None):
|
90 |
+
"""
|
91 |
+
inverse process of localize_motion
|
92 |
+
"""
|
93 |
+
|
94 |
+
if velocity is None: velocity = motion[-1].copy()
|
95 |
+
motion_inv = np.r_[motion[:8], np.zeros((1, 2, motion.shape[-1])), motion[8:-1]]
|
96 |
+
|
97 |
+
# restore centre position
|
98 |
+
centers = np.zeros_like(velocity)
|
99 |
+
sum = 0
|
100 |
+
for i in range(motion.shape[-1]):
|
101 |
+
sum += velocity[:, i]
|
102 |
+
centers[:, i] = sum
|
103 |
+
centers += start.reshape([2, 1])
|
104 |
+
|
105 |
+
return motion_inv + centers.reshape((1, 2, -1))
|
106 |
+
|
107 |
+
|
108 |
+
def normalize_motion(motion, meanpose, stdpose):
|
109 |
+
"""
|
110 |
+
:param motion: (J, 2, T)
|
111 |
+
:param meanpose: (J, 2)
|
112 |
+
:param stdpose: (J, 2)
|
113 |
+
:return:
|
114 |
+
"""
|
115 |
+
if motion.shape[1] == 2 and meanpose.shape[1] == 3:
|
116 |
+
meanpose = meanpose[:, [0, 2]]
|
117 |
+
if motion.shape[1] == 2 and stdpose.shape[1] == 3:
|
118 |
+
stdpose = stdpose[:, [0, 2]]
|
119 |
+
return (motion - meanpose[:, :, np.newaxis]) / stdpose[:, :, np.newaxis]
|
120 |
+
|
121 |
+
|
122 |
+
def normalize_motion_inv(motion, meanpose, stdpose):
|
123 |
+
if motion.shape[1] == 2 and meanpose.shape[1] == 3:
|
124 |
+
meanpose = meanpose[:, [0, 2]]
|
125 |
+
if motion.shape[1] == 2 and stdpose.shape[1] == 3:
|
126 |
+
stdpose = stdpose[:, [0, 2]]
|
127 |
+
return motion * stdpose[:, :, np.newaxis] + meanpose[:, :, np.newaxis]
|
128 |
+
|
129 |
+
|
130 |
+
def get_change_of_basis(motion3d, angles=None):
|
131 |
+
"""
|
132 |
+
Get the unit vectors of the local rectangular coordinate frame for the given 3D motion
|
133 |
+
:param motion3d: numpy array. 3D motion from 3D joints positions, shape (nr_joints, 3, nr_frames).
|
134 |
+
:param angles: tuple of length 3. Rotation angles around each axis.
|
135 |
+
:return: numpy array. unit vectors of the local rectangular coordinate frame, shape (3, 3).
|
136 |
+
"""
|
137 |
+
# 2 RightArm 5 LeftArm 9 RightUpLeg 12 LeftUpLeg
|
138 |
+
horizontal = (motion3d[2] - motion3d[5] + motion3d[9] - motion3d[12]) / 2
|
139 |
+
horizontal = np.mean(horizontal, axis=1)
|
140 |
+
horizontal = horizontal / np.linalg.norm(horizontal)
|
141 |
+
local_z = np.array([0, 0, 1])
|
142 |
+
local_y = np.cross(horizontal, local_z)  # known issue: horizontal and local_z may not be perpendicular
|
143 |
+
local_y = local_y / np.linalg.norm(local_y)
|
144 |
+
local_x = np.cross(local_y, local_z)
|
145 |
+
local = np.stack([local_x, local_y, local_z], axis=0)
|
146 |
+
|
147 |
+
if angles is not None:
|
148 |
+
local = rotate_basis(local, angles)
|
149 |
+
|
150 |
+
return local
|
151 |
+
|
152 |
+
|
153 |
+
def rotate_basis(local3d, angles):
|
154 |
+
"""
|
155 |
+
Rotate local rectangular coordinates from given view_angles.
|
156 |
+
|
157 |
+
:param local3d: numpy array. Unit vectors of the local rectangular coordinate frame, shape (3, 3).
|
158 |
+
:param angles: tuple of length 3. Rotation angles around each axis.
|
159 |
+
:return:
|
160 |
+
"""
|
161 |
+
cx, cy, cz = np.cos(angles)
|
162 |
+
sx, sy, sz = np.sin(angles)
|
163 |
+
|
164 |
+
x = local3d[0]
|
165 |
+
x_cpm = np.array([
|
166 |
+
[0, -x[2], x[1]],
|
167 |
+
[x[2], 0, -x[0]],
|
168 |
+
[-x[1], x[0], 0]
|
169 |
+
], dtype='float')
|
170 |
+
x = x.reshape(-1, 1)
|
171 |
+
mat33_x = cx * np.eye(3) + sx * x_cpm + (1.0 - cx) * np.matmul(x, x.T)
|
172 |
+
|
173 |
+
mat33_z = np.array([
|
174 |
+
[cz, sz, 0],
|
175 |
+
[-sz, cz, 0],
|
176 |
+
[0, 0, 1]
|
177 |
+
], dtype='float')
|
178 |
+
|
179 |
+
local3d = local3d @ mat33_x.T @ mat33_z
|
180 |
+
return local3d
|
181 |
+
|
182 |
+
|
183 |
+
def get_foot_vel(batch_motion, foot_idx):
|
184 |
+
return batch_motion[:, foot_idx, 1:] - batch_motion[:, foot_idx, :-1] + batch_motion[:, -2:, 1:].repeat(1, 2, 1)
|
185 |
+
|
186 |
+
|
187 |
+
def get_limbs(motion):
|
188 |
+
J, D, T = motion.shape
|
189 |
+
limbs = np.zeros([14, D, T])
|
190 |
+
limbs[0] = motion[0] - motion[1] # neck
|
191 |
+
limbs[1] = motion[2] - motion[1] # r_shoulder
|
192 |
+
limbs[2] = motion[3] - motion[2] # r_arm
|
193 |
+
limbs[3] = motion[4] - motion[3] # r_forearm
|
194 |
+
limbs[4] = motion[5] - motion[1] # l_shoulder
|
195 |
+
limbs[5] = motion[6] - motion[5] # l_arm
|
196 |
+
limbs[6] = motion[7] - motion[6] # l_forearm
|
197 |
+
limbs[7] = motion[1] - motion[8] # spine
|
198 |
+
limbs[8] = motion[9] - motion[8] # r_pelvis
|
199 |
+
limbs[9] = motion[10] - motion[9] # r_thigh
|
200 |
+
limbs[10] = motion[11] - motion[10] # r_shin
|
201 |
+
limbs[11] = motion[12] - motion[8] # l_pelvis
|
202 |
+
limbs[12] = motion[13] - motion[12] # l_thigh
|
203 |
+
limbs[13] = motion[14] - motion[13] # l_shin
|
204 |
+
return limbs
|
205 |
+
|
206 |
+
|
207 |
+
def scale_limbs(motion, global_scale, local_scales):
|
208 |
+
"""
|
209 |
+
:param motion: joint sequence [J, 2, T]
|
210 |
+
:param local_scales: 8 numbers of scales
|
211 |
+
:return: scaled joint sequence
|
212 |
+
"""
|
213 |
+
|
214 |
+
limb_dependents = [
|
215 |
+
[0],
|
216 |
+
[2, 3, 4],
|
217 |
+
[3, 4],
|
218 |
+
[4],
|
219 |
+
[5, 6, 7],
|
220 |
+
[6, 7],
|
221 |
+
[7],
|
222 |
+
[0, 1, 2, 3, 4, 5, 6, 7],
|
223 |
+
[9, 10, 11],
|
224 |
+
[10, 11],
|
225 |
+
[11],
|
226 |
+
[12, 13, 14],
|
227 |
+
[13, 14],
|
228 |
+
[14]
|
229 |
+
]
|
230 |
+
|
231 |
+
limbs = get_limbs(motion)
|
232 |
+
scaled_limbs = limbs.copy() * global_scale
|
233 |
+
scaled_limbs[0] *= local_scales[0]
|
234 |
+
scaled_limbs[1] *= local_scales[1]
|
235 |
+
scaled_limbs[2] *= local_scales[2]
|
236 |
+
scaled_limbs[3] *= local_scales[3]
|
237 |
+
scaled_limbs[4] *= local_scales[1]
|
238 |
+
scaled_limbs[5] *= local_scales[2]
|
239 |
+
scaled_limbs[6] *= local_scales[3]
|
240 |
+
scaled_limbs[7] *= local_scales[4]
|
241 |
+
scaled_limbs[8] *= local_scales[5]
|
242 |
+
scaled_limbs[9] *= local_scales[6]
|
243 |
+
scaled_limbs[10] *= local_scales[7]
|
244 |
+
scaled_limbs[11] *= local_scales[5]
|
245 |
+
scaled_limbs[12] *= local_scales[6]
|
246 |
+
scaled_limbs[13] *= local_scales[7]
|
247 |
+
|
248 |
+
delta = scaled_limbs - limbs
|
249 |
+
|
250 |
+
scaled_motion = motion.copy()
|
251 |
+
scaled_motion[limb_dependents[7]] += delta[7] # spine
|
252 |
+
scaled_motion[limb_dependents[1]] += delta[1] # r_shoulder
|
253 |
+
scaled_motion[limb_dependents[4]] += delta[4] # l_shoulder
|
254 |
+
scaled_motion[limb_dependents[2]] += delta[2] # r_arm
|
255 |
+
scaled_motion[limb_dependents[5]] += delta[5] # l_arm
|
256 |
+
scaled_motion[limb_dependents[3]] += delta[3] # r_forearm
|
257 |
+
scaled_motion[limb_dependents[6]] += delta[6] # l_forearm
|
258 |
+
scaled_motion[limb_dependents[0]] += delta[0] # neck
|
259 |
+
scaled_motion[limb_dependents[8]] += delta[8] # r_pelvis
|
260 |
+
scaled_motion[limb_dependents[11]] += delta[11] # l_pelvis
|
261 |
+
scaled_motion[limb_dependents[9]] += delta[9] # r_thigh
|
262 |
+
scaled_motion[limb_dependents[12]] += delta[12] # l_thigh
|
263 |
+
scaled_motion[limb_dependents[10]] += delta[10] # r_shin
|
264 |
+
scaled_motion[limb_dependents[13]] += delta[13] # l_shin
|
265 |
+
|
266 |
+
|
267 |
+
return scaled_motion
|
268 |
+
|
269 |
+
|
270 |
+
def get_limb_lengths(x):
|
271 |
+
_, dims, _ = x.shape
|
272 |
+
if dims == 2:
|
273 |
+
limbs = np.max(np.linalg.norm(get_limbs(x), axis=1), axis=-1)
|
274 |
+
limb_lengths = np.array([
|
275 |
+
limbs[0], # neck
|
276 |
+
max(limbs[1], limbs[4]), # shoulders
|
277 |
+
max(limbs[2], limbs[5]), # arms
|
278 |
+
max(limbs[3], limbs[6]), # forearms
|
279 |
+
limbs[7], # spine
|
280 |
+
max(limbs[8], limbs[11]), # pelvis
|
281 |
+
max(limbs[9], limbs[12]), # thighs
|
282 |
+
max(limbs[10], limbs[13]) # shins
|
283 |
+
])
|
284 |
+
else:
|
285 |
+
limbs = np.mean(np.linalg.norm(get_limbs(x), axis=1), axis=-1)
|
286 |
+
limb_lengths = np.array([
|
287 |
+
limbs[0], # neck
|
288 |
+
(limbs[1] + limbs[4]) / 2., # shoulders
|
289 |
+
(limbs[2] + limbs[5]) / 2., # arms
|
290 |
+
(limbs[3] + limbs[6]) / 2., # forearms
|
291 |
+
limbs[7], # spine
|
292 |
+
(limbs[8] + limbs[11]) / 2., # pelvis
|
293 |
+
(limbs[9] + limbs[12]) / 2., # thighs
|
294 |
+
(limbs[10] + limbs[13]) / 2. # shins
|
295 |
+
])
|
296 |
+
return limb_lengths
|
297 |
+
|
298 |
+
|
299 |
+
def limb_norm(x_a, x_b):
|
300 |
+
|
301 |
+
limb_lengths_a = get_limb_lengths(x_a)
|
302 |
+
limb_lengths_b = get_limb_lengths(x_b)
|
303 |
+
|
304 |
+
limb_lengths_a[limb_lengths_a < 1e-3] = 1e-3
|
305 |
+
local_scales = limb_lengths_b / limb_lengths_a
|
306 |
+
|
307 |
+
x_ab = scale_limbs(x_a, global_scale=1.0, local_scales=local_scales)
|
308 |
+
|
309 |
+
return x_ab
|
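limb_norm retargets the skeleton proportions: it measures per-limb lengths on both sequences and rescales x_a's limbs to match x_b's. A sketch on random stand-ins (with random data the measured limb lengths of the result should match those of x_b up to numerical tolerance):

import numpy as np
from lib.util.motion import limb_norm, get_limb_lengths

x_a = np.random.rand(15, 2, 30)                     # source skeleton sequence
x_b = np.random.rand(15, 2, 30)                     # target skeleton sequence
x_ab = limb_norm(x_a, x_b)                          # x_a's motion with x_b's limb proportions
print(np.allclose(get_limb_lengths(x_ab), get_limb_lengths(x_b)))   # expected: True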
lib/util/visualization.py
ADDED
@@ -0,0 +1,448 @@
|
import numpy as np
import os
import cv2
import math
import imageio
from tqdm import tqdm
from PIL import Image
from lib.util.motion import normalize_motion_inv, globalize_motion
from lib.util.general import ensure_dir
from threading import Thread, Lock


def interpolate_color(color1, color2, alpha):
    color_i = alpha * np.array(color1) + (1 - alpha) * np.array(color2)
    return color_i.tolist()


def two_pts_to_rectangle(point1, point2):
    X = [point1[1], point2[1]]
    Y = [point1[0], point2[0]]
    length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
    length = 5  # fixed half-width of the rectangle; the computed value above is overridden
    alpha = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
    beta = alpha - 90
    if beta <= -180:
        beta += 360
    p1 = (int(point1[0] - length * math.cos(math.radians(beta))), int(point1[1] - length * math.sin(math.radians(beta))))
    p2 = (int(point1[0] + length * math.cos(math.radians(beta))), int(point1[1] + length * math.sin(math.radians(beta))))
    p3 = (int(point2[0] + length * math.cos(math.radians(beta))), int(point2[1] + length * math.sin(math.radians(beta))))
    p4 = (int(point2[0] - length * math.cos(math.radians(beta))), int(point2[1] - length * math.sin(math.radians(beta))))
    return [p1, p2, p3, p4]


def rgb2rgba(color):
    return (color[0], color[1], color[2], 255)


def hex2rgb(hex, number_of_colors=3):
    h = hex
    rgb = []
    for i in range(number_of_colors):
        h = h.lstrip('#')
        hex_color = h[0:6]
        rgb_color = [int(hex_color[i:i + 2], 16) for i in (0, 2, 4)]
        rgb.append(rgb_color)
        h = h[6:]

    return rgb
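
# Quick sketch of how hex2rgb is meant to be called (the colour string is illustrative):
# it consumes `number_of_colors` concatenated "#rrggbb" groups and returns [R, G, B] triples.
#
#   >>> hex2rgb('#a50b69#b73b87#db9dc3')
#   [[165, 11, 105], [183, 59, 135], [219, 157, 195]]
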
def normalize_joints(joints_position, H=512, W=512):
    # find the minimum and maximum joint coordinates
    min_x, min_y = np.min(joints_position, axis=0)
    max_x, max_y = np.max(joints_position, axis=0)

    # extent of the joint coordinates
    range_x, range_y = max_x - min_x, max_y - min_y

    # keep a small margin so the scaled joints do not spill over the canvas border
    buffer = 0.05  # e.g. a 5% margin
    scale_x, scale_y = (1 - buffer) * W / range_x, (1 - buffer) * H / range_y

    # use the smaller scale so that all joints fit inside the canvas
    scale = min(scale_x, scale_y)

    # scale the joint coordinates
    joints_position_scaled = (joints_position - np.array([min_x, min_y])) * scale

    # new bounds of the scaled joint coordinates
    new_min_x, new_min_y = np.min(joints_position_scaled, axis=0)
    new_max_x, new_max_y = np.max(joints_position_scaled, axis=0)

    # translation that centres the joints on the canvas
    translate_x = (W - (new_max_x - new_min_x)) / 2 - new_min_x
    translate_y = (H - (new_max_y - new_min_y)) / 2 - new_min_y

    # translate the joint coordinates
    joints_position_normalized = joints_position_scaled + np.array([translate_x, translate_y])

    return joints_position_normalized
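
# Minimal sketch of what normalize_joints does (the values are illustrative): joints in an
# arbitrary coordinate range are rescaled and centred so they fit a W x H canvas with a
# 5% margin.
#
#   >>> pts = np.array([[0.0, 0.0], [1.0, 2.0], [2.0, 1.0]])
#   >>> out = normalize_joints(pts, H=512, W=512)
#   >>> out.min(axis=0), out.max(axis=0)   # roughly spans the canvas, centred
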
def joints2image(joints_position, colors, transparency=False, H=512, W=512, nr_joints=15, imtype=np.uint8, grayscale=False, bg_color=(255, 255, 255)):
    nr_joints = joints_position.shape[0]
    joints_position = normalize_joints(joints_position)
    if nr_joints == 49:  # full joints(49): basic(15) + eyes(2) + toes(2) + hands(30)
        limbSeq = [[0, 1], [1, 2], [1, 5], [1, 8], [2, 3], [3, 4], [5, 6], [6, 7],
                   [8, 9], [8, 13], [9, 10], [10, 11], [11, 12], [13, 14], [14, 15], [15, 16],
                   ]  # [0, 17], [0, 18] ignore eyes

        L = rgb2rgba(colors[0]) if transparency else colors[0]
        M = rgb2rgba(colors[1]) if transparency else colors[1]
        R = rgb2rgba(colors[2]) if transparency else colors[2]

        colors_joints = [M, M, L, L, L, R, R,
                         R, M, L, L, L, L, R, R, R,
                         R, R, L] + [L] * 15 + [R] * 15

        colors_limbs = [M, L, R, M, L, L, R,
                        R, L, R, L, L, L, R, R, R,
                        R, R]
    elif nr_joints == 15 or nr_joints == 17:  # basic joints(15) + (eyes(2))
        limbSeq = [[0, 1], [1, 2], [1, 5], [1, 8], [2, 3], [3, 4], [5, 6], [6, 7],
                   [8, 9], [8, 12], [9, 10], [10, 11], [12, 13], [13, 14]]
        # [0, 15], [0, 16] two eyes are not drawn

        L = rgb2rgba(colors[0]) if transparency else colors[0]
        M = rgb2rgba(colors[1]) if transparency else colors[1]
        R = rgb2rgba(colors[2]) if transparency else colors[2]

        colors_joints = [M, M, L, L, L, R, R,
                         R, M, L, L, L, R, R, R]

        colors_limbs = [M, L, R, M, L, L, R,
                        R, L, R, L, L, R, R]
    else:
        raise ValueError("Only 49, 17 or 15 joints are supported")

    if transparency:
        canvas = np.zeros(shape=(H, W, 4))
    else:
        canvas = np.ones(shape=(H, W, 3)) * np.array(bg_color).reshape([1, 1, 3])
    hips = joints_position[8]
    neck = joints_position[1]
    torso_length = ((hips[1] - neck[1]) ** 2 + (hips[0] - neck[0]) ** 2) ** 0.5

    head_radius = int(torso_length / 4.5)
    end_effectors_radius = int(torso_length / 15)
    end_effectors_radius = 7
    joints_radius = 7
    cv2.circle(canvas, (int(joints_position[0][0]), int(joints_position[0][1])), head_radius, colors_joints[0], thickness=-1)

    for i in range(1, len(colors_joints)):
        if i in (17, 18):
            continue
        elif i > 18:
            radius = 2
        else:
            radius = joints_radius
        cv2.circle(canvas, (int(joints_position[i][0]), int(joints_position[i][1])), radius, colors_joints[i], thickness=-1)

    stickwidth = 2

    for i in range(len(limbSeq)):
        limb = limbSeq[i]
        cur_canvas = canvas.copy()
        point1_index = limb[0]
        point2_index = limb[1]

        point1 = joints_position[point1_index]
        point2 = joints_position[point2_index]
        X = [point1[1], point2[1]]
        Y = [point1[0], point2[0]]
        mX = np.mean(X)
        mY = np.mean(Y)
        length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
        alpha = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))

        polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(alpha), 0, 360, 1)
        cv2.fillConvexPoly(cur_canvas, polygon, colors_limbs[i])
        canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
    bb = bounding_box(canvas)
    canvas_cropped = canvas[:, bb[2]:bb[3], :]

    canvas = canvas.astype(imtype)
    canvas_cropped = canvas_cropped.astype(imtype)

    if grayscale:
        if transparency:
            canvas = cv2.cvtColor(canvas, cv2.COLOR_RGBA2GRAY)
            canvas_cropped = cv2.cvtColor(canvas_cropped, cv2.COLOR_RGBA2GRAY)
        else:
            canvas = cv2.cvtColor(canvas, cv2.COLOR_RGB2GRAY)
            canvas_cropped = cv2.cvtColor(canvas_cropped, cv2.COLOR_RGB2GRAY)

    return [canvas, canvas_cropped]
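
# A minimal usage sketch for joints2image (shapes and colours are illustrative): it takes
# one frame of joints as an (nr_joints, 2) array in any coordinate range (normalize_joints
# rescales it to the canvas) and returns the full canvas plus a horizontally cropped copy.
#
#   >>> colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]   # left / middle / right
#   >>> frame = np.random.rand(15, 2)                      # one pose, 15 joints
#   >>> canvas, canvas_cropped = joints2image(frame, colors, H=512, W=512)
#   >>> save_image(canvas, 'pose.png')
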
def joints2image_highlight(joints_position, colors, highlights, transparency=False, H=512, W=512, nr_joints=15, imtype=np.uint8, grayscale=False):
    nr_joints = joints_position.shape[0]

    limbSeq = [[0, 1], [1, 2], [1, 5], [1, 8], [2, 3], [3, 4], [5, 6], [6, 7],
               [8, 9], [8, 12], [9, 10], [10, 11], [12, 13], [13, 14]]
    # [0, 15], [0, 16] two eyes are not drawn

    L = rgb2rgba(colors[0]) if transparency else colors[0]
    M = rgb2rgba(colors[1]) if transparency else colors[1]
    R = rgb2rgba(colors[2]) if transparency else colors[2]
    Hi = rgb2rgba(colors[3]) if transparency else colors[3]

    colors_joints = [M, M, L, L, L, R, R,
                     R, M, L, L, L, R, R, R]

    colors_limbs = [M, L, R, M, L, L, R,
                    R, L, R, L, L, R, R]

    for hi in highlights:
        colors_limbs[hi] = Hi

    if transparency:
        canvas = np.zeros(shape=(H, W, 4))
    else:
        canvas = np.ones(shape=(H, W, 3)) * 255
    hips = joints_position[8]
    neck = joints_position[1]
    torso_length = ((hips[1] - neck[1]) ** 2 + (hips[0] - neck[0]) ** 2) ** 0.5

    head_radius = int(torso_length / 4.5)
    end_effectors_radius = int(torso_length / 15)
    end_effectors_radius = 7
    joints_radius = 7

    cv2.circle(canvas, (int(joints_position[0][0] * 500), int(joints_position[0][1] * 500)), head_radius, colors_joints[0], thickness=-1)

    for i in range(1, len(colors_joints)):
        if i in (17, 18):
            continue
        elif i > 18:
            radius = 2
        else:
            radius = joints_radius
        cv2.circle(canvas, (int(joints_position[i][0] * 500), int(joints_position[i][1] * 500)), radius, colors_joints[i], thickness=-1)

    stickwidth = 2

    for i in range(len(limbSeq)):
        limb = limbSeq[i]
        cur_canvas = canvas.copy()
        point1_index = limb[0]
        point2_index = limb[1]

        point1 = joints_position[point1_index]
        point2 = joints_position[point2_index]
        X = [point1[1], point2[1]]
        Y = [point1[0], point2[0]]
        mX = np.mean(X)
        mY = np.mean(Y)
        length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
        alpha = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))

        polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(alpha), 0, 360, 1)
        cv2.fillConvexPoly(cur_canvas, polygon, colors_limbs[i])
        canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
    bb = bounding_box(canvas)
    canvas_cropped = canvas[:, bb[2]:bb[3], :]

    canvas = canvas.astype(imtype)
    canvas_cropped = canvas_cropped.astype(imtype)

    if grayscale:
        if transparency:
            canvas = cv2.cvtColor(canvas, cv2.COLOR_RGBA2GRAY)
            canvas_cropped = cv2.cvtColor(canvas_cropped, cv2.COLOR_RGBA2GRAY)
        else:
            canvas = cv2.cvtColor(canvas, cv2.COLOR_RGB2GRAY)
            canvas_cropped = cv2.cvtColor(canvas_cropped, cv2.COLOR_RGB2GRAY)

    return [canvas, canvas_cropped]


def motion2video(motion, h, w, save_path, colors, bg_color=(255, 255, 255), transparency=False, motion_tgt=None, fps=25, save_frame=True, grayscale=False, show_progress=True):
    nr_joints = motion.shape[0]
    as_array = save_path.endswith(".npy")
    vlen = motion.shape[-1]

    out_array = np.zeros([h, w, vlen]) if as_array else None
    videowriter = None if as_array else imageio.get_writer(save_path, fps=fps, codec='libx264')

    if save_frame:
        frames_dir = save_path[:-4] + '-frames'
        ensure_dir(frames_dir)

    iterator = range(vlen)
    if show_progress:
        iterator = tqdm(iterator)
    for i in iterator:
        [img, img_cropped] = joints2image(motion[:, :, i], colors, transparency=transparency, bg_color=bg_color, H=h, W=w, nr_joints=nr_joints, grayscale=grayscale)
        if motion_tgt is not None:
            [img_tgt, img_tgt_cropped] = joints2image(motion_tgt[:, :, i], colors, transparency=transparency, bg_color=bg_color, H=h, W=w, nr_joints=nr_joints, grayscale=grayscale)
            img_ori = img.copy()
            img = cv2.addWeighted(img_tgt, 0.3, img_ori, 0.7, 0)
            img_cropped = cv2.addWeighted(img_tgt, 0.3, img_ori, 0.7, 0)
            bb = bounding_box(img_cropped)
            img_cropped = img_cropped[:, bb[2]:bb[3], :]
        if save_frame:
            save_image(img_cropped, os.path.join(frames_dir, "%04d.png" % i))
        if as_array:
            out_array[:, :, i] = img
        else:
            videowriter.append_data(img)

    if as_array:
        np.save(save_path, out_array)
    else:
        videowriter.close()

    return out_array
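
# A minimal usage sketch for motion2video (paths and shapes are illustrative): it renders a
# (nr_joints, 2, n_frames) motion array to an .mp4 via imageio/ffmpeg, optionally overlaying
# a target motion, and also dumps per-frame PNGs when save_frame=True.
#
#   >>> motion = np.load('out/retarget.npy')                 # hypothetical (15, 2, T) array
#   >>> colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]
#   >>> motion2video(motion, 512, 512, 'retarget.mp4', colors, fps=25, save_frame=False)
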
298 |
+
|
299 |
+
|
300 |
+
def motion2video_np(motion, h, w, colors, bg_color=(255, 255, 255), transparency=False, motion_tgt=None, show_progress=True, workers=6):
|
301 |
+
|
302 |
+
nr_joints = motion.shape[0]
|
303 |
+
vlen = motion.shape[-1]
|
304 |
+
out_array = np.zeros([vlen, h, w , 3])
|
305 |
+
|
306 |
+
queue = [i for i in range(vlen)]
|
307 |
+
lock = Lock()
|
308 |
+
pbar = tqdm(total=vlen) if show_progress else None
|
309 |
+
|
310 |
+
class Worker(Thread):
|
311 |
+
|
312 |
+
def __init__(self):
|
313 |
+
super(Worker, self).__init__()
|
314 |
+
|
315 |
+
def run(self):
|
316 |
+
while True:
|
317 |
+
lock.acquire()
|
318 |
+
if len(queue) == 0:
|
319 |
+
lock.release()
|
320 |
+
break
|
321 |
+
else:
|
322 |
+
i = queue.pop(0)
|
323 |
+
lock.release()
|
324 |
+
[img, img_cropped] = joints2image(motion[:, :, i], colors, transparency=transparency, bg_color=bg_color, H=h, W=w, nr_joints=nr_joints, grayscale=False)
|
325 |
+
if motion_tgt is not None:
|
326 |
+
[img_tgt, img_tgt_cropped] = joints2image(motion_tgt[:, :, i], colors, transparency=transparency, H=h, W=w, nr_joints=nr_joints, grayscale=False)
|
327 |
+
img_ori = img.copy()
|
328 |
+
img = cv2.addWeighted(img_tgt, 0.3, img_ori, 0.7, 0)
|
329 |
+
# img_cropped = cv2.addWeighted(img_tgt, 0.3, img_ori, 0.7, 0)
|
330 |
+
# bb = bounding_box(img_cropped)
|
331 |
+
# img_cropped = img_cropped[:, bb[2]:bb[3], :]
|
332 |
+
out_array[i, :, :] = img
|
333 |
+
if show_progress: pbar.update(1)
|
334 |
+
|
335 |
+
pool = [Worker() for _ in range(workers)]
|
336 |
+
for worker in pool: worker.start()
|
337 |
+
for worker in pool: worker.join()
|
338 |
+
for worker in pool: del worker
|
339 |
+
|
340 |
+
return out_array
|
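
# Sketch of the intended use (shapes are illustrative): unlike motion2video, this renders
# frames in memory with a small thread pool and returns the frames as an array instead of
# writing a file, e.g. for handing them to another writer downstream.
#
#   >>> frames = motion2video_np(motion, 512, 512, colors, workers=6)  # motion: (15, 2, T)
#   >>> frames.shape   # -> (T, 512, 512, 3)
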


def save_image(image_numpy, image_path):
    image_pil = Image.fromarray(image_numpy)
    image_pil.save(image_path)


def bounding_box(img):
    a = np.where(img != 0)
    bbox = np.min(a[0]), np.max(a[0]), np.min(a[1]), np.max(a[1])
    return bbox


def pose2im_all(all_peaks, H=512, W=512):
    limbSeq = [[1, 2], [2, 3], [3, 4],       # right arm
               [1, 5], [5, 6], [6, 7],       # left arm
               [8, 9], [9, 10], [10, 11],    # right leg
               [8, 12], [12, 13], [13, 14],  # left leg
               [1, 0],                       # head/neck
               [1, 8],                       # body
               ]

    limb_colors = [[0, 60, 255], [0, 120, 255], [0, 180, 255],
                   [180, 255, 0], [120, 255, 0], [60, 255, 0],
                   [170, 255, 0], [85, 255, 0], [0, 255, 0],
                   [255, 170, 0], [255, 85, 0], [255, 0, 0],
                   [0, 85, 255],
                   [0, 0, 255],
                   ]

    joint_colors = [[85, 0, 255], [0, 0, 255], [0, 60, 255], [0, 120, 255], [0, 180, 255],
                    [180, 255, 0], [120, 255, 0], [60, 255, 0], [0, 0, 255],
                    [170, 255, 0], [85, 255, 0], [0, 255, 0],
                    [255, 170, 0], [255, 85, 0], [255, 0, 0],
                    ]

    image = pose2im(all_peaks, limbSeq, limb_colors, joint_colors, H, W)
    return image


def pose2im(all_peaks, limbSeq, limb_colors, joint_colors, H, W, _circle=True, _limb=True, imtype=np.uint8):
    canvas = np.zeros(shape=(H, W, 3))
    canvas.fill(255)

    if _circle:
        for i in range(len(joint_colors)):
            cv2.circle(canvas, (int(all_peaks[i][0]), int(all_peaks[i][1])), 2, joint_colors[i], thickness=2)

    if _limb:
        stickwidth = 2

        for i in range(len(limbSeq)):
            limb = limbSeq[i]
            cur_canvas = canvas.copy()
            point1_index = limb[0]
            point2_index = limb[1]

            if len(all_peaks[point1_index]) > 0 and len(all_peaks[point2_index]) > 0:
                point1 = all_peaks[point1_index][0:2]
                point2 = all_peaks[point2_index][0:2]
                X = [point1[1], point2[1]]
                Y = [point1[0], point2[0]]
                mX = np.mean(X)
                mY = np.mean(Y)
                length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
                angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
                polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
                cv2.fillConvexPoly(cur_canvas, polygon, limb_colors[i])
                canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)

    return canvas.astype(imtype)


def visualize_motion_in_training(outputs, mean_pose, std_pose, nr_visual=4, H=512, W=512):
    ret = {}
    for k, out in outputs.items():
        motion = out[0].detach().cpu().numpy()
        inds = np.linspace(0, motion.shape[1] - 1, nr_visual, dtype=int)
        motion = motion[:, inds]
        motion = motion.reshape(-1, 2, motion.shape[-1])
        motion = normalize_motion_inv(motion, mean_pose, std_pose)
        peaks = globalize_motion(motion)

        heatmaps = []
        for i in range(peaks.shape[2]):
            skeleton = pose2im_all(peaks[:, :, i], H, W)
            heatmaps.append(skeleton)
        heatmaps = np.stack(heatmaps).transpose((0, 3, 1, 2)) / 255.0
        ret[k] = heatmaps

    return ret
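
# Rough sketch of how visualize_motion_in_training is consumed during training (the names
# and shapes here are assumptions, not part of this commit): `outputs` maps a label to a
# batch of decoded motion tensors, and each entry comes back as a (nr_visual, 3, H, W)
# float array in [0, 1], ready for a TensorBoard image grid.
#
#   >>> vis = visualize_motion_in_training({"recon": recon_batch}, mean_pose, std_pose)
#   >>> vis["recon"].shape   # -> (nr_visual, 3, 512, 512)
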


if __name__ == '__main__':
    # load the .npy motion file
    motion_data = np.load('/home/fazhong/studio/transmomo.pytorch/out/retarget_1_121.npy')

    # video settings
    height = 512                                        # video height
    width = 512                                         # video width
    save_path = 'Angry.mp4'                             # output video path
    colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]    # joint colours
    bg_color = (255, 255, 255)                          # background colour
    fps = 25                                            # video frame rate

    # render the video
    motion2video(motion_data, height, width, save_path, colors, bg_color=bg_color, transparency=False, fps=fps)
requirements.txt
ADDED
@@ -0,0 +1,17 @@
gradio
fastapi
aiohttp
easydict
imageio-ffmpeg
matplotlib
numpy
Pillow
protobuf
PyYAML
scikit-image
scikit-learn
scipy
tensorboardX
torch>=1.2.0
torchvision
tqdm