Spaces:

kevinwang676
/

MuseV-test

No application file

File size: 14,919 Bytes

6755a2d

#from __future__ import absolute_import
import sys
import io
import os
# Replace the process arguments with a single fixed script name.
# NOTE(review): presumably so that the option parser invoked later in this
# module does not see the host application's command-line flags — confirm.
sys.argv = ['GPT_eval_multi.py']

# Add the project root directory (the directory containing this file) to sys.path.
PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(1, PROJECT_ROOT)
# Root directory from which the CLIP download cache, the pretrained
# checkpoints and the mean/std statistics are loaded further down.
CKPT_ROOT="/cfs-datasets/public_models/motion"

from .options import option_transformer as option_trans

import sys
print(sys.path[0])

import clip
import torch
import cv2
import numpy as np
from  .models import vqvae as vqvae
from  .models import t2m_trans as trans
import warnings
from  .visualization import plot_3d_global as plot_3d
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.colors as mcolors
from tqdm import tqdm
from mpl_toolkits.mplot3d import Axes3D
from PIL import Image

import time
import random


warnings.filterwarnings('ignore')
from matplotlib.axes._axes import _log as matplotlib_axes_logger
matplotlib_axes_logger.setLevel('ERROR')

from math import cos,sin,radians

# Build the option object, then override the fields this module relies on.
args = option_trans.get_args_parser()

args.dataname = 't2m'
# Checkpoint paths; both are read with torch.load() during model setup below.
args.resume_pth = os.path.join(CKPT_ROOT,'pretrained/VQVAE/net_last.pth')
args.resume_trans = os.path.join(CKPT_ROOT,'pretrained/VQTransformer_corruption05/net_best_fid.pth')
# down_t and depth are passed to vqvae.HumanVQVAE; block_size is passed to
# trans.Text2Motion_Transformer.
# NOTE(review): these values presumably have to match the architecture the
# checkpoints above were trained with — confirm before changing them.
args.down_t = 2
args.depth = 3
args.block_size = 51

def replace_space_with_underscore(s):
    """Return ``s`` with every space character turned into an underscore."""
    pieces = s.split(' ')
    return '_'.join(pieces)


def Rz(angle):
    """Return the 3x3 matrix of a rotation by ``angle`` degrees about the z axis."""
    rad = radians(angle)
    c = cos(rad)
    s = sin(rad)
    rows = [
        [c, -s, 0],
        [s, c, 0],
        [0, 0, 1],
    ]
    return np.array(rows)


def Rx(angle):
    """Return the 3x3 matrix of a rotation by ``angle`` degrees about the x axis."""
    rad = radians(angle)
    c = cos(rad)
    s = sin(rad)
    rows = [
        [1, 0, 0],
        [0, c, -s],
        [0, s, c],
    ]
    return np.array(rows)

def generate_cuid():
    """Return a lowercase hex id: millisecond timestamp followed by a random suffix.

    The suffix is a random integer in [0, 0xfffff] rendered without padding,
    and the combined string is left-padded with zeros to at least 10
    characters.
    """
    millis = int(time.time() * 1000)
    salt = random.randint(0, 0xfffff)
    token = format(millis, 'x') + format(salt, 'x')
    return token.zfill(10)

def smpl_to_openpose18(smpl_keypoints):
    """Convert one frame of 22 SMPL-style joints to an 18-keypoint OpenPose layout.

    Joint chains of the 22-joint skeleton, as labelled by the original author
    (NOTE(review): the left/right and body-part names below are the author's
    labels and have not been checked against the dataset's joint order):

    [0, 2, 5, 8, 11]
        Left leg: pelvis (0), left thigh (2), left calf (5), left foot (8),
        left toe (11).
    [0, 1, 4, 7, 10]
        Right leg: pelvis (0), right thigh (1), right calf (4), right foot (7),
        right toe (10).
    [0, 3, 6, 9, 12, 15]
        Torso: pelvis (0), spine (3), neck (6), head (9), left shoulder (12),
        right shoulder (15).
    [9, 14, 17, 19, 21]
        Left arm: joint 9, left upper arm (14), left forearm (17),
        left wrist (19), left hand (21).
    [9, 13, 16, 18, 20]
        Right arm: joint 9, right upper arm (13), right forearm (16),
        right wrist (18), right hand (20).

    The SMPL shoulder keypoints are currently ignored by this conversion.

    Args:
        smpl_keypoints: indexable by joint number 0..21, each entry holding
            three coordinates.

    Returns:
        A new ``(18, 3)`` float64 array. Rows 0-13 are nose, neck, right
        shoulder/elbow/wrist, left shoulder/elbow/wrist, right hip/knee/ankle
        and left hip/knee/ankle; rows 14-17 are right eye, left eye, right ear
        and left ear.
    """
    # (OpenPose row, SMPL joint) pairs that are copied over unchanged.
    copied_joints = (
        (2, 16),   # right shoulder
        (3, 18),   # right elbow
        (4, 20),   # right wrist
        (5, 17),   # left shoulder
        (6, 19),   # left elbow
        (7, 21),   # left wrist
        (8, 1),    # right hip
        (9, 4),    # right knee
        (10, 7),   # right ankle
        (11, 2),   # left hip
        (12, 5),   # left knee
        (13, 8),   # left ankle
    )
    # (OpenPose row, shoulder row it leans towards, fraction of neck->shoulder).
    face_points = (
        (14, 2, 0.3),  # right eye
        (15, 5, 0.3),  # left eye
        (16, 2, 0.7),  # right ear
        (17, 5, 0.7),  # left ear
    )

    pose = np.zeros((18, 3))
    for target_row, source_joint in copied_joints:
        pose[target_row] = smpl_keypoints[source_joint]

    # Experiment kept from the original: the neck is the midpoint of the two
    # shoulders (SMPL joint 6 is not used), and the nose keeps the horizontal
    # position of SMPL joint 9 while sitting 0.3 above that neck on axis 1.
    pose[1] = (pose[2] + pose[5]) / 2
    pose[0] = smpl_keypoints[9]
    pose[0, 1] = pose[1, 1] + 0.3

    # Experiment kept from the original: synthesise the face keypoints from
    # the nose so that the head indicates the body orientation. Each point is
    # the nose shifted on axes 0 and 2 along the neck->shoulder direction.
    for target_row, shoulder_row, fraction in face_points:
        pose[target_row] = pose[0]
        pose[target_row, 0] += fraction * (pose[shoulder_row, 0] - pose[1, 0])
        pose[target_row, 2] += fraction * (pose[shoulder_row, 2] - pose[1, 2])

    # Only the eyes are lifted above the nose; the ears stay at nose height.
    pose[14, 1] += 0.05
    pose[15, 1] += 0.05

    return pose






# Module-level model setup: everything below runs once at import time and
# requires a CUDA device (every model and tensor is moved to the GPU).
# TODO: debug only, need to be deleted before unload
## load clip model and datasets
clip_model, clip_preprocess = clip.load("ViT-B/32", device=torch.device('cuda'), jit=False, download_root=CKPT_ROOT)  # Must set jit=False for training
clip.model.convert_weights(clip_model)  # Actually this line is unnecessary since clip by default already on float16
# CLIP is used for inference only: switch to eval mode and freeze all weights.
clip_model.eval()
for p in clip_model.parameters():
    p.requires_grad = False
print("loaded CLIP model")
# Motion VQ-VAE; its forward_decoder() is used by the generation functions below.
net = vqvae.HumanVQVAE(args, ## use args to define different parameters in different quantizers
                    args.nb_code,
                    args.code_dim,
                    args.output_emb_width,
                    args.down_t,
                    args.stride_t,
                    args.width,
                    args.depth,
                    args.dilation_growth_rate)


# Text-conditioned transformer; its sample() is called with CLIP text features.
trans_encoder = trans.Text2Motion_Transformer(num_vq=args.nb_code,
                                embed_dim=1024,
                                clip_dim=args.clip_dim,
                                block_size=args.block_size,
                                num_layers=9,
                                n_head=16,
                                drop_out_rate=args.drop_out_rate,
                                fc_rate=args.ff_rate)


# Load the VQ-VAE weights on CPU first, then move the model to the GPU.
print ('loading checkpoint from {}'.format(args.resume_pth))
ckpt = torch.load(args.resume_pth, map_location='cpu')
net.load_state_dict(ckpt['net'], strict=True)
net.eval()
net.cuda()

# Same procedure for the transformer; the `ckpt` name is reused.
print ('loading transformer checkpoint from {}'.format(args.resume_trans))
ckpt = torch.load(args.resume_trans, map_location='cpu')
trans_encoder.load_state_dict(ckpt['trans'], strict=True)
trans_encoder.eval()
trans_encoder.cuda()

# Statistics applied to the decoder output as `pred_pose*std+mean` before the
# joint positions are recovered.
mean = torch.from_numpy(np.load(os.path.join(CKPT_ROOT,'./checkpoints/t2m/VQVAEV3_CB1024_CMT_H1024_NRES3/meta/mean.npy'))).cuda()
std = torch.from_numpy(np.load(os.path.join(CKPT_ROOT,'./checkpoints/t2m/VQVAEV3_CB1024_CMT_H1024_NRES3/meta/std.npy'))).cuda()



# Bones of the 18-keypoint skeleton as (parent, child) keypoint indices.
_OPENPOSE_BONES = [(0, 1), (1, 2), (2, 3), (3, 4), (1, 5), (5, 6), (6, 7), (1, 8), (8, 9), (9, 10), (1, 11), (11, 12), (12, 13), (0, 14), (14, 16), (0, 15), (15, 17)]

# Per-keypoint RGB colours (0-255). Only the first 18 entries are indexed.
_OPENPOSE_JOINT_COLORS = [(255,0,0),(255,85,0),(255,170,0),(255,255,0),(170,255,0),(85,255,0),(0,255,0),(0,255,85),(0,255,170),(0,255,255),(0,170,255),(0,85,255),(0,0,255),(85,0,255),(170,0,255),(255,0,255),(255,0,170),(255,0,85),(255,0,0)]


def _render_openpose_frame(keypoints, width, height, image_path):
    """Draw one 18-keypoint skeleton in a 3D matplotlib axes and save it to ``image_path``."""
    dpi = 84
    limits = 2
    fig = plt.figure(figsize=(width/dpi, height/dpi), dpi=dpi)
    try:
        ax = fig.add_subplot(111, projection='3d')
        ax.set_xlim(-limits*0.7, limits*0.7)
        ax.set_ylim(0, limits*1.5)  # up / down
        ax.set_zlim(0, limits*1.5)  # front / back
        # Positional argument: the keyword was `b` before Matplotlib 3.5 and
        # is `visible` afterwards, so naming it breaks on one side or the other.
        ax.grid(False)
        # Axis proportions. TODO (from the original): this ratio may be wrong
        # and can push the skeleton outside the axis range.
        ax.set_box_aspect([1.4, 1.5, 1.5], zoom=3.5)

        for joint in range(18):
            rgb = _OPENPOSE_JOINT_COLORS[joint]
            # matplotlib expects colour components in [0, 1].
            joint_color = tuple([x/255 for x in rgb])
            # Bones are drawn at 60% of the brightness of their child joint.
            bone_color = tuple([x*0.6/255 for x in rgb])

            ax.scatter(keypoints[joint][0], keypoints[joint][1], keypoints[joint][2], s=50, c=joint_color, marker='o')

            # Draw every bone that ends at this joint.
            for parent, child in _OPENPOSE_BONES:
                if child == joint:
                    ax.plot([keypoints[parent][0], keypoints[child][0]],
                            [keypoints[parent][1], keypoints[child][1]],
                            [keypoints[parent][2], keypoints[child][2]],
                            c=bone_color, linewidth=5)

        # Camera orientation.
        ax.view_init(elev=110, azim=-90)
        ax.axis('off')
        fig.savefig(image_path)
    finally:
        # Without this every frame leaves an open figure behind, and memory
        # grows with the number of frames.
        plt.close(fig)


def get_open_pose(text,height,width,save_path,video_length):
    """Generate a motion from ``text`` and render it as OpenPose-style skeleton frames.

    Side effects: writes ``motion.npy`` and (through ``plot_3d.draw_to_batch``)
    ``smpl.gif`` relative to the current working directory, and one
    ``<step>.jpg`` per generated frame into ``save_path``.

    Args:
        text: motion prompt; it is tokenized with CLIP without truncation.
        height: frame height in pixels (figure height is ``height/84`` inches
            at 84 dpi).
        width: frame width in pixels (figure width is ``width/84`` inches at
            84 dpi).
        save_path: directory for the rendered frames. A relative path is
            resolved against the directory containing this module.
        video_length: requested number of frames; it determines the sampling
            stride over the rendered frames.

    Returns:
        A list of RGB images as read back by OpenCV, one per sampled frame.
        NOTE(review): frames are taken every ``int(frame_count/video_length)``
        steps, so the list can hold more than ``video_length`` entries (for
        example 10 frames with ``video_length=4`` yields 5); when fewer frames
        than ``video_length`` were generated, all of them are returned.
    """
    from utils.motion_process import recover_from_ric

    clip_text=[text]
    print(f"Motion Prompt: {text}")

    # Inference only: no autograd graph is needed for sampling and decoding.
    with torch.no_grad():
        tokens = clip.tokenize(clip_text, truncate=False).cuda()
        feat_clip_text = clip_model.encode_text(tokens).float()
        index_motion = trans_encoder.sample(feat_clip_text[0:1], False)
        pred_pose = net.forward_decoder(index_motion)
        pred_xyz = recover_from_ric((pred_pose*std+mean).float(), 22)
    motion = pred_xyz.reshape(1, -1, 22, 3).detach().cpu().numpy()

    np.save('motion.npy', motion)
    # Called for its side effect only; the return value is not used.
    plot_3d.draw_to_batch(motion, clip_text, ['smpl.gif'])

    open_pose_list = np.array(motion[0])
    frame_num = open_pose_list.shape[0]
    print("The total SMPL sequence shape is : "+str(open_pose_list.shape))

    max_val = np.max(open_pose_list, axis=(0, 1))
    min_val = np.min(open_pose_list, axis=(0, 1))
    print("三维坐标在坐标系上的最大值：", max_val)
    print("三维坐标在坐标系上的最小值：", min_val)

    # Debug print of the first converted frame (18 keypoints).
    check = smpl_to_openpose18(open_pose_list[0])
    print("********SMPL_2_OpenPose_List(14/18)********")
    print(check)
    print("*************************")
    print(f"Total Frame Number: {frame_num}")

    # Frames are written below the module directory when save_path is
    # relative, so create exactly that directory. Creating save_path relative
    # to the working directory instead leaves the real target missing
    # whenever the two directories differ.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    frame_dir = os.path.join(module_dir, save_path)
    os.makedirs(frame_dir, exist_ok=True)

    img_list=[]
    for step in tqdm(range(0,frame_num)):
        keypoints = smpl_to_openpose18(open_pose_list[step])
        image_path = os.path.join(frame_dir, f"{step}.jpg")
        _render_openpose_frame(keypoints, width, height, image_path)
        # Read the saved frame back as a pixel array; OpenCV loads BGR.
        img = cv2.imread(image_path)
        img_list.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    if len(img_list)>=video_length:
        key_frame_sample_step=int(len(img_list)/video_length)
    else:
        print("ERROR: video length is too long")
        key_frame_sample_step=1

    return [img_list[i] for i in range(0,len(img_list),key_frame_sample_step)]



def offline_get_open_pose(text,motion_text,height,width,save_path):
    """Generate a motion from ``text`` and store its joint positions as a ``.npy`` file.

    Side effects: prints the prompt and a generated id, writes
    ``<save_path>/<motion_text with spaces replaced by underscores>.npy``, and
    calls ``plot_3d.draw_to_batch`` with the output name ``smpl.gif``.

    Args:
        text: motion prompt; it is tokenized with CLIP without truncation.
        motion_text: label used to build the output file name.
        height: not used by this function.
        width: not used by this function.
        save_path: directory for the ``.npy`` file; it is created if missing.

    Returns:
        None.
    """
    from utils.motion_process import recover_from_ric

    clip_text=[text]
    print(f"Motion Prompt: {text}")
    cuid=generate_cuid()
    print(f"Motion Generation cuid: {cuid}")

    # Inference only: no autograd graph is needed for sampling and decoding.
    with torch.no_grad():
        tokens = clip.tokenize(clip_text, truncate=False).cuda()
        feat_clip_text = clip_model.encode_text(tokens).float()
        index_motion = trans_encoder.sample(feat_clip_text[0:1], False)
        pred_pose = net.forward_decoder(index_motion)
        pred_xyz = recover_from_ric((pred_pose*std+mean).float(), 22)
    res = pred_xyz.reshape(1, -1, 22, 3).detach().cpu().numpy()

    # np.save does not create directories, so make sure the target exists.
    if save_path:
        os.makedirs(save_path, exist_ok=True)
    np.save(f'{save_path}/{replace_space_with_underscore(motion_text)}.npy', res)

    # Called for its side effect only; the return value is not used.
    plot_3d.draw_to_batch(res,clip_text, ['smpl.gif'])
    



if __name__ == "__main__":
    # Manual smoke run. get_open_pose() takes five required arguments, so the
    # frame directory and the requested frame count must be passed as well.
    # The two values below are arbitrary demo choices.
    # NOTE: this module uses package-relative imports, so it has to be started
    # as part of its package (python -m ...), not as a standalone file.
    text="walk around, jump, run straght."
    pose = get_open_pose(text, 512, 512, save_path="openpose_frames", video_length=16)