#from __future__ import absolute_import
import sys
import io
import os
sys.argv = ['GPT_eval_multi.py']
# Add the project root directory to sys.path
PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(1, PROJECT_ROOT)
CKPT_ROOT = "/cfs-datasets/public_models/motion"
from .options import option_transformer as option_trans
import clip
import torch
import cv2
import numpy as np
from .models import vqvae as vqvae
from .models import t2m_trans as trans
import warnings
from .visualization import plot_3d_global as plot_3d
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from tqdm import tqdm
from mpl_toolkits.mplot3d import Axes3D
from PIL import Image
import time
import random
warnings.filterwarnings('ignore')
from matplotlib.axes._axes import _log as matplotlib_axes_logger
matplotlib_axes_logger.setLevel('ERROR')
from math import cos, sin, radians
args = option_trans.get_args_parser()
args.dataname = 't2m'
args.resume_pth = os.path.join(CKPT_ROOT, 'pretrained/VQVAE/net_last.pth')
args.resume_trans = os.path.join(CKPT_ROOT, 'pretrained/VQTransformer_corruption05/net_best_fid.pth')
args.down_t = 2
args.depth = 3
args.block_size = 51
def replace_space_with_underscore(s):
    return s.replace(' ', '_')
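# Example (illustrative): replace_space_with_underscore("walk around") -> "walk_around"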
def Rz(angle):
    """Rotation matrix about the z-axis; angle in degrees."""
    theta = radians(angle)
    return np.array([[cos(theta), -sin(theta), 0],
                     [sin(theta),  cos(theta), 0],
                     [0,           0,          1]])

def Rx(angle):
    """Rotation matrix about the x-axis; angle in degrees."""
    theta = radians(angle)
    return np.array([[1, 0,           0],
                     [0, cos(theta), -sin(theta)],
                     [0, sin(theta),  cos(theta)]])
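# Usage sketch (illustrative; these helpers are not otherwise exercised in this
# file): rotating an (N, 3) keypoint array 90 degrees about the x-axis would be
# `rotated = keypoints @ Rx(90).T`; Rz works the same way about the z-axis.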
def generate_cuid():
    """Generate a pseudo-unique id from the current millisecond timestamp plus a random hex suffix."""
    timestamp = hex(int(time.time() * 1000))[2:]
    random_str = hex(random.randint(0, 0xfffff))[2:]
    return (timestamp + random_str).zfill(10)
def smpl_to_openpose18(smpl_keypoints):
    '''
    Mapping of the 22-keypoint SMPL skeleton (kinematic chains):
    [0, 2, 5, 8, 11]
        Left leg: from the pelvis (joint 0) through the left thigh (2),
        left shin (5), left foot (8), and left toes (11).
    [0, 1, 4, 7, 10]
        Right leg: from the pelvis (joint 0) through the right thigh (1),
        right shin (4), right foot (7), and right toes (10).
    [0, 3, 6, 9, 12, 15]
        Torso: from the pelvis (joint 0) through the spine (3), neck (6),
        head (9), left shoulder (12), and right shoulder (15).
    [9, 14, 17, 19, 21]
        Left arm: from the left shoulder (9) through the left upper arm (14),
        left forearm (17), left wrist (19), and left hand (21).
    [9, 13, 16, 18, 20]
        Right arm: from the right shoulder (9) through the right upper arm (13),
        right forearm (16), right wrist (18), and right hand (20).
    The current conversion to OpenPose ignores SMPL's shoulder (collar) keypoints.
    '''
    openpose_keypoints = np.zeros((18, 3))
    openpose_keypoints[0] = smpl_keypoints[9]   # nose
    openpose_keypoints[0][1] = openpose_keypoints[0][1] + 0.3
    openpose_keypoints[1] = smpl_keypoints[6]   # neck
    openpose_keypoints[2] = smpl_keypoints[16]  # right shoulder
    openpose_keypoints[3] = smpl_keypoints[18]  # right elbow
    openpose_keypoints[4] = smpl_keypoints[20]  # right wrist
    openpose_keypoints[5] = smpl_keypoints[17]  # left shoulder
    openpose_keypoints[6] = smpl_keypoints[19]  # left elbow
    openpose_keypoints[7] = smpl_keypoints[21]  # left wrist
    # TODO: experiment -- re-center the neck between the shoulders and keep the
    # nose's height relative to the neck consistent.
    openpose_keypoints[1][0] = (openpose_keypoints[2][0] + openpose_keypoints[5][0]) / 2
    openpose_keypoints[1][1] = (openpose_keypoints[2][1] + openpose_keypoints[5][1]) / 2
    openpose_keypoints[1][2] = (openpose_keypoints[2][2] + openpose_keypoints[5][2]) / 2
    openpose_keypoints[0][1] = openpose_keypoints[1][1] + 0.3
    openpose_keypoints[8] = smpl_keypoints[1]   # right hip
    openpose_keypoints[9] = smpl_keypoints[4]   # right knee
    openpose_keypoints[10] = smpl_keypoints[7]  # right ankle
    openpose_keypoints[11] = smpl_keypoints[2]  # left hip
    openpose_keypoints[12] = smpl_keypoints[5]  # left knee
    openpose_keypoints[13] = smpl_keypoints[8]  # left ankle
    # TODO: experiment -- manually place the face keypoints to test whether they
    # can control the body's facing direction.
    # openpose_keypoints[0][0] = openpose_keypoints[0][0] + 0.3  # probe axis 0 (horizontal, rightward)
    # openpose_keypoints[0][2] = openpose_keypoints[0][2]        # probe axis 2 (outward)
    # openpose_keypoints[0][1] = openpose_keypoints[0][1] + 0.5  # probe axis 1 (vertical, upward)
    openpose_keypoints[14] = openpose_keypoints[0]  # right eye
    openpose_keypoints[14][1] = openpose_keypoints[14][1] + 0.05
    openpose_keypoints[14][0] = openpose_keypoints[14][0] + 0.3 * (openpose_keypoints[2][0] - openpose_keypoints[1][0])
    openpose_keypoints[14][2] = openpose_keypoints[14][2] + 0.3 * (openpose_keypoints[2][2] - openpose_keypoints[1][2])
    openpose_keypoints[15] = openpose_keypoints[0]  # left eye
    openpose_keypoints[15][1] = openpose_keypoints[15][1] + 0.05
    openpose_keypoints[15][0] = openpose_keypoints[15][0] + 0.3 * (openpose_keypoints[5][0] - openpose_keypoints[1][0])
    openpose_keypoints[15][2] = openpose_keypoints[15][2] + 0.3 * (openpose_keypoints[5][2] - openpose_keypoints[1][2])
    openpose_keypoints[16] = openpose_keypoints[0]  # right ear
    openpose_keypoints[16][0] = openpose_keypoints[16][0] + 0.7 * (openpose_keypoints[2][0] - openpose_keypoints[1][0])
    openpose_keypoints[16][2] = openpose_keypoints[16][2] + 0.7 * (openpose_keypoints[2][2] - openpose_keypoints[1][2])
    openpose_keypoints[17] = openpose_keypoints[0]  # left ear
    openpose_keypoints[17][0] = openpose_keypoints[17][0] + 0.7 * (openpose_keypoints[5][0] - openpose_keypoints[1][0])
    openpose_keypoints[17][2] = openpose_keypoints[17][2] + 0.7 * (openpose_keypoints[5][2] - openpose_keypoints[1][2])
    return openpose_keypoints
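# Minimal sanity check (illustrative, safe to remove): a zero-filled (22, 3)
# SMPL pose should map to an (18, 3) OpenPose array.
assert smpl_to_openpose18(np.zeros((22, 3))).shape == (18, 3)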
# TODO: debug only, needs to be deleted before upload
## load the CLIP model and datasets
clip_model, clip_preprocess = clip.load("ViT-B/32", device=torch.device('cuda'), jit=False, download_root=CKPT_ROOT)  # must set jit=False for training
clip.model.convert_weights(clip_model)  # actually unnecessary: CLIP weights are already float16 by default
clip_model.eval()
for p in clip_model.parameters():
    p.requires_grad = False
print("loaded CLIP model")
net = vqvae.HumanVQVAE(args,  # use args to define different parameters in different quantizers
                       args.nb_code,
                       args.code_dim,
                       args.output_emb_width,
                       args.down_t,
                       args.stride_t,
                       args.width,
                       args.depth,
                       args.dilation_growth_rate)
trans_encoder = trans.Text2Motion_Transformer(num_vq=args.nb_code,
                                              embed_dim=1024,
                                              clip_dim=args.clip_dim,
                                              block_size=args.block_size,
                                              num_layers=9,
                                              n_head=16,
                                              drop_out_rate=args.drop_out_rate,
                                              fc_rate=args.ff_rate)
print('loading checkpoint from {}'.format(args.resume_pth))
ckpt = torch.load(args.resume_pth, map_location='cpu')
net.load_state_dict(ckpt['net'], strict=True)
net.eval()
net.cuda()
print('loading transformer checkpoint from {}'.format(args.resume_trans))
ckpt = torch.load(args.resume_trans, map_location='cpu')
trans_encoder.load_state_dict(ckpt['trans'], strict=True)
trans_encoder.eval()
trans_encoder.cuda()
mean = torch.from_numpy(np.load(os.path.join(CKPT_ROOT, 'checkpoints/t2m/VQVAEV3_CB1024_CMT_H1024_NRES3/meta/mean.npy'))).cuda()
std = torch.from_numpy(np.load(os.path.join(CKPT_ROOT, 'checkpoints/t2m/VQVAEV3_CB1024_CMT_H1024_NRES3/meta/std.npy'))).cuda()
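# mean/std are the dataset normalization statistics saved at training time; the
# generated motion is de-normalized below as pred_pose * std + mean before the
# xyz joint positions are recovered with recover_from_ric.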
def get_open_pose(text, height, width, save_path, video_length):
    out_root = os.path.dirname(os.path.abspath(__file__))  # base directory for the rendered frames
    clip_text = [text]
    print(f"Motion Prompt: {text}")
    # cuid = generate_cuid()
    # print(f"Motion Generation cuid: {cuid}")
    # clip_text = ["the person jump and spin twice,then running straight and sit down. "]  # generation from a single prompt is supported
    # change the text here
    tokens = clip.tokenize(clip_text, truncate=False).cuda()
    feat_clip_text = clip_model.encode_text(tokens).float()
    index_motion = trans_encoder.sample(feat_clip_text[0:1], False)
    pred_pose = net.forward_decoder(index_motion)
    from utils.motion_process import recover_from_ric
    pred_xyz = recover_from_ric((pred_pose * std + mean).float(), 22)
    xyz = pred_xyz.reshape(1, -1, 22, 3)
    np.save('motion.npy', xyz.detach().cpu().numpy())
    pose_vis = plot_3d.draw_to_batch(xyz.detach().cpu().numpy(), clip_text, ['smpl.gif'])
    res = xyz.detach().cpu().numpy()
    points_3d_list = res[0]
    frame_num = points_3d_list.shape[0]
    open_pose_list = np.array(points_3d_list)
    print("The total SMPL sequence shape is: " + str(open_pose_list.shape))
    max_val = np.max(open_pose_list, axis=(0, 1))
    min_val = np.min(open_pose_list, axis=(0, 1))
    print("Per-axis maximum of the 3D coordinates:", max_val)
    print("Per-axis minimum of the 3D coordinates:", min_val)
    check = smpl_to_openpose18(open_pose_list[0])  # 18 keypoints
    print("********SMPL_2_OpenPose_List(14/18)********")
    print(check)
    print("*************************")
    print(f"Total Frame Number: {frame_num}")
    img_list = []
    for step in tqdm(range(0, frame_num)):
        # render one frame
        dpi = 84
        fig = plt.figure(figsize=(width / dpi, height / dpi), dpi=dpi)
        ax = fig.add_subplot(111, projection='3d')
        limits = 2
        ax.set_xlim(-limits * 0.7, limits * 0.7)
        ax.set_ylim(0, limits * 1.5)  # up/down
        ax.set_zlim(0, limits * 1.5)  # front/back
        ax.grid(False)
        # ax.dist = 1
        ax.set_box_aspect([1.4, 1.5, 1.5], zoom=3.5)  # axis aspect ratio; TODO: this ratio may be wrong and points can fall outside the axis range
        # keypoint coordinates, one (x, y, z) per row
        keypoints = smpl_to_openpose18(open_pose_list[step])  # 18 keypoints
        # kinematic chain; currently only the body part is used
        kinematic_chain = [(0, 1), (1, 2), (2, 3), (3, 4), (1, 5), (5, 6), (6, 7), (1, 8), (8, 9), (9, 10), (1, 11), (11, 12), (12, 13), (0, 14), (14, 16), (0, 15), (15, 17)]
        # kinematic_chain = [(0, 1), (1, 2), (2, 3), (3, 4), (1, 5), (5, 6), (6, 7), (1, 8), (8, 9), (9, 10), (1, 11), (11, 12), (12, 13)]
        # RGB colors
        colors = [(0, 0, 255), (0, 255, 255), (0, 255, 0), (255, 0, 0), (255, 0, 255), (255, 192, 203), (0, 165, 255), (19, 69, 139), (173, 216, 230), (34, 139, 34), (0, 0, 128), (184, 134, 11), (139, 0, 139), (0, 100, 0), (0, 255, 255), (0, 255, 0), (216, 191, 216), (255, 255, 224)]
        # colors = [(0, 0, 255), (0, 255, 255), (0, 255, 0), (255, 0, 0), (255, 0, 255), (255, 192, 203), (0, 165, 255), (19, 69, 139), (173, 216, 230), (34, 139, 34), (0, 0, 128), (184, 134, 11), (139, 0, 139), (0, 100, 0)]
        # 18 points
        joint_colors = [(255, 0, 0), (255, 85, 0), (255, 170, 0), (255, 255, 0), (170, 255, 0), (85, 255, 0), (0, 255, 0), (0, 255, 85), (0, 255, 170), (0, 255, 255), (0, 170, 255), (0, 85, 255), (0, 0, 255), (85, 0, 255), (170, 0, 255), (255, 0, 255), (255, 0, 170), (255, 0, 85), (255, 0, 0)]
        # 14-point trunk-only variant
        # joint_colors = [(255, 0, 0), (255, 85, 0), (255, 170, 0), (255, 255, 0), (170, 255, 0), (85, 255, 0), (0, 255, 0), (0, 255, 85), (0, 255, 170), (0, 255, 255), (0, 170, 255), (0, 85, 255), (0, 0, 255), (85, 0, 255), (170, 0, 255)]
        # chain lines use 60% of the joint color; matplotlib expects color values in [0, 1]
        joint_rgb_color2 = []
        kinematic_chain_rgb_color2 = []
        for color in joint_colors:
            joint_rgb_color2.append(tuple([x / 255 for x in color]))
            kinematic_chain_rgb_color2.append(tuple([x * 0.6 / 255 for x in color]))
        # visualize the result
        for i in range(0, 18):
            # draw the keypoint
            ax.scatter(keypoints[i][0], keypoints[i][1], keypoints[i][2], s=50, c=joint_rgb_color2[i], marker='o')
            # draw the kinematic chain
            for j in range(len(kinematic_chain)):
                if kinematic_chain[j][1] == i:
                    ax.plot([keypoints[kinematic_chain[j][0]][0], keypoints[kinematic_chain[j][1]][0]], [keypoints[kinematic_chain[j][0]][1], keypoints[kinematic_chain[j][1]][1]], [keypoints[kinematic_chain[j][0]][2], keypoints[kinematic_chain[j][1]][2]], c=kinematic_chain_rgb_color2[i], linewidth=5)
        # adjust the viewing angle
        ax.view_init(elev=110, azim=-90)
        plt.axis('off')
        # save the figure, then read it back as an image array
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        image_tmp_path = str(f"{save_path}/{str(step)}.jpg")
        plt.savefig(os.path.join(out_root, image_tmp_path))  # saved as RGB
        plt.close(fig)  # avoid accumulating open figures across frames
        img = cv2.imread(os.path.join(out_root, image_tmp_path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_list.append(img)
    res = []
    if len(img_list) >= video_length:
        key_frame_sample_step = int(len(img_list) / video_length)
    else:
        print("ERROR: requested video_length exceeds the number of generated frames")
        key_frame_sample_step = 1
    for i in range(0, len(img_list), key_frame_sample_step):
        res.append(img_list[i])
    return res
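# Hedged usage sketch (not part of the original pipeline): the RGB frames
# returned by get_open_pose can be assembled into a video with OpenCV. The
# output path and fps below are illustrative assumptions.
def frames_to_video(frames, out_path='openpose.mp4', fps=8):
    h, w = frames[0].shape[:2]
    writer = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
    for frame in frames:
        # frames were converted to RGB above; VideoWriter expects BGR
        writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
    writer.release()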
def offline_get_open_pose(text, motion_text, height, width, save_path):
    # motion_text = text
    clip_text = [text]
    print(f"Motion Prompt: {text}")
    cuid = generate_cuid()
    print(f"Motion Generation cuid: {cuid}")
    # clip_text = ["the person jump and spin twice,then running straight and sit down. "]  # generation from a single prompt is supported
    # change the text here
    tokens = clip.tokenize(clip_text, truncate=False).cuda()
    feat_clip_text = clip_model.encode_text(tokens).float()
    index_motion = trans_encoder.sample(feat_clip_text[0:1], False)
    pred_pose = net.forward_decoder(index_motion)
    from utils.motion_process import recover_from_ric
    pred_xyz = recover_from_ric((pred_pose * std + mean).float(), 22)
    xyz = pred_xyz.reshape(1, -1, 22, 3)
    res = xyz.detach().cpu().numpy()
    os.makedirs(save_path, exist_ok=True)  # np.save does not create missing directories
    np.save(f'{save_path}/{replace_space_with_underscore(motion_text)}.npy', res)
    pose_vis = plot_3d.draw_to_batch(res, clip_text, ['smpl.gif'])
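# Usage sketch (illustrative values): offline_get_open_pose("a person waves",
# "a person waves", 512, 512, "./motions") writes the raw (1, T, 22, 3) joint
# sequence to ./motions/a_person_waves.npy; height and width are currently unused here.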
if __name__ == "__main__":
    text = "walk around, jump, run straight."
    # save_path and video_length below are example values; get_open_pose requires all five arguments
    pose = get_open_pose(text, 512, 512, './openpose_frames', 16)