import os
import sys
import os.path as osp
from pathlib import Path
import cv2
import gradio as gr
import torch
import math
import spaces
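
# cv2, math, and os.path are only needed by the commented-out video pipeline
# further down; sys and Path are currently unused.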
try:
    import mmpose
except ImportError:
    # Bootstrap the environment on first run: install the bundled
    # transformer_utils package and overwrite torchgeometry's conversions
    # module with a patched copy (a common compatibility fix for newer PyTorch).
    os.system('pip install /home/user/app/main/transformer_utils')
    os.system('cp -rf /home/user/app/assets/conversions.py /home/user/.pyenv/versions/3.9.18/lib/python3.9/site-packages/torchgeometry/core/conversions.py')
DEFAULT_MODEL = 'postometro'
OUT_FOLDER = '/home/user/app/demo_out'
os.makedirs(OUT_FOLDER, exist_ok=True)
@spaces.GPU  # request a GPU for this call (assumes a ZeroGPU Space; the `spaces` import is otherwise unused)
def infer(image_input, in_threshold=0.5, num_people="Single person", render_mesh=False):
    num_gpus = 1 if torch.cuda.is_available() else -1
    print("torch.cuda.is_available:", torch.cuda.is_available())
    print("torch.cuda.device_count:", torch.cuda.device_count())
    print("CUDA version:", torch.version.cuda)
    if torch.cuda.is_available():
        # torch.cuda.current_device() raises when no GPU is present, so only
        # query device details on CUDA machines.
        index = torch.cuda.current_device()
        print("CUDA current_device:", index)
        print("CUDA device_name:", torch.cuda.get_device_name(index))
    from main.inference import Inferer
    inferer = Inferer(DEFAULT_MODEL, num_gpus, OUT_FOLDER)
    os.system(f'rm -rf {OUT_FOLDER}/*')  # clear stale outputs from previous runs
    multi_person = (num_people != "Single person")
    vis_img, bbox = inferer.infer(image_input, in_threshold, 0, multi_person, not render_mesh)
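    # Inferer.infer(img, det_threshold, frame_idx, multi_person, as_vertices)
    # returns the rendered image plus the detected bounding boxes; frame_idx
    # is 0 here since we process a single image (these parameter names are
    # descriptive guesses from the call site, not the actual signature).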
    # cap = cv2.VideoCapture(video_input)
    # fps = math.ceil(cap.get(5))
    # width = int(cap.get(3))
    # height = int(cap.get(4))
    # fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    # video_path = osp.join(OUT_FOLDER, f'out.m4v')
    # final_video_path = osp.join(OUT_FOLDER, f'out.mp4')
    # video_output = cv2.VideoWriter(video_path, fourcc, fps, (width, height))
    # success = 1
    # frame = 0
    # while success:
    #     success, original_img = cap.read()
    #     if not success:
    #         break
    #     frame += 1
    #     img, mesh_paths, smplx_paths = inferer.infer(original_img, in_threshold, frame, multi_person, not(render_mesh))
    #     video_output.write(img)
    #     yield img, None, None, None
    # cap.release()
    # video_output.release()
    # cv2.destroyAllWindows()
    # os.system(f'ffmpeg -i {video_path} -c copy {final_video_path}')
    # # Compress mesh and smplx files
    # save_path_mesh = os.path.join(OUT_FOLDER, 'mesh')
    # save_mesh_file = os.path.join(OUT_FOLDER, 'mesh.zip')
    # os.makedirs(save_path_mesh, exist_ok=True)
    # save_path_smplx = os.path.join(OUT_FOLDER, 'smplx')
    # save_smplx_file = os.path.join(OUT_FOLDER, 'smplx.zip')
    # os.makedirs(save_path_smplx, exist_ok=True)
    # os.system(f'zip -r {save_mesh_file} {save_path_mesh}')
    # os.system(f'zip -r {save_smplx_file} {save_path_smplx}')
    # yield img, video_path, save_mesh_file, save_smplx_file
    return vis_img, f"bbox meta: {bbox}"
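
# Local smoke test (commented out; not used by the Space). A minimal sketch
# assuming an example image ships at /home/user/app/assets/01.jpg; note that
# Gradio hands infer() an RGB array while cv2.imread returns BGR, so colors
# may look swapped in this quick check.
# vis, meta = infer(cv2.imread('/home/user/app/assets/01.jpg'))
# print(meta)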
TITLE = '''<h1 align="center">PostoMETRO: Pose Token Enhanced Mesh Transformer for Robust 3D Human Mesh Recovery</h1>'''
DESCRIPTION = '''
<b>Official Gradio demo</b> for <b>PostoMETRO: Pose Token Enhanced Mesh Transformer for Robust 3D Human Mesh Recovery</b>.<br>
<p>
Note: You can drop an image onto the panel (or select one of the examples)
to obtain the 3D parametric reconstructions of the detected humans.
</p>
'''
with gr.Blocks(title="PostoMETRO", css=".gradio-container") as demo:
    gr.Markdown(TITLE)
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(label="Input image", elem_classes="Image")
            threshold = gr.Slider(0, 1.0, value=0.5, label='BBox detection threshold')
            num_people = gr.Radio(
                choices=["Single person", "Multiple people"],
                value="Single person",
                label="Number of people",
                info="Choose how many people are in the image. Choose 'Single person' for faster inference.",
                interactive=True,
                scale=1,
            )
            mesh_as_vertices = gr.Checkbox(
                label="Render as mesh",
                info="By default, the estimated SMPL-X parameters are rendered as vertices for faster visualization. Check this option to visualize meshes instead.",
                interactive=True,
                scale=1,
            )
            send_button = gr.Button("Infer")
        with gr.Column():
            processed_frames = gr.Image(label="Rendered Results")
            debug_textbox = gr.Textbox(label="Debug information")
    send_button.click(
        fn=infer,
        inputs=[image_input, threshold, num_people, mesh_as_vertices],
        outputs=[processed_frames, debug_textbox],
    )
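    # Gradio maps the inputs list positionally onto infer's parameters:
    # image_input -> image_input, threshold -> in_threshold,
    # num_people -> num_people, mesh_as_vertices -> render_mesh.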
    # with gr.Row():
    example_images = gr.Examples(
        examples=[
            ['/home/user/app/assets/01.jpg'],
            ['/home/user/app/assets/02.jpg'],
            ['/home/user/app/assets/03.jpg'],
        ],
        # gr.Examples expects components in `inputs`, not literal values like
        # 0.5; each example row supplies only the image, leaving the other
        # controls at their defaults.
        inputs=[image_input],
    )
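
# queue() enables Gradio's request queue so concurrent visitors' inference
# requests are processed in order instead of all hitting the GPU at once
# (default queue settings assumed).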
demo.queue().launch(debug=True)