"""Gradio demo app for PostoMETRO 3D human mesh recovery.

Bootstraps the runtime environment, defines the GPU-backed ``infer`` callback
and builds/launches the Gradio Blocks UI.
"""
import math
import os
import os.path as osp
import sys
from pathlib import Path

import cv2
import gradio as gr
import spaces
import torch

# Install the bundled transformer utilities only when mmpose cannot be
# imported. Only ImportError is caught so that unrelated failures (and
# KeyboardInterrupt/SystemExit) are not silently swallowed.
try:
    import mmpose  # noqa: F401
except ImportError:
    os.system('pip install /home/user/app/main/transformer_utils')

# Overwrite torchgeometry's conversions module with the copy shipped in assets.
# NOTE(review): kept as a best-effort shell call (a failure does not raise).
os.system('cp -rf /home/user/app/assets/conversions.py /home/user/.pyenv/versions/3.9.18/lib/python3.9/site-packages/torchgeometry/core/conversions.py')

DEFAULT_MODEL = 'postometro'
OUT_FOLDER = '/home/user/app/demo_out'
os.makedirs(OUT_FOLDER, exist_ok=True)


@spaces.GPU(enable_queue=True)
def infer(image_input, in_threshold: float = 0.5, num_people: str = "Single person", render_mesh: bool = False):
    """Run PostoMETRO inference on one image and return the rendered result.

    Args:
        image_input: Image delivered by the ``gr.Image`` input component
            (presumably a NumPy array, Gradio's default -- confirm).
        in_threshold: Bounding-box detection threshold forwarded to the
            inferer.
        num_people: ``"Single person"`` selects single-person mode; any other
            value enables multi-person mode.
        render_mesh: When true the inferer is asked to render meshes; the
            negated flag is what is actually forwarded.

    Returns:
        A ``(vis_img, text)`` tuple where ``vis_img`` is the visualisation
        returned by ``Inferer.infer`` and ``text`` is the bbox metadata
        formatted for the debug textbox.
    """
    cuda_available = torch.cuda.is_available()
    num_gpus = 1 if cuda_available else -1
    print("!!! torch.cuda.is_available: ", cuda_available)
    print("!!! torch.cuda.device_count: ", torch.cuda.device_count())
    print("CUDA version: ", torch.version.cuda)
    # current_device()/get_device_name() raise when no CUDA device is usable,
    # so only query them when CUDA is actually available.
    if cuda_available:
        index = torch.cuda.current_device()
        print("CUDA current_device: ", index)
        print("CUDA device_name: ", torch.cuda.get_device_name(index))

    # Imported lazily, inside the GPU-decorated call, as in the original code.
    from main.inference import Inferer
    inferer = Inferer(DEFAULT_MODEL, num_gpus, OUT_FOLDER)

    # Drop artefacts left over from a previous request.
    os.system(f'rm -rf {OUT_FOLDER}/*')

    multi_person = num_people != "Single person"
    # The third argument is the frame index: always 0 for a single image (the
    # disabled video loop below passed its running frame counter instead).
    vis_img, bbox = inferer.infer(image_input, in_threshold, 0, multi_person, not render_mesh)

    # NOTE: disabled legacy video pipeline, kept for reference.
    # cap = cv2.VideoCapture(video_input)
    # fps = math.ceil(cap.get(5))
    # width = int(cap.get(3))
    # height = int(cap.get(4))
    # fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    # video_path = osp.join(OUT_FOLDER, f'out.m4v')
    # final_video_path = osp.join(OUT_FOLDER, f'out.mp4')
    # video_output = cv2.VideoWriter(video_path, fourcc, fps, (width, height))
    # success = 1
    # frame = 0
    # while success:
    #     success, original_img = cap.read()
    #     if not success:
    #         break
    #     frame += 1
    #     img, mesh_paths, smplx_paths = inferer.infer(original_img, in_threshold, frame, multi_person, not(render_mesh))
    #     video_output.write(img)
    #     yield img, None, None, None
    # cap.release()
    # video_output.release()
    # cv2.destroyAllWindows()
    # os.system(f'ffmpeg -i {video_path} -c copy {final_video_path}')

    # #Compress mesh and smplx files
    # save_path_mesh = os.path.join(OUT_FOLDER, 'mesh')
    # save_mesh_file = os.path.join(OUT_FOLDER, 'mesh.zip')
    # os.makedirs(save_path_mesh, exist_ok= True)
    # save_path_smplx = os.path.join(OUT_FOLDER, 'smplx')
    # save_smplx_file = os.path.join(OUT_FOLDER, 'smplx.zip')
    # os.makedirs(save_path_smplx, exist_ok= True)
    # os.system(f'zip -r {save_mesh_file} {save_path_mesh}')
    # os.system(f'zip -r {save_smplx_file} {save_path_smplx}')
    # yield img, video_path, save_mesh_file, save_smplx_file
    return vis_img, "bbox meta: {}".format(bbox)


# Heading rendered at the top of the page via gr.Markdown.
TITLE = '''

PostoMETRO: Pose Token Enhanced Mesh Transformer for Robust 3D Human Mesh Recovery

'''
# Introductory text rendered below the heading via gr.Markdown.
DESCRIPTION = ''' Official Gradio demo for PostoMETRO: Pose Token Enhanced Mesh Transformer for Robust 3D Human Mesh Recovery.

Note: You can drop a image at the panel (or select one of the examples) to obtain the 3D parametric reconstructions of the detected humans.

'''

with gr.Blocks(title="PostoMETRO", css=".gradio-container") as demo:
    gr.Markdown(TITLE)
    gr.Markdown(DESCRIPTION)

    with gr.Row():
        # Left column: input image and inference options.
        with gr.Column():
            image_input = gr.Image(label="Input image", elem_classes="Image")
            threshold = gr.Slider(0, 1.0, value=0.5, label='BBox detection threshold')
            num_people = gr.Radio(
                choices=["Single person", "Multiple people"],
                value="Single person",
                label="Number of people",
                info="Choose how many people are there in the video. Choose 'single person' for faster inference.",
                interactive=True,
                scale=1,
            )
            mesh_as_vertices = gr.Checkbox(
                label="Render as mesh",
                info="By default, the estimated SMPL-X parameters are rendered as vertices for faster visualization. Check this option if you want to visualize meshes instead.",
                interactive=True,
                scale=1,
            )
            send_button = gr.Button("Infer")
        # Right column: rendered output and debug text.
        with gr.Column():
            processed_frames = gr.Image(label="Rendered Results")
            debug_textbox = gr.Textbox(label="Debug information")

    # example_images = gr.Examples([])
    send_button.click(
        fn=infer,
        inputs=[image_input, threshold, num_people, mesh_as_vertices],
        outputs=[processed_frames, debug_textbox],
    )
    # with gr.Row():
    # Each example row has a single column (the image path), so only the image
    # component is listed; `inputs` must contain components, not raw values.
    example_images = gr.Examples(
        [
            ['/home/user/app/assets/01.jpg'],
            ['/home/user/app/assets/02.jpg'],
            ['/home/user/app/assets/03.jpg'],
        ],
        inputs=[image_input],
    )

#demo.queue()
demo.queue().launch(debug=True)