SerdarHelli committed on
Commit
b440279
1 Parent(s): b1809a9

Upload 2 files

Files changed (2)
  1. app.py +252 -0
  2. requirements.txt +19 -0
app.py ADDED
@@ -0,0 +1,252 @@
+ import sys
+ import os
+ import re
+ from typing import List, Optional, Tuple, Union
+
+ import click
+ import dnnlib
+ import numpy as np
+ import PIL.Image
+ import PIL.ImageOps
+ import torch
+ from tqdm import tqdm
+
+ import legacy
+ from camera_utils import LookAtPoseSampler
+ from huggingface_hub import hf_hub_download
+
+ from matplotlib import pyplot as plt
+
+ from pathlib import Path
+
+ import json
+ import gradio as gr
+
+ from training.utils import color_mask, color_list
+ import plotly.graph_objects as go
+
+ import imageio
+
+ import argparse
+
+ import trimesh
+ import pyrender
+ import mcubes
+
+ # Render off-screen with EGL so pyrender works on headless machines.
+ os.environ["PYOPENGL_PLATFORM"] = "egl"
+
+
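+ # get_sigma_field_np evaluates the generator's density (sigma) on a regular
+ # resolution^3 grid inside the 'box_warp' cube, in block_resolution-sized
+ # chunks to keep GPU memory bounded.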
+ def get_sigma_field_np(nerf, styles, resolution=512, block_resolution=64):
+     # Return a numpy array of the forwarded sigma values.
+     # bound = (nerf.rendering_kwargs['ray_end'] - nerf.rendering_kwargs['ray_start']) * 0.5
+     bound = nerf.rendering_kwargs['box_warp'] * 0.5
+     X = torch.linspace(-bound, bound, resolution).split(block_resolution)
+
+     sigma_np = np.zeros([resolution, resolution, resolution], dtype=np.float32)
+
+     for xi, xs in enumerate(X):
+         for yi, ys in enumerate(X):
+             for zi, zs in enumerate(X):
+                 xx, yy, zz = torch.meshgrid(xs, ys, zs, indexing='ij')
+                 pts = torch.stack([xx, yy, zz], dim=-1).unsqueeze(0).to(styles.device)  # B, H, H, H, C
+                 block_shape = [1, len(xs), len(ys), len(zs)]
+                 out = nerf.sample_mixed(pts.reshape(1, -1, 3), None, ws=styles, noise_mode='const')
+                 feat_out, sigma_out = out['rgb'], out['sigma']
+                 sigma_np[xi * block_resolution: xi * block_resolution + len(xs),
+                          yi * block_resolution: yi * block_resolution + len(ys),
+                          zi * block_resolution: zi * block_resolution + len(zs)] = sigma_out.reshape(block_shape[1:]).detach().cpu().numpy()
+
+     return sigma_np, bound
+
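+ # extract_geometry runs marching cubes (PyMCubes) on the sampled density grid
+ # and rescales the vertices from grid indices back to world coordinates.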
+
+ def extract_geometry(nerf, styles, resolution, threshold):
+     u, bound = get_sigma_field_np(nerf, styles, resolution)
+     vertices, faces = mcubes.marching_cubes(u, threshold)
+     b_min_np = np.array([-bound, -bound, -bound])
+     b_max_np = np.array([bound, bound, bound])
+
+     # Map vertex coordinates from [0, resolution - 1] grid indices into the world box.
+     vertices = vertices / (resolution - 1.0) * (b_max_np - b_min_np)[None, :] + b_min_np[None, :]
+     return vertices.astype('float32'), faces
+
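+ # render_video orbits the camera around the subject with LookAtPoseSampler,
+ # varying yaw and pitch sinusoidally over the frame index, and collects RGB
+ # frames plus colorized semantic-label frames.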
+ def render_video(G, ws, intrinsics, num_frames=120, pitch_range=0.25, yaw_range=0.35, neural_rendering_resolution=128, device='cuda'):
+     frames, frames_label = [], []
+
+     for frame_idx in tqdm(range(num_frames)):
+         cam2world_pose = LookAtPoseSampler.sample(3.14 / 2 + yaw_range * np.sin(2 * 3.14 * frame_idx / num_frames),
+                                                   3.14 / 2 - 0.05 + pitch_range * np.cos(2 * 3.14 * frame_idx / num_frames),
+                                                   torch.tensor(G.rendering_kwargs['avg_camera_pivot'], device=device),
+                                                   radius=G.rendering_kwargs['avg_camera_radius'], device=device)
+         pose = torch.cat([cam2world_pose.reshape(-1, 16), intrinsics.reshape(-1, 9)], 1)
+         with torch.no_grad():
+             out = G.synthesis(ws, pose, noise_mode='const', neural_rendering_resolution=neural_rendering_resolution)
+         frames.append(((out['image'].cpu().numpy()[0] + 1) * 127.5).clip(0, 255).astype(np.uint8).transpose(1, 2, 0))
+         frames_label.append(color_mask(torch.argmax(out['semantic'], dim=1).cpu().numpy()[0]).astype(np.uint8))
+
+     return frames, frames_label
+
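+ # return_plot_go converts the colored trimesh into an interactive Plotly
+ # Mesh3d figure for display in the Gradio app.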
+ def return_plot_go(mesh_trimesh):
+     x = np.asarray(mesh_trimesh.vertices).T[0]
+     y = np.asarray(mesh_trimesh.vertices).T[1]
+     z = np.asarray(mesh_trimesh.vertices).T[2]
+
+     i = np.asarray(mesh_trimesh.faces).T[0]
+     j = np.asarray(mesh_trimesh.faces).T[1]
+     k = np.asarray(mesh_trimesh.faces).T[2]
+     fig = go.Figure(go.Mesh3d(x=x, y=y, z=z,
+                               i=i, j=j, k=k,
+                               vertexcolor=np.asarray(mesh_trimesh.visual.vertex_colors),
+                               lighting=dict(ambient=0.5,
+                                             diffuse=1,
+                                             fresnel=4,
+                                             specular=0.5,
+                                             roughness=0.05,
+                                             facenormalsepsilon=0,
+                                             vertexnormalsepsilon=0),
+                               lightposition=dict(x=100,
+                                                  y=100,
+                                                  z=1000)))
+     return fig
+
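+ # Download the pretrained seg2cat checkpoint from the Hugging Face Hub once at
+ # startup; additional models can be registered in the dict below.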
+
+ network_cat = hf_hub_download("SerdarHelli/pix2pix3d_seg2cat", filename="pix2pix3d_seg2cat.pkl", revision="main")
+
+ models = {"seg2cat": network_cat}
+
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
+ outdir = "/content/"
+
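+ # get_all is the main inference pipeline: load the generator, encode the input
+ # label map, synthesize the image and semantic map, extract and color a mesh,
+ # and render an orbiting video.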
+ def get_all(cfg, input, truncation_psi, mesh_resolution, random_seed, fps, num_frames):
+
+     network = models[cfg]
+
+     with dnnlib.util.open_url(network) as f:
+         G = legacy.load_network_pkl(f)['G_ema'].eval().to(device)
+
+     if cfg == 'seg2cat' or cfg == 'seg2face':
+         neural_rendering_resolution = 128
+         data_type = 'seg'
+         # Initialize pose sampler.
+         forward_cam2world_pose = LookAtPoseSampler.sample(3.14 / 2, 3.14 / 2,
+                                                           torch.tensor(G.rendering_kwargs['avg_camera_pivot'], device=device),
+                                                           radius=G.rendering_kwargs['avg_camera_radius'], device=device)
+         focal_length = 4.2647  # shapenet has higher FOV
+         intrinsics = torch.tensor([[focal_length, 0, 0.5], [0, focal_length, 0.5], [0, 0, 1]], device=device)
+         forward_pose = torch.cat([forward_cam2world_pose.reshape(-1, 16), intrinsics.reshape(-1, 9)], 1)
+     elif cfg == 'edge2car':
+         neural_rendering_resolution = 64
+         data_type = 'edge'
+     else:
+         raise ValueError(f'Invalid cfg: {cfg}')
+
+     save_dir = Path(outdir)
+
+     input_label = PIL.Image.open(input)
+     input_label = PIL.ImageOps.grayscale(input_label)
+     input_label = np.asarray(input_label).astype(np.uint8)
+     input_label = torch.from_numpy(input_label).unsqueeze(0).unsqueeze(0).to(device)
+     input_pose = forward_pose.to(device)
+
+     # Sample a latent code from the user-provided seed and generate the image.
+     z = torch.from_numpy(np.random.RandomState(int(random_seed)).randn(1, G.z_dim).astype('float32')).to(device)
+
+     with torch.no_grad():
+         ws = G.mapping(z, input_pose, {'mask': input_label, 'pose': input_pose})
+         out = G.synthesis(ws, input_pose, noise_mode='const', neural_rendering_resolution=neural_rendering_resolution)
+
+     image_color = ((out['image'][0].permute(1, 2, 0).cpu().numpy().clip(-1, 1) + 1) * 127.5).astype(np.uint8)
+     image_seg = color_mask(torch.argmax(out['semantic'][0], dim=0).cpu().numpy()).astype(np.uint8)
+     mesh_trimesh = trimesh.Trimesh(*extract_geometry(G, ws, resolution=mesh_resolution, threshold=50.))
+
+     verts_np = np.array(mesh_trimesh.vertices)
+     colors = torch.zeros((verts_np.shape[0], 3), device=device)
+     semantic_colors = torch.zeros((verts_np.shape[0], 6), device=device)
+     samples_color = torch.tensor(verts_np, device=device).unsqueeze(0).float()
+
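+     # Color mesh vertices by querying the generator at each vertex location in
+     # batches; the six semantic channels live at feature indices 32:38 (see the
+     # slice below), and their argmax picks a color from color_list.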
+     head = 0
+     max_batch = 10000000
+     with tqdm(total=verts_np.shape[0]) as pbar:
+         with torch.no_grad():
+             while head < verts_np.shape[0]:
+                 torch.manual_seed(0)
+                 out = G.sample_mixed(samples_color[:, head:head + max_batch], None, ws, truncation_psi=truncation_psi, noise_mode='const')
+                 colors[head:head + max_batch, :] = out['rgb'][0, :, :3]
+                 seg = out['rgb'][0, :, 32:32 + 6]
+                 semantic_colors[head:head + max_batch, :] = seg
+                 head += max_batch
+                 pbar.update(max_batch)
+
+     semantic_colors = torch.tensor(color_list, device=device)[torch.argmax(semantic_colors, dim=-1)]
+
+     mesh_trimesh.visual.vertex_colors = semantic_colors.cpu().numpy().astype(np.uint8)
+     frames, frames_label = render_video(G, ws, intrinsics, num_frames=num_frames, pitch_range=0.25, yaw_range=0.35, neural_rendering_resolution=neural_rendering_resolution, device=device)
+
+     # Save the videos.
+     video = save_dir / f'{cfg}_color.mp4'
+     video_label = save_dir / f'{cfg}_label.mp4'
+     imageio.mimsave(video, frames, fps=fps)
+     imageio.mimsave(video_label, frames_label, fps=fps)
+     fig_mesh = return_plot_go(mesh_trimesh)
+     return fig_mesh, image_color, image_seg, video, video_label
+
+ markdown = f'''
+ # 3D-aware Conditional Image Synthesis
+
+ [Paper: "3D-aware Conditional Image Synthesis" (arXiv)](https://arxiv.org/abs/2302.08509)
+ [Project page](https://www.cs.cmu.edu/~pix2pix3D/)
+ [Official implementation](https://github.com/dunbar12138/pix2pix3D)
+
+ ### Future work, based on interest
+ - Adding new models for new object types
+ - New customization options
+
+ This demo is running on {device}.
+ Generation can take a long time, especially for videos; the duration depends on the number of frames, the mesh resolution, and the current device.
+ '''
+
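+ # Gradio UI: segmentation-map input and parameter sliders on top, then the
+ # generated image, label map, mesh plot, and orbit videos below.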
+
+ with gr.Blocks() as demo:
+     gr.Markdown(markdown)
+     with gr.Row():
+         with gr.Column():
+             input = gr.Image(type="filepath", shape=(512, 512))
+         with gr.Column():
+             cfg = gr.Dropdown(choices=["seg2cat"], label="Choose Model", value="seg2cat")
+             truncation_psi = gr.Slider(minimum=0, maximum=2, label='Truncation PSI', value=1)
+             mesh_resolution = gr.Slider(minimum=32, maximum=512, label='Mesh Resolution', value=32)
+             random_seed = gr.Slider(minimum=0, maximum=2**16, label='Seed', value=128)
+             fps = gr.Slider(minimum=10, maximum=120, label='FPS', value=30)
+             num_frames = gr.Slider(minimum=10, maximum=120, label='Number of Frames', value=30)
+
+     with gr.Row():
+         btn = gr.Button(value="Generate")
+
+     with gr.Row():
+         with gr.Column():
+             image_color = gr.Image(type="pil", shape=(256, 256))
+         with gr.Column():
+             image_label = gr.Image(type="pil", shape=(256, 256))
+     with gr.Row():
+         mesh = gr.Plot()
+     with gr.Row():
+         with gr.Column():
+             video_color = gr.Video()
+         with gr.Column():
+             video_label = gr.Video()
+
+     btn.click(get_all, [cfg, input, truncation_psi, mesh_resolution, random_seed, fps, num_frames], [mesh, image_color, image_label, video_color, video_label])
+
+ demo.launch(debug=True, share=True)
requirements.txt ADDED
@@ -0,0 +1,19 @@
+ torch
+ trimesh
+ pyrender
+ PyMCubes
+ pycollada
+ einops
+ ninja
+ imageio-ffmpeg
+ imgui==1.3.0
+ glfw==2.2.0
+ pyopengl==3.1.5
+ pyspng
+ psutil
+ mrcfile
+ opencv-python
+ tqdm
+ scipy
+ pillow
+ numpy
+ # Also imported by app.py and not pulled in transitively:
+ imageio
+ matplotlib
+ plotly
+ gradio
+ huggingface_hub