LGM-mini

Runtime error

Dylan Ebert committed on Feb 21

Commit

92f2e1f

•

1 Parent(s): dec9ec5

update gradio version

.

.

.

debug

Update app.py

validate exists

.

.

.

.

.

match requirements

add xformers

flip output

rotate output

rot mat fix

.

.

Files changed (5) hide show

README.md +2 -2
app.py +44 -126
core/gs.py +3 -0
core/models.py +11 -0
requirements.txt +5 -5

README.md CHANGED Viewed

@@ -1,10 +1,10 @@
 ---
-title: LGM
 emoji: 🦀
 colorFrom: red
 colorTo: indigo
 sdk: gradio
-sdk_version: 4.17.0
 app_file: app.py
 pinned: false
 license: mit

 ---
+title: LGM-Mini
 emoji: 🦀
 colorFrom: red
 colorTo: indigo
 sdk: gradio
+sdk_version: 4.19.0
 app_file: app.py
 pinned: false
 license: mit

app.py CHANGED Viewed

@@ -1,10 +1,6 @@
 import os
-import tyro
-import imageio
 import numpy as np
-import tqdm
 import torch
-import torch.nn as nn
 import torch.nn.functional as F
 import torchvision.transforms.functional as TF
 from safetensors.torch import load_file
@@ -15,23 +11,23 @@ import gradio as gr
 from huggingface_hub import hf_hub_download
 ckpt_path = hf_hub_download(repo_id="ashawkey/LGM", filename="model_fp16.safetensors")
-# NOTE: no -e... else it's not working!
-os.system("pip install ./diff-gaussian-rasterization")
 import kiui
 from kiui.op import recenter
-from kiui.cam import orbit_camera
-from core.options import AllConfigs, Options
 from core.models import LGM
 from mvdream.pipeline_mvdream import MVDreamPipeline
-import spaces
 IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
 IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)
-GRADIO_VIDEO_PATH = 'gradio_output.mp4'
-GRADIO_PLY_PATH = 'gradio_output.ply'
 # opt = tyro.cli(AllConfigs)
 opt = Options(
@@ -67,9 +63,9 @@ model.eval()
 tan_half_fov = np.tan(0.5 * np.deg2rad(opt.fovy))
 proj_matrix = torch.zeros(4, 4, dtype=torch.float32, device=device)
-proj_matrix[0, 0] = 1 / tan_half_fov
-proj_matrix[1, 1] = 1 / tan_half_fov
-proj_matrix[2, 2] = (opt.zfar + opt.znear) / (opt.zfar - opt.znear)
 proj_matrix[3, 2] = - (opt.zfar * opt.znear) / (opt.zfar - opt.znear)
 proj_matrix[2, 3] = 1
@@ -94,44 +90,22 @@ pipe_image = pipe_image.to(device)
 bg_remover = rembg.new_session()
 # process function
-@spaces.GPU
-def process(input_image, prompt, prompt_neg='', input_elevation=0, input_num_steps=30, input_seed=42):
     # seed
-    kiui.seed_everything(input_seed)
-    os.makedirs(opt.workspace, exist_ok=True)
-    output_video_path = os.path.join(opt.workspace, GRADIO_VIDEO_PATH)
-    output_ply_path = os.path.join(opt.workspace, GRADIO_PLY_PATH)
-    # text-conditioned
-    if input_image is None:
-        mv_image_uint8 = pipe_text(prompt, negative_prompt=prompt_neg, num_inference_steps=input_num_steps, guidance_scale=7.5, elevation=input_elevation)
-        mv_image_uint8 = (mv_image_uint8 * 255).astype(np.uint8)
-        # bg removal
-        mv_image = []
-        for i in range(4):
-            image = rembg.remove(mv_image_uint8[i], session=bg_remover) # [H, W, 4]
-            # to white bg
-            image = image.astype(np.float32) / 255
-            image = recenter(image, image[..., 0] > 0, border_ratio=0.2)
-            image = image[..., :3] * image[..., -1:] + (1 - image[..., -1:])
-            mv_image.append(image)
-    # image-conditioned (may also input text, but no text usually works too)
-    else:
-        input_image = np.array(input_image) # uint8
-        # bg removal
-        carved_image = rembg.remove(input_image, session=bg_remover) # [H, W, 4]
-        mask = carved_image[..., -1] > 0
-        image = recenter(carved_image, mask, border_ratio=0.2)
-        image = image.astype(np.float32) / 255.0
-        image = image[..., :3] * image[..., 3:4] + (1 - image[..., 3:4])
-        mv_image = pipe_image(prompt, image, negative_prompt=prompt_neg, num_inference_steps=input_num_steps, guidance_scale=5.0,  elevation=input_elevation)
-    mv_image_grid = np.concatenate([
-        np.concatenate([mv_image[1], mv_image[2]], axis=1),
-        np.concatenate([mv_image[3], mv_image[0]], axis=1),
-    ], axis=0)
     # generate gaussians
     input_image = np.stack([mv_image[1], mv_image[2], mv_image[3], mv_image[0]], axis=0) # [4, 256, 256, 3], float32
@@ -139,7 +113,7 @@ def process(input_image, prompt, prompt_neg='', input_elevation=0, input_num_ste
     input_image = F.interpolate(input_image, size=(opt.input_size, opt.input_size), mode='bilinear', align_corners=False)
     input_image = TF.normalize(input_image, IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD)
-    rays_embeddings = model.prepare_default_rays(device, elevation=input_elevation)
     input_image = torch.cat([input_image, rays_embeddings], dim=1).unsqueeze(0) # [1, 4, 9, H, W]
     with torch.no_grad():
@@ -149,47 +123,8 @@ def process(input_image, prompt, prompt_neg='', input_elevation=0, input_num_ste
         # save gaussians
         model.gs.save_ply(gaussians, output_ply_path)
-        # render 360 video
-        images = []
-        elevation = 0
-        if opt.fancy_video:
-            azimuth = np.arange(0, 720, 4, dtype=np.int32)
-            for azi in tqdm.tqdm(azimuth):
-                cam_poses = torch.from_numpy(orbit_camera(elevation, azi, radius=opt.cam_radius, opengl=True)).unsqueeze(0).to(device)
-                cam_poses[:, :3, 1:3] *= -1 # invert up & forward direction
-                # cameras needed by gaussian rasterizer
-                cam_view = torch.inverse(cam_poses).transpose(1, 2) # [V, 4, 4]
-                cam_view_proj = cam_view @ proj_matrix # [V, 4, 4]
-                cam_pos = - cam_poses[:, :3, 3] # [V, 3]
-                scale = min(azi / 360, 1)
-                image = model.gs.render(gaussians, cam_view.unsqueeze(0), cam_view_proj.unsqueeze(0), cam_pos.unsqueeze(0), scale_modifier=scale)['image']
-                images.append((image.squeeze(1).permute(0,2,3,1).contiguous().float().cpu().numpy() * 255).astype(np.uint8))
-        else:
-            azimuth = np.arange(0, 360, 2, dtype=np.int32)
-            for azi in tqdm.tqdm(azimuth):
-                cam_poses = torch.from_numpy(orbit_camera(elevation, azi, radius=opt.cam_radius, opengl=True)).unsqueeze(0).to(device)
-                cam_poses[:, :3, 1:3] *= -1 # invert up & forward direction
-                # cameras needed by gaussian rasterizer
-                cam_view = torch.inverse(cam_poses).transpose(1, 2) # [V, 4, 4]
-                cam_view_proj = cam_view @ proj_matrix # [V, 4, 4]
-                cam_pos = - cam_poses[:, :3, 3] # [V, 3]
-                image = model.gs.render(gaussians, cam_view.unsqueeze(0), cam_view_proj.unsqueeze(0), cam_pos.unsqueeze(0), scale_modifier=1)['image']
-                images.append((image.squeeze(1).permute(0,2,3,1).contiguous().float().cpu().numpy() * 255).astype(np.uint8))
-        images = np.concatenate(images, axis=0)
-        imageio.mimwrite(output_video_path, images, fps=30)
-    return output_ply_path, output_ply_path
 # gradio UI
@@ -197,12 +132,23 @@ _TITLE = '''LGM Mini'''
 _DESCRIPTION = '''
 <div>
-A lightweight version of <a href="https://huggingface.co/spaces/ashawkey/LGM">LGM: Large Multi-View Gaussian Model for High-Resolution 3D Content Creation</a>
 </div>
 '''
-block = gr.Blocks(title=_TITLE).queue()
 with block:
     with gr.Row():
         with gr.Column(scale=1):
             gr.Markdown('# ' + _TITLE)
@@ -211,26 +157,15 @@ with block:
     with gr.Row(variant='panel'):
         with gr.Column(scale=1):
             # input image
-            input_image = gr.Image(label="image", type='pil')
-            # input prompt
-            input_text = gr.Textbox(label="prompt")
-            # negative prompt
-            input_neg_text = gr.Textbox(label="negative prompt", value='ugly, blurry, pixelated obscure, unnatural colors, poor lighting, dull, unclear, cropped, lowres, low quality, artifacts, duplicate')
-            # elevation
-            input_elevation = gr.Slider(label="elevation", minimum=-90, maximum=90, step=1, value=0)
-            # inference steps
-            input_num_steps = gr.Slider(label="inference steps", minimum=1, maximum=100, step=1, value=30)
-            # random seed
-            input_seed = gr.Slider(label="random seed", minimum=0, maximum=100000, step=1, value=0)
             # gen button
             button_gen = gr.Button("Generate")
         with gr.Column(scale=1):
             output_splat = gr.Model3D(label="3D Gaussians")
-            output_file = gr.File(label="3D Gaussians (ply format)")
-        button_gen.click(process, inputs=[input_image, input_text, input_neg_text, input_elevation, input_num_steps, input_seed], outputs=[output_splat, output_file])
     gr.Examples(
         examples=[
@@ -242,27 +177,10 @@ with block:
             "data_test/gso_rabbit.jpg",
         ],
         inputs=[input_image],
-        outputs=[output_splat, output_file],
-        fn=lambda x: process(input_image=x, prompt=''),
         cache_examples=True,
         label='Image-to-3D Examples'
     )
-    gr.Examples(
-        examples=[
-            "teddy bear",
-            "hamburger",
-            "oldman's head sculpture",
-            "headphone",
-            "motorbike",
-            "mech suit"
-        ],
-        inputs=[input_text],
-        outputs=[output_splat, output_file],
-        fn=lambda x: process(input_image=None, prompt=x),
-        cache_examples=True,
-        label='Text-to-3D Examples'
-    )
-block.launch()

 import os
 import numpy as np
 import torch
 import torch.nn.functional as F
 import torchvision.transforms.functional as TF
 from safetensors.torch import load_file
 from huggingface_hub import hf_hub_download
 ckpt_path = hf_hub_download(repo_id="ashawkey/LGM", filename="model_fp16.safetensors")
+try:
+    import diff_gaussian_rasterization
+except ImportError:
+    os.system("pip install ./diff-gaussian-rasterization")
 import kiui
 from kiui.op import recenter
+from core.options import Options
 from core.models import LGM
 from mvdream.pipeline_mvdream import MVDreamPipeline
 IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
 IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)
+TMP_DIR = '/tmp'
+os.makedirs(TMP_DIR, exist_ok=True)
 # opt = tyro.cli(AllConfigs)
 opt = Options(
 tan_half_fov = np.tan(0.5 * np.deg2rad(opt.fovy))
 proj_matrix = torch.zeros(4, 4, dtype=torch.float32, device=device)
+proj_matrix[0, 0] = -1 / tan_half_fov
+proj_matrix[1, 1] = -1 / tan_half_fov
+proj_matrix[2, 2] = - (opt.zfar + opt.znear) / (opt.zfar - opt.znear)
 proj_matrix[3, 2] = - (opt.zfar * opt.znear) / (opt.zfar - opt.znear)
 proj_matrix[2, 3] = 1
 bg_remover = rembg.new_session()
 # process function
+def run(input_image):
+    prompt_neg = "ugly, blurry, pixelated obscure, unnatural colors, poor lighting, dull, unclear, cropped, lowres, low quality, artifacts, duplicate"
     # seed
+    kiui.seed_everything(42)
+    output_ply_path = os.path.join(TMP_DIR, 'output.ply')
+    input_image = np.array(input_image) # uint8
+    # bg removal
+    carved_image = rembg.remove(input_image, session=bg_remover) # [H, W, 4]
+    mask = carved_image[..., -1] > 0
+    image = recenter(carved_image, mask, border_ratio=0.2)
+    image = image.astype(np.float32) / 255.0
+    image = image[..., :3] * image[..., 3:4] + (1 - image[..., 3:4])
+    mv_image = pipe_image("", image, negative_prompt=prompt_neg, num_inference_steps=30, guidance_scale=5.0,  elevation=0)
     # generate gaussians
     input_image = np.stack([mv_image[1], mv_image[2], mv_image[3], mv_image[0]], axis=0) # [4, 256, 256, 3], float32
     input_image = F.interpolate(input_image, size=(opt.input_size, opt.input_size), mode='bilinear', align_corners=False)
     input_image = TF.normalize(input_image, IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD)
+    rays_embeddings = model.prepare_default_rays(device, elevation=0)
     input_image = torch.cat([input_image, rays_embeddings], dim=1).unsqueeze(0) # [1, 4, 9, H, W]
     with torch.no_grad():
         # save gaussians
         model.gs.save_ply(gaussians, output_ply_path)
+    return output_ply_path
 # gradio UI
 _DESCRIPTION = '''
 <div>
+A lightweight version of <a href="https://huggingface.co/spaces/ashawkey/LGM">LGM: Large Multi-View Gaussian Model for High-Resolution 3D Content Creation</a>.
 </div>
 '''
+css = '''
+#duplicate-button {
+    margin: auto;
+    color: white;
+    background: #1565c0;
+    border-radius: 100vh;
+}
+'''
+block = gr.Blocks(title=_TITLE, css=css)
 with block:
+    gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
     with gr.Row():
         with gr.Column(scale=1):
             gr.Markdown('# ' + _TITLE)
     with gr.Row(variant='panel'):
         with gr.Column(scale=1):
             # input image
+            input_image = gr.Image(label="image", type='pil', height=300)
             # gen button
             button_gen = gr.Button("Generate")
         with gr.Column(scale=1):
             output_splat = gr.Model3D(label="3D Gaussians")
+        button_gen.click(fn=run, inputs=[input_image], outputs=[output_splat])
     gr.Examples(
         examples=[
             "data_test/gso_rabbit.jpg",
         ],
         inputs=[input_image],
+        outputs=[output_splat],
+        fn=lambda x: run(input_image=x),
         cache_examples=True,
         label='Image-to-3D Examples'
     )
+block.queue().launch(debug=True, share=True)

core/gs.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import numpy as np
 import torch
@@ -105,6 +106,8 @@ class GaussianRenderer:
         assert gaussians.shape[0] == 1, 'only support batch size 1'
         from plyfile import PlyData, PlyElement
         means3D = gaussians[0, :, 0:3].contiguous().float()
         opacity = gaussians[0, :, 3:4].contiguous().float()

+import os
 import numpy as np
 import torch
         assert gaussians.shape[0] == 1, 'only support batch size 1'
         from plyfile import PlyData, PlyElement
+        os.makedirs(os.path.dirname(path), exist_ok=True)
         means3D = gaussians[0, :, 0:3].contiguous().float()
         opacity = gaussians[0, :, 3:4].contiguous().float()

core/models.py CHANGED Viewed

@@ -112,6 +112,17 @@ class LGM(nn.Module):
         rotation = self.rot_act(x[..., 7:11])
         rgbs = self.rgb_act(x[..., 11:])
         gaussians = torch.cat([pos, opacity, scale, rotation, rgbs], dim=-1) # [B, N, 14]
         return gaussians

         rotation = self.rot_act(x[..., 7:11])
         rgbs = self.rgb_act(x[..., 11:])
+        rot_matrix = torch.tensor([[1.0, 0.0, 0.0, 0.0],
+                                   [0.0, -1.0, 0.0, 0.0],
+                                   [0.0, 0.0, -1.0, 0.0],
+                                   [0.0, 0.0, 0.0, 1.0]], dtype=torch.float32, device=images.device)
+        pos_4d = torch.cat([pos, torch.ones_like(pos[..., :1])], dim=-1)
+        pos = torch.matmul(pos_4d, rot_matrix) # [B, N, 4]
+        pos = pos[..., :3]
+        rotation = torch.matmul(rotation, rot_matrix)
         gaussians = torch.cat([pos, opacity, scale, rotation, rgbs], dim=-1) # [B, N, 14]
         return gaussians

requirements.txt CHANGED Viewed

@@ -1,7 +1,3 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-torch==2.0.0
-xformers
 numpy
 tyro
 diffusers
@@ -28,4 +24,8 @@ trimesh
 kiui >= 0.2.3
 xatlas
 roma
-plyfile

 numpy
 tyro
 diffusers
 kiui >= 0.2.3
 xatlas
 roma
+plyfile
+torch==2.0.0 --index-url https://download.pytorch.org/whl/cu118
+torchvision==0.15.1 --index-url https://download.pytorch.org/whl/cu118
+torchaudio==2.0.1 --index-url https://download.pytorch.org/whl/cu118
+xformers