jiaweir committed • cdc7dcc
Parent(s): c122ae9

optimize

Files changed:
- app.py +8 -4
- configs/4d_demo.yaml +1 -1
- gs_renderer_4d.py +42 -0
- lgm/core/models.py +41 -0
- lgm/infer_demo.py +15 -34
- main_4d_demo.py +27 -32
app.py CHANGED

@@ -224,7 +224,7 @@ def optimize_stage_2(image_block: Image.Image, seed_slider: int):
     process_dg4d(os.path.join("configs", "4d_demo.yaml"), os.path.join("tmp_data", f"{img_hash}_rgba.png"), guidance_zero123)
     # os.rename(os.path.join('logs', f'{img_hash}_rgba_frames'), os.path.join('logs', f'{img_hash}_{seed_slider:03d}_rgba_frames'))
     image_dir = os.path.join('logs', f'{img_hash}_rgba_frames')
-    # return 'vis_data
+    # return os.path.join('vis_data', f'{img_hash}_rgba.mp4'), [image_dir+f'/{t:03d}.ply' for t in range(28)]
     return [image_dir+f'/{t:03d}.ply' for t in range(28)]


@@ -256,7 +256,7 @@ if __name__ == "__main__":

         # Image-to-3D
         with gr.Row(variant='panel'):
-            with gr.Column(scale=
+            with gr.Column(scale=5):
                 image_block = gr.Image(type='pil', image_mode='RGBA', height=290, label='Input image')

                 # elevation_slider = gr.Slider(-90, 90, value=0, step=1, label='Estimated elevation angle')
@@ -282,8 +282,12 @@
                 img_guide_text = gr.Markdown(_IMG_USER_GUIDE, visible=True)

             with gr.Column(scale=5):
-                [removed line; content not captured in the page view]
-                [removed line; content not captured in the page view]
+                with gr.Row():
+                    with gr.Column(scale=5):
+                        dirving_video = gr.Video(label="video",height=290)
+                    with gr.Column(scale=5):
+                        obj3d = gr.Video(label="3D Model",height=290)
+                # video4d = gr.Video(label="4D video",height=290)
                 obj4d = Model4DGS(label="4D Model", height=500, fps=14)

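The UI hunk above splits the right-hand column into a gr.Row holding two equal columns, so the driving video and the 3D turntable preview sit side by side above the 4D viewer. A minimal self-contained sketch of that layout, using only stock Gradio components (a plain gr.Video stands in for the Model4DGS custom component the Space actually imports; identifiers mirror the commit, including its "dirving_video" spelling):

import gradio as gr

# Sketch of the new layout: input image on the left; on the right,
# two side-by-side video previews above the 4D model viewer.
with gr.Blocks() as demo:
    with gr.Row(variant='panel'):
        with gr.Column(scale=5):
            image_block = gr.Image(type='pil', image_mode='RGBA', height=290, label='Input image')
        with gr.Column(scale=5):
            with gr.Row():
                with gr.Column(scale=5):
                    dirving_video = gr.Video(label="video", height=290)
                with gr.Column(scale=5):
                    obj3d = gr.Video(label="3D Model", height=290)
            # stand-in for: obj4d = Model4DGS(label="4D Model", height=500, fps=14)
            obj4d = gr.Video(label="4D Model", height=500)

if __name__ == "__main__":
    demo.launch()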
configs/4d_demo.yaml CHANGED

@@ -30,7 +30,7 @@ lambda_svd: 0
 # training batch size per iter
 batch_size: 7
 # training iterations for stage 1
-iters:
+iters: 400
 # training iterations for stage 2
 iters_refine: 50
 # training camera radius
gs_renderer_4d.py CHANGED

@@ -150,6 +150,48 @@ class Renderer:
         self.opacity_deform_T = opacity_deform_T.reshape([self.T, means3D_deform_T.shape[0]//self.T, -1])
         self.scales_deform_T = scales_deform_T.reshape([self.T, means3D_deform_T.shape[0]//self.T, -1])
         self.rotations_deform_T = rotations_deform_T.reshape([self.T, means3D_deform_T.shape[0]//self.T, -1])
+
+
+    def prepare_render_4x(
+        self,
+    ):
+        means3D = self.gaussians.get_xyz
+        opacity = self.gaussians._opacity
+        scales = self.gaussians._scaling
+        rotations = self.gaussians._rotation
+
+        means3D_T = []
+        opacity_T = []
+        scales_T = []
+        rotations_T = []
+        time_T = []
+
+        for t in range(self.T * 4):
+            tt = t / 4.
+            time = torch.tensor(tt).to(means3D.device).repeat(means3D.shape[0], 1)
+            time = ((time.float() / self.T) - 0.5) * 2
+
+            means3D_T.append(means3D)
+            opacity_T.append(opacity)
+            scales_T.append(scales)
+            rotations_T.append(rotations)
+            time_T.append(time)
+
+        means3D_T = torch.cat(means3D_T)
+        opacity_T = torch.cat(opacity_T)
+        scales_T = torch.cat(scales_T)
+        rotations_T = torch.cat(rotations_T)
+        time_T = torch.cat(time_T)
+
+
+        means3D_deform_T, scales_deform_T, rotations_deform_T, opacity_deform_T = self.gaussians._deformation(means3D_T, scales_T,
+                                                                                                              rotations_T, opacity_T,
+                                                                                                              time_T)  # time is not none
+        self.means3D_deform_T = means3D_deform_T.reshape([self.T*4, means3D_deform_T.shape[0]//self.T//4, -1])
+        self.opacity_deform_T = opacity_deform_T.reshape([self.T*4, means3D_deform_T.shape[0]//self.T//4, -1])
+        self.scales_deform_T = scales_deform_T.reshape([self.T*4, means3D_deform_T.shape[0]//self.T//4, -1])
+        self.rotations_deform_T = rotations_deform_T.reshape([self.T*4, means3D_deform_T.shape[0]//self.T//4, -1])
+


     def render(
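prepare_render_4x precomputes deformed Gaussians at four times the trained temporal resolution: it queries the deformation network at quarter-steps t/4 and normalizes each time to the same [-1, 1) range used for the T training steps. A toy sketch of just the time-grid construction (T and N are illustrative sizes; T matches batch_size = 7 in the demo config):

import torch

# Build the 4x-supersampled, normalized time tensor that
# prepare_render_4x feeds to the deformation network.
T, N = 7, 5  # T timesteps, N Gaussians (toy sizes)
times = []
for t in range(T * 4):
    tt = t / 4.
    time = torch.full((N, 1), tt)        # one time value per Gaussian copy
    time = ((time / T) - 0.5) * 2        # map [0, T) -> [-1, 1)
    times.append(time)
time_T = torch.cat(times)                # [T*4*N, 1]
print(time_T.reshape(T * 4, N)[:, 0])    # the 28 supersampled timesteps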
lgm/core/models.py CHANGED

@@ -116,6 +116,47 @@ class LGM(nn.Module):

         return gaussians

+    def forward_gaussians_downsample(self, images):
+        # images: [B, 4, 9, H, W]
+        # return: Gaussians: [B, dim_t]
+
+        B, V, C, H, W = images.shape
+        images = images.view(B*V, C, H, W)
+
+        x = self.unet(images) # [B*4, 14, h, w]
+        x = self.conv(x) # [B*4, 14, h, w]
+
+        x_orig_res = x.clone()
+
+        x = F.interpolate(x, (self.opt.splat_size // 4, self.opt.splat_size // 4), mode='nearest')
+        x = x.reshape(B, 4, 14, self.opt.splat_size // 4, self.opt.splat_size // 4)
+
+        x = x.permute(0, 1, 3, 4, 2).reshape(B, -1, 14)
+
+        pos = self.pos_act(x[..., 0:3]) # [B, N, 3]
+        opacity = self.opacity_act(x[..., 3:4])
+        scale = self.scale_act(x[..., 4:7]) * 4
+        rotation = self.rot_act(x[..., 7:11])
+        rgbs = self.rgb_act(x[..., 11:])
+
+        gaussians = torch.cat([pos, opacity, scale, rotation, rgbs], dim=-1) # [B, N, 14]
+
+
+        x = x_orig_res.reshape(B, 4, 14, self.opt.splat_size, self.opt.splat_size)
+
+        x = x.permute(0, 1, 3, 4, 2).reshape(B, -1, 14)
+
+        pos = self.pos_act(x[..., 0:3]) # [B, N, 3]
+        opacity = self.opacity_act(x[..., 3:4])
+        scale = self.scale_act(x[..., 4:7])
+        rotation = self.rot_act(x[..., 7:11])
+        rgbs = self.rgb_act(x[..., 11:])
+
+        gaussians_orig_res = torch.cat([pos, opacity, scale, rotation, rgbs], dim=-1) # [B, N, 14]
+
+
+        return gaussians, gaussians_orig_res
+

     def forward(self, data, step_ratio=1):
         # data: output of the dataloader
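The new forward_gaussians_downsample decodes the same UNet/conv feature map twice: once nearest-downsampled to a quarter of splat_size (with scale multiplied by 4 to compensate for the 16x sparser splats) for a cheap set of Gaussians, and once at full resolution. A standalone sketch of the dual-resolution reshaping with dummy tensors (sizes are illustrative):

import torch
import torch.nn.functional as F

# Dual-resolution decode of a [B*4, 14, splat_size, splat_size] feature map.
B, splat_size = 1, 128                  # toy sizes; splat_size comes from opt
x = torch.randn(B * 4, 14, splat_size, splat_size)

x_orig_res = x.clone()                  # kept for the full-resolution Gaussians

# nearest-neighbour downsample to 1/4 resolution: 16x fewer Gaussians,
# which is why the method multiplies the decoded scale by 4
x_low = F.interpolate(x, (splat_size // 4, splat_size // 4), mode='nearest')
x_low = x_low.reshape(B, 4, 14, splat_size // 4, splat_size // 4)
x_low = x_low.permute(0, 1, 3, 4, 2).reshape(B, -1, 14)     # [B, N/16, 14]

x_full = x_orig_res.reshape(B, 4, 14, splat_size, splat_size)
x_full = x_full.permute(0, 1, 3, 4, 2).reshape(B, -1, 14)   # [B, N, 14]
print(x_low.shape, x_full.shape)  # [1, 4096, 14] and [1, 65536, 14]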
lgm/infer_demo.py CHANGED

@@ -151,7 +151,7 @@ def process(opt: Options, path, pipe, model, rays_embeddings, seed):

    with torch.autocast(device_type='cuda', dtype=torch.float16):
        # generate gaussians
-        gaussians = model.
+        gaussians, gaussians_orig_res = model.forward_gaussians_downsample(input_image)

    # save gaussians
    model.gs.save_ply(gaussians, os.path.join('logs', name + '_model.ply'))
@@ -160,39 +160,20 @@ def process(opt: Options, path, pipe, model, rays_embeddings, seed):
    images = []
    elevation = 0

-    [14 removed lines not captured in the page view]
-            scale = min(azi / 360, 1)
-
-            image = model.gs.render(gaussians, cam_view.unsqueeze(0), cam_view_proj.unsqueeze(0), cam_pos.unsqueeze(0), scale_modifier=scale)['image']
-            images.append((image.squeeze(1).permute(0,2,3,1).contiguous().float().cpu().numpy() * 255).astype(np.uint8))
-    else:
-        azimuth = np.arange(0, 360, 2, dtype=np.int32)
-        for azi in tqdm.tqdm(azimuth):
-
-            cam_poses = torch.from_numpy(orbit_camera(elevation, azi, radius=opt.cam_radius, opengl=True)).unsqueeze(0).to(device)
-
-            cam_poses[:, :3, 1:3] *= -1 # invert up & forward direction
-
-            # cameras needed by gaussian rasterizer
-            cam_view = torch.inverse(cam_poses).transpose(1, 2) # [V, 4, 4]
-            cam_view_proj = cam_view @ proj_matrix # [V, 4, 4]
-            cam_pos = - cam_poses[:, :3, 3] # [V, 3]
-
-            image = model.gs.render(gaussians, cam_view.unsqueeze(0), cam_view_proj.unsqueeze(0), cam_pos.unsqueeze(0), scale_modifier=1)['image']
-            images.append((image.squeeze(1).permute(0,2,3,1).contiguous().float().cpu().numpy() * 255).astype(np.uint8))
+    azimuth = np.arange(0, 360, 2, dtype=np.int32)
+    for azi in tqdm.tqdm(azimuth):
+
+        cam_poses = torch.from_numpy(orbit_camera(elevation, azi, radius=opt.cam_radius, opengl=True)).unsqueeze(0).to(device)
+
+        cam_poses[:, :3, 1:3] *= -1 # invert up & forward direction
+
+        # cameras needed by gaussian rasterizer
+        cam_view = torch.inverse(cam_poses).transpose(1, 2) # [V, 4, 4]
+        cam_view_proj = cam_view @ proj_matrix # [V, 4, 4]
+        cam_pos = - cam_poses[:, :3, 3] # [V, 3]
+
+        image = model.gs.render(gaussians_orig_res, cam_view.unsqueeze(0), cam_view_proj.unsqueeze(0), cam_pos.unsqueeze(0), scale_modifier=1)['image']
+        images.append((image.squeeze(1).permute(0,2,3,1).contiguous().float().cpu().numpy() * 255).astype(np.uint8))

    images = np.concatenate(images, axis=0)
    imageio.mimwrite(os.path.join('vis_data', name + '_static.mp4'), images, fps=30)
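The rewritten loop renders a single turntable: for each azimuth it builds the rasterizer cameras from an OpenGL orbit pose by flipping the up/forward axes, taking the transposed inverse as the view matrix, and composing it with the projection matrix. A sketch of that camera math with a hand-built stand-in pose (the real code calls orbit_camera and a precomputed proj_matrix):

import numpy as np
import torch

# Stand-in orbit pose: identity rotation, camera 1.5 units back on +z
# (OpenGL convention); the real code gets this from orbit_camera(...).
pose = np.eye(4, dtype=np.float32)
pose[2, 3] = 1.5

cam_poses = torch.from_numpy(pose).unsqueeze(0)      # [1, 4, 4]
cam_poses[:, :3, 1:3] *= -1                          # invert up & forward direction

cam_view = torch.inverse(cam_poses).transpose(1, 2)  # [V, 4, 4] view matrix
# cam_view_proj = cam_view @ proj_matrix             # proj_matrix built elsewhere
cam_pos = - cam_poses[:, :3, 3]                      # [V, 3] camera position
print(cam_view.shape, cam_pos)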
main_4d_demo.py CHANGED

@@ -540,38 +540,33 @@ class GUI:

        # render eval
        image_list =[]
-        [4 removed lines not captured in the page view]
-        for
-        [22 removed lines not captured in the page view]
-            if j >= self.opt.batch_size:
-                hor = (hor+delta_hor) % 360
-
-
-        imageio.mimwrite(f'vis_data/{self.opt.save_path}.mp4', image_list, fps=7)
+        fps = 14
+        delta_time = 1 / 30
+        self.renderer.prepare_render_4x()
+        time = 0
+        for hor in range(720):
+
+            pose = orbit_camera(self.opt.elevation, hor, self.opt.radius)
+            cur_cam = MiniCam(
+                pose,
+                512,
+                512,
+                self.cam.fovy,
+                self.cam.fovx,
+                self.cam.near,
+                self.cam.far,
+                time=int(time * fps) % (self.opt.batch_size * 4)
+            )
+            with torch.no_grad():
+                outputs = self.renderer.render(cur_cam)
+            out = outputs["image"].cpu().detach().numpy().astype(np.float32)
+            out = np.transpose(out, (1, 2, 0))
+            out = np.uint8(out*255)
+            image_list.append(out)
+            time += delta_time
+
+
+        imageio.mimwrite(f'vis_data/{self.opt.save_path}.mp4', image_list, fps=30)

        if self.gui:
            while True: