xrg committed
Commit 16da407
1 Parent(s): 5357224

separate generation process

Files changed (3):
  1. app.py +22 -14
  2. core/models.py +0 -133
  3. core/tensorBase.py +1 -29
app.py CHANGED
@@ -35,7 +35,6 @@ GRADIO_OBJ_SHADING_PATH = 'gradio_output_shading.obj'
  #opt = tyro.cli(AllConfigs)

  ckpt_path = hf_hub_download(repo_id="rgxie/LDM", filename="LDM6v01.ckpt")
- #ckpt_path = '/ssd3/xrg/tensor23d/pretrained/last_6view_0610_14.ckpt'

  opt = Options(
  input_size=512,
@@ -83,8 +82,6 @@ if opt.resume is not None:
  print(f'[INFO] load resume success!')

  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
- print('first device')
- print(device)
  model = model.half().to(device)
  model.eval()

@@ -141,18 +138,14 @@ pipe_image_plus = pipe_image_plus.to(device)
  # load rembg
  bg_remover = rembg.new_session()

- # process function
- @spaces.GPU
- def process(condition_input_image, prompt, prompt_neg='', input_elevation=0, input_num_steps=30, input_seed=42, mv_moedl_option=None):

+ @spaces.GPU
+ def generate_mv(condition_input_image, prompt, prompt_neg='', input_elevation=0, input_num_steps=30, input_seed=42, mv_moedl_option=None):
  # seed
  kiui.seed_everything(input_seed)

  os.makedirs(os.path.join(opt.workspace, "gradio"), exist_ok=True)
  output_video_path = os.path.join(opt.workspace,"gradio", GRADIO_VIDEO_PATH)
- output_obj_rgb_path = os.path.join(opt.workspace,"gradio", GRADIO_OBJ_PATH)
- output_obj_albedo_path = os.path.join(opt.workspace,"gradio", GRADIO_OBJ_ALBEDO_PATH)
- output_obj_shading_path = os.path.join(opt.workspace,"gradio", GRADIO_OBJ_SHADING_PATH)

  # text-conditioned
  if condition_input_image is None:
@@ -199,7 +192,15 @@ def process(condition_input_image, prompt, prompt_neg='', input_elevation=0, inp
  mv_image_grid = rearrange(mv_image, 'c (n h) (m w) -> (m h) (n w) c', n=3, m=2).numpy()
  mv_image = rearrange(mv_image, 'c (n h) (m w) -> (n m) h w c', n=3, m=2).numpy()
  input_image = mv_image
-
+ return mv_image_grid, processed_image, input_image
+
+ @spaces.GPU
+ def generate_3d(input_image, condition_input_image, mv_moedl_option=None, input_seed=42):
+ kiui.seed_everything(input_seed)
+
+ output_obj_rgb_path = os.path.join(opt.workspace,"gradio", GRADIO_OBJ_PATH)
+ output_obj_albedo_path = os.path.join(opt.workspace,"gradio", GRADIO_OBJ_ALBEDO_PATH)
+ output_obj_shading_path = os.path.join(opt.workspace,"gradio", GRADIO_OBJ_SHADING_PATH)
  # generate gaussians
  # [4, 256, 256, 3], float32
  input_image = torch.from_numpy(input_image).permute(0, 3, 1, 2).float().to(device) # [4, 3, 256, 256]
@@ -274,7 +275,8 @@ def process(condition_input_image, prompt, prompt_neg='', input_elevation=0, inp
  save_obj(vertices, faces, vertex_colors[2], output_obj_shading_path)


- return mv_image_grid, processed_image, output_obj_rgb_path, output_obj_albedo_path, output_obj_shading_path
+ return output_obj_rgb_path, output_obj_albedo_path, output_obj_shading_path
+

  # gradio UI

@@ -385,7 +387,13 @@ with block:
  )


- button_gen.click(process, inputs=[condition_input_image, input_text, input_neg_text, input_elevation, input_num_steps, input_seed,mv_moedl_option], outputs=[mv_image_grid,processed_image, output_obj_rgb_path, output_obj_albedo_path, output_obj_shading_path])
-
-
+ input_image = gr.State()
+ button_gen.click(fn=generate_mv, inputs=[condition_input_image, input_text, input_neg_text, input_elevation, input_num_steps, input_seed, mv_moedl_option],
+ outputs=[mv_image_grid, processed_image, input_image],).success(
+ fn=generate_3d,
+ inputs=[input_image, condition_input_image, mv_moedl_option, input_seed],
+ outputs=[output_obj_rgb_path, output_obj_albedo_path, output_obj_shading_path] ,
+ )
+
+
  block.launch(server_name="0.0.0.0", share=False)
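Note: the split above follows the usual Gradio pattern for chaining two heavy handlers. The first event returns its intermediate result into a gr.State value, and the second handler runs only if the first succeeds (via .success()). A minimal, self-contained sketch of that wiring, with illustrative component names rather than the ones used in app.py:

import gradio as gr

def step_one(prompt):
    # stage 1: produce something to show plus an intermediate value to pass on
    preview = f"multi-view preview for: {prompt}"
    intermediate = prompt.upper()  # stand-in for the generated multi-view images
    return preview, intermediate

def step_two(intermediate):
    # stage 2: consume the intermediate value stored in gr.State
    return f"3D asset reconstructed from: {intermediate}"

with gr.Blocks() as demo:
    prompt = gr.Textbox(label="prompt")
    preview = gr.Textbox(label="stage 1 preview")
    result = gr.Textbox(label="stage 2 result")
    button = gr.Button("Generate")

    state = gr.State()  # carries the intermediate result between the two events
    button.click(fn=step_one, inputs=[prompt], outputs=[preview, state]).success(
        fn=step_two, inputs=[state], outputs=[result]
    )

demo.launch()

On a ZeroGPU Space, keeping generate_mv and generate_3d as separate @spaces.GPU functions lets each stage request GPU time for its own call instead of holding it across the whole pipeline, which appears to be the motivation for the split.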
core/models.py CHANGED
@@ -50,13 +50,9 @@ class LTRFM_NeRF(nn.Module):
  )

  aabb = torch.tensor([[-1, -1, -1], [1, 1, 1]]).cuda()
- print(aabb.device)
  grid_size = torch.tensor([opt.splat_size, opt.splat_size, opt.splat_size]).cuda()
  near_far =torch.tensor([opt.znear, opt.zfar]).cuda()

- print(device)
- print('aabb')
- print(aabb.device)
  # tensorf Renderer
  self.tensorRF = TensorVMSplit_NeRF(aabb, grid_size, density_n_comp=opt.density_n_comp,appearance_n_comp=opt.app_n_comp,app_dim=opt.app_dim,\
  density_dim=opt.density_dim,near_far=near_far, shadingMode=opt.shadingMode, pos_pe=opt.pos_pe, view_pe=opt.view_pe, fea_pe=opt.fea_pe)
@@ -127,9 +123,6 @@ class LTRFM_NeRF(nn.Module):
  xyz_samples=xyz_samples.permute(0,2,3,4,1)
  xyz_samples=xyz_samples.view(1,-1,1,3)

- print("app and xyz")
- print(planes['app_planes'].device)
- print(xyz_samples.device)

  grid_out = self.tensorRF.predict_sdf(planes,xyz_samples)
  grid_out['sigma']=grid_out['sigma'].view(grid_size,grid_size,grid_size).float()
@@ -191,68 +184,6 @@ class LTRFM_NeRF(nn.Module):
  return vertices, faces, uvs, mesh_tex_idx, [texture_map,texture_map_albedo]


- def forward(self, data, step_ratio=1):
- # data: output of the dataloader
- # return: loss
- #self.set_beta(data['t'])
- results = {}
- loss = 0
-
- images = data['input'] # [B, 4, 9, h, W], input features
-
- # use the first view to predict gaussians
- svd_volume = self.forward_svd_volume(images,data) # [B, N, 14]
-
- results['svd_volume'] = svd_volume
-
- # always use white bg
- bg_color = torch.ones(3, dtype=torch.float32).to(device)
-
- # use the other views for rendering and supervision
- results = self.tensorRF(svd_volume, data['all_rays_o'], data['all_rays_d'],is_train=True, bg_color=bg_color, N_samples=self.opt.n_sample)
- pred_shading = results['image'] # [B, V, C, output_size, output_size]
- pred_alphas = results['alpha'] # [B, V, 1, output_size, output_size]
- pred_albedos = results['albedo'] # [B, V, C, output_size, output_size]
-
- pred_images = pred_shading*pred_albedos
-
- results['images_pred'] = pred_images
- results['alphas_pred'] = pred_alphas
- results['pred_albedos'] = pred_albedos
- results['pred_shading'] = pred_shading
-
-
- gt_images = data['images_output'] # [B, V, 3, output_size, output_size], ground-truth novel views
- gt_albedos = data['albedos_output'] # [B, V, 3, output_size, output_size], ground-truth novel views
- gt_masks = data['masks_output'] # [B, V, 1, output_size, output_size], ground-truth masks
-
- gt_images = gt_images * gt_masks + bg_color.view(1, 1, 3, 1, 1) * (1 - gt_masks)
- gt_albedos = gt_albedos * gt_masks + bg_color.view(1, 1, 3, 1, 1) * (1 - gt_masks)
-
- loss_mse = F.mse_loss(pred_images, gt_images) + F.mse_loss(pred_alphas, gt_masks) + F.mse_loss(pred_albedos, gt_albedos)
- loss = loss + loss_mse
-
- # eikonal_loss = ((results['eik_grads'].norm(2, dim=1) - 1) ** 2).mean()
- # loss = loss+ 0.1*eikonal_loss
-
- if self.opt.lambda_lpips > 0:
- loss_lpips = self.lpips_loss(
- F.interpolate(gt_images.view(-1, 3, self.opt.output_size, self.opt.output_size) * 2 - 1, (256, 256), mode='bilinear', align_corners=False),
- F.interpolate(pred_images.view(-1, 3, self.opt.output_size, self.opt.output_size) * 2 - 1, (256, 256), mode='bilinear', align_corners=False),
- ).mean()
- results['loss_lpips'] = loss_lpips
- loss = loss + self.opt.lambda_lpips * loss_lpips
-
- results['loss'] = loss
-
- # metric
- with torch.no_grad():
- psnr = -10 * torch.log10(torch.mean((pred_images.detach() - gt_images) ** 2))
- results['psnr'] = psnr
-
- return results
-
-
  def render_frame(self, data):
  # data: output of the dataloader
  # return: loss
@@ -631,70 +562,6 @@ class LTRFM_Mesh(nn.Module):
  }
  return out

- def forward(self, data, step_ratio=1):
- # data: output of the dataloader
- # return: loss
-
- results = {}
- loss = 0
-
- images = data['input'] # [B, 4, 9, h, W], input features
-
- # use the first view to predict gaussians
- svd_volume = self.forward_svd_volume(images,data) # [B, N, 14]
-
- results['svd_volume'] = svd_volume
-
- # return the rendered images
- results = self.forward_geometry(svd_volume, data['w2c'], self.opt.output_size)
-
-
- # always use white bg
- bg_color = torch.ones(3, dtype=torch.float32).to(device)
-
- # use the other views for rendering and supervision
- #results = self.tensorRF(svd_volume, data['all_rays_o'], data['all_rays_d'],is_train=True, bg_color=bg_color, N_samples=self.opt.n_sample)
-
-
- pred_shading = results['image'] # [B, V, C, output_size, output_size]
- pred_alphas = results['mask'] # [B, V, 1, output_size, output_size]
- pred_albedos = results['albedo'] # [B, V, C, output_size, output_size]
-
- pred_images=pred_shading*pred_albedos
-
- results['images_pred'] = pred_images
- results['alphas_pred'] = pred_alphas
- results['pred_albedos'] = pred_albedos
- results['pred_shading'] = pred_shading
-
-
- gt_images = data['images_output'] # [B, V, 3, output_size, output_size], ground-truth novel views
- gt_albedos = data['albedos_output'] # [B, V, 3, output_size, output_size], ground-truth novel views
- gt_masks = data['masks_output'] # [B, V, 1, output_size, output_size], ground-truth masks
-
- gt_images = gt_images * gt_masks + bg_color.view(1, 1, 3, 1, 1) * (1 - gt_masks)
- gt_albedos = gt_albedos * gt_masks + bg_color.view(1, 1, 3, 1, 1) * (1 - gt_masks)
-
- loss_mse = F.mse_loss(pred_images, gt_images) + F.mse_loss(pred_alphas, gt_masks) + F.mse_loss(pred_albedos, gt_albedos)
- loss = loss + loss_mse
-
- if self.opt.lambda_lpips > 0:
- loss_lpips = self.lpips_loss(
- F.interpolate(gt_images.view(-1, 3, self.opt.output_size, self.opt.output_size) * 2 - 1, (256, 256), mode='bilinear', align_corners=False),
- F.interpolate(pred_images.view(-1, 3, self.opt.output_size, self.opt.output_size) * 2 - 1, (256, 256), mode='bilinear', align_corners=False),
- ).mean()
- results['loss_lpips'] = loss_lpips
- loss = loss + self.opt.lambda_lpips * loss_lpips
-
- results['loss'] = loss
-
- # metric
- with torch.no_grad():
- psnr = -10 * torch.log10(torch.mean((pred_images.detach() - gt_images) ** 2))
- results['psnr'] = psnr
-
- return results
-

  def render_frame(self, data):
  # data: output of the dataloader
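Note: the two deleted forward methods were the training step of LTRFM_NeRF and LTRFM_Mesh; the inference path used by the demo (render_frame and mesh extraction) is untouched. For reference, the supervision they implemented reduces to the following condensed sketch (the function name and argument list are illustrative; shapes and terms follow the removed code, with lpips_loss standing in for the model's LPIPS module):

import torch
import torch.nn.functional as F

def training_loss(pred_shading, pred_albedos, pred_alphas,
                  gt_images, gt_albedos, gt_masks,
                  lpips_loss=None, lambda_lpips=0.0, output_size=512):
    # predicted image = shading * albedo; ground truth composited onto a white background
    bg = torch.ones(3, device=gt_images.device).view(1, 1, 3, 1, 1)
    pred_images = pred_shading * pred_albedos
    gt_images = gt_images * gt_masks + bg * (1 - gt_masks)
    gt_albedos = gt_albedos * gt_masks + bg * (1 - gt_masks)

    # L2 terms on composited image, alpha/mask, and albedo
    loss = (F.mse_loss(pred_images, gt_images)
            + F.mse_loss(pred_alphas, gt_masks)
            + F.mse_loss(pred_albedos, gt_albedos))

    # optional perceptual term, computed at 256x256 in [-1, 1]
    if lpips_loss is not None and lambda_lpips > 0:
        def prep(x):
            return F.interpolate(x.reshape(-1, 3, output_size, output_size) * 2 - 1,
                                 (256, 256), mode='bilinear', align_corners=False)
        loss = loss + lambda_lpips * lpips_loss(prep(gt_images), prep(pred_images)).mean()

    # PSNR metric, as logged by the removed code
    with torch.no_grad():
        psnr = -10 * torch.log10(torch.mean((pred_images.detach() - gt_images) ** 2))
    return loss, psnr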
core/tensorBase.py CHANGED
@@ -205,23 +205,7 @@ class TensorBase(torch.nn.Module):
  self.near_far = near_far
  self.step_ratio = 0.9 #step_ratio原作0.5

- print("aabb", self.aabb.view(-1))
- print("grid size", gridSize)
- self.aabbSize = self.aabb[1] - self.aabb[0]
- print(self.aabbSize.device)
- self.invaabbSize = 2.0/self.aabbSize
- print(self.invaabbSize.device)
- # self.invaabbSize = self.invaabbSize.to(self.aabb.device)
- # print(self.invaabbSize.device)
- self.gridSize= gridSize.float()
- self.units=self.aabbSize / (self.gridSize-1)
- self.stepSize=torch.mean(self.units)*self.step_ratio # TBD step_ratio? why so small 0.5
- self.aabbDiag = torch.sqrt(torch.sum(torch.square(self.aabbSize)))
- self.nSamples=int((self.aabbDiag / self.stepSize).item()) + 1
- print("sampling step size: ", self.stepSize)
- print("sampling number: ", self.nSamples)
-
- # self.update_stepSize(gridSize)
+ self.update_stepSize(gridSize)

  self.matMode = [[0,1], [0,2], [1,2]]
  self.vecMode = [2, 1, 0]
@@ -252,14 +236,8 @@ class TensorBase(torch.nn.Module):
  print(self.renderModule)

  def update_stepSize(self, gridSize):
- print("aabb", self.aabb.view(-1))
- print("grid size", gridSize)
  self.aabbSize = self.aabb[1] - self.aabb[0]
- print(self.aabbSize.device)
  self.invaabbSize = 2.0/self.aabbSize
- print(self.invaabbSize.device)
- # self.invaabbSize = self.invaabbSize.to(self.aabb.device)
- # print(self.invaabbSize.device)
  self.gridSize= gridSize.float()
  self.units=self.aabbSize / (self.gridSize-1)
  self.stepSize=torch.mean(self.units)*self.step_ratio # TBD step_ratio? why so small 0.5
@@ -281,14 +259,8 @@ class TensorBase(torch.nn.Module):
  pass

  def normalize_coord(self, xyz_sampled):
- print("debug")
-
- print(xyz_sampled.device)
- print(self.aabb[0].device)
- print(self.invaabbSize.device)
  if xyz_sampled.device!=self.invaabbSize.device:
  self.invaabbSize=self.invaabbSize.to(xyz_sampled.device)
-
  return (xyz_sampled-self.aabb[0]) * self.invaabbSize - 1

  def get_optparam_groups(self, lr_init_spatial = 0.02, lr_init_network = 0.001):
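Note: with the duplicated block gone, __init__ relies on update_stepSize alone; besides dropping the debug prints, that is the whole behavioural content of this file's change. update_stepSize derives the ray-marching parameters from the bounding box and grid resolution: units = aabbSize / (gridSize - 1), stepSize = mean(units) * step_ratio, nSamples = aabbDiag / stepSize + 1, and invaabbSize = 2 / aabbSize is what normalize_coord uses to map points into [-1, 1]. A small standalone sketch with illustrative numbers (the [-1, 1]^3 box matches models.py above; grid size 128 is just an assumption for the example):

import torch

# illustrative inputs; in the repo they come from Options (splat_size, znear/zfar, ...)
aabb = torch.tensor([[-1., -1., -1.], [1., 1., 1.]])
grid_size = torch.tensor([128., 128., 128.])
step_ratio = 0.9  # value set in this commit

aabb_size = aabb[1] - aabb[0]                        # per-axis extent: [2, 2, 2]
inv_aabb_size = 2.0 / aabb_size                      # used by normalize_coord
units = aabb_size / (grid_size - 1)                  # voxel size per axis
step_size = torch.mean(units) * step_ratio           # ray-marching step, ~0.0142
aabb_diag = torch.sqrt(torch.sum(torch.square(aabb_size)))  # box diagonal, ~3.464
n_samples = int((aabb_diag / step_size).item()) + 1  # ~245 samples per ray

# normalize_coord maps a point into [-1, 1] inside the box
xyz = torch.tensor([0.5, -0.25, 1.0])
normalized = (xyz - aabb[0]) * inv_aabb_size - 1     # identity here, since the box is already [-1, 1]
print(step_size.item(), n_samples, normalized)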