dance-monkey

Paused

App Files Community

guardiancc committed on Sep 2

Commit

f823cf1

•

1 Parent(s): f3672f8

Update mimicmotion/pipelines/pipeline_mimicmotion.py

Browse files

Files changed (1) hide show

mimicmotion/pipelines/pipeline_mimicmotion.py +14 -26

mimicmotion/pipelines/pipeline_mimicmotion.py CHANGED Viewed

@@ -556,21 +556,17 @@ class MimicMotionPipeline(DiffusionPipeline):
                 # expand the latents if we are doing classifier free guidance
                 latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
                 latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
                 # Concatenate image_latents over channels dimension
                 latent_model_input = torch.cat([latent_model_input, image_latents], dim=2)
                 # predict the noise residual
                 noise_pred = torch.zeros_like(image_latents)
                 noise_pred_cnt = image_latents.new_zeros((num_frames,))
                 weight = (torch.arange(tile_size, device=device) + 0.5) * 2. / tile_size
                 weight = torch.minimum(weight, 2 - weight)
-                # Paralelização do loop sobre `indices` usando ThreadPoolExecutor
-                def process_index(idx):
-                    nonlocal noise_pred, noise_pred_cnt
-                    result = torch.zeros_like(image_latents[:1, idx])  # Placeholder for thread-safe accumulation
                     # classifier-free inference
                     pose_latents = self.pose_net(image_pose[idx].to(device))
                     _noise_pred = self.unet(
@@ -582,8 +578,8 @@ class MimicMotionPipeline(DiffusionPipeline):
                         image_only_indicator=image_only_indicator,
                         return_dict=False,
                     )[0]
-                    result[:1] += _noise_pred * weight[:, None, None, None]
                     # normal inference
                     _noise_pred = self.unet(
                         latent_model_input[1:, idx],
@@ -594,34 +590,26 @@ class MimicMotionPipeline(DiffusionPipeline):
                         image_only_indicator=image_only_indicator,
                         return_dict=False,
                     )[0]
-                    result[1:] += _noise_pred * weight[:, None, None, None]
-                    return result, idx
-                with concurrent.futures.ThreadPoolExecutor() as executor:
-                    futures = [executor.submit(process_index, idx) for idx in indices]
-                    for future in concurrent.futures.as_completed(futures):
-                        _noise_pred, idx = future.result()
-                        noise_pred[:, idx] += _noise_pred
-                        noise_pred_cnt[idx] += weight
-                        progress_bar.update()
                 noise_pred.div_(noise_pred_cnt[:, None, None, None])
                 # perform guidance
                 if self.do_classifier_free_guidance:
                     noise_pred_uncond, noise_pred_cond = noise_pred.chunk(2)
                     noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_cond - noise_pred_uncond)
                 # compute the previous noisy sample x_t -> x_t-1
                 latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
                 if callback_on_step_end is not None:
                     callback_kwargs = {}
                     for k in callback_on_step_end_tensor_inputs:
                         callback_kwargs[k] = locals()[k]
                     callback_outputs = callback_on_step_end(self, i, t, callback_kwargs)
                     latents = callback_outputs.pop("latents", latents)
         self.pose_net.cpu()

                 # expand the latents if we are doing classifier free guidance
                 latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
                 latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
                 # Concatenate image_latents over channels dimension
                 latent_model_input = torch.cat([latent_model_input, image_latents], dim=2)
                 # predict the noise residual
                 noise_pred = torch.zeros_like(image_latents)
                 noise_pred_cnt = image_latents.new_zeros((num_frames,))
                 weight = (torch.arange(tile_size, device=device) + 0.5) * 2. / tile_size
                 weight = torch.minimum(weight, 2 - weight)
+                for idx in indices:
                     # classifier-free inference
                     pose_latents = self.pose_net(image_pose[idx].to(device))
                     _noise_pred = self.unet(
                         image_only_indicator=image_only_indicator,
                         return_dict=False,
                     )[0]
+                    noise_pred[:1, idx] += _noise_pred * weight[:, None, None, None]
                     # normal inference
                     _noise_pred = self.unet(
                         latent_model_input[1:, idx],
                         image_only_indicator=image_only_indicator,
                         return_dict=False,
                     )[0]
+                    noise_pred[1:, idx] += _noise_pred * weight[:, None, None, None]
+                    noise_pred_cnt[idx] += weight
+                    progress_bar.update()
                 noise_pred.div_(noise_pred_cnt[:, None, None, None])
                 # perform guidance
                 if self.do_classifier_free_guidance:
                     noise_pred_uncond, noise_pred_cond = noise_pred.chunk(2)
                     noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_cond - noise_pred_uncond)
                 # compute the previous noisy sample x_t -> x_t-1
                 latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
                 if callback_on_step_end is not None:
                     callback_kwargs = {}
                     for k in callback_on_step_end_tensor_inputs:
                         callback_kwargs[k] = locals()[k]
                     callback_outputs = callback_on_step_end(self, i, t, callback_kwargs)
                     latents = callback_outputs.pop("latents", latents)
         self.pose_net.cpu()