fffiloni committed on
Commit
55659e5
1 Parent(s): b6a6144

Update app_gradio.py

Browse files
Files changed (1) hide show
  1. app_gradio.py +8 -16
app_gradio.py CHANGED
@@ -67,23 +67,15 @@ def prepare_latents(pipe, x_aug):
67
 
68
  @torch.no_grad()
69
  def invert(pipe, inv, load_name, device="cuda", dtype=torch.bfloat16):
70
- # Load and process the image
71
- input_img = load_image(load_name, target_size=256).to(device, dtype=torch.float32) # Shape: (1, C, H, W)
72
- input_img = input_img.unsqueeze(1).repeat(1, 5, 1, 1, 1) # Add time dimension and repeat for T=5
73
- # Shape: (B=1, T=5, C=3, H=256, W=256)
74
-
75
- # Convert image to latent space
76
- latents = prepare_latents(pipe, input_img).to(dtype) # Shape: (B, latent_dim, T, H/8, W/8)
77
-
78
- # Configure the inversion process
79
  inv.set_timesteps(25)
80
-
81
- # Perform inversion and extract final latent representation
82
- id_latents = dd_inversion(pipe, inv, video_latent=latents, num_inv_steps=25, prompt="")[-1]
83
- id_latents = id_latents.to(dtype) # Ensure correct dtype
84
- id_latents = torch.mean(id_latents, dim=2, keepdim=True) # Shape: (B, latent_dim, 1, H/8, W/8)
85
-
86
- return id_latents
87
 
88
  def load_primary_models(pretrained_model_path):
89
  return (
 
67
 
68
@torch.no_grad()
def invert(pipe, inv, load_name, device="cuda", dtype=torch.bfloat16,
           num_frames=5, num_inv_steps=25):
    """Invert a single image into the diffusion model's latent space.

    The image is loaded, replicated along a new time axis to form a short
    pseudo-video, encoded to latents, run through DDIM inversion, and the
    inverted latents are averaged over time down to a single frame.

    Args:
        pipe: diffusion pipeline consumed by ``prepare_latents`` and
            ``dd_inversion`` (defined elsewhere in this file).
        inv: inverse scheduler; its timesteps are configured here.
        load_name: path/identifier passed straight to ``load_image``.
        device: device the image tensor is moved to (default ``"cuda"``).
        dtype: dtype for the latents (default ``torch.bfloat16``).
        num_frames: how many times the image is repeated along the time
            dimension (default 5, matching the original behavior).
        num_inv_steps: number of DDIM inversion steps (default 25,
            matching the original behavior).

    Returns:
        Tensor holding the time-averaged inverted latents — presumably
        shape (B, C_latent, 1, H/8, W/8); TODO confirm against
        ``prepare_latents``.
    """
    # Build a pseudo-video by repeating the frame along a new time axis.
    frame = load_image(load_name, 256).to(device, dtype=dtype).unsqueeze(1)
    input_img = torch.cat([frame] * num_frames, dim=1)

    # BUG FIX: the latents were cast to a hard-coded torch.bfloat16,
    # silently ignoring the ``dtype`` argument; honor ``dtype`` instead
    # (the pre-refactor version did exactly this).
    latents = prepare_latents(pipe, input_img).to(dtype)

    inv.set_timesteps(num_inv_steps)
    id_latents = dd_inversion(
        pipe, inv, video_latent=latents, num_inv_steps=num_inv_steps, prompt=""
    )[-1].to(dtype)

    # NOTE(review): the original synchronized after every stage; ops queued
    # on the same CUDA stream are already ordered, and an unguarded
    # torch.cuda.synchronize() raises on CPU-only hosts, so a single
    # guarded sync is kept before returning.
    if torch.cuda.is_available():
        torch.cuda.synchronize()

    # Collapse the time dimension (dim=2) to a single representative frame.
    return torch.mean(id_latents, dim=2, keepdim=True)
 
 
 
 
79
 
80
  def load_primary_models(pretrained_model_path):
81
  return (