wondervictor committed
Commit 9ed9b88 · verified · 1 parent: 113349b

Update model.py

Files changed (1)
  1. model.py +18 -6
model.py CHANGED
@@ -13,6 +13,10 @@ import time
 from autoregressive.models.generate import generate
 from condition.midas.depth import MidasDetector
 
+from controlnet_aux import (
+    MidasDetector,
+)
+
 models = {
     "canny": "checkpoints/canny_MR.safetensors",
     "depth": "checkpoints/depth_MR.safetensors",
@@ -48,7 +52,8 @@ class Model:
         self.gpt_model_canny = self.load_gpt(condition_type='canny')
         self.gpt_model_depth = self.load_gpt(condition_type='depth')
         self.get_control_canny = CannyDetector()
-        self.get_control_depth = MidasDetector('cuda')
+        # self.get_control_depth = MidasDetector('cuda')
+        self.get_control_depth = MidasDetector.from_pretrained("lllyasviel/Annotators")
 
     def to(self, device):
         self.gpt_model_canny.to('cuda')
@@ -196,11 +201,18 @@ class Model:
         # self.get_control_depth.model.to(self.device)
         # self.vq_model.to(self.device)
         image_tensor = torch.from_numpy(np.array(image)).to(self.device)
-        condition_img = torch.from_numpy(
-            self.get_control_depth(image_tensor)).unsqueeze(0)
-        condition_img = condition_img.unsqueeze(0).repeat(2, 3, 1, 1)
-        condition_img = condition_img.to(self.device)
-        condition_img = 2 * (condition_img / 255 - 0.5)
+        # condition_img = torch.from_numpy(
+        #     self.get_control_depth(image_tensor)).unsqueeze(0)
+        # condition_img = condition_img.unsqueeze(0).repeat(2, 3, 1, 1)
+        # condition_img = condition_img.to(self.device)
+        # condition_img = 2 * (condition_img / 255 - 0.5)
+
+        control_image = self.get_control_depth(
+            image=image,
+            image_resolution=512,
+            detect_resolution=512,
+        )
+
         prompts = [prompt] * 2
         caption_embs, emb_masks = self.t5_model.get_text_embeddings(prompts)
 
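The net effect of the commit: depth preprocessing moves from the repo's condition.midas.depth.MidasDetector (CUDA tensor in, NumPy depth map out, manually normalized) to the controlnet_aux MidasDetector loaded from the lllyasviel/Annotators weights. Note that the added import shadows the MidasDetector still imported from condition.midas.depth, so self.get_control_depth now resolves to the controlnet_aux implementation. Below is a minimal standalone sketch of the new path, assuming controlnet-aux is installed; "example.png" and the output filename are placeholders, not part of the commit.

from PIL import Image
from controlnet_aux import MidasDetector

# Downloads the MiDaS annotator weights from the lllyasviel/Annotators
# hub repo on first use, then caches them locally.
midas = MidasDetector.from_pretrained("lllyasviel/Annotators")

image = Image.open("example.png").convert("RGB")  # placeholder input

# The detector runs MiDaS at detect_resolution and returns a PIL depth
# map resized to image_resolution. The image is passed positionally here
# because the keyword name differs across controlnet_aux versions.
control_image = midas(image, detect_resolution=512, image_resolution=512)
control_image.save("depth.png")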
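For reference, the lines the commit comments out performed the old normalization: the uint8 depth map was lifted to a float tensor in [-1, 1] of shape (2, 3, H, W), the batch of two matching the duplicated prompt batch (prompts = [prompt] * 2). A self-contained sketch of that arithmetic, reconstructed from the removed lines rather than taken from the commit:

import numpy as np
import torch

def normalize_condition(depth: np.ndarray) -> torch.Tensor:
    """Equivalent of the commented-out preprocessing: a uint8 (H, W)
    depth map becomes a float tensor in [-1, 1] of shape (2, 3, H, W),
    duplicated across the batch and broadcast to three channels."""
    t = torch.from_numpy(depth).float()   # (H, W)
    t = t.unsqueeze(0).unsqueeze(0)       # (1, 1, H, W)
    t = t.repeat(2, 3, 1, 1)              # (2, 3, H, W)
    return 2 * (t / 255 - 0.5)            # [0, 255] -> [-1, 1]

# e.g. normalize_condition(np.zeros((512, 512), dtype=np.uint8)).shape
# -> torch.Size([2, 3, 512, 512])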