Spaces:

wondervictor
/

ControlAR

Running on Zero

App Files Files Community

wondervictor committed on 30 days ago

Commit

108d3f4

verified ·

1 Parent(s): 0342c0e

Update model.py

Browse files

Files changed (1) hide show

model.py +20 -11

model.py CHANGED Viewed

@@ -26,8 +26,9 @@ class Model:
         self.task_name = ""
         self.vq_model = self.load_vq()
         self.t5_model = self.load_t5()
-        self.gpt_model_edge = self.load_gpt(condition_type='edge')
-        self.gpt_model_depth = self.load_gpt(condition_type='depth')
         self.preprocessor = Preprocessor()
     def to(self, device):
@@ -45,7 +46,7 @@ class Model:
         return vq_model
     def load_gpt(self, condition_type='edge'):
-        gpt_ckpt = models[condition_type]
         # precision = torch.bfloat16
         precision = torch.float32
         latent_size = 512 // 16
@@ -56,12 +57,19 @@ class Model:
             condition_type=condition_type,
             adapter_size='base',
         ).to(device='cpu', dtype=precision)
-        model_weight = load_file(gpt_ckpt)
-        gpt_model.load_state_dict(model_weight, strict=False)
-        gpt_model.eval()
-        print("gpt model is loaded")
         return gpt_model
     def load_t5(self):
         # precision = torch.bfloat16
         precision = torch.float32
@@ -92,7 +100,8 @@ class Model:
         preprocessor_name: str,
     ) -> list[PIL.Image.Image]:
         self.t5_model.model.to('cuda').to(torch.bfloat16)
-        self.gpt_model_edge.to('cuda').to(torch.bfloat16)
         self.vq_model.to('cuda')
         if isinstance(image, np.ndarray):
             image = Image.fromarray(image)
@@ -114,10 +123,10 @@ class Model:
         condition_img = condition_img.resize((512,512))
         W, H = condition_img.size
-        condition_img = torch.from_numpy(np.array(condition_img)).unsqueeze(0).permute(0,3,1,2).repeat(1,1,1,1)
         condition_img = condition_img.to(self.device)
         condition_img = 2*(condition_img/255 - 0.5)
-        prompts = [prompt] * 1
         caption_embs, emb_masks = self.t5_model.get_text_embeddings(prompts)
         print(f"processing left-padding...")
@@ -137,7 +146,7 @@ class Model:
         t1 = time.time()
         print(caption_embs.device)
         index_sample = generate(
-            self.gpt_model_edge,
             c_indices,
             (H // 16) * (W // 16),
             c_emb_masks,

         self.task_name = ""
         self.vq_model = self.load_vq()
         self.t5_model = self.load_t5()
+        # self.gpt_model_edge = self.load_gpt(condition_type='edge')
+        # self.gpt_model_depth = self.load_gpt(condition_type='depth')
+        self.gpt_model = self.load_gpt()
         self.preprocessor = Preprocessor()
     def to(self, device):
         return vq_model
     def load_gpt(self, condition_type='edge'):
+        # gpt_ckpt = models[condition_type]
         # precision = torch.bfloat16
         precision = torch.float32
         latent_size = 512 // 16
             condition_type=condition_type,
             adapter_size='base',
         ).to(device='cpu', dtype=precision)
+        # model_weight = load_file(gpt_ckpt)
+        # gpt_model.load_state_dict(model_weight, strict=False)
+        # gpt_model.eval()
+        # print("gpt model is loaded")
         return gpt_model
+    def load_gpt_weight(self, condition_type='edge'):
+        gpt_ckpt = models[condition_type]
+        model_weight = load_file(gpt_ckpt)
+        self.gpt_model.load_state_dict(model_weight, strict=False)
+        self.gpt_model.eval()
+        # print("gpt model is loaded")
     def load_t5(self):
         # precision = torch.bfloat16
         precision = torch.float32
         preprocessor_name: str,
     ) -> list[PIL.Image.Image]:
         self.t5_model.model.to('cuda').to(torch.bfloat16)
+        self.load_gpt_weight('edge')
+        self.gpt_model.to('cuda').to(torch.bfloat16)
         self.vq_model.to('cuda')
         if isinstance(image, np.ndarray):
             image = Image.fromarray(image)
         condition_img = condition_img.resize((512,512))
         W, H = condition_img.size
+        condition_img = torch.from_numpy(np.array(condition_img)).unsqueeze(0).permute(0,3,1,2).repeat(3,1,1,1)
         condition_img = condition_img.to(self.device)
         condition_img = 2*(condition_img/255 - 0.5)
+        prompts = [prompt] * 3
         caption_embs, emb_masks = self.t5_model.get_text_embeddings(prompts)
         print(f"processing left-padding...")
         t1 = time.time()
         print(caption_embs.device)
         index_sample = generate(
+            self.gpt_model,
             c_indices,
             (H // 16) * (W // 16),
             c_emb_masks,