wondervictor committed on
Commit
0e7e92c
·
1 Parent(s): b62a9c0
Files changed (3) hide show
  1. app.py +2 -2
  2. autoregressive/models/generate.py +1 -1
  3. model.py +9 -6
app.py CHANGED
@@ -54,8 +54,8 @@ hf_hub_download(repo_id="facebook/dinov2-small", filename="pytorch_model.bin", l
54
  DESCRIPTION = "# [ControlAR: Controllable Image Generation with Autoregressive Models](https://arxiv.org/abs/2410.02705) \n ### The first row in outputs is the input image and condition. The second row is the images generated by ControlAR. \n ### You can run locally by following the instruction on our [Github Repo](https://github.com/hustvl/ControlAR)."
55
  SHOW_DUPLICATE_BUTTON = os.getenv("SHOW_DUPLICATE_BUTTON") == "1"
56
  model = Model()
57
- device = "cuda"
58
- model.to(device)
59
  with gr.Blocks(css="style.css") as demo:
60
  gr.Markdown(DESCRIPTION)
61
  gr.DuplicateButton(
 
54
  DESCRIPTION = "# [ControlAR: Controllable Image Generation with Autoregressive Models](https://arxiv.org/abs/2410.02705) \n ### The first row in outputs is the input image and condition. The second row is the images generated by ControlAR. \n ### You can run locally by following the instruction on our [Github Repo](https://github.com/hustvl/ControlAR)."
55
  SHOW_DUPLICATE_BUTTON = os.getenv("SHOW_DUPLICATE_BUTTON") == "1"
56
  model = Model()
57
+ # device = "cuda"
58
+ # model.to(device)
59
  with gr.Blocks(css="style.css") as demo:
60
  gr.Markdown(DESCRIPTION)
61
  gr.DuplicateButton(
autoregressive/models/generate.py CHANGED
@@ -145,7 +145,7 @@ def generate(model, cond, max_new_tokens, emb_masks=None, cfg_scale=1.0, cfg_int
145
  print(condition)
146
  condition = torch.ones_like(condition)
147
  condition = model.adapter_mlp(condition)
148
- print(condition)
149
  if model.model_type == 'c2i':
150
  if cfg_scale > 1.0:
151
  cond_null = torch.ones_like(cond) * model.num_classes
 
145
  print(condition)
146
  condition = torch.ones_like(condition)
147
  condition = model.adapter_mlp(condition)
148
+ #print(condition)
149
  if model.model_type == 'c2i':
150
  if cfg_scale > 1.0:
151
  cond_null = torch.ones_like(cond) * model.num_classes
model.py CHANGED
@@ -44,7 +44,7 @@ class Model:
44
 
45
  def __init__(self):
46
  self.device = torch.device(
47
- "cuda:0")
48
  self.base_model_id = ""
49
  self.task_name = ""
50
  self.vq_model = self.load_vq()
@@ -63,7 +63,7 @@ class Model:
63
  def load_vq(self):
64
  vq_model = VQ_models["VQ-16"](codebook_size=16384,
65
  codebook_embed_dim=8)
66
- vq_model.to('cuda')
67
  vq_model.eval()
68
  checkpoint = torch.load(f"checkpoints/vq_ds16_t2i.pt",
69
  map_location="cpu")
@@ -82,11 +82,13 @@ class Model:
82
  cls_token_num=120,
83
  model_type='t2i',
84
  condition_type=condition_type,
85
- ).to(device='cuda', dtype=precision)
86
 
87
  model_weight = load_file(gpt_ckpt)
88
- # gpt_model.load_state_dict(model_weight, strict=True)
 
89
  gpt_model.eval()
 
90
  print("gpt model is loaded")
91
  return gpt_model
92
 
@@ -121,8 +123,9 @@ class Model:
121
  image = resize_image_to_16_multiple(image, 'canny')
122
  W, H = image.size
123
  print(W, H)
124
- self.t5_model.model.to(self.device)
125
- self.gpt_model_canny.to(self.device)
 
126
 
127
  condition_img = self.get_control_canny(np.array(image), low_threshold,
128
  high_threshold)
 
44
 
45
  def __init__(self):
46
  self.device = torch.device(
47
+ "cuda")
48
  self.base_model_id = ""
49
  self.task_name = ""
50
  self.vq_model = self.load_vq()
 
63
  def load_vq(self):
64
  vq_model = VQ_models["VQ-16"](codebook_size=16384,
65
  codebook_embed_dim=8)
66
+ # vq_model.to('cuda')
67
  vq_model.eval()
68
  checkpoint = torch.load(f"checkpoints/vq_ds16_t2i.pt",
69
  map_location="cpu")
 
82
  cls_token_num=120,
83
  model_type='t2i',
84
  condition_type=condition_type,
85
+ ).to(device='cpu', dtype=precision)
86
 
87
  model_weight = load_file(gpt_ckpt)
88
+ # print("prev:", model_weight['adapter.model.embeddings.patch_embeddings.projection.weight'])
89
+ gpt_model.load_state_dict(model_weight, strict=True)
90
  gpt_model.eval()
91
+ print("loaded:", gpt_model.adapter.model.embeddings.patch_embeddings.projection.weight)
92
  print("gpt model is loaded")
93
  return gpt_model
94
 
 
123
  image = resize_image_to_16_multiple(image, 'canny')
124
  W, H = image.size
125
  print(W, H)
126
+ print("before cuda", self.gpt_model_canny.adapter.model.embeddings.patch_embeddings.projection.weight)
127
+ self.t5_model.model.to('cuda')
128
+ self.gpt_model_canny.to('cuda')
129
 
130
  condition_img = self.get_control_canny(np.array(image), low_threshold,
131
  high_threshold)