File size: 11,990 Bytes

b0afe49

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "54f1a5b8-ed8b-4add-83a0-fa40732f80cc",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/root/miniconda3/envs/control/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "logging improved.\n",
      "Enabled sliced_attention.\n",
      "No module 'xformers'. Proceeding without it.\n",
      "ControlInpaintLDM: Running in eps-prediction mode\n",
      "DiffusionWrapper has 859.52 M params.\n",
      "making attention of type 'vanilla' with 512 in_channels\n",
      "Working with z of shape (1, 4, 32, 32) = 4096 dimensions.\n",
      "making attention of type 'vanilla' with 512 in_channels\n",
      "Loaded model config from [models/cldm_v15-mask.yaml]\n",
      "Loaded state_dict from [/storage/ckpts15/c-20999.ckpt]\n"
     ]
    }
   ],
   "source": [
    "from share import *\n",
    "import config\n",
    "\n",
    "import einops\n",
    "import gradio as gr\n",
    "import numpy as np\n",
    "import torch\n",
    "import random\n",
    "\n",
    "from pytorch_lightning import seed_everything\n",
    "from annotator.util import resize_image, HWC3\n",
    "from cldm.model import create_model, load_state_dict\n",
    "from cldm.ddim_hacked import DDIMSampler\n",
    "\n",
    "\n",
    "model = create_model('models/cldm_v15-mask.yaml').cpu()\n",
    "model.load_state_dict(load_state_dict('/storage/ckpts15/c-20999.ckpt', location='cuda'))\n",
    "model = model.cuda()\n",
    "ddim_sampler = DDIMSampler(model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "1ca4b664-22b5-4007-a663-696bbffa5d0a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Automatic pdb calling has been turned ON\n"
     ]
    }
   ],
   "source": [
    "%pdb on"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "bfbe23ec-793b-4f0c-a312-9f67177b8b43",
   "metadata": {},
   "outputs": [],
   "source": [
    "def process(ref_image, control_img, num_samples, image_resolution, ddim_steps, guess_mode, strength, scale, seed, eta):\n",
    "    with torch.no_grad():\n",
    "        ref = resize_image(HWC3(ref_image), image_resolution)\n",
    "        H, W, C = ref.shape\n",
    "        \n",
    "        control = resize_image(HWC3(control_img), image_resolution)\n",
    "\n",
    "        control = torch.from_numpy(np.array(control).astype(np.float32)).cuda() / 255.0\n",
    "        control = torch.stack([control for _ in range(num_samples)], dim=0)\n",
    "        control = einops.rearrange(control, 'b h w c -> b c h w').clone()\n",
    "    \n",
    "        ref = torch.from_numpy(np.array(ref).astype(np.float32)).cuda() / 255.0\n",
    "        ref = torch.stack([ref for _ in range(num_samples)], dim=0)\n",
    "        ref = einops.rearrange(ref, 'b h w c -> b c h w').clone()\n",
    "        \n",
    "        if seed == -1:\n",
    "            seed = random.randint(0, 65535)\n",
    "        seed_everything(seed)\n",
    "\n",
    "        if config.save_memory:\n",
    "            model.low_vram_shift(is_diffusing=False)\n",
    "\n",
    "        cond = {\"c_concat\": [control], \"c_crossattn\": [model.get_learned_conditioning(ref * num_samples)]}\n",
    "        #un_cond = {\"c_concat\": None if guess_mode else [control], \"c_crossattn\": [model.get_learned_conditioning([c] * num_samples)]}\n",
    "        shape = (4, H // 8, W // 8)\n",
    "\n",
    "        if config.save_memory:\n",
    "            model.low_vram_shift(is_diffusing=True)\n",
    "\n",
    "        model.control_scales = [strength * (0.825 ** float(12 - i)) for i in range(13)] if guess_mode else ([strength] * 13)  # Magic number. IDK why. Perhaps because 0.825**12<0.01 but 0.826**12>0.01\n",
    "        samples, intermediates = ddim_sampler.sample(ddim_steps, num_samples,\n",
    "                                                     shape, cond, verbose=False, eta=eta,\n",
    "                                                     unconditional_guidance_scale=1,\n",
    "                                                     unconditional_conditioning=None)\n",
    "\n",
    "        if config.save_memory:\n",
    "            model.low_vram_shift(is_diffusing=False)\n",
    "\n",
    "        x_samples = model.decode_first_stage(samples)\n",
    "        x_samples = (einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 + 127.5).cpu().numpy().clip(0, 255).astype(np.uint8)\n",
    "\n",
    "        results = [x_samples[i] for i in range(num_samples)]\n",
    "    return [255 - detected_map] + results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "389293fb-0b9e-4a7c-9b49-e6e13af53143",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Running on local URL:  http://127.0.0.1:7860\n",
      "Running on public URL: https://af551707-d6f9-4083.gradio.live\n",
      "\n",
      "This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div><iframe src=\"https://af551707-d6f9-4083.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Global seed set to 1646781339\n",
      "Traceback (most recent call last):\n",
      "  File \"/root/miniconda3/envs/control/lib/python3.8/site-packages/gradio/routes.py\", line 337, in run_predict\n",
      "    output = await app.get_blocks().process_api(\n",
      "  File \"/root/miniconda3/envs/control/lib/python3.8/site-packages/gradio/blocks.py\", line 1015, in process_api\n",
      "    result = await self.call_function(\n",
      "  File \"/root/miniconda3/envs/control/lib/python3.8/site-packages/gradio/blocks.py\", line 833, in call_function\n",
      "    prediction = await anyio.to_thread.run_sync(\n",
      "  File \"/root/miniconda3/envs/control/lib/python3.8/site-packages/anyio/to_thread.py\", line 31, in run_sync\n",
      "    return await get_asynclib().run_sync_in_worker_thread(\n",
      "  File \"/root/miniconda3/envs/control/lib/python3.8/site-packages/anyio/_backends/_asyncio.py\", line 937, in run_sync_in_worker_thread\n",
      "    return await future\n",
      "  File \"/root/miniconda3/envs/control/lib/python3.8/site-packages/anyio/_backends/_asyncio.py\", line 867, in run\n",
      "    result = context.run(func, *args)\n",
      "  File \"/tmp/ipykernel_2934/284040618.py\", line 23, in process\n",
      "    cond = {\"c_concat\": [control], \"c_crossattn\": [model.get_learned_conditioning(ref * num_samples)]}\n",
      "  File \"/notebooks/ControlNet/ldm/models/diffusion/ddpm.py\", line 667, in get_learned_conditioning\n",
      "    c = self.cond_stage_model.encode(c)\n",
      "  File \"/notebooks/ControlNet/ldm/modules/encoders/modules.py\", line 169, in encode\n",
      "    return self(image)\n",
      "  File \"/root/miniconda3/envs/control/lib/python3.8/site-packages/torch/nn/modules/module.py\", line 1130, in _call_impl\n",
      "    return forward_call(*input, **kwargs)\n",
      "  File \"/notebooks/ControlNet/ldm/modules/encoders/modules.py\", line 160, in forward\n",
      "    outputs = self.transformer(pixel_values=image)\n",
      "  File \"/root/miniconda3/envs/control/lib/python3.8/site-packages/torch/nn/modules/module.py\", line 1130, in _call_impl\n",
      "    return forward_call(*input, **kwargs)\n",
      "  File \"/root/miniconda3/envs/control/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py\", line 834, in forward\n",
      "    return self.vision_model(\n",
      "  File \"/root/miniconda3/envs/control/lib/python3.8/site-packages/torch/nn/modules/module.py\", line 1130, in _call_impl\n",
      "    return forward_call(*input, **kwargs)\n",
      "  File \"/root/miniconda3/envs/control/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py\", line 765, in forward\n",
      "    hidden_states = self.embeddings(pixel_values)\n",
      "  File \"/root/miniconda3/envs/control/lib/python3.8/site-packages/torch/nn/modules/module.py\", line 1130, in _call_impl\n",
      "    return forward_call(*input, **kwargs)\n",
      "  File \"/root/miniconda3/envs/control/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py\", line 138, in forward\n",
      "    embeddings = embeddings + self.position_embedding(self.position_ids)\n",
      "RuntimeError: The size of tensor a (1801) must match the size of tensor b (257) at non-singleton dimension 1\n"
     ]
    }
   ],
   "source": [
    "block = gr.Blocks().queue()\n",
    "with block:\n",
    "    with gr.Row():\n",
    "        gr.Markdown(\"## Control Stable Diffusion with Canny Edge Maps\")\n",
    "    with gr.Row():\n",
    "        with gr.Column():\n",
    "            control_image = gr.Image(label=\"img mask\", source='upload', type=\"numpy\")\n",
    "            ref_image = gr.Image(label=\"ref image\", source='upload', type=\"numpy\")\n",
    "            run_button = gr.Button(label=\"Run\")\n",
    "            with gr.Accordion(\"Advanced options\", open=False):\n",
    "                num_samples = gr.Slider(label=\"Images\", minimum=1, maximum=12, value=1, step=1)\n",
    "                image_resolution = gr.Slider(label=\"Image Resolution\", minimum=256, maximum=768, value=512, step=64)\n",
    "                strength = gr.Slider(label=\"Control Strength\", minimum=0.0, maximum=2.0, value=1.0, step=0.01)\n",
    "                guess_mode = gr.Checkbox(label='Guess Mode', value=False)\n",
    "                ddim_steps = gr.Slider(label=\"Steps\", minimum=1, maximum=100, value=20, step=1)\n",
    "                scale = gr.Slider(label=\"Guidance Scale\", minimum=0.1, maximum=30.0, value=9.0, step=0.1)\n",
    "                seed = gr.Slider(label=\"Seed\", minimum=-1, maximum=2147483647, step=1, randomize=True)\n",
    "                eta = gr.Number(label=\"eta (DDIM)\", value=0.0)\n",
    "                \n",
    "        with gr.Column():\n",
    "            result_gallery = gr.Gallery(label='Output', show_label=False, elem_id=\"gallery\").style(grid=2, height='auto')\n",
    "    ips = [ref_image, control_image, num_samples, image_resolution, ddim_steps, guess_mode, strength, scale, seed, eta]\n",
    "    run_button.click(fn=process, inputs=ips, outputs=[result_gallery])\n",
    "\n",
    "\n",
    "block.launch(debug=True, share=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5142b6dd-0a56-4779-82dc-ff786986535d",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b8f00ec0-2a16-4e73-8d69-859ca1c665df",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "controlnet",
   "language": "python",
   "name": "controlnet"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}