{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "54f1a5b8-ed8b-4add-83a0-fa40732f80cc", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/root/miniconda3/envs/control/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "logging improved.\n", "Enabled sliced_attention.\n", "No module 'xformers'. Proceeding without it.\n", "ControlInpaintLDM: Running in eps-prediction mode\n", "DiffusionWrapper has 859.52 M params.\n", "making attention of type 'vanilla' with 512 in_channels\n", "Working with z of shape (1, 4, 32, 32) = 4096 dimensions.\n", "making attention of type 'vanilla' with 512 in_channels\n", "Loaded model config from [models/cldm_v15-mask.yaml]\n", "Loaded state_dict from [/storage/ckpts15/c-20999.ckpt]\n" ] } ], "source": [ "from share import *\n", "import config\n", "\n", "import einops\n", "import gradio as gr\n", "import numpy as np\n", "import torch\n", "import random\n", "\n", "from pytorch_lightning import seed_everything\n", "from annotator.util import resize_image, HWC3\n", "from cldm.model import create_model, load_state_dict\n", "from cldm.ddim_hacked import DDIMSampler\n", "\n", "\n", "model = create_model('models/cldm_v15-mask.yaml').cpu()\n", "model.load_state_dict(load_state_dict('/storage/ckpts15/c-20999.ckpt', location='cuda'))\n", "model = model.cuda()\n", "ddim_sampler = DDIMSampler(model)" ] }, { "cell_type": "code", "execution_count": 2, "id": "1ca4b664-22b5-4007-a663-696bbffa5d0a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Automatic pdb calling has been turned ON\n" ] } ], "source": [ "%pdb on" ] }, { "cell_type": "code", "execution_count": 3, "id": "bfbe23ec-793b-4f0c-a312-9f67177b8b43", "metadata": {}, "outputs": [], "source": [ "def process(ref_image, control_img, num_samples, image_resolution, ddim_steps, guess_mode, strength, scale, seed, eta):\n", " with torch.no_grad():\n", " ref = resize_image(HWC3(ref_image), image_resolution)\n", " H, W, C = ref.shape\n", " \n", " control = resize_image(HWC3(control_img), image_resolution)\n", "\n", " control = torch.from_numpy(np.array(control).astype(np.float32)).cuda() / 255.0\n", " control = torch.stack([control for _ in range(num_samples)], dim=0)\n", " control = einops.rearrange(control, 'b h w c -> b c h w').clone()\n", " \n", " ref = torch.from_numpy(np.array(ref).astype(np.float32)).cuda() / 255.0\n", " ref = torch.stack([ref for _ in range(num_samples)], dim=0)\n", " ref = einops.rearrange(ref, 'b h w c -> b c h w').clone()\n", " \n", " if seed == -1:\n", " seed = random.randint(0, 65535)\n", " seed_everything(seed)\n", "\n", " if config.save_memory:\n", " model.low_vram_shift(is_diffusing=False)\n", "\n", " cond = {\"c_concat\": [control], \"c_crossattn\": [model.get_learned_conditioning(ref * num_samples)]}\n", " #un_cond = {\"c_concat\": None if guess_mode else [control], \"c_crossattn\": [model.get_learned_conditioning([c] * num_samples)]}\n", " shape = (4, H // 8, W // 8)\n", "\n", " if config.save_memory:\n", " model.low_vram_shift(is_diffusing=True)\n", "\n", " model.control_scales = [strength * (0.825 ** float(12 - i)) for i in range(13)] if guess_mode else ([strength] * 13) # Magic number. IDK why. 
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "bfbe23ec-793b-4f0c-a312-9f67177b8b43",
   "metadata": {},
   "outputs": [],
   "source": [
    "def process(ref_image, control_img, num_samples, image_resolution, ddim_steps, guess_mode, strength, scale, seed, eta):\n",
    "    with torch.no_grad():\n",
    "        ref = resize_image(HWC3(ref_image), image_resolution)\n",
    "        H, W, C = ref.shape\n",
    "\n",
    "        control = resize_image(HWC3(control_img), image_resolution)\n",
    "\n",
    "        control = torch.from_numpy(np.array(control).astype(np.float32)).cuda() / 255.0\n",
    "        control = torch.stack([control for _ in range(num_samples)], dim=0)\n",
    "        control = einops.rearrange(control, 'b h w c -> b c h w').clone()\n",
    "\n",
    "        ref = torch.from_numpy(np.array(ref).astype(np.float32)).cuda() / 255.0\n",
    "        ref = torch.stack([ref for _ in range(num_samples)], dim=0)\n",
    "        ref = einops.rearrange(ref, 'b h w c -> b c h w').clone()\n",
    "\n",
    "        if seed == -1:\n",
    "            seed = random.randint(0, 65535)\n",
    "        seed_everything(seed)\n",
    "\n",
    "        if config.save_memory:\n",
    "            model.low_vram_shift(is_diffusing=False)\n",
    "\n",
    "        # The batch dimension is already num_samples (torch.stack above), so pass\n",
    "        # ref directly; `ref * num_samples` would scale the pixel values instead.\n",
    "        # NOTE: the traceback recorded under the UI cell below shows the CLIP vision\n",
    "        # embedder rejecting a full-resolution tensor (position embeddings 1801 vs\n",
    "        # 257); ref likely needs resizing to the embedder's expected input size\n",
    "        # (224x224 for CLIP ViT) before this call.\n",
    "        cond = {\"c_concat\": [control], \"c_crossattn\": [model.get_learned_conditioning(ref)]}\n",
    "        #un_cond = {\"c_concat\": None if guess_mode else [control], \"c_crossattn\": [model.get_learned_conditioning([c] * num_samples)]}\n",
    "        shape = (4, H // 8, W // 8)\n",
    "\n",
    "        if config.save_memory:\n",
    "            model.low_vram_shift(is_diffusing=True)\n",
    "\n",
    "        model.control_scales = [strength * (0.825 ** float(12 - i)) for i in range(13)] if guess_mode else ([strength] * 13)  # Magic number. IDK why. Perhaps because 0.825**12 < 0.1 but 0.826**12 > 0.1\n",
    "        # Classifier-free guidance is disabled here (scale fixed at 1, no negative\n",
    "        # conditioning), so the `scale` argument is currently unused.\n",
    "        samples, intermediates = ddim_sampler.sample(ddim_steps, num_samples,\n",
    "                                                     shape, cond, verbose=False, eta=eta,\n",
    "                                                     unconditional_guidance_scale=1,\n",
    "                                                     unconditional_conditioning=None)\n",
    "\n",
    "        if config.save_memory:\n",
    "            model.low_vram_shift(is_diffusing=False)\n",
    "\n",
    "        x_samples = model.decode_first_stage(samples)\n",
    "        x_samples = (einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 + 127.5).cpu().numpy().clip(0, 255).astype(np.uint8)\n",
    "\n",
    "        results = [x_samples[i] for i in range(num_samples)]\n",
    "        # `detected_map` was an undefined leftover from the Canny demo; return\n",
    "        # only the generated samples.\n",
    "        return results"
   ]
  },
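  {
   "cell_type": "markdown",
   "id": "added-md-smoke-test",
   "metadata": {},
   "source": [
    "Before wiring up the UI, `process` can be exercised directly. The added sketch below uses synthetic placeholder arrays (hypothetical values; real inputs come from the Gradio UI in the next cell) and leaves the call commented out because it runs the full diffusion model."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "added-code-smoke-test",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Added sketch: a minimal smoke test for process() with synthetic inputs.\n",
    "# The arrays are hypothetical placeholders, not real data.\n",
    "import numpy as np\n",
    "\n",
    "dummy_ref = np.zeros((512, 512, 3), dtype=np.uint8)   # stand-in reference image\n",
    "dummy_mask = np.zeros((512, 512, 3), dtype=np.uint8)  # stand-in control mask\n",
    "# results = process(dummy_ref, dummy_mask, num_samples=1, image_resolution=512,\n",
    "#                   ddim_steps=20, guess_mode=False, strength=1.0, scale=9.0,\n",
    "#                   seed=42, eta=0.0)"
   ]
  },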
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "Global seed set to 1646781339\n", "Traceback (most recent call last):\n", " File \"/root/miniconda3/envs/control/lib/python3.8/site-packages/gradio/routes.py\", line 337, in run_predict\n", " output = await app.get_blocks().process_api(\n", " File \"/root/miniconda3/envs/control/lib/python3.8/site-packages/gradio/blocks.py\", line 1015, in process_api\n", " result = await self.call_function(\n", " File \"/root/miniconda3/envs/control/lib/python3.8/site-packages/gradio/blocks.py\", line 833, in call_function\n", " prediction = await anyio.to_thread.run_sync(\n", " File \"/root/miniconda3/envs/control/lib/python3.8/site-packages/anyio/to_thread.py\", line 31, in run_sync\n", " return await get_asynclib().run_sync_in_worker_thread(\n", " File \"/root/miniconda3/envs/control/lib/python3.8/site-packages/anyio/_backends/_asyncio.py\", line 937, in run_sync_in_worker_thread\n", " return await future\n", " File \"/root/miniconda3/envs/control/lib/python3.8/site-packages/anyio/_backends/_asyncio.py\", line 867, in run\n", " result = context.run(func, *args)\n", " File \"/tmp/ipykernel_2934/284040618.py\", line 23, in process\n", " cond = {\"c_concat\": [control], \"c_crossattn\": [model.get_learned_conditioning(ref * num_samples)]}\n", " File \"/notebooks/ControlNet/ldm/models/diffusion/ddpm.py\", line 667, in get_learned_conditioning\n", " c = self.cond_stage_model.encode(c)\n", " File \"/notebooks/ControlNet/ldm/modules/encoders/modules.py\", line 169, in encode\n", " return self(image)\n", " File \"/root/miniconda3/envs/control/lib/python3.8/site-packages/torch/nn/modules/module.py\", line 1130, in _call_impl\n", " return forward_call(*input, **kwargs)\n", " File \"/notebooks/ControlNet/ldm/modules/encoders/modules.py\", line 160, in forward\n", " outputs = self.transformer(pixel_values=image)\n", " File \"/root/miniconda3/envs/control/lib/python3.8/site-packages/torch/nn/modules/module.py\", line 1130, in _call_impl\n", " return forward_call(*input, **kwargs)\n", " File \"/root/miniconda3/envs/control/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py\", line 834, in forward\n", " return self.vision_model(\n", " File \"/root/miniconda3/envs/control/lib/python3.8/site-packages/torch/nn/modules/module.py\", line 1130, in _call_impl\n", " return forward_call(*input, **kwargs)\n", " File \"/root/miniconda3/envs/control/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py\", line 765, in forward\n", " hidden_states = self.embeddings(pixel_values)\n", " File \"/root/miniconda3/envs/control/lib/python3.8/site-packages/torch/nn/modules/module.py\", line 1130, in _call_impl\n", " return forward_call(*input, **kwargs)\n", " File \"/root/miniconda3/envs/control/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py\", line 138, in forward\n", " embeddings = embeddings + self.position_embedding(self.position_ids)\n", "RuntimeError: The size of tensor a (1801) must match the size of tensor b (257) at non-singleton dimension 1\n" ] } ], "source": [ "block = gr.Blocks().queue()\n", "with block:\n", " with gr.Row():\n", " gr.Markdown(\"## Control Stable Diffusion with Canny Edge Maps\")\n", " with gr.Row():\n", " with gr.Column():\n", " control_image = gr.Image(label=\"img mask\", source='upload', type=\"numpy\")\n", " ref_image = gr.Image(label=\"ref image\", source='upload', type=\"numpy\")\n", " run_button = 
    "            with gr.Accordion(\"Advanced options\", open=False):\n",
    "                num_samples = gr.Slider(label=\"Images\", minimum=1, maximum=12, value=1, step=1)\n",
    "                image_resolution = gr.Slider(label=\"Image Resolution\", minimum=256, maximum=768, value=512, step=64)\n",
    "                strength = gr.Slider(label=\"Control Strength\", minimum=0.0, maximum=2.0, value=1.0, step=0.01)\n",
    "                guess_mode = gr.Checkbox(label='Guess Mode', value=False)\n",
    "                ddim_steps = gr.Slider(label=\"Steps\", minimum=1, maximum=100, value=20, step=1)\n",
    "                scale = gr.Slider(label=\"Guidance Scale\", minimum=0.1, maximum=30.0, value=9.0, step=0.1)\n",
    "                seed = gr.Slider(label=\"Seed\", minimum=-1, maximum=2147483647, step=1, randomize=True)\n",
    "                eta = gr.Number(label=\"eta (DDIM)\", value=0.0)\n",
    "\n",
    "        with gr.Column():\n",
    "            result_gallery = gr.Gallery(label='Output', show_label=False, elem_id=\"gallery\").style(grid=2, height='auto')\n",
    "    ips = [ref_image, control_image, num_samples, image_resolution, ddim_steps, guess_mode, strength, scale, seed, eta]\n",
    "    run_button.click(fn=process, inputs=ips, outputs=[result_gallery])\n",
    "\n",
    "\n",
    "block.launch(debug=True, share=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "controlnet",
   "language": "python",
   "name": "controlnet"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}