commit 34fb220
Author: yichen-purdue
Parent(s): 96d9168

    init
Files changed:
- app.py +214 -0
- configs/GSSN.yaml +57 -0
- configs/SSN.yaml +51 -0
- model_utils.py +53 -0
- models/Attention.ipynb +509 -0
- models/Attention_SSN.py +218 -0
- models/Attention_Unet.py +165 -0
- models/GSSN.py +176 -0
- models/Loss/Loss.py +271 -0
- models/Loss/__init__.py +0 -0
- models/Loss/__pycache__/Loss.cpython-39.pyc +0 -0
- models/Loss/__pycache__/__init__.cpython-39.pyc +0 -0
- models/Loss/__pycache__/vgg19_loss.cpython-39.pyc +0 -0
- models/Loss/pytorch_ssim/__init__.py +73 -0
- models/Loss/pytorch_ssim/__pycache__/__init__.cpython-39.pyc +0 -0
- models/Loss/vgg19_loss.py +54 -0
- models/SSN.py +143 -0
- models/SSN_Model.py +333 -0
- models/SSN_v1.py +290 -0
- models/Sparse_PH.py +185 -0
- models/__init__.py +43 -0
- models/__pycache__/SSN.cpython-39.pyc +0 -0
- models/__pycache__/SSN_Model.cpython-39.pyc +0 -0
- models/__pycache__/__init__.cpython-39.pyc +0 -0
- models/__pycache__/abs_model.cpython-39.pyc +0 -0
- models/__pycache__/blocks.cpython-39.pyc +0 -0
- models/abs_model.py +73 -0
- models/attention.py +85 -0
- models/blocks.py +238 -0
- models/pvt_attention.py +240 -0
- models/template.py +114 -0
- weights/SSN/0000001760.pt +3 -0
app.py
ADDED
@@ -0,0 +1,214 @@
import torch
from torch import nn
import logging

from pathlib import Path
import gradio as gr
import numpy as np
import cv2

import model_utils
from models.SSN import SSN

import matplotlib
matplotlib.use('TkAgg')

import matplotlib.pyplot as plt

config_file = 'configs/SSN.yaml'
weight = 'weights/SSN/0000001760.pt'
device = torch.device('cuda:0')
model = model_utils.load_model(config_file, weight, SSN, device)

DEFAULT_INTENSITY = 0.9
DEFAULT_GAMMA = 2.0

logging.info('Model loading succeeded')

cur_rgba = None
cur_shadow = None
cur_intensity = DEFAULT_INTENSITY
cur_gamma = DEFAULT_GAMMA


def resize(img, size):
    h, w = img.shape[:2]

    if h > w:
        newh = size
        neww = int(w / h * size)
    else:
        neww = size
        newh = int(h / w * size)

    resized_img = cv2.resize(img, (neww, newh), interpolation=cv2.INTER_AREA)
    if len(img.shape) != len(resized_img.shape):
        # cv2.resize drops a singleton channel dimension; restore it
        resized_img = resized_img[..., None]

    return resized_img


def ibl_normalize(ibl, energy=30.0):
    total_energy = np.sum(ibl)
    if total_energy < 1e-3:
        h, w = ibl.shape
        return np.zeros((h, w))

    return ibl * energy / total_energy


def padding_mask(rgba_input: np.ndarray):
    """ Pad the mask input so that it fits the training dataset view range.

    If the rgba does not have enough padding area, we need to pad the area.

    :param rgba_input: H x W x 4 input; the first 3 channels are RGB, the last channel is the alpha
    :returns: H x W x 4 padded RGBA
    """
    padding = 50
    padding_size = 256 - padding * 2

    h, w = rgba_input.shape[:2]
    rgb = rgba_input[:, :, :3]
    alpha = rgba_input[:, :, -1:]

    zeros = np.where(alpha == 0)
    hh, ww = zeros[0], zeros[1]
    h_min, h_max = hh.min(), hh.max()
    w_min, w_max = ww.min(), ww.max()

    # if the area already has enough padding
    if h_max - h_min < padding_size and w_max - w_min < padding_size:
        return rgba_input

    padding_output = np.zeros((256, 256, 4))
    padding_output[..., :3] = 1.0

    padded_rgba = resize(rgba_input, padding_size)
    new_h, new_w = padded_rgba.shape[:2]

    padding_output[padding:padding + new_h, padding:padding + new_w, :] = padded_rgba

    return padding_output


def shadow_composite(rgba, shadow, intensity, gamma):
    rgb = rgba[..., :3]
    mask = rgba[..., 3:]

    if len(shadow.shape) == 2:
        shadow = shadow[..., None]

    new_shadow = 1.0 - shadow ** gamma * intensity
    ret = rgb * mask + (1.0 - mask) * new_shadow
    return ret, new_shadow[..., 0]


def render_btn_fn(mask, ibl):
    global cur_rgba, cur_shadow, cur_gamma, cur_intensity

    print("Button clicked!")

    mask = mask / 255.0
    ibl = ibl / 255.0

    # smooth the ibl
    ibl = cv2.GaussianBlur(ibl, (11, 11), 0)

    # pad the mask
    mask = padding_mask(mask)

    cur_rgba = np.copy(mask)

    print('mask shape: {}/{}/{}/{}, ibl shape: {}/{}/{}/{}'.format(mask.shape, mask.dtype, mask.min(), mask.max(),
                                                                   ibl.shape, ibl.dtype, ibl.min(), ibl.max()))

    rgb, mask = mask[..., :3], mask[..., 3]

    ibl = ibl_normalize(cv2.resize(ibl, (32, 16)))

    x = {
        'mask': mask,
        'ibl': ibl
    }
    shadow = model.inference(x)
    cur_shadow = np.copy(shadow)

    # composite the shadow
    ret, shadow = shadow_composite(cur_rgba, shadow, cur_intensity, cur_gamma)

    print('IBL range: {}/{} Shadow range: {} {}'.format(ibl.min(), ibl.max(), shadow.min(), shadow.max()))

    plt.figure(figsize=(15, 10))
    plt.subplot(1, 3, 1)
    plt.imshow(mask)
    plt.subplot(1, 3, 2)
    plt.imshow(ibl)
    plt.subplot(1, 3, 3)
    plt.imshow(ret)
    plt.savefig('tmp.png')
    plt.close()

    logging.info('Finished')

    return ret, shadow


def intensity_change(x):
    global cur_rgba, cur_shadow, cur_gamma, cur_intensity

    cur_intensity = x
    ret, shadow = shadow_composite(cur_rgba, cur_shadow, cur_intensity, cur_gamma)
    return ret, shadow


def gamma_change(x):
    global cur_rgba, cur_shadow, cur_gamma, cur_intensity

    cur_gamma = x
    ret, shadow = shadow_composite(cur_rgba, cur_shadow, cur_intensity, cur_gamma)
    return ret, shadow


ibl_h = 128
ibl_w = ibl_h * 2

with gr.Blocks() as demo:
    with gr.Row():
        mask_input = gr.Image(shape=(256, 256), image_mode="RGBA", label="Mask")
        ibl_input = gr.Sketchpad(shape=(ibl_w, ibl_h), image_mode="L", label="IBL", tool='sketch', invert_colors=True)
        output = gr.Image(shape=(256, 256), height=256, width=256, image_mode="RGB", label="Output")
        shadow_output = gr.Image(shape=(256, 256), height=256, width=256, image_mode="L", label="Shadow Layer")

    with gr.Row():
        intensity_slider = gr.Slider(0.0, 1.0, value=DEFAULT_INTENSITY, step=0.1, label="Intensity", info="Choose between 0.0 and 1.0")
        gamma_slider = gr.Slider(1.0, 4.0, value=DEFAULT_GAMMA, step=0.1, label="Gamma", info="Gamma correction for shadow")
        render_btn = gr.Button(label="Render")

    render_btn.click(render_btn_fn, inputs=[mask_input, ibl_input], outputs=[output, shadow_output])
    intensity_slider.release(intensity_change, inputs=[intensity_slider], outputs=[output, shadow_output])
    gamma_slider.release(gamma_change, inputs=[gamma_slider], outputs=[output, shadow_output])

logging.info('Finished')


demo.launch()
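For reference, a minimal standalone sketch of the compositing rule that shadow_composite implements; the toy mask and the random shadow map below are illustrative stand-ins, not part of the app:

import numpy as np

# Toy RGBA cutout: white RGB (as padding_mask writes) with one opaque square.
rgba = np.zeros((256, 256, 4))
rgba[..., :3] = 1.0
rgba[100:150, 100:150, 3] = 1.0

shadow = np.random.rand(256, 256)[..., None]  # stand-in for the network's shadow map

intensity, gamma = 0.9, 2.0  # the app's DEFAULT_INTENSITY / DEFAULT_GAMMA

# Darken the background by 1 - shadow**gamma * intensity, then paste the
# object back in through its alpha mask: the same math as shadow_composite.
new_shadow = 1.0 - shadow ** gamma * intensity
out = rgba[..., :3] * rgba[..., 3:] + (1.0 - rgba[..., 3:]) * new_shadow
print(out.shape)  # (256, 256, 3)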
configs/GSSN.yaml
ADDED
@@ -0,0 +1,57 @@
exp_name: GSSN_ALL_Channels_2e_5

# model related
model:
  name: 'GSSN'
  # backbone: 'vanilla'
  backbone: 'SSN_v1'
  in_channels: 6
  out_channels: 1
  resnet: True

  mid_act: "gelu"
  out_act: "gelu"

  optimizer: 'Adam'
  weight_decay: 4e-5
  beta1: 0.9

  focal: False

# dataset
dataset:
  name: 'GSSN_Dataset'
  hdf5_file: 'Dataset1/more_general_scenes/train/ALL_SIZE_WALL/dataset.hdf5'
  type: 'BC_Boundary'
  rech_grad: True


test_dataset:
  name: 'GSSN_Testing_Dataset'
  hdf5_file: 'Dataset/standalone_test_split/test/ALL_SIZE_MORE/dataset.hdf5'
  type: 'BC_Boundary'
  ignore_shading: True
  rech_grad: True


# training related
hyper_params:
  lr: 2e-5
  epochs: 100000
  workers: 52
  batch_size: 52
  save_epoch: 10

  eval_batch: 10
  eval_save: False

  # visualization
  vis_iter: 100 # iteration for visualization
  save_iter: 100
  n_cols: 5
  gpus:
    - 0
  default_folder: 'weights'
  resume: False
  # resume: True
  weight_file: 'latest'
configs/SSN.yaml
ADDED
@@ -0,0 +1,51 @@
exp_name: SSN

# model related
model:
  name: 'SSN'
  in_channels: 1
  out_channels: 1
  resnet: False

  mid_act: "relu"
  out_act: 'relu'

  optimizer: 'Adam'
  weight_decay: 4e-5
  beta1: 0.9


# dataset
dataset:
  name: 'SSN_Dataset'
  hdf5_file: 'Dataset/SSN/ssn_shadow/shadow_base/ssn_base.hdf5'
  shadow_per_epoch: 10


# test_dataset:
#   name: 'SSN_Dataset'
#   hdf5_file: 'Dataset/SSN/ssn_shadow/shadow_base/ssn_base.hdf5'


# training related
hyper_params:
  lr: 1e-3
  epochs: 100000
  workers: 40
  batch_size: 10
  save_epoch: 10

  eval_batch: 10
  eval_save: False

  # visualization
  vis_iter: 100 # iteration for visualization
  save_iter: 100
  n_cols: 5
  gpus:
    - 0
    - 1

  default_folder: 'weights'
  resume: False
  weight_file: 'latest'
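One PyYAML behavior worth knowing when consuming these configs (a property of yaml.safe_load, not something this commit documents): bare scientific notation such as lr: 1e-3 or weight_decay: 4e-5 lacks a decimal point, so the YAML 1.1 resolver loads it as a string rather than a float, and downstream code has to cast it explicitly. A quick check:

import yaml

cfg = yaml.safe_load("lr: 1e-3\nbatch_size: 10")
print(type(cfg['lr']), cfg['lr'])   # <class 'str'> 1e-3  (no '.', so not resolved as a float)
print(float(cfg['lr']))             # 0.001 after an explicit cast
print(type(cfg['batch_size']))      # <class 'int'>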
model_utils.py
ADDED
@@ -0,0 +1,53 @@
import os
import yaml
import logging

import torch


def parse_configs(config: str):
    """ Parse the config file and return a dictionary of configs

    :param config: path to the config file
    :returns: a dict of parsed configs, or an empty dict on YAML errors
    """
    if not os.path.exists(config):
        logging.error('Cannot find the config file: {}'.format(config))
        exit()

    with open(config, 'r') as stream:
        try:
            configs = yaml.safe_load(stream)
            return configs

        except yaml.YAMLError as exc:
            logging.error(exc)
            return {}


def load_model(config: str, weight: str, model_def, device):
    """ Load the model from the config file and the weight file

    :param config: path to the config file
    :param weight: path to the weight file
    :param model_def: model class definition
    :param device: pytorch device
    :returns: the model with weights loaded and moved to device
    """
    assert os.path.exists(weight), 'Cannot find the weight file: {}'.format(weight)
    assert os.path.exists(config), 'Cannot find the config file: {}'.format(config)

    opt = parse_configs(config)
    model = model_def(opt)
    cp = torch.load(weight)

    models = model.get_models()
    for k, m in models.items():
        m.load_state_dict(cp[k])
        m.to(device)

    model.set_models(models)
    return model
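For context, app.py in this same commit exercises this helper with the config and checkpoint shipped here; note that load_model relies on the model class exposing get_models() and set_models(), as used above:

import torch

import model_utils
from models.SSN import SSN

device = torch.device('cuda:0')
model = model_utils.load_model('configs/SSN.yaml', 'weights/SSN/0000001760.pt', SSN, device)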
models/Attention.ipynb
ADDED
@@ -0,0 +1,509 @@
# In[30]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from functools import partial
import math
import torch as th


# In[31]:
class GroupNorm32(nn.GroupNorm):
    def forward(self, x):
        return super().forward(x.float()).type(x.dtype)

def normalization(channels):
    """
    Make a standard normalization layer.
    :param channels: number of input channels.
    :return: an nn.Module for normalization.
    """
    return GroupNorm32(32, channels)


def conv_nd(dims, *args, **kwargs):
    """
    Create a 1D, 2D, or 3D convolution module.
    """
    if dims == 1:
        return nn.Conv1d(*args, **kwargs)
    elif dims == 2:
        return nn.Conv2d(*args, **kwargs)
    elif dims == 3:
        return nn.Conv3d(*args, **kwargs)
    raise ValueError(f"unsupported dimensions: {dims}")


# In[32]:
class QKVAttentionLegacy(nn.Module):
    """
    A module which performs QKV attention. Matches legacy QKVAttention + input/output heads shaping
    """

    def __init__(self, n_heads):
        super().__init__()
        self.n_heads = n_heads

    def forward(self, qkv):
        """
        Apply QKV attention.
        :param qkv: an [N x (H * 3 * C) x T] tensor of Qs, Ks, and Vs.
        :return: an [N x (H * C) x T] tensor after attention.
        """
        bs, width, length = qkv.shape
        assert width % (3 * self.n_heads) == 0
        ch = width // (3 * self.n_heads)
        q, k, v = qkv.reshape(bs * self.n_heads, ch * 3, length).split(ch, dim=1)
        scale = 1 / math.sqrt(math.sqrt(ch))
        weight = th.einsum(
            "bct,bcs->bts", q * scale, k * scale
        )  # More stable with f16 than dividing afterwards
        weight = th.softmax(weight.float(), dim=-1).type(weight.dtype)
        a = th.einsum("bts,bcs->bct", weight, v)
        return a.reshape(bs, -1, length)

    @staticmethod
    def count_flops(model, _x, y):
        return count_flops_attn(model, _x, y)


# In[33]:
def zero_module(module):
    """
    Zero out the parameters of a module and return it.
    """
    for p in module.parameters():
        p.detach().zero_()
    return module


# In[37]:
class AttentionBlock(nn.Module):
    """
    An attention block that allows spatial positions to attend to each other.
    Originally ported from here, but adapted to the N-d case.
    https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/models/unet.py#L66.
    """

    def __init__(
        self,
        channels,
        num_heads=1,
        num_head_channels=-1,
        use_new_attention_order=False,
    ):
        super().__init__()
        self.channels = channels
        if num_head_channels == -1:
            self.num_heads = num_heads
        else:
            assert (
                channels % num_head_channels == 0
            ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}"
            self.num_heads = channels // num_head_channels
        self.norm = normalization(channels)
        self.qkv = conv_nd(1, channels, channels * 3, 1)
        if use_new_attention_order:
            # split qkv before split heads
            self.attention = QKVAttention(self.num_heads)
        else:
            # split heads before split qkv
            self.attention = QKVAttentionLegacy(self.num_heads)

        self.proj_out = zero_module(conv_nd(1, channels, channels, 1))

    def forward(self, x):

        import pdb; pdb.set_trace()

        b, c, *spatial = x.shape
        x = x.reshape(b, c, -1)
        qkv = self.qkv(self.norm(x))
        h = self.attention(qkv)
        h = self.proj_out(h)
        return (x + h).reshape(b, c, *spatial)


# In[38]:
test_input = torch.randn(5, 32, 128, 128)

model = AttentionBlock(32, 1)

y = model(test_input)

# Stored output: an interactive ipdb session stepping through AttentionBlock.forward,
# observing x: [5, 32, 16384]; self.qkv: Conv1d(32, 96, kernel_size=(1,), stride=(1,));
# qkv: [5, 96, 16384]; h: [5, 32, 16384]; self.proj_out: Conv1d(32, 32, kernel_size=(1,), stride=(1,));
# the session ends with BdbQuit.


# In[36]:
y.shape
# -> torch.Size([5, 32, 128, 128])

# (kernel: Python 3 (ipykernel), Python 3.8.12)
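As a standalone shape check of the head-splitting layout that QKVAttentionLegacy uses above; the batch, head, channel, and length sizes are arbitrary toy values:

import math
import torch

bs, n_heads, ch, length = 2, 4, 8, 16
qkv = torch.randn(bs, n_heads * 3 * ch, length)  # [N x (H * 3 * C) x T]

# Split heads before splitting q/k/v, matching the legacy ordering.
q, k, v = qkv.reshape(bs * n_heads, ch * 3, length).split(ch, dim=1)
scale = 1 / math.sqrt(math.sqrt(ch))
weight = torch.einsum("bct,bcs->bts", q * scale, k * scale).softmax(dim=-1)
a = torch.einsum("bts,bcs->bct", weight, v).reshape(bs, -1, length)

assert a.shape == (bs, n_heads * ch, length)     # [N x (H * C) x T]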
models/Attention_SSN.py
ADDED
@@ -0,0 +1,218 @@
from abc import abstractmethod
from functools import partial
from typing import Iterable
import math

import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F

from .SSN import Conv, Conv2DMod, Decoder, Up
from .attention import AttentionBlock
from .blocks import ResBlock, Res_Type, get_activation


class Attention_Encoder(nn.Module):
    def __init__(self, in_channels=3, mid_act='gelu', dropout=0.0, num_heads=8, resnet=True):
        super(Attention_Encoder, self).__init__()

        self.in_conv = Conv(in_channels, 32 - in_channels, stride=1, activation=mid_act, resnet=resnet)
        self.down_32_64 = Conv(32, 64, stride=2, activation=mid_act, resnet=resnet)
        self.down_64_64_1 = Conv(64, 64, activation=mid_act, resnet=resnet)

        self.down_64_128 = Conv(64, 128, stride=2, activation=mid_act, resnet=resnet)
        self.down_128_128_1 = Conv(128, 128, activation=mid_act, resnet=resnet)

        self.down_128_256 = Conv(128, 256, stride=2, activation=mid_act, resnet=resnet)
        self.down_256_256_1 = Conv(256, 256, activation=mid_act, resnet=resnet)
        self.down_256_256_1_attn = AttentionBlock(256, num_heads)

        self.down_256_512 = Conv(256, 512, stride=2, activation=mid_act, resnet=resnet)
        self.down_512_512_1 = Conv(512, 512, activation=mid_act, resnet=resnet)
        self.down_512_512_1_attn = AttentionBlock(512, num_heads)

        self.down_512_512_2 = Conv(512, 512, activation=mid_act, resnet=resnet)
        self.down_512_512_2_attn = AttentionBlock(512, num_heads)

        self.down_512_512_3 = Conv(512, 512, activation=mid_act, resnet=resnet)
        self.down_512_512_3_attn = AttentionBlock(512, num_heads)

    def forward(self, x):
        x1 = self.in_conv(x)  # 32 x 256 x 256
        x1 = torch.cat((x, x1), dim=1)

        x2 = self.down_32_64(x1)
        x3 = self.down_64_64_1(x2)

        x4 = self.down_64_128(x3)
        x5 = self.down_128_128_1(x4)

        x6 = self.down_128_256(x5)
        x7 = self.down_256_256_1(x6)
        x7 = self.down_256_256_1_attn(x7)

        x8 = self.down_256_512(x7)
        x9 = self.down_512_512_1(x8)
        x9 = self.down_512_512_1_attn(x9)

        x10 = self.down_512_512_2(x9)
        x10 = self.down_512_512_2_attn(x10)

        x11 = self.down_512_512_3(x10)
        x11 = self.down_512_512_3_attn(x11)

        return x11, x10, x9, x8, x7, x6, x5, x4, x3, x2, x1


class Attention_Decoder(nn.Module):
    def __init__(self, out_channels=3, mid_act='gelu', out_act='sigmoid', resnet=True, num_heads=8):

        super(Attention_Decoder, self).__init__()

        input_channel = 512
        fea_dim = 100

        self.to_style1 = nn.Linear(in_features=fea_dim, out_features=input_channel)

        self.up_16_16_1 = Conv(input_channel, 256, activation=mid_act, style=True, resnet=resnet)
        self.up_16_16_1_attn = AttentionBlock(256, num_heads=num_heads)

        self.up_16_16_2 = Conv(768, 512, activation=mid_act, resnet=resnet)
        self.up_16_16_2_attn = AttentionBlock(512, num_heads=num_heads)

        self.up_16_16_3 = Conv(1024, 512, activation=mid_act, resnet=resnet)
        self.up_16_16_3_attn = AttentionBlock(512, num_heads=num_heads)

        self.up_16_32 = Up(1024, 256, activation=mid_act, resnet=resnet)
        self.to_style2 = nn.Linear(in_features=fea_dim, out_features=512)
        self.up_32_32_1 = Conv(512, 256, activation=mid_act, style=True, resnet=resnet)
        self.up_32_32_1_attn = AttentionBlock(256, num_heads=num_heads)

        self.up_32_64 = Up(512, 128, activation=mid_act, resnet=resnet)
        self.to_style3 = nn.Linear(in_features=fea_dim, out_features=256)
        self.up_64_64_1 = Conv(256, 128, activation=mid_act, style=True, resnet=resnet)

        self.up_64_128 = Up(256, 64, activation=mid_act, resnet=resnet)
        self.to_style4 = nn.Linear(in_features=fea_dim, out_features=128)
        self.up_128_128_1 = Conv(128, 64, activation=mid_act, style=True, resnet=resnet)

        self.up_128_256 = Up(128, 32, activation=mid_act, resnet=resnet)
        self.out_conv = Conv(64, out_channels, activation=out_act)
        self.out_act = get_activation(out_act)

    def forward(self, x, style):
        x11, x10, x9, x8, x7, x6, x5, x4, x3, x2, x1 = x

        style1 = self.to_style1(style)
        y = self.up_16_16_1(x11, style1)  # 256 x 16 x 16
        y = self.up_16_16_1_attn(y)

        y = torch.cat((x10, y), dim=1)  # 768 x 16 x 16
        y = self.up_16_16_2(y, y)  # 512 x 16 x 16
        y = self.up_16_16_2_attn(y)

        y = torch.cat((x9, y), dim=1)  # 1024 x 16 x 16
        y = self.up_16_16_3(y, y)  # 512 x 16 x 16
        y = self.up_16_16_3_attn(y)

        y = torch.cat((x8, y), dim=1)  # 1024 x 16 x 16
        y = self.up_16_32(y, y)  # 256 x 32 x 32

        y = torch.cat((x7, y), dim=1)
        style2 = self.to_style2(style)
        y = self.up_32_32_1(y, style2)  # 256 x 32 x 32
        y = self.up_32_32_1_attn(y)

        y = torch.cat((x6, y), dim=1)
        y = self.up_32_64(y, y)

        y = torch.cat((x5, y), dim=1)
        style3 = self.to_style3(style)

        y = self.up_64_64_1(y, style3)  # 128 x 64 x 64

        y = torch.cat((x4, y), dim=1)
        y = self.up_64_128(y, y)

        y = torch.cat((x3, y), dim=1)
        style4 = self.to_style4(style)
        y = self.up_128_128_1(y, style4)  # 64 x 128 x 128

        y = torch.cat((x2, y), dim=1)
        y = self.up_128_256(y, y)  # 32 x 256 x 256

        y = torch.cat((x1, y), dim=1)
        y = self.out_conv(y, y)  # 3 x 256 x 256
        y = self.out_act(y)
        return y


class Attention_SSN(nn.Module):
    def __init__(self, in_channels, out_channels, num_heads=8, resnet=True, mid_act='gelu', out_act='gelu'):
        super(Attention_SSN, self).__init__()
        # pass options by keyword: the encoder also takes a dropout argument,
        # so positional num_heads/resnet would land in the wrong slots
        self.encoder = Attention_Encoder(in_channels, mid_act=mid_act, num_heads=num_heads, resnet=resnet)
        self.decoder = Attention_Decoder(out_channels, mid_act=mid_act, out_act=out_act, resnet=resnet, num_heads=num_heads)

    def forward(self, x, softness):
        latent = self.encoder(x)
        pred = self.decoder(latent, softness)

        return pred


def get_model_size(model):
    param_size = 0
    for param in model.parameters():
        param_size += param.nelement() * param.element_size()

    buffer_size = 0
    for buffer in model.buffers():
        buffer_size += buffer.nelement() * buffer.element_size()

    size_all_mb = (param_size + buffer_size) / 1024 ** 2
    print('model size: {:.3f}MB'.format(size_all_mb))
    return size_all_mb


if __name__ == '__main__':
    model = AttentionBlock(256, num_heads=8)
    x = torch.randn(5, 256, 64, 64)

    y = model(x)
    print('{}, {}'.format(x.shape, y.shape))

    # ------------------------------------------------------------------ #
    in_channels = 3
    out_channels = 1
    num_heads = 8
    resnet = True
    mid_act = 'gelu'
    out_act = 'gelu'

    model = Attention_SSN(in_channels=in_channels,
                          out_channels=out_channels,
                          num_heads=num_heads,
                          resnet=resnet,
                          mid_act=mid_act,
                          out_act=out_act)

    x = torch.randn(5, 3, 256, 256)
    softness = torch.randn(5, 100)

    y = model(x, softness)

    print('x: {}, y: {}'.format(x.shape, y.shape))

    get_model_size(model)
    # ------------------------------------------------------------------ #
models/Attention_Unet.py
ADDED
@@ -0,0 +1,165 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
import numpy as np

# NOTE: Conv, Conv2DMod, Decoder and Up are defined in SSN_v1 in this commit,
# not in SSN; the original `from .SSN import ...` would fail to import.
from .SSN_v1 import Conv, Conv2DMod, Decoder, Up
from .attention import AttentionBlock
from .blocks import ResBlock, Res_Type, get_activation

class Attention_Encoder(nn.Module):
    def __init__(self, in_channels=3, mid_act='gelu', dropout=0.0, num_heads=8, resnet=True):
        super(Attention_Encoder, self).__init__()

        self.in_conv = Conv(in_channels, 32-in_channels, stride=1, activation=mid_act, resnet=resnet)
        self.down_32_64 = Conv(32, 64, stride=2, activation=mid_act, resnet=resnet)
        self.down_64_64_1 = Conv(64, 64, activation=mid_act, resnet=resnet)

        self.down_64_128 = Conv(64, 128, stride=2, activation=mid_act, resnet=resnet)
        self.down_128_128_1 = Conv(128, 128, activation=mid_act, resnet=resnet)

        self.down_128_256 = Conv(128, 256, stride=2, activation=mid_act, resnet=resnet)
        self.down_256_256_1 = Conv(256, 256, activation=mid_act, resnet=resnet)
        self.down_256_256_1_attn = AttentionBlock(256, num_heads)

        self.down_256_512 = Conv(256, 512, stride=2, activation=mid_act, resnet=resnet)
        self.down_512_512_1 = Conv(512, 512, activation=mid_act, resnet=resnet)
        self.down_512_512_1_attn = AttentionBlock(512, num_heads)

        self.down_512_512_2 = Conv(512, 512, activation=mid_act, resnet=resnet)
        self.down_512_512_2_attn = AttentionBlock(512, num_heads)

        self.down_512_512_3 = Conv(512, 512, activation=mid_act, resnet=resnet)
        self.down_512_512_3_attn = AttentionBlock(512, num_heads)


    def forward(self, x):
        x1 = self.in_conv(x)            # 32 x 256 x 256
        x1 = torch.cat((x, x1), dim=1)

        x2 = self.down_32_64(x1)
        x3 = self.down_64_64_1(x2)

        x4 = self.down_64_128(x3)
        x5 = self.down_128_128_1(x4)

        x6 = self.down_128_256(x5)
        x7 = self.down_256_256_1(x6)
        x7 = self.down_256_256_1_attn(x7)

        x8 = self.down_256_512(x7)
        x9 = self.down_512_512_1(x8)
        x9 = self.down_512_512_1_attn(x9)

        x10 = self.down_512_512_2(x9)
        x10 = self.down_512_512_2_attn(x10)

        x11 = self.down_512_512_3(x10)
        x11 = self.down_512_512_3_attn(x11)

        return x11, x10, x9, x8, x7, x6, x5, x4, x3, x2, x1


class Attention_Decoder(nn.Module):
    def __init__(self, out_channels=3, mid_act='gelu', out_act='sigmoid', resnet=True, num_heads=8):
        super(Attention_Decoder, self).__init__()

        input_channel = 512
        fea_dim = 100

        self.to_style1 = nn.Linear(in_features=fea_dim, out_features=input_channel)

        self.up_16_16_1 = Conv(input_channel, 256, activation=mid_act, style=False, resnet=resnet)
        self.up_16_16_1_attn = AttentionBlock(256, num_heads=num_heads)

        self.up_16_16_2 = Conv(768, 512, activation=mid_act, resnet=resnet)
        self.up_16_16_2_attn = AttentionBlock(512, num_heads=num_heads)

        self.up_16_16_3 = Conv(1024, 512, activation=mid_act, resnet=resnet)
        self.up_16_16_3_attn = AttentionBlock(512, num_heads=num_heads)

        self.up_16_32 = Up(1024, 256, activation=mid_act, resnet=resnet)
        self.to_style2 = nn.Linear(in_features=fea_dim, out_features=512)
        self.up_32_32_1 = Conv(512, 256, activation=mid_act, style=False, resnet=resnet)
        self.up_32_32_1_attn = AttentionBlock(256, num_heads=num_heads)

        self.up_32_64 = Up(512, 128, activation=mid_act, resnet=resnet)
        self.to_style3 = nn.Linear(in_features=fea_dim, out_features=256)
        self.up_64_64_1 = Conv(256, 128, activation=mid_act, style=False, resnet=resnet)

        self.up_64_128 = Up(256, 64, activation=mid_act, resnet=resnet)
        self.to_style4 = nn.Linear(in_features=fea_dim, out_features=128)
        self.up_128_128_1 = Conv(128, 64, activation=mid_act, style=False, resnet=resnet)

        self.up_128_256 = Up(128, 32, activation=mid_act, resnet=resnet)
        self.out_conv = Conv(64, out_channels, activation=out_act)
        self.out_act = get_activation(out_act)


    def forward(self, x):
        x11, x10, x9, x8, x7, x6, x5, x4, x3, x2, x1 = x

        # All Conv/Up layers below take a single input; the original passed
        # (y, y), which Up.forward in SSN_v1 rejects.
        y = self.up_16_16_1(x11)        # 256 x 16 x 16
        y = self.up_16_16_1_attn(y)

        y = torch.cat((x10, y), dim=1)  # 768 x 16 x 16
        y = self.up_16_16_2(y)          # 512 x 16 x 16
        y = self.up_16_16_2_attn(y)

        y = torch.cat((x9, y), dim=1)   # 1024 x 16 x 16
        y = self.up_16_16_3(y)          # 512 x 16 x 16
        y = self.up_16_16_3_attn(y)

        y = torch.cat((x8, y), dim=1)   # 1024 x 16 x 16
        y = self.up_16_32(y)            # 256 x 32 x 32

        y = torch.cat((x7, y), dim=1)
        y = self.up_32_32_1(y)          # 256 x 32 x 32
        y = self.up_32_32_1_attn(y)

        y = torch.cat((x6, y), dim=1)
        y = self.up_32_64(y)

        y = torch.cat((x5, y), dim=1)
        y = self.up_64_64_1(y)          # 128 x 64 x 64

        y = torch.cat((x4, y), dim=1)
        y = self.up_64_128(y)

        y = torch.cat((x3, y), dim=1)
        y = self.up_128_128_1(y)        # 64 x 128 x 128

        y = torch.cat((x2, y), dim=1)
        y = self.up_128_256(y)          # 32 x 256 x 256

        y = torch.cat((x1, y), dim=1)
        y = self.out_conv(y)            # 3 x 256 x 256
        y = self.out_act(y)
        return y


class Attention_Unet(nn.Module):
    def __init__(self, in_channels, out_channels, num_heads=8, resnet=True, mid_act='gelu', out_act='gelu'):
        super(Attention_Unet, self).__init__()
        # keyword arguments: the original positional call passed num_heads into
        # the encoder's `dropout` slot and resnet into its `num_heads` slot
        self.encoder = Attention_Encoder(in_channels, mid_act=mid_act, num_heads=num_heads, resnet=resnet)
        self.decoder = Attention_Decoder(out_channels, mid_act=mid_act, out_act=out_act, resnet=resnet)


    def forward(self, x):
        latent = self.encoder(x)
        pred = self.decoder(latent)
        return pred


if __name__ == '__main__':
    test_input = torch.randn(5, 1, 256, 256)

    # The original test constructed SSN_v1 (undefined in this module) and
    # passed a style vector that Attention_Unet.forward does not accept.
    model = Attention_Unet(1, 1, mid_act='gelu', out_act='gelu', resnet=True)
    test_out = model(test_input)

    print('Output shape: ', test_out.shape)
models/GSSN.py
ADDED
@@ -0,0 +1,176 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import utils
from collections import OrderedDict
import numpy as np
import matplotlib.cm as cm
import matplotlib as mpl

from .abs_model import abs_model
from .blocks import *
from .SSN import SSN
from .SSN_v1 import SSN_v1
from .Loss.Loss import norm_loss


class GSSN(abs_model):
    def __init__(self, opt):
        mid_act = opt['model']['mid_act']
        out_act = opt['model']['out_act']
        in_channels = opt['model']['in_channels']
        out_channels = opt['model']['out_channels']
        resnet = opt['model']['resnet']
        self.ncols = opt['hyper_params']['n_cols']
        self.focal = opt['model']['focal']

        if 'backbone' not in opt['model'].keys():
            self.model = SSN(in_channels=in_channels,
                             out_channels=out_channels,
                             mid_act=mid_act,
                             out_act=out_act,
                             resnet=resnet)
        else:
            backbone = opt['model']['backbone']
            if backbone == 'vanilla':
                self.model = SSN(in_channels=in_channels,
                                 out_channels=out_channels,
                                 mid_act=mid_act,
                                 out_act=out_act,
                                 resnet=resnet)
            elif backbone == 'SSN_v1':
                self.model = SSN_v1(in_channels=in_channels,
                                    out_channels=out_channels,
                                    mid_act=mid_act,
                                    out_act=out_act,
                                    resnet=resnet)
            else:
                raise NotImplementedError('{} has not been implemented yet'.format(backbone))

        self.optimizer = get_optimizer(opt, self.model)
        self.visualization = {}

        self.norm_loss = norm_loss()

        # inference related
        BINs = 100
        MAX_RAD = 20
        self.size_interval = MAX_RAD / BINs
        self.soft_distribution = [[np.exp(-0.2 * (i - j) ** 2) for i in np.arange(BINs)] for j in np.arange(BINs)]

    def setup_input(self, x):
        return x


    def forward(self, x):
        x, softness = x
        return self.model(x, softness)


    def compute_loss(self, y, pred):
        b = y.shape[0]

        total_loss = self.norm_loss.loss(y, pred)

        if self.focal:
            total_loss = torch.pow(total_loss, 3)

        return total_loss


    def supervise(self, input_x, y, is_training: bool) -> float:
        optimizer = self.optimizer
        model = self.model

        x, softness = input_x['x'], input_x['softness']

        optimizer.zero_grad()
        pred = model(x, softness)
        loss = self.compute_loss(y, pred)

        if is_training:
            loss.backward()
            optimizer.step()

        xc = x.shape[1]
        for i in range(xc):
            self.visualization['x{}'.format(i)] = x[:, i:i+1].detach()

        self.visualization['y'] = y.detach()
        self.visualization['pred'] = pred.detach()

        return loss.item()


    def get_visualize(self) -> OrderedDict:
        """ Convert to visualization numpy array
        """
        nrows = self.ncols
        visualizations = self.visualization
        ret_vis = OrderedDict()

        for k, v in visualizations.items():
            batch = v.shape[0]
            n = min(nrows, batch)

            plot_v = v[:n]
            ret_vis[k] = np.clip(utils.make_grid(plot_v.cpu(), nrow=nrows).numpy().transpose(1,2,0), 0.0, 1.0)
            ret_vis[k] = self.plasma(ret_vis[k])

        return ret_vis


    def get_logs(self):
        pass


    def inference(self, x):
        x, l, device = x['x'], x['l'], x['device']

        x = torch.from_numpy(x.transpose((2,0,1))).unsqueeze(dim=0).to(device)
        l = torch.from_numpy(np.array(self.soft_distribution[int(l/self.size_interval)]).astype(np.float32)).unsqueeze(dim=0).to(device)

        pred = self.forward((x, l))
        pred = pred[0].detach().cpu().numpy().transpose((1,2,0))
        return pred


    def batch_inference(self, x):
        x, l = x['x'], x['softness']
        pred = self.forward((x, l))
        return pred


    """ Getter & Setter
    """
    def get_models(self) -> dict:
        return {'model': self.model}


    def get_optimizers(self) -> dict:
        return {'optimizer': self.optimizer}


    def set_models(self, models: dict):
        # input test
        if 'model' not in models.keys():
            raise ValueError('{} not in self.model'.format('model'))

        self.model = models['model']


    def set_optimizers(self, optimizer: dict):
        self.optimizer = optimizer['optimizer']


    ####################
    # Personal Methods #
    ####################
    def plasma(self, x):
        norm = mpl.colors.Normalize(vmin=0.0, vmax=1)
        mapper = cm.ScalarMappable(norm=norm, cmap='plasma')
        bimg = mapper.to_rgba(x[:,:,0])[:,:,:3]

        return bimg
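A note on the softness encoding used by GSSN.inference above: the scalar softness l is quantized into one of 100 bins and replaced by a Gaussian bump over the bins from the precomputed soft_distribution table. A minimal standalone sketch of that encoding (the value of l is illustrative only):

import numpy as np

BINS = 100
MAX_RAD = 20
size_interval = MAX_RAD / BINS   # each bin covers 0.2 units of softness

# Row j is a Gaussian bump centred on bin j (same table GSSN precomputes).
soft_distribution = [[np.exp(-0.2 * (i - j) ** 2) for i in np.arange(BINS)]
                     for j in np.arange(BINS)]

l = 4.5  # hypothetical softness value in [0, MAX_RAD)
soft_label = np.array(soft_distribution[int(l / size_interval)], dtype=np.float32)
print(soft_label.shape, soft_label.argmax())  # (100,) 22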
models/Loss/Loss.py
ADDED
@@ -0,0 +1,271 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T
from torch.autograd import Variable

import numpy as np
import cv2

# from vgg19_loss import VGG19Loss
# import pytorch_ssim

from .vgg19_loss import VGG19Loss
from . import pytorch_ssim
from abc import ABC, abstractmethod
from collections import OrderedDict

class abs_loss(ABC):
    def loss(self, gt_img, pred_img):
        pass


class norm_loss(abs_loss):
    def __init__(self, norm=1):
        self.norm = norm


    def loss(self, gt_img, pred_img):
        """ M * (I-I') """
        b, c, h, w = gt_img.shape
        return torch.norm(gt_img-pred_img, self.norm)/(h * w * b)


class ssim_loss(abs_loss):
    def __init__(self, window_size=11, channel=1):
        """ Let's try mean ssim!
        """
        self.channel = channel
        self.window_size = window_size
        self.window = self.create_mean_window(window_size, channel)


    def loss(self, gt_img, pred_img):
        b, c, h, w = gt_img.shape
        if c != self.channel:
            self.channel = c
            self.window = self.create_mean_window(self.window_size, self.channel)

        self.window = self.window.to(gt_img).type_as(gt_img)
        l = 1.0 - self.ssim_compute(gt_img, pred_img)
        return l


    def create_mean_window(self, window_size, channel):
        window = Variable(torch.ones(channel, 1, window_size, window_size).float())
        window = window/(window_size * window_size)
        return window


    def ssim_compute(self, gt_img, pred_img):
        window = self.window
        window_size = self.window_size
        channel = self.channel

        mu1 = F.conv2d(gt_img, window, padding = window_size//2, groups = channel)
        mu2 = F.conv2d(pred_img, window, padding = window_size//2, groups = channel)

        mu1_sq = mu1.pow(2)
        mu2_sq = mu2.pow(2)
        mu1_mu2 = mu1*mu2

        sigma1_sq = F.conv2d(gt_img*gt_img, window, padding = window_size//2, groups = channel) - mu1_sq
        sigma2_sq = F.conv2d(pred_img*pred_img, window, padding = window_size//2, groups = channel) - mu2_sq
        sigma12 = F.conv2d(gt_img*pred_img, window, padding = window_size//2, groups = channel) - mu1_mu2

        C1 = 0.01**2
        C2 = 0.03**2

        ssim_map = ((2*mu1_mu2 + C1)*(2*sigma12 + C2))/((mu1_sq + mu2_sq + C1)*(sigma1_sq + sigma2_sq + C2))

        return ssim_map.mean()


class hierarchical_ssim_loss(abs_loss):
    def __init__(self, patch_list: list):
        self.ssim_loss_list = [pytorch_ssim.SSIM(window_size=ws) for ws in patch_list]


    def loss(self, gt_img, pred_img):
        b, c, h, w = gt_img.shape
        total_loss = 0.0
        for loss_func in self.ssim_loss_list:
            total_loss += (1.0-loss_func(gt_img, pred_img))

        return total_loss/b


class vgg_loss(abs_loss):
    def __init__(self):
        self.vgg19_ = VGG19Loss()


    def loss(self, gt_img, pred_img):
        b, c, h, w = gt_img.shape
        v = self.vgg19_(gt_img, pred_img, pred_img.device)
        return v/b


class grad_loss(abs_loss):
    def __init__(self, k=4):
        self.k = k  # fixed: was hard-coded to 4, ignoring the argument

    def loss(self, disp_img, rgb_img=None):
        """ Note, gradient loss should be weighted by an edge-aware weight
        """
        b, c, h, w = disp_img.shape

        grad_loss = 0.0
        for i in range(self.k):
            div_factor = 2 ** i
            cur_transform = T.Resize([h // div_factor, ])
            cur_disp = cur_transform(disp_img)

            cur_disp_dx, cur_disp_dy = self.img_grad(cur_disp)

            if rgb_img is not None:
                cur_rgb = cur_transform(rgb_img)
                cur_rgb_dx, cur_rgb_dy = self.img_grad(cur_rgb)

                # edge-aware weights: down-weight disparity gradients at image edges
                cur_rgb_dx = torch.exp(-torch.mean(torch.abs(cur_rgb_dx), dim=1, keepdim=True))
                cur_rgb_dy = torch.exp(-torch.mean(torch.abs(cur_rgb_dy), dim=1, keepdim=True))
                grad_loss += (torch.sum(torch.abs(cur_disp_dx) * cur_rgb_dx) + torch.sum(torch.abs(cur_disp_dy) * cur_rgb_dy)) / (h * w * self.k)
            else:
                grad_loss += (torch.sum(torch.abs(cur_disp_dx)) + torch.sum(torch.abs(cur_disp_dy))) / (h * w * self.k)

        return grad_loss/b


    def gloss(self, gt, pred):
        """ Loss on the gradient domain
        """
        b, c, h, w = gt.shape
        gt_dx, gt_dy = self.img_grad(gt)
        pred_dx, pred_dy = self.img_grad(pred)

        loss = (gt_dx-pred_dx) ** 2 + (gt_dy - pred_dy) ** 2
        return loss.sum()/(b * h * w)


    def laploss(self, pred):
        b, c, h, w = pred.shape
        lap = self.img_laplacian(pred)

        return torch.abs(lap).sum()/(b * h * w)


    def img_laplacian(self, img):
        b, c, h, w = img.shape
        laplacian = torch.tensor([[1, 4, 1], [4, -20, 4], [1, 4, 1]])

        laplacian_kernel = laplacian.float().unsqueeze(0).expand(1, c, 3, 3).to(img)

        lap = F.conv2d(img, laplacian_kernel, padding=1, stride=1)
        return lap


    def img_grad(self, img):
        """ Compute image gradient by Sobel filtering
        img: B x C x H x W
        """
        b, c, h, w = img.shape
        ysobel = torch.tensor([[1, 2, 1], [0, 0, 0], [-1, -2, -1]])
        xsobel = ysobel.transpose(0,1)

        xsobel_kernel = xsobel.float().unsqueeze(0).expand(1, c, 3, 3).to(img)
        ysobel_kernel = ysobel.float().unsqueeze(0).expand(1, c, 3, 3).to(img)
        dx = F.conv2d(img, xsobel_kernel, padding=1, stride=1)
        dy = F.conv2d(img, ysobel_kernel, padding=1, stride=1)

        return dx, dy


class sharp_loss(abs_loss):
    """ Sharpness term
    1. laplacian
    2. image contrast
    3. image variance
    """
    def __init__(self, window_size=11, channel=1):
        self.window_size = window_size
        self.channel = channel
        self.window = self.create_mean_window(window_size, self.channel)


    def loss(self, gt_img, pred_img):
        b, c, h, w = gt_img.shape

        if c != self.channel:
            self.channel = c
            self.window = self.create_mean_window(self.window_size, self.channel)

        self.window = self.window.to(gt_img).type_as(gt_img)

        channel = self.channel
        window = self.window
        window_size = self.window_size

        mu1 = F.conv2d(gt_img, window, padding = window_size//2, groups = channel) + 1e-6
        mu2 = F.conv2d(pred_img, window, padding = window_size//2, groups = channel) + 1e-6

        contrast1 = torch.absolute((gt_img - mu1)/mu1)
        contrast2 = torch.absolute((pred_img - mu2)/mu2)

        variance1 = (gt_img-mu1) ** 2
        variance2 = (pred_img-mu2) ** 2

        laplacian1 = self.img_laplacian(gt_img)
        laplacian2 = self.img_laplacian(pred_img)

        S1 = -laplacian1 - contrast1 - variance1
        S2 = -laplacian2 - contrast2 - variance2

        total = torch.absolute(S1-S2).mean()
        return total


    def img_laplacian(self, img):
        b, c, h, w = img.shape
        laplacian = torch.tensor([[1, 4, 1], [4, -20, 4], [1, 4, 1]])

        laplacian_kernel = laplacian.float().unsqueeze(0).expand(1, c, 3, 3).to(img)

        lap = F.conv2d(img, laplacian_kernel, padding=1, stride=1)
        return lap


    def create_mean_window(self, window_size, channel):
        window = Variable(torch.ones(channel, 1, window_size, window_size).float())
        window = window/(window_size * window_size)
        return window


if __name__ == '__main__':
    a = torch.rand(3,3,128,128)
    b = torch.rand(3,3,128,128)

    ssim = ssim_loss()
    loss = ssim.loss(a, b)
    print(loss.shape, loss)

    loss = ssim.loss(a, a)
    print(loss.shape, loss)

    loss = ssim.loss(b, b)
    print(loss.shape, loss)

    grad = grad_loss()
    loss = grad.loss(a, b)  # fixed: rgb_img must be a tensor, not a list
    print(loss.shape, loss)

    sharp = sharp_loss()
    loss = sharp.loss(a, b)
    print(loss.shape, loss)
models/Loss/__init__.py
ADDED
File without changes
models/Loss/__pycache__/Loss.cpython-39.pyc
ADDED
Binary file (8.36 kB)

models/Loss/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (164 Bytes)

models/Loss/__pycache__/vgg19_loss.cpython-39.pyc
ADDED
Binary file (2.08 kB)
models/Loss/pytorch_ssim/__init__.py
ADDED
@@ -0,0 +1,73 @@
import torch
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
from math import exp

def gaussian(window_size, sigma):
    gauss = torch.Tensor([exp(-(x - window_size//2)**2/float(2*sigma**2)) for x in range(window_size)])
    return gauss/gauss.sum()

def create_window(window_size, channel):
    _1D_window = gaussian(window_size, 1.5).unsqueeze(1)
    _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0)
    window = Variable(_2D_window.expand(channel, 1, window_size, window_size).contiguous())
    return window

def _ssim(img1, img2, window, window_size, channel, size_average = True):
    mu1 = F.conv2d(img1, window, padding = window_size//2, groups = channel)
    mu2 = F.conv2d(img2, window, padding = window_size//2, groups = channel)

    mu1_sq = mu1.pow(2)
    mu2_sq = mu2.pow(2)
    mu1_mu2 = mu1*mu2

    sigma1_sq = F.conv2d(img1*img1, window, padding = window_size//2, groups = channel) - mu1_sq
    sigma2_sq = F.conv2d(img2*img2, window, padding = window_size//2, groups = channel) - mu2_sq
    sigma12 = F.conv2d(img1*img2, window, padding = window_size//2, groups = channel) - mu1_mu2

    C1 = 0.01**2
    C2 = 0.03**2

    ssim_map = ((2*mu1_mu2 + C1)*(2*sigma12 + C2))/((mu1_sq + mu2_sq + C1)*(sigma1_sq + sigma2_sq + C2))

    if size_average:
        return ssim_map.mean()
    else:
        return ssim_map.mean(1).mean(1).mean(1)

class SSIM(torch.nn.Module):
    def __init__(self, window_size = 11, size_average = True):
        super(SSIM, self).__init__()
        self.window_size = window_size
        self.size_average = size_average
        self.channel = 1
        self.window = create_window(window_size, self.channel)

    def forward(self, img1, img2):
        (_, channel, _, _) = img1.size()

        if channel == self.channel and self.window.data.type() == img1.data.type():
            window = self.window
        else:
            window = create_window(self.window_size, channel)

            if img1.is_cuda:
                window = window.cuda(img1.get_device())
            window = window.type_as(img1)

            self.window = window
            self.channel = channel

        return _ssim(img1, img2, window, self.window_size, channel, self.size_average)

def ssim(img1, img2, window_size = 11, size_average = True):
    (_, channel, _, _) = img1.size()
    window = create_window(window_size, channel)

    if img1.is_cuda:
        window = window.cuda(img1.get_device())
    window = window.type_as(img1)

    return _ssim(img1, img2, window, window_size, channel, size_average)
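For reference, a minimal usage sketch of this module (the import path assumes you run from the repo root; the inputs are illustrative B x C x H x W images in [0, 1]):

import torch
from models.Loss.pytorch_ssim import SSIM, ssim

img1 = torch.rand(2, 1, 64, 64)
img2 = img1.clone()

print(ssim(img1, img2))                   # functional form; ~1.0 for identical images
print(SSIM(window_size=11)(img1, img2))   # module form caches the Gaussian window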
models/Loss/pytorch_ssim/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (2.65 kB)
models/Loss/vgg19_loss.py
ADDED
@@ -0,0 +1,54 @@
import torch
import torch.nn as nn
import torchvision

class FeatureExtractor(nn.Module):
    def __init__(self, cnn, feature_layer=11):
        super(FeatureExtractor, self).__init__()
        self.features = nn.Sequential(*list(cnn.features.children())[:(feature_layer + 1)])

    def normalize(self, tensors, mean, std):
        if not torch.is_tensor(tensors):
            raise TypeError('tensor is not a torch image.')
        for tensor in tensors:
            for t, m, s in zip(tensor, mean, std):
                t.sub_(m).div_(s)
        return tensors

    def forward(self, x):
        # if the image is grayscale, expand it to 3 channels
        if x.size()[1] == 1:
            x = x.expand(-1, 3, -1, -1)

        # [-1, 1] image to [0, 1] image ---------------------------------------------------(1)
        x = (x + 1) * 0.5

        # https://pytorch.org/docs/stable/torchvision/models.html
        x.data = self.normalize(x.data, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        return self.features(x)

# Feature extracting using vgg19
vgg19 = torchvision.models.vgg19(pretrained=True)
feature_extractor = FeatureExtractor(vgg19, feature_layer=35)
feature_extractor.eval()

class VGG19Loss(object):
    def __init__(self):
        global feature_extractor
        self.initialized = False
        self.feature_extractor = feature_extractor
        self.MSE = nn.MSELoss()

    def __call__(self, output, target, device):
        if self.initialized == False:
            self.feature_extractor = self.feature_extractor.to(device)
            self.MSE = self.MSE.to(device)
            self.initialized = True

        # [-1, 1] image to [0, 1] image ---------------------------------------------------(2)
        output = (output + 1) * 0.5
        target = (target + 1) * 0.5

        output = self.feature_extractor(output)
        target = self.feature_extractor(target).data
        return self.MSE(output, target)
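A minimal usage sketch for VGG19Loss (note that importing the module instantiates VGG-19 and downloads the pretrained weights on first use; inputs are assumed to be roughly in [-1, 1], since __call__ maps them back to [0, 1]):

import torch
from models.Loss.vgg19_loss import VGG19Loss

criterion = VGG19Loss()
pred = torch.rand(2, 3, 224, 224) * 2 - 1    # placeholder prediction in [-1, 1]
target = torch.rand(2, 3, 224, 224) * 2 - 1  # placeholder target in [-1, 1]
print(criterion(pred, target, torch.device('cpu')))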
models/SSN.py
ADDED
@@ -0,0 +1,143 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import utils
from collections import OrderedDict
import numpy as np

from .abs_model import abs_model
from .Loss.Loss import norm_loss
from .blocks import *
from .SSN_Model import SSN_Model


class SSN(abs_model):
    def __init__(self, opt):
        mid_act = opt['model']['mid_act']
        out_act = opt['model']['out_act']
        in_channels = opt['model']['in_channels']
        out_channels = opt['model']['out_channels']
        self.ncols = opt['hyper_params']['n_cols']

        self.model = SSN_Model(in_channels=in_channels, out_channels=out_channels, mid_act=mid_act, out_act=out_act)
        self.optimizer = get_optimizer(opt, self.model)
        self.visualization = {}

        self.norm_loss_ = norm_loss(norm=1)

    def setup_input(self, x):
        return x


    def forward(self, x):
        keys = ['mask', 'ibl']

        for k in keys:
            assert k in x.keys(), '{} not in input'.format(k)

        mask = x['mask']
        ibl = x['ibl']

        return self.model(mask, ibl)


    def compute_loss(self, y, pred):
        total_loss = self.norm_loss_.loss(y, pred)
        return total_loss


    def supervise(self, input_x, y, is_training: bool) -> float:
        optimizer = self.optimizer
        model = self.model

        optimizer.zero_grad()
        pred = self.forward(input_x)
        loss = self.compute_loss(y, pred)

        # logging.info('Pred/Target: {}, {}/{}, {}'.format(pred.min().item(), pred.max().item(), y.min().item(), y.max().item()))

        if is_training:
            loss.backward()
            optimizer.step()

        self.visualization['mask'] = input_x['mask'].detach()
        self.visualization['ibl'] = input_x['ibl'].detach()
        self.visualization['y'] = y.detach()
        self.visualization['pred'] = pred.detach()

        return loss.item()


    def get_visualize(self) -> OrderedDict:
        """ Convert to visualization numpy array
        """
        nrows = self.ncols
        visualizations = self.visualization
        ret_vis = OrderedDict()

        for k, v in visualizations.items():
            batch = v.shape[0]
            n = min(nrows, batch)

            plot_v = v[:n]
            plot_v = (plot_v - plot_v.min())/(plot_v.max() - plot_v.min())
            ret_vis[k] = np.clip(utils.make_grid(plot_v.cpu(), nrow=nrows).numpy().transpose(1,2,0), 0.0, 1.0)

        return ret_vis


    def get_logs(self):
        pass


    def inference(self, x):
        keys = ['mask', 'ibl']
        for k in keys:
            assert k in x.keys(), '{} not in input'.format(k)
            assert len(x[k].shape) == 2, '{} should be 2D tensor'.format(k)

        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        mask = torch.tensor(x['mask'])[None, None, ...].float().to(device)
        ibl = torch.tensor(x['ibl'])[None, None, ...].float().to(device)

        input_x = {'mask': mask, 'ibl': ibl}
        pred = self.forward(input_x)

        pred = np.clip(pred[0, 0].detach().cpu().numpy() / 30.0, 0.0, 1.0)
        return pred


    def batch_inference(self, x):
        # TODO
        pass


    """ Getter & Setter
    """
    def get_models(self) -> dict:
        return {'model': self.model}


    def get_optimizers(self) -> dict:
        return {'optimizer': self.optimizer}


    def set_models(self, models: dict):
        # input test
        if 'model' not in models.keys():
            raise ValueError('{} not in self.model'.format('model'))

        self.model = models['model']


    def set_optimizers(self, optimizer: dict):
        self.optimizer = optimizer['optimizer']

    ####################
    # Personal Methods #
    ####################
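SSN (together with get_optimizer in SSN_Model.py below) reads its settings from a nested opt dict. A minimal sketch of the keys actually accessed; all values here are placeholders, the real ones come from configs/SSN.yaml:

opt = {
    'model': {
        'mid_act': 'leaky',        # placeholder values; see configs/SSN.yaml
        'out_act': 'sigmoid',
        'in_channels': 1,
        'out_channels': 1,
        'optimizer': 'Adam',
        'beta1': 0.9,
        'weight_decay': 4e-5,
    },
    'hyper_params': {
        'lr': 1e-4,
        'n_cols': 4,
    },
}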
models/SSN_Model.py
ADDED
@@ -0,0 +1,333 @@
import math

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import logging

def weights_init(init_type='gaussian', std=0.02):
    def init_fun(m):
        classname = m.__class__.__name__
        if (classname.find('Conv') == 0 or classname.find(
                'Linear') == 0) and hasattr(m, 'weight'):
            if init_type == 'gaussian':
                nn.init.normal_(m.weight, 0.0, std)
            elif init_type == 'xavier':
                nn.init.xavier_normal_(m.weight, gain=math.sqrt(2))
            elif init_type == 'kaiming':
                nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in')
            elif init_type == 'orthogonal':
                nn.init.orthogonal_(m.weight, gain=math.sqrt(2))
            elif init_type == 'default':
                pass
            else:
                assert 0, "Unsupported initialization: {}".format(init_type)
            if hasattr(m, 'bias') and m.bias is not None:
                nn.init.constant_(m.bias, 0.0)

    return init_fun

def freeze(module):
    for param in module.parameters():
        param.requires_grad = False

def unfreeze(module):
    for param in module.parameters():
        param.requires_grad = True

def get_optimizer(opt, model):
    lr = float(opt['hyper_params']['lr'])
    beta1 = float(opt['model']['beta1'])
    weight_decay = float(opt['model']['weight_decay'])
    opt_name = opt['model']['optimizer']

    optim_params = []
    # weight decay
    for key, value in model.named_parameters():
        if not value.requires_grad:
            continue  # frozen weights

        if key[-4:] == 'bias':
            optim_params += [{'params': value, 'weight_decay': 0.0}]
        else:
            optim_params += [{'params': value,
                              'weight_decay': weight_decay}]

    if opt_name == 'Adam':
        return optim.Adam(optim_params,
                          lr=lr,
                          betas=(beta1, 0.999),
                          eps=1e-5)
    else:
        err = '{} not implemented yet'.format(opt_name)
        logging.error(err)
        raise NotImplementedError(err)


def get_activation(activation):
    if activation is None:
        return nn.Identity()

    act_func = {
        'relu': nn.ReLU(),
        'sigmoid': nn.Sigmoid(),
        'tanh': nn.Tanh(),
        'prelu': nn.PReLU(),
        'leaky': nn.LeakyReLU(0.2),
        'gelu': nn.GELU(),
    }
    if activation not in act_func.keys():
        logging.error("activation {} is not implemented yet".format(activation))
        assert False

    return act_func[activation]

def get_norm(out_channels, norm_type='Instance'):
    norm_set = ['Instance', 'Batch', 'Group']
    if norm_type not in norm_set:
        err = "Normalization {} has not been implemented yet".format(norm_type)
        logging.error(err)
        raise ValueError(err)

    if norm_type == 'Instance':
        return nn.InstanceNorm2d(out_channels, affine=True)

    if norm_type == 'Batch':
        return nn.BatchNorm2d(out_channels)

    if norm_type == 'Group':
        if out_channels >= 32:
            groups = 32
        else:
            groups = 1

        return nn.GroupNorm(groups, out_channels)

    else:
        raise NotImplementedError('{} has not been implemented yet'.format(norm_type))


def get_layer_info(out_channels, activation_func='relu'):
    activation = get_activation(activation_func)
    norm_layer = get_norm(out_channels, 'Group')
    return norm_layer, activation


class Conv(nn.Module):
    """ (convolution => [GroupNorm] => activation) """
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=3,
                 stride=1,
                 padding=1,
                 bias=True,
                 activation='leaky',
                 resnet=True):
        super().__init__()

        norm_layer, act_func = get_layer_info(out_channels, activation)

        if resnet and in_channels == out_channels:
            self.resnet = True
        else:
            self.resnet = False

        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, stride=stride, kernel_size=kernel_size, padding=padding, bias=bias),
            norm_layer,
            act_func)

    def forward(self, x):
        res = self.conv(x)

        if self.resnet:
            res = res + x

        return res


class Up(nn.Module):
    """ Upscaling then conv """

    def __init__(self, in_channels, out_channels, activation='relu', resnet=True):
        super().__init__()

        self.up_layer = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        self.up = Conv(in_channels, out_channels, activation=activation, resnet=resnet)

    def forward(self, x):
        x = self.up_layer(x)
        return self.up(x)


class DConv(nn.Module):
    """ Double Conv Layer
    """
    def __init__(self, in_channels, out_channels, activation='relu', resnet=True):
        super().__init__()

        self.conv1 = Conv(in_channels, out_channels, activation=activation, resnet=resnet)
        self.conv2 = Conv(out_channels, out_channels, activation=activation, resnet=resnet)

    def forward(self, x):
        return self.conv2(self.conv1(x))


class Encoder(nn.Module):
    def __init__(self, in_channels=3, mid_act='leaky', resnet=True):
        super(Encoder, self).__init__()
        self.in_conv = Conv(in_channels, 32-in_channels, stride=1, activation=mid_act, resnet=resnet)
        self.down_32_64 = Conv(32, 64, stride=2, activation=mid_act, resnet=resnet)
        self.down_64_64_1 = Conv(64, 64, activation=mid_act, resnet=resnet)
        self.down_64_128 = Conv(64, 128, stride=2, activation=mid_act, resnet=resnet)
        self.down_128_128_1 = Conv(128, 128, activation=mid_act, resnet=resnet)
        self.down_128_256 = Conv(128, 256, stride=2, activation=mid_act, resnet=resnet)
        self.down_256_256_1 = Conv(256, 256, activation=mid_act, resnet=resnet)
        self.down_256_512 = Conv(256, 512, stride=2, activation=mid_act, resnet=resnet)
        self.down_512_512_1 = Conv(512, 512, activation=mid_act, resnet=resnet)
        self.down_512_512_2 = Conv(512, 512, activation=mid_act, resnet=resnet)
        self.down_512_512_3 = Conv(512, 512, activation=mid_act, resnet=resnet)


    def forward(self, x):
        x1 = self.in_conv(x)            # 32 x 256 x 256
        x1 = torch.cat((x, x1), dim=1)

        x2 = self.down_32_64(x1)
        x3 = self.down_64_64_1(x2)

        x4 = self.down_64_128(x3)
        x5 = self.down_128_128_1(x4)

        x6 = self.down_128_256(x5)
        x7 = self.down_256_256_1(x6)

        x8 = self.down_256_512(x7)
        x9 = self.down_512_512_1(x8)
        x10 = self.down_512_512_2(x9)
        x11 = self.down_512_512_3(x10)

        return x11, x10, x9, x8, x7, x6, x5, x4, x3, x2, x1


class Decoder(nn.Module):
    """ Up Stream Sequence """

    def __init__(self,
                 out_channels=3,
                 mid_act='relu',
                 out_act='sigmoid',
                 resnet=True):
        super(Decoder, self).__init__()

        input_channel = 512
        fea_dim = 100

        self.up_16_16_1 = Conv(input_channel, 256, activation=mid_act, resnet=resnet)
        self.up_16_16_2 = Conv(768, 512, activation=mid_act, resnet=resnet)
        self.up_16_16_3 = Conv(1024, 512, activation=mid_act, resnet=resnet)

        self.up_16_32 = Up(1024, 256, activation=mid_act, resnet=resnet)
        self.up_32_32_1 = Conv(512, 256, activation=mid_act, resnet=resnet)

        self.up_32_64 = Up(512, 128, activation=mid_act, resnet=resnet)
        self.up_64_64_1 = Conv(256, 128, activation=mid_act, resnet=resnet)

        self.up_64_128 = Up(256, 64, activation=mid_act, resnet=resnet)
        self.up_128_128_1 = Conv(128, 64, activation=mid_act, resnet=resnet)

        self.up_128_256 = Up(128, 32, activation=mid_act, resnet=resnet)
        self.out_conv = Conv(64, out_channels, activation=out_act)


    def forward(self, x, ibl):
        x11, x10, x9, x8, x7, x6, x5, x4, x3, x2, x1 = x

        h, w = x10.shape[2:]
        # flatten the 32 x 16 IBL into a 512-vector, broadcast over the bottleneck grid
        y = ibl.view(-1, 512, 1, 1).repeat(1, 1, h, w)

        y = self.up_16_16_1(y)          # 256 x 16 x 16

        y = torch.cat((x10, y), dim=1)  # 768 x 16 x 16
        y = self.up_16_16_2(y)          # 512 x 16 x 16

        y = torch.cat((x9, y), dim=1)   # 1024 x 16 x 16
        y = self.up_16_16_3(y)          # 512 x 16 x 16

        y = torch.cat((x8, y), dim=1)   # 1024 x 16 x 16
        y = self.up_16_32(y)            # 256 x 32 x 32

        y = torch.cat((x7, y), dim=1)
        y = self.up_32_32_1(y)          # 256 x 32 x 32

        y = torch.cat((x6, y), dim=1)
        y = self.up_32_64(y)

        y = torch.cat((x5, y), dim=1)
        y = self.up_64_64_1(y)          # 128 x 64 x 64

        y = torch.cat((x4, y), dim=1)
        y = self.up_64_128(y)

        y = torch.cat((x3, y), dim=1)
        y = self.up_128_128_1(y)        # 64 x 128 x 128

        y = torch.cat((x2, y), dim=1)
        y = self.up_128_256(y)          # 32 x 256 x 256

        y = torch.cat((x1, y), dim=1)
        y = self.out_conv(y)            # 3 x 256 x 256

        return y


class SSN_Model(nn.Module):
    """ Soft Shadow Network backbone (the docstring previously said
    'Relighting Net', a leftover from an earlier project). """

    def __init__(self,
                 in_channels=3,
                 out_channels=3,
                 mid_act='leaky',
                 out_act='sigmoid',
                 resnet=True):
        super(SSN_Model, self).__init__()

        self.out_act = out_act

        self.encoder = Encoder(in_channels, mid_act=mid_act, resnet=resnet)
        self.decoder = Decoder(out_channels, mid_act=mid_act, out_act=out_act, resnet=resnet)

        # init weights
        init_func = weights_init('gaussian', std=1e-3)
        self.encoder.apply(init_func)
        self.decoder.apply(init_func)


    def forward(self, x, ibl):
        """
        x:   cutout/mask tensor, B x C x 256 x 256
        ibl: image-based light map, flattened to a 512-vector in the decoder
        Returns the predicted shadow map (scaled by 30 for sigmoid output).
        """
        latent = self.encoder(x)
        pred = self.decoder(latent, ibl)

        if self.out_act == 'sigmoid':
            pred = pred * 30.0

        return pred


if __name__ == '__main__':
    x = torch.randn(5, 1, 256, 256)
    ibl = torch.randn(5, 1, 32, 16)
    model = SSN_Model(1, 1)

    y = model(x, ibl)

    print('Output: ', y.shape)
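One subtlety in Decoder.forward above: the 32 x 16 IBL map is flattened into a 512-channel vector and broadcast across the bottleneck grid before being mixed with the skip features. A standalone sketch of just that reshape:

import torch

ibl = torch.randn(5, 1, 32, 16)   # B x 1 x 32 x 16, as in the __main__ test
h = w = 16                        # bottleneck resolution for 256 x 256 inputs
y = ibl.view(-1, 512, 1, 1).repeat(1, 1, h, w)   # 1 * 32 * 16 == 512
print(y.shape)                    # torch.Size([5, 512, 16, 16])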
models/SSN_v1.py
ADDED
@@ -0,0 +1,290 @@
1 |
+
import torch
|
2 |
+
import torch.nn as nn
|
3 |
+
import torch.nn.functional as F
|
4 |
+
from torchvision import transforms
|
5 |
+
import numpy as np
|
6 |
+
|
7 |
+
def get_activation(activation_func):
|
8 |
+
act_func = {
|
9 |
+
"relu":nn.ReLU(),
|
10 |
+
"sigmoid":nn.Sigmoid(),
|
11 |
+
"prelu":nn.PReLU(num_parameters=1),
|
12 |
+
"leaky_relu": nn.LeakyReLU(negative_slope=0.2, inplace=False),
|
13 |
+
"gelu":nn.GELU()
|
14 |
+
}
|
15 |
+
|
16 |
+
if activation_func is None:
|
17 |
+
return nn.Identity()
|
18 |
+
|
19 |
+
if activation_func not in act_func.keys():
|
20 |
+
raise ValueError("activation function({}) is not found".format(activation_func))
|
21 |
+
|
22 |
+
activation = act_func[activation_func]
|
23 |
+
return activation
|
24 |
+
|
25 |
+
|
26 |
+
def get_layer_info(out_channels, activation_func='relu'):
|
27 |
+
#act_func = {"relu":nn.ReLU(), "sigmoid":nn.Sigmoid(), "prelu":nn.PReLU(num_parameters=out_channels)}
|
28 |
+
|
29 |
+
# norm_layer = nn.BatchNorm2d(out_channels, momentum=0.9)
|
30 |
+
if out_channels >= 32:
|
31 |
+
groups = 32
|
32 |
+
else:
|
33 |
+
groups = 1
|
34 |
+
|
35 |
+
norm_layer = nn.GroupNorm(groups, out_channels)
|
36 |
+
activation = get_activation(activation_func)
|
37 |
+
return norm_layer, activation
|
38 |
+
|
39 |
+
|
40 |
+
class Conv(nn.Module):
|
41 |
+
""" (convolution => [BN] => ReLU) """
|
42 |
+
def __init__(self,
|
43 |
+
in_channels,
|
44 |
+
out_channels,
|
45 |
+
kernel_size=3,
|
46 |
+
stride=1,
|
47 |
+
padding=1,
|
48 |
+
bias=True,
|
49 |
+
activation='leaky',
|
50 |
+
style=False,
|
51 |
+
resnet=True):
|
52 |
+
super().__init__()
|
53 |
+
|
54 |
+
self.style = style
|
55 |
+
norm_layer, act_func = get_layer_info(in_channels, activation)
|
56 |
+
|
57 |
+
if resnet and in_channels == out_channels:
|
58 |
+
self.resnet = True
|
59 |
+
else:
|
60 |
+
self.resnet = False
|
61 |
+
|
62 |
+
if style:
|
63 |
+
self.styleconv = Conv2DMod(in_channels, out_channels, kernel_size)
|
64 |
+
self.relu = nn.LeakyReLU(0.2, inplace=True)
|
65 |
+
else:
|
66 |
+
self.norm = norm_layer
|
67 |
+
self.conv = nn.Conv2d(in_channels, out_channels, stride=stride, kernel_size=kernel_size, padding=padding, bias=bias)
|
68 |
+
self.act = act_func
|
69 |
+
|
70 |
+
def forward(self, x, style_fea=None):
|
71 |
+
if self.style:
|
72 |
+
res = self.styleconv(x, style_fea)
|
73 |
+
res = self.relu(res)
|
74 |
+
else:
|
75 |
+
h = self.conv(self.act(self.norm(x)))
|
76 |
+
if self.resnet:
|
77 |
+
res = h + x
|
78 |
+
else:
|
79 |
+
res = h
|
80 |
+
|
81 |
+
return res
|
82 |
+
|
83 |
+
|
84 |
+
class Conv2DMod(nn.Module):
|
85 |
+
def __init__(self, in_chan, out_chan, kernel, demod=True, stride=1, dilation=1, eps=1e-8, **kwargs):
|
86 |
+
super().__init__()
|
87 |
+
self.filters = out_chan
|
88 |
+
self.demod = demod
|
89 |
+
self.kernel = kernel
|
90 |
+
self.stride = stride
|
91 |
+
self.dilation = dilation
|
92 |
+
self.weight = nn.Parameter(torch.randn((out_chan, in_chan, kernel, kernel)))
|
93 |
+
self.eps = eps
|
94 |
+
nn.init.kaiming_normal_(self.weight, a=0, mode='fan_in', nonlinearity='leaky_relu')
|
95 |
+
|
96 |
+
def _get_same_padding(self, size, kernel, dilation, stride):
|
97 |
+
return ((size - 1) * (stride - 1) + dilation * (kernel - 1)) // 2
|
98 |
+
|
99 |
+
def forward(self, x, y):
|
100 |
+
b, c, h, w = x.shape
|
101 |
+
|
102 |
+
w1 = y[:, None, :, None, None]
|
103 |
+
w2 = self.weight[None, :, :, :, :]
|
104 |
+
weights = w2 * (w1 + 1)
|
105 |
+
|
106 |
+
if self.demod:
|
107 |
+
d = torch.rsqrt((weights ** 2).sum(dim=(2, 3, 4), keepdim=True) + self.eps)
|
108 |
+
weights = weights * d
|
109 |
+
|
110 |
+
x = x.reshape(1, -1, h, w)
|
111 |
+
|
112 |
+
_, _, *ws = weights.shape
|
113 |
+
weights = weights.reshape(b * self.filters, *ws)
|
114 |
+
|
115 |
+
padding = self._get_same_padding(h, self.kernel, self.dilation, self.stride)
|
        x = F.conv2d(x, weights, padding=padding, groups=b)

        x = x.reshape(-1, self.filters, h, w)
        return x


class Up(nn.Module):
    """ Upscaling then conv """

    def __init__(self, in_channels, out_channels, activation='relu', resnet=True):
        super().__init__()
        self.up_layer = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        self.up = Conv(in_channels, out_channels, activation=activation, resnet=resnet)

    def forward(self, x):
        x = self.up_layer(x)
        return self.up(x)


class DConv(nn.Module):
    """ Double Conv Layer """

    def __init__(self, in_channels, out_channels, activation='relu', resnet=True):
        super().__init__()
        self.conv1 = Conv(in_channels, out_channels, activation=activation, resnet=resnet)
        self.conv2 = Conv(out_channels, out_channels, activation=activation, resnet=resnet)

    def forward(self, x):
        return self.conv2(self.conv1(x))


class Encoder(nn.Module):
    def __init__(self, in_channels=3, mid_act='leaky', resnet=True):
        super(Encoder, self).__init__()
        self.in_conv = Conv(in_channels, 32 - in_channels, stride=1, activation=mid_act, resnet=resnet)
        self.down_32_64 = Conv(32, 64, stride=2, activation=mid_act, resnet=resnet)
        self.down_64_64_1 = Conv(64, 64, activation=mid_act, resnet=resnet)
        self.down_64_128 = Conv(64, 128, stride=2, activation=mid_act, resnet=resnet)
        self.down_128_128_1 = Conv(128, 128, activation=mid_act, resnet=resnet)
        self.down_128_256 = Conv(128, 256, stride=2, activation=mid_act, resnet=resnet)
        self.down_256_256_1 = Conv(256, 256, activation=mid_act, resnet=resnet)
        self.down_256_512 = Conv(256, 512, stride=2, activation=mid_act, resnet=resnet)
        self.down_512_512_1 = Conv(512, 512, activation=mid_act, resnet=resnet)
        self.down_512_512_2 = Conv(512, 512, activation=mid_act, resnet=resnet)
        self.down_512_512_3 = Conv(512, 512, activation=mid_act, resnet=resnet)

    def forward(self, x):
        x1 = self.in_conv(x)             # 32 x 256 x 256 after the concat below
        x1 = torch.cat((x, x1), dim=1)

        x2 = self.down_32_64(x1)
        x3 = self.down_64_64_1(x2)

        x4 = self.down_64_128(x3)
        x5 = self.down_128_128_1(x4)

        x6 = self.down_128_256(x5)
        x7 = self.down_256_256_1(x6)

        x8 = self.down_256_512(x7)
        x9 = self.down_512_512_1(x8)
        x10 = self.down_512_512_2(x9)
        x11 = self.down_512_512_3(x10)

        return x11, x10, x9, x8, x7, x6, x5, x4, x3, x2, x1


class Decoder(nn.Module):
    def __init__(self,
                 out_channels=3,
                 mid_act='relu',
                 out_act='sigmoid',
                 resnet=True):
        super(Decoder, self).__init__()

        input_channel = 512
        fea_dim = 100

        # style projection; defined for the style-injection variant but unused in this forward
        self.to_style1 = nn.Linear(in_features=fea_dim, out_features=input_channel)

        self.up_16_16_1 = Conv(input_channel, 256, activation=mid_act, resnet=resnet)
        self.up_16_16_2 = Conv(768, 512, activation=mid_act, resnet=resnet)
        self.up_16_16_3 = Conv(1024, 512, activation=mid_act, resnet=resnet)

        self.up_16_32 = Up(1024, 256, activation=mid_act, resnet=resnet)
        self.up_32_32_1 = Conv(512, 256, activation=mid_act, resnet=resnet)

        self.up_32_64 = Up(512, 128, activation=mid_act, resnet=resnet)
        self.up_64_64_1 = Conv(256, 128, activation=mid_act, resnet=resnet)

        self.up_64_128 = Up(256, 64, activation=mid_act, resnet=resnet)
        self.up_128_128_1 = Conv(128, 64, activation=mid_act, resnet=resnet)

        self.up_128_256 = Up(128, 32, activation=mid_act, resnet=resnet)
        self.out_conv = Conv(64, out_channels, activation=mid_act)

        self.out_act = get_activation(out_act)

    def forward(self, x):
        x11, x10, x9, x8, x7, x6, x5, x4, x3, x2, x1 = x

        y = self.up_16_16_1(x11)

        y = torch.cat((x10, y), dim=1)
        y = self.up_16_16_2(y)

        y = torch.cat((x9, y), dim=1)
        y = self.up_16_16_3(y)

        y = torch.cat((x8, y), dim=1)
        y = self.up_16_32(y)

        y = torch.cat((x7, y), dim=1)
        y = self.up_32_32_1(y)

        y = torch.cat((x6, y), dim=1)
        y = self.up_32_64(y)

        y = torch.cat((x5, y), dim=1)
        y = self.up_64_64_1(y)    # 128 x 64 x 64

        y = torch.cat((x4, y), dim=1)
        y = self.up_64_128(y)

        y = torch.cat((x3, y), dim=1)
        y = self.up_128_128_1(y)  # 64 x 128 x 128

        y = torch.cat((x2, y), dim=1)
        y = self.up_128_256(y)    # 32 x 256 x 256

        y = torch.cat((x1, y), dim=1)
        y = self.out_conv(y)      # out_channels x 256 x 256
        y = self.out_act(y)

        return y


class SSN_v1(nn.Module):
    """ Implementation of Relighting Net """

    def __init__(self,
                 in_channels=3,
                 out_channels=3,
                 mid_act='leaky',
                 out_act='sigmoid',
                 resnet=True):
        super(SSN_v1, self).__init__()
        self.encoder = Encoder(in_channels, mid_act=mid_act, resnet=resnet)
        self.decoder = Decoder(out_channels, mid_act=mid_act, out_act=out_act, resnet=resnet)

    def forward(self, x, softness=None):
        """
        x: B x in_channels x H x W input tensor
        softness: optional style/softness code; accepted for interface
                  compatibility (callers such as Sparse_PH omit it) but
                  unused by this variant
        Returns the decoder prediction, B x out_channels x H x W.
        """
        latent = self.encoder(x)
        pred = self.decoder(latent)

        return pred


if __name__ == '__main__':
    test_input = torch.randn(5, 1, 256, 256)
    style = torch.randn(5, 100)

    model = SSN_v1(1, 1, mid_act='gelu', out_act='gelu', resnet=True)
    test_out = model(test_input, style)

    print('Output shape: ', test_out.shape)
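The `groups=b` convolution at the top of this hunk is the standard trick for applying a different filter bank to every sample in a batch: the batch is folded into the channel dimension and a grouped convolution keeps each sample's channels separate. A small sketch of the pattern (sizes are illustrative; the surrounding class defines `self.filters`):

import torch
import torch.nn.functional as F

# illustrative sketch of per-sample filtering, not part of the committed file
b, c, h, w, filters, k = 4, 1, 32, 32, 8, 7
x = torch.randn(b, c, h, w)
weights = torch.randn(b * filters, c, k, k)   # one filter bank per sample

x = x.reshape(1, b * c, h, w)                 # fold batch into channels
y = F.conv2d(x, weights, padding=k // 2, groups=b)
y = y.reshape(b, filters, h, w)               # unfold: per-sample responses
print(y.shape)                                # torch.Size([4, 8, 32, 32])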
models/Sparse_PH.py
ADDED
@@ -0,0 +1,185 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import utils
from torchvision.transforms import Resize
from collections import OrderedDict
import numpy as np
import matplotlib.cm as cm
import matplotlib as mpl
from torchvision.transforms import InterpolationMode


from .abs_model import abs_model
from .blocks import *
from .SSN import SSN
from .SSN_v1 import SSN_v1
from .Loss.Loss import norm_loss, grad_loss
from .Attention_Unet import Attention_Unet

class Sparse_PH(abs_model):
    def __init__(self, opt):
        mid_act = opt['model']['mid_act']
        out_act = opt['model']['out_act']
        in_channels = opt['model']['in_channels']
        out_channels = opt['model']['out_channels']
        resnet = opt['model']['resnet']
        backbone = opt['model']['backbone']

        self.ncols = opt['hyper_params']['n_cols']
        self.focal = opt['model']['focal']
        self.clip = opt['model']['clip']

        self.norm_loss_ = opt['model']['norm_loss']
        self.grad_loss_ = opt['model']['grad_loss']
        self.ggrad_loss_ = opt['model']['ggrad_loss']
        self.lap_loss = opt['model']['lap_loss']

        self.clip_range = opt['dataset']['linear_scale'] + opt['dataset']['linear_offset']

        if backbone == 'Default':
            self.model = SSN_v1(in_channels=in_channels,
                                out_channels=out_channels,
                                mid_act=mid_act,
                                out_act=out_act,
                                resnet=resnet)
        elif backbone == 'ATTN':
            self.model = Attention_Unet(in_channels, out_channels, mid_act=mid_act, out_act=out_act)

        self.optimizer = get_optimizer(opt, self.model)
        self.visualization = {}

        self.norm_loss = norm_loss()
        self.grad_loss = grad_loss()


    def setup_input(self, x):
        return x


    def forward(self, x):
        return self.model(x)


    def compute_loss(self, y, pred):
        b = y.shape[0]

        # total_loss = avg_norm_loss(y, pred)
        nloss = self.norm_loss.loss(y, pred) * self.norm_loss_
        gloss = self.grad_loss.loss(pred) * self.grad_loss_
        ggloss = self.grad_loss.gloss(y, pred) * self.ggrad_loss_
        laploss = self.grad_loss.laploss(pred) * self.lap_loss

        total_loss = nloss + gloss + ggloss + laploss

        self.loss_log = {
            'norm_loss': nloss.item(),
            'grad_loss': gloss.item(),
            'grad_l1_loss': ggloss.item(),
            'lap_loss': laploss.item(),
        }

        if self.focal:
            total_loss = torch.pow(total_loss, 3)

        return total_loss


    def supervise(self, input_x, y, is_training:bool)->float:
        optimizer = self.optimizer
        model = self.model

        x = input_x['x']

        optimizer.zero_grad()
        pred = self.forward(x)
        if self.clip:
            pred = torch.clip(pred, 0.0, self.clip_range)

        loss = self.compute_loss(y, pred)
        if is_training:
            loss.backward()
            optimizer.step()

        xc = x.shape[1]
        for i in range(xc):
            self.visualization['x{}'.format(i)] = x[:, i:i+1].detach()

        self.visualization['y_fore'] = y[:, 0:1].detach()
        self.visualization['y_back'] = y[:, 1:2].detach()
        self.visualization['pred_fore'] = pred[:, 0:1].detach()
        self.visualization['pred_back'] = pred[:, 1:2].detach()

        return loss.item()


    def get_visualize(self) -> OrderedDict:
        """ Convert to visualization numpy array
        """
        nrows = self.ncols
        visualizations = self.visualization
        ret_vis = OrderedDict()

        for k, v in visualizations.items():
            batch = v.shape[0]
            n = min(nrows, batch)

            plot_v = v[:n]
            ret_vis[k] = np.clip(utils.make_grid(plot_v.cpu(), nrow=nrows).numpy().transpose(1,2,0), 0.0, 1.0)
            ret_vis[k] = self.plasma(ret_vis[k])

        return ret_vis


    def get_logs(self):
        return self.loss_log


    def inference(self, x):
        x, device = x['x'], x['device']
        x = torch.from_numpy(x.transpose((2,0,1))).unsqueeze(dim=0).float().to(device)
        pred = self.forward(x)

        pred = pred[0].detach().cpu().numpy().transpose((1,2,0))

        return pred


    def batch_inference(self, x):
        x = x['x']
        pred = self.forward(x)
        return pred


    """ Getter & Setter
    """
    def get_models(self) -> dict:
        return {'model': self.model}


    def get_optimizers(self) -> dict:
        return {'optimizer': self.optimizer}


    def set_models(self, models: dict):
        # input test
        if 'model' not in models.keys():
            raise ValueError('{} not in self.model'.format('model'))

        self.model = models['model']


    def set_optimizers(self, optimizer: dict):
        self.optimizer = optimizer['optimizer']


    ####################
    # Personal Methods #
    ####################
    def plasma(self, x):
        norm = mpl.colors.Normalize(vmin=0.0, vmax=1)
        mapper = cm.ScalarMappable(norm=norm, cmap='plasma')
        bimg = mapper.to_rgba(x[:,:,0])[:,:,:3]

        return bimg
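Sparse_PH's `compute_loss` is a weighted sum of one data term and three gradient-based regularizers, optionally cubed when `focal` is set so that high-error batches dominate the update. A minimal sketch of that shape, using plain L1 stand-ins for the repo's `norm_loss`/`grad_loss` classes (weights and sizes here are illustrative, not the trained configuration):

import torch
import torch.nn.functional as F

def sketch_compute_loss(y, pred, w_norm=1.0, w_grad=0.1, focal=True):
    # illustrative stand-in for Sparse_PH.compute_loss, not part of the commit
    nloss = F.l1_loss(pred, y) * w_norm                       # data term
    gx = (pred[..., :, 1:] - pred[..., :, :-1]).abs().mean()  # horizontal gradients
    gy = (pred[..., 1:, :] - pred[..., :-1, :]).abs().mean()  # vertical gradients
    gloss = (gx + gy) * w_grad                                # smoothness term

    total = nloss + gloss
    if focal:
        total = torch.pow(total, 3)  # "focal": amplify large-error batches
    return total

y, pred = torch.rand(2, 2, 64, 64), torch.rand(2, 2, 64, 64)
print(sketch_compute_loss(y, pred).item())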
models/__init__.py
ADDED
@@ -0,0 +1,43 @@
# SRC: https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/blob/master/models/__init__.py
import logging
import importlib

from .abs_model import abs_model


def find_model_using_name(model_name):
    """Import the module "models/[model_name].py".
    In that file, the class whose name matches model_name
    (case-insensitively) and subclasses abs_model will be
    returned for instantiation.
    """
    model_filename = "models." + model_name
    modellib = importlib.import_module(model_filename)
    model = None

    target_model_name = model_name
    for name, cls in modellib.__dict__.items():
        if name.lower() == target_model_name.lower() \
           and issubclass(cls, abs_model):
            model = cls

    if model is None:
        err = "In %s.py, there should be a subclass of abs_model with a class name that matches %s in lowercase." % (model_filename, target_model_name)
        logging.error(err)
        exit(1)  # non-zero exit so callers can detect the failure

    return model


def create_model(opt):
    """Create a model given the option.
    This function is the main interface between this package and 'train.py'/'test.py'
    Example:
        >>> from models import create_model
        >>> model = create_model(opt)
    """
    model = find_model_using_name(opt['model']['name'])
    instance = model(opt)
    logging.info("model [%s] was created" % type(instance).__name__)
    return instance
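`find_model_using_name` resolves a config string to a class by importing `models/<name>.py` and scanning its namespace for a case-insensitive name match that subclasses `abs_model`. A condensed restatement of that lookup, assuming the `models` package is importable from the working directory (this sketch is not part of the commit):

import importlib

def sketch_find_model(model_name: str):
    # illustrative restatement of find_model_using_name
    modellib = importlib.import_module('models.' + model_name)
    for name, cls in vars(modellib).items():
        if name.lower() == model_name.lower() and isinstance(cls, type):
            return cls
    raise ImportError('no class named {} in models/{}.py'.format(model_name, model_name))

# e.g. sketch_find_model('Sparse_PH') returns the Sparse_PH class,
# which create_model then instantiates with the parsed YAML options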
models/__pycache__/SSN.cpython-39.pyc
ADDED
Binary file (4.11 kB)
models/__pycache__/SSN_Model.cpython-39.pyc
ADDED
Binary file (8.96 kB)
models/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (1.42 kB)
models/__pycache__/abs_model.cpython-39.pyc
ADDED
Binary file (2.14 kB)
models/__pycache__/blocks.cpython-39.pyc
ADDED
Binary file (6.92 kB)
models/abs_model.py
ADDED
@@ -0,0 +1,73 @@
from abc import ABC, abstractmethod
from collections import OrderedDict

class abs_model(ABC):
    """ Training Related Interface
    """
    @abstractmethod
    def setup_input(self, x):
        pass


    @abstractmethod
    def forward(self, x):
        pass


    @abstractmethod
    def supervise(self, input_x, y, is_training:bool)->float:
        pass


    @abstractmethod
    def get_visualize(self) -> OrderedDict:
        return {}


    """ Inference Related Interface
    """
    @abstractmethod
    def inference(self, x):
        pass


    @abstractmethod
    def batch_inference(self, x):
        pass


    """ Logging/Visualization Related Interface
    """
    @abstractmethod
    def get_logs(self):
        pass


    """ Getter & Setter
    """
    @abstractmethod
    def get_models(self) -> dict:
        """ GAN may have two models
        """
        pass


    @abstractmethod
    def get_optimizers(self) -> dict:
        """ GAN may have two optimizers
        """
        pass


    @abstractmethod
    def set_models(self, models) -> dict:
        """ GAN may have two models
        """
        pass


    @abstractmethod
    def set_optimizers(self, optimizers: dict):
        """ GAN may have two optimizers
        """
        pass
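Every concrete model in this repo (SSN, Sparse_PH, Template, ...) must implement all of these abstract methods before it can be instantiated. A do-nothing subclass, for illustration only, showing the minimum surface a new model has to provide (the class name is hypothetical):

from collections import OrderedDict
from models.abs_model import abs_model

class NullModel(abs_model):
    # hypothetical example, not part of the committed files
    def setup_input(self, x): return x
    def forward(self, x): return x
    def supervise(self, input_x, y, is_training: bool) -> float: return 0.0
    def get_visualize(self) -> OrderedDict: return OrderedDict()
    def inference(self, x): return x
    def batch_inference(self, x): return x
    def get_logs(self): return {}
    def get_models(self) -> dict: return {}
    def get_optimizers(self) -> dict: return {}
    def set_models(self, models): pass
    def set_optimizers(self, optimizers): pass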
models/attention.py
ADDED
@@ -0,0 +1,85 @@
from inspect import isfunction
import math
import torch
from torch import nn, einsum
import torch.nn.functional as F

from .blocks import get_norm, zero_module


def QKV_Attention(qkv, num_heads):
    """
    Apply QKV attention.
    :param qkv: an [N x (3 * C) x T] tensor of Qs, Ks, and Vs.
    :return: an [N x H' x T] tensor after attention.
    """
    B, C, HW = qkv.shape
    if C % 3 != 0:
        raise ValueError('QKV shape is wrong: {}, {}, {}'.format(B, C, HW))

    split_size = C // (3 * num_heads)
    q, k, v = qkv.chunk(3, dim=1)
    scale = 1.0 / math.sqrt(math.sqrt(split_size))
    weight = torch.einsum('bct, bcs->bts',
                          (q * scale).view(B * num_heads, split_size, HW),
                          (k * scale).view(B * num_heads, split_size, HW))

    weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype)
    ret = torch.einsum("bts,bcs->bct", weight, v.reshape(B * num_heads, split_size, HW))

    return ret.reshape(B, -1, HW)


class AttentionBlock(nn.Module):
    """
    https://github.com/CompVis/latent-diffusion/blob/main/ldm/modules/diffusionmodules/openaimodel.py
    https://github.com/whai362/PVT/blob/a24ba02c249a510581a84f821c26322534b03a10/detection/pvt_v2.py#L57
    """

    def __init__(self, in_channels, num_heads, qkv_bias=False, sr_ratio=1, linear=True):
        super().__init__()

        self.num_heads = num_heads
        self.norm = get_norm(in_channels, 'Group')
        self.qkv = nn.Conv1d(in_channels=in_channels, out_channels=in_channels * 3, kernel_size=1)

        self.proj = zero_module(nn.Conv1d(in_channels=in_channels, out_channels=in_channels, kernel_size=1))


    def forward(self, x):
        b, c, *spatial = x.shape
        num_heads = self.num_heads

        x = x.reshape(b, c, -1)  # B x C x HW
        x = self.norm(x)
        qkv = self.qkv(x)        # B x C x HW -> B x 3C x HW
        h = QKV_Attention(qkv, num_heads)
        h = self.proj(h)

        return (x + h).reshape(b, c, *spatial)  # additive attention, similar to ResNet


def get_model_size(model):
    param_size = 0
    for param in model.parameters():
        param_size += param.nelement() * param.element_size()

    buffer_size = 0
    for buffer in model.buffers():
        buffer_size += buffer.nelement() * buffer.element_size()

    size_all_mb = (param_size + buffer_size) / 1024 ** 2
    print('model size: {:.3f}MB'.format(size_all_mb))
    # return param_size + buffer_size
    return size_all_mb


if __name__ == '__main__':
    model = AttentionBlock(in_channels=256, num_heads=8)

    x = torch.randn(5, 256, 32, 32, dtype=torch.float32)
    y = model(x)
    print('{}, {}'.format(x.shape, y.shape))

    get_model_size(model)
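The head bookkeeping in `QKV_Attention` is the easiest part to misread: the `[B x 3C x HW]` projection is chunked into Q/K/V, and each stream is folded into `B * num_heads` independent attention problems of width `split_size`. A quick shape walk-through with assumed sizes (this sketch is not part of the commit):

import torch

B, C, HW, num_heads = 2, 256, 1024, 8       # illustrative sizes; C is per-stream width
qkv = torch.randn(B, 3 * C, HW)

q, k, v = qkv.chunk(3, dim=1)               # each: B x C x HW
split_size = C // num_heads                 # 32 channels per head
q = q.view(B * num_heads, split_size, HW)
k = k.view(B * num_heads, split_size, HW)

weight = torch.einsum('bct,bcs->bts', q, k) # (B*heads) x HW x HW attention map
print(weight.shape)                         # torch.Size([16, 1024, 1024])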
models/blocks.py
ADDED
@@ -0,0 +1,238 @@
from enum import Enum
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import logging


def get_model_size(model):
    param_size = 0
    for param in model.parameters():
        param_size += param.nelement() * param.element_size()

    buffer_size = 0
    for buffer in model.buffers():
        buffer_size += buffer.nelement() * buffer.element_size()

    size_all_mb = (param_size + buffer_size) / 1024 ** 2
    print('model size: {:.3f}MB'.format(size_all_mb))
    # return param_size + buffer_size
    return size_all_mb


def weights_init(init_type='gaussian'):
    def init_fun(m):
        classname = m.__class__.__name__
        if (classname.find('Conv') == 0 or classname.find(
                'Linear') == 0) and hasattr(m, 'weight'):
            if init_type == 'gaussian':
                nn.init.normal_(m.weight, 0.0, 0.02)
            elif init_type == 'xavier':
                nn.init.xavier_normal_(m.weight, gain=math.sqrt(2))
            elif init_type == 'kaiming':
                nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in')
            elif init_type == 'orthogonal':
                nn.init.orthogonal_(m.weight, gain=math.sqrt(2))
            elif init_type == 'default':
                pass
            else:
                assert 0, "Unsupported initialization: {}".format(init_type)
            if hasattr(m, 'bias') and m.bias is not None:
                nn.init.constant_(m.bias, 0.0)

    return init_fun


def freeze(module):
    for param in module.parameters():
        param.requires_grad = False


def unfreeze(module):
    for param in module.parameters():
        param.requires_grad = True


def get_optimizer(opt, model):
    lr = float(opt['hyper_params']['lr'])
    beta1 = float(opt['model']['beta1'])
    weight_decay = float(opt['model']['weight_decay'])
    opt_name = opt['model']['optimizer']

    optim_params = []
    # bias terms are excluded from weight decay
    for key, value in model.named_parameters():
        if not value.requires_grad:
            continue  # frozen weights

        if key[-4:] == 'bias':
            optim_params += [{'params': value, 'weight_decay': 0.0}]
        else:
            optim_params += [{'params': value,
                              'weight_decay': weight_decay}]

    if opt_name == 'Adam':
        return optim.Adam(optim_params,
                          lr=lr,
                          betas=(beta1, 0.999),
                          eps=1e-5)
    else:
        err = '{} not implemented yet'.format(opt_name)
        logging.error(err)
        raise NotImplementedError(err)


def get_activation(activation):
    act_func = {
        'relu': nn.ReLU(),
        'sigmoid': nn.Sigmoid(),
        'tanh': nn.Tanh(),
        'prelu': nn.PReLU(),
        'leaky': nn.LeakyReLU(0.2),       # both spellings appear across the configs/models
        'leaky_relu': nn.LeakyReLU(0.2),
        'gelu': nn.GELU(),
    }
    if activation not in act_func.keys():
        logging.error("activation {} is not implemented yet".format(activation))
        assert False

    return act_func[activation]


def get_norm(out_channels, norm_type='Group', groups=32):
    norm_set = ['Instance', 'Batch', 'Group']
    if norm_type not in norm_set:
        err = "Normalization {} has not been implemented yet".format(norm_type)
        logging.error(err)
        raise ValueError(err)

    if norm_type == 'Instance':
        return nn.InstanceNorm2d(out_channels, affine=True)

    if norm_type == 'Batch':
        return nn.BatchNorm2d(out_channels)

    if norm_type == 'Group':
        if out_channels >= 32:
            groups = 32
        else:
            groups = max(out_channels // 2, 1)
        # note: out_channels must be divisible by the chosen group count,
        # which holds for the power-of-two widths used in this repo
        return nn.GroupNorm(groups, out_channels)
    else:
        raise NotImplementedError


class Conv(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, norm_type='Batch', activation='relu'):
        super().__init__()

        act_func = get_activation(activation)
        norm_layer = get_norm(out_channels, norm_type)
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=True, padding_mode='reflect'),
            norm_layer,
            act_func)

    def forward(self, x):
        return self.conv(x)


def zero_module(module):
    """
    Zero out the parameters of a module and return it.
    """
    for p in module.parameters():
        p.detach().zero_()
    return module


class Up(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        return F.interpolate(x, scale_factor=2, mode='bilinear')


class Down(nn.Module):
    def __init__(self, channels, use_conv):
        super().__init__()
        self.use_conv = use_conv

        if self.use_conv:
            self.op = nn.Conv2d(channels, channels, 3, stride=2, padding=1)
        else:
            self.op = nn.AvgPool2d(kernel_size=3, stride=2, padding=1)


    def forward(self, x):
        return self.op(x)


class Res_Type(Enum):
    UP = 1
    DOWN = 2
    SAME = 3


class ResBlock(nn.Module):
    def __init__(self, in_channels: int, out_channels: int, dropout=0.0, updown=Res_Type.DOWN, mid_act='leaky'):
        """ ResBlock to cover several cases:
        1. Up/Down/Same
        2. in_channels != out_channels
        """
        super().__init__()

        self.updown = updown

        # the input normalization sees in_channels-wide features
        self.in_norm = get_norm(in_channels, 'Group')
        self.in_act = get_activation(mid_act)
        self.in_conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, bias=True)

        # up/down sampling, applied to both the residual and the skip branch
        if self.updown == Res_Type.DOWN:
            self.h_updown = Down(in_channels, use_conv=True)
            self.x_updown = Down(in_channels, use_conv=True)
        elif self.updown == Res_Type.UP:
            self.h_updown = Up()
            self.x_updown = Up()
        else:
            self.h_updown = nn.Identity()
            self.x_updown = nn.Identity()

        # 1x1 conv matches channel counts on the skip path when in_channels != out_channels
        if in_channels != out_channels:
            self.skip = nn.Conv2d(in_channels, out_channels, kernel_size=1)
        else:
            self.skip = nn.Identity()

        self.out_layer = nn.Sequential(
            get_norm(out_channels, 'Group'),
            get_activation(mid_act),
            nn.Dropout(p=dropout),
            zero_module(nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=True))
        )


    def forward(self, x):
        # in layer
        h = self.in_act(self.in_norm(x))
        h = self.in_conv(self.h_updown(h))
        x = self.skip(self.x_updown(x))

        # out layer
        h = self.out_layer(h)
        return x + h


if __name__ == '__main__':
    x = torch.randn(5, 3, 256, 256)
    up = Up()
    conv_down = Down(3, True)
    pool_down = Down(3, False)

    print('Up: {}'.format(up(x).shape))
    print('Conv down: {}'.format(conv_down(x).shape))
    print('Pool down: {}'.format(pool_down(x).shape))

    up_model = ResBlock(3, 6, updown=Res_Type.UP)
    down_model = ResBlock(3, 6, updown=Res_Type.DOWN)

    print('model up: {}'.format(up_model(x).shape))
    print('model down: {}'.format(down_model(x).shape))
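`get_norm`'s Group branch uses 32 groups whenever the width allows and otherwise falls back to `max(out_channels // 2, 1)`; since `nn.GroupNorm` requires the channel count to be divisible by the group count, this relies on the widths used throughout the repo. A few concrete cases (illustrative sketch, not part of the commit):

import torch.nn as nn

def pick_groups(channels):
    # same rule as get_norm's 'Group' branch
    return 32 if channels >= 32 else max(channels // 2, 1)

for c in (3, 8, 32, 512):                 # widths that appear in these models
    gn = nn.GroupNorm(pick_groups(c), c)  # would raise if c % groups != 0
    print(c, '->', gn.num_groups, 'groups')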
models/pvt_attention.py
ADDED
@@ -0,0 +1,240 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from functools import partial

from timm.models.layers import DropPath, to_2tuple, trunc_normal_
from timm.models.registry import register_model
from timm.models.vision_transformer import _cfg
import math


class DWConv(nn.Module):
    def __init__(self, dim=768):
        super(DWConv, self).__init__()
        self.dwconv = nn.Conv2d(dim, dim, 3, 1, 1, bias=True, groups=dim)

    def forward(self, x, H, W):
        B, N, C = x.shape
        x = x.transpose(1, 2).view(B, C, H, W)
        x = self.dwconv(x)
        x = x.flatten(2).transpose(1, 2)

        return x


class Mlp(nn.Module):
    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0., linear=False):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.dwconv = DWConv(hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)
        self.linear = linear
        if self.linear:
            self.relu = nn.ReLU(inplace=True)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            fan_out //= m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()

    def forward(self, x, H, W):
        x = self.fc1(x)
        if self.linear:
            x = self.relu(x)
        x = self.dwconv(x, H, W)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x


class Attention(nn.Module):
    def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0., sr_ratio=1, linear=False):
        super().__init__()
        assert dim % num_heads == 0, f"dim {dim} should be divided by num_heads {num_heads}."

        self.dim = dim
        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = qk_scale or head_dim ** -0.5

        self.q = nn.Linear(dim, dim, bias=qkv_bias)
        self.kv = nn.Linear(dim, dim * 2, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

        self.linear = linear
        self.sr_ratio = sr_ratio
        if not linear:
            if sr_ratio > 1:
                self.sr = nn.Conv2d(dim, dim, kernel_size=sr_ratio, stride=sr_ratio)
                self.norm = nn.LayerNorm(dim)
        else:
            self.pool = nn.AdaptiveAvgPool2d(7)
            self.sr = nn.Conv2d(dim, dim, kernel_size=1, stride=1)
            self.norm = nn.LayerNorm(dim)
            self.act = nn.GELU()
        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            fan_out //= m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()

    def forward(self, x, H, W):
        B, N, C = x.shape
        q = self.q(x).reshape(B, N, self.num_heads, C // self.num_heads).permute(0, 2, 1, 3)

        if not self.linear:
            if self.sr_ratio > 1:
                x_ = x.permute(0, 2, 1).reshape(B, C, H, W)
                x_ = self.sr(x_).reshape(B, C, -1).permute(0, 2, 1)
                x_ = self.norm(x_)
                kv = self.kv(x_).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
            else:
                kv = self.kv(x).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
        else:
            x_ = x.permute(0, 2, 1).reshape(B, C, H, W)
            x_ = self.sr(self.pool(x_)).reshape(B, C, -1).permute(0, 2, 1)
            x_ = self.norm(x_)
            x_ = self.act(x_)
            kv = self.kv(x_).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
        k, v = kv[0], kv[1]

        attn = (q @ k.transpose(-2, -1)) * self.scale
        attn = attn.softmax(dim=-1)
        attn = self.attn_drop(attn)

        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
        x = self.proj(x)
        x = self.proj_drop(x)

        return x


class Block(nn.Module):

    def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
                 drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm, sr_ratio=1, linear=False):
        super().__init__()
        self.norm1 = norm_layer(dim)
        self.attn = Attention(
            dim,
            num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale,
            attn_drop=attn_drop, proj_drop=drop, sr_ratio=sr_ratio, linear=linear)
        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        self.norm2 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop, linear=linear)

        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            fan_out //= m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()

    def forward(self, x, H, W):
        x = x + self.drop_path(self.attn(self.norm1(x), H, W))
        x = x + self.drop_path(self.mlp(self.norm2(x), H, W))

        return x


class OverlapPatchEmbed(nn.Module):
    """ Image to Patch Embedding
    """

    def __init__(self, img_size=224, patch_size=7, stride=4, in_chans=3, embed_dim=768):
        super().__init__()
        img_size = to_2tuple(img_size)
        patch_size = to_2tuple(patch_size)

        assert max(patch_size) > stride, "Set larger patch_size than stride"

        self.img_size = img_size
        self.patch_size = patch_size
        self.H, self.W = img_size[0] // stride, img_size[1] // stride
        self.num_patches = self.H * self.W
        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=stride,
                              padding=(patch_size[0] // 2, patch_size[1] // 2))
        self.norm = nn.LayerNorm(embed_dim)

        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            fan_out //= m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()

    def forward(self, x):
        x = self.proj(x)
        _, _, H, W = x.shape
        x = x.flatten(2).transpose(1, 2)
        x = self.norm(x)

        return x, H, W


if __name__ == '__main__':
    test = torch.randn(5, 3, 224, 224)

    embed_dim = 768
    patch_embed = OverlapPatchEmbed(embed_dim=embed_dim)
    block = Block(embed_dim, 1)

    print('x: {}'.format(test.shape))
    pe, H, W = patch_embed(test)
    print('After patch: {}'.format(pe.shape))
    y = block(pe, H, W)
    print('After block: {}'.format(y.shape))
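The spatial-reduction step in `Attention` is what keeps PVT-style attention affordable: when `sr_ratio > 1`, keys and values are computed from a strided-convolution-downsampled copy of the tokens, shrinking the attended sequence by `sr_ratio**2`. Under assumed sizes (illustrative sketch, not part of the commit):

import torch
import torch.nn as nn

B, C, H, W = 2, 64, 56, 56
sr_ratio = 8                                    # illustrative reduction factor
x = torch.randn(B, H * W, C)                    # token sequence, N = H*W = 3136

# same reduction Attention.forward applies before building K/V
sr = nn.Conv2d(C, C, kernel_size=sr_ratio, stride=sr_ratio)
x_ = x.permute(0, 2, 1).reshape(B, C, H, W)
x_ = sr(x_).reshape(B, C, -1).permute(0, 2, 1)

print(x.shape, '->', x_.shape)                  # keys/values: 3136 -> 49 tokens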
models/template.py
ADDED
@@ -0,0 +1,114 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import utils
from collections import OrderedDict

from .abs_model import abs_model
from .blocks import *
from .Loss.Loss import avg_norm_loss

class Template(abs_model):
    """ Standard Unet Implementation
    src: https://arxiv.org/pdf/1505.04597.pdf
    """
    def __init__(self, opt):
        resunet = opt['model']['resunet']
        out_act = opt['model']['out_act']
        norm_type = opt['model']['norm_type']
        in_channels = opt['model']['in_channels']
        out_channels = opt['model']['out_channels']
        self.ncols = opt['hyper_params']['n_cols']

        # note: Unet is not defined in .blocks as committed here; a concrete
        # implementation must be supplied before this template can run
        self.model = Unet(in_channels=in_channels,
                          out_channels=out_channels,
                          norm_type=norm_type,
                          out_act=out_act,
                          resunet=resunet)

        self.optimizer = get_optimizer(opt, self.model)
        self.visualization = {}


    def setup_input(self, x):
        return x


    def forward(self, x):
        return self.model(x)


    def compute_loss(self, y, pred):
        return avg_norm_loss(y, pred)


    def supervise(self, input_x, y, is_training:bool)->float:
        optimizer = self.optimizer
        model = self.model

        optimizer.zero_grad()
        pred = model(input_x)
        loss = self.compute_loss(y, pred)

        if is_training:
            loss.backward()
            optimizer.step()

        self.visualization['y'] = y.detach()
        self.visualization['pred'] = pred.detach()

        return loss.item()


    def get_visualize(self) -> OrderedDict:
        """ Convert to visualization numpy array
        """
        nrows = self.ncols
        visualizations = self.visualization
        ret_vis = OrderedDict()

        for k, v in visualizations.items():
            batch = v.shape[0]
            n = min(nrows, batch)

            plot_v = v[:n]
            ret_vis[k] = utils.make_grid(plot_v.cpu(), nrow=nrows).numpy().transpose(1,2,0)

        return ret_vis


    def inference(self, x):
        # TODO
        pass


    def batch_inference(self, x):
        # TODO
        pass


    """ Getter & Setter
    """
    def get_models(self) -> dict:
        return {'model': self.model}


    def get_optimizers(self) -> dict:
        return {'optimizer': self.optimizer}


    def set_models(self, models: dict):
        # input test
        if 'model' not in models.keys():
            raise ValueError('{} not in self.model'.format('model'))

        self.model = models['model']


    def set_optimizers(self, optimizer: dict):
        self.optimizer = optimizer['optimizer']


    ####################
    # Personal Methods #
    ####################
weights/SSN/0000001760.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:44328317fa836804554ae453fe1492a45cff724b5c13b5070211d6d860096089
size 283511041