diff --git a/IIR-Lab/Dockerfile b/IIR-Lab/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..6b4c569cf70f5b73a495a84b657d839709791861
--- /dev/null
+++ b/IIR-Lab/Dockerfile
@@ -0,0 +1,17 @@
+FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime
+
+ENV TZ=Asia
+ARG DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && apt-get install -y \
+    libpng-dev libjpeg-dev \
+    libopencv-dev ffmpeg \
+    libgl1-mesa-glx
+
+COPY requirements.txt .
+RUN python -m pip install --upgrade pip
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . /nightimage
+RUN chmod +x /nightimage/run.sh
+WORKDIR /nightimage
\ No newline at end of file
diff --git a/IIR-Lab/ISP_pipeline/.gitignore b/IIR-Lab/ISP_pipeline/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..b6e47617de110dea7ca47e087ff1347cc2646eda
--- /dev/null
+++ b/IIR-Lab/ISP_pipeline/.gitignore
@@ -0,0 +1,129 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
diff --git a/IIR-Lab/ISP_pipeline/Dockerfile b/IIR-Lab/ISP_pipeline/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..1c92a995e40704663df77e4598a31af9b9de4256
--- /dev/null
+++ b/IIR-Lab/ISP_pipeline/Dockerfile
@@ -0,0 +1,12 @@
+FROM python:3.9
+
+RUN apt-get update && apt-get install -y \
+    libpng-dev libjpeg-dev \
+    libopencv-dev ffmpeg \
+    libgl1-mesa-glx
+
+COPY requirements.txt .
+RUN python -m pip install --no-cache-dir -r requirements.txt
+
+COPY . /nightimaging
+WORKDIR /nightimaging
\ No newline at end of file
diff --git a/IIR-Lab/ISP_pipeline/LICENSE b/IIR-Lab/ISP_pipeline/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..5f5ae4a8c40d61a0d26503dce7e6c1c7234a07e6
--- /dev/null
+++ b/IIR-Lab/ISP_pipeline/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Color Reproduction and Synthesis
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/IIR-Lab/ISP_pipeline/__pycache__/debayer.cpython-312.pyc b/IIR-Lab/ISP_pipeline/__pycache__/debayer.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..62e8b3216c60b54463fe832372ac66548c9df700
Binary files /dev/null and b/IIR-Lab/ISP_pipeline/__pycache__/debayer.cpython-312.pyc differ
diff --git a/IIR-Lab/ISP_pipeline/__pycache__/debayer.cpython-39.pyc b/IIR-Lab/ISP_pipeline/__pycache__/debayer.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..10a3f15f2a8737e9d070f18872bbe033e84369a8
Binary files /dev/null and b/IIR-Lab/ISP_pipeline/__pycache__/debayer.cpython-39.pyc differ
diff --git a/IIR-Lab/ISP_pipeline/__pycache__/imaging.cpython-312.pyc b/IIR-Lab/ISP_pipeline/__pycache__/imaging.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..05d9736fbf0012d3ede06b9418fec800d07b4561
Binary files /dev/null and b/IIR-Lab/ISP_pipeline/__pycache__/imaging.cpython-312.pyc differ
diff --git a/IIR-Lab/ISP_pipeline/__pycache__/imaging.cpython-39.pyc b/IIR-Lab/ISP_pipeline/__pycache__/imaging.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0ad7f78e53609cdee6851eca2e8a5e7a7fb4e39f
Binary files /dev/null and b/IIR-Lab/ISP_pipeline/__pycache__/imaging.cpython-39.pyc differ
diff --git a/IIR-Lab/ISP_pipeline/__pycache__/process_pngs_isp.cpython-312.pyc b/IIR-Lab/ISP_pipeline/__pycache__/process_pngs_isp.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5f38a928c24365b967cd61cf376d9d8fd9fa1e82
Binary files /dev/null and b/IIR-Lab/ISP_pipeline/__pycache__/process_pngs_isp.cpython-312.pyc differ
diff --git a/IIR-Lab/ISP_pipeline/__pycache__/process_pngs_isp.cpython-39.pyc b/IIR-Lab/ISP_pipeline/__pycache__/process_pngs_isp.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1bbd415f279692aa63128c4cfdc0ec163e7e81b5
Binary files /dev/null and b/IIR-Lab/ISP_pipeline/__pycache__/process_pngs_isp.cpython-39.pyc differ
diff --git a/IIR-Lab/ISP_pipeline/__pycache__/utility.cpython-312.pyc 
b/IIR-Lab/ISP_pipeline/__pycache__/utility.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..78364817de05cfcccae13dae2f93b484804f3fff
Binary files /dev/null and b/IIR-Lab/ISP_pipeline/__pycache__/utility.cpython-312.pyc differ
diff --git a/IIR-Lab/ISP_pipeline/__pycache__/utility.cpython-39.pyc b/IIR-Lab/ISP_pipeline/__pycache__/utility.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3a0cc3db3952e096dc802ad15c4ee62debaa2c3d
Binary files /dev/null and b/IIR-Lab/ISP_pipeline/__pycache__/utility.cpython-39.pyc differ
diff --git a/IIR-Lab/ISP_pipeline/cfa_pattern_change.py b/IIR-Lab/ISP_pipeline/cfa_pattern_change.py
new file mode 100644
index 0000000000000000000000000000000000000000..abbd1d883677af0d6752c411506b1100e8af2df7
--- /dev/null
+++ b/IIR-Lab/ISP_pipeline/cfa_pattern_change.py
@@ -0,0 +1,74 @@
+import numpy as np
+import os
+import json
+import cv2
+
+def change_cfa_pattern(img):
+    # Split the mosaic into its four CFA sub-planes, then write them back
+    # in a different channel order, re-arranging the CFA pattern in place.
+    raw_colors = np.asarray([3, 1, 2, 0]).reshape((2, 2))
+    changed_raw_colors = np.asarray([0, 1, 2, 3]).reshape((2, 2))
+    subplanes = np.zeros((img.shape[0] // 2, img.shape[1] // 2, 4))
+    for i in range(2):
+        for j in range(2):
+            ch = raw_colors[i, j]
+            subplanes[:, :, ch] = img[i::2, j::2]
+    for i in range(2):
+        for j in range(2):
+            ch1 = changed_raw_colors[i, j]
+            img[i::2, j::2] = subplanes[:, :, ch1]
+
+    return img
+
+def rggb_raw(raw):
+    # pack RGGB Bayer raw to 4 channels
+    H, W = raw.shape
+    raw = raw[None, ...]
+    raw_pack = np.concatenate((raw[:, 0:H:2, 0:W:2],
+                               raw[:, 0:H:2, 1:W:2],
+                               raw[:, 1:H:2, 0:W:2],
+                               raw[:, 1:H:2, 1:W:2]), axis=0)
+    # tmp = rggb[...,0]
+    # rggb[...,0] = rggb[...,-1]
+    # rggb[...,-1] = tmp
+    return raw_pack
+
+def raw_rggb(raws):
+    # unpack 4 channels back to an RGGB Bayer mosaic
+    C, H, W = raws.shape
+    output = np.zeros((H * 2, W * 2)).astype(np.uint16)
+
+    output[0:2 * H:2, 0:2 * W:2] = raws[0:1, :, :]
+    output[0:2 * H:2, 1:2 * W:2] = raws[1:2, :, :]
+    output[1:2 * H:2, 0:2 * W:2] = raws[2:3, :, :]
+    output[1:2 * H:2, 1:2 * W:2] = raws[3:4, :, :]
+
+    return output
+
+if __name__ == "__main__":
+    json_path = "/data1/02_data/Train_Data/"
+    file_name = os.listdir(json_path)
+    json_list = []
+    for file_name_all in file_name:
+        if file_name_all.endswith(".json"):
+            json_list.append(json_path + file_name_all)
+    a = []
+    for i in range(len(json_list)):
+        with open(json_list[i], 'r', encoding='UTF-8') as f:
+            result = json.load(f)
+        # a,b = result["noise_profile"]
+        # black = result["white_level"]
+        cfa_pattern = result["cfa_pattern"]
+        if cfa_pattern[0] == 2:
+            a.append(json_list[i])
+    for j in range(len(a)):
+        pic_name, _ = os.path.splitext(a[j])
+        img = cv2.imread(pic_name + ".png", cv2.IMREAD_UNCHANGED)
+        # img1 = cv2.imread(pic_name+".png", cv2.IMREAD_UNCHANGED)
+        # test = img - img1
+        # print(test)
+        changed_img = change_cfa_pattern(img=img)
+        # cv2.imwrite(pic_name+"test1.png",changed_img)
+        np.save(pic_name + "origin.npy", img)
+        np.save(pic_name + "changed.npy", changed_img)
+        # np.save("./json_all.npy",result)
+
\ No newline at end of file
diff --git a/IIR-Lab/ISP_pipeline/debayer.py b/IIR-Lab/ISP_pipeline/debayer.py
new file mode 100644
index 0000000000000000000000000000000000000000..5e9855b53c633c5671a39d1042ec70d2dbeb80da
--- /dev/null
+++ b/IIR-Lab/ISP_pipeline/debayer.py
@@ -0,0 +1,1295 @@
+import numpy as np
+import math
+import time
+import utility
+from scipy import signal
+
+# =============================================================
+# function: debayer_mhc
+# demosaicing using
Malvar-He-Cutler algorithm +# http://www.ipol.im/pub/art/2011/g_mhcd/ +# ============================================================= +def debayer_mhc(raw, bayer_pattern="rggb", clip_range=[0, 65535], timeshow=False): + + # convert to float32 in case it was not + raw = np.float32(raw) + + # dimensions + width, height = utility.helpers(raw).get_width_height() + + # number of pixels to pad + no_of_pixel_pad = 2 + raw = np.pad(raw, \ + (no_of_pixel_pad, no_of_pixel_pad),\ + 'reflect') # reflect would not repeat the border value + + # allocate space for the R, G, B planes + R = np.empty( (height + no_of_pixel_pad * 2, width + no_of_pixel_pad * 2), dtype = np.float32 ) + G = np.empty( (height + no_of_pixel_pad * 2, width + no_of_pixel_pad * 2), dtype = np.float32 ) + B = np.empty( (height + no_of_pixel_pad * 2, width + no_of_pixel_pad * 2), dtype = np.float32 ) + + # create a RGB output + demosaic_out = np.empty( (height, width, 3), dtype = np.float32 ) + + # fill up the directly available values according to the Bayer pattern + if (bayer_pattern == "rggb"): + + G[::2, 1::2] = raw[::2, 1::2] + G[1::2, ::2] = raw[1::2, ::2] + R[::2, ::2] = raw[::2, ::2] + B[1::2, 1::2] = raw[1::2, 1::2] + + # Green channel + for i in range(no_of_pixel_pad, height + no_of_pixel_pad): + + # to display progress + t0 = time.process_time() + + for j in range(no_of_pixel_pad, width + no_of_pixel_pad): + + # G at Red location + if (((i % 2) == 0) and ((j % 2) == 0)): + G[i, j] = 0.125 * np.sum([-1. * R[i-2, j], \ + 2. * G[i-1, j], \ + -1. * R[i, j-2], 2. * G[i, j-1], 4. * R[i,j], 2. * G[i, j+1], -1. * R[i, j+2],\ + 2. * G[i+1, j], \ + -1. * R[i+2, j]]) + # G at Blue location + elif (((i % 2) != 0) and ((j % 2) != 0)): + G[i, j] = 0.125 * np.sum([-1. * B[i-2, j], \ + 2. * G[i-1, j], \ + -1. * B[i, j-2], 2. * G[i, j-1], 4. * B[i,j], 2. * G[i, j+1], -1. * B[i, j+2], \ + 2. * G[i+1, j],\ + -1. * B[i+2, j]]) + if (timeshow): + elapsed_time = time.process_time() - t0 + print("Green: row index: " + str(i-1) + " of " + str(height) + \ + " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") + + # Red and Blue channel + for i in range(no_of_pixel_pad, height + no_of_pixel_pad): + + # to display progress + t0 = time.process_time() + + for j in range(no_of_pixel_pad, width + no_of_pixel_pad): + + # Green locations in Red rows + if (((i % 2) == 0) and ((j % 2) != 0)): + # R at Green locations in Red rows + R[i, j] = 0.125 * np.sum([.5 * G[i-2, j],\ + -1. * G[i-1, j-1], -1. * G[i-1, j+1], \ + -1. * G[i, j-2], 4. * R[i, j-1], 5. * G[i,j], 4. * R[i, j+1], -1. * G[i, j+2], \ + -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ + .5 * G[i+2, j]]) + + # B at Green locations in Red rows + B[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ + -1. * G[i-1, j-1], 4. * B[i-1, j], -1. * G[i-1, j+1], \ + .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ + -1. * G[i+1, j-1], 4. * B[i+1,j], -1. * G[i+1, j+1], \ + -1. * G[i+2, j]]) + + # Green locations in Blue rows + elif (((i % 2) != 0) and ((j % 2) == 0)): + + # R at Green locations in Blue rows + R[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ + -1. * G[i-1, j-1], 4. * R[i-1, j], -1. * G[i-1, j+1], \ + .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ + -1. * G[i+1, j-1], 4. * R[i+1, j], -1. * G[i+1, j+1], \ + -1. * G[i+2, j]]) + + # B at Green locations in Blue rows + B[i, j] = 0.125 * np.sum([.5 * G[i-2, j], \ + -1. * G [i-1, j-1], -1. * G[i-1, j+1], \ + -1. * G[i, j-2], 4. * B[i, j-1], 5. * G[i,j], 4. * B[i, j+1], -1. * G[i, j+2], \ + -1. * G[i+1, j-1], -1. 
* G[i+1, j+1], \ + .5 * G[i+2, j]]) + + # R at Blue locations + elif (((i % 2) != 0) and ((j % 2) != 0)): + R[i, j] = 0.125 * np.sum([-1.5 * B[i-2, j], \ + 2. * R[i-1, j-1], 2. * R[i-1, j+1], \ + -1.5 * B[i, j-2], 6. * B[i,j], -1.5 * B[i, j+2], \ + 2. * R[i+1, j-1], 2. * R[i+1, j+1], \ + -1.5 * B[i+2, j]]) + + # B at Red locations + elif (((i % 2) == 0) and ((j % 2) == 0)): + B[i, j] = 0.125 * np.sum([-1.5 * R[i-2, j], \ + 2. * B[i-1, j-1], 2. * B[i-1, j+1], \ + -1.5 * R[i, j-2], 6. * R[i,j], -1.5 * R[i, j+2], \ + 2. * B[i+1, j-1], 2. * B[i+1, j+1], \ + -1.5 * R[i+2, j]]) + + if (timeshow): + elapsed_time = time.process_time() - t0 + print("Red/Blue: row index: " + str(i-1) + " of " + str(height) + \ + " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") + + + elif (bayer_pattern == "gbrg"): + + G[::2, ::2] = raw[::2, ::2] + G[1::2, 1::2] = raw[1::2, 1::2] + R[1::2, ::2] = raw[1::2, ::2] + B[::2, 1::2] = raw[::2, 1::2] + + # Green channel + for i in range(no_of_pixel_pad, height + no_of_pixel_pad): + + # to display progress + t0 = time.process_time() + + for j in range(no_of_pixel_pad, width + no_of_pixel_pad): + + # G at Red location + if (((i % 2) != 0) and ((j % 2) == 0)): + G[i, j] = 0.125 * np.sum([-1. * R[i-2, j], \ + 2. * G[i-1, j], \ + -1. * R[i, j-2], 2. * G[i, j-1], 4. * R[i,j], 2. * G[i, j+1], -1. * R[i, j+2],\ + 2. * G[i+1, j], \ + -1. * R[i+2, j]]) + # G at Blue location + elif (((i % 2) == 0) and ((j % 2) != 0)): + G[i, j] = 0.125 * np.sum([-1. * B[i-2, j], \ + 2. * G[i-1, j], \ + -1. * B[i, j-2], 2. * G[i, j-1], 4. * B[i,j], 2. * G[i, j+1], -1. * B[i, j+2], \ + 2. * G[i+1, j],\ + -1. * B[i+2, j]]) + if (timeshow): + elapsed_time = time.process_time() - t0 + print("Green: row index: " + str(i-1) + " of " + str(height) + \ + " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") + + # Red and Blue channel + for i in range(no_of_pixel_pad, height + no_of_pixel_pad): + + # to display progress + t0 = time.process_time() + + for j in range(no_of_pixel_pad, width + no_of_pixel_pad): + + # Green locations in Red rows + if (((i % 2) != 0) and ((j % 2) != 0)): + # R at Green locations in Red rows + R[i, j] = 0.125 * np.sum([.5 * G[i-2, j],\ + -1. * G[i-1, j-1], -1. * G[i-1, j+1], \ + -1. * G[i, j-2], 4. * R[i, j-1], 5. * G[i,j], 4. * R[i, j+1], -1. * G[i, j+2], \ + -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ + .5 * G[i+2, j]]) + + # B at Green locations in Red rows + B[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ + -1. * G[i-1, j-1], 4. * B[i-1, j], -1. * G[i-1, j+1], \ + .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ + -1. * G[i+1, j-1], 4. * B[i+1,j], -1. * G[i+1, j+1], \ + -1. * G[i+2, j]]) + + # Green locations in Blue rows + elif (((i % 2) == 0) and ((j % 2) == 0)): + + # R at Green locations in Blue rows + R[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ + -1. * G[i-1, j-1], 4. * R[i-1, j], -1. * G[i-1, j+1], \ + .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ + -1. * G[i+1, j-1], 4. * R[i+1, j], -1. * G[i+1, j+1], \ + -1. * G[i+2, j]]) + + # B at Green locations in Blue rows + B[i, j] = 0.125 * np.sum([.5 * G[i-2, j], \ + -1. * G [i-1, j-1], -1. * G[i-1, j+1], \ + -1. * G[i, j-2], 4. * B[i, j-1], 5. * G[i,j], 4. * B[i, j+1], -1. * G[i, j+2], \ + -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ + .5 * G[i+2, j]]) + + # R at Blue locations + elif (((i % 2) == 0) and ((j % 2) != 0)): + R[i, j] = 0.125 * np.sum([-1.5 * B[i-2, j], \ + 2. * R[i-1, j-1], 2. * R[i-1, j+1], \ + -1.5 * B[i, j-2], 6. * B[i,j], -1.5 * B[i, j+2], \ + 2. * R[i+1, j-1], 2. 
* R[i+1, j+1], \ + -1.5 * B[i+2, j]]) + + # B at Red locations + elif (((i % 2) != 0) and ((j % 2) == 0)): + B[i, j] = 0.125 * np.sum([-1.5 * R[i-2, j], \ + 2. * B[i-1, j-1], 2. * B[i-1, j+1], \ + -1.5 * R[i, j-2], 6. * R[i,j], -1.5 * R[i, j+2], \ + 2. * B[i+1, j-1], 2. * B[i+1, j+1], \ + -1.5 * R[i+2, j]]) + + if (timeshow): + elapsed_time = time.process_time() - t0 + print("Red/Blue: row index: " + str(i-1) + " of " + str(height) + \ + " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") + + elif (bayer_pattern == "grbg"): + + G[::2, ::2] = raw[::2, ::2] + G[1::2, 1::2] = raw[1::2, 1::2] + R[::2, 1::2] = raw[::2, 1::2] + B[1::2, ::2] = raw[1::2, ::2] + + # Green channel + for i in range(no_of_pixel_pad, height + no_of_pixel_pad): + + # to display progress + t0 = time.process_time() + + for j in range(no_of_pixel_pad, width + no_of_pixel_pad): + + # G at Red location + if (((i % 2) == 0) and ((j % 2) != 0)): + G[i, j] = 0.125 * np.sum([-1. * R[i-2, j], \ + 2. * G[i-1, j], \ + -1. * R[i, j-2], 2. * G[i, j-1], 4. * R[i,j], 2. * G[i, j+1], -1. * R[i, j+2],\ + 2. * G[i+1, j], \ + -1. * R[i+2, j]]) + # G at Blue location + elif (((i % 2) != 0) and ((j % 2) == 0)): + G[i, j] = 0.125 * np.sum([-1. * B[i-2, j], \ + 2. * G[i-1, j], \ + -1. * B[i, j-2], 2. * G[i, j-1], 4. * B[i,j], 2. * G[i, j+1], -1. * B[i, j+2], \ + 2. * G[i+1, j],\ + -1. * B[i+2, j]]) + if (timeshow): + elapsed_time = time.process_time() - t0 + print("Green: row index: " + str(i-1) + " of " + str(height) + \ + " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") + + # Red and Blue channel + for i in range(no_of_pixel_pad, height + no_of_pixel_pad): + + # to display progress + t0 = time.process_time() + + for j in range(no_of_pixel_pad, width + no_of_pixel_pad): + + # Green locations in Red rows + if (((i % 2) == 0) and ((j % 2) == 0)): + # R at Green locations in Red rows + R[i, j] = 0.125 * np.sum([.5 * G[i-2, j],\ + -1. * G[i-1, j-1], -1. * G[i-1, j+1], \ + -1. * G[i, j-2], 4. * R[i, j-1], 5. * G[i,j], 4. * R[i, j+1], -1. * G[i, j+2], \ + -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ + .5 * G[i+2, j]]) + + # B at Green locations in Red rows + B[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ + -1. * G[i-1, j-1], 4. * B[i-1, j], -1. * G[i-1, j+1], \ + .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ + -1. * G[i+1, j-1], 4. * B[i+1,j], -1. * G[i+1, j+1], \ + -1. * G[i+2, j]]) + + # Green locations in Blue rows + elif (((i % 2) != 0) and ((j % 2) != 0)): + + # R at Green locations in Blue rows + R[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ + -1. * G[i-1, j-1], 4. * R[i-1, j], -1. * G[i-1, j+1], \ + .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ + -1. * G[i+1, j-1], 4. * R[i+1, j], -1. * G[i+1, j+1], \ + -1. * G[i+2, j]]) + + # B at Green locations in Blue rows + B[i, j] = 0.125 * np.sum([.5 * G[i-2, j], \ + -1. * G [i-1, j-1], -1. * G[i-1, j+1], \ + -1. * G[i, j-2], 4. * B[i, j-1], 5. * G[i,j], 4. * B[i, j+1], -1. * G[i, j+2], \ + -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ + .5 * G[i+2, j]]) + + # R at Blue locations + elif (((i % 2) != 0) and ((j % 2) == 0)): + R[i, j] = 0.125 * np.sum([-1.5 * B[i-2, j], \ + 2. * R[i-1, j-1], 2. * R[i-1, j+1], \ + -1.5 * B[i, j-2], 6. * B[i,j], -1.5 * B[i, j+2], \ + 2. * R[i+1, j-1], 2. * R[i+1, j+1], \ + -1.5 * B[i+2, j]]) + + # B at Red locations + elif (((i % 2) == 0) and ((j % 2) != 0)): + B[i, j] = 0.125 * np.sum([-1.5 * R[i-2, j], \ + 2. * B[i-1, j-1], 2. * B[i-1, j+1], \ + -1.5 * R[i, j-2], 6. * R[i,j], -1.5 * R[i, j+2], \ + 2. * B[i+1, j-1], 2. 
* B[i+1, j+1], \ + -1.5 * R[i+2, j]]) + + if (timeshow): + elapsed_time = time.process_time() - t0 + print("Red/Blue: row index: " + str(i-1) + " of " + str(height) + \ + " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") + + elif (bayer_pattern == "bggr"): + + G[::2, 1::2] = raw[::2, 1::2] + G[1::2, ::2] = raw[1::2, ::2] + R[1::2, 1::2] = raw[1::2, 1::2] + B[::2, ::2] = raw[::2, ::2] + + # Green channel + for i in range(no_of_pixel_pad, height + no_of_pixel_pad): + + # to display progress + t0 = time.process_time() + + for j in range(no_of_pixel_pad, width + no_of_pixel_pad): + + # G at Red location + if (((i % 2) != 0) and ((j % 2) != 0)): + G[i, j] = 0.125 * np.sum([-1. * R[i-2, j], \ + 2. * G[i-1, j], \ + -1. * R[i, j-2], 2. * G[i, j-1], 4. * R[i,j], 2. * G[i, j+1], -1. * R[i, j+2],\ + 2. * G[i+1, j], \ + -1. * R[i+2, j]]) + # G at Blue location + elif (((i % 2) == 0) and ((j % 2) == 0)): + G[i, j] = 0.125 * np.sum([-1. * B[i-2, j], \ + 2. * G[i-1, j], \ + -1. * B[i, j-2], 2. * G[i, j-1], 4. * B[i,j], 2. * G[i, j+1], -1. * B[i, j+2], \ + 2. * G[i+1, j],\ + -1. * B[i+2, j]]) + if (timeshow): + elapsed_time = time.process_time() - t0 + print("Green: row index: " + str(i-1) + " of " + str(height) + \ + " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") + + # Red and Blue channel + for i in range(no_of_pixel_pad, height + no_of_pixel_pad): + + # to display progress + t0 = time.process_time() + + for j in range(no_of_pixel_pad, width + no_of_pixel_pad): + + # Green locations in Red rows + if (((i % 2) != 0) and ((j % 2) == 0)): + # R at Green locations in Red rows + R[i, j] = 0.125 * np.sum([.5 * G[i-2, j],\ + -1. * G[i-1, j-1], -1. * G[i-1, j+1], \ + -1. * G[i, j-2], 4. * R[i, j-1], 5. * G[i,j], 4. * R[i, j+1], -1. * G[i, j+2], \ + -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ + .5 * G[i+2, j]]) + + # B at Green locations in Red rows + B[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ + -1. * G[i-1, j-1], 4. * B[i-1, j], -1. * G[i-1, j+1], \ + .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ + -1. * G[i+1, j-1], 4. * B[i+1,j], -1. * G[i+1, j+1], \ + -1. * G[i+2, j]]) + + # Green locations in Blue rows + elif (((i % 2) == 0) and ((j % 2) != 0)): + + # R at Green locations in Blue rows + R[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ + -1. * G[i-1, j-1], 4. * R[i-1, j], -1. * G[i-1, j+1], \ + .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ + -1. * G[i+1, j-1], 4. * R[i+1, j], -1. * G[i+1, j+1], \ + -1. * G[i+2, j]]) + + # B at Green locations in Blue rows + B[i, j] = 0.125 * np.sum([.5 * G[i-2, j], \ + -1. * G [i-1, j-1], -1. * G[i-1, j+1], \ + -1. * G[i, j-2], 4. * B[i, j-1], 5. * G[i,j], 4. * B[i, j+1], -1. * G[i, j+2], \ + -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ + .5 * G[i+2, j]]) + + # R at Blue locations + elif (((i % 2) == 0) and ((j % 2) == 0)): + R[i, j] = 0.125 * np.sum([-1.5 * B[i-2, j], \ + 2. * R[i-1, j-1], 2. * R[i-1, j+1], \ + -1.5 * B[i, j-2], 6. * B[i,j], -1.5 * B[i, j+2], \ + 2. * R[i+1, j-1], 2. * R[i+1, j+1], \ + -1.5 * B[i+2, j]]) + + # B at Red locations + elif (((i % 2) != 0) and ((j % 2) != 0)): + B[i, j] = 0.125 * np.sum([-1.5 * R[i-2, j], \ + 2. * B[i-1, j-1], 2. * B[i-1, j+1], \ + -1.5 * R[i, j-2], 6. * R[i,j], -1.5 * R[i, j+2], \ + 2. * B[i+1, j-1], 2. * B[i+1, j+1], \ + -1.5 * R[i+2, j]]) + + if (timeshow): + elapsed_time = time.process_time() - t0 + print("Red/Blue: row index: " + str(i-1) + " of " + str(height) + \ + " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") + + else: + print("Invalid bayer pattern. 
Valid pattern can be rggb, gbrg, grbg, bggr") + return demosaic_out # This will be all zeros + + # Fill up the RGB output with interpolated values + demosaic_out[0:height, 0:width, 0] = R[no_of_pixel_pad : height + no_of_pixel_pad, \ + no_of_pixel_pad : width + no_of_pixel_pad] + demosaic_out[0:height, 0:width, 1] = G[no_of_pixel_pad : height + no_of_pixel_pad, \ + no_of_pixel_pad : width + no_of_pixel_pad] + demosaic_out[0:height, 0:width, 2] = B[no_of_pixel_pad : height + no_of_pixel_pad, \ + no_of_pixel_pad : width + no_of_pixel_pad] + + demosaic_out = np.clip(demosaic_out, clip_range[0], clip_range[1]) + return demosaic_out + + +def fill_channel_directional_weight(data, bayer_pattern): + + #== Calculate the directional weights (weight_N, weight_E, weight_S, weight_W. + # where N, E, S, W stand for north, east, south, and west.) + data = np.asarray(data) + v = np.asarray(signal.convolve2d(data, [[1],[0],[-1]], mode="same", boundary="symm")) + h = np.asarray(signal.convolve2d(data, [[1, 0, -1]], mode="same", boundary="symm")) + + weight_N = np.zeros(np.shape(data), dtype=np.float32) + weight_E = np.zeros(np.shape(data), dtype=np.float32) + weight_S = np.zeros(np.shape(data), dtype=np.float32) + weight_W = np.zeros(np.shape(data), dtype=np.float32) + + value_N = np.zeros(np.shape(data), dtype=np.float32) + value_E = np.zeros(np.shape(data), dtype=np.float32) + value_S = np.zeros(np.shape(data), dtype=np.float32) + value_W = np.zeros(np.shape(data), dtype=np.float32) + + if ((bayer_pattern == "rggb") or (bayer_pattern == "bggr")): + + + # note that in the following the locations in the comments are given + # assuming the bayer_pattern rggb + + #== CALCULATE WEIGHTS IN B LOCATIONS + weight_N[1::2, 1::2] = np.abs(v[1::2, 1::2]) + np.abs(v[::2, 1::2]) + + # repeating the column before the last to the right so that sampling + # does not cause any dimension mismatch + temp_h_b = np.hstack((h, np.atleast_2d(h[:, -2]).T)) + weight_E[1::2, 1::2] = np.abs(h[1::2, 1::2]) + np.abs(temp_h_b[1::2, 2::2]) + + # repeating the row before the last row to the bottom so that sampling + # does not cause any dimension mismatch + temp_v_b = np.vstack((v, v[-1])) + weight_S[1::2, 1::2] = np.abs(v[1::2, 1::2]) + np.abs(temp_v_b[2::2, 1::2]) + weight_W[1::2, 1::2] = np.abs(h[1::2, 1::2]) + np.abs(h[1::2, ::2]) + + #== CALCULATE WEIGHTS IN R LOCATIONS + # repeating the second row at the top of matrix so that sampling does + # not cause any dimension mismatch, also remove the bottom row + temp_v_r = np.delete(np.vstack((v[1], v)), -1, 0) + weight_N[::2, ::2] = np.abs(v[::2, ::2]) + np.abs(temp_v_r[::2, ::2]) + + weight_E[::2, ::2] = np.abs(h[::2, ::2]) + np.abs(h[::2, 1::2]) + + weight_S[::2, ::2] = np.abs(v[::2, ::2]) + np.abs(v[1::2, ::2]) + + # repeating the second column at the left of matrix so that sampling + # does not cause any dimension mismatch, also remove the rightmost + # column + temp_h_r = np.delete(np.hstack((np.atleast_2d(h[:, 1]).T, h)), -1, 1) + weight_W[::2, ::2] = np.abs(h[::2, ::2]) + np.abs(temp_h_r[::2, ::2]) + + weight_N = np.divide(1., 1. + weight_N) + weight_E = np.divide(1., 1. + weight_E) + weight_S = np.divide(1., 1. + weight_S) + weight_W = np.divide(1., 1. + weight_W) + + #== CALCULATE DIRECTIONAL ESTIMATES IN B LOCATIONS + value_N[1::2, 1::2] = data[::2, 1::2] + v[::2, 1::2] / 2. 
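+        # Each directional estimate (value_N/E/S/W) below is the nearest green
+        # neighbor in that direction plus half the difference of the two
+        # nearest same-color samples, so the interpolation tracks the local
+        # gradient instead of blurring across edges.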
+ + # repeating the column before the last to the right so that sampling + # does not cause any dimension mismatch + temp = np.hstack((data, np.atleast_2d(data[:, -2]).T)) + value_E[1::2, 1::2] = temp[1::2, 2::2] - temp_h_b[1::2, 2::2] / 2. + + # repeating the row before the last row to the bottom so that sampling + # does not cause any dimension mismatch + temp = np.vstack((data, data[-1])) + value_S[1::2, 1::2] = temp[2::2, 1::2] - temp_v_b[2::2, 1::2] / 2. + + value_W[1::2, 1::2] = data[1::2, ::2] + h[1::2, ::2] / 2. + + #== CALCULATE DIRECTIONAL ESTIMATES IN R LOCATIONS + # repeating the second row at the top of matrix so that sampling does + # not cause any dimension mismatch, also remove the bottom row + temp = np.delete(np.vstack((data[1], data)), -1, 0) + value_N[::2, ::2] = temp[::2, ::2] + temp_v_r[::2, ::2] / 2. + + value_E[::2, ::2] = data[::2, 1::2] - h[::2, 1::2] / 2. + + value_S[::2, ::2] = data[1::2, ::2] - v[1::2, ::2] / 2. + + # repeating the second column at the left of matrix so that sampling + # does not cause any dimension mismatch, also remove the rightmost + # column + temp = np.delete(np.hstack((np.atleast_2d(data[:, 1]).T, data)), -1, 1) + value_W[::2, ::2] = temp[::2, ::2] + temp_h_r[::2, ::2] / 2. + + output = np.zeros(np.shape(data), dtype=np.float32) + output = np.divide((np.multiply(value_N, weight_N) + \ + np.multiply(value_E, weight_E) + \ + np.multiply(value_S, weight_S) + \ + np.multiply(value_W, weight_W)),\ + (weight_N + weight_E + weight_S + weight_W)) + + output[::2, 1::2] = data[::2, 1::2] + output[1::2, ::2] = data[1::2, ::2] + + return output + + elif ((bayer_pattern == "gbrg") or (bayer_pattern == "grbg")): + + # note that in the following the locations in the comments are given + # assuming the bayer_pattern gbrg + + #== CALCULATE WEIGHTS IN B LOCATIONS + # repeating the second row at the top of matrix so that sampling does + # not cause any dimension mismatch, also remove the bottom row + temp_v_b = np.delete(np.vstack((v[1], v)), -1, 0) + weight_N[::2, 1::2] = np.abs(v[::2, 1::2]) + np.abs(temp_v_b[::2, 1::2]) + + # repeating the column before the last to the right so that sampling + # does not cause any dimension mismatch + temp_h_b = np.hstack((h, np.atleast_2d(h[:, -2]).T)) + weight_E[::2, 1::2] = np.abs(h[::2, 1::2]) + np.abs(temp_h_b[::2, 2::2]) + + # repeating the row before the last row to the bottom so that sampling + # does not cause any dimension mismatch + weight_S[::2, 1::2] = np.abs(v[::2, 1::2]) + np.abs(v[1::2, 1::2]) + weight_W[::2, 1::2] = np.abs(h[::2, 1::2]) + np.abs(h[::2, ::2]) + + #== CALCULATE WEIGHTS IN R LOCATIONS + weight_N[1::2, ::2] = np.abs(v[1::2, ::2]) + np.abs(v[::2, ::2]) + weight_E[1::2, ::2] = np.abs(h[1::2, ::2]) + np.abs(h[1::2, 1::2]) + + # repeating the row before the last row to the bottom so that sampling + # does not cause any dimension mismatch + temp_v_r = np.vstack((v, v[-1])) + weight_S[1::2, ::2] = np.abs(v[1::2, ::2]) + np.abs(temp_v_r[2::2, ::2]) + + # repeating the second column at the left of matrix so that sampling + # does not cause any dimension mismatch, also remove the rightmost + # column + temp_h_r = np.delete(np.hstack((np.atleast_2d(h[:, 1]).T, h)), -1, 1) + weight_W[1::2, ::2] = np.abs(h[1::2, ::2]) + np.abs(temp_h_r[1::2, ::2]) + + weight_N = np.divide(1., 1. + weight_N) + weight_E = np.divide(1., 1. + weight_E) + weight_S = np.divide(1., 1. + weight_S) + weight_W = np.divide(1., 1. 
+ weight_W) + + #== CALCULATE DIRECTIONAL ESTIMATES IN B LOCATIONS + # repeating the second row at the top of matrix so that sampling does + # not cause any dimension mismatch, also remove the bottom row + temp = np.delete(np.vstack((data[1], data)), -1, 0) + value_N[::2, 1::2] = temp[::2, 1::2] + temp_v_b[::2, 1::2] / 2. + + # repeating the column before the last to the right so that sampling + # does not cause any dimension mismatch + temp = np.hstack((data, np.atleast_2d(data[:, -2]).T)) + value_E[::2, 1::2] = temp[::2, 2::2] - temp_h_b[::2, 2::2] / 2. + + # repeating the row before the last row to the bottom so that sampling + # does not cause any dimension mismatch + value_S[::2, 1::2] = data[1::2, 1::2] - v[1::2, 1::2] / 2. + + value_W[::2, 1::2] = data[::2, ::2] + h[::2, ::2] / 2. + + #== CALCULATE DIRECTIONAL ESTIMATES IN R LOCATIONS + # repeating the second row at the top of matrix so that sampling does + # not cause any dimension mismatch, also remove the bottom row + value_N[1::2, ::2] = data[::2, ::2] + v[::2, ::2] / 2. + value_E[1::2, ::2] = data[1::2, 1::2] - h[1::2, 1::2] / 2. + + # repeating the row before the last row to the bottom so that sampling + # does not cause any dimension mismatch + temp = np.vstack((data, data[-1])) + value_S[1::2, ::2] = temp[2::2, ::2] - temp_v_r[2::2, ::2] / 2. + + # repeating the second column at the left of matrix so that sampling + # does not cause any dimension mismatch, also remove the rightmost + # column + temp = np.delete(np.hstack((np.atleast_2d(data[:, 1]).T, data)), -1, 1) + value_W[1::2, ::2] = temp[1::2, ::2] + temp_h_r[1::2, ::2] / 2. + + output = np.zeros(np.shape(data), dtype=np.float32) + output = np.divide((np.multiply(value_N, weight_N) + \ + np.multiply(value_E, weight_E) + \ + np.multiply(value_S, weight_S) + \ + np.multiply(value_W, weight_W)),\ + (weight_N + weight_E + weight_S + weight_W)) + + output[::2, ::2] = data[::2, ::2] + output[1::2, 1::2] = data[1::2, 1::2] + + return output + + +def fill_br_locations(data, G, bayer_pattern): + + # Fill up the B/R values interpolated at R/B locations + B = np.zeros(np.shape(data), dtype=np.float32) + R = np.zeros(np.shape(data), dtype=np.float32) + + data = np.asarray(data) + G = np.asarray(G) + d1 = np.asarray(signal.convolve2d(data, [[-1, 0, 0],[0, 0, 0], [0, 0, 1]], mode="same", boundary="symm")) + d2 = np.asarray(signal.convolve2d(data, [[0, 0, 1], [0, 0, 0], [-1, 0, 0]], mode="same", boundary="symm")) + + df_NE = np.asarray(signal.convolve2d(G, [[0, 0, 0], [0, 1, 0], [-1, 0, 0]], mode="same", boundary="symm")) + df_SE = np.asarray(signal.convolve2d(G, [[-1, 0, 0], [0, 1, 0], [0, 0, 0]], mode="same", boundary="symm")) + df_SW = np.asarray(signal.convolve2d(G, [[0, 0, -1], [0, 1, 0], [0, 0, 0]], mode="same", boundary="symm")) + df_NW = np.asarray(signal.convolve2d(G, [[0, 0, 0], [0, 1, 0], [0, 0, -1]], mode="same", boundary="symm")) + + weight_NE = np.zeros(np.shape(data), dtype=np.float32) + weight_SE = np.zeros(np.shape(data), dtype=np.float32) + weight_SW = np.zeros(np.shape(data), dtype=np.float32) + weight_NW = np.zeros(np.shape(data), dtype=np.float32) + + value_NE = np.zeros(np.shape(data), dtype=np.float32) + value_SE = np.zeros(np.shape(data), dtype=np.float32) + value_SW = np.zeros(np.shape(data), dtype=np.float32) + value_NW = np.zeros(np.shape(data), dtype=np.float32) + + if ((bayer_pattern == "rggb") or (bayer_pattern == "bggr")): + + #== weights for B in R locations + weight_NE[::2, ::2] = np.abs(d2[::2, ::2]) + np.abs(df_NE[::2, ::2]) + weight_SE[::2, ::2] = 
np.abs(d1[::2, ::2]) + np.abs(df_SE[::2, ::2]) + weight_SW[::2, ::2] = np.abs(d2[::2, ::2]) + np.abs(df_SW[::2, ::2]) + weight_NW[::2, ::2] = np.abs(d1[::2, ::2]) + np.abs(df_NW[::2, ::2]) + + #== weights for R in B locations + weight_NE[1::2, 1::2] = np.abs(d2[1::2, 1::2]) + np.abs(df_NE[1::2, 1::2]) + weight_SE[1::2, 1::2] = np.abs(d1[1::2, 1::2]) + np.abs(df_SE[1::2, 1::2]) + weight_SW[1::2, 1::2] = np.abs(d2[1::2, 1::2]) + np.abs(df_SW[1::2, 1::2]) + weight_NW[1::2, 1::2] = np.abs(d1[1::2, 1::2]) + np.abs(df_NW[1::2, 1::2]) + + weight_NE = np.divide(1., 1. + weight_NE) + weight_SE = np.divide(1., 1. + weight_SE) + weight_SW = np.divide(1., 1. + weight_SW) + weight_NW = np.divide(1., 1. + weight_NW) + + #== directional estimates of B in R locations + # repeating the second row at the top of matrix so that sampling does + # not cause any dimension mismatch, also remove the bottom row + temp = np.delete(np.vstack((data[1], data)), -1, 0) + value_NE[::2, ::2] = temp[::2, 1::2] + df_NE[::2, ::2] / 2. + value_SE[::2, ::2] = data[1::2, 1::2] + df_SE[::2, ::2] / 2. + # repeating the second column at the left of matrix so that sampling + # does not cause any dimension mismatch, also remove the rightmost + # column + temp = np.delete(np.hstack((np.atleast_2d(data[:, 1]).T, data)), -1, 1) + value_SW[::2, ::2] = temp[1::2, ::2] + df_SW[::2, ::2] / 2. + + # repeating the second row at the top of matrix so that sampling does + # not cause any dimension mismatch, also remove the bottom row + temp = np.delete(np.vstack((data[1], data)), -1, 0) + # repeating the second column at the left of matrix so that sampling + # does not cause any dimension mismatch, also remove the rightmost + # column + temp = np.delete(np.hstack((np.atleast_2d(temp[:, 1]).T, temp)), -1, 1) + value_NW[::2, ::2] = temp[::2, ::2] + df_NW[::2, ::2] + + #== directional estimates of R in B locations + # repeating the column before the last to the right so that sampling + # does not cause any dimension mismatch + temp = np.hstack((data, np.atleast_2d(data[:, -2]).T)) + value_NE[1::2, 1::2] = temp[::2, 2::2] + df_NE[1::2, 1::2] / 2. + # repeating the column before the last to the right so that sampling + # does not cause any dimension mismatch + temp = np.hstack((data, np.atleast_2d(data[:, -2]).T)) + # repeating the row before the last row to the bottom so that sampling + # does not cause any dimension mismatch + temp = np.vstack((temp, temp[-1])) + value_SE[1::2, 1::2] = temp[2::2, 2::2] + df_SE[1::2, 1::2] / 2. + # repeating the row before the last row to the bottom so that sampling + # does not cause any dimension mismatch + temp = np.vstack((data, data[-1])) + value_SW[1::2, 1::2] = temp[2::2, ::2] + df_SW[1::2, 1::2] / 2. + value_NW[1::2, 1::2] = data[::2, ::2] + df_NW[1::2, 1::2] / 2. 
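+        # Fuse the four diagonal estimates into one value per pixel, weighting
+        # each by its edge-adaptive weight and normalizing by the total weight.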
+ + RB = np.divide(np.multiply(weight_NE, value_NE) + \ + np.multiply(weight_SE, value_SE) + \ + np.multiply(weight_SW, value_SW) + \ + np.multiply(weight_NW, value_NW),\ + (weight_NE + weight_SE + weight_SW + weight_NW)) + + if (bayer_pattern == "rggb"): + + R[1::2, 1::2] = RB[1::2, 1::2] + R[::2, ::2] = data[::2, ::2] + B[::2, ::2] = RB[::2, ::2] + B[1::2, 1::2] = data[1::2, 1::2] + + elif (bayer_pattern == "bggr"): + R[::2, ::2] = RB[::2, ::2] + R[1::2, 1::2] = data[1::2, 1::2] + B[1::2, 1::2] = RB[1::2, 1::2] + B[::2, ::2] = data[::2, ::2] + + + R[1::2, ::2] = G[1::2, ::2] + R[::2, 1::2] = G[::2, 1::2] + R = fill_channel_directional_weight(R, "gbrg") + + B[1::2, ::2] = G[1::2, ::2] + B[::2, 1::2] = G[::2, 1::2] + B = fill_channel_directional_weight(B, "gbrg") + + + elif ((bayer_pattern == "grbg") or (bayer_pattern == "gbrg")): + #== weights for B in R locations + weight_NE[::2, 1::2] = np.abs(d2[::2, 1::2]) + np.abs(df_NE[::2, 1::2]) + weight_SE[::2, 1::2] = np.abs(d1[::2, 1::2]) + np.abs(df_SE[::2, 1::2]) + weight_SW[::2, 1::2] = np.abs(d2[::2, 1::2]) + np.abs(df_SW[::2, 1::2]) + weight_NW[::2, 1::2] = np.abs(d1[::2, 1::2]) + np.abs(df_NW[::2, 1::2]) + + #== weights for R in B locations + weight_NE[1::2, ::2] = np.abs(d2[1::2, ::2]) + np.abs(df_NE[1::2, ::2]) + weight_SE[1::2, ::2] = np.abs(d1[1::2, ::2]) + np.abs(df_SE[1::2, ::2]) + weight_SW[1::2, ::2] = np.abs(d2[1::2, ::2]) + np.abs(df_SW[1::2, ::2]) + weight_NW[1::2, ::2] = np.abs(d1[1::2, ::2]) + np.abs(df_NW[1::2, ::2]) + + weight_NE = np.divide(1., 1. + weight_NE) + weight_SE = np.divide(1., 1. + weight_SE) + weight_SW = np.divide(1., 1. + weight_SW) + weight_NW = np.divide(1., 1. + weight_NW) + + #== directional estimates of B in R locations + # repeating the second row at the top of matrix so that sampling does + # not cause any dimension mismatch, also remove the bottom row + temp = np.delete(np.vstack((data[1], data)), -1, 0) + # repeating the column before the last to the right so that sampling + # does not cause any dimension mismatch + temp = np.hstack((temp, np.atleast_2d(temp[:, -2]).T)) + value_NE[::2, 1::2] = temp[::2, 2::2] + df_NE[::2, 1::2] / 2. + # repeating the column before the last to the right so that sampling + # does not cause any dimension mismatch + temp = np.hstack((data, np.atleast_2d(data[:, -2]).T)) + value_SE[::2, 1::2] = temp[1::2, 2::2] + df_SE[::2, 1::2] / 2. + value_SW[::2, 1::2] = data[1::2, ::2] + df_SW[::2, 1::2] / 2. + + # repeating the second row at the top of matrix so that sampling does + # not cause any dimension mismatch, also remove the bottom row + temp = np.delete(np.vstack((data[1], data)), -1, 0) + value_NW[::2, 1::2] = temp[::2, ::2] + df_NW[::2, 1::2] + + #== directional estimates of R in B locations + value_NE[1::2, ::2] = data[::2, 1::2] + df_NE[1::2, ::2] / 2. + # repeating the column before the last to the right so that sampling + # does not cause any dimension mismatch + temp = np.hstack((data, np.atleast_2d(data[:, -2]).T)) + # repeating the row before the last row to the bottom so that sampling + # does not cause any dimension mismatch + temp = np.vstack((temp, temp[-1])) + value_SE[1::2, ::2] = temp[2::2, 1::2] + df_SE[1::2, ::2] / 2. 
+ # repeating the row before the last row to the bottom so that sampling + # does not cause any dimension mismatch + temp = np.vstack((data, data[-1])) + # repeating the second column at the left of matrix so that sampling + # does not cause any dimension mismatch, also remove the rightmost + # column + temp = np.delete(np.hstack((np.atleast_2d(temp[:, 1]).T, temp)), -1, 1) + value_SW[1::2, ::2] = temp[2::2, ::2] + df_SW[1::2, ::2] / 2. + # repeating the second column at the left of matrix so that sampling + # does not cause any dimension mismatch, also remove the rightmost + # column + temp = np.delete(np.hstack((np.atleast_2d(data[:, 1]).T, data)), -1, 1) + value_NW[1::2, ::2] = temp[::2, ::2] + df_NW[1::2, ::2] / 2. + + RB = np.divide(np.multiply(weight_NE, value_NE) + \ + np.multiply(weight_SE, value_SE) + \ + np.multiply(weight_SW, value_SW) + \ + np.multiply(weight_NW, value_NW),\ + (weight_NE + weight_SE + weight_SW + weight_NW)) + + if (bayer_pattern == "grbg"): + + R[1::2, ::2] = RB[1::2, ::2] + R[::2, 1::2] = data[::2, 1::2] + B[::2, 1::2] = RB[::2, 1::2] + B[1::2, ::2] = data[1::2, ::2] + + elif (bayer_pattern == "gbrg"): + R[::2, 1::2] = RB[::2, 1::2] + R[1::2, ::2] = data[1::2, ::2] + B[1::2, ::2] = RB[1::2, ::2] + B[::2, 1::2] = data[::2, 1::2] + + + R[::2, ::2] = G[::2, ::2] + R[1::2, 1::2] = G[1::2, 1::2] + R = fill_channel_directional_weight(R, "rggb") + + B[1::2, 1::2] = G[1::2, 1::2] + B[::2, ::2] = G[::2, ::2] + B = fill_channel_directional_weight(B, "rggb") + + + return B, R + +# # ============================================================= +# # function: dbayer_mhc_fast +# # demosaicing using Malvar-He-Cutler algorithm +# # http://www.ipol.im/pub/art/2011/g_mhcd/ +# # ============================================================= +# def debayer_mhc_fast(raw, bayer_pattern="rggb", clip_range=[0, 65535], timeshow=False): +# +# # convert to float32 in case it was not +# raw = np.float32(raw) +# +# # dimensions +# width, height = utility.helpers(raw).get_width_height() +# +# # allocate space for the R, G, B planes +# R = np.empty((height, width), dtype = np.float32) +# G = np.empty((height, width), dtype = np.float32) +# B = np.empty((height, width), dtype = np.float32) +# +# # create a RGB output +# demosaic_out = np.empty( (height, width, 3), dtype = np.float32 ) +# +# # define the convolution kernels +# kernel_g_at_rb = [[0., 0., -1., 0., 0.],\ +# [0., 0., 2., 0., 0.],\ +# [-1., 2., 4., 2., -1.],\ +# [0., 0., 2., 0., 0.],\ +# [0., 0., -1., 0., 0.]] * .125 +# +# kernel_r_at_gr = [[0., 0., .5, 0., 0.],\ +# [0., -1., 0., -1., 0.],\ +# [-1., 4., 5., 4., -1.],\ +# [0., -1., 0., -1., 0.],\ +# [0., 0., .5, 0., 0.]] * .125 +# +# kernel_b_at_gr = [[0., 0., -1., 0., 0.],\ +# [0., -1., 4., -1., 0.],\ +# [.5., 0., 5., 0., .5],\ +# [0., -1., 4., -1., 0],\ +# [0., 0., -1., 0., 0.]] * .125 +# +# kernel_r_at_gb = [[0., 0., -1., 0., 0.],\ +# [0., -1., 4., -1., 0.],\ +# [.5, 0., 5., 0., .5],\ +# [0., -1., 4., -1., 0.],\ +# [0., 0., -1., 0., 0.]] * .125 +# +# kernel_b_at_gb = [[0., 0., .5, 0., 0.],\ +# [0., -1., 0., -1., 0.],\ +# [-1., 4., 5., 4., -1.],\ +# [0., -1., 0., -1., 0.],\ +# [0., 0., .5, 0., 0.]] * .125 +# +# kernel_r_at_b = [[0., 0., -1.5, 0., 0.],\ +# [0., 2., 0., 2., 0.],\ +# [-1.5, 0., 6., 0., -1.5],\ +# [0., 2., 0., 2., 0.],\ +# [0., 0., -1.5, 0., 0.]] * .125 +# +# kernel_b_at_r = [[0., 0., -1.5, 0., 0.],\ +# [0., 2., 0., 2., 0.],\ +# [-1.5, 0., 6., 0., -1.5],\ +# [0., 2., 0., 2., 0.],\ +# [0., 0., -1.5, 0., 0.]] * .125 +# +# +# +# # fill up the directly available values 
according to the Bayer pattern +# if (bayer_pattern == "rggb"): +# +# G[::2, 1::2] = raw[::2, 1::2] +# G[1::2, ::2] = raw[1::2, ::2] +# R[::2, ::2] = raw[::2, ::2] +# B[1::2, 1::2] = raw[1::2, 1::2] +# +# # Green channel +# for i in range(no_of_pixel_pad, height + no_of_pixel_pad): +# +# # to display progress +# t0 = time.process_time() +# +# for j in range(no_of_pixel_pad, width + no_of_pixel_pad): +# +# # G at Red location +# if (((i % 2) == 0) and ((j % 2) == 0)): +# G[i, j] = 0.125 * np.sum([-1. * R[i-2, j], \ +# 2. * G[i-1, j], \ +# -1. * R[i, j-2], 2. * G[i, j-1], 4. * R[i,j], 2. * G[i, j+1], -1. * R[i, j+2],\ +# 2. * G[i+1, j], \ +# -1. * R[i+2, j]]) +# # G at Blue location +# elif (((i % 2) != 0) and ((j % 2) != 0)): +# G[i, j] = 0.125 * np.sum([-1. * B[i-2, j], \ +# 2. * G[i-1, j], \ +# -1. * B[i, j-2], 2. * G[i, j-1], 4. * B[i,j], 2. * G[i, j+1], -1. * B[i, j+2], \ +# 2. * G[i+1, j],\ +# -1. * B[i+2, j]]) +# if (timeshow): +# elapsed_time = time.process_time() - t0 +# print("Green: row index: " + str(i-1) + " of " + str(height) + \ +# " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") +# +# # Red and Blue channel +# for i in range(no_of_pixel_pad, height + no_of_pixel_pad): +# +# # to display progress +# t0 = time.process_time() +# +# for j in range(no_of_pixel_pad, width + no_of_pixel_pad): +# +# # Green locations in Red rows +# if (((i % 2) == 0) and ((j % 2) != 0)): +# # R at Green locations in Red rows +# R[i, j] = 0.125 * np.sum([.5 * G[i-2, j],\ +# -1. * G[i-1, j-1], -1. * G[i-1, j+1], \ +# -1. * G[i, j-2], 4. * R[i, j-1], 5. * G[i,j], 4. * R[i, j+1], -1. * G[i, j+2], \ +# -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ +# .5 * G[i+2, j]]) +# +# # B at Green locations in Red rows +# B[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ +# -1. * G[i-1, j-1], 4. * B[i-1, j], -1. * G[i-1, j+1], \ +# .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ +# -1. * G[i+1, j-1], 4. * B[i+1,j], -1. * G[i+1, j+1], \ +# -1. * G[i+2, j]]) +# +# # Green locations in Blue rows +# elif (((i % 2) != 0) and ((j % 2) == 0)): +# +# # R at Green locations in Blue rows +# R[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ +# -1. * G[i-1, j-1], 4. * R[i-1, j], -1. * G[i-1, j+1], \ +# .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ +# -1. * G[i+1, j-1], 4. * R[i+1, j], -1. * G[i+1, j+1], \ +# -1. * G[i+2, j]]) +# +# # B at Green locations in Blue rows +# B[i, j] = 0.125 * np.sum([.5 * G[i-2, j], \ +# -1. * G [i-1, j-1], -1. * G[i-1, j+1], \ +# -1. * G[i, j-2], 4. * B[i, j-1], 5. * G[i,j], 4. * B[i, j+1], -1. * G[i, j+2], \ +# -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ +# .5 * G[i+2, j]]) +# +# # R at Blue locations +# elif (((i % 2) != 0) and ((j % 2) != 0)): +# R[i, j] = 0.125 * np.sum([-1.5 * B[i-2, j], \ +# 2. * R[i-1, j-1], 2. * R[i-1, j+1], \ +# -1.5 * B[i, j-2], 6. * B[i,j], -1.5 * B[i, j+2], \ +# 2. * R[i+1, j-1], 2. * R[i+1, j+1], \ +# -1.5 * B[i+2, j]]) +# +# # B at Red locations +# elif (((i % 2) == 0) and ((j % 2) == 0)): +# B[i, j] = 0.125 * np.sum([-1.5 * R[i-2, j], \ +# 2. * B[i-1, j-1], 2. * B[i-1, j+1], \ +# -1.5 * R[i, j-2], 6. * R[i,j], -1.5 * R[i, j+2], \ +# 2. * B[i+1, j-1], 2. 
* B[i+1, j+1], \ +# -1.5 * R[i+2, j]]) +# +# if (timeshow): +# elapsed_time = time.process_time() - t0 +# print("Red/Blue: row index: " + str(i-1) + " of " + str(height) + \ +# " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") +# +# +# elif (bayer_pattern == "gbrg"): +# +# G[::2, ::2] = raw[::2, ::2] +# G[1::2, 1::2] = raw[1::2, 1::2] +# R[1::2, ::2] = raw[1::2, ::2] +# B[::2, 1::2] = raw[::2, 1::2] +# +# # Green channel +# for i in range(no_of_pixel_pad, height + no_of_pixel_pad): +# +# # to display progress +# t0 = time.process_time() +# +# for j in range(no_of_pixel_pad, width + no_of_pixel_pad): +# +# # G at Red location +# if (((i % 2) != 0) and ((j % 2) == 0)): +# G[i, j] = 0.125 * np.sum([-1. * R[i-2, j], \ +# 2. * G[i-1, j], \ +# -1. * R[i, j-2], 2. * G[i, j-1], 4. * R[i,j], 2. * G[i, j+1], -1. * R[i, j+2],\ +# 2. * G[i+1, j], \ +# -1. * R[i+2, j]]) +# # G at Blue location +# elif (((i % 2) == 0) and ((j % 2) != 0)): +# G[i, j] = 0.125 * np.sum([-1. * B[i-2, j], \ +# 2. * G[i-1, j], \ +# -1. * B[i, j-2], 2. * G[i, j-1], 4. * B[i,j], 2. * G[i, j+1], -1. * B[i, j+2], \ +# 2. * G[i+1, j],\ +# -1. * B[i+2, j]]) +# if (timeshow): +# elapsed_time = time.process_time() - t0 +# print("Green: row index: " + str(i-1) + " of " + str(height) + \ +# " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") +# +# # Red and Blue channel +# for i in range(no_of_pixel_pad, height + no_of_pixel_pad): +# +# # to display progress +# t0 = time.process_time() +# +# for j in range(no_of_pixel_pad, width + no_of_pixel_pad): +# +# # Green locations in Red rows +# if (((i % 2) != 0) and ((j % 2) != 0)): +# # R at Green locations in Red rows +# R[i, j] = 0.125 * np.sum([.5 * G[i-2, j],\ +# -1. * G[i-1, j-1], -1. * G[i-1, j+1], \ +# -1. * G[i, j-2], 4. * R[i, j-1], 5. * G[i,j], 4. * R[i, j+1], -1. * G[i, j+2], \ +# -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ +# .5 * G[i+2, j]]) +# +# # B at Green locations in Red rows +# B[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ +# -1. * G[i-1, j-1], 4. * B[i-1, j], -1. * G[i-1, j+1], \ +# .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ +# -1. * G[i+1, j-1], 4. * B[i+1,j], -1. * G[i+1, j+1], \ +# -1. * G[i+2, j]]) +# +# # Green locations in Blue rows +# elif (((i % 2) == 0) and ((j % 2) == 0)): +# +# # R at Green locations in Blue rows +# R[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ +# -1. * G[i-1, j-1], 4. * R[i-1, j], -1. * G[i-1, j+1], \ +# .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ +# -1. * G[i+1, j-1], 4. * R[i+1, j], -1. * G[i+1, j+1], \ +# -1. * G[i+2, j]]) +# +# # B at Green locations in Blue rows +# B[i, j] = 0.125 * np.sum([.5 * G[i-2, j], \ +# -1. * G [i-1, j-1], -1. * G[i-1, j+1], \ +# -1. * G[i, j-2], 4. * B[i, j-1], 5. * G[i,j], 4. * B[i, j+1], -1. * G[i, j+2], \ +# -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ +# .5 * G[i+2, j]]) +# +# # R at Blue locations +# elif (((i % 2) == 0) and ((j % 2) != 0)): +# R[i, j] = 0.125 * np.sum([-1.5 * B[i-2, j], \ +# 2. * R[i-1, j-1], 2. * R[i-1, j+1], \ +# -1.5 * B[i, j-2], 6. * B[i,j], -1.5 * B[i, j+2], \ +# 2. * R[i+1, j-1], 2. * R[i+1, j+1], \ +# -1.5 * B[i+2, j]]) +# +# # B at Red locations +# elif (((i % 2) != 0) and ((j % 2) == 0)): +# B[i, j] = 0.125 * np.sum([-1.5 * R[i-2, j], \ +# 2. * B[i-1, j-1], 2. * B[i-1, j+1], \ +# -1.5 * R[i, j-2], 6. * R[i,j], -1.5 * R[i, j+2], \ +# 2. * B[i+1, j-1], 2. 
* B[i+1, j+1], \ +# -1.5 * R[i+2, j]]) +# +# if (timeshow): +# elapsed_time = time.process_time() - t0 +# print("Red/Blue: row index: " + str(i-1) + " of " + str(height) + \ +# " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") +# +# elif (bayer_pattern == "grbg"): +# +# G[::2, ::2] = raw[::2, ::2] +# G[1::2, 1::2] = raw[1::2, 1::2] +# R[::2, 1::2] = raw[::2, 1::2] +# B[1::2, ::2] = raw[1::2, ::2] +# +# # Green channel +# for i in range(no_of_pixel_pad, height + no_of_pixel_pad): +# +# # to display progress +# t0 = time.process_time() +# +# for j in range(no_of_pixel_pad, width + no_of_pixel_pad): +# +# # G at Red location +# if (((i % 2) == 0) and ((j % 2) != 0)): +# G[i, j] = 0.125 * np.sum([-1. * R[i-2, j], \ +# 2. * G[i-1, j], \ +# -1. * R[i, j-2], 2. * G[i, j-1], 4. * R[i,j], 2. * G[i, j+1], -1. * R[i, j+2],\ +# 2. * G[i+1, j], \ +# -1. * R[i+2, j]]) +# # G at Blue location +# elif (((i % 2) != 0) and ((j % 2) == 0)): +# G[i, j] = 0.125 * np.sum([-1. * B[i-2, j], \ +# 2. * G[i-1, j], \ +# -1. * B[i, j-2], 2. * G[i, j-1], 4. * B[i,j], 2. * G[i, j+1], -1. * B[i, j+2], \ +# 2. * G[i+1, j],\ +# -1. * B[i+2, j]]) +# if (timeshow): +# elapsed_time = time.process_time() - t0 +# print("Green: row index: " + str(i-1) + " of " + str(height) + \ +# " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") +# +# # Red and Blue channel +# for i in range(no_of_pixel_pad, height + no_of_pixel_pad): +# +# # to display progress +# t0 = time.process_time() +# +# for j in range(no_of_pixel_pad, width + no_of_pixel_pad): +# +# # Green locations in Red rows +# if (((i % 2) == 0) and ((j % 2) == 0)): +# # R at Green locations in Red rows +# R[i, j] = 0.125 * np.sum([.5 * G[i-2, j],\ +# -1. * G[i-1, j-1], -1. * G[i-1, j+1], \ +# -1. * G[i, j-2], 4. * R[i, j-1], 5. * G[i,j], 4. * R[i, j+1], -1. * G[i, j+2], \ +# -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ +# .5 * G[i+2, j]]) +# +# # B at Green locations in Red rows +# B[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ +# -1. * G[i-1, j-1], 4. * B[i-1, j], -1. * G[i-1, j+1], \ +# .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ +# -1. * G[i+1, j-1], 4. * B[i+1,j], -1. * G[i+1, j+1], \ +# -1. * G[i+2, j]]) +# +# # Green locations in Blue rows +# elif (((i % 2) != 0) and ((j % 2) != 0)): +# +# # R at Green locations in Blue rows +# R[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ +# -1. * G[i-1, j-1], 4. * R[i-1, j], -1. * G[i-1, j+1], \ +# .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ +# -1. * G[i+1, j-1], 4. * R[i+1, j], -1. * G[i+1, j+1], \ +# -1. * G[i+2, j]]) +# +# # B at Green locations in Blue rows +# B[i, j] = 0.125 * np.sum([.5 * G[i-2, j], \ +# -1. * G [i-1, j-1], -1. * G[i-1, j+1], \ +# -1. * G[i, j-2], 4. * B[i, j-1], 5. * G[i,j], 4. * B[i, j+1], -1. * G[i, j+2], \ +# -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ +# .5 * G[i+2, j]]) +# +# # R at Blue locations +# elif (((i % 2) != 0) and ((j % 2) == 0)): +# R[i, j] = 0.125 * np.sum([-1.5 * B[i-2, j], \ +# 2. * R[i-1, j-1], 2. * R[i-1, j+1], \ +# -1.5 * B[i, j-2], 6. * B[i,j], -1.5 * B[i, j+2], \ +# 2. * R[i+1, j-1], 2. * R[i+1, j+1], \ +# -1.5 * B[i+2, j]]) +# +# # B at Red locations +# elif (((i % 2) == 0) and ((j % 2) != 0)): +# B[i, j] = 0.125 * np.sum([-1.5 * R[i-2, j], \ +# 2. * B[i-1, j-1], 2. * B[i-1, j+1], \ +# -1.5 * R[i, j-2], 6. * R[i,j], -1.5 * R[i, j+2], \ +# 2. * B[i+1, j-1], 2. 
* B[i+1, j+1], \ +# -1.5 * R[i+2, j]]) +# +# if (timeshow): +# elapsed_time = time.process_time() - t0 +# print("Red/Blue: row index: " + str(i-1) + " of " + str(height) + \ +# " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") +# +# elif (bayer_pattern == "bggr"): +# +# G[::2, 1::2] = raw[::2, 1::2] +# G[1::2, ::2] = raw[1::2, ::2] +# R[1::2, 1::2] = raw[1::2, 1::2] +# B[::2, ::2] = raw[::2, ::2] +# +# # Green channel +# for i in range(no_of_pixel_pad, height + no_of_pixel_pad): +# +# # to display progress +# t0 = time.process_time() +# +# for j in range(no_of_pixel_pad, width + no_of_pixel_pad): +# +# # G at Red location +# if (((i % 2) != 0) and ((j % 2) != 0)): +# G[i, j] = 0.125 * np.sum([-1. * R[i-2, j], \ +# 2. * G[i-1, j], \ +# -1. * R[i, j-2], 2. * G[i, j-1], 4. * R[i,j], 2. * G[i, j+1], -1. * R[i, j+2],\ +# 2. * G[i+1, j], \ +# -1. * R[i+2, j]]) +# # G at Blue location +# elif (((i % 2) == 0) and ((j % 2) == 0)): +# G[i, j] = 0.125 * np.sum([-1. * B[i-2, j], \ +# 2. * G[i-1, j], \ +# -1. * B[i, j-2], 2. * G[i, j-1], 4. * B[i,j], 2. * G[i, j+1], -1. * B[i, j+2], \ +# 2. * G[i+1, j],\ +# -1. * B[i+2, j]]) +# if (timeshow): +# elapsed_time = time.process_time() - t0 +# print("Green: row index: " + str(i-1) + " of " + str(height) + \ +# " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") +# +# # Red and Blue channel +# for i in range(no_of_pixel_pad, height + no_of_pixel_pad): +# +# # to display progress +# t0 = time.process_time() +# +# for j in range(no_of_pixel_pad, width + no_of_pixel_pad): +# +# # Green locations in Red rows +# if (((i % 2) != 0) and ((j % 2) == 0)): +# # R at Green locations in Red rows +# R[i, j] = 0.125 * np.sum([.5 * G[i-2, j],\ +# -1. * G[i-1, j-1], -1. * G[i-1, j+1], \ +# -1. * G[i, j-2], 4. * R[i, j-1], 5. * G[i,j], 4. * R[i, j+1], -1. * G[i, j+2], \ +# -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ +# .5 * G[i+2, j]]) +# +# # B at Green locations in Red rows +# B[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ +# -1. * G[i-1, j-1], 4. * B[i-1, j], -1. * G[i-1, j+1], \ +# .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ +# -1. * G[i+1, j-1], 4. * B[i+1,j], -1. * G[i+1, j+1], \ +# -1. * G[i+2, j]]) +# +# # Green locations in Blue rows +# elif (((i % 2) == 0) and ((j % 2) != 0)): +# +# # R at Green locations in Blue rows +# R[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ +# -1. * G[i-1, j-1], 4. * R[i-1, j], -1. * G[i-1, j+1], \ +# .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ +# -1. * G[i+1, j-1], 4. * R[i+1, j], -1. * G[i+1, j+1], \ +# -1. * G[i+2, j]]) +# +# # B at Green locations in Blue rows +# B[i, j] = 0.125 * np.sum([.5 * G[i-2, j], \ +# -1. * G [i-1, j-1], -1. * G[i-1, j+1], \ +# -1. * G[i, j-2], 4. * B[i, j-1], 5. * G[i,j], 4. * B[i, j+1], -1. * G[i, j+2], \ +# -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ +# .5 * G[i+2, j]]) +# +# # R at Blue locations +# elif (((i % 2) == 0) and ((j % 2) == 0)): +# R[i, j] = 0.125 * np.sum([-1.5 * B[i-2, j], \ +# 2. * R[i-1, j-1], 2. * R[i-1, j+1], \ +# -1.5 * B[i, j-2], 6. * B[i,j], -1.5 * B[i, j+2], \ +# 2. * R[i+1, j-1], 2. * R[i+1, j+1], \ +# -1.5 * B[i+2, j]]) +# +# # B at Red locations +# elif (((i % 2) != 0) and ((j % 2) != 0)): +# B[i, j] = 0.125 * np.sum([-1.5 * R[i-2, j], \ +# 2. * B[i-1, j-1], 2. * B[i-1, j+1], \ +# -1.5 * R[i, j-2], 6. * R[i,j], -1.5 * R[i, j+2], \ +# 2. * B[i+1, j-1], 2. 
* B[i+1, j+1], \
+#                                        -1.5 * R[i+2, j]])
+#
+#             if (timeshow):
+#                 elapsed_time = time.process_time() - t0
+#                 print("Red/Blue: row index: " + str(i-1) + " of " + str(height) + \
+#                       " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds")
+#
+#     else:
+#         print("Invalid bayer pattern. Valid patterns are: rggb, gbrg, grbg, bggr")
+#         return demosaic_out # This will be all zeros
+#
+#     # Fill up the RGB output with interpolated values
+#     demosaic_out[0:height, 0:width, 0] = R[no_of_pixel_pad : height + no_of_pixel_pad, \
+#                                            no_of_pixel_pad : width + no_of_pixel_pad]
+#     demosaic_out[0:height, 0:width, 1] = G[no_of_pixel_pad : height + no_of_pixel_pad, \
+#                                            no_of_pixel_pad : width + no_of_pixel_pad]
+#     demosaic_out[0:height, 0:width, 2] = B[no_of_pixel_pad : height + no_of_pixel_pad, \
+#                                            no_of_pixel_pad : width + no_of_pixel_pad]
+#
+#     demosaic_out = np.clip(demosaic_out, clip_range[0], clip_range[1])
+#     return demosaic_out
diff --git a/IIR-Lab/ISP_pipeline/docker_guidelines.md b/IIR-Lab/ISP_pipeline/docker_guidelines.md
new file mode 100644
index 0000000000000000000000000000000000000000..b691d28b1fae7775dfc59dadf2738124f00c2bf4
--- /dev/null
+++ b/IIR-Lab/ISP_pipeline/docker_guidelines.md
@@ -0,0 +1,29 @@
+# Final submission docker guidelines
+
+All final submissions should be submitted as a docker image. The docker image should be built from the dockerfile in the root of the repository, with the following command (`<image_name>` is a placeholder for the tag you choose):
+
+```bash
+docker build -t <image_name> .
+```
+
+The docker image should be run with the following command:
+
+```bash
+docker run -it --rm -v $(pwd)/data:/data <image_name> ./run.sh
+```
+
+As output, the docker image should produce images in `JPEG` format in the `/data` directory. Each produced file should have the same name as the corresponding RAW input file in `/data`, but with the `.jpg` extension. Make sure that your code does not create any other folders in the `/data` directory. The docker image should contain all the necessary dependencies to run the code and should include the `run.sh` script as the entrypoint. Take into account that inside the docker image, the `/data` directory will be mounted to the `$(pwd)/data` directory of the host machine; the image should therefore read the input files from `/data` and write the output files back to `/data`.
+
+## Example
+
+We provide an example of a docker image that can be used as a reference. It can be found in our [github repository](https://github.com/createcolor/nightimaging23)
+
+Your dockerfile may look like this:
+
+```dockerfile
+FROM tensorflow/tensorflow:2.3.0
+WORKDIR /opt/app
+COPY . .
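+# note (added for clarity): `COPY . .` above copies requirements.txt into the
+# WORKDIR (/opt/app), so it can be installed directly from there; adjust the
+# path if your requirements file lives elsewhere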
+RUN pip install -r requirements.txt
+CMD ["./run.sh"]
+```
diff --git a/IIR-Lab/ISP_pipeline/imaging.py b/IIR-Lab/ISP_pipeline/imaging.py
new file mode 100644
index 0000000000000000000000000000000000000000..786a0c896ca7b69d1ff93f18a136b7cc685accd2
--- /dev/null
+++ b/IIR-Lab/ISP_pipeline/imaging.py
@@ -0,0 +1,1293 @@
+# Note:
+# The functions try to operate in float32 data precision
+
+# =============================================================
+# Import the libraries
+# =============================================================
+import numpy as np            # array operations
+import math                   # basic math operations
+from matplotlib import pylab as plt
+import time                   # measure runtime
+import utility
+import debayer
+import sys                    # float precision
+from scipy import signal      # convolutions
+from scipy import interpolate # for interpolation
+
+
+
+# =============================================================
+# class: ImageInfo
+#   Helps set up necessary information/metadata of the image
+# =============================================================
+class ImageInfo:
+    def __init__(self, name = "unknown", data = -1, is_show = False):
+        self.name = name
+        self.data = data
+        self.size = np.shape(self.data)
+        self.is_show = is_show
+        self.color_space = "unknown"
+        self.bayer_pattern = "unknown"
+        self.channel_gain = (1.0, 1.0, 1.0, 1.0)
+        self.bit_depth = 0
+        self.black_level = (0, 0, 0, 0)
+        self.white_level = (1, 1, 1, 1)
+        self.color_matrix = [[1., .0, .0],\
+                             [.0, 1., .0],\
+                             [.0, .0, 1.]] # xyz2cam
+        self.min_value = np.min(self.data)
+        self.max_value = np.max(self.data)
+        self.data_type = self.data.dtype
+
+        # Display the image only if is_show is True
+        if (self.is_show):
+            plt.imshow(self.data)
+            plt.show()
+
+    def set_data(self, data):
+        # This function updates data and the corresponding fields
+        self.data = data
+        self.size = np.shape(self.data)
+        self.data_type = self.data.dtype
+        self.min_value = np.min(self.data)
+        self.max_value = np.max(self.data)
+
+    def get_size(self):
+        return self.size
+
+    def get_width(self):
+        return self.size[1]
+
+    def get_height(self):
+        return self.size[0]
+
+    def get_depth(self):
+        if np.ndim(self.data) > 2:
+            return self.size[2]
+        else:
+            return 0
+
+    def set_color_space(self, color_space):
+        self.color_space = color_space
+
+    def get_color_space(self):
+        return self.color_space
+
+    def set_channel_gain(self, channel_gain):
+        self.channel_gain = channel_gain
+
+    def get_channel_gain(self):
+        return self.channel_gain
+
+    def set_color_matrix(self, color_matrix):
+        self.color_matrix = color_matrix
+
+    def get_color_matrix(self):
+        return self.color_matrix
+
+    def set_bayer_pattern(self, bayer_pattern):
+        self.bayer_pattern = bayer_pattern
+
+    def get_bayer_pattern(self):
+        return self.bayer_pattern
+
+    def set_bit_depth(self, bit_depth):
+        self.bit_depth = bit_depth
+
+    def get_bit_depth(self):
+        return self.bit_depth
+
+    def set_black_level(self, black_level):
+        self.black_level = black_level
+
+    def get_black_level(self):
+        return self.black_level
+
+    def set_white_level(self, white_level):
+        self.white_level = white_level
+
+    def get_white_level(self):
+        return self.white_level
+
+    def get_min_value(self):
+        return self.min_value
+
+    def get_max_value(self):
+        return self.max_value
+
+    def get_data_type(self):
+        return self.data_type
+
+    def __str__(self):
+        return "Image " + self.name + " info:" + \
+                          "\n\tname:\t" + self.name + \
+                          "\n\tsize:\t" + str(self.size) + \
+                          "\n\tcolor space:\t" + self.color_space + \
+                          "\n\tbayer pattern:\t" + self.bayer_pattern + \
"\n\tchannel gains:\t" + str(self.channel_gain) + \ + "\n\tbit depth:\t" + str(self.bit_depth) + \ + "\n\tdata type:\t" + str(self.data_type) + \ + "\n\tblack level:\t" + str(self.black_level) + \ + "\n\tminimum value:\t" + str(self.min_value) + \ + "\n\tmaximum value:\t" + str(self.max_value) + + +# ============================================================= +# function: black_level_correction +# subtracts the black level channel wise +# ============================================================= +def black_level_correction(raw, black_level, white_level, clip_range): + + print("----------------------------------------------------") + print("Running black level correction...") + + # make float32 in case if it was not + black_level = np.float32(black_level) + white_level = np.float32(white_level) + raw = np.float32(raw) + + # create new data so that original raw data do not change + data = np.zeros(raw.shape) + + # bring data in range 0 to 1 + data[::2, ::2] = (raw[::2, ::2] - black_level[0]) / (white_level[0] - black_level[0]) + data[::2, 1::2] = (raw[::2, 1::2] - black_level[1]) / (white_level[1] - black_level[1]) + data[1::2, ::2] = (raw[1::2, ::2] - black_level[2]) / (white_level[2] - black_level[2]) + data[1::2, 1::2] = (raw[1::2, 1::2]- black_level[3]) / (white_level[3] - black_level[3]) + + # bring within the bit depth range + data = data * clip_range[1] + + # clip within the range + data = np.clip(data, clip_range[0], clip_range[1]) # upper level not necessary + data = np.float32(data) + + return data + + +# ============================================================= +# function: channel_gain_white_balance +# multiply with the white balance channel gains +# ============================================================= +def channel_gain_white_balance(data, channel_gain): + + print("----------------------------------------------------") + print("Running channel gain white balance...") + + # convert into float32 in case they were not + data = np.float32(data) + channel_gain = np.float32(channel_gain) + + # multiply with the channel gains + data[::2, ::2] = data[::2, ::2] * channel_gain[0] + data[::2, 1::2] = data[::2, 1::2] * channel_gain[1] + data[1::2, ::2] = data[1::2, ::2] * channel_gain[2] + data[1::2, 1::2] = data[1::2, 1::2] * channel_gain[3] + + # clipping within range + data = np.clip(data, 0., None) # upper level not necessary + + return data + + +# ============================================================= +# function: bad_pixel_correction +# correct for the bad (dead, stuck, or hot) pixels +# ============================================================= +def bad_pixel_correction(data, neighborhood_size): + + print("----------------------------------------------------") + print("Running bad pixel correction...") + + if ((neighborhood_size % 2) == 0): + print("neighborhood_size shoud be odd number, recommended value 3") + return data + + # convert to float32 in case they were not + # Being consistent in data format to be float32 + data = np.float32(data) + + # Separate out the quarter resolution images + D = {} # Empty dictionary + D[0] = data[::2, ::2] + D[1] = data[::2, 1::2] + D[2] = data[1::2, ::2] + D[3] = data[1::2, 1::2] + + # number of pixels to be padded at the borders + no_of_pixel_pad = math.floor(neighborhood_size / 2.) 
+ + for idx in range(0, len(D)): # perform same operation for each quarter + + # display progress + print("bad pixel correction: Quarter " + str(idx+1) + " of 4") + + img = D[idx] + width, height = utility.helpers(img).get_width_height() + + # pad pixels at the borders + img = np.pad(img, \ + (no_of_pixel_pad, no_of_pixel_pad),\ + 'reflect') # reflect would not repeat the border value + + for i in range(no_of_pixel_pad, height + no_of_pixel_pad): + for j in range(no_of_pixel_pad, width + no_of_pixel_pad): + + # save the middle pixel value + mid_pixel_val = img[i, j] + + # extract the neighborhood + neighborhood = img[i - no_of_pixel_pad : i + no_of_pixel_pad+1,\ + j - no_of_pixel_pad : j + no_of_pixel_pad+1] + + # set the center pixels value same as the left pixel + # Does not matter replace with right or left pixel + # is used to replace the center pixels value + neighborhood[no_of_pixel_pad, no_of_pixel_pad] = neighborhood[no_of_pixel_pad, no_of_pixel_pad-1] + + min_neighborhood = np.min(neighborhood) + max_neighborhood = np.max(neighborhood) + + if (mid_pixel_val < min_neighborhood): + img[i,j] = min_neighborhood + elif (mid_pixel_val > max_neighborhood): + img[i,j] = max_neighborhood + else: + img[i,j] = mid_pixel_val + + # Put the corrected image to the dictionary + D[idx] = img[no_of_pixel_pad : height + no_of_pixel_pad,\ + no_of_pixel_pad : width + no_of_pixel_pad] + + # Regrouping the data + data[::2, ::2] = D[0] + data[::2, 1::2] = D[1] + data[1::2, ::2] = D[2] + data[1::2, 1::2] = D[3] + + return data + + +# ============================================================= +# class: demosaic +# ============================================================= +class demosaic: + def __init__(self, data, bayer_pattern="rggb", clip_range=[0, 65535], name="demosaic"): + self.data = np.float32(data) + self.bayer_pattern = bayer_pattern + self.clip_range = clip_range + self.name = name + + def mhc(self, timeshow=False): + + print("----------------------------------------------------") + print("Running demosaicing using Malvar-He-Cutler algorithm...") + + return debayer.debayer_mhc(self.data, self.bayer_pattern, self.clip_range, timeshow) + + def post_process_local_color_ratio(self, beta): + # Objective is to reduce high chroma jump + # Beta is controlling parameter, higher gives more effect, + # however, too high does not make any more change + + print("----------------------------------------------------") + print("Demosaicing post process using local color ratio...") + + data = self.data + + # add beta with the data to prevent divide by zero + data_beta = self.data + beta + + # convolution kernels + # zeta1 averages the up, down, left, and right four values of a 3x3 window + zeta1 = np.multiply([[0., 1., 0.], [1., 0., 1.], [0., 1., 0.]], .25) + # zeta2 averages the four corner values of a 3x3 window + zeta2 = np.multiply([[1., 0., 1.], [0., 0., 0.], [1., 0., 1.]], .25) + + # average of color ratio + g_over_b = signal.convolve2d(np.divide(data_beta[:, :, 1], data_beta[:, :, 2]), zeta1, mode="same", boundary="symm") + g_over_r = signal.convolve2d(np.divide(data_beta[:, :, 1], data_beta[:, :, 0]), zeta1, mode="same", boundary="symm") + b_over_g_zeta2 = signal.convolve2d(np.divide(data_beta[:, :, 2], data_beta[:, :, 1]), zeta2, mode="same", boundary="symm") + r_over_g_zeta2 = signal.convolve2d(np.divide(data_beta[:, :, 0], data_beta[:, :, 1]), zeta2, mode="same", boundary="symm") + b_over_g_zeta1 = signal.convolve2d(np.divide(data_beta[:, :, 2], data_beta[:, :, 1]), zeta1, mode="same", 
boundary="symm") + r_over_g_zeta1 = signal.convolve2d(np.divide(data_beta[:, :, 0], data_beta[:, :, 1]), zeta1, mode="same", boundary="symm") + + # G at B locations and G at R locations + if self.bayer_pattern == "rggb": + # G at B locations + data[1::2, 1::2, 1] = -beta + np.multiply(data_beta[1::2, 1::2, 2], g_over_b[1::2, 1::2]) + # G at R locations + data[::2, ::2, 1] = -beta + np.multiply(data_beta[::2, ::2, 0], g_over_r[::2, ::2]) + # B at R locations + data[::2, ::2, 2] = -beta + np.multiply(data_beta[::2, ::2, 1], b_over_g_zeta2[::2, ::2]) + # R at B locations + data[1::2, 1::2, 0] = -beta + np.multiply(data_beta[1::2, 1::2, 1], r_over_g_zeta2[1::2, 1::2]) + # B at G locations + data[::2, 1::2, 2] = -beta + np.multiply(data_beta[::2, 1::2, 1], b_over_g_zeta1[::2, 1::2]) + data[1::2, ::2, 2] = -beta + np.multiply(data_beta[1::2, ::2, 1], b_over_g_zeta1[1::2, ::2]) + # R at G locations + data[::2, 1::2, 0] = -beta + np.multiply(data_beta[::2, 1::2, 1], r_over_g_zeta1[::2, 1::2]) + data[1::2, ::2, 0] = -beta + np.multiply(data_beta[1::2, ::2, 1], r_over_g_zeta1[1::2, ::2]) + + elif self.bayer_pattern == "grbg": + # G at B locations + data[1::2, ::2, 1] = -beta + np.multiply(data_beta[1::2, ::2, 2], g_over_b[1::2, 1::2]) + # G at R locations + data[::2, 1::2, 1] = -beta + np.multiply(data_beta[::2, 1::2, 0], g_over_r[::2, 1::2]) + # B at R locations + data[::2, 1::2, 2] = -beta + np.multiply(data_beta[::2, 1::2, 1], b_over_g_zeta2[::2, 1::2]) + # R at B locations + data[1::2, ::2, 0] = -beta + np.multiply(data_beta[1::2, ::2, 1], r_over_g_zeta2[1::2, ::2]) + # B at G locations + data[::2, ::2, 2] = -beta + np.multiply(data_beta[::2, ::2, 1], b_over_g_zeta1[::2, ::2]) + data[1::2, 1::2, 2] = -beta + np.multiply(data_beta[1::2, 1::2, 1], b_over_g_zeta1[1::2, 1::2]) + # R at G locations + data[::2, ::2, 0] = -beta + np.multiply(data_beta[::2, ::2, 1], r_over_g_zeta1[::2, ::2]) + data[1::2, 1::2, 0] = -beta + np.multiply(data_beta[1::2, 1::2, 1], r_over_g_zeta1[1::2, 1::2]) + + elif self.bayer_pattern == "gbrg": + # G at B locations + data[::2, 1::2, 1] = -beta + np.multiply(data_beta[::2, 1::2, 2], g_over_b[::2, 1::2]) + # G at R locations + data[1::2, ::2, 1] = -beta + np.multiply(data_beta[1::2, ::2, 0], g_over_r[1::2, ::2]) + # B at R locations + data[1::2, ::2, 2] = -beta + np.multiply(data_beta[1::2, ::2, 1], b_over_g_zeta2[1::2, ::2]) + # R at B locations + data[::2, 1::2, 0] = -beta + np.multiply(data_beta[::2, 1::2, 1], r_over_g_zeta2[::2, 1::2]) + # B at G locations + data[::2, ::2, 2] = -beta + np.multiply(data_beta[::2, ::2, 1], b_over_g_zeta1[::2, ::2]) + data[1::2, 1::2, 2] = -beta + np.multiply(data_beta[1::2, 1::2, 1], b_over_g_zeta1[1::2, 1::2]) + # R at G locations + data[::2, ::2, 0] = -beta + np.multiply(data_beta[::2, ::2, 1], r_over_g_zeta1[::2, ::2]) + data[1::2, 1::2, 0] = -beta + np.multiply(data_beta[1::2, 1::2, 1], r_over_g_zeta1[1::2, 1::2]) + + elif self.bayer_pattern == "bggr": + # G at B locations + data[::2, ::2, 1] = -beta + np.multiply(data_beta[::2, ::2, 2], g_over_b[::2, ::2]) + # G at R locations + data[1::2, 1::2, 1] = -beta + np.multiply(data_beta[1::2, 1::2, 0], g_over_r[1::2, 1::2]) + # B at R locations + data[1::2, 1::2, 2] = -beta + np.multiply(data_beta[1::2, 1::2, 1], b_over_g_zeta2[1::2, 1::2]) + # R at B locations + data[::2, ::2, 0] = -beta + np.multiply(data_beta[::2, ::2, 1], r_over_g_zeta2[::2, ::2]) + # B at G locations + data[::2, 1::2, 2] = -beta + np.multiply(data_beta[::2, 1::2, 1], b_over_g_zeta1[::2, 1::2]) + data[1::2, ::2, 2] = 
-beta + np.multiply(data_beta[1::2, ::2, 1], b_over_g_zeta1[1::2, ::2]) + # R at G locations + data[::2, 1::2, 0] = -beta + np.multiply(data_beta[::2, 1::2, 1], r_over_g_zeta1[::2, 1::2]) + data[1::2, ::2, 0] = -beta + np.multiply(data_beta[1::2, ::2, 1], r_over_g_zeta1[1::2, ::2]) + + + return np.clip(data, self.clip_range[0], self.clip_range[1]) + + + def directionally_weighted_gradient_based_interpolation(self): + # Reference: + # http://www.arl.army.mil/arlreports/2010/ARL-TR-5061.pdf + + print("----------------------------------------------------") + print("Running demosaicing using directionally weighted gradient based interpolation...") + + # Fill up the green channel + G = debayer.fill_channel_directional_weight(self.data, self.bayer_pattern) + + B, R = debayer.fill_br_locations(self.data, G, self.bayer_pattern) + + width, height = utility.helpers(self.data).get_width_height() + output = np.empty((height, width, 3), dtype=np.float32) + output[:, :, 0] = R + output[:, :, 1] = G + output[:, :, 2] = B + + return np.clip(output, self.clip_range[0], self.clip_range[1]) + + + def post_process_median_filter(self, edge_detect_kernel_size=3, edge_threshold=0, median_filter_kernel_size=3, clip_range=[0, 65535]): + # Objective is to reduce the zipper effect around the edges + # Inputs: + # edge_detect_kernel_size: the neighborhood size used to detect edges + # edge_threshold: the threshold value above which (compared against) + # the gradient_magnitude to declare if it is an edge + # median_filter_kernel_size: the neighborhood size used to perform + # median filter operation + # clip_range: used for scaling in edge_detection + # + # Output: + # output: median filtered output around the edges + # edge_location: a debug image to see where the edges were detected + # based on the threshold + + + # detect edge locations + edge_location = utility.edge_detection(self.data).sobel(edge_detect_kernel_size, "is_edge", edge_threshold, clip_range) + + # allocate space for output + output = np.empty(np.shape(self.data), dtype=np.float32) + + if (np.ndim(self.data) > 2): + + for i in range(0, np.shape(self.data)[2]): + output[:, :, i] = utility.helpers(self.data[:, :, i]).edge_wise_median(median_filter_kernel_size, edge_location[:, :, i]) + + elif (np.ndim(self.data) == 2): + output = utility.helpers(self.data).edge_wise_median(median_filter_kernel_size, edge_location) + + return output, edge_location + + def __str__(self): + return self.name + + +# ============================================================= +# class: lens_shading_correction +# Correct the lens shading / vignetting +# ============================================================= +class lens_shading_correction: + def __init__(self, data, name="lens_shading_correction"): + # convert to float32 in case it was not + self.data = np.float32(data) + self.name = name + + def flat_field_compensation(self, dark_current_image, flat_field_image): + # dark_current_image: + # is captured from the camera with cap on + # and fully dark condition, several images captured and + # temporally averaged + # flat_field_image: + # is found by capturing an image of a flat field test chart + # with certain lighting condition + # Note: flat_field_compensation is memory intensive procedure because + # both the dark_current_image and flat_field_image need to be + # saved in memory beforehand + print("----------------------------------------------------") + print("Running lens shading correction with flat field compensation...") + + # convert to float32 in case it 
was not
+        dark_current_image = np.float32(dark_current_image)
+        flat_field_image = np.float32(flat_field_image)
+        temp = flat_field_image - dark_current_image
+        return np.average(temp) * np.divide((self.data - dark_current_image), temp)
+
+    def approximate_mathematical_compensation(self, params, clip_min=0, clip_max=65535):
+        # params:
+        #   parameters of a parabolic model y = a*(x-b)^2 + c
+        #   For example, params = [0.01759, -28.37, -13.36]
+        # Note: approximate_mathematical_compensation requires less memory
+        print("----------------------------------------------------")
+        print("Running lens shading correction with approximate mathematical compensation...")
+        width, height = utility.helpers(self.data).get_width_height()
+
+        center_pixel_pos = [height/2, width/2]
+        max_distance = utility.distance_euclid(center_pixel_pos, [height, width])
+
+        # allocate memory for output
+        temp = np.empty((height, width), dtype=np.float32)
+
+        for i in range(0, height):
+            for j in range(0, width):
+                distance = utility.distance_euclid(center_pixel_pos, [i, j]) / max_distance
+                # parabolic model
+                gain = params[0] * (distance - params[1])**2 + params[2]
+                temp[i, j] = self.data[i, j] * gain
+
+        temp = np.clip(temp, clip_min, clip_max)
+        return temp
+
+    def __str__(self):
+        return "lens shading correction. There are two methods: " + \
+               "\n (1) flat_field_compensation: requires dark_current_image and flat_field_image" + \
+               "\n (2) approximate_mathematical_compensation: requires the parameters of a parabolic model"
+
+
+# =============================================================
+# class: bayer_denoising
+#   reduce noise in the Bayer domain
+# =============================================================
+class bayer_denoising:
+    def __init__(self, data, name="bayer_denoising"):
+        # convert to float32 in case it was not
+        self.data = np.float32(data)
+        self.name = name
+
+    def utilize_hvs_behavior(self, bayer_pattern, initial_noise_level, hvs_min, hvs_max, threshold_red_blue, clip_range):
+        # Objective: bayer denoising
+        # Inputs:
+        #   bayer_pattern: rggb, gbrg, grbg, bggr
+        #   initial_noise_level: starting estimate for the noise level estimator
+        # Output:
+        #   denoised bayer raw output
+        # Source: Based on the paper titled "Noise Reduction for CFA Image Sensors
+        #   Exploiting HVS Behaviour," by Angelo Bosco, Sebastiano Battiato,
+        #   Arcangelo Bruna and Rosetta Rizzo
+        #   Sensors 2009, 9, 1692-1713; doi:10.3390/s90301692
+
+        print("----------------------------------------------------")
+        print("Running bayer denoising utilizing hvs behavior...")
+
+        # copy self.data to raw and only work on raw,
+        # to make sure no changes happen to self.data
+        raw = self.data
+        raw = np.clip(raw, clip_range[0], clip_range[1])
+        width, height = utility.helpers(raw).get_width_height()
+
+        # First make the bayer_pattern rggb
+        # The algorithm is written only for the rggb pattern, thus convert all other
+        # patterns to rggb.
Furthermore, this shuffling does not affect the + # algorithm output + if (bayer_pattern != "rggb"): + raw = utility.helpers(self.data).shuffle_bayer_pattern(bayer_pattern, "rggb") + + # fixed neighborhood_size + neighborhood_size = 5 # we are keeping this fixed + # bigger size such as 9 can be declared + # however, the code need to be changed then + + # pad two pixels at the border + no_of_pixel_pad = math.floor(neighborhood_size / 2) # number of pixels to pad + + raw = np.pad(raw, \ + (no_of_pixel_pad, no_of_pixel_pad),\ + 'reflect') # reflect would not repeat the border value + + # allocating space for denoised output + denoised_out = np.empty((height, width), dtype=np.float32) + + texture_degree_debug = np.empty((height, width), dtype=np.float32) + for i in range(no_of_pixel_pad, height + no_of_pixel_pad): + for j in range(no_of_pixel_pad, width + no_of_pixel_pad): + + # center pixel + center_pixel = raw[i, j] + + # signal analyzer block + half_max = clip_range[1] / 2 + if (center_pixel <= half_max): + hvs_weight = -(((hvs_max - hvs_min) * center_pixel) / half_max) + hvs_max + else: + hvs_weight = (((center_pixel - clip_range[1]) * (hvs_max - hvs_min))/(clip_range[1] - half_max)) + hvs_max + + # noise level estimator previous value + if (j < no_of_pixel_pad+2): + noise_level_previous_red = initial_noise_level + noise_level_previous_blue = initial_noise_level + noise_level_previous_green = initial_noise_level + else: + noise_level_previous_green = noise_level_current_green + if ((i % 2) == 0): # red + noise_level_previous_red = noise_level_current_red + elif ((i % 2) != 0): # blue + noise_level_previous_blue = noise_level_current_blue + + # Processings depending on Green or Red/Blue + # Red + if (((i % 2) == 0) and ((j % 2) == 0)): + # get neighborhood + neighborhood = [raw[i-2, j-2], raw[i-2, j], raw[i-2, j+2],\ + raw[i, j-2], raw[i, j+2],\ + raw[i+2, j-2], raw[i+2, j], raw[i+2, j+2]] + + # absolute difference from the center pixel + d = np.abs(neighborhood - center_pixel) + + # maximum and minimum difference + d_max = np.max(d) + d_min = np.min(d) + + # calculate texture_threshold + texture_threshold = hvs_weight + noise_level_previous_red + + # texture degree analyzer + if (d_max <= threshold_red_blue): + texture_degree = 1. + elif ((d_max > threshold_red_blue) and (d_max <= texture_threshold)): + texture_degree = -((d_max - threshold_red_blue) / (texture_threshold - threshold_red_blue)) + 1. + elif (d_max > texture_threshold): + texture_degree = 0. + + # noise level estimator update + noise_level_current_red = texture_degree * d_max + (1 - texture_degree) * noise_level_previous_red + + # Blue + elif (((i % 2) != 0) and ((j % 2) != 0)): + + # get neighborhood + neighborhood = [raw[i-2, j-2], raw[i-2, j], raw[i-2, j+2],\ + raw[i, j-2], raw[i, j+2],\ + raw[i+2, j-2], raw[i+2, j], raw[i+2, j+2]] + + # absolute difference from the center pixel + d = np.abs(neighborhood - center_pixel) + + # maximum and minimum difference + d_max = np.max(d) + d_min = np.min(d) + + # calculate texture_threshold + texture_threshold = hvs_weight + noise_level_previous_blue + + # texture degree analyzer + if (d_max <= threshold_red_blue): + texture_degree = 1. + elif ((d_max > threshold_red_blue) and (d_max <= texture_threshold)): + texture_degree = -((d_max - threshold_red_blue) / (texture_threshold - threshold_red_blue)) + 1. + elif (d_max > texture_threshold): + texture_degree = 0. 
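+                    # Worked example (hypothetical numbers, not from the paper):
+                    # with threshold_red_blue = 219 and texture_threshold = 300,
+                    # d_max = 250 gives
+                    #     texture_degree = 1 - (250 - 219) / (300 - 219) ~= 0.62,
+                    # i.e. a mixed flat/texture region, so the update below blends
+                    # d_max with the previous noise level estimate accordingly.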
+ + # noise level estimator update + noise_level_current_blue = texture_degree * d_max + (1 - texture_degree) * noise_level_previous_blue + + # Green + elif ((((i % 2) == 0) and ((j % 2) != 0)) or (((i % 2) != 0) and ((j % 2) == 0))): + + neighborhood = [raw[i-2, j-2], raw[i-2, j], raw[i-2, j+2],\ + raw[i-1, j-1], raw[i-1, j+1],\ + raw[i, j-2], raw[i, j+2],\ + raw[i+1, j-1], raw[i+1, j+1],\ + raw[i+2, j-2], raw[i+2, j], raw[i+2, j+2]] + + # difference from the center pixel + d = np.abs(neighborhood - center_pixel) + + # maximum and minimum difference + d_max = np.max(d) + d_min = np.min(d) + + # calculate texture_threshold + texture_threshold = hvs_weight + noise_level_previous_green + + # texture degree analyzer + if (d_max == 0): + texture_degree = 1 + elif ((d_max > 0) and (d_max <= texture_threshold)): + texture_degree = -(d_max / texture_threshold) + 1. + elif (d_max > texture_threshold): + texture_degree = 0 + + # noise level estimator update + noise_level_current_green = texture_degree * d_max + (1 - texture_degree) * noise_level_previous_green + + # similarity threshold calculation + if (texture_degree == 1): + threshold_low = threshold_high = d_max + elif (texture_degree == 0): + threshold_low = d_min + threshold_high = (d_max + d_min) / 2 + elif ((texture_degree > 0) and (texture_degree < 1)): + threshold_high = (d_max + ((d_max + d_min) / 2)) / 2 + threshold_low = (d_min + threshold_high) / 2 + + # weight computation + weight = np.empty(np.size(d), dtype=np.float32) + pf = 0. + for w_i in range(0, np.size(d)): + if (d[w_i] <= threshold_low): + weight[w_i] = 1. + elif (d[w_i] > threshold_high): + weight[w_i] = 0. + elif ((d[w_i] > threshold_low) and (d[w_i] < threshold_high)): + weight[w_i] = 1. + ((d[w_i] - threshold_low) / (threshold_low - threshold_high)) + + pf += weight[w_i] * neighborhood[w_i] + (1. 
- weight[w_i]) * center_pixel
+
+                denoised_out[i - no_of_pixel_pad, j - no_of_pixel_pad] = pf / np.size(d)
+                # texture_degree_debug is a debug output
+                texture_degree_debug[i - no_of_pixel_pad, j - no_of_pixel_pad] = texture_degree
+
+        if (bayer_pattern != "rggb"):
+            denoised_out = utility.shuffle_bayer_pattern(denoised_out, "rggb", bayer_pattern)
+
+        return np.clip(denoised_out, clip_range[0], clip_range[1]), texture_degree_debug
+
+    def __str__(self):
+        return self.name
+
+
+# =============================================================
+# class: color_correction
+#   Correct the color in the linear domain
+# =============================================================
+class color_correction:
+    def __init__(self, data, color_matrix, color_space="srgb", illuminant="d65", name="color correction", clip_range=[0, 65535]):
+        # Inputs:
+        #   data: linear rgb image before nonlinearity/gamma
+        #   xyz2cam: 3x3 matrix found in the camera metadata, specifically
+        #            color matrix 2 from the metadata
+        #   color_space: output color space
+        #   illuminant: the illuminant of the lighting condition
+        #   name: name of the class
+        self.data = np.float32(data)
+        self.xyz2cam = np.float32(color_matrix)
+        self.color_space = color_space
+        self.illuminant = illuminant
+        self.name = name
+        self.clip_range = clip_range
+
+    def get_rgb2xyz(self):
+        # Objective: get the rgb2xyz matrix depending on the output color space
+        #            and the illuminant
+        # Source: http://www.brucelindbloom.com/index.html?Eqn_RGB_XYZ_Matrix.html
+        if (self.color_space == "srgb"):
+            if (self.illuminant == "d65"):
+                return [[.4124564, .3575761, .1804375],\
+                        [.2126729, .7151522, .0721750],\
+                        [.0193339, .1191920, .9503041]]
+            elif (self.illuminant == "d50"):
+                return [[.4360747, .3850649, .1430804],\
+                        [.2225045, .7168786, .0606169],\
+                        [.0139322, .0971045, .7141733]]
+            else:
+                print("for now, illuminant must be d65 or d50")
+                return
+
+        elif (self.color_space == "adobe-rgb-1998"):
+            if (self.illuminant == "d65"):
+                return [[.5767309, .1855540, .1881852],\
+                        [.2973769, .6273491, .0752741],\
+                        [.0270343, .0706872, .9911085]]
+            elif (self.illuminant == "d50"):
+                return [[.6097559, .2052401, .1492240],\
+                        [.3111242, .6256560, .0632197],\
+                        [.0194811, .0608902, .7448387]]
+            else:
+                print("for now, illuminant must be d65 or d50")
+                return
+        else:
+            print("for now, color_space must be srgb or adobe-rgb-1998")
+            return
+
+    def calculate_cam2rgb(self):
+        # Objective: Calculates the color correction matrix
+
+        # matrix multiplication
+        rgb2cam = np.dot(self.xyz2cam, self.get_rgb2xyz())
+
+        # make the sum of each row equal to 1.0, necessary to preserve white balance;
+        # basically divide each value by its row-wise sum
+        rgb2cam = np.divide(rgb2cam, np.reshape(np.sum(rgb2cam, 1), [3, 1]))
+
+        # - invert the matrix to get cam2rgb.
+        # - cam2rgb should also have the characteristic that the sum of each row
+        #   equals 1.0 to preserve white balance
+        # - check if rgb2cam is invertible by checking the condition number of
+        #   rgb2cam. If rgb2cam is singular, give a warning and
+        #   return an identity matrix
+        if (np.linalg.cond(rgb2cam) < (1 / sys.float_info.epsilon)):
+            return np.linalg.inv(rgb2cam) # this is cam2rgb / color correction matrix
+        else:
+            print("Warning! matrix not invertible.")
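+            # note (added for clarity): np.linalg.cond returns the 2-norm
+            # condition number, and comparing it against 1/eps flags matrices
+            # that are numerically singular; in that case the identity is
+            # returned below and the colors pass through uncorrected.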
+            return np.identity(3, dtype=np.float32)
+
+    def apply_cmatrix(self):
+        # Objective: Apply the color correction matrix (cam2rgb)
+
+        print("----------------------------------------------------")
+        print("Running color correction...")
+
+        # check if data is 3 dimensional
+        if (np.ndim(self.data) != 3):
+            print("data needs to be three dimensional")
+            return
+
+        # get the color correction matrix
+        cam2rgb = self.calculate_cam2rgb()
+
+        # get width and height
+        width, height = utility.helpers(self.data).get_width_height()
+
+        # apply the matrix
+        R = self.data[:, :, 0]
+        G = self.data[:, :, 1]
+        B = self.data[:, :, 2]
+
+        color_corrected = np.empty((height, width, 3), dtype=np.float32)
+        color_corrected[:, :, 0] = R * cam2rgb[0, 0] + G * cam2rgb[0, 1] + B * cam2rgb[0, 2]
+        color_corrected[:, :, 1] = R * cam2rgb[1, 0] + G * cam2rgb[1, 1] + B * cam2rgb[1, 2]
+        color_corrected[:, :, 2] = R * cam2rgb[2, 0] + G * cam2rgb[2, 1] + B * cam2rgb[2, 2]
+
+        return np.clip(color_corrected, self.clip_range[0], self.clip_range[1])
+
+    def __str__(self):
+        return self.name
+
+
+# =============================================================
+# class: nonlinearity
+#   apply gamma or degamma
+# =============================================================
+class nonlinearity:
+    def __init__(self, data, name="nonlinearity"):
+        self.data = np.float32(data)
+        self.name = name
+
+    def luma_adjustment(self, multiplier, clip_range=[0, 65535]):
+        # The multiplier is applied only on the luma channel
+        # and is given in log10 scale:
+        # a multiplier of 10 means multiplied by 1.,
+        # a multiplier of 100 means multiplied by 2., and so on
+
+        print("----------------------------------------------------")
+        print("Running brightening...")
+
+        return np.clip(np.log10(multiplier) * self.data, clip_range[0], clip_range[1])
+
+    def by_value(self, value, clip_range):
+
+        print("----------------------------------------------------")
+        print("Running nonlinearity by value...")
+
+        # clip within the range
+        data = np.clip(self.data, clip_range[0], clip_range[1])
+        # make 0 to 1
+        data = data / clip_range[1]
+        # apply nonlinearity
+        return np.clip(clip_range[1] * (data**value), clip_range[0], clip_range[1])
+
+    def by_table(self, table, nonlinearity_type="gamma", clip_range=[0, 65535]):
+
+        print("----------------------------------------------------")
+        print("Running nonlinearity by table...")
+
+        gamma_table = np.loadtxt(table)
+        gamma_table = clip_range[1] * gamma_table / np.max(gamma_table)
+        linear_table = np.linspace(clip_range[0], clip_range[1], np.size(gamma_table))
+
+        # linear interpolation, query is the self.data
+        if (nonlinearity_type == "gamma"):
+            # mapping is from linear_table to gamma_table
+            return np.clip(np.interp(self.data, linear_table, gamma_table), clip_range[0], clip_range[1])
+        elif (nonlinearity_type == "degamma"):
+            # mapping is from gamma_table to linear_table
+            return np.clip(np.interp(self.data, gamma_table, linear_table), clip_range[0], clip_range[1])
+
+    def by_equation(self, a, b, clip_range):
+
+        print("----------------------------------------------------")
+        print("Running nonlinearity by equation...")
+
+        # clip within the range
+        data = np.clip(self.data, clip_range[0], clip_range[1])
+        # make 0 to 1
+        data = data / clip_range[1]
+
+        # apply nonlinearity
+        return np.clip(clip_range[1] * (a * np.exp(b * data) + data + a * data - a * np.exp(b) * data - a), clip_range[0], clip_range[1])
+
+    def __str__(self):
+        return self.name
+
+
+# =============================================================
+# class: tone_mapping
+#   improve the overall tone of the image
+# =============================================================
+class tone_mapping:
+    def __init__(self, data, name="tone mapping"):
+        self.data = np.float32(data)
+        self.name = name
+
+    def nonlinear_masking(self, strength_multiplier=1.0, gaussian_kernel_size=[5, 5], gaussian_sigma=1.0, clip_range=[0, 65535]):
+        # Objective: improves the overall tone of the image
+        # Inputs:
+        #   strength_multiplier: >0. The higher, the more aggressive the tone mapping
+        #   gaussian_kernel_size: kernel size for calculating the mask image
+        #   gaussian_sigma: spread of the gaussian kernel for calculating the
+        #                   mask image
+        #
+        # Source:
+        # N. Moroney, “Local color correction using non-linear masking”,
+        # Proc. IS&T/SID 8th Color Imaging Conference, pp. 108-111, (2000)
+        #
+        # Note: slight changes were made by Mushfiqul Alam, specifically
+        # the introduction of the strength_multiplier
+
+        print("----------------------------------------------------")
+        print("Running tone mapping by non linear masking...")
+
+        # convert to gray image
+        if (np.ndim(self.data) == 3):
+            gray_image = utility.color_conversion(self.data).rgb2gray()
+        else:
+            gray_image = self.data
+
+        # gaussian blur the gray image
+        gaussian_kernel = utility.create_filter().gaussian(gaussian_kernel_size, gaussian_sigma)
+
+        # the mask image: (1) blur
+        #                 (2) bring within range 0 to 1
+        #                 (3) multiply with strength_multiplier
+        mask = signal.convolve2d(gray_image, gaussian_kernel, mode="same", boundary="symm")
+        mask = strength_multiplier * mask / clip_range[1]
+
+        # calculate the alpha image
+        temp = np.power(0.5, mask)
+        if (np.ndim(self.data) == 3):
+            width, height = utility.helpers(self.data).get_width_height()
+            alpha = np.empty((height, width, 3), dtype=np.float32)
+            alpha[:, :, 0] = temp
+            alpha[:, :, 1] = temp
+            alpha[:, :, 2] = temp
+        else:
+            alpha = temp
+
+        # output
+        return np.clip(clip_range[1] * np.power(self.data/clip_range[1], alpha), clip_range[0], clip_range[1])
+
+    def dynamic_range_compression(self, drc_type="normal", drc_bound=[-40., 260.], clip_range=[0, 65535]):
+
+        ycc = utility.color_conversion(self.data).rgb2ycc("bt601")
+        y = ycc[:, :, 0]
+        cb = ycc[:, :, 1]
+        cr = ycc[:, :, 2]
+
+        if (drc_type == "normal"):
+            edge = y
+        elif (drc_type == "joint"):
+            edge = utility.edge_detection(y).sobel(3, "gradient_magnitude")
+
+        y_bilateral_filtered = utility.special_function(y).bilateral_filter(edge)
+        detail = np.divide(ycc[:, :, 0], y_bilateral_filtered)
+
+        C = drc_bound[0] * clip_range[1] / 255.
+        temp = drc_bound[1] * clip_range[1] / 255.
+        F = (temp * (C + clip_range[1])) / (clip_range[1] * (temp - C))
+        y_bilateral_filtered_contrast_reduced = F * (y_bilateral_filtered - (clip_range[1] / 2.)) + (clip_range[1] / 2.)
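+        # Worked example of the bound mapping above (hypothetical 8-bit case):
+        # with clip_range[1] = 255 and drc_bound = [-40., 260.]:
+        #     C = -40, temp = 260,
+        #     F = (260 * (-40 + 255)) / (255 * (260 - (-40))) ~= 0.73,
+        # so the base layer's contrast about the mid level (127.5) is reduced
+        # to roughly 73% before the detail layer is multiplied back in below.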
+
+        y_out = np.multiply(y_bilateral_filtered_contrast_reduced, detail)
+
+        ycc_out = ycc
+        ycc_out[:, :, 0] = y_out
+        rgb_out = utility.color_conversion(ycc_out).ycc2rgb("bt601")
+
+        return np.clip(rgb_out, clip_range[0], clip_range[1])
+
+
+# =============================================================
+# class: sharpening
+#   sharpens the image
+# =============================================================
+class sharpening:
+    def __init__(self, data, name="sharpening"):
+        self.data = np.float32(data)
+        self.name = name
+
+    def unsharp_masking(self, gaussian_kernel_size=[5, 5], gaussian_sigma=2.0,\
+                        slope=1.5, tau_threshold=0.05, gamma_speed=4., clip_range=[0, 65535]):
+        # Objective: sharpen image
+        # Input:
+        #   gaussian_kernel_size: dimension of the gaussian blur filter kernel
+        #
+        #   gaussian_sigma: spread of the gaussian blur filter kernel,
+        #                   bigger sigma means more sharpening
+        #
+        #   slope: controls the boost,
+        #          i.e. the amount of sharpening; a higher slope
+        #          means more aggressive sharpening
+        #
+        #   tau_threshold: controls the amount of coring,
+        #                  the threshold value below which the image is
+        #                  not sharpened. The lower the value of
+        #                  tau_threshold, the more frequencies
+        #                  go through the sharpening process
+        #
+        #   gamma_speed: controls the speed of convergence to the slope;
+        #                a smaller value gives a slightly more
+        #                sharpened image, this may be a fine tuner
+
+        print("----------------------------------------------------")
+        print("Running sharpening by unsharp masking...")
+
+        # create gaussian kernel
+        gaussian_kernel = utility.create_filter().gaussian(gaussian_kernel_size, gaussian_sigma)
+
+        # convolve the image with the gaussian kernel:
+        # first input is the image, second input is the kernel;
+        # output shape will be the same as the first input;
+        # the boundary is padded using the symmetrical method while convolving
+        if np.ndim(self.data) > 2:
+            image_blur = np.empty(np.shape(self.data), dtype=np.float32)
+            for i in range(0, np.shape(self.data)[2]):
+                image_blur[:, :, i] = signal.convolve2d(self.data[:, :, i], gaussian_kernel, mode="same", boundary="symm")
+        else:
+            image_blur = signal.convolve2d(self.data, gaussian_kernel, mode="same", boundary="symm")
+
+        # the high frequency component image
+        image_high_pass = self.data - image_blur
+
+        # soft coring (see in utility)
+        # basically pass the high pass image via a slightly nonlinear function
+        tau_threshold = tau_threshold * clip_range[1]
+
+        # add the soft cored high pass image to the original and clip
+        # within range and return
+        return np.clip(self.data + utility.special_function(\
+                       image_high_pass).soft_coring(\
+                       slope, tau_threshold, gamma_speed), clip_range[0], clip_range[1])
+
+    def __str__(self):
+        return self.name
+
+
+# =============================================================
+# class: noise_reduction
+#   reduce noise of the nonlinear image (after gamma)
+# =============================================================
+class noise_reduction:
+    def __init__(self, data, clip_range=[0, 65535], name="noise reduction"):
+        self.data = np.float32(data)
+        self.clip_range = clip_range
+        self.name = name
+
+    def sigma_filter(self, neighborhood_size=7, sigma=[6, 6, 6]):
+
+        print("----------------------------------------------------")
+        print("Running noise reduction by sigma filter...")
+
+        if np.ndim(self.data) > 2: # if rgb image
+            output = np.empty(np.shape(self.data), dtype=np.float32)
+            for i in range(0, np.shape(self.data)[2]):
+                output[:, :, i] = utility.helpers(self.data[:, :, i]).sigma_filter_helper(neighborhood_size, sigma[i])
+            return np.clip(output, self.clip_range[0], self.clip_range[1])
+        else: # gray image
+            return np.clip(utility.helpers(self.data).sigma_filter_helper(neighborhood_size, sigma), self.clip_range[0], self.clip_range[1])
+
+    def __str__(self):
+        return self.name
+
+
+# =============================================================
+# class: distortion_correction
+#   correct the distortion
+# =============================================================
+class distortion_correction:
+    def __init__(self, data, name="distortion correction"):
+        self.data = np.float32(data)
+        self.name = name
+
+
+    def empirical_correction(self, correction_type="pincushion-1", strength=0.1, zoom_type="crop", clip_range=[0, 65535]):
+        #------------------------------------------------------
+        # Objective:
+        #   correct geometric distortion with the assumption that the distortion
+        #   is symmetric and centered at the center of the image
+        # Input:
+        #   correction_type: which type of correction needs to be carried
+        #                    out; choose one of the four:
+        #                    pincushion-1, pincushion-2, barrel-1, barrel-2
+        #                    1 and 2 differ in the power applied
+        #                    over the radius
+        #
+        #   strength: should be equal to or greater than 0.
+        #             0 means no correction will be done.
+        #             if a negative value were applied, the correction_type
+        #             would be reversed; thus, a value >= 0 is expected.
+        #
+        #   zoom_type: either "fit" or "crop";
+        #              fit will return an image with the full content
+        #              in the whole area,
+        #              crop will return an image with 0 values outside
+        #              the border
+        #
+        #   clip_range: to clip the final image within the range
+        #------------------------------------------------------
+
+        if (strength < 0):
+            print("Warning! strength should be equal to or greater than 0.")
+            return self.data
+
+        print("----------------------------------------------------")
+        print("Running distortion correction by empirical method...")
+
+        # get half_width and half_height, assume this is the center
+        width, height = utility.helpers(self.data).get_width_height()
+        half_width = width / 2
+        half_height = height / 2
+
+        # create a meshgrid of points
+        xi, yi = np.meshgrid(np.linspace(-half_width, half_width, width),\
+                             np.linspace(-half_height, half_height, height))
+
+        # cartesian to polar coordinates
+        r = np.sqrt(xi**2 + yi**2)
+        theta = np.arctan2(yi, xi)
+
+        # maximum radius
+        R = math.sqrt(width**2 + height**2)
+
+        # make r within range 0~1
+        r = r / R
+
+        # apply the radius to the desired transformation
+        s = utility.special_function(r).distortion_function(correction_type, strength)
+
+        # select a scaling_parameter based on zoom_type and the strength value
+        if ((correction_type=="barrel-1") or (correction_type=="barrel-2")):
+            if (zoom_type == "fit"):
+                scaling_parameter = r[0, 0] / s[0, 0]
+            elif (zoom_type == "crop"):
+                scaling_parameter = 1. / (1. + strength * (np.min([half_width, half_height])/R)**2)
+        elif ((correction_type=="pincushion-1") or (correction_type=="pincushion-2")):
+            if (zoom_type == "fit"):
+                scaling_parameter = 1. / (1. + strength * (np.min([half_width, half_height])/R)**2)
+            elif (zoom_type == "crop"):
+                scaling_parameter = r[0, 0] / s[0, 0]
+
+        # multiply by scaling_parameter and un-normalize
+        s = s * scaling_parameter * R
+
+        # convert back to cartesian coordinates and add back the center coordinate
+        xt = np.multiply(s, np.cos(theta))
+        yt = np.multiply(s, np.sin(theta))
+
+        # interpolation
+        if np.ndim(self.data) == 3:
+
+            output = np.empty(np.shape(self.data), dtype=np.float32)
+
+            output[:, :, 0] = utility.helpers(self.data[:, :, 0]).bilinear_interpolation(xt + half_width, yt + half_height)
+            output[:, :, 1] = utility.helpers(self.data[:, :, 1]).bilinear_interpolation(xt + half_width, yt + half_height)
+            output[:, :, 2] = utility.helpers(self.data[:, :, 2]).bilinear_interpolation(xt + half_width, yt + half_height)
+
+        elif np.ndim(self.data) == 2:
+
+            output = utility.helpers(self.data).bilinear_interpolation(xt + half_width, yt + half_height)
+
+        return np.clip(output, clip_range[0], clip_range[1])
+
+
+    def __str__(self):
+        return self.name
+
+
+# =============================================================
+# class: memory_color_enhancement
+#   enhance memory colors such as sky, grass, skin color
+# =============================================================
+class memory_color_enhancement:
+    def __init__(self, data, name="memory color enhancement"):
+        self.data = np.float32(data)
+        self.name = name
+
+    def by_hue_squeeze(self, target_hue, hue_preference, hue_sigma, is_both_side, multiplier, chroma_preference, chroma_sigma, color_space="srgb", illuminant="d65", clip_range=[0, 65535], cie_version="1931"):
+
+        # RGB to xyz
+        data = utility.color_conversion(self.data).rgb2xyz(color_space, clip_range)
+        # xyz to lab
+        data = utility.color_conversion(data).xyz2lab(cie_version, illuminant)
+        # lab to lch
+        data = utility.color_conversion(data).lab2lch()
+
+        # hue squeezing
+        # we are traversing through different color preferences
+        width, height = utility.helpers(self.data).get_width_height()
+        hue_correction = np.zeros((height, width), dtype=np.float32)
+        for i in range(0, np.size(target_hue)):
+
+            delta_hue = data[:, :, 2] - hue_preference[i]
+
+            if is_both_side[i]:
+                weight_temp = np.exp( -np.power(data[:, :, 2] - target_hue[i], 2) / (2 * hue_sigma[i]**2)) + \
+                              np.exp( -np.power(data[:, :, 2] + target_hue[i], 2) / (2 * hue_sigma[i]**2))
+            else:
+                weight_temp = np.exp( -np.power(data[:, :, 2] - target_hue[i], 2) / (2 * hue_sigma[i]**2))
+
+            weight_hue = multiplier[i] * weight_temp / np.max(weight_temp)
+
+            weight_chroma = np.exp( -np.power(data[:, :, 1] - chroma_preference[i], 2) / (2 * chroma_sigma[i]**2))
+
+            hue_correction = hue_correction + np.multiply(np.multiply(delta_hue, weight_hue), weight_chroma)
+
+        # correct the hue
+        data[:, :, 2] = data[:, :, 2] - hue_correction
+
+        # lch to lab
+        data = utility.color_conversion(data).lch2lab()
+        # lab to xyz
+        data = utility.color_conversion(data).lab2xyz(cie_version, illuminant)
+        # xyz to rgb
+        data = utility.color_conversion(data).xyz2rgb(color_space, clip_range)
+
+        return data
+
+
+    def __str__(self):
+        return self.name
+
+
+# =============================================================
+# class: chromatic_aberration_correction
+#   removes artifacts similar to those resulting from chromatic
+#   aberration
+# =============================================================
+class chromatic_aberration_correction:
+    def __init__(self, data, name="chromatic aberration correction"):
+        self.data = np.float32(data)
+        self.name = name
+
+    def purple_fringe_removal(self, nsr_threshold, cr_threshold, clip_range=[0, 65535]):
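+        # Illustrative numbers (hypothetical, not from the source): with
+        # clip_range = [0, 65535] and nsr_threshold = 90, the near-saturation
+        # cut computed below becomes 65535 * 90 / 100 ~= 58981.5, i.e. pixels
+        # whose channel mean exceeds ~90% of full scale count as near-saturated.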
+        # --------------------------------------------------------------
+        # nsr_threshold: near saturated region threshold (in percentage)
+        # cr_threshold: candidate region threshold
+        # --------------------------------------------------------------
+
+        width, height = utility.helpers(self.data).get_width_height()
+
+        r = self.data[:, :, 0]
+        g = self.data[:, :, 1]
+        b = self.data[:, :, 2]
+
+        ## Detection of purple fringe
+        # near saturated region detection
+        nsr_threshold = clip_range[1] * nsr_threshold / 100
+        temp = (r + g + b) / 3
+        temp = np.asarray(temp)
+        mask = temp > nsr_threshold
+        nsr = np.zeros((height, width)).astype(int)
+        nsr[mask] = 1
+
+        # candidate region detection
+        temp = r - b
+        temp1 = b - g
+        temp = np.asarray(temp)
+        temp1 = np.asarray(temp1)
+        mask = (temp < cr_threshold) & (temp1 > cr_threshold)
+        cr = np.zeros((height, width)).astype(int)
+        cr[mask] = 1
+
+        # quantization
+        qr = utility.helpers(r).nonuniform_quantization()
+        qg = utility.helpers(g).nonuniform_quantization()
+        qb = utility.helpers(b).nonuniform_quantization()
+
+        g_qr = utility.edge_detection(qr).sobel(5, "gradient_magnitude")
+        g_qg = utility.edge_detection(qg).sobel(5, "gradient_magnitude")
+        g_qb = utility.edge_detection(qb).sobel(5, "gradient_magnitude")
+
+        g_qr = np.asarray(g_qr)
+        g_qg = np.asarray(g_qg)
+        g_qb = np.asarray(g_qb)
+
+        # bgm: binary gradient magnitude
+        bgm = np.zeros((height, width), dtype=np.float32)
+        mask = (g_qr != 0) | (g_qg != 0) | (g_qb != 0)
+        bgm[mask] = 1
+
+        fringe_map = np.multiply(np.multiply(nsr, cr), bgm)
+        fringe_map = np.asarray(fringe_map)
+        mask = (fringe_map == 1)
+
+        # work on copies so that self.data is not modified in place
+        r1 = r.copy()
+        g1 = g.copy()
+        b1 = b.copy()
+        r1[mask] = g1[mask] = b1[mask] = (r[mask] + g[mask] + b[mask]) / 3.
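+        # Illustration (hypothetical pixel): a fringe candidate such as
+        # (r, g, b) = (180., 40., 200.) that is near-saturated on average and
+        # sits on a quantized edge is desaturated to its channel mean,
+        # (140., 140., 140.), while unmasked pixels pass through unchanged.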
+ + output = np.empty(np.shape(self.data), dtype=np.float32) + output[:, :, 0] = r1 + output[:, :, 1] = g1 + output[:, :, 2] = b1 + + return np.float32(output) + + + def __str__(self): + return self.name diff --git a/IIR-Lab/ISP_pipeline/lsc_table_r_gr_gb_b_2.npy b/IIR-Lab/ISP_pipeline/lsc_table_r_gr_gb_b_2.npy new file mode 100644 index 0000000000000000000000000000000000000000..4e644a21d2d7b4fc4f86854504df101082d4e684 --- /dev/null +++ b/IIR-Lab/ISP_pipeline/lsc_table_r_gr_gb_b_2.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0932e4b3ed5feb111880988eadae26a3cc77a5bd448150087b0983d8a62bb2b7 +size 402653312 diff --git a/IIR-Lab/ISP_pipeline/process_pngs_isp.py b/IIR-Lab/ISP_pipeline/process_pngs_isp.py new file mode 100644 index 0000000000000000000000000000000000000000..23ddd1bb3e041564c5324d8c9065e9f393e7d52b --- /dev/null +++ b/IIR-Lab/ISP_pipeline/process_pngs_isp.py @@ -0,0 +1,276 @@ +import sys +sys.path.append('ISP_pipeline') +from raw_prc_pipeline.pipeline import RawProcessingPipelineDemo +import cv2 +import numpy as np +import json +import PIL.Image as Image +import os,sys +from raw_prc_pipeline import io +from copy import deepcopy +import torch + +def resize_using_pil(img, width=1024, height=768): + img_pil = Image.fromarray(img) + out_size = (width, height) + if img_pil.size == out_size: + return img + out_img = img_pil.resize(out_size, Image.LANCZOS) + # out_img = img_pil + out_img = np.array(out_img) + return out_img + +def fix_orientation(image, orientation): + + if type(orientation) is list: + orientation = orientation[0] + + if orientation == 'Horizontal(normal)': + pass + elif orientation == "Mirror horizontal": + image = cv2.flip(image, 0) + elif orientation == "Rotate 180": + image = cv2.rotate(image, cv2.ROTATE_180) + elif orientation == "Mirror vertical": + image = cv2.flip(image, 1) + elif orientation == "Mirror horizontal and rotate 270 CW": + image = cv2.flip(image, 0) + image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE) + elif orientation == "Rotate 90 CW": + image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE) + elif orientation == "Mirror horizontal and rotate 90 CW": + image = cv2.flip(image, 0) + image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE) + elif orientation == "Rotate 270 CW": + image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE) + + return image + +def isp_night_imaging(data, meta_data, iso, + do_demosaic = True, # H/2 W/2 + + do_channel_gain_white_balance = True, + do_xyz_transform = True, + do_srgb_transform = True, + + do_gamma_correct = True, # con + + do_refinement = True, # 32 bit + do_to_uint8 = True, + + do_resize_using_pil = True, # H/8, W/8 + do_fix_orientation = True + ): + + pipeline_params = { + 'tone_mapping': 'Flash', # options: Flash, Storm, Base, Linear, Drago, Mantiuk, Reinhard + 'illumination_estimation': 'gw', # ie algorithm, options: "gw", "wp", "sog", "iwp" + 'denoise_flg': True, + 'out_landscape_width': 1024, + 'out_landscape_height': 768, + "color_matrix": [ 1.06835938, -0.29882812, -0.14257812, + -0.43164062, 1.35546875, 0.05078125, + -0.1015625, 0.24414062, 0.5859375] + } + + pipeline_demo = RawProcessingPipelineDemo(**pipeline_params) + + # =================================== + # Demosacing + # =================================== + if do_demosaic: + data = torch.stack((data[0,:,:], (data[1,:,:]+data[2,:,:])/2, data[3,:,:]), dim=0) + data = data.permute(1, 2, 0).contiguous() + # torch.cuda.empty_cache() + else: + pass + + # =================================== + # Channel gain for white 
balance + # =================================== + if do_channel_gain_white_balance: + data = pipeline_demo.white_balance(data, img_meta=meta_data) + + else: + pass + + # =================================== + # xyz_transform + # =================================== + if do_xyz_transform: + data = pipeline_demo.xyz_transform(data,img_meta=meta_data) # CCM + else: + pass + + # =================================== + # srgb_transform + # =================================== + if do_srgb_transform: + data = pipeline_demo.srgb_transform(data, img_meta=meta_data) # fix ccm + else: + pass + + # =================================== + # gamma_correct + # =================================== + if do_gamma_correct: + data = pipeline_demo.gamma_correct(data, img_meta=meta_data) + else: + pass + + # =================================== + # refinement + # =================================== + if do_refinement: + if iso < 1000: + pth1 = "Rendering_models/low_iso.pth" + data = pipeline_demo.do_refinement(data, "csrnet", pth1) + else: + pth1 = "Rendering_models/high_iso.pth" + data = pipeline_demo.do_refinement(data, "csrnet", pth1) + torch.cuda.empty_cache() + + else: + pass + + # =================================== + # to_uint8 + # =================================== + if do_to_uint8: + data = pipeline_demo.to_uint8(data, img_meta=meta_data) + torch.cuda.empty_cache() + else: + pass + + # =================================== + # resize_using_pil + # =================================== + if do_resize_using_pil: + data = resize_using_pil(data, pipeline_demo.params["out_landscape_width"], pipeline_demo.params["out_landscape_height"]) + + else: + pass + + # =================================== + # fix_orientation + # =================================== + if do_fix_orientation: + data = fix_orientation(data, meta_data["orientation"]) + else: + pass + + return data + +def readjson(json_path,): + with open(json_path,'r',encoding='UTF-8') as f: + result = json.load(f) + + return result + +def get_smooth_kernel_size(factor): + if factor == 1: + return (5, 5) + elif factor == 0.5: + return (3, 3) + elif factor == 0.375: + return (3, 3) + elif factor in [0.2, 0.25]: + return (5, 5) + elif factor == 0.125: + return (7, 7) + else: + raise Exception('Unknown factor') + +def read_rawpng(path, metadata): + + raw = cv2.imread(str(path), cv2.IMREAD_UNCHANGED) + + if raw.shape[0] == 4: + return raw * 959 + raw = (raw.astype(np.float32) - 256.) / (4095.- 256.) + + raw = bayer2raw(raw, metadata) + raw = np.clip(raw, 0., 1.) + return raw + +def bayer2raw(raw, metadata): + # pack RGGB Bayer raw to 4 channels + H, W = raw.shape + raw = raw[None, ...] 
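+    # Packing sketch (toy example): a 2x2 RGGB tile
+    #
+    #     R G      packs into 4 planes of shape (H/2, W/2),
+    #     G B      ordered [R, G (R row), G (B row), B],
+    #
+    # so raw_pack below has shape (4, H/2, W/2); the BGGR branch reorders the
+    # corner samples so that the output channel order is the same.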
+    if metadata['cfa_pattern'][0] == 0:
+        # RGGB
+        raw_pack = np.concatenate((raw[:, 0:H:2, 0:W:2],
+                                   raw[:, 0:H:2, 1:W:2],
+                                   raw[:, 1:H:2, 0:W:2],
+                                   raw[:, 1:H:2, 1:W:2]), axis=0)
+    else:
+        # BGGR
+        raw_pack = np.concatenate((raw[:, 1:H:2, 1:W:2],
+                                   raw[:, 0:H:2, 1:W:2],
+                                   raw[:, 1:H:2, 0:W:2],
+                                   raw[:, 0:H:2, 0:W:2]), axis=0)
+    return raw_pack
+
+def raw_rggb_float32(raws):
+    # unpack 4-channel raw back to an RGGB Bayer mosaic
+    C, H, W = raws.shape
+    output = np.zeros((H * 2, W * 2)).astype(np.float32)
+
+    output[0:2 * H:2, 0:2 * W:2] = raws[0:1, :, :]
+    output[0:2 * H:2, 1:2 * W:2] = raws[1:2, :, :]
+    output[1:2 * H:2, 0:2 * W:2] = raws[2:3, :, :]
+    output[1:2 * H:2, 1:2 * W:2] = raws[3:4, :, :]
+
+    return output
+
+def json_read(pth):
+    with open(pth) as j:
+        data = json.load(j)
+    return data
+
+def linear_insert_1color(img_dt, resize, fx=128, fy=128):
+    pos_0_0, pos_0_1, pos_1_1, pos_1_0, m, n = insert_linear_pos(img_dt=img_dt, resize=resize, x_scale=fx, y_scale=fy)
+    a = (pos_1_0 - pos_0_0)
+    b = (pos_0_1 - pos_0_0)
+    c = pos_1_1 + pos_0_0 - pos_1_0 - pos_0_1
+    return np.round(a * n + b * m + c * n * m + pos_0_0).astype(int)
+
+def insert_linear_pos(img_dt, resize, x_scale=128, y_scale=128):
+    m_, n_ = img_dt.shape
+    # compute the size of the new image
+    if resize is None:
+        n_new, m_new = np.round(x_scale * n_).astype(int), np.round(y_scale * m_).astype(int)
+    else:
+        n_new, m_new = resize
+
+    n_scale, m_scale = n_ / n_new, m_ / m_new # src_width/dst_width, src_height/dst_height
+    # Step 1: get the four corner points corresponding to each position
+    # 1-1: initialize the destination grid positions
+    m_indxs = np.repeat(np.arange(m_new), n_new).reshape(m_new, n_new)
+    n_indxs = np.array(list(range(n_new))*m_new).reshape(m_new, n_new)
+    # 1-2: map them to center-aligned source coordinates
+    m_indxs_c = (m_indxs + 0.5) * m_scale - 0.5
+    n_indxs_c = (n_indxs + 0.5) * n_scale - 0.5
+    ### clamp negative values to 0
+    m_indxs_c[np.where(m_indxs_c < 0)] = 0.0
+    n_indxs_c[np.where(n_indxs_c < 0)] = 0.0
+
+    # 1-3: get the corner coordinates of the surrounding square
+    m_indxs_c_down = m_indxs_c.astype(int)
+    n_indxs_c_down = n_indxs_c.astype(int)
+    m_indxs_c_up = m_indxs_c_down + 1
+    n_indxs_c_up = n_indxs_c_down + 1
+    ### correct indices that overflow the source image
+    m_max = m_ - 1
+    n_max = n_ - 1
+    m_indxs_c_up[np.where(m_indxs_c_up > m_max)] = m_max
+    n_indxs_c_up[np.where(n_indxs_c_up > n_max)] = n_max
+
+    # 1-4: fetch the pixel values at the four corners of the square
+    pos_0_0 = img_dt[m_indxs_c_down, n_indxs_c_down].astype(int)
+    pos_0_1 = img_dt[m_indxs_c_up, n_indxs_c_down].astype(int)
+    pos_1_1 = img_dt[m_indxs_c_up, n_indxs_c_up].astype(int)
+    pos_1_0 = img_dt[m_indxs_c_down, n_indxs_c_up].astype(int)
+    # 1-5: get the fractional (floating point) offsets
+    m, n = np.modf(m_indxs_c)[0], np.modf(n_indxs_c)[0]
+    return pos_0_0, pos_0_1, pos_1_1, pos_1_0, m, n
diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__init__.py b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..145944fea100721d2745c2584ec7557df753950f
--- /dev/null
+++ b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__init__.py
@@ -0,0 +1,3 @@
+expected_img_ext = '.jpg'
+expected_landscape_img_height = 866
+expected_landscape_img_width = 1300
\ No newline at end of file
diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/__init__.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fbe18bd8d1fd89d1d68b3481740d41ed88a008f8
Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/__init__.cpython-312.pyc differ
diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/__init__.cpython-39.pyc
b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5c2191acdb9f99b93c69cece093f852919acc5e8 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/__init__.cpython-39.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/arch_util.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/arch_util.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5d982291defc518df78fa3a7cd7444336838d779 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/arch_util.cpython-312.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/color.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/color.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7f6a873959f5780e4fe729c842c373af142011bf Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/color.cpython-312.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/color.cpython-39.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/color.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2c3a0741f0dfd7cc30b97afa41fe1c6bbce40eb9 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/color.cpython-39.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/csrnet_network.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/csrnet_network.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..15d4f02699b6a967b87e696c4c0f2e7b38c15a32 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/csrnet_network.cpython-312.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/csrnet_network.cpython-39.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/csrnet_network.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bf60e0892388f6cd2a5c2dc06b9dc86381cab320 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/csrnet_network.cpython-39.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/exif_data_formats.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/exif_data_formats.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..68dd5004acdd82dd1a2e078ac4b2d22fb3742141 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/exif_data_formats.cpython-312.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/exif_data_formats.cpython-39.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/exif_data_formats.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..54b2d49f26a6ec079f98fa71971895cf480d4723 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/exif_data_formats.cpython-39.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/exif_utils.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/exif_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5a8112462b25275c6be2a2ad64f409453a41ab46 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/exif_utils.cpython-312.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/exif_utils.cpython-39.pyc 
b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/exif_utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9628c90d86a561cc416d356a7a207a8a3aeb83ff Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/exif_utils.cpython-39.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/fs.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/fs.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a3c6975dce23c2cfc8f450e31129c8216d14ad2f Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/fs.cpython-312.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/fs.cpython-39.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/fs.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d48459227043bbeacec46c3efb1811cb61e5a020 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/fs.cpython-39.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/io.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/io.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f8e338eee14957aad749860343ea75a5ceb1f708 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/io.cpython-312.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/io.cpython-39.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/io.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cf3690caa1168b5531834ceb99eac2b30024df17 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/io.cpython-39.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/lut_network.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/lut_network.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..33b62ba43af43152c5c28551108a489598c24603 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/lut_network.cpython-312.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/misc.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/misc.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6b6c965995ebd6f3197aceb83053df276da1148f Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/misc.cpython-312.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/misc.cpython-39.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/misc.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9c32161d29dbb5e15723bfea06f6008f76fc2e51 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/misc.cpython-39.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/optim.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/optim.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d704966e579136719f38e6ff50b28470da8475ec Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/optim.cpython-312.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/optim.cpython-39.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/optim.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a01a80b1260b20527116b4dbdf60c01ff54e72d3 
Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/optim.cpython-39.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..247a3da50412ffad85a2d0f710b2fc27813e2686 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline.cpython-312.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline.cpython-39.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cbe8427e47b064bc745f2bdfa63403a5c2d0c8aa Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline.cpython-39.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline_bm3d.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline_bm3d.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..16a24c409b9490abbdd09e398c979ed21bf96b1a Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline_bm3d.cpython-312.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline_bm3d.cpython-39.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline_bm3d.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a67dcc4c1bf1dd19c90e3f7065f81f4124ffb2b1 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline_bm3d.cpython-39.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline_utils.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cb2273bd40e7f1ea4b87d32cdb3c3b849d9597fa Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline_utils.cpython-312.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline_utils.cpython-39.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline_utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f8793a15091901e79e587d9903da63cce922c56c Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline_utils.cpython-39.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/refine_network.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/refine_network.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f2c93de8e68e9783ab30f49da50d1933563e834b Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/refine_network.cpython-312.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/arch_util.py b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/arch_util.py new file mode 100644 index 0000000000000000000000000000000000000000..3b0480f800f3ca33720886abbca6841c796c3b9c --- /dev/null +++ b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/arch_util.py @@ -0,0 +1,626 @@ +import torch +import torch.nn as nn +import torch.nn.init as init +import torch.nn.functional as F + + +def initialize_weights(net_l, scale=1): + if not isinstance(net_l, list): + net_l = [net_l] + for net in net_l: + for m in net.modules(): + if isinstance(m, nn.Conv2d): + init.kaiming_normal_(m.weight, a=0, mode='fan_in') + m.weight.data *= scale # 
for residual block + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.Linear): + init.kaiming_normal_(m.weight, a=0, mode='fan_in') + m.weight.data *= scale + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.BatchNorm2d): + init.constant_(m.weight, 1) + init.constant_(m.bias.data, 0.0) + + +def make_layer(block, n_layers): + layers = [] + for _ in range(n_layers): + layers.append(block()) + return nn.Sequential(*layers) + + +class ResidualBlock_noBN(nn.Module): + '''Residual block w/o BN + ---Conv-ReLU-Conv-+- + |________________| + ''' + + def __init__(self, nf=64): + super(ResidualBlock_noBN, self).__init__() + self.conv1 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True) + self.conv2 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True) + + # initialization + initialize_weights([self.conv1, self.conv2], 0.1) + + def forward(self, x): + identity = x + out = F.relu(self.conv1(x), inplace=True) + out = self.conv2(out) + return identity + out + + +def flow_warp(x, flow, interp_mode='bilinear', padding_mode='zeros'): + """Warp an image or feature map with optical flow + Args: + x (Tensor): size (N, C, H, W) + flow (Tensor): size (N, H, W, 2), normal value + interp_mode (str): 'nearest' or 'bilinear' + padding_mode (str): 'zeros' or 'border' or 'reflection' + + Returns: + Tensor: warped image or feature map + """ + assert x.size()[-2:] == flow.size()[1:3] + B, C, H, W = x.size() + # mesh grid + grid_y, grid_x = torch.meshgrid(torch.arange(0, H), torch.arange(0, W)) + grid = torch.stack((grid_x, grid_y), 2).float() # W(x), H(y), 2 + grid.requires_grad = False + grid = grid.type_as(x) + vgrid = grid + flow + # scale grid to [-1,1] + vgrid_x = 2.0 * vgrid[:, :, :, 0] / max(W - 1, 1) - 1.0 + vgrid_y = 2.0 * vgrid[:, :, :, 1] / max(H - 1, 1) - 1.0 + vgrid_scaled = torch.stack((vgrid_x, vgrid_y), dim=3) + output = F.grid_sample(x, vgrid_scaled, mode=interp_mode, padding_mode=padding_mode) + return output + +""" +Copyright (c) 2022 Samsung Electronics Co., Ltd. + +Author(s): +Luxi Zhao (lucy.zhao@samsung.com; lucyzhao.zlx@gmail.com) +Abdelrahman Abdelhamed (abdoukamel@gmail.com) + +Licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0) License, (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at https://creativecommons.org/licenses/by-nc-sa/4.0 +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. +For conditions of distribution and use, see the accompanying LICENSE.md file. + +""" + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +def utils_get_image_stats(image_shape, grid_size): + """ + Information about the cropped image. + :return: grid size, tile size, sizes of the 4 margins, meshgrids. 
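+    Margins absorb the residual pixels when the image size is not an exact
+    multiple of the grid, so margin_bot/margin_right can be larger than
+    margin_top/margin_left. For example, a 512x512 image with an 8x8 grid
+    yields 64x64 tiles with 32-pixel margins on every side. (Only tile size
+    and margins are computed here; grid size and meshgrids come from
+    get_image_stats.)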
+ """ + + grid_rows = grid_size[0] + grid_cols = grid_size[1] + + residual_height = image_shape[0] % grid_rows + residual_width = image_shape[1] % grid_cols + + tile_height = image_shape[0] // grid_rows + tile_width = image_shape[1] // grid_cols + + margin_top = tile_height // 2 + margin_left = tile_width // 2 + + margin_bot = tile_height + residual_height - margin_top + margin_right = tile_width + residual_width - margin_left + + return tile_height, tile_width, margin_top, margin_left, margin_bot, margin_right + +def apply_ltm_lut(imgs, luts): + + imgs = (imgs - .5) * 2. + + grids = imgs.unsqueeze(0).unsqueeze(0) + luts = luts.unsqueeze(0) + + outs = F.grid_sample(luts, grids, + mode='bilinear', padding_mode='border', align_corners=True) + + return outs.squeeze(0).squeeze(1).permute(1,2,0) + + +def apply_ltm(image, tone_curve, num_curves): + """ + Apply tone curve to an image (patch). + :param image: (h, w, 3) if num_curves == 3, else (h, w) + :param tone_curve: (num_curves, control_points) + :param num_curves: 3 for 1 curve per channel, 1 for 1 curve for all channels. + :return: tone-mapped image. + """ + + if image.shape[-1] == 3: + if type(image) == np.ndarray: + r = tone_curve[0][image[..., 0]] + g = tone_curve[1][image[..., 1]] + b = tone_curve[2][image[..., 2]] + new_image = np.stack((r, g, b), axis=-1) + else: + r = tone_curve[0][image[..., 0].reshape(-1).long()].reshape(image[..., 0].shape) + g = tone_curve[1][image[..., 1].reshape(-1).long()].reshape(image[..., 1].shape) + b = tone_curve[2][image[..., 2].reshape(-1).long()].reshape(image[..., 2].shape) + new_image = torch.stack((r, g, b), axis=-1) + # new_image = np.stack((r, g, b), axis=-1) + else: + new_image = tone_curve[0][image[..., 0].reshape(-1).long()].reshape(image[..., 0].shape).unsqueeze(dim=2) + #tone_curve[0][image[..., 0].reshape(-1).long()].reshape(image[..., 0].shape) + + return new_image + + +def apply_gtm(image, tone_curve, num_curves): + """ + Apply a single tone curve to an image. + :param image: (h, w, 3) if num_curves == 3, else (h, w) + :param tone_curve: (1, num_curves, control_points) + :param num_curves: 3 for 1 curve per channel, 1 for 1 curve for all channels. + :return: tone-mapped image. + """ + tone_curve = tone_curve[0] + out = apply_ltm(image, tone_curve, num_curves) + return out + + +def apply_ltm_center(image, tone_curves, stats, num_curves): + """ + Apply tone curves to the center region of an image. + :param image: the original image. + :param tone_curves: a list of all tone curves in row scan order. + :return: interpolated center region of an image. 
+ """ + grid_rows, grid_cols, tile_height, tile_width, margin_top, margin_left, margin_bot, margin_right, meshgrids = stats + xs_tl, ys_tl, xs_br, ys_br = meshgrids['center'] + + if torch.cuda.is_available(): + device = torch.device("cuda") + else: + device = torch.device("cpu") + xs_tl = xs_tl.to(device) + ys_tl = ys_tl.to(device) + xs_br = xs_br.to(device) + ys_br = ys_br.to(device) + + + # Get neighbourhoods + neighbourhoods = [] + for y in range(margin_top, image.shape[0]-margin_bot, tile_height): + for x in range(margin_left, image.shape[1]-margin_right, tile_width): + neighbourhoods.append(image[y:y + tile_height, x:x + tile_width, :]) + + assert len(neighbourhoods) == (grid_rows-1) * (grid_cols-1) + + # Get indices for all 4-tile neighbourhoods + tile_ids = [] + for i in range(grid_rows - 1): + for j in range(grid_cols - 1): + start = i * grid_cols + j + tile_ids.append([start, start + 1, start + grid_cols, start + grid_cols + 1]) + + # Apply LTM and interpolate + new_ns = [] + for i, n in enumerate(neighbourhoods): + n_tile_ids = tile_ids[i] # ids of the 4 tone curves (tiles) of the neighbourhood + # n_4versions = [apply_ltm(n, tone_curves[j], num_curves) for j in n_tile_ids] # tl, tr, bl, br + n_4versions = [apply_ltm_lut(n, tone_curves[j])for j in n_tile_ids] + out = ys_br * xs_br * n_4versions[0] + ys_br * xs_tl * n_4versions[1] + ys_tl * xs_br * n_4versions[2] + ys_tl * xs_tl * n_4versions[3] + out /= (tile_height-1) * (tile_width-1) + + new_ns.append(out) + + # Stack the interpolated neighbourhoods together + rows = [] + for i in range(grid_rows - 1): + cols = [new_ns[i * (grid_cols - 1) + j] for j in range(grid_cols - 1)] + row = torch.cat(cols, dim=1) + rows.append(row) + out = torch.cat(rows, dim=0) + return out + + +def apply_ltm_border(image, tone_curves, stats, num_curves=3): + """ + Apply tone curves to the border, not including corner areas. + :param image: the original image. + :param tone_curves: a list of all tone curves in row scan order. + :return: interpolated border regions of the image. In order of top, bottom, left, right. 
+ """ + grid_rows, grid_cols, tile_height, tile_width, margin_top, margin_left, margin_bot, margin_right, meshgrids = stats + (top_xs_l, top_xs_r), (bot_xs_l, bot_xs_r), (left_ys_t, left_ys_b), (right_ys_t, right_ys_b) = meshgrids['border'] + + if torch.cuda.is_available(): + device = torch.device("cuda") + else: + device = torch.device("cpu") + + top_xs_l = top_xs_l.to(device) + top_xs_r = top_xs_r.to(device) + bot_xs_l = bot_xs_l.to(device) + bot_xs_r = bot_xs_r.to(device) + + left_ys_t = left_ys_t.to(device) + left_ys_b = left_ys_b.to(device) + right_ys_t = right_ys_t.to(device) + right_ys_b = right_ys_b.to(device) + # top, bottom, left, right neighbourhoods to be interpolated + ntop = [] + nbot = [] + nleft = [] + nright = [] + + for x in range(margin_left, image.shape[1] - margin_right, tile_width): + ntop.append(image[:margin_top, x:x + tile_width, :]) + nbot.append(image[-margin_bot:, x:x + tile_width, :]) + + for y in range(margin_top, image.shape[0] - margin_bot, tile_height): + nleft.append(image[y:y + tile_height, :margin_left, :]) + nright.append(image[y:y + tile_height, -margin_right:, :]) + + def apply_ltm_two_tiles(tc1, tc2, meshgrid1, meshgrid2, nbhd, interp_length, num_curves): + """ + Apply tone curve to, and interpolate a two-tile neighbourhood, either horizontal or vertical + :param tc1: left / top tone curves + :param tc2: right / bottom tone curves + :param meshgrid1: left / top meshgrids (leftmost / topmost positions are 0) + :param meshgrid2: right / bottom meshgrids (rightmost / bottommost positions are 0) + :param nbhd: neighbourhood to interpolate + :param interp_length: normalizing factor of the meshgrid. + Example: if xs = np.meshgrid(np.arange(10)), then interp_length = 9 + :return: interpolated neighbourhood + """ + + # new_nbhd1 = apply_ltm(nbhd, tc1, num_curves) + # new_nbhd2 = apply_ltm(nbhd, tc2, num_curves) + + new_nbhd1 = apply_ltm_lut(nbhd, tc1) + new_nbhd2 = apply_ltm_lut(nbhd, tc2) + + out = meshgrid1 * new_nbhd2 + meshgrid2 * new_nbhd1 + out /= interp_length + return out + + new_ntop = [apply_ltm_two_tiles(tone_curves[i], # left tone curve + tone_curves[i + 1], # right tone curve + top_xs_l, top_xs_r, + n, tile_width - 1, num_curves) for i, n in enumerate(ntop)] + + new_nbot = [apply_ltm_two_tiles(tone_curves[(grid_rows - 1) * grid_cols + i], # left tone curve + tone_curves[(grid_rows - 1) * grid_cols + i + 1], # right tone curve + bot_xs_l, bot_xs_r, + n, tile_width - 1, num_curves) for i, n in enumerate(nbot)] + + new_nleft = [apply_ltm_two_tiles(tone_curves[i * grid_cols], # top tone curve + tone_curves[(i + 1) * grid_cols], # bottom tone curve + left_ys_t, left_ys_b, + n, tile_height - 1, num_curves) for i, n in enumerate(nleft)] + + new_nright = [apply_ltm_two_tiles(tone_curves[(i + 1) * grid_cols - 1], # top tone curve + tone_curves[(i + 2) * grid_cols - 1], # bottom tone curve + right_ys_t, right_ys_b, + n, tile_height - 1, num_curves) for i, n in enumerate(nright)] + + new_ntop = torch.cat(new_ntop, dim=1) + new_nbot = torch.cat(new_nbot, dim=1) + new_nleft = torch.cat(new_nleft, dim=0) + new_nright = torch.cat(new_nright, dim=0) + return new_ntop, new_nbot, new_nleft, new_nright + + +def apply_ltm_corner(image, tone_curves, stats, num_curves=3): + """ + tone_curves: a list of all tone curves in row scan order. 
+    return: interpolated corner tiles in the order of top left, top right, bot left, bot right
+    """
+    grid_rows, grid_cols, tile_height, tile_width, margin_top, margin_left, margin_bot, margin_right, _ = stats
+
+    # tl, tr, bl, br in row-scan order; the bottom-left tile is the first
+    # tile of the last row, i.e. index -grid_cols
+    corner_ids = [0, grid_cols - 1, -grid_cols, -1]
+    tl_tile = image[:margin_top, :margin_left]
+    tr_tile = image[:margin_top, -margin_right:]
+    bl_tile = image[-margin_bot:, :margin_left]
+    br_tile = image[-margin_bot:, -margin_right:]
+
+    corner_tiles = [tl_tile, tr_tile, bl_tile, br_tile]
+    corner_tcs = [tone_curves[i] for i in corner_ids]  # tcs: (grid_size, num_curves, control_points)
+    #new_tiles = [apply_ltm(corner_tiles[i], corner_tcs[i], num_curves) for i in range(len(corner_tcs))]
+    new_tiles = [apply_ltm_lut(corner_tiles[i], corner_tcs[i]) for i in range(len(corner_tcs))]
+
+    return new_tiles[0], new_tiles[1], new_tiles[2], new_tiles[3]
+
+
+# def get_meshgrids(height, width):
+#     """
+#     Get two meshgrids of size (height, width). One with top left corner being (0, 0),
+#     the other with bottom right corner being (0, 0).
+#     :return: top left xs, ys, bottom right xs, ys
+#     """
+#     xs, ys = np.meshgrid(np.arange(width), np.arange(height))
+#     newys, newxs = torch.meshgrid(torch.arange(height, dtype=torch.int32), torch.arange(width, dtype=torch.int32))
+#     # mesh grid for top left corner
+#     xs_tl = np.tile(np.abs(xs)[..., np.newaxis], 3)  # [0, 1, 2, ..., tile_width-1]
+#     ys_tl = np.tile(np.abs(ys)[..., np.newaxis], 3)
+#     new_xs_tl = newxs[..., None].abs().repeat(1, 1, 3)
+#     new_ys_tl = newys[..., None].abs().repeat(1, 1, 3)
+#     # mesh grid for bottom right corner
+#     xs_br = np.tile(np.abs(xs - width + 1)[..., np.newaxis], 3)  # [-(tile_width-1), ..., -2, -1, 0]
+#     ys_br = np.tile(np.abs(ys - height + 1)[..., np.newaxis], 3)
+
+#     new_xs_br = (newxs - width + 1).abs()[..., None].repeat(1, 1, 3)
+#     new_ys_br = (newys - width + 1).abs()[..., None].repeat(1, 1, 3)
+#     # return xs_tl, ys_tl, xs_br, ys_br
+#     return new_xs_tl, new_ys_tl, new_xs_br, new_ys_br
+def get_meshgrids(height, width):
+    """
+    Get two meshgrids of size (height, width). One with top left corner being (0, 0),
+    the other with bottom right corner being (0, 0).
+    :return: top left xs, ys, bottom right xs, ys
+    """
+    xs, ys = np.meshgrid(np.arange(width), np.arange(height))
+    # mesh grid for top left corner
+    xs_tl = np.tile(np.abs(xs)[..., np.newaxis], 3)  # [0, 1, 2, ..., tile_width-1]
+    ys_tl = np.tile(np.abs(ys)[..., np.newaxis], 3)
+    # mesh grid for bottom right corner
+    xs_br = np.tile(np.abs(xs - width + 1)[..., np.newaxis], 3)  # [-(tile_width-1), ..., -2, -1, 0]
+    ys_br = np.tile(np.abs(ys - height + 1)[..., np.newaxis], 3)
+
+    return torch.tensor(xs_tl), torch.tensor(ys_tl), torch.tensor(xs_br), torch.tensor(ys_br)
+
+
+
+def get_meshgrid_center(tile_height, tile_width):
+    return get_meshgrids(tile_height, tile_width)
+
+
+def get_meshgrid_border(tile_height, tile_width, margin_top, margin_left, margin_bot, margin_right):
+    """
+    :return: meshgrids for the 4 border regions, in the order of top, bottom, left, right
+    """
+    # top
+    top_xs_l, _, top_xs_r, _ = get_meshgrids(margin_top, tile_width)
+
+    # bottom
+    bot_xs_l, _, bot_xs_r, _ = get_meshgrids(margin_bot, tile_width)
+
+    # left
+    _, left_ys_t, _, left_ys_b = get_meshgrids(tile_height, margin_left)
+
+    # right
+    _, right_ys_t, _, right_ys_b = get_meshgrids(tile_height, margin_right)
+
+    return (top_xs_l, top_xs_r), (bot_xs_l, bot_xs_r), (left_ys_t, left_ys_b), (right_ys_t, right_ys_b)
+
+
+def get_image_stats(image, grid_size):
+    """
+    Information about the cropped image.
+    :param image: the original image
+    :return: grid size, tile size, sizes of the 4 margins, meshgrids.
+    """
+
+    grid_rows = grid_size[0]
+    grid_cols = grid_size[1]
+
+    tile_height, tile_width, margin_top, margin_left, margin_bot, margin_right = utils_get_image_stats(image.shape,
+                                                                                                       grid_size)
+
+    meshgrid_center = get_meshgrid_center(tile_height, tile_width)
+    meshgrid_border = get_meshgrid_border(tile_height, tile_width, margin_top, margin_left, margin_bot, margin_right)
+
+    meshgrids = {
+        'center': meshgrid_center,
+        'border': meshgrid_border
+    }
+
+    return grid_rows, grid_cols, tile_height, tile_width, margin_top, margin_left, margin_bot, margin_right, meshgrids
+
+
+# To debug: hand-construct an image tensor of shape 1 * 512 * 512 * 3 and a
+# tone_curve tensor of shape 1 * 64 * 3 * 256, then debug only in this function
+def do_interpolation_lut(image, tone_curves, grid_size, num_curves=3):
+    """
+    Perform tone mapping and interpolation on an image.
+    Center region: bilinear interpolation.
+    Border region: linear interpolation.
+    Corner region: no interpolation.
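+    The three region types are processed independently and concatenated back
+    together, so the output tensor has the same shape as the input image.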
+ :param num_curves: 3 -> 1 curve for each R,G,B channel, 1 -> 1 curve for all channels + :param image: input int8 + :param tone_curves: (grid_size, num_curves, control_points) + :param grid_size: (ncols, nrows) + :return: image: float32, between [0-1] + """ + if grid_size[0] == 1 and grid_size[1] == 1: + return apply_gtm(image, tone_curves, num_curves).astype(np.float64) + + # get image statistics + stats = get_image_stats(image, grid_size) + + + + # Center area: + center = apply_ltm_center(image, tone_curves, stats, num_curves) + + # Border area: + b_top, b_bot, b_left, b_right = apply_ltm_border(image, tone_curves, stats, num_curves) + + # Corner area: + tlc, trc, blc, brc = apply_ltm_corner(image, tone_curves, stats, num_curves) + + # stack the corners, borders, and center together + row_t = torch.cat([tlc, b_top, trc], dim=1) + row_c = torch.cat([b_left, center, b_right], dim=1) + row_b = torch.cat([blc, b_bot, brc], dim=1) + out = torch.cat([row_t, row_c, row_b], dim=0) + + assert out.shape == image.shape + + return out + + + +class Model(nn.Module): + def __init__(self): + super(Model, self).__init__() + # self.conv1 = nn.Conv2d(3, 4, kernel_size=3, padding=1) + # self.pool1 = nn.MaxPool2d(2) + + # self.conv2 = nn.Conv2d(4, 8, kernel_size=3, padding=1) + # self.pool2 = nn.MaxPool2d(2) + + # self.conv3 = nn.Conv2d(8, 16, kernel_size=3, padding=1) + # self.pool3 = nn.MaxPool2d(2) + + # self.conv4 = nn.Conv2d(16, 32, kernel_size=3, padding=1) + # self.pool4 = nn.MaxPool2d(2) + + # self.conv5 = nn.Conv2d(32, 64, kernel_size=3, padding=1) + # self.pool5 = nn.MaxPool2d(2) + + # self.conv6 = nn.Conv2d(64, 768, kernel_size=3, padding=1) + # self.pool6 = nn.MaxPool2d(2) + + self.layer_1 = nn.Sequential( + nn.Conv2d(3, 4, kernel_size=3, padding=1), + nn.BatchNorm2d(4), + nn.ReLU(), + nn.MaxPool2d(2) + ) + self.layer_2 = nn.Sequential( + nn.Conv2d(4, 8, kernel_size=3, padding=1), + nn.BatchNorm2d(8), + nn.ReLU(), + nn.MaxPool2d(2) + ) + self.layer_3 = nn.Sequential( + nn.Conv2d(8, 16, kernel_size=3, padding=1), + nn.BatchNorm2d(16), + nn.ReLU(), + nn.MaxPool2d(2) + ) + self.layer_4 = nn.Sequential( + nn.Conv2d(16, 32, kernel_size=3, padding=1), + nn.BatchNorm2d(32), + nn.ReLU(), + nn.MaxPool2d(2) + ) + self.layer_5 = nn.Sequential( + nn.Conv2d(32, 64, kernel_size=3, padding=1), + nn.BatchNorm2d(64), + nn.ReLU(), + nn.MaxPool2d(2) + ) + self.layer_6 = nn.Sequential( + nn.Conv2d(64, 768, kernel_size=3, padding=1), + nn.BatchNorm2d(768), + nn.Sigmoid(), + nn.MaxPool2d(2) + ) + + + def forward(self, x): + + ''' + + original = x + x = self.conv1(x) + x = self.pool1(x) + + x = self.conv2(x) + x = self.pool2(x) + + x = self.conv3(x) + x = self.pool3(x) + + x = self.conv4(x) + x = self.pool4(x) + + x = self.conv5(x) + x = self.pool5(x) + + x = self.conv6(x) + x = self.pool6(x) + oldres = x + x = original + ''' + + x = self.layer_1(x) + + x = self.layer_2(x) + + x = self.layer_3(x) + + x = self.layer_4(x) + + x = self.layer_5(x) + + x = self.layer_6(x) + + x = x.reshape(x.shape[0], x.shape[2] * x.shape[3], 3, int(x.shape[1] / 3)) + return x + + +def _lut_transform(imgs, luts): + # img (b, 3, h, w), lut (b, c, m, m, m) + if imgs.shape[1]==1: + + #for gray image pro-processs + luts = luts.expand(1,1,64,64,64) + # normalize pixel values + imgs = (imgs - .5) * 2. + grids = (imgs.unsqueeze(4)).repeat(1,1,1,1,3) + else: + # normalize pixel values + imgs = (imgs - .5) * 2. 
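+        # grid_sample interprets the grid as (x, y, z) sampling coordinates
+        # in [-1, 1], hence the rescaling of the pixel values above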
+ # reshape img to grid of shape (b, 1, h, w, 3) + # imgs = imgs.permute(2,0,1).unsqueeze(dim=0) + # grids = imgs.permute(0, 2, 3, 1).unsqueeze(1) + grids = imgs.unsqueeze(0).unsqueeze(0) + luts = luts.unsqueeze(0) + # after gridsampling, output is of shape (b, c, 1, h, w) + outs = F.grid_sample(luts, grids, + mode='bilinear', padding_mode='border', align_corners=True) + return outs.squeeze(2) + + +if __name__ == '__main__': + + import torch + import cv2 + + grid_size = [8,8] + + np.random.seed(42) + rand_img = np.random.random((512, 512, 3)) + luts_np = np.random.random((64, 3, 9)) + + img_torch = torch.tensor(rand_img, dtype=torch.float32).cuda() + luts_torch = torch.tensor(luts_np, dtype=torch.float32).cuda() + + + iluts = [] + for i in range(luts_torch.shape[0]): + iluts.append(torch.stack( + torch.meshgrid(*(luts_torch[i].unbind(0)[::-1])), + dim=0).flip(0)) + iluts = torch.stack(iluts, dim=0) + + + result = do_interpolation_lut(img_torch, iluts, grid_size) + print(result) + + + + + + diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/color.py b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/color.py new file mode 100644 index 0000000000000000000000000000000000000000..f18d489e83ef7f6ba8721a8226d04950846c8186 --- /dev/null +++ b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/color.py @@ -0,0 +1,306 @@ +import numpy as np + + +def rgb2gray(data): + return 0.299 * data[:, :, 0] + \ + 0.587 * data[:, :, 1] + \ + 0.114 * data[:, :, 2] + + +def rgb2ycc(data, rule="bt601"): + # map to select kr and kb + kr_kb_dict = {"bt601": [0.299, 0.114], + "bt709": [0.2126, 0.0722], + "bt2020": [0.2627, 0.0593]} + + kr = kr_kb_dict[rule][0] + kb = kr_kb_dict[rule][1] + kg = 1 - (kr + kb) + + output = np.empty(np.shape(data), dtype=np.float32) + output[:, :, 0] = kr * data[:, :, 0] + \ + kg * data[:, :, 1] + \ + kb * data[:, :, 2] + output[:, :, 1] = 0.5 * ((data[:, :, 2] - output[:, :, 0]) / (1 - kb)) + output[:, :, 2] = 0.5 * ((data[:, :, 0] - output[:, :, 0]) / (1 - kr)) + + return output + + +def ycc2rgb(data, rule="bt601"): + # map to select kr and kb + kr_kb_dict = {"bt601": [0.299, 0.114], + "bt709": [0.2126, 0.0722], + "bt2020": [0.2627, 0.0593]} + + kr = kr_kb_dict[rule][0] + kb = kr_kb_dict[rule][1] + kg = 1 - (kr + kb) + + output = np.empty(np.shape(data), dtype=np.float32) + output[:, :, 0] = 2. * data[:, :, 2] * (1 - kr) + data[:, :, 0] + output[:, :, 2] = 2. 
* data[:, :, 1] * (1 - kb) + data[:, :, 0] + output[:, :, 1] = (data[:, :, 0] - kr * output[:, :, 0] - kb * output[:, :, 2]) / kg + + return output + + +def degamma_srgb(data, clip_range=[0, 65535]): + # bring data in range 0 to 1 + data = np.clip(data, clip_range[0], clip_range[1]) + data = np.divide(data, clip_range[1]) + + data = np.asarray(data) + mask = data > 0.04045 + + # basically, if data[x, y, c] > 0.04045, data[x, y, c] = ( (data[x, y, c] + 0.055) / 1.055 ) ^ 2.4 + # else, data[x, y, c] = data[x, y, c] / 12.92 + data[mask] += 0.055 + data[mask] /= 1.055 + data[mask] **= 2.4 + + data[np.invert(mask)] /= 12.92 + + # rescale + return np.clip(data * clip_range[1], clip_range[0], clip_range[1]) + + +def degamma_adobe_rgb_1998(data, clip_range=[0, 65535]): + # bring data in range 0 to 1 + data = np.clip(data, clip_range[0], clip_range[1]) + data = np.divide(data, clip_range[1]) + + data = np.power(data, 2.2) # originally raised to 2.19921875 + + # rescale + return np.clip(data * clip_range[1], clip_range[0], clip_range[1]) + + +def rgb2xyz(data, color_space="srgb", clip_range=[0, 255]): + # input rgb in range clip_range + # output xyz is in range 0 to 1 + if color_space == "srgb": + # degamma / linearization + data = degamma_srgb(data, clip_range) + data = np.float32(data) + data = np.divide(data, clip_range[1]) + + # matrix multiplication` + output = np.empty(np.shape(data), dtype=np.float32) + output[:, :, 0] = data[:, :, 0] * 0.4124 + data[:, :, 1] * 0.3576 + data[:, :, 2] * 0.1805 + output[:, :, 1] = data[:, :, 0] * 0.2126 + data[:, :, 1] * 0.7152 + data[:, :, 2] * 0.0722 + output[:, :, 2] = data[:, :, 0] * 0.0193 + data[:, :, 1] * 0.1192 + data[:, :, 2] * 0.9505 + elif color_space == "adobe-rgb-1998": + # degamma / linearization + data = degamma_adobe_rgb_1998(data, clip_range) + data = np.float32(data) + data = np.divide(data, clip_range[1]) + + # matrix multiplication + output = np.empty(np.shape(data), dtype=np.float32) + output[:, :, 0] = data[:, :, 0] * 0.5767309 + data[:, :, 1] * 0.1855540 + data[:, :, 2] * 0.1881852 + output[:, :, 1] = data[:, :, 0] * 0.2973769 + data[:, :, 1] * 0.6273491 + data[:, :, 2] * 0.0752741 + output[:, :, 2] = data[:, :, 0] * 0.0270343 + data[:, :, 1] * 0.0706872 + data[:, :, 2] * 0.9911085 + elif color_space == "linear": + # matrix multiplication` + output = np.empty(np.shape(data), dtype=np.float32) + data = np.float32(data) + data = np.divide(data, clip_range[1]) + output[:, :, 0] = data[:, :, 0] * 0.4124 + data[:, :, 1] * 0.3576 + data[:, :, 2] * 0.1805 + output[:, :, 1] = data[:, :, 0] * 0.2126 + data[:, :, 1] * 0.7152 + data[:, :, 2] * 0.0722 + output[:, :, 2] = data[:, :, 0] * 0.0193 + data[:, :, 1] * 0.1192 + data[:, :, 2] * 0.9505 + else: + print("Warning! 
color_space must be srgb or adobe-rgb-1998.") + return + + return output + + +def gamma_srgb(data, clip_range=[0, 65535]): + # bring data in range 0 to 1 + data = np.clip(data, clip_range[0], clip_range[1]) + data = np.divide(data, clip_range[1]) + + data = np.asarray(data) + mask = data > 0.0031308 + + # basically, if data[x, y, c] > 0.0031308, data[x, y, c] = 1.055 * ( var_R(i, j) ^ ( 1 / 2.4 ) ) - 0.055 + # else, data[x, y, c] = data[x, y, c] * 12.92 + data[mask] **= 0.4167 + data[mask] *= 1.055 + data[mask] -= 0.055 + + data[np.invert(mask)] *= 12.92 + + # rescale + return np.clip(data * clip_range[1], clip_range[0], clip_range[1]) + + +def gamma_adobe_rgb_1998(data, clip_range=[0, 65535]): + # bring data in range 0 to 1 + data = np.clip(data, clip_range[0], clip_range[1]) + data = np.divide(data, clip_range[1]) + + data = np.power(data, 0.4545) + + # rescale + return np.clip(data * clip_range[1], clip_range[0], clip_range[1]) + + +def xyz2rgb(data, color_space="srgb", clip_range=[0, 255]): + # input xyz is in range 0 to 1 + # output rgb in clip_range + + # allocate space for output + output = np.empty(np.shape(data), dtype=np.float32) + + if color_space == "srgb": + # matrix multiplication + output[:, :, 0] = data[:, :, 0] * 3.2406 + data[:, :, 1] * -1.5372 + data[:, :, 2] * -0.4986 + output[:, :, 1] = data[:, :, 0] * -0.9689 + data[:, :, 1] * 1.8758 + data[:, :, 2] * 0.0415 + output[:, :, 2] = data[:, :, 0] * 0.0557 + data[:, :, 1] * -0.2040 + data[:, :, 2] * 1.0570 + + # gamma to retain nonlinearity + output = gamma_srgb(output * clip_range[1], clip_range) + elif color_space == "adobe-rgb-1998": + # matrix multiplication + output[:, :, 0] = data[:, :, 0] * 2.0413690 + data[:, :, 1] * -0.5649464 + data[:, :, 2] * -0.3446944 + output[:, :, 1] = data[:, :, 0] * -0.9692660 + data[:, :, 1] * 1.8760108 + data[:, :, 2] * 0.0415560 + output[:, :, 2] = data[:, :, 0] * 0.0134474 + data[:, :, 1] * -0.1183897 + data[:, :, 2] * 1.0154096 + + # gamma to retain nonlinearity + output = gamma_adobe_rgb_1998(output * clip_range[1], clip_range) + elif color_space == "linear": + + # matrix multiplication + output[:, :, 0] = data[:, :, 0] * 3.2406 + data[:, :, 1] * -1.5372 + data[:, :, 2] * -0.4986 + output[:, :, 1] = data[:, :, 0] * -0.9689 + data[:, :, 1] * 1.8758 + data[:, :, 2] * 0.0415 + output[:, :, 2] = data[:, :, 0] * 0.0557 + data[:, :, 1] * -0.2040 + data[:, :, 2] * 1.0570 + + # gamma to retain nonlinearity + output = output * clip_range[1] + else: + print("Warning! 
color_space must be srgb or adobe-rgb-1998.") + return + + return output + + +def get_xyz_reference(cie_version="1931", illuminant="d65"): + if cie_version == "1931": + xyz_reference_dictionary = {"A": [109.850, 100.0, 35.585], + "B": [99.0927, 100.0, 85.313], + "C": [98.074, 100.0, 118.232], + "d50": [96.422, 100.0, 82.521], + "d55": [95.682, 100.0, 92.149], + "d65": [95.047, 100.0, 108.883], + "d75": [94.972, 100.0, 122.638], + "E": [100.0, 100.0, 100.0], + "F1": [92.834, 100.0, 103.665], + "F2": [99.187, 100.0, 67.395], + "F3": [103.754, 100.0, 49.861], + "F4": [109.147, 100.0, 38.813], + "F5": [90.872, 100.0, 98.723], + "F6": [97.309, 100.0, 60.191], + "F7": [95.044, 100.0, 108.755], + "F8": [96.413, 100.0, 82.333], + "F9": [100.365, 100.0, 67.868], + "F10": [96.174, 100.0, 81.712], + "F11": [100.966, 100.0, 64.370], + "F12": [108.046, 100.0, 39.228]} + elif cie_version == "1964": + xyz_reference_dictionary = {"A": [111.144, 100.0, 35.200], + "B": [99.178, 100.0, 84.3493], + "C": [97.285, 100.0, 116.145], + "D50": [96.720, 100.0, 81.427], + "D55": [95.799, 100.0, 90.926], + "D65": [94.811, 100.0, 107.304], + "D75": [94.416, 100.0, 120.641], + "E": [100.0, 100.0, 100.0], + "F1": [94.791, 100.0, 103.191], + "F2": [103.280, 100.0, 69.026], + "F3": [108.968, 100.0, 51.965], + "F4": [114.961, 100.0, 40.963], + "F5": [93.369, 100.0, 98.636], + "F6": [102.148, 100.0, 62.074], + "F7": [95.792, 100.0, 107.687], + "F8": [97.115, 100.0, 81.135], + "F9": [102.116, 100.0, 67.826], + "F10": [99.001, 100.0, 83.134], + "F11": [103.866, 100.0, 65.627], + "F12": [111.428, 100.0, 40.353]} + else: + print("Warning! cie_version must be 1931 or 1964.") + return + return np.divide(xyz_reference_dictionary[illuminant], 100.0) + + +def xyz2lab(data, cie_version="1931", illuminant="d65"): + xyz_reference = get_xyz_reference(cie_version, illuminant) + + data = data + data[:, :, 0] = data[:, :, 0] / xyz_reference[0] + data[:, :, 1] = data[:, :, 1] / xyz_reference[1] + data[:, :, 2] = data[:, :, 2] / xyz_reference[2] + + data = np.asarray(data) + + # if data[x, y, c] > 0.008856, data[x, y, c] = data[x, y, c] ^ (1/3) + # else, data[x, y, c] = 7.787 * data[x, y, c] + 16/116 + mask = data > 0.008856 + data[mask] **= 1. / 3. + data[np.invert(mask)] *= 7.787 + data[np.invert(mask)] += 16. / 116. + + data = np.float32(data) + output = np.empty(np.shape(data), dtype=np.float32) + output[:, :, 0] = 116. * data[:, :, 1] - 16. + output[:, :, 1] = 500. * (data[:, :, 0] - data[:, :, 1]) + output[:, :, 2] = 200. * (data[:, :, 1] - data[:, :, 2]) + + return output + + +def lab2xyz(data, cie_version="1931", illuminant="d65"): + output = np.empty(np.shape(data), dtype=np.float32) + + output[:, :, 1] = (data[:, :, 0] + 16.) / 116. + output[:, :, 0] = (data[:, :, 1] / 500.) + output[:, :, 1] + output[:, :, 2] = output[:, :, 1] - (data[:, :, 2] / 200.) + + # if output[x, y, c] > 0.008856, output[x, y, c] ^ 3 + # else, output[x, y, c] = ( output[x, y, c] - 16/116 ) / 7.787 + output = np.asarray(output) + mask = output > 0.008856 + output[mask] **= 3. 
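+    # below the knee, invert the affine branch t -> 7.787 * t + 16 / 116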
+ output[np.invert(mask)] -= 16 / 116 + output[np.invert(mask)] /= 7.787 + + xyz_reference = get_xyz_reference(cie_version, illuminant) + + output = np.float32(output) + output[:, :, 0] = output[:, :, 0] * xyz_reference[0] + output[:, :, 1] = output[:, :, 1] * xyz_reference[1] + output[:, :, 2] = output[:, :, 2] * xyz_reference[2] + + return output + + +def lab2lch(data): + output = np.empty(np.shape(data), dtype=np.float32) + + output[:, :, 0] = data[:, :, 0] # L transfers directly + output[:, :, 1] = np.power(np.power(data[:, :, 1], 2) + np.power(data[:, :, 2], 2), 0.5) + output[:, :, 2] = np.arctan2(data[:, :, 2], data[:, :, 1]) * 180 / np.pi + + return output + + +def lch2lab(data): + output = np.empty(np.shape(data), dtype=np.float32) + + output[:, :, 0] = data[:, :, 0] # L transfers directly + output[:, :, 1] = np.multiply(np.cos(data[:, :, 2] * np.pi / 180), data[:, :, 1]) + output[:, :, 2] = np.multiply(np.sin(data[:, :, 2] * np.pi / 180), data[:, :, 1]) + + return output diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/csrnet_network.py b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/csrnet_network.py new file mode 100644 index 0000000000000000000000000000000000000000..c0483fa328a819378111beb9f30e09bba208c873 --- /dev/null +++ b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/csrnet_network.py @@ -0,0 +1,76 @@ +import functools +import math +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class Condition(nn.Module): + def __init__(self, in_nc=3, nf=32): + super(Condition, self).__init__() + stride = 2 + pad = 0 + self.pad = nn.ZeroPad2d(1) + self.conv1 = nn.Conv2d(in_nc, nf, 7, stride, pad, bias=True) + self.conv2 = nn.Conv2d(nf, nf, 3, stride, pad, bias=True) + self.conv3 = nn.Conv2d(nf, nf, 3, stride, pad, bias=True) + self.act = nn.ReLU(inplace=True) + + def forward(self, x): + conv1_out = self.act(self.conv1(self.pad(x))) + conv2_out = self.act(self.conv2(self.pad(conv1_out))) + conv3_out = self.act(self.conv3(self.pad(conv2_out))) + out = torch.mean(conv3_out, dim=[2, 3], keepdim=False) + + return out + + +# 3layers with control +class CSRNet(nn.Module): + def __init__(self, in_nc=3, out_nc=3, base_nf=48, cond_nf=24): + super(CSRNet, self).__init__() + + self.base_nf = base_nf + self.out_nc = out_nc + + self.cond_net = Condition(in_nc=in_nc, nf=cond_nf) + + self.cond_scale1 = nn.Linear(cond_nf, base_nf, bias=True) + self.cond_scale2 = nn.Linear(cond_nf, base_nf, bias=True) + self.cond_scale3 = nn.Linear(cond_nf, 3, bias=True) + + self.cond_shift1 = nn.Linear(cond_nf, base_nf, bias=True) + self.cond_shift2 = nn.Linear(cond_nf, base_nf, bias=True) + self.cond_shift3 = nn.Linear(cond_nf, 3, bias=True) + + self.conv1 = nn.Conv2d(in_nc, base_nf, 1, 1, bias=True) + self.conv2 = nn.Conv2d(base_nf, base_nf, 1, 1, bias=True) + self.conv3 = nn.Conv2d(base_nf, out_nc, 1, 1, bias=True) + + self.act = nn.ReLU(inplace=True) + + + def forward(self, x): + cond = self.cond_net(x) + + scale1 = self.cond_scale1(cond) + shift1 = self.cond_shift1(cond) + + scale2 = self.cond_scale2(cond) + shift2 = self.cond_shift2(cond) + + scale3 = self.cond_scale3(cond) + shift3 = self.cond_shift3(cond) + + out = self.conv1(x) + out = out * scale1.view(-1, self.base_nf, 1, 1) + shift1.view(-1, self.base_nf, 1, 1) + out + out = self.act(out) + + + out = self.conv2(out) + out = out * scale2.view(-1, self.base_nf, 1, 1) + shift2.view(-1, self.base_nf, 1, 1) + out + out = self.act(out) + + out = self.conv3(out) + out = out * scale3.view(-1, self.out_nc, 1, 1) + shift3.view(-1, self.out_nc, 1, 1) + out + 
return out \ No newline at end of file diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/exif_data_formats.py b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/exif_data_formats.py new file mode 100644 index 0000000000000000000000000000000000000000..3854a24d790348fac8b81590d8c24bb48fe80e81 --- /dev/null +++ b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/exif_data_formats.py @@ -0,0 +1,22 @@ +class ExifFormat: + def __init__(self, id, name, size, short_name): + self.id = id + self.name = name + self.size = size + self.short_name = short_name # used with struct.unpack() + + +exif_formats = { + 1: ExifFormat(1, 'unsigned byte', 1, 'B'), + 2: ExifFormat(2, 'ascii string', 1, 's'), + 3: ExifFormat(3, 'unsigned short', 2, 'H'), + 4: ExifFormat(4, 'unsigned long', 4, 'L'), + 5: ExifFormat(5, 'unsigned rational', 8, ''), + 6: ExifFormat(6, 'signed byte', 1, 'b'), + 7: ExifFormat(7, 'undefined', 1, 'B'), # consider `undefined` as `unsigned byte` + 8: ExifFormat(8, 'signed short', 2, 'h'), + 9: ExifFormat(9, 'signed long', 4, 'l'), + 10: ExifFormat(10, 'signed rational', 8, ''), + 11: ExifFormat(11, 'single float', 4, 'f'), + 12: ExifFormat(12, 'double float', 8, 'd'), +} \ No newline at end of file diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/exif_utils.py b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/exif_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..566620c1cc33d9f7ec1c75a182192176daeb1253 --- /dev/null +++ b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/exif_utils.py @@ -0,0 +1,208 @@ +""" +Manual parsing of image file directories (IFDs). +""" + + +import struct +from fractions import Fraction +from raw_prc_pipeline.exif_data_formats import exif_formats + +class Ifd: + def __init__(self): + self.offset = -1 + self.tags = {} # dict; tag number will be key. + + +class Tag: + def __init__(self): + self.offset = -1 + self.tag_num = -1 + self.data_format = -1 + self.num_values = -1 + self.values = [] + + +def parse_exif(image_path, verbose=True): + """ + Parse EXIF tags from a binary file and return IFDs. + Returned IFDs include EXIF SubIFDs, if any. + """ + + def print_(str_): + if verbose: + print(str_) + + ifds = {} # dict of pairs; using offset to IFD as key. 
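+    # A TIFF/EXIF file starts with a byte-order mark (b'II' little-endian,
+    # b'MM' big-endian), the magic number 0x002A, and the offset of IFD0.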
+
+    with open(image_path, 'rb') as fid:
+        fid.seek(0)
+        b0 = fid.read(1)
+        _ = fid.read(1)
+        # byte storage direction (endian):
+        # +1: b'M' (big-endian/Motorola)
+        # -1: b'I' (little-endian/Intel)
+        endian = 1 if b0 == b'M' else -1
+        print_("Endian = {}".format(b0))
+        endian_sign = "<" if endian == -1 else ">"  # used in struct.unpack
+        print_("Endian sign = {}".format(endian_sign))
+        _ = fid.read(2)  # 0x002A
+        b4_7 = fid.read(4)  # offset to first IFD
+        offset_ = struct.unpack(endian_sign + "I", b4_7)[0]
+        i = 0
+        ifd_offsets = [offset_]
+        while len(ifd_offsets) > 0:
+            offset_ = ifd_offsets.pop(0)
+            # check if IFD at this offset was already parsed before
+            if offset_ in ifds:
+                continue
+            print_("=========== Parsing IFD # {} ===========".format(i))
+            ifd_ = parse_exif_ifd(fid, offset_, endian_sign, verbose)
+            ifds.update({ifd_.offset: ifd_})
+            print_("=========== Finished parsing IFD # {} ===========".format(i))
+            i += 1
+            # check SubIFDs; zero or more offsets at tag 0x014a
+            sub_ifds_tag_num = int('0x014a', 16)
+            if sub_ifds_tag_num in ifd_.tags:
+                ifd_offsets.extend(ifd_.tags[sub_ifds_tag_num].values)
+            # check Exif SubIFD; usually one offset at tag 0x8769
+            exif_sub_ifd_tag_num = int('0x8769', 16)
+            if exif_sub_ifd_tag_num in ifd_.tags:
+                ifd_offsets.extend(ifd_.tags[exif_sub_ifd_tag_num].values)
+    return ifds
+
+
+def parse_exif_ifd(binary_file, offset_, endian_sign, verbose=True):
+    """
+    Parse an EXIF IFD.
+    """
+
+    def print_(str_):
+        if verbose:
+            print(str_)
+
+    ifd = Ifd()
+    ifd.offset = offset_
+    print_("IFD offset = {}".format(ifd.offset))
+    binary_file.seek(offset_)
+    num_entries = struct.unpack(endian_sign + "H", binary_file.read(2))[0]  # format H = unsigned short
+    print_("Number of entries = {}".format(num_entries))
+    for t in range(num_entries):
+        print_("---------- Tag {} / {} ----------".format(t + 1, num_entries))
+        tag_ = parse_exif_tag(binary_file, endian_sign, verbose)
+        ifd.tags.update({tag_.tag_num: tag_})  # supposedly, EXIF tag numbers won't repeat in the same IFD
+    # TODO: check for subsequent IFDs by parsing the next 4 bytes immediately after the IFD
+    return ifd
+
+
+def parse_exif_tag(binary_file, endian_sign, verbose=True):
+    """
+    Parse an EXIF tag from a binary file, starting at the current file pointer, and return the tag values.
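+    Values that do not fit in 4 bytes are stored at a separate data offset;
+    the file position is restored afterwards so the next tag reads correctly.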
+ """ + + def print_(str_): + if verbose: + print(str_) + + tag = Tag() + + # tag offset + tag.offset = binary_file.tell() + print_("Tag offset = {}".format(tag.offset)) + + # tag number + bytes_ = binary_file.read(2) + tag.tag_num = struct.unpack(endian_sign + "H", bytes_)[0] # H: unsigned 2-byte short + print_("Tag number = {} = 0x{:04x}".format(tag.tag_num, tag.tag_num)) + + # data format (some value between [1, 12]) + tag.data_format = struct.unpack(endian_sign + "H", binary_file.read(2))[0] # H: unsigned 2-byte short + exif_format = exif_formats[tag.data_format] + print_("Data format = {} = {}".format(tag.data_format, exif_format.name)) + + # number of components/values + tag.num_values = struct.unpack(endian_sign + "I", binary_file.read(4))[0] # I: unsigned 4-byte integer + print_("Number of values = {}".format(tag.num_values)) + + # total number of data bytes + total_bytes = tag.num_values * exif_format.size + print_("Total bytes = {}".format(total_bytes)) + + # seek to data offset (if needed) + data_is_offset = False + current_offset = binary_file.tell() + if total_bytes > 4: + print_("Total bytes > 4; The next 4 bytes are an offset.") + data_is_offset = True + data_offset = struct.unpack(endian_sign + "I", binary_file.read(4))[0] + current_offset = binary_file.tell() + print_("Current offset = {}".format(current_offset)) + print_("Seeking to data offset = {}".format(data_offset)) + binary_file.seek(data_offset) + + # read values + # TODO: need to distinguish between numeric and text values? + if tag.num_values == 1 and total_bytes < 4: + # special case: data is a single value that is less than 4 bytes inside 4 bytes, take care of endian + val_bytes = binary_file.read(4) + # if endian_sign == ">": + # val_bytes = val_bytes[4 - total_bytes:] + # else: + # val_bytes = val_bytes[:total_bytes][::-1] + val_bytes = val_bytes[:total_bytes] + tag.values.append(struct.unpack(endian_sign + exif_format.short_name, val_bytes)[0]) + else: + # read data values one by one + for k in range(tag.num_values): + val_bytes = binary_file.read(exif_format.size) + if exif_format.name == 'unsigned rational': + tag.values.append(eight_bytes_to_fraction(val_bytes, endian_sign, signed=False)) + elif exif_format.name == 'signed rational': + tag.values.append(eight_bytes_to_fraction(val_bytes, endian_sign, signed=True)) + else: + tag.values.append(struct.unpack(endian_sign + exif_format.short_name, val_bytes)[0]) + if total_bytes < 4: + # special case: multiple values less than 4 bytes in total, inside the 4 bytes; skip the extra bytes + binary_file.seek(4 - total_bytes, 1) + + if verbose: + if len(tag.values) > 100: + print_("Got more than 100 values; printing first 100 only:") + print_("Tag values = {}".format(tag.values[:100])) + else: + print_("Tag values = {}".format(tag.values)) + if tag.data_format == 2: + print_("Tag values (string) = {}".format(b''.join(tag.values).decode())) + + if data_is_offset: + # seek back to current position to read the next tag + print_("Seeking back to current offset = {}".format(current_offset)) + binary_file.seek(current_offset) + + return tag + + +def get_tag_values_from_ifds(tag_num, ifds): + """ + Return values of a tag, if found in ifds. Return None otherwise. + Assuming any tag exists only once in all ifds. + """ + for key, ifd in ifds.items(): + if tag_num in ifd.tags: + return ifd.tags[tag_num].values + return None + + +def eight_bytes_to_fraction(eight_bytes, endian_sign, signed): + """ + Convert 8-byte array into a Fraction. Take care of endian and sign. 
+ """ + if signed: + num = struct.unpack(endian_sign + "l", eight_bytes[:4])[0] + den = struct.unpack(endian_sign + "l", eight_bytes[4:])[0] + else: + num = struct.unpack(endian_sign + "L", eight_bytes[:4])[0] + den = struct.unpack(endian_sign + "L", eight_bytes[4:])[0] + den = den if den != 0 else 1 + return Fraction(num, den) \ No newline at end of file diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/fs.py b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/fs.py new file mode 100644 index 0000000000000000000000000000000000000000..675da56513a81eb4acfbba50d568f14191c4e3d6 --- /dev/null +++ b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/fs.py @@ -0,0 +1,43 @@ +import cv2 +import numpy as np + + +def perform_flash(source, a=5, target=-1, perform_gamma_correction=True): + rows, cols, _ = source.shape + + v = np.max(source, axis=2) + vd = np.copy(v) + vd[vd == 0] = 1e-9 + result = source / (a * np.exp(np.mean(np.log(vd))) + np.tile(np.expand_dims(vd, axis=2), (1, 1, 3))) + + if perform_gamma_correction: + result **= 1.0 / 2.2 + + if target >= 0: + result *= target / np.mean((0.299 * result[:, :, 2] + 0.587 * result[:, :, 1] + 0.114 * result[:, :, 0])) + else: + result *= 255.0 / np.max(result) + + return result + + +def perform_storm(source, a=5, target=-1, kernels=(1, 4, 16, 64, 256), perform_gamma_correction=True): + rows, cols, _ = source.shape + + v = np.max(source, axis=2) + vd = np.copy(v) + vd[vd == 0] = 1e-9 + lv = np.log(vd) + result = sum([source / np.tile( + np.expand_dims(a * np.exp(cv2.boxFilter(lv, -1, (int(min(rows // kernel, cols // kernel)),) * 2)) + vd, axis=2), + (1, 1, 3)) for kernel in kernels]) + + if perform_gamma_correction: + result **= 1.0 / 2.2 + + if target >= 0: + result *= target / np.mean((0.299 * result[:, :, 2] + 0.587 * result[:, :, 1] + 0.114 * result[:, :, 0])) + else: + result *= 255.0 / np.max(result) + + return result diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/io.py b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/io.py new file mode 100644 index 0000000000000000000000000000000000000000..e62baacff2dd91ba4a01936642a8e90c0a4544fa --- /dev/null +++ b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/io.py @@ -0,0 +1,53 @@ +import cv2 +import json +import torch +from pathlib import Path +from fractions import Fraction + + +def get_device(gpu_id=None): + cuda_device = "cuda" + if gpu_id is not None: + assert gpu_id in ["0", "1"] # for local setup with 2 GPUs + cuda_device += f":{gpu_id}" + return torch.device(cuda_device if torch.cuda.is_available() else "cpu") + + +def fraction_from_json(json_object): + if 'Fraction' in json_object: + return Fraction(*json_object['Fraction']) + return json_object + + +def json_read(fname, **kwargs): + with open(fname) as j: + data = json.load(j, **kwargs) + return data + + +def read_image(path): + png_path = Path(path) + raw_image = cv2.imread(str(png_path), cv2.IMREAD_UNCHANGED) + metadata = json_read(png_path.with_suffix('.json'), object_hook=fraction_from_json) + return raw_image, metadata + + +def write_processed_as_jpg(out, dst_path, quality=100): + cv2.imwrite(dst_path, out, [cv2.IMWRITE_JPEG_QUALITY, quality]) + + +def download_weights(url, fname): + import requests + r = requests.get(url, stream=True) + with open(fname, 'wb') as f: + total_length = int(r.headers.get('content-length')) + for chunk in r.iter_content(chunk_size=1024): + if chunk: + f.write(chunk) + f.flush() + + +def unzip(path_to_zip_file, directory_to_extract_to): + import zipfile + with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref: + 
zip_ref.extractall(directory_to_extract_to) \ No newline at end of file diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/misc.py b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..6d906066cacf95a83bda19ee93789547e808ac3b --- /dev/null +++ b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/misc.py @@ -0,0 +1,251 @@ +import numpy as np +from math import ceil +import torch +from modeling.DeepWB.utilities import imresize + + +def decode_cfa_pattern(cfa_pattern): + cfa_dict = {0: 'B', 1: 'G', 2: 'R'} + return "".join([cfa_dict[x] for x in cfa_pattern]) + + +def to_tensor(im, dims=3): + """ Converts a given ndarray image to torch tensor image. + + Args: + im: ndarray image (height x width x channel x [sample]). + dims: dimension number of the given image. If dims = 3, the image should + be in (height x width x channel) format; while if dims = 4, the image + should be in (height x width x channel x sample) format; default is 3. + + Returns: + torch tensor in the format (channel x height x width) or (sample x + channel x height x width). + """ + + assert (dims == 3 or dims == 4) + if dims == 3: + im = im.transpose((2, 0, 1)) + elif dims == 4: + im = im.transpose((0, 3, 1, 2)) + else: + raise NotImplementedError + + return torch.from_numpy(im.copy()) + + +def outOfGamutClipping(I, range=1.): + """ Clips out-of-gamut pixels. """ + if range == 1.: + I[I > 1] = 1 # any pixel is higher than 1, clip it to 1 + I[I < 0] = 0 # any pixel is below 0, clip it to 0 + else: + I[I > 255] = 255 # any pixel is higher than 255, clip it to 255 + I[I < 0] = 0 # any pixel is below 0, clip it to 0 + return I + + +def ratios2floats(ratios): + floats = [] + for ratio in ratios: + floats.append(float(ratio.num) / ratio.den) + return floats + + +def fractions2floats(fractions): + floats = [] + for fraction in fractions: + floats.append(float(fraction.numerator) / fraction.denominator) + return floats + + +def gaussian(kernel_size, sigma): + # calculate which number to where the grid should be + # remember that, kernel_size[0] is the width of the kernel + # and kernel_size[1] is the height of the kernel + temp = np.floor(np.float32(kernel_size) / 2.) + + # create the grid + # example: if kernel_size = [5, 3], then: + # x: array([[-2., -1., 0., 1., 2.], + # [-2., -1., 0., 1., 2.], + # [-2., -1., 0., 1., 2.]]) + # y: array([[-1., -1., -1., -1., -1.], + # [ 0., 0., 0., 0., 0.], + # [ 1., 1., 1., 1., 1.]]) + x, y = np.meshgrid(np.linspace(-temp[0], temp[0], kernel_size[0]), np.linspace(-temp[1], temp[1], kernel_size[1])) + + # Gaussian equation + temp = np.exp(-(x ** 2 + y ** 2) / (2. 
* sigma ** 2)) + + # make kernel sum equal to 1 + return temp / np.sum(temp) + + +def aspect_ratio_imresize(im, max_output=256): + h, w, c = im.shape + if max(h, w) > max_output: + ratio = max_output / max(h, w) + im = imresize.imresize(im, scalar_scale=ratio) + h, w, c = im.shape + + if w % (2 ** 4) == 0: + new_size_w = w + else: + new_size_w = w + (2 ** 4) - w % (2 ** 4) + + if h % (2 ** 4) == 0: + new_size_h = h + else: + new_size_h = h + (2 ** 4) - h % (2 ** 4) + + new_size = (new_size_h, new_size_w) + if not ((h, w) == new_size): + im = imresize.imresize(im, output_shape=new_size) + + return im + + +def cubic(x): + x = np.array(x).astype(np.float64) + absx = np.absolute(x) + absx2 = np.multiply(absx, absx) + absx3 = np.multiply(absx2, absx) + f = np.multiply(1.5*absx3 - 2.5*absx2 + 1, absx <= 1) + np.multiply(-0.5*absx3 + 2.5*absx2 - 4*absx + 2, (1 < absx) & (absx <= 2)) + return f + + +def triangle(x): + x = np.array(x).astype(np.float64) + lessthanzero = np.logical_and((x>=-1),x<0) + greaterthanzero = np.logical_and((x<=1),x>=0) + f = np.multiply((x+1),lessthanzero) + np.multiply((1-x),greaterthanzero) + return f + + +def deriveSizeFromScale(img_shape, scale): + output_shape = [] + for k in range(2): + output_shape.append(int(ceil(scale[k] * img_shape[k]))) + return output_shape + + +def deriveScaleFromSize(img_shape_in, img_shape_out): + scale = [] + for k in range(2): + scale.append(1.0 * img_shape_out[k] / img_shape_in[k]) + return scale + + +def contributions(in_length, out_length, scale, kernel, k_width): + if scale < 1: + h = lambda x: scale * kernel(scale * x) + kernel_width = 1.0 * k_width / scale + else: + h = kernel + kernel_width = k_width + x = np.arange(1, out_length+1).astype(np.float64) + u = x / scale + 0.5 * (1 - 1 / scale) + left = np.floor(u - kernel_width / 2) + P = int(ceil(kernel_width)) + 2 + ind = np.expand_dims(left, axis=1) + np.arange(P) - 1 # -1 because indexing from 0 + indices = ind.astype(np.int32) + weights = h(np.expand_dims(u, axis=1) - indices - 1) # -1 because indexing from 0 + weights = np.divide(weights, np.expand_dims(np.sum(weights, axis=1), axis=1)) + aux = np.concatenate((np.arange(in_length), np.arange(in_length - 1, -1, step=-1))).astype(np.int32) + indices = aux[np.mod(indices, aux.size)] + ind2store = np.nonzero(np.any(weights, axis=0)) + weights = weights[:, ind2store] + indices = indices[:, ind2store] + return weights, indices + + +def imresizemex(inimg, weights, indices, dim): + in_shape = inimg.shape + w_shape = weights.shape + out_shape = list(in_shape) + out_shape[dim] = w_shape[0] + outimg = np.zeros(out_shape) + if dim == 0: + for i_img in range(in_shape[1]): + for i_w in range(w_shape[0]): + w = weights[i_w, :] + ind = indices[i_w, :] + im_slice = inimg[ind, i_img].astype(np.float64) + outimg[i_w, i_img] = np.sum(np.multiply(np.squeeze(im_slice, axis=0), w.T), axis=0) + elif dim == 1: + for i_img in range(in_shape[0]): + for i_w in range(w_shape[0]): + w = weights[i_w, :] + ind = indices[i_w, :] + im_slice = inimg[i_img, ind].astype(np.float64) + outimg[i_img, i_w] = np.sum(np.multiply(np.squeeze(im_slice, axis=0), w.T), axis=0) + if inimg.dtype == np.uint8: + outimg = np.clip(outimg, 0, 255) + return np.around(outimg).astype(np.uint8) + else: + return outimg + + +def imresizevec(inimg, weights, indices, dim): + wshape = weights.shape + if dim == 0: + weights = weights.reshape((wshape[0], wshape[2], 1, 1)) + outimg = np.sum(weights*((inimg[indices].squeeze(axis=1)).astype(np.float64)), axis=1) + elif dim == 1: + weights = 
weights.reshape((1, wshape[0], wshape[2], 1)) + outimg = np.sum(weights*((inimg[:, indices].squeeze(axis=2)).astype(np.float64)), axis=2) + if inimg.dtype == np.uint8: + outimg = np.clip(outimg, 0, 255) + return np.around(outimg).astype(np.uint8) + else: + return outimg + + +def resizeAlongDim(A, dim, weights, indices, mode="vec"): + if mode == "org": + out = imresizemex(A, weights, indices, dim) + else: + out = imresizevec(A, weights, indices, dim) + return out + + +def imresize(I, scalar_scale=None, method='bicubic', output_shape=None, mode="vec"): + if method == 'bicubic': + kernel = cubic + elif method == 'bilinear': + kernel = triangle + else: + print ('Error: Unidentified method supplied') + + kernel_width = 4.0 + # Fill scale and output_size + if scalar_scale is not None: + scalar_scale = float(scalar_scale) + scale = [scalar_scale, scalar_scale] + output_size = deriveSizeFromScale(I.shape, scale) + elif output_shape is not None: + scale = deriveScaleFromSize(I.shape, output_shape) + output_size = list(output_shape) + else: + print ('Error: scalar_scale OR output_shape should be defined!') + return + scale_np = np.array(scale) + order = np.argsort(scale_np) + weights = [] + indices = [] + for k in range(2): + w, ind = contributions(I.shape[k], output_size[k], scale[k], kernel, kernel_width) + weights.append(w) + indices.append(ind) + B = np.copy(I) + flag2D = False + if B.ndim == 2: + B = np.expand_dims(B, axis=2) + flag2D = True + for k in range(2): + dim = order[k] + B = resizeAlongDim(B, dim, weights[dim], indices[dim], mode) + if flag2D: + B = np.squeeze(B, axis=2) + return B \ No newline at end of file diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/optim.py b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/optim.py new file mode 100644 index 0000000000000000000000000000000000000000..cc58d57d7826791df68734dbec88486fc2fa4de5 --- /dev/null +++ b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/optim.py @@ -0,0 +1,30 @@ +import numpy as np +from sklearn.linear_model import LinearRegression + + +def kernelP(I): + """ Kernel function: kernel(r, g, b) -> (r,g,b,rg,rb,gb,r^2,g^2,b^2,rgb,1) + Ref: Hong, et al., "A study of digital camera colorimetric characterization + based on polynomial modeling." Color Research & Application, 2001. """ + return (np.transpose( + (I[:, 0], I[:, 1], I[:, 2], I[:, 0] * I[:, 1], I[:, 0] * I[:, 2], + I[:, 1] * I[:, 2], I[:, 0] * I[:, 0], I[:, 1] * I[:, 1], + I[:, 2] * I[:, 2], I[:, 0] * I[:, 1] * I[:, 2], + np.repeat(1, np.shape(I)[0])))) + + +def get_mapping_func(image1, image2): + """ Computes the polynomial mapping """ + image1 = np.reshape(image1, [-1, 3]) + image2 = np.reshape(image2, [-1, 3]) + m = LinearRegression().fit(kernelP(image1), image2) + return m + + +def apply_mapping_func(image, m): + """ Applies the polynomial mapping """ + sz = image.shape + image = np.reshape(image, [-1, 3]) + result = m.predict(kernelP(image)) + result = np.reshape(result, [sz[0], sz[1], sz[2]]) + return result diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/pipeline.py b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..7cc06f37b84049a711d5da487d00ce52119723aa --- /dev/null +++ b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/pipeline.py @@ -0,0 +1,268 @@ +""" +Demo raw processing pipeline and pipeline executor. 
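A typical invocation of the executor defined below, sketched under the assumption that a raw PNG and its JSON metadata sit side by side (the file name is illustrative). The stages from `normalize` through `denoise` are NumPy-based and can run as-is:

```python
from raw_prc_pipeline.io import read_image
from raw_prc_pipeline.pipeline import RawProcessingPipelineDemo, PipelineExecutor

raw_image, metadata = read_image("night_0001.png")   # hypothetical input pair
pipeline = RawProcessingPipelineDemo(tone_mapping='Flash')
executor = PipelineExecutor(raw_image, metadata, pipeline,
                            first_stage='normalize', last_stage='denoise')
out = executor()   # runs normalize -> demosaic -> denoise
```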
+""" +import sys +sys.path.append('ISP_pipeline') +import numpy as np +from raw_prc_pipeline.pipeline_utils import * +from copy import deepcopy +import hdf5storage +import torch +import json +from fractions import Fraction +import bm3d + +class RawProcessingPipelineDemo: + """ + Demonstration pipeline of raw image processing. + + This pipeline is a baseline pipeline to process raw image. + The public methods of this class are successive steps of raw image processing pipeline. + The declaration order of the public methods must correspond to the order in which these methods (steps) are supposed to be called when processing raw image. + + It is assumed that each public method has 2 parameters: + raw_img : ndarray + Array with images data. + img_meta : Dict + Some metadata of image. + + Also each such public method must return an image (ndarray) as the result of processing. + """ + def __init__(self, illumination_estimation='', + denoise_flg=True, + tone_mapping='Flash', + out_landscape_width=None, + out_landscape_height=None, + color_matrix = [ 1.06835938, -0.29882812, -0.14257812, + -0.43164062, 1.35546875, 0.05078125, + -0.1015625, 0.24414062, 0.5859375]): + """ + RawProcessingPipelineDemo __init__ method. + + Parameters + ---------- + illumination_estimation : str, optional + Options for illumination estimation algorithms: '', 'gw', 'wp', 'sog', 'iwp', by default ''. + denoise_flg : bool, optional + Denoising flag, by default True. + If True, resulted images will be denoised with some predefined parameters. + tone_mapping : str, optional + Options for tone mapping methods, defined in function `apply_tone_map` from `pipeline_utils` module. + By default 'Flash'. + out_landscape_width : int, optional + The width of output image (when orientation is landscape). If None, the image resize will not be performed. + By default None. + out_landscape_height : int, optional + The height of output image (when orientation is landscape). If None, the image resize will not be performed. + By default None. + color_matrix : list, optional + Avg color tranformation matrix. If None, average color transformation matrix of Huawei Mate 40 Pro is used. + """ + + self.params = locals() + del self.params['self'] + + # Linearization not handled. + def linearize_raw(self, raw_img, img_meta): + return raw_img + + def normalize(self, linearized_raw, img_meta): + return normalize(linearized_raw, img_meta['black_level'], img_meta['white_level']) + + def demosaic(self, normalized, img_meta): + return simple_demosaic(normalized, [0, 1, 1, 2]) + + + def denoise(self, normalized, img_meta): + nr_bm3d = bm3d.bm3d(normalized, sigma_psd=10/4095, stage_arg=bm3d.BM3DStages.HARD_THRESHOLDING) + data = (0.3 * (normalized - nr_bm3d) + nr_bm3d).clip(0, 1.) 
+ return data + + def white_balance(self, demosaic, img_meta): + wb_params = img_meta['as_shot_neutral'] + + white_balanced = white_balance(demosaic, wb_params) + return white_balanced + + def bgr_gtm(self, raw_img, gtm_key1, gtm_key2): + raw_img = bgr_gtm(raw_img, gtm_key1, gtm_key2) + return raw_img + + def xyz_transform(self, white_balanced, img_meta): + img_meta["color_matrix_1"] = self.params["color_matrix"] + img_meta["color_matrix_2"] = self.params["color_matrix"] + + return apply_color_space_transform(white_balanced, img_meta['color_matrix_1'], img_meta['color_matrix_2']) + + def srgb_transform(self, xyz, img_meta): + return transform_xyz_to_srgb(xyz) + + def tone_mapping(self, srgb, style): + if self.params['tone_mapping'] is None: + return apply_tone_map(srgb, 'Base') + return apply_tone_map(srgb, style) + + def gamma_correct(self, srgb, img_meta): + return apply_gamma(srgb) + + def autocontrast(self, srgb, img_meta): + # return autocontrast(srgb) + return autocontrast_using_pil(srgb, style='new') + + def perform_autocontrast(self, srgb, method): + assert method in ["pil", "standard", "channel1", "channel2"] + if method == "pil": + return autocontrast_using_pil(srgb) + elif method == "standard": + return perform_autocontrast_standard(srgb, cutoff=(4, 0)) + elif method == "old": + return perform_autocontrast_channel1(srgb) + else: + return perform_autocontrast_channel2(srgb) + + def do_refinement(self, srgb, method, pth): + srgb = csrnet(srgb, pth) + torch.cuda.empty_cache() + return srgb + + def adjust_contrast_brightness(self, srgb): + # return autocontrast(srgb) + return adjust_contrast_brightness(srgb, contrast=1.2, brightness=16) + + def to_uint8(self, srgb, img_meta): + return (srgb*255).type(torch.uint8).cpu().numpy() + + def to_uint16(self, srgb, img_meta): + return (srgb*65535).astype(np.uint16) + + def process_sharpen(self, bgr, img_meta): + bgr = bgr.clip(0, 1) + maxvalue = 255 + if (np.max(bgr) <= 1): + bgr = bgr * maxvalue + + bgr = sharpen_bilateralFilter(bgr) + bgr = bgr/255. + return bgr + + def resize(self, img, img_meta): + if self.params['out_landscape_width'] is None or self.params['out_landscape_height'] is None: + return img + return resize_using_pil(img, self.params['out_landscape_width'], self.params['out_landscape_height']) + + def fix_orientation(self, img, img_meta): + return fix_orientation(img, img_meta['orientation']) + + +class PipelineExecutor: + """ + Pipeline executor class. + + This class can be used to successively execute the steps of some image pipeline class (for example `RawProcessingPipelineDemo`). + The declaration order of the public methods of pipeline class must correspond to the order in which these methods (steps) are supposed to be called when processing image. + + It is assumed that each public method of the pipeline class has 2 parameters: + raw_img : ndarray + Array with images data. + img_meta : Dict + Some meta data of image. + + Also each such public method must return an image (ndarray) as the result of processing. + """ + def __init__(self, img, img_meta, pipeline_obj, first_stage=None, last_stage=None): + """ + PipelineExecutor __init__ method. + + Parameters + ---------- + img : ndarray + Image that should be processed by pipeline. + img_meta : Dict + Some image metadata. + pipeline_obj : pipeline object + Some pipeline object such as RawProcessingPipelineDemo. + first_stage : str, optional + The name of first public method of pipeline object that should be called by PipelineExecutor. 
+ If None, the first public method from defined in pipeline object will be considered as `first_stage` method. + By default None. + last_stage : str, optional + The name of last public method of pipeline object that should be called by PipelineExecutor. + If None, the last public method from defined in pipeline object will be considered as `last_stage` method. + By default None. + """ + self.pipeline_obj = pipeline_obj + self.stages_dict = self._init_stages() + self.stages_names, self.stages = list( + self.stages_dict.keys()), list(self.stages_dict.values()) + + if first_stage is None: + self.next_stage_indx = 0 + else: + assert first_stage in self.stages_names, f"Invalid first_stage={first_stage}. Try use the following stages: {self.stages_names}" + self.next_stage_indx = self.stages_names.index(first_stage) + + if last_stage is None: + self.last_stage_indx = len(self.stages_names) - 1 + else: + assert last_stage in self.stages_names, f"Invalid last_stage={last_stage}. Try use the following stages: {self.stages_names}" + self.last_stage_indx = self.stages_names.index(last_stage) + if self.next_stage_indx > self.last_stage_indx: + print(f'Warning: the specified first_stage={first_stage} follows the specified last_stage={last_stage}, so using __call__ no image processing will be done.') + + self.current_image = img + self.img_meta = img_meta + + def _init_stages(self): + stages = {func: getattr(self.pipeline_obj, func) for func in self.pipeline_obj.__class__.__dict__ if callable( + getattr(self.pipeline_obj, func)) and not func.startswith("_")} + return stages + + @property + def next_stage(self): + if self.next_stage_indx < len(self.stages): + return self.stages_names[self.next_stage_indx] + else: + return None + + @property + def last_stage(self): + return self.stages_names[self.last_stage_indx] + + def __iter__(self): + return self + + def __next__(self): + if self.next_stage_indx < len(self.stages): + stage_func = self.stages[self.next_stage_indx] + self.current_image = stage_func(self.current_image, self.img_meta) + self.next_stage_indx += 1 + return self.current_image + else: + raise StopIteration + + def __call__(self): + """ + PipelineExecutor __call__ method. + + This method will sequentially execute the methods defined in the pipeline object from the `first_stage` to the `last_stage` inclusive. + + Returns + ------- + ndarray + Resulted processed raw image. + """ + for current_image in self: + if self.next_stage_indx > self.last_stage_indx: + return current_image + return self.current_image + +def json_read(fname, **kwargs): + with open(fname) as j: + data = json.load(j, **kwargs) + return data + + +def fraction_from_json(json_object): + if 'Fraction' in json_object: + return Fraction(*json_object['Fraction']) + return json_object \ No newline at end of file diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/pipeline_bm3d.py b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/pipeline_bm3d.py new file mode 100644 index 0000000000000000000000000000000000000000..fe7e292ca864f95bfc6d6de3cf46361f720a0dcb --- /dev/null +++ b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/pipeline_bm3d.py @@ -0,0 +1,223 @@ +""" +Demo raw processing pipeline and pipeline executor. +""" + +import numpy as np +from raw_prc_pipeline.pipeline_utils import * +from bm3d import bm3d_rgb + +class RawProcessingPipelineDemo: + """ + Demonstration pipeline of raw image processing. + + This pipeline is a baseline pipeline to process raw image. + The public methods of this class are successive steps of raw image processing pipeline. 
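The executor discovers these steps by walking the class `__dict__`, which preserves declaration order on Python 3.7+. A toy illustration of the discovery logic (names are made up for the sketch):

```python
class Demo:
    def first(self, img, meta): return img
    def _helper(self, img, meta): return img   # skipped: leading underscore
    def second(self, img, meta): return img

obj = Demo()
stages = {name: getattr(obj, name) for name in obj.__class__.__dict__
          if callable(getattr(obj, name)) and not name.startswith('_')}
print(list(stages))   # ['first', 'second'], i.e. declaration order
```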
+ The declaration order of the public methods must correspond to the order in which these methods (steps) are supposed to be called when processing raw image. + + It is assumed that each public method has 2 parameters: + raw_img : ndarray + Array with images data. + img_meta : Dict + Some metadata of image. + + Also each such public method must return an image (ndarray) as the result of processing. + """ + def __init__(self, illumination_estimation='', + denoise_flg=True, + tone_mapping='Flash', + out_landscape_width=None, + out_landscape_height=None, + color_matrix = [ 1.06835938, -0.29882812, -0.14257812, + -0.43164062, 1.35546875, 0.05078125, + -0.1015625, 0.24414062, 0.5859375]): + """ + RawProcessingPipelineDemo __init__ method. + + Parameters + ---------- + illumination_estimation : str, optional + Options for illumination estimation algorithms: '', 'gw', 'wp', 'sog', 'iwp', by default ''. + denoise_flg : bool, optional + Denoising flag, by default True. + If True, resulted images will be denoised with some predefined parameters. + tone_mapping : str, optional + Options for tone mapping methods, defined in function `apply_tone_map` from `pipeline_utils` module. + By default 'Flash'. + out_landscape_width : int, optional + The width of output image (when orientation is landscape). If None, the image resize will not be performed. + By default None. + out_landscape_height : int, optional + The height of output image (when orientation is landscape). If None, the image resize will not be performed. + By default None. + color_matrix : list, optional + Avg color tranformation matrix. If None, average color transformation matrix of Huawei Mate 40 Pro is used. + """ + + self.params = locals() + del self.params['self'] + + # Linearization not handled. + def linearize_raw(self, raw_img, img_meta): + return raw_img + + def normalize(self, linearized_raw, img_meta): + return normalize(linearized_raw, img_meta['black_level'], img_meta['white_level']) + + def demosaic(self, normalized, img_meta): + return simple_demosaic(normalized, img_meta['cfa_pattern']) + + def denoise(self, demosaic, img_meta): + if not self.params['denoise_flg']: + return demosaic + return denoise_image(demosaic) + + # def denoise(self, demosaic, img_meta): + # if not self.params['denoise_flg']: + # return demosaic + # return bm3d_rgb(demosaic, sigma_psd=0.1) + + def white_balance(self, demosaic, img_meta): + if self.params['illumination_estimation'] == '': + wb_params = img_meta['as_shot_neutral'] + else: + wb_params = illumination_parameters_estimation( + demosaic, self.params['illumination_estimation']) + + white_balanced = white_balance(demosaic, wb_params) + return white_balanced + + def xyz_transform(self, white_balanced, img_meta): + # in case of absence of color matrix we use mean color matrix + if "color_matrix_1" not in img_meta.keys(): + img_meta["color_matrix_1"] = self.params["color_matrix"] + img_meta["color_matrix_2"] = self.params["color_matrix"] + return apply_color_space_transform(white_balanced, img_meta['color_matrix_1'], img_meta['color_matrix_2']) + + def srgb_transform(self, xyz, img_meta): + return transform_xyz_to_srgb(xyz) + + def tone_mapping(self, srgb, img_meta): + if self.params['tone_mapping'] is None: + return apply_tone_map(srgb, 'Base') + return apply_tone_map(srgb, self.params['tone_mapping']) + + def gamma_correct(self, srgb, img_meta): + return apply_gamma(srgb) + + def autocontrast(self, srgb, img_meta): + # return autocontrast(srgb) + return autocontrast_using_pil(srgb) + + def 
to_uint8(self, srgb, img_meta): + return (srgb*255).astype(np.uint8) + + def resize(self, img, img_meta): + if self.params['out_landscape_width'] is None or self.params['out_landscape_height'] is None: + return img + return resize_using_pil(img, self.params['out_landscape_width'], self.params['out_landscape_height']) + + def fix_orientation(self, img, img_meta): + return fix_orientation(img, img_meta['orientation']) + + +class PipelineExecutor: + """ + Pipeline executor class. + + This class can be used to successively execute the steps of some image pipeline class (for example `RawProcessingPipelineDemo`). + The declaration order of the public methods of pipeline class must correspond to the order in which these methods (steps) are supposed to be called when processing image. + + It is assumed that each public method of the pipeline class has 2 parameters: + raw_img : ndarray + Array with images data. + img_meta : Dict + Some meta data of image. + + Also each such public method must return an image (ndarray) as the result of processing. + """ + def __init__(self, img, img_meta, pipeline_obj, first_stage=None, last_stage=None): + """ + PipelineExecutor __init__ method. + + Parameters + ---------- + img : ndarray + Image that should be processed by pipeline. + img_meta : Dict + Some image metadata. + pipeline_obj : pipeline object + Some pipeline object such as RawProcessingPipelineDemo. + first_stage : str, optional + The name of first public method of pipeline object that should be called by PipelineExecutor. + If None, the first public method from defined in pipeline object will be considered as `first_stage` method. + By default None. + last_stage : str, optional + The name of last public method of pipeline object that should be called by PipelineExecutor. + If None, the last public method from defined in pipeline object will be considered as `last_stage` method. + By default None. + """ + self.pipeline_obj = pipeline_obj + self.stages_dict = self._init_stages() + self.stages_names, self.stages = list( + self.stages_dict.keys()), list(self.stages_dict.values()) + + if first_stage is None: + self.next_stage_indx = 0 + else: + assert first_stage in self.stages_names, f"Invalid first_stage={first_stage}. Try use the following stages: {self.stages_names}" + self.next_stage_indx = self.stages_names.index(first_stage) + + if last_stage is None: + self.last_stage_indx = len(self.stages_names) - 1 + else: + assert last_stage in self.stages_names, f"Invalid last_stage={last_stage}. 
Try use the following stages: {self.stages_names}" + self.last_stage_indx = self.stages_names.index(last_stage) + if self.next_stage_indx > self.last_stage_indx: + print(f'Warning: the specified first_stage={first_stage} follows the specified last_stage={last_stage}, so using __call__ no image processing will be done.') + + self.current_image = img + self.img_meta = img_meta + + def _init_stages(self): + stages = {func: getattr(self.pipeline_obj, func) for func in self.pipeline_obj.__class__.__dict__ if callable( + getattr(self.pipeline_obj, func)) and not func.startswith("_")} + return stages + + @property + def next_stage(self): + if self.next_stage_indx < len(self.stages): + return self.stages_names[self.next_stage_indx] + else: + return None + + @property + def last_stage(self): + return self.stages_names[self.last_stage_indx] + + def __iter__(self): + return self + + def __next__(self): + if self.next_stage_indx < len(self.stages): + stage_func = self.stages[self.next_stage_indx] + self.current_image = stage_func(self.current_image, self.img_meta) + self.next_stage_indx += 1 + return self.current_image + else: + raise StopIteration + + def __call__(self): + """ + PipelineExecutor __call__ method. + + This method will sequentially execute the methods defined in the pipeline object from the `first_stage` to the `last_stage` inclusive. + + Returns + ------- + ndarray + Resulted processed raw image. + """ + for current_image in self: + if self.next_stage_indx > self.last_stage_indx: + return current_image + return self.current_image diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/pipeline_utils.py b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/pipeline_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..ae137adcd04db20bc5bcfbf6c99f2bfa10b8a7b0 --- /dev/null +++ b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/pipeline_utils.py @@ -0,0 +1,712 @@ +""" +Camera pipeline utilities. 
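A rough sketch of how the NumPy helpers defined below compose; the black/white levels and CFA pattern are illustrative stand-ins for real DNG metadata:

```python
import numpy as np

raw = np.random.randint(0, 1023, size=(8, 8)).astype(np.uint16)  # fake RGGB mosaic
img = normalize(raw, black_level=64, white_level=1023)           # floats in [0, 1]
rgb = simple_demosaic(img, cfa_pattern=[0, 1, 1, 2])             # RGGB -> half-res RGB
print(rgb.shape)                                                 # (4, 4, 3)
```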
+""" + +import os +from fractions import Fraction + +import cv2 +import numpy as np +import exifread +# from exifread import Ratio +from exifread.utils import Ratio +import rawpy +from scipy.io import loadmat +from raw_prc_pipeline.exif_utils import parse_exif, get_tag_values_from_ifds +from raw_prc_pipeline.fs import perform_storm, perform_flash +from PIL import Image, ImageOps +from skimage.restoration import denoise_bilateral +from skimage.transform import resize as skimage_resize +import torch + +from kornia.geometry.transform import resize + +# from modeling import weight_refinement +# from raw_prc_pipeline import misc, optim, color +import pdb +from copy import deepcopy +import hdf5storage +import torch +from scipy import signal + +def get_visible_raw_image(image_path): + raw_image = rawpy.imread(image_path).raw_image_visible.copy() + # raw_image = rawpy.imread(image_path).raw_image.copy() + return raw_image + + +def get_image_tags(image_path): + with open(image_path, 'rb') as f: + tags = exifread.process_file(f) + return tags + + +def get_image_ifds(image_path): + ifds = parse_exif(image_path, verbose=False) + return ifds + + +def get_metadata(image_path): + metadata = {} + tags = get_image_tags(image_path) + ifds = get_image_ifds(image_path) + metadata['linearization_table'] = get_linearization_table(tags, ifds) + metadata['black_level'] = get_black_level(tags, ifds) + metadata['white_level'] = get_white_level(tags, ifds) + metadata['cfa_pattern'] = get_cfa_pattern(tags, ifds) + metadata['as_shot_neutral'] = get_as_shot_neutral(tags, ifds) + color_matrix_1, color_matrix_2 = get_color_matrices(tags, ifds) + metadata['color_matrix_1'] = color_matrix_1 + metadata['color_matrix_2'] = color_matrix_2 + metadata['orientation'] = get_orientation(tags, ifds) + # isn't used + metadata['noise_profile'] = get_noise_profile(tags, ifds) + # ... + # fall back to default values, if necessary + if metadata['black_level'] is None: + metadata['black_level'] = 0 + print("Black level is None; using 0.") + if metadata['white_level'] is None: + metadata['white_level'] = 2 ** 16 + print("White level is None; using 2 ** 16.") + if metadata['cfa_pattern'] is None: + metadata['cfa_pattern'] = [0, 1, 1, 2] + print("CFAPattern is None; using [0, 1, 1, 2] (RGGB)") + if metadata['as_shot_neutral'] is None: + metadata['as_shot_neutral'] = [1, 1, 1] + print("AsShotNeutral is None; using [1, 1, 1]") + if metadata['color_matrix_1'] is None: + metadata['color_matrix_1'] = [1] * 9 + print("ColorMatrix1 is None; using [1, 1, 1, 1, 1, 1, 1, 1, 1]") + if metadata['color_matrix_2'] is None: + metadata['color_matrix_2'] = [1] * 9 + print("ColorMatrix2 is None; using [1, 1, 1, 1, 1, 1, 1, 1, 1]") + if metadata['orientation'] is None: + metadata['orientation'] = 0 + print("Orientation is None; using 0.") + # ... + return metadata + + +def get_linearization_table(tags, ifds): + possible_keys = ['Image Tag 0xC618', 'Image Tag 50712', + 'LinearizationTable', 'Image LinearizationTable'] + return get_values(tags, possible_keys) + + +def get_black_level(tags, ifds): + possible_keys = ['Image Tag 0xC61A', 'Image Tag 50714', + 'BlackLevel', 'Image BlackLevel'] + vals = get_values(tags, possible_keys) + if vals is None: + # print("Black level not found in exifread tags. 
Searching IFDs.") + vals = get_tag_values_from_ifds(50714, ifds) + return vals + + +def get_white_level(tags, ifds): + possible_keys = ['Image Tag 0xC61D', 'Image Tag 50717', + 'WhiteLevel', 'Image WhiteLevel'] + vals = get_values(tags, possible_keys) + if vals is None: + # print("White level not found in exifread tags. Searching IFDs.") + vals = get_tag_values_from_ifds(50717, ifds) + return vals + + +def get_cfa_pattern(tags, ifds): + possible_keys = ['CFAPattern', 'Image CFAPattern'] + vals = get_values(tags, possible_keys) + if vals is None: + # print("CFAPattern not found in exifread tags. Searching IFDs.") + vals = get_tag_values_from_ifds(33422, ifds) + return vals + + +def get_as_shot_neutral(tags, ifds): + possible_keys = ['Image Tag 0xC628', 'Image Tag 50728', + 'AsShotNeutral', 'Image AsShotNeutral'] + return get_values(tags, possible_keys) + + +def get_color_matrices(tags, ifds): + possible_keys_1 = ['Image Tag 0xC621', 'Image Tag 50721', + 'ColorMatrix1', 'Image ColorMatrix1'] + color_matrix_1 = get_values(tags, possible_keys_1) + possible_keys_2 = ['Image Tag 0xC622', 'Image Tag 50722', + 'ColorMatrix2', 'Image ColorMatrix2'] + color_matrix_2 = get_values(tags, possible_keys_2) + #print(f'Color matrix 1:{color_matrix_1}') + #print(f'Color matrix 2:{color_matrix_2}') + #print(np.sum(np.abs(np.array(color_matrix_1) - np.array(color_matrix_2)))) + return color_matrix_1, color_matrix_2 + + +def get_orientation(tags, ifds): + possible_tags = ['Orientation', 'Image Orientation'] + return get_values(tags, possible_tags) + + +def get_noise_profile(tags, ifds): + possible_keys = ['Image Tag 0xC761', 'Image Tag 51041', + 'NoiseProfile', 'Image NoiseProfile'] + vals = get_values(tags, possible_keys) + if vals is None: + # print("Noise profile not found in exifread tags. 
Searching IFDs.") + vals = get_tag_values_from_ifds(51041, ifds) + return vals + + +def get_values(tags, possible_keys): + values = None + for key in possible_keys: + if key in tags.keys(): + values = tags[key].values + return values + + +def normalize(raw_image, black_level, white_level): + if type(black_level) is list and len(black_level) == 1: + black_level = float(black_level[0]) + if type(white_level) is list and len(white_level) == 1: + white_level = float(white_level[0]) + black_level_mask = black_level + if type(black_level) is list and len(black_level) == 4: + if type(black_level[0]) is Ratio: + black_level = ratios2floats(black_level) + if type(black_level[0]) is Fraction: + black_level = fractions2floats(black_level) + black_level_mask = np.zeros(raw_image.shape) + idx2by2 = [[0, 0], [0, 1], [1, 0], [1, 1]] + step2 = 2 + for i, idx in enumerate(idx2by2): + black_level_mask[idx[0]::step2, idx[1]::step2] = black_level[i] + normalized_image = raw_image.astype(np.float32) - black_level_mask + # if some values were smaller than black level + normalized_image[normalized_image < 0] = 0 + normalized_image = normalized_image / (white_level - black_level_mask) + return normalized_image + + +def ratios2floats(ratios): + floats = [] + for ratio in ratios: + floats.append(float(ratio.num) / ratio.den) + return floats + + +def fractions2floats(fractions): + floats = [] + for fraction in fractions: + floats.append(float(fraction.numerator) / fraction.denominator) + return floats + + +def illumination_parameters_estimation(current_image, illumination_estimation_option): + ie_method = illumination_estimation_option.lower() + if ie_method == "gw": + ie = np.mean(current_image, axis=(0, 1)) + ie /= ie[1] + return ie + elif ie_method == "sog": + sog_p = 4. + ie = np.mean(current_image**sog_p, axis=(0, 1))**(1/sog_p) + ie /= ie[1] + return ie + elif ie_method == "wp": + ie = np.max(current_image, axis=(0, 1)) + ie /= ie[1] + return ie + elif ie_method == "iwp": + samples_count = 20 + sample_size = 20 + rows, cols = current_image.shape[:2] + data = np.reshape(current_image, (rows*cols, 3)) + maxima = np.zeros((samples_count, 3)) + for i in range(samples_count): + maxima[i, :] = np.max(data[np.random.randint( + low=0, high=rows*cols, size=(sample_size)), :], axis=0) + ie = np.mean(maxima, axis=0) + ie /= ie[1] + return ie + else: + raise ValueError( + 'Bad illumination_estimation_option value! 
Use the following options: "gw", "wp", "sog", "iwp"') + + +def white_balance(demosaic_img, as_shot_neutral): + if type(as_shot_neutral[0]) is Ratio: + as_shot_neutral = ratios2floats(as_shot_neutral) + + as_shot_neutral = np.asarray(as_shot_neutral) + # transform vector into matrix + if as_shot_neutral.shape == (3,): + as_shot_neutral = np.diag(1./as_shot_neutral) + + assert as_shot_neutral.shape == (3, 3) + + as_shot_neutral = torch.tensor(as_shot_neutral.T, dtype=torch.float32).cuda() + demosaic_img = demosaic_img @ as_shot_neutral + demosaic_img = torch.clamp(demosaic_img, 0.0, 1.0) + + return demosaic_img + + +def simple_demosaic(img, cfa_pattern): + raw_colors = np.asarray(cfa_pattern).reshape((2, 2)) + demosaiced_image = np.zeros((img.shape[0]//2, img.shape[1]//2, 3)) + for i in range(2): + for j in range(2): + ch = raw_colors[i, j] + if ch == 1: + demosaiced_image[:, :, ch] += img[i::2, j::2] / 2 + else: + demosaiced_image[:, :, ch] = img[i::2, j::2] + return demosaiced_image + + +def denoise_image(demosaiced_image): + current_image = denoise_bilateral( + demosaiced_image, sigma_color=None, sigma_spatial=2., channel_axis=-1, mode='reflect') + return current_image + +def apply_color_space_transform(demosaiced_image, color_matrix_1, color_matrix_2): + # pdb.set_trace() + # if isinstance(color_matrix_1[0], Fraction): + # color_matrix_1 = fractions2floats(color_matrix_1) + + # xyz2cam1 = np.reshape(np.asarray(color_matrix_1), (3, 3)) + + # # normalize rows (needed?) + # xyz2cam1 = xyz2cam1 / np.sum(xyz2cam1, axis=1, keepdims=True) + + # # inverse + # cam2xyz1 = np.linalg.inv(xyz2cam1) + + cam2xyz1 = torch.tensor([[ 0.64782996, 0.18070131, 0.17146873], + [ 0.20529524, 0.78768572, 0.00701903], + [ 0.02675084, -0.29688082, 1.27012997]], dtype=torch.float32).cuda() + + # for now, use one matrix # TODO: interpolate btween both + # simplified matrix multiplication + xyz_image = cam2xyz1.unsqueeze(0).unsqueeze(0) * \ + demosaiced_image.unsqueeze(2) + xyz_image = torch.sum(xyz_image, dim=-1) + xyz_image = torch.clamp(xyz_image, 0.0, 1.0) + del demosaiced_image, cam2xyz1 + return xyz_image + + +def transform_xyz_to_srgb(xyz_image): + # srgb2xyz = np.array([[0.4124564, 0.3575761, 0.1804375], + # [0.2126729, 0.7151522, 0.0721750], + # [0.0193339, 0.1191920, 0.9503041]]) + + # xyz2srgb = np.linalg.inv(srgb2xyz) + + # xyz2srgb = np.array( [[ 1.9712269,-0.6789218, -0.29230508], + # [-0.29104823, 1.748401 , -0.45735288], + # [ 0.02051281,-0.5380369, 1.5175241 ]]) + + # P40 ccm + # xyz2srgb = np.array([[1.521689, -0.673763, 0.152074], + # [-0.145724, 1.266507, -0.120783], + # [-0.0397583, -0.561249, 1.60100734]]) + + # xyz2srgb = np.array([[3.2404542, -1.5371385, -0.4985314], + # [-0.9692660, 1.8760108, 0.0415560], + # [0.0556434, -0.2040259, 1.0572252]]) + + # # normalize rows (needed?) 
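On the "(needed?)" above: row-normalizing a color matrix makes each output channel's coefficients sum to 1, so an all-ones input stays all-ones after the transform. A small numeric check using the commented-out sRGB matrix from this block:

```python
import numpy as np

M = np.array([[ 3.2404542, -1.5371385, -0.4985314],
              [-0.9692660,  1.8760108,  0.0415560],
              [ 0.0556434, -0.2040259,  1.0572252]])
Mn = M / np.sum(M, axis=-1, keepdims=True)   # normalize rows
print(M  @ np.ones(3))   # ~[1.205, 0.948, 0.909]: a flat input picks up a cast
print(Mn @ np.ones(3))   # [1. 1. 1.]: the white point is preserved
```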
+    # xyz2srgb = xyz2srgb / np.sum(xyz2srgb, axis=-1, keepdims=True)
+
+    xyz2srgb = torch.tensor([[ 2.68965507, -1.27586199, -0.41379307],
+                             [-1.02210817,  1.97828664,  0.04382154],
+                             [ 0.06122446, -0.22448978,  1.16326533]], dtype=torch.float32).cuda()
+
+    srgb_image = xyz2srgb.unsqueeze(0).unsqueeze(0) * xyz_image.unsqueeze(2)
+    srgb_image = torch.sum(srgb_image, dim=-1)
+    srgb_image = torch.clip(srgb_image, 0.0, 1.0)
+    del xyz_image, xyz2srgb
+    return srgb_image
+
+
+def reverse_orientation(image, orientation):
+    # 1 = Horizontal (normal)
+    # 2 = Mirror horizontal
+    # 3 = Rotate 180
+    # 4 = Mirror vertical
+    # 5 = Mirror horizontal and rotate 270 CW
+    # 6 = Rotate 90 CW
+    # 7 = Mirror horizontal and rotate 90 CW
+    # 8 = Rotate 270 CW
+    rev_orientations = np.array([1, 2, 3, 4, 5, 8, 7, 6])
+    return fix_orientation(image, rev_orientations[orientation - 1])
+
+
+def apply_gamma(x):
+    # sRGB OETF: linear segment below 0.0031308, power curve above
+    # return x ** (1.0 / 2.2)
+    # x = x.copy()
+    idx = x <= 0.0031308
+    x[idx] *= 12.92
+    x[~idx] = (x[~idx] ** (1.0 / 2.4)) * 1.055 - 0.055
+    return x
+
+def bgr_gtm(bgr, key_1, key_2):
+    min_val = 1 / (2 ** 16)
+    # luma via BT.601 weights
+    y = bgr[:, :, 0] * 0.299 + bgr[:, :, 1] * 0.587 + bgr[:, :, 2] * 0.114
+    b = y * (key_2 - key_1) + key_1
+    ld = (np.log(y + b) - np.log(b)) / (np.log(1 + b) - np.log(b))
+    y = np.maximum(y, min_val)
+    gain = ld / y
+    gain = np.stack([gain, gain, gain], axis=-1)
+    bgr = bgr * gain
+
+    return bgr
+
+def sharpen_bilateralFilter(RGB):
+    d = 3            # kernel size
+    sigmaColor = 10  # color domain sigma
+    sigmaSpace = 10  # space domain sigma
+
+    weight = 2
+    weight_ratio = 0.1
+
+    h, w, c = RGB.shape
+    ycc = rgb2ycbcr(RGB, w, h)
+    ycc_out = ycc
+    y = ycc[:, :, 0]
+    cb = ycc[:, :, 1]
+    cr = ycc[:, :, 2]
+
+    # smooth the luma channel, then re-amplify the detail layer (unsharp masking on Y only)
+    y_bilateral_filtered = cv2.bilateralFilter(y.astype(np.float32), d, sigmaColor, sigmaSpace)
+    detail = ycc[:, :, 0] - y_bilateral_filtered
+
+    y_out = y_bilateral_filtered + weight * detail
+    y_out = np.clip(y_out, 0, 255)
+
+    ycc_out[:, :, 0] = y_out
+    rgb_out = ycbcr2rgb(ycc_out, w, h)
+    return rgb_out
+
+# YCbCr conversion for values in the 0-255 range
+def rgb2ycbcr(image, width, height):
+    ycbcr_img = np.zeros(shape=(height, width, 3))
+    ycbcr_img[:,:,0] = 0.299*image[:,:,0] + 0.587*image[:,:,1] + 0.114*image[:,:,2]
+    ycbcr_img[:,:,1] = 128 - 0.168736*image[:,:,0] - 0.331264*image[:,:,1] + 0.5*image[:,:,2]
+    ycbcr_img[:,:,2] = 128 + 0.5*image[:,:,0] - 0.418688*image[:,:,1] - 0.081312*image[:,:,2]
+    ycbcr_img = np.clip(ycbcr_img, 0, 255)
+    return ycbcr_img
+
+# YCbCr conversion for values in the 0-255 range
+def ycbcr2rgb(image, width, height):
+    rgb_img = np.zeros(shape=(height, width, 3))
+    rgb_img[:,:,0] = image[:,:,0] + 1.402*(image[:,:,2]-128)                                   # R = Y + 1.402*(Cr-128)
+    rgb_img[:,:,1] = image[:,:,0] - 0.344136*(image[:,:,1]-128) - 0.714136*(image[:,:,2]-128)  # G = Y - 0.344136*(Cb-128) - 0.714136*(Cr-128)
+    rgb_img[:,:,2] = image[:,:,0] + 1.772*(image[:,:,1]-128)                                   # B = Y + 1.772*(Cb-128)
+    rgb_img = np.clip(rgb_img, 0, 255)
+    return rgb_img
+
+def apply_tone_map(x, tone_mapping='Base'):
+    if tone_mapping == 'Flash':
+        return perform_flash(x, perform_gamma_correction=0)/255.
+    elif tone_mapping == 'Storm':
+        return perform_storm(x, perform_gamma_correction=0)/255.
+ elif tone_mapping == 'Drago': + tonemap = cv2.createTonemapDrago() + return tonemap.process(x.astype(np.float32)) + elif tone_mapping == 'Mantiuk': + tonemap = cv2.createTonemapMantiuk() + return tonemap.process(x.astype(np.float32)) + elif tone_mapping == 'Reinhard': + tonemap = cv2.createTonemapReinhard() + return tonemap.process(x.astype(np.float32)) + elif tone_mapping == 'Linear': + return np.clip(x/np.sort(x.flatten())[-50000], 0, 1) + elif tone_mapping == 'Base': + # return 3 * x ** 2 - 2 * x ** 3 + # tone_curve = loadmat('tone_curve.mat') + tone_curve = loadmat(os.path.join(os.path.dirname( + os.path.realpath(__file__)), 'tone_curve.mat')) + tone_curve = tone_curve['tc'] + x = np.round(x * (len(tone_curve) - 1)).astype(int) + tone_mapped_image = np.squeeze(tone_curve[x]) + return tone_mapped_image + else: + raise ValueError( + 'Bad tone_mapping option value! Use the following options: "Base", "Flash", "Storm", "Linear", "Drago", "Mantiuk", "Reinhard"') + + +def autocontrast(output_image, cutoff_prcnt=2, preserve_tone=False): + if preserve_tone: + min_val, max_val = np.percentile(output_image, [cutoff_prcnt, 100 - cutoff_prcnt]) + output_image = (output_image - min_val)/(max_val - min_val) + else: + channels = [None]*3 + for ch in range(3): + min_val, max_val = np.percentile(output_image[...,ch], [cutoff_prcnt, 100 - cutoff_prcnt]) + channels[ch] = (output_image[...,ch] - min_val)/(max_val - min_val) + output_image = np.dstack(channels) + output_image = np.clip(output_image, 0, 1) + return output_image + + +def autocontrast_using_pil(img, style='new', cutoff=4): + img_uint8 = np.clip(255*img, 0, 255).astype(np.uint8) + img_pil = Image.fromarray(img_uint8) + img_pil = ImageOps.autocontrast(img_pil, cutoff=cutoff) + output_image = np.array(img_pil).astype(np.float32) / 255 + return output_image + +def _lut(image, lut): + if image.mode == "P": + # FIXME: apply to lookup table, not image data + raise NotImplementedError("mode P support coming soon") + elif image.mode in ("L", "RGB"): + if image.mode == "RGB" and len(lut) == 256: + lut = lut + lut + lut + return image.point(lut) + else: + raise OSError("not supported for this image mode") + + +def autocontrast(image, cutoff=(0, 0), ignore=None): + """ + Maximize (normalize) image contrast. This function calculates a + histogram of the input image, removes **cutoff** percent of the + lightest and darkest pixels from the histogram, and remaps the image + so that the darkest pixel becomes black (0), and the lightest + becomes white (255). + + :param image: The image to process. + :param cutoff: How many percent to cut off from the histogram. + :param ignore: The background pixel value (use None for no background). + :return: An image. 
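For example, illustrative usage of this local variant, which takes separate low/high cutoff percentages:

```python
from PIL import Image
import numpy as np

img = Image.fromarray(np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8))
# clip 4% of the darkest pixels, none of the brightest, then restretch
out = autocontrast(img, cutoff=(4, 0))
```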
+ """ + histogram = image.histogram() + lut = [] + for layer in range(0, len(histogram), 256): + h = histogram[layer : layer + 256] + if ignore is not None: + # get rid of outliers + try: + h[ignore] = 0 + except TypeError: + # assume sequence + for ix in ignore: + h[ix] = 0 + if cutoff: + # cut off pixels from both ends of the histogram + # get number of pixels + n = 0 + for ix in range(256): + n = n + h[ix] + # remove cutoff% pixels from the low end + cut = n * cutoff[0] // 100 + for lo in range(256): + if cut > h[lo]: + cut = cut - h[lo] + h[lo] = 0 + else: + h[lo] -= cut + cut = 0 + if cut <= 0: + break + # remove cutoff% samples from the hi end + cut = n * cutoff[1] // 100 + for hi in range(255, -1, -1): + if cut > h[hi]: + cut = cut - h[hi] + h[hi] = 0 + else: + h[hi] -= cut + cut = 0 + if cut <= 0: + break + # find lowest/highest samples after preprocessing + for lo in range(256): + if h[lo]: + break + for hi in range(255, -1, -1): + if h[hi]: + break + if hi <= lo: + # don't bother + lut.extend(list(range(256))) + else: + scale = 255.0 / (hi - lo) + offset = -lo * scale + for ix in range(256): + ix = int(ix * scale + offset) + if ix < 0: + ix = 0 + elif ix > 255: + ix = 255 + lut.append(ix) + return _lut(image, lut) + + +def perform_autocontrast_standard(img, cutoff=(2, 0)): + img_uint8 = np.clip(255*img, 0, 255).astype(np.uint8) + img_pil = Image.fromarray(img_uint8) + img_pil = autocontrast(img_pil, cutoff=cutoff) + output_image = np.array(img_pil).astype(np.float32) / 255. + return output_image + + +def perform_autocontrast_channel1(img): + + def reject_outliers(data, m=1.2): + return abs(data - np.mean(data)) < m * np.std(data) + + def get_cutoff(img_ch): + values, _ = np.histogram(img_ch, bins=32) + ratios = values / values.sum() + cutoff = 4 if reject_outliers(values)[0] else 4 - np.log(100 * np.abs(ratios[1]-ratios[0])) + if cutoff < 0: + cutoff = 0 + return int(cutoff) + + img_uint8 = np.clip(255*img, 0, 255).astype(np.uint8) + img_pil = Image.fromarray(img_uint8) + r, g, b = img_pil.split() + cutoff_r = get_cutoff(np.array(r).flatten()) + cutoff_g = get_cutoff(np.array(g).flatten()) + cutoff_b = get_cutoff(np.array(b).flatten()) + r_ = autocontrast(r, cutoff=(cutoff_r, 0)) + g_ = autocontrast(g, cutoff=(cutoff_g, 0)) + b_ = autocontrast(b, cutoff=(cutoff_b, 0)) + output_r = np.array(r_).astype(np.float32) / 255. + output_g = np.array(g_).astype(np.float32) / 255. + output_b = np.array(b_).astype(np.float32) / 255. + output_image = np.transpose(np.array([output_r, output_g, output_b]), (1, 2, 0)) + return output_image + + +def perform_autocontrast_channel2(img): + + def get_cutoff(img_uint8, base_cutoff=4): + cutoff = list() + h, w, _ = img_uint8.shape + for ch in Image.fromarray(img_uint8).split(): + values, _ = np.histogram(np.array(ch).flatten(), bins=32) + cutoff.append(np.ceil((values.cumsum() / (h * w))[0] * 100).astype(int)) + cutoff = [coff if coff > base_cutoff else base_cutoff for coff in cutoff] + return cutoff + + img_uint8 = np.clip(255*img, 0, 255).astype(np.uint8) + cutoff = get_cutoff(img_uint8) + output = np.array([ + np.array(autocontrast(ch, cutoff=(coff, 0))).astype(np.float32) / 255. 
+        for ch, coff in zip(Image.fromarray(img_uint8).split(), cutoff)
+    ])
+    return np.transpose(output, (1, 2, 0))
+
+def csrnet(img, pth):
+    device = torch.device("cuda")
+    from raw_prc_pipeline.csrnet_network import CSRNet as NET
+    checkpoint_path = pth
+    model = get_parm(NET, checkpoint_path, device)
+
+    # img = np.pad(img, ((0, 24), (0, 16), (0, 0)), 'reflect')
+    # img = img.astype(np.float32)
+
+    # HWC -> NCHW for the network
+    img = img.unsqueeze(0).permute(0, 3, 1, 2).contiguous()
+
+    with torch.no_grad():
+        output_image = model(img)
+
+    # output_image = output_image.detach().cpu().squeeze(0).numpy().transpose(1, 2, 0)
+    output_image = output_image[0].permute(1, 2, 0)
+    output_image = torch.clamp(output_image, 0, 1)
+
+    torch.cuda.empty_cache()
+
+    return output_image
+
+def get_net(NET, checkpoint_path, device):
+    net = NET()
+    load_net = torch.load(checkpoint_path, map_location=device)
+    try:
+        load_net = load_net['params']
+    except KeyError:
+        load_net = load_net['state_dict_model']
+
+    # remove the 'module.' prefix left over from DataParallel checkpoints
+    for k, v in deepcopy(load_net).items():
+        if k.startswith('module.'):
+            load_net[k[7:]] = v
+            load_net.pop(k)
+
+    net.load_state_dict(load_net, strict=True)
+    net = net.to(device)
+    net = net.eval()
+    return net
+
+def get_parm(NET, checkpoint_path, device):
+    net = NET()
+    load_net = torch.load(checkpoint_path, map_location=device)
+
+    net.load_state_dict(load_net, strict=True)
+    net = net.to(device)
+    net = net.eval()
+    return net
+
+def raw_rgb_to_cct(rawRgb, xyz2cam1, xyz2cam2):
+    """Convert a raw-RGB triplet to the corresponding correlated color temperature (CCT)."""
+    pass
+
+def resize_using_skimage(img, width=1296, height=864):
+    out_shape = (height, width) + img.shape[2:]
+    if img.shape == out_shape:
+        return img
+    out_img = skimage_resize(img, out_shape, preserve_range=True, anti_aliasing=True)
+    out_img = out_img.astype(np.uint8)
+    return out_img
+
+
+def resize_using_pil(img, width=1296, height=864):
+    img_pil = Image.fromarray(img)
+    out_size = (width, height)
+    if img_pil.size == out_size:
+        return img
+    # LANCZOS supersedes the deprecated Image.ANTIALIAS
+    out_img = img_pil.resize(out_size, Image.LANCZOS)
+    out_img = np.array(out_img)
+    return out_img
+
+
+def fix_orientation(image, orientation):
+
+    if type(orientation) is list:
+        orientation = orientation[0]
+
+    if orientation == 1:
+        pass
+    elif orientation == 2:
+        image = cv2.flip(image, 0)
+    elif orientation == 3:
+        image = cv2.rotate(image, cv2.ROTATE_180)
+    elif orientation == 4:
+        image = cv2.flip(image, 1)
+    elif orientation == 5:
+        image = cv2.flip(image, 0)
+        image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
+    elif orientation == 6:
+        image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
+    elif orientation == 7:
+        image = cv2.flip(image, 0)
+        image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
+    elif orientation == 8:
+        image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
+
+    return image
+
+def adjust_contrast_brightness(img, contrast:float=1.0, brightness:int=0):
+    """
+    Adjusts the contrast and brightness of a uint8 image.
+ contrast: (0.0, inf) with 1.0 leaving the contrast as is + brightness: [-255, 255] with 0 leaving the brightness as is + """ + brightness += int(round(255*(1-contrast)/2)) + return cv2.addWeighted(img, contrast, img, 0, brightness) \ No newline at end of file diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/refine_network.py b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/refine_network.py new file mode 100644 index 0000000000000000000000000000000000000000..38165babe0886a9eb87c0eb5dab6ea7de8813dac --- /dev/null +++ b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/refine_network.py @@ -0,0 +1,496 @@ +import math +import torch +import torch.optim as optim +import torch.nn as nn +import torch.nn.functional as F +from torch.nn import init, Module +import functools +from torch.optim import lr_scheduler +from collections import OrderedDict +import numpy as np + +''' +# =================================== +# Advanced nn.Sequential +# reform nn.Sequentials and nn.Modules +# to a single nn.Sequential +# =================================== +''' + +def seq(*args): + if len(args) == 1: + args = args[0] + if isinstance(args, nn.Module): + return args + modules = OrderedDict() + if isinstance(args, OrderedDict): + for k, v in args.items(): + modules[k] = seq(v) + return nn.Sequential(modules) + assert isinstance(args, (list, tuple)) + return nn.Sequential(*[seq(i) for i in args]) + +''' +# =================================== +# Useful blocks +# -------------------------------- +# conv (+ normaliation + relu) +# concat +# sum +# resblock (ResBlock) +# resdenseblock (ResidualDenseBlock_5C) +# resinresdenseblock (RRDB) +# =================================== +''' + +# ------------------------------------------------------- +# return nn.Sequantial of (Conv + BN + ReLU) +# ------------------------------------------------------- +def conv(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, + output_padding=0, dilation=1, groups=1, bias=True, + padding_mode='zeros', mode='CBR'): + L = [] + for t in mode: + if t == 'C': + L.append(nn.Conv2d(in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=bias, + padding_mode=padding_mode)) + elif t == 'X': + assert in_channels == out_channels + L.append(nn.Conv2d(in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=in_channels, + bias=bias, + padding_mode=padding_mode)) + elif t == 'T': + L.append(nn.ConvTranspose2d(in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + output_padding=output_padding, + groups=groups, + bias=bias, + dilation=dilation, + padding_mode=padding_mode)) + elif t == 'B': + L.append(nn.BatchNorm2d(out_channels)) + elif t == 'I': + L.append(nn.InstanceNorm2d(out_channels, affine=True)) + elif t == 'i': + L.append(nn.InstanceNorm2d(out_channels)) + elif t == 'R': + L.append(nn.ReLU(inplace=True)) + elif t == 'r': + L.append(nn.ReLU(inplace=False)) + elif t == 'P': + L.append(nn.PReLU()) + elif t == 'L': + L.append(nn.LeakyReLU(negative_slope=1e-1, inplace=True)) + elif t == 'l': + L.append(nn.LeakyReLU(negative_slope=1e-1, inplace=False)) + elif t == '2': + L.append(nn.PixelShuffle(upscale_factor=2)) + elif t == '3': + L.append(nn.PixelShuffle(upscale_factor=3)) + elif t == '4': + L.append(nn.PixelShuffle(upscale_factor=4)) + elif t == 'U': + L.append(nn.Upsample(scale_factor=2, 
mode='nearest')) + elif t == 'u': + L.append(nn.Upsample(scale_factor=3, mode='nearest')) + elif t == 'M': + L.append(nn.MaxPool2d(kernel_size=kernel_size, + stride=stride, + padding=0)) + elif t == 'A': + L.append(nn.AvgPool2d(kernel_size=kernel_size, + stride=stride, + padding=0)) + else: + raise NotImplementedError('Undefined type: '.format(t)) + return seq(*L) + +# ------------------------------------------------------- +# Concat the output of a submodule to its input +# ------------------------------------------------------- +class ConcatBlock(nn.Module): + def __init__(self, submodule): + super(ConcatBlock, self).__init__() + + self.sub = submodule + + def forward(self, x): + output = torch.cat((x, self.sub(x)), dim=1) + return output + + def __repr__(self): + return self.sub.__repr__() + '_concat' + +# ------------------------------------------------------- +# Elementwise sum the output of a submodule to its input +# ------------------------------------------------------- +class ShortcutBlock(nn.Module): + def __init__(self, submodule): + super(ShortcutBlock, self).__init__() + + self.sub = submodule + + def forward(self, x): + output = x + self.sub(x) + return output + + def __repr__(self): + tmpstr = 'Identity + \n|' + modstr = self.sub.__repr__().replace('\n', '\n|') + tmpstr = tmpstr + modstr + return tmpstr + +class DWTForward(nn.Module): + def __init__(self): + super(DWTForward, self).__init__() + ll = np.array([[0.5, 0.5], [0.5, 0.5]]) + lh = np.array([[-0.5, -0.5], [0.5, 0.5]]) + hl = np.array([[-0.5, 0.5], [-0.5, 0.5]]) + hh = np.array([[0.5, -0.5], [-0.5, 0.5]]) + filts = np.stack([ll[None,::-1,::-1], lh[None,::-1,::-1], + hl[None,::-1,::-1], hh[None,::-1,::-1]], + axis=0) + self.weight = nn.Parameter( + torch.tensor(filts).to(torch.get_default_dtype()), + requires_grad=False) + def forward(self, x): + C = x.shape[1] + filters = torch.cat([self.weight,] * C, dim=0) + y = F.conv2d(x, filters, groups=C, stride=2) + return y + +class DWTInverse(nn.Module): + def __init__(self): + super(DWTInverse, self).__init__() + ll = np.array([[0.5, 0.5], [0.5, 0.5]]) + lh = np.array([[-0.5, -0.5], [0.5, 0.5]]) + hl = np.array([[-0.5, 0.5], [-0.5, 0.5]]) + hh = np.array([[0.5, -0.5], [-0.5, 0.5]]) + filts = np.stack([ll[None, ::-1, ::-1], lh[None, ::-1, ::-1], + hl[None, ::-1, ::-1], hh[None, ::-1, ::-1]], + axis=0) + self.weight = nn.Parameter( + torch.tensor(filts).to(torch.get_default_dtype()), + requires_grad=False) + + def forward(self, x): + C = int(x.shape[1] / 4) + filters = torch.cat([self.weight, ] * C, dim=0) + y = F.conv_transpose2d(x, filters, groups=C, stride=2) + return y + +# ------------------------------------------------------- +# Channel Attention (CA) Layer +# ------------------------------------------------------- +class CALayer(nn.Module): + def __init__(self, channel=64, reduction=16): + super(CALayer, self).__init__() + + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.conv_du = nn.Sequential( + nn.Conv2d(channel, channel//reduction, 1, padding=0, bias=True), + nn.ReLU(inplace=True), + nn.Conv2d(channel//reduction, channel, 1, padding=0, bias=True), + nn.Sigmoid() + ) + + def forward(self, x): + y = self.avg_pool(x) + y = self.conv_du(y) + return x * y + +class ChannelPool(nn.Module): + def forward(self, x): + return torch.cat((torch.max(x,1)[0].unsqueeze(1), torch.mean(x,1).unsqueeze(1)), dim=1) + +class spatial_attn_layer(nn.Module): + def __init__(self, kernel_size=3): + super(spatial_attn_layer, self).__init__() + self.compress = ChannelPool() + self.spatial = 
nn.Conv2d(2, 1, 3, stride=1, padding=1, bias=True) + + def forward(self, x): + # import pdb;pdb.set_trace() + x_compress = self.compress(x) + x_out = self.spatial(x_compress) + scale = torch.sigmoid(x_out) # broadcasting + return x * scale + +# ------------------------------------------------------- +# Content Unrelated Channel Attention (CUCA) Layer +# ------------------------------------------------------- +class CUCALayer(nn.Module): + def __init__(self, channel=64, min=0, max=None): + super(CUCALayer, self).__init__() + + self.attention = nn.Conv2d(channel, channel, 1, padding=0, + groups=channel, bias=False) + self.min, self.max = min, max + nn.init.uniform_(self.attention.weight, 0, 1) + + def forward(self, x): + self.attention.weight.data.clamp_(self.min, self.max) + return self.attention(x) + + +# ------------------------------------------------------- +# Res Block: x + conv(relu(conv(x))) +# ------------------------------------------------------- +class ResBlock(nn.Module): + def __init__(self, in_channels=64, out_channels=64, kernel_size=3, stride=1, + padding=1, bias=True, mode='CRC'): + super(ResBlock, self).__init__() + + assert in_channels == out_channels + if mode[0] in ['R','L']: + mode = mode[0].lower() + mode[1:] + + self.res = conv(in_channels, out_channels, kernel_size, + stride, padding, bias=bias, mode=mode) + + def forward(self, x): + res = self.res(x) + return x + res + +# ------------------------------------------------------- +# Residual Channel Attention Block (RCAB) +# ------------------------------------------------------- +class RCABlock(nn.Module): + def __init__(self, in_channels=64, out_channels=64, kernel_size=3, stride=1, + padding=1, bias=True, mode='CRC', reduction=16): + super(RCABlock, self).__init__() + assert in_channels == out_channels + if mode[0] in ['R','L']: + mode = mode[0].lower() + mode[1:] + + self.res = conv(in_channels, out_channels, kernel_size, + stride, padding, bias=bias, mode=mode) + self.CA = CALayer(out_channels, reduction) + #self.SA = spatial_attn_layer() ## Spatial Attention + #self.conv1x1 = nn.Conv2d(in_channels*2, in_channels, kernel_size=1) + + def forward(self, x): + res = self.res(x) + #sa_branch = self.SA(res) + ca_branch = self.CA(res) + #res = torch.cat([sa_branch, ca_branch], dim=1) + #res = self.conv1x1(res) + return ca_branch + x + + +# ------------------------------------------------------- +# Residual Channel Attention Group (RG) +# ------------------------------------------------------- +class RCAGroup(nn.Module): + def __init__(self, in_channels=64, out_channels=64, kernel_size=3, stride=1, + padding=1, bias=True, mode='CRC', reduction=16, nb=12): + super(RCAGroup, self).__init__() + assert in_channels == out_channels + if mode[0] in ['R','L']: + mode = mode[0].lower() + mode[1:] + + RG = [RCABlock(in_channels, out_channels, kernel_size, stride, padding, + bias, mode, reduction) for _ in range(nb)] + RG.append(conv(out_channels, out_channels, mode='C')) + + # self.rg = ShortcutBlock(nn.Sequential(*RG)) + self.rg = nn.Sequential(*RG) + + def forward(self, x): + res = self.rg(x) + return res + x + +# ------------------------------------------------------- +# conv + subp + relu +# ------------------------------------------------------- +def upsample_pixelshuffle(in_channels=64, out_channels=3, kernel_size=3, + stride=1, padding=1, bias=True, mode='2R'): + # mode examples: 2, 2R, 2BR, 3, ..., 4BR. 
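The `mode` strings used by `conv` and the samplers in this file are a small layer DSL: 'C' conv, 'T' transposed conv, 'B' BatchNorm, 'R' ReLU, 'P' PReLU, '2'/'3'/'4' PixelShuffle, 'U' nearest upsample, 'M'/'A' max/avg pool. A hedged sketch of what two of these factories expand to:

```python
import torch

block = conv(3, 64, mode='CBR')               # Conv2d(3, 64, 3) -> BatchNorm2d -> ReLU
up = upsample_pixelshuffle(64, 3, mode='2R')  # Conv2d(64, 12, 3) -> PixelShuffle(2) -> ReLU
x = torch.randn(1, 3, 32, 32)
print(block(x).shape)        # torch.Size([1, 64, 32, 32])
print(up(block(x)).shape)    # torch.Size([1, 3, 64, 64])
```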
+ assert len(mode)<4 and mode[0] in ['2', '3', '4'] + up1 = conv(in_channels, out_channels * (int(mode[0]) ** 2), kernel_size, + stride, padding, bias=bias, mode='C'+mode) + return up1 + + +# ------------------------------------------------------- +# nearest_upsample + conv + relu +# ------------------------------------------------------- +def upsample_upconv(in_channels=64, out_channels=3, kernel_size=3, stride=1, + padding=1, bias=True, mode='2R'): + # mode examples: 2, 2R, 2BR, 3, ..., 3BR. + assert len(mode)<4 and mode[0] in ['2', '3'] + if mode[0] == '2': + uc = 'UC' + elif mode[0] == '3': + uc = 'uC' + mode = mode.replace(mode[0], uc) + up1 = conv(in_channels, out_channels, kernel_size, stride, + padding, bias=bias, mode=mode) + return up1 + + +# ------------------------------------------------------- +# convTranspose + relu +# ------------------------------------------------------- +def upsample_convtranspose(in_channels=64, out_channels=3, kernel_size=2, + stride=2, padding=0, bias=True, mode='2R'): + # mode examples: 2, 2R, 2BR, 3, ..., 4BR. + assert len(mode)<4 and mode[0] in ['2', '3', '4'] + kernel_size = int(mode[0]) + stride = int(mode[0]) + mode = mode.replace(mode[0], 'T') + up1 = conv(in_channels, out_channels, kernel_size, stride, + padding, bias=bias, mode=mode) + return up1 + + +''' +# ====================== +# Downsampler +# ====================== +''' + + +# ------------------------------------------------------- +# strideconv + relu +# ------------------------------------------------------- +def downsample_strideconv(in_channels=64, out_channels=64, kernel_size=2, + stride=2, padding=0, bias=True, mode='2R'): + # mode examples: 2, 2R, 2BR, 3, ..., 4BR. + assert len(mode)<4 and mode[0] in ['2', '3', '4'] + kernel_size = int(mode[0]) + stride = int(mode[0]) + mode = mode.replace(mode[0], 'C') + down1 = conv(in_channels, out_channels, kernel_size, stride, + padding, bias=bias, mode=mode) + return down1 + + +# ------------------------------------------------------- +# maxpooling + conv + relu +# ------------------------------------------------------- +def downsample_maxpool(in_channels=64, out_channels=64, kernel_size=3, + stride=1, padding=0, bias=True, mode='2R'): + # mode examples: 2, 2R, 2BR, 3, ..., 3BR. + assert len(mode)<4 and mode[0] in ['2', '3'] + kernel_size_pool = int(mode[0]) + stride_pool = int(mode[0]) + mode = mode.replace(mode[0], 'MC') + pool = conv(kernel_size=kernel_size_pool, stride=stride_pool, mode=mode[0]) + pool_tail = conv(in_channels, out_channels, kernel_size, stride, + padding, bias=bias, mode=mode[1:]) + return sequential(pool, pool_tail) + + +# ------------------------------------------------------- +# averagepooling + conv + relu +# ------------------------------------------------------- +def downsample_avgpool(in_channels=64, out_channels=64, kernel_size=3, + stride=1, padding=1, bias=True, mode='2R'): + # mode examples: 2, 2R, 2BR, 3, ..., 3BR. 
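+    # e.g. mode='2R' becomes 'ACR' after the replace below: conv(mode='A')
+    # builds the AvgPool2d(2) stage, conv(mode='CR') the conv + ReLU tail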
+ assert len(mode)<4 and mode[0] in ['2', '3'] + kernel_size_pool = int(mode[0]) + stride_pool = int(mode[0]) + mode = mode.replace(mode[0], 'AC') + pool = conv(kernel_size=kernel_size_pool, stride=stride_pool, mode=mode[0]) + pool_tail = conv(in_channels, out_channels, kernel_size, stride, + padding, bias=bias, mode=mode[1:]) + return sequential(pool, pool_tail) + + + + + + + + + + +class MWRCAN(nn.Module): + def __init__(self): + super(MWRCAN, self).__init__() + c1 = 64 + c2 = 96 + c3 = 128 + n_b = 20 + + self.head = seq( + nn.AvgPool2d(2), + nn.PixelUnshuffle(2), + DWTForward(), + ) + + self.down1 = seq( + nn.Conv2d(48, c1, 3, 1, 1), + nn.PReLU(), + RCAGroup(in_channels=c1, out_channels=c1, nb=n_b) + ) + + self.down2 = seq( + DWTForward(), + nn.Conv2d(c1 * 4, c2, 3, 1, 1), + nn.PReLU(), + RCAGroup(in_channels=c2, out_channels=c2, nb=n_b) + ) + + self.down3 = seq( + DWTForward(), + nn.Conv2d(c2 * 4, c3, 3, 1, 1), + nn.PReLU() + ) + + self.middle = seq( + RCAGroup(in_channels=c3, out_channels=c3, nb=n_b), + RCAGroup(in_channels=c3, out_channels=c3, nb=n_b) + ) + + self.up1 = seq( + nn.Conv2d(c3, c2 * 4, 3, 1, 1), + nn.PReLU(), + DWTInverse() + ) + + self.up2 = seq( + RCAGroup(in_channels=c2, out_channels=c2, nb=n_b), + nn.Conv2d(c2, c1 * 4, 3, 1, 1), + nn.PReLU(), + DWTInverse() + ) + + self.up3 = seq( + RCAGroup(in_channels=c1, out_channels=c1, nb=n_b), + nn.Conv2d(c1, 12, 3, 1, 1) + ) + + self.tail = seq( + DWTInverse() + ) + + def forward(self, x, c=None): + c1 = self.head(x) + c2 = self.down1(c1) + c3 = self.down2(c2) + c4 = self.down3(c3) + m = self.middle(c4) + c5 = self.up1(m) + c3 + c6 = self.up2(c5) + c2 + c7 = self.up3(c6) + out = self.tail(c7) + + return out diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/tone_curve.mat b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/tone_curve.mat new file mode 100644 index 0000000000000000000000000000000000000000..af56812d14be86467529eccc094f988dd926ed0c Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/tone_curve.mat differ diff --git a/IIR-Lab/ISP_pipeline/requirements.txt b/IIR-Lab/ISP_pipeline/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..b4fa4b6dbf76ff0c6d7ef98ef3acba823f1c7704 --- /dev/null +++ b/IIR-Lab/ISP_pipeline/requirements.txt @@ -0,0 +1,26 @@ +ExifRead==3.0.0 +imageio==2.24.0 +networkx==3.0 +numpy==1.24.1 +opencv-python==4.7.0.68 +packaging==23.0 +pandas==1.5.3 +Pillow==9.4.0 +python-dateutil==2.8.2 +pytz==2022.7.1 +PyWavelets==1.4.1 +rawpy==0.17.3 +scikit-image==0.19.3 +scipy==1.10.0 +six==1.16.0 +tifffile==2022.10.10 +tqdm==4.64.1 +colour-science==0.3.16 +cycler==0.10.0 +decorator==4.4.2 +kiwisolver==1.3.1 +matplotlib==3.4.1 +pyparsing==2.4.7 +boto3==1.17.54 +ipykernel>=5.5.3 +jupyter>=1.0.0 \ No newline at end of file diff --git a/IIR-Lab/ISP_pipeline/resize_and_orientation.py b/IIR-Lab/ISP_pipeline/resize_and_orientation.py new file mode 100644 index 0000000000000000000000000000000000000000..4a1de72ccc239e7daf13e7ed1626927f7d4d1b65 --- /dev/null +++ b/IIR-Lab/ISP_pipeline/resize_and_orientation.py @@ -0,0 +1,72 @@ +import numpy as np +import cv2 +from PIL import Image as Image +import os +import json + +def resize_using_pil(img, width=1024, height=768): + img_pil = Image.fromarray(img) + out_size = (width, height) + if img_pil.size == out_size: + return img + out_img = img_pil.resize(out_size,Image.LANCZOS) + # out_img = img_pil + out_img = np.array(out_img) + return out_img + +def readjson(json_path,): + with open(json_path,'r',encoding='UTF-8') as f: + result = 
json.load(f) + # a,b = result["noise_profile"] + # black = result["white_level"] + orientation = result["orientation"] + return orientation + +def fix_orientation(image, orientation): + # 1 = Horizontal(normal) + # 2 = Mirror horizontal + # 3 = Rotate 180 + # 4 = Mirror vertical + # 5 = Mirror horizontal and rotate 270 CW + # 6 = Rotate 90 CW + # 7 = Mirror horizontal and rotate 90 CW + # 8 = Rotate 270 CW + + if type(orientation) is list: + orientation = orientation[0] + + if orientation == 'Horizontal(normal)': + pass + elif orientation == "Mirror horizontal": + image = cv2.flip(image, 0) + elif orientation == "Rotate 180": + image = cv2.rotate(image, cv2.ROTATE_180) + elif orientation == "Mirror vertical": + image = cv2.flip(image, 1) + elif orientation == "Mirror horizontal and rotate 270 CW": + image = cv2.flip(image, 0) + image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE) + elif orientation == "Rotate 90 CW": + image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE) + elif orientation == "Mirror horizontal and rotate 90 CW": + image = cv2.flip(image, 0) + image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE) + elif orientation == "Rotate 270 CW": + image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE) + + return image + + +if __name__ == "__main__": + path_in = "/data1/03_results/2nd_validation_results/" + path_out = "/data1/03_results/BigGuy_submission_1_antialias/" + json_path = "/data1/02_data/Nightimaging/2nd_validation_data/" + pic_name = [] + name_list = os.listdir(path_in) + for i in range(len(name_list)): + pic = cv2.imread(path_in+name_list[i], cv2.IMREAD_UNCHANGED) + resized_pic = resize_using_pil(pic) + print(json_path+name_list[i][:-9]+'.json') + json_orientation = readjson(json_path=(json_path+name_list[i][:-9]+'.json')) + orientated_pic = fix_orientation(resized_pic, json_orientation) + cv2.imwrite(path_out+name_list[i][:-9]+".jpg", orientated_pic, [cv2.IMWRITE_JPEG_QUALITY, 100]) diff --git a/IIR-Lab/ISP_pipeline/run.sh b/IIR-Lab/ISP_pipeline/run.sh new file mode 100644 index 0000000000000000000000000000000000000000..5869897cb1736aa7b09f52b4970a9c05fa509cf5 --- /dev/null +++ b/IIR-Lab/ISP_pipeline/run.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +python -m demo.process_pngs -p "/data1/02_data/Train_Data/" -o "/data1/01_code/06_nightimaging/nightimaging24-develop/processed_data/" -ie gw -tm Flash \ No newline at end of file diff --git a/IIR-Lab/ISP_pipeline/utility.py b/IIR-Lab/ISP_pipeline/utility.py new file mode 100644 index 0000000000000000000000000000000000000000..2eb47354908ec9082eda38120fab024dc02ab836 --- /dev/null +++ b/IIR-Lab/ISP_pipeline/utility.py @@ -0,0 +1,1082 @@ +# ============================================================= +# This file contains helper functions and classes +# +# Mushfiqul Alam, 2017 +# +# Report bugs/suggestions: +# mushfiqulalam@gmail.com +# ============================================================= + +import png +import numpy as np +import scipy.misc +import math +from scipy import signal # for convolutions +from scipy import ndimage # for n-dimensional convolution +from scipy import interpolate + +# ============================================================= +# function: imsave +# save image in image formats +# data: is the image data +# output_dtype: output data type +# input_dtype: input data type +# is_scale: is scaling needed to go from input data type to output data type +# ============================================================= +def imsave(data, output_name, output_dtype="uint8", input_dtype="uint8", 
is_scale=False): + + dtype_dictionary = {"uint8" : np.uint8(data), "uint16" : np.uint16(data),\ + "uint32" : np.uint32(data), "uint64" : np.uint64(data),\ + "int8" : np.int8(data), "int16" : np.int16(data),\ + "int32" : np.int32(data), "int64" : np.int64(data),\ + "float16" : np.float16(data), "float32" : np.float32(data),\ + "float64" : np.float64(data)} + + min_val_dictionary = {"uint8" : 0, "uint16" : 0,\ + "uint32" : 0, "uint64" : 0,\ + "int8" : -128, "int16" : -32768,\ + "int32" : -2147483648, "int64" : -9223372036854775808} + + max_val_dictionary = {"uint8" : 255, "uint16" : 65535,\ + "uint32" : 4294967295, "uint64" : 18446744073709551615,\ + "int8" : 127, "int16" : 32767,\ + "int32" : 2147483647, "int64" : 9223372036854775807} + + # scale the data in case scaling is necessary to go from input_dtype + # to output_dtype + if (is_scale): + + # convert data into float32 + data = np.float32(data) + + # Get minimum and maximum value of the input and output data types + in_min = min_val_dictionary[input_dtype] + in_max = max_val_dictionary[input_dtype] + out_min = min_val_dictionary[output_dtype] + out_max = max_val_dictionary[output_dtype] + + # clip the input data in the input_dtype range + data = np.clip(data, in_min, in_max) + + # scale the data + data = out_min + (data - in_min) * (out_max - out_min) / (in_max - in_min) + + # clip scaled data in output_dtype range + data = np.clip(data, out_min, out_max) + + # convert the data into the output_dtype + data = dtype_dictionary[output_dtype] + + # output image type: raw, png, jpeg + output_file_type = output_name[-3:] + + # save files depending on output_file_type + if (output_file_type == "raw"): + pass # will be added later + return + + elif (output_file_type == "png"): + + # png will only save uint8 or uint16 + if ((output_dtype == "uint16") or (output_dtype == "uint8")): + if (output_dtype == "uint16"): + output_bitdepth = 16 + elif (output_dtype == "uint8"): + output_bitdepth = 8 + + pass + else: + print("For png output, output_dtype must be uint8 or uint16") + return + + with open(output_name, "wb") as f: + # rgb image + if (np.ndim(data) == 3): + # create the png writer + writer = png.Writer(width=data.shape[1], height=data.shape[0],\ + bitdepth = output_bitdepth) + # convert data to the python lists expected by the png Writer + data2list = data.reshape(-1, data.shape[1]*data.shape[2]).tolist() + # write in the file + writer.write(f, data2list) + + # greyscale image + elif (np.ndim(data) == 2): + # create the png writer + writer = png.Writer(width=data.shape[1], height=data.shape[0],\ + bitdepth = output_bitdepth,\ + greyscale = True) + # convert data to the python lists expected by the png Writer + data2list = data.tolist() + # write in the file + writer.write(f, data2list) + + elif (output_file_type == "jpg"): + pass # will be added later + return + + else: + print("output_name should contain extensions of .raw, .png, or .jpg") + return + + +# ============================================================= +# class: helpers +# a class of useful helper functions +# ============================================================= +class helpers: + def __init__(self, data=None, name="helper"): + self.data = np.float32(data) + self.name = name + + def get_width_height(self): + #------------------------------------------------------ + # returns width, height + # We assume data be in height x width x number of channel x frames format + #------------------------------------------------------ + if (np.ndim(self.data) > 1): + size = 
np.shape(self.data) + width = size[1] + height = size[0] + return width, height + else: + print("Error! data dimension must be 2 or greater") + + def bayer_channel_separation(self, pattern): + #------------------------------------------------------ + # function: bayer_channel_separation + # Objective: Outputs four channels of the bayer pattern + # Input: + # data: the bayer data + # pattern: rggb, grbg, gbrg, or bggr + # Output: + # R, G1, G2, B (Quarter resolution images) + #------------------------------------------------------ + if (pattern == "rggb"): + R = self.data[::2, ::2] + G1 = self.data[::2, 1::2] + G2 = self.data[1::2, ::2] + B = self.data[1::2, 1::2] + elif (pattern == "grbg"): + G1 = self.data[::2, ::2] + R = self.data[::2, 1::2] + B = self.data[1::2, ::2] + G2 = self.data[1::2, 1::2] + elif (pattern == "gbrg"): + G1 = self.data[::2, ::2] + B = self.data[::2, 1::2] + R = self.data[1::2, ::2] + G2 = self.data[1::2, 1::2] + elif (pattern == "bggr"): + B = self.data[::2, ::2] + G1 = self.data[::2, 1::2] + G2 = self.data[1::2, ::2] + R = self.data[1::2, 1::2] + else: + print("pattern must be one of these: rggb, grbg, gbrg, bggr") + return + + return R, G1, G2, B + + + def bayer_channel_integration(self, R, G1, G2, B, pattern): + #------------------------------------------------------ + # function: bayer_channel_integration + # Objective: combine data into a raw according to pattern + # Input: + # R, G1, G2, B: the four separate channels (Quarter resolution) + # pattern: rggb, grbg, gbrg, or bggr + # Output: + # data (Full resolution image) + #------------------------------------------------------ + size = np.shape(R) + data = np.empty((size[0]*2, size[1]*2), dtype=np.float32) + if (pattern == "rggb"): + data[::2, ::2] = R + data[::2, 1::2] = G1 + data[1::2, ::2] = G2 + data[1::2, 1::2] = B + elif (pattern == "grbg"): + data[::2, ::2] = G1 + data[::2, 1::2] = R + data[1::2, ::2] = B + data[1::2, 1::2] = G2 + elif (pattern == "gbrg"): + data[::2, ::2] = G1 + data[::2, 1::2] = B + data[1::2, ::2] = R + data[1::2, 1::2] = G2 + elif (pattern == "bggr"): + data[::2, ::2] = B + data[::2, 1::2] = G1 + data[1::2, ::2] = G2 + data[1::2, 1::2] = R + else: + print("pattern must be one of these: rggb, grbg, gbrg, bggr") + return + + return data + + + def shuffle_bayer_pattern(self, input_pattern, output_pattern): + #------------------------------------------------------ + # function: shuffle_bayer_pattern + # convert from one bayer pattern to another + #------------------------------------------------------ + + # Get separate channels + R, G1, G2, B = self.bayer_channel_separation(input_pattern) + + # return integrated data + return self.bayer_channel_integration(R, G1, G2, B, output_pattern) + + + def sigma_filter_helper(self, neighborhood_size, sigma): + + if (neighborhood_size % 2) == 0: + print("Error! neighborhood_size must be odd for example 3, 5, 7") + return + + # number of pixels to be padded at the borders + no_of_pixel_pad = math.floor(neighborhood_size / 2.) 
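+        # e.g. neighborhood_size = 5 gives no_of_pixel_pad = 2, so two
+        # reflected rows/columns are added on every border before filtering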
+ + # get width, height + width, height = self.get_width_height() + + # pad pixels at the borders + img = np.pad(self.data, \ + (no_of_pixel_pad, no_of_pixel_pad),\ + 'reflect') # reflect would not repeat the border value + + # allocate memory for output + output = np.empty((height, width), dtype=np.float32) + + for i in range(no_of_pixel_pad, height + no_of_pixel_pad): + for j in range(no_of_pixel_pad, width + no_of_pixel_pad): + + # save the middle pixel value + mid_pixel_val = img[i, j] + + # extract the neighborhood + neighborhood = img[i - no_of_pixel_pad : i + no_of_pixel_pad+1,\ + j - no_of_pixel_pad : j + no_of_pixel_pad+1] + + lower_range = mid_pixel_val - sigma + upper_range = mid_pixel_val + sigma + + temp = 0. + ctr = 0 + for ni in range (0, neighborhood_size): + for nj in range (0, neighborhood_size): + if (neighborhood[ni, nj] > lower_range) and (neighborhood[ni, nj] < upper_range): + temp += neighborhood[ni, nj] + ctr += 1 + + output[i - no_of_pixel_pad, j - no_of_pixel_pad] = temp / ctr + + return output + + def bilinear_interpolation(self, x, y): + + width, height = self.get_width_height() + + x0 = np.floor(x).astype(int) + x1 = x0 + 1 + y0 = np.floor(y).astype(int) + y1 = y0 + 1 + + x0 = np.clip(x0, 0, width-1) + x1 = np.clip(x1, 0, width-1) + y0 = np.clip(y0, 0, height-1) + y1 = np.clip(y1, 0, height-1) + + Ia = self.data[y0, x0] + Ib = self.data[y1, x0] + Ic = self.data[y0, x1] + Id = self.data[y1, x1] + + + x = np.clip(x, 0, width-1) + y = np.clip(y, 0, height-1) + + wa = (x1 - x) * (y1 - y) + wb = (x1 - x) * (y - y0) + wc = (x - x0) * (y1 - y) + wd = (x - x0) * (y - y0) + + return wa * Ia + wb * Ib + wc * Ic + wd * Id + + def degamma_srgb(self, clip_range=[0, 65535]): + + # bring data in range 0 to 1 + data = np.clip(self.data, clip_range[0], clip_range[1]) + data = np.divide(data, clip_range[1]) + + data = np.asarray(data) + mask = data > 0.04045 + + # basically, if data[x, y, c] > 0.04045, data[x, y, c] = ( (data[x, y, c] + 0.055) / 1.055 ) ^ 2.4 + # else, data[x, y, c] = data[x, y, c] / 12.92 + data[mask] += 0.055 + data[mask] /= 1.055 + data[mask] **= 2.4 + + data[np.invert(mask)] /= 12.92 + + # rescale + return np.clip(data * clip_range[1], clip_range[0], clip_range[1]) + + def gamma_srgb(self, clip_range=[0, 65535]): + + # bring data in range 0 to 1 + data = np.clip(self.data, clip_range[0], clip_range[1]) + data = np.divide(data, clip_range[1]) + + data = np.asarray(data) + mask = data > 0.0031308 + + # basically, if data[x, y, c] > 0.0031308, data[x, y, c] = 1.055 * ( var_R(i, j) ^ ( 1 / 2.4 ) ) - 0.055 + # else, data[x, y, c] = data[x, y, c] * 12.92 + data[mask] **= 0.4167 + data[mask] *= 1.055 + data[mask] -= 0.055 + + data[np.invert(mask)] *= 12.92 + + # rescale + return np.clip(data * clip_range[1], clip_range[0], clip_range[1]) + + + def degamma_adobe_rgb_1998(self, clip_range=[0, 65535]): + + # bring data in range 0 to 1 + data = np.clip(self.data, clip_range[0], clip_range[1]) + data = np.divide(data, clip_range[1]) + + data = np.power(data, 2.2) # originally raised to 2.19921875 + + # rescale + return np.clip(data * clip_range[1], clip_range[0], clip_range[1]) + + def gamma_adobe_rgb_1998(self, clip_range=[0, 65535]): + + # bring data in range 0 to 1 + data = np.clip(self.data, clip_range[0], clip_range[1]) + data = np.divide(data, clip_range[1]) + + data = np.power(data, 0.4545) + + # rescale + return np.clip(data * clip_range[1], clip_range[0], clip_range[1]) + + + def get_xyz_reference(self, cie_version="1931", illuminant="d65"): + + if (cie_version 
== "1931"): + + xyz_reference_dictionary = {"A" : [109.850, 100.0, 35.585],\ + "B" : [99.0927, 100.0, 85.313],\ + "C" : [98.074, 100.0, 118.232],\ + "d50" : [96.422, 100.0, 82.521],\ + "d55" : [95.682, 100.0, 92.149],\ + "d65" : [95.047, 100.0, 108.883],\ + "d75" : [94.972, 100.0, 122.638],\ + "E" : [100.0, 100.0, 100.0],\ + "F1" : [92.834, 100.0, 103.665],\ + "F2" : [99.187, 100.0, 67.395],\ + "F3" : [103.754, 100.0, 49.861],\ + "F4" : [109.147, 100.0, 38.813],\ + "F5" : [90.872, 100.0, 98.723],\ + "F6" : [97.309, 100.0, 60.191],\ + "F7" : [95.044, 100.0, 108.755],\ + "F8" : [96.413, 100.0, 82.333],\ + "F9" : [100.365, 100.0, 67.868],\ + "F10" : [96.174, 100.0, 81.712],\ + "F11" : [100.966, 100.0, 64.370],\ + "F12" : [108.046, 100.0, 39.228]} + + elif (cie_version == "1964"): + + xyz_reference_dictionary = {"A" : [111.144, 100.0, 35.200],\ + "B" : [99.178, 100.0, 84.3493],\ + "C" : [97.285, 100.0, 116.145],\ + "D50" : [96.720, 100.0, 81.427],\ + "D55" : [95.799, 100.0, 90.926],\ + "D65" : [94.811, 100.0, 107.304],\ + "D75" : [94.416, 100.0, 120.641],\ + "E" : [100.0, 100.0, 100.0],\ + "F1" : [94.791, 100.0, 103.191],\ + "F2" : [103.280, 100.0, 69.026],\ + "F3" : [108.968, 100.0, 51.965],\ + "F4" : [114.961, 100.0, 40.963],\ + "F5" : [93.369, 100.0, 98.636],\ + "F6" : [102.148, 100.0, 62.074],\ + "F7" : [95.792, 100.0, 107.687],\ + "F8" : [97.115, 100.0, 81.135],\ + "F9" : [102.116, 100.0, 67.826],\ + "F10" : [99.001, 100.0, 83.134],\ + "F11" : [103.866, 100.0, 65.627],\ + "F12" : [111.428, 100.0, 40.353]} + + else: + print("Warning! cie_version must be 1931 or 1964.") + return + + return np.divide(xyz_reference_dictionary[illuminant], 100.0) + + def sobel_prewitt_direction_label(self, gradient_magnitude, theta, threshold=0): + + direction_label = np.zeros(np.shape(gradient_magnitude), dtype=np.float32) + + theta = np.asarray(theta) + # vertical + mask = ((theta >= -22.5) & (theta <= 22.5)) + direction_label[mask] = 3. + + # +45 degree + mask = ((theta > 22.5) & (theta <= 67.5)) + direction_label[mask] = 2. + + # -45 degree + mask = ((theta < -22.5) & (theta >= -67.5)) + direction_label[mask] = 4. + + # horizontal + mask = ((theta > 67.5) & (theta <= 90.)) | ((theta < -67.5) & (theta >= -90.)) + direction_label[mask] = 1. + + gradient_magnitude = np.asarray(gradient_magnitude) + mask = gradient_magnitude < threshold + direction_label[mask] = 0. 
+ + return direction_label + + def edge_wise_median(self, kernel_size, edge_location): + + # pad two pixels at the border + no_of_pixel_pad = math.floor(kernel_size / 2) # number of pixels to pad + + data = self.data + data = np.pad(data, \ + (no_of_pixel_pad, no_of_pixel_pad),\ + 'reflect') # reflect would not repeat the border value + + edge_location = np.pad(edge_location,\ + (no_of_pixel_pad, no_of_pixel_pad),\ + 'reflect') # reflect would not repeat the border value + + width, height = self.get_width_height() + output = np.empty((height, width), dtype=np.float32) + + for i in range(no_of_pixel_pad, height + no_of_pixel_pad): + for j in range(no_of_pixel_pad, width + no_of_pixel_pad): + if (edge_location[i, j] == 1): + output[i - no_of_pixel_pad, j - no_of_pixel_pad] = \ + np.median(data[i - no_of_pixel_pad : i + no_of_pixel_pad + 1,\ + j - no_of_pixel_pad : j + no_of_pixel_pad + 1]) + elif (edge_location[i, j] == 0): + output[i - no_of_pixel_pad, j - no_of_pixel_pad] = data[i, j] + + return output + + + def nonuniform_quantization(self): + + output = np.zeros(np.shape(self.data), dtype=np.float32) + min_val = np.min(self.data) + max_val = np.max(self.data) + + mask = (self.data > (7./8.) * (max_val - min_val)) + output[mask] = 3. + + mask = (self.data > (3./4.) * (max_val - min_val)) & (self.data <= (7./8.) * (max_val - min_val)) + output[mask] = 2. + + mask = (self.data > (1./2.) * (max_val - min_val)) & (self.data <= (3./4.) * (max_val - min_val)) + output[mask] = 1. + + return output + + + def __str__(self): + return self.name + + +# ============================================================= +# function: distance_euclid +# returns Euclidean distance between two points +# ============================================================= +def distance_euclid(point1, point2): + return math.sqrt((point1[0] - point2[0])**2 + (point1[1]-point2[1])**2) + + +# ============================================================= +# class: special_functions +# pass input through special functions +# ============================================================= +class special_function: + def __init__(self, data, name="special function"): + self.data = np.float32(data) + self.name = name + + def soft_coring(self, slope, tau_threshold, gamma_speed): + # Usage: Used in the unsharp masking sharpening Process + # Input: + # slope: controls the boost. + # the amount of sharpening, higher slope + # means more aggresssive sharpening + # + # tau_threshold: controls the amount of coring. + # threshold value till which the image is + # not sharpened. The lower the value of + # tau_threshold the more frequencies + # goes through the sharpening process + # + # gamma_speed: controls the speed of convergence to the slope + # smaller value gives a little bit more + # sharpened image, this may be a fine tuner + return slope * self.data * ( 1. - np.exp(-((np.abs(self.data / tau_threshold))**gamma_speed))) + + + def distortion_function(self, correction_type="barrel-1", strength=0.1): + + if (correction_type == "pincushion-1"): + return np.divide(self.data, 1. + strength * self.data) + elif (correction_type == "pincushion-2"): + return np.divide(self.data, 1. + strength * np.power(self.data, 2)) + elif (correction_type == "barrel-1"): + return np.multiply(self.data, 1. + strength * self.data) + elif (correction_type == "barrel-2"): + return np.multiply(self.data, 1. + strength * np.power(self.data, 2)) + else: + print("Warning! 
Unknown correction_type.") + return + + def bilateral_filter(self, edge): + # bilateral filter based upon the work of + # Jiawen Chen, Sylvain Paris, and Fredo Durand, 2007 work + + # note: if edge data is not provided, image is served as edge + # this is called normal bilateral filter + # if edge data is provided, then it is called cross or joint + # bilateral filter + + # get width and height of the image + width, height = helpers(self.data).get_width_height() + + # sigma_spatial + sigma_spatial = min(height, width) / 16. + + # calculate edge_delta + edge_min = np.min(edge) + edge_max = np.max(edge) + edge_delta = edge_max - edge_min + + # sigma_range and sampling_range + sigma_range = 0.1 * edge_delta + sampling_range = sigma_range + sampling_spatial = sigma_spatial + + # derived_sigma_spatial and derived_sigma_range + derived_sigma_spatial = sigma_spatial / sampling_spatial + derived_sigma_range = sigma_range / sampling_range + + # paddings + padding_xy = np.floor(2. * derived_sigma_spatial) + 1. + padding_z = np.floor(2. * derived_sigma_range) + 1. + + # downsamples + downsample_width = np.uint16(np.floor((width - 1.) / sampling_spatial) + 1. + 2. * padding_xy) + downsample_height = np.uint16(np.floor((height - 1.) / sampling_spatial) + 1. + 2. * padding_xy) + downsample_depth = np.uint16(np.floor(edge_delta / sampling_range) + 1. + 2. * padding_z) + + grid_data = np.zeros((downsample_height, downsample_width, downsample_depth)) + grid_weight = np.zeros((downsample_height, downsample_width, downsample_depth)) + + jj, ii = np.meshgrid(np.arange(0, width, 1),\ + np.arange(0, height, 1)) + + di = np.uint16(np.round( ii / sampling_spatial ) + padding_xy + 1.) + dj = np.uint16(np.round( jj / sampling_spatial ) + padding_xy + 1.) + dz = np.uint16(np.round( (edge - edge_min) / sampling_range ) + padding_z + 1.) + + + for i in range(0, height): + for j in range(0, width): + + data_z = self.data[i, j] + if not np.isnan(data_z): + dik = di[i, j] + djk = dj[i, j] + dzk = dz[i, j] + + grid_data[dik, djk, dzk] = grid_data[dik, djk, dzk] + data_z + grid_weight[dik, djk, dzk] = grid_weight[dik, djk, dzk] + 1. + + + kernel_width = 2. * derived_sigma_spatial + 1. + kernel_height = kernel_width + kernel_depth = 2. * derived_sigma_range + 1. + + half_kernel_width = np.floor(kernel_width / 2.) + half_kernel_height = np.floor(kernel_height / 2.) + half_kernel_depth = np.floor(kernel_depth / 2.) + + grid_x, grid_y, grid_z = np.meshgrid(np.arange(0, kernel_width, 1),\ + np.arange(0, kernel_height, 1),\ + np.arange(0, kernel_depth, 1)) + + grid_x = grid_x - half_kernel_width + grid_y = grid_y - half_kernel_height + grid_z = grid_z - half_kernel_depth + + grid_r_squared = ( ( np.multiply(grid_x, grid_x) + \ + np.multiply(grid_y, grid_y) ) / np.multiply(derived_sigma_spatial, derived_sigma_spatial) ) + \ + ( np.multiply(grid_z, grid_z) / np.multiply(derived_sigma_range, derived_sigma_range) ) + + kernel = np.exp(-0.5 * grid_r_squared) + blurred_grid_data = ndimage.convolve(grid_data, kernel, mode='reflect') + blurred_grid_weight = ndimage.convolve(grid_weight, kernel, mode='reflect') + + # divide + blurred_grid_weight = np.asarray(blurred_grid_weight) + mask = blurred_grid_weight == 0 + blurred_grid_weight[mask] = -2. + normalized_blurred_grid = np.divide(blurred_grid_data, blurred_grid_weight) + mask = blurred_grid_weight < -1 + normalized_blurred_grid[mask] = 0. + blurred_grid_weight[mask] = 0. 
+ + # upsample + jj, ii = np.meshgrid(np.arange(0, width, 1),\ + np.arange(0, height, 1)) + + di = (ii / sampling_spatial) + padding_xy + 1. + dj = (jj / sampling_spatial) + padding_xy + 1. + dz = (edge - edge_min) / sampling_range + padding_z + 1. + + # arrange the input points + n_i, n_j, n_z = np.shape(normalized_blurred_grid) + points = (np.arange(0, n_i, 1), np.arange(0, n_j, 1), np.arange(0, n_z, 1)) + + # query points + xi = (di, dj, dz) + + # multidimensional interpolation + output = interpolate.interpn(points, normalized_blurred_grid, xi, method='linear') + + return output + + + +# ============================================================= +# class: synthetic_image_generate +# creates sysnthetic images for different purposes +# ============================================================= +class synthetic_image_generate: + def __init__(self, width, height, name="synthetic_image"): + self.name = name + self.width = width + self.height = height + + def create_lens_shading_correction_images(self, dark_current=0, flat_max=65535, flat_min=0, clip_range=[0, 65535]): + # Objective: creates two images: + # dark_current_image and flat_field_image + dark_current_image = dark_current * np.ones((self.height, self.width), dtype=np.float32) + flat_field_image = np.empty((self.height, self.width), dtype=np.float32) + + center_pixel_pos = [self.height/2, self.width/2] + max_distance = distance_euclid(center_pixel_pos, [self.height, self.width]) + + for i in range(0, self.height): + for j in range(0, self.width): + flat_field_image[i, j] = (max_distance - distance_euclid(center_pixel_pos, [i, j])) / max_distance + flat_field_image[i, j] = flat_min + flat_field_image[i, j] * (flat_max - flat_min) + + dark_current_image = np.clip(dark_current_image, clip_range[0], clip_range[1]) + flat_field_image = np.clip(flat_field_image, clip_range[0], clip_range[1]) + + return dark_current_image, flat_field_image + + def create_zone_plate_image(self): + pass + + def create_color_gradient_image(self): + pass + + def create_random_noise_image(self, mean=0, standard_deviation=1, seed=0): + # Creates normally distributed noisy image + np.random.seed(seed) + return np.random.normal(mean, standard_deviation, (self.height, self.width)) + + def create_noisy_image(self, data, mean=0, standard_deviation=1, seed=0, clip_range=[0, 65535]): + # Adds normally distributed noise to the data + return np.clip(data + self.create_random_noise_image(mean, standard_deviation, seed), clip_range[0], clip_range[1]) + + +# ============================================================= +# class: create_filter +# creates different filters, generally 2D filters +# ============================================================= +class create_filter: + def __init__(self, name="filter"): + self.name = name + + def gaussian(self, kernel_size, sigma): + + # calculate which number to where the grid should be + # remember that, kernel_size[0] is the width of the kernel + # and kernel_size[1] is the height of the kernel + temp = np.floor(np.float32(kernel_size) / 2.) + + # create the grid + # example: if kernel_size = [5, 3], then: + # x: array([[-2., -1., 0., 1., 2.], + # [-2., -1., 0., 1., 2.], + # [-2., -1., 0., 1., 2.]]) + # y: array([[-1., -1., -1., -1., -1.], + # [ 0., 0., 0., 0., 0.], + # [ 1., 1., 1., 1., 1.]]) + x, y = np.meshgrid(np.linspace(-temp[0], temp[0], kernel_size[0]),\ + np.linspace(-temp[1], temp[1], kernel_size[1])) + + # Gaussian equation + temp = np.exp( -(x**2 + y**2) / (2. 
* sigma**2) ) + + # make kernel sum equal to 1 + return temp / np.sum(temp) + + def gaussian_separable(self, kernel_size, sigma): + + # calculate which number to where the grid should be + # remember that, kernel_size[0] is the width of the kernel + # and kernel_size[1] is the height of the kernel + temp = np.floor(np.float32(kernel_size) / 2.) + + # create the horizontal kernel + x = np.linspace(-temp[0], temp[0], kernel_size[0]) + x = x.reshape((1, kernel_size[0])) # reshape to create row vector + hx = np.exp(-x**2 / (2 * sigma**2)) + hx = hx / np.sum(hx) + + # create the vertical kernel + y = np.linspace(-temp[1], temp[1], kernel_size[1]) + y = y.reshape((kernel_size[1], 1)) # reshape to create column vector + hy = np.exp(-y**2 / (2 * sigma**2)) + hy = hy / np.sum(hy) + + return hx, hy + + def sobel(self, kernel_size): + # Returns the Sobel filter kernels Sx and Sy + + Sx = .25 * np.dot([[1.], [2.], [1.]], [[1., 0., -1.]]) + + if (kernel_size > 3): + + n = (np.floor((kernel_size - 5) / 2 + 1)).astype(int) + + for i in range(0, n): + + Sx = (1./16.) * signal.convolve2d(np.dot([[1.], [2.], [1.]], [[1., 2., 1.]]), Sx) + + Sy = np.transpose(Sx) + + return Sx, Sy + + def __str__(self): + return self.name + + +# ============================================================= +# class: color_conversion +# color conversion from one color space to another +# ============================================================= +class color_conversion: + def __init__(self, data, name="color conversion"): + self.data = np.float32(data) + self.name = name + + def rgb2gray(self): + return 0.299 * self.data[:, :, 0] +\ + 0.587 * self.data[:, :, 1] +\ + 0.114 * self.data[:, :, 2] + + def rgb2ycc(self, rule="bt601"): + + # map to select kr and kb + kr_kb_dict = {"bt601" : [0.299, 0.114],\ + "bt709" : [0.2126, 0.0722],\ + "bt2020" : [0.2627, 0.0593]} + + kr = kr_kb_dict[rule][0] + kb = kr_kb_dict[rule][1] + kg = 1 - (kr + kb) + + output = np.empty(np.shape(self.data), dtype=np.float32) + output[:, :, 0] = kr * self.data[:, :, 0] + \ + kg * self.data[:, :, 1] + \ + kb * self.data[:, :, 2] + output[:, :, 1] = 0.5 * ((self.data[:, :, 2] - output[:, :, 0]) / (1 - kb)) + output[:, :, 2] = 0.5 * ((self.data[:, :, 0] - output[:, :, 0]) / (1 - kr)) + + return output + + def ycc2rgb(self, rule="bt601"): + + # map to select kr and kb + kr_kb_dict = {"bt601" : [0.299, 0.114],\ + "bt709" : [0.2126, 0.0722],\ + "bt2020" : [0.2627, 0.0593]} + + kr = kr_kb_dict[rule][0] + kb = kr_kb_dict[rule][1] + kg = 1 - (kr + kb) + + output = np.empty(np.shape(self.data), dtype=np.float32) + output[:, :, 0] = 2. * self.data[:, :, 2] * (1 - kr) + self.data[:, :, 0] + output[:, :, 2] = 2. 
* self.data[:, :, 1] * (1 - kb) + self.data[:, :, 0] + output[:, :, 1] = (self.data[:, :, 0] - kr * output[:, :, 0] - kb * output[:, :, 2]) / kg + + return output + + def rgb2xyz(self, color_space="srgb", clip_range=[0, 65535]): + # input rgb in range clip_range + # output xyz is in range 0 to 1 + + if (color_space == "srgb"): + + # degamma / linearization + data = helpers(self.data).degamma_srgb(clip_range) + data = np.float32(data) + data = np.divide(data, clip_range[1]) + + # matrix multiplication` + output = np.empty(np.shape(self.data), dtype=np.float32) + output[:, :, 0] = data[:, :, 0] * 0.4124 + data[:, :, 1] * 0.3576 + data[:, :, 2] * 0.1805 + output[:, :, 1] = data[:, :, 0] * 0.2126 + data[:, :, 1] * 0.7152 + data[:, :, 2] * 0.0722 + output[:, :, 2] = data[:, :, 0] * 0.0193 + data[:, :, 1] * 0.1192 + data[:, :, 2] * 0.9505 + + elif (color_space == "adobe-rgb-1998"): + + # degamma / linearization + data = helpers(self.data).degamma_adobe_rgb_1998(clip_range) + data = np.float32(data) + data = np.divide(data, clip_range[1]) + + # matrix multiplication + output = np.empty(np.shape(self.data), dtype=np.float32) + output[:, :, 0] = data[:, :, 0] * 0.5767309 + data[:, :, 1] * 0.1855540 + data[:, :, 2] * 0.1881852 + output[:, :, 1] = data[:, :, 0] * 0.2973769 + data[:, :, 1] * 0.6273491 + data[:, :, 2] * 0.0752741 + output[:, :, 2] = data[:, :, 0] * 0.0270343 + data[:, :, 1] * 0.0706872 + data[:, :, 2] * 0.9911085 + + elif (color_space == "linear"): + + # matrix multiplication` + output = np.empty(np.shape(self.data), dtype=np.float32) + data = np.float32(self.data) + data = np.divide(data, clip_range[1]) + output[:, :, 0] = data[:, :, 0] * 0.4124 + data[:, :, 1] * 0.3576 + data[:, :, 2] * 0.1805 + output[:, :, 1] = data[:, :, 0] * 0.2126 + data[:, :, 1] * 0.7152 + data[:, :, 2] * 0.0722 + output[:, :, 2] = data[:, :, 0] * 0.0193 + data[:, :, 1] * 0.1192 + data[:, :, 2] * 0.9505 + + else: + print("Warning! 
color_space must be srgb or adobe-rgb-1998.") + return + + return output + + + def xyz2rgb(self, color_space="srgb", clip_range=[0, 65535]): + # input xyz is in range 0 to 1 + # output rgb in clip_range + + # allocate space for output + output = np.empty(np.shape(self.data), dtype=np.float32) + + if (color_space == "srgb"): + + # matrix multiplication + output[:, :, 0] = self.data[:, :, 0] * 3.2406 + self.data[:, :, 1] * -1.5372 + self.data[:, :, 2] * -0.4986 + output[:, :, 1] = self.data[:, :, 0] * -0.9689 + self.data[:, :, 1] * 1.8758 + self.data[:, :, 2] * 0.0415 + output[:, :, 2] = self.data[:, :, 0] * 0.0557 + self.data[:, :, 1] * -0.2040 + self.data[:, :, 2] * 1.0570 + + # gamma to retain nonlinearity + output = helpers(output * clip_range[1]).gamma_srgb(clip_range) + + + elif (color_space == "adobe-rgb-1998"): + + # matrix multiplication + output[:, :, 0] = self.data[:, :, 0] * 2.0413690 + self.data[:, :, 1] * -0.5649464 + self.data[:, :, 2] * -0.3446944 + output[:, :, 1] = self.data[:, :, 0] * -0.9692660 + self.data[:, :, 1] * 1.8760108 + self.data[:, :, 2] * 0.0415560 + output[:, :, 2] = self.data[:, :, 0] * 0.0134474 + self.data[:, :, 1] * -0.1183897 + self.data[:, :, 2] * 1.0154096 + + # gamma to retain nonlinearity + output = helpers(output * clip_range[1]).gamma_adobe_rgb_1998(clip_range) + + + elif (color_space == "linear"): + + # matrix multiplication + output[:, :, 0] = self.data[:, :, 0] * 3.2406 + self.data[:, :, 1] * -1.5372 + self.data[:, :, 2] * -0.4986 + output[:, :, 1] = self.data[:, :, 0] * -0.9689 + self.data[:, :, 1] * 1.8758 + self.data[:, :, 2] * 0.0415 + output[:, :, 2] = self.data[:, :, 0] * 0.0557 + self.data[:, :, 1] * -0.2040 + self.data[:, :, 2] * 1.0570 + + # gamma to retain nonlinearity + output = output * clip_range[1] + + else: + print("Warning! color_space must be srgb or adobe-rgb-1998.") + return + + return output + + + def xyz2lab(self, cie_version="1931", illuminant="d65"): + + xyz_reference = helpers().get_xyz_reference(cie_version, illuminant) + + data = self.data + data[:, :, 0] = data[:, :, 0] / xyz_reference[0] + data[:, :, 1] = data[:, :, 1] / xyz_reference[1] + data[:, :, 2] = data[:, :, 2] / xyz_reference[2] + + data = np.asarray(data) + + # if data[x, y, c] > 0.008856, data[x, y, c] = data[x, y, c] ^ (1/3) + # else, data[x, y, c] = 7.787 * data[x, y, c] + 16/116 + mask = data > 0.008856 + data[mask] **= 1./3. + data[np.invert(mask)] *= 7.787 + data[np.invert(mask)] += 16./116. + + data = np.float32(data) + output = np.empty(np.shape(self.data), dtype=np.float32) + output[:, :, 0] = 116. * data[:, :, 1] - 16. + output[:, :, 1] = 500. * (data[:, :, 0] - data[:, :, 1]) + output[:, :, 2] = 200. * (data[:, :, 1] - data[:, :, 2]) + + return output + + + def lab2xyz(self, cie_version="1931", illuminant="d65"): + + output = np.empty(np.shape(self.data), dtype=np.float32) + + output[:, :, 1] = (self.data[:, :, 0] + 16.) / 116. + output[:, :, 0] = (self.data[:, :, 1] / 500.) + output[:, :, 1] + output[:, :, 2] = output[:, :, 1] - (self.data[:, :, 2] / 200.) + + # if output[x, y, c] > 0.008856, output[x, y, c] ^ 3 + # else, output[x, y, c] = ( output[x, y, c] - 16/116 ) / 7.787 + output = np.asarray(output) + mask = output > 0.008856 + output[mask] **= 3. 
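+        # piecewise inverse of the Lab companding used in xyz2lab above:
+        # values over the CIE threshold are cubed here, the rest go through
+        # the linear branch (f - 16/116) / 7.787 applied just below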
+ output[np.invert(mask)] -= 16/116 + output[np.invert(mask)] /= 7.787 + + xyz_reference = helpers().get_xyz_reference(cie_version, illuminant) + + output = np.float32(output) + output[:, :, 0] = output[:, :, 0] * xyz_reference[0] + output[:, :, 1] = output[:, :, 1] * xyz_reference[1] + output[:, :, 2] = output[:, :, 2] * xyz_reference[2] + + return output + + def lab2lch(self): + + output = np.empty(np.shape(self.data), dtype=np.float32) + + output[:, :, 0] = self.data[:, :, 0] # L transfers directly + output[:, :, 1] = np.power(np.power(self.data[:, :, 1], 2) + np.power(self.data[:, :, 2], 2), 0.5) + output[:, :, 2] = np.arctan2(self.data[:, :, 2], self.data[:, :, 1]) * 180 / np.pi + + return output + + def lch2lab(self): + + output = np.empty(np.shape(self.data), dtype=np.float32) + + output[:, :, 0] = self.data[:, :, 0] # L transfers directly + output[:, :, 1] = np.multiply(np.cos(self.data[:, :, 2] * np.pi / 180), self.data[:, :, 1]) + output[:, :, 2] = np.multiply(np.sin(self.data[:, :, 2] * np.pi / 180), self.data[:, :, 1]) + + return output + + def __str__(self): + return self.name + + +# ============================================================= +# class: edge_detection +# detect edges in an image +# ============================================================= +class edge_detection: + def __init__(self, data, name="edge detection"): + self.data = np.float32(data) + self.name = name + + def sobel(self, kernel_size=3, output_type="all", threshold=0., clip_range=[0, 65535]): + + Sx, Sy = create_filter().sobel(kernel_size) + + # Gradient in x direction: Gx + # Gradient in y direction: Gy + if np.ndim(self.data) > 2: + + Gx = np.empty(np.shape(self.data), dtype=np.float32) + Gy = np.empty(np.shape(self.data), dtype=np.float32) + + for dimension_idx in range(0, np.shape(self.data)[2]): + Gx[:, :, dimension_idx] = signal.convolve2d(self.data[:, :, dimension_idx], Sx, mode="same", boundary="symm") + Gy[:, :, dimension_idx] = signal.convolve2d(self.data[:, :, dimension_idx], Sy, mode="same", boundary="symm") + + elif np.ndim(self.data) == 2: + Gx = signal.convolve2d(self.data, Sx, mode="same", boundary="symm") + Gy = signal.convolve2d(self.data, Sy, mode="same", boundary="symm") + + else: + print("Warning! Data dimension must be 2 or 3.") + + # Gradient magnitude + G = np.power(np.power(Gx, 2) + np.power(Gy, 2), .5) + + if (output_type == "gradient_magnitude"): + return G + + # Gradient angle + theta = np.arctan(np.divide(Gy, Gx)) * 180. 
/ np.pi
+
+        if (output_type == "gradient_magnitude_and_angle"):
+            return G, theta
+
+        # Change the threshold according to the clip_range's maximum value
+        threshold = threshold * clip_range[1]
+
+        # calculating if the edge is a strong edge
+        is_edge = np.zeros(np.shape(self.data)).astype(int)
+        mask = G > threshold
+        is_edge[mask] = 1
+
+        if (output_type == "is_edge"):
+            return is_edge
+
+
+        # Edge direction label
+        temp = np.asarray(theta)
+        direction_label = np.zeros(np.shape(self.data), dtype=np.float32)
+
+        if np.ndim(self.data) > 2:
+            for i in range(0, np.shape(self.data)[2]):
+                direction_label[:, :, i] = helpers().sobel_prewitt_direction_label(G[:, :, i], theta[:, :, i], threshold)
+        else:
+            direction_label = helpers().sobel_prewitt_direction_label(G, theta, threshold)
+
+        if (output_type == "all"):
+            return G, Gx, Gy, theta, is_edge, direction_label
+
+
+    def __str__(self):
+        return self.name
diff --git a/IIR-Lab/ISP_pipeline/utils/__init__.py b/IIR-Lab/ISP_pipeline/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..7be7fa011f6695d63f117d3a522b0b3c15030dd7
--- /dev/null
+++ b/IIR-Lab/ISP_pipeline/utils/__init__.py
@@ -0,0 +1,36 @@
+from fractions import Fraction
+from pathlib import Path
+from json import JSONEncoder
+from .utils import *
+
+
+def rmtree(path: Path):
+    if path.is_file():
+        path.unlink()
+    else:
+        for ch in path.iterdir():
+            rmtree(ch)
+        path.rmdir()
+
+
+def safe_save(fpath, data, save_fun, rewrite=False, error_msg='File {fpath} exists! To rewrite it use `--rewrite` flag', **kwargs):
+    if not fpath.is_file() or rewrite:
+        save_fun(str(fpath), data, **kwargs)
+    else:
+        raise FileExistsError(error_msg.format(fpath=fpath))
+
+
+class FractionJSONEncoder(JSONEncoder):
+    def default(self, o):
+        if isinstance(o, Fraction):
+            return {'Fraction': [o.numerator, o.denominator]}
+        else:
+            return o.__dict__
+
+
+def fraction_from_json(json_object):
+    if 'Fraction' in json_object:
+        return Fraction(*json_object['Fraction'])
+    return json_object
+
+
diff --git a/IIR-Lab/ISP_pipeline/utils/__pycache__/__init__.cpython-39.pyc b/IIR-Lab/ISP_pipeline/utils/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..adf4478ee68497fd2ea862e48f860506392e68b2
Binary files /dev/null and b/IIR-Lab/ISP_pipeline/utils/__pycache__/__init__.cpython-39.pyc differ
diff --git a/IIR-Lab/ISP_pipeline/utils/__pycache__/utils.cpython-39.pyc b/IIR-Lab/ISP_pipeline/utils/__pycache__/utils.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6531d58abf39f6a8fb09604df275be572ca4296a
Binary files /dev/null and b/IIR-Lab/ISP_pipeline/utils/__pycache__/utils.cpython-39.pyc differ
diff --git a/IIR-Lab/ISP_pipeline/utils/utils.py b/IIR-Lab/ISP_pipeline/utils/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..343288891019bf465f42658e53952f7319241d91
--- /dev/null
+++ b/IIR-Lab/ISP_pipeline/utils/utils.py
@@ -0,0 +1,56 @@
+from PIL import Image
+import json
+import os
+
+def json_read(fname, **kwargs):
+    with open(fname) as j:
+        data = json.load(j, **kwargs)
+    return data
+
+
+def json_save(fname, data, indent_len=4, **kwargs):
+    with open(fname, "w") as f:
+        s = json.dumps(data, sort_keys=True, ensure_ascii=False,
+                       indent=" " * indent_len, **kwargs)
+        f.write(s)
+
+
+def process_wb_from_txt(txt_path):
+    with open(txt_path, 'r') as fh:
+        txt = [line.rstrip().split() for line in fh]
+
+    txt = [[float(k) for k in row] for row in txt]
+
+    assert len(txt) in [1, 3]
+
+    if len(txt) == 1:
+        # wb vector
+        txt = txt[0]
+
+    return txt
+
+
+def process_ids_from_txt(txt_path):
+    with open(txt_path, 'r') as fh:
+        temp = fh.read().splitlines()
+    return temp
+
+
+def save_txt(p, s):
+    with open(p, 'w') as text_file:
+        text_file.write(s)
+
+
+def downscale_jpg(img_path, new_shape, quality_perc=100):
+    img = Image.open(img_path)
+    if (img.size[0], img.size[1]) != new_shape:
+        new_img = img.resize(new_shape, Image.ANTIALIAS)
+        new_img.save(img_path[:-len('.jpg')] + '.jpg',
+                     'JPEG', quality=quality_perc)
+
+
+def rename_img(img_path):
+    if img_path.lower().endswith('jpeg'):
+        os.rename(img_path, img_path[:-len('jpeg')] + 'jpg')
+    else:
+        os.rename(img_path, img_path[:-len('JPG')] + 'jpg')
diff --git a/IIR-Lab/Rendering_models/high_iso.pth b/IIR-Lab/Rendering_models/high_iso.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a563b22bb8e40332bd7fcbf796e3307e16e4c5c8
--- /dev/null
+++ b/IIR-Lab/Rendering_models/high_iso.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f8b211f2939fdc7a030b7d263a4dc7791c4c385ec47ef3c8adefec5d8de8b7f
+size 93994
diff --git a/IIR-Lab/Rendering_models/low_iso.pth b/IIR-Lab/Rendering_models/low_iso.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ee2ad8ae98338dc88aea9377fea86b535222ebd8
--- /dev/null
+++ b/IIR-Lab/Rendering_models/low_iso.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c669a2f705ad7e8d912e81f89bd17eee621cffda6d8c4ca5701dbc19997449e4
+size 93994
diff --git a/IIR-Lab/aligned_utils.py b/IIR-Lab/aligned_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..db4cdd2ff4016feaaaa9163818426c8262457b00
--- /dev/null
+++ b/IIR-Lab/aligned_utils.py
@@ -0,0 +1,254 @@
+import numpy as np
+import cv2
+# from PIL import Image
+import os
+import glob
+import math  # used by tensor2img below
+from tqdm import tqdm
+from pathlib import Path
+import torch
+import torch.nn.functional as F
+from torchvision.utils import make_grid  # used by tensor2img below
+# Parameters of the motion estimation algorithms
+def warp_flow(img, flow):
+    '''
+    Applies to img the transformation described by flow.
+    '''
+    #assert len(flow.shape) == 3 and flow.shape[-1] == 2
+    hf, wf = flow.shape[:2]
+    # flow = -flow
+    flow[:, :, 0] += np.arange(wf)
+    flow[:, :, 1] += np.arange(hf)[:, np.newaxis]
+    res = cv2.remap(img, flow, None, cv2.INTER_LINEAR)
+    return res
+
+def estimate_invflow(img0, img1, me_algo):
+    '''
+    Estimates inverse optical flow by using the me_algo algorithm.
+    '''
+
+    # Create estimator object
+    if me_algo == "DeepFlow":
+        of_estim = cv2.optflow.createOptFlow_DeepFlow()
+    else:
+        raise Exception("Incorrect motion estimation algorithm")
+
+    # Run flow estimation (inverse flow)
+    flow = of_estim.calc(img1, img0, None)
+#    flow = cv.calcOpticalFlowFarneback(prvs,next, None, 0.5, 3, 15, 3, 5, 1.2, 0)
+
+    return flow
+
+def align_frames(img_to_align, img_source, mc_alg='DeepFlow'):
+    '''
+    Applies to img_to_align a transformation which converts it into img_source.
+    Args:
+        img_to_align: HxWxC image
+        img_source: HxWxC image
+        mc_alg: motion estimation algorithm; only DeepFlow is implemented in
+            estimate_invflow above, and it runs by default.
+    Returns:
+        HxWxC aligned image
+    '''
+    if img_to_align.ndim == 2:
+        img0 = img_to_align
+        img1 = img_source
+    else:
+        img0 = img_to_align[:, :, 1]
+        img1 = img_source[:, :, 1]
+    out_img = None
+
+    # Align frames according to selection in mc_alg
+    flow = estimate_invflow(img0, img1, mc_alg)
+    #print(flow.astype(np.float32))
+
+    # rectifier
+    out_img = warp_flow(img_to_align, flow.astype(np.float32))
+
+    return out_img, flow
+
+
+
+def SIFT(img1gray, img2gray):
+    # if i == 0:
+    sift = cv2.xfeatures2d.SIFT_create()  # create the SIFT detector
+    # sift = cv2.SURF_create()  # create the SURF detector
+    # find the keypoints and descriptors with SIFT
+    kp1, des1 = sift.detectAndCompute(img1gray, None)  # keypoints and descriptors of the first image
+    kp2, des2 = sift.detectAndCompute(img2gray, None)
+    # FLANN parameters
+    FLANN_INDEX_KDTREE = 1  # selects the FLANN index algorithm (here the KD-tree index)
+    index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
+    search_params = dict(checks=10)
+    flann = cv2.FlannBasedMatcher(index_params, search_params)  # create the FLANN matcher
+    matches = flann.knnMatch(des1, des2, k=2)  # k-nearest-neighbour matching of the descriptors; k=2 keeps the two closest candidates
+    # each match carries .queryIdx, .trainIdx and .distance; with k=2 the two most similar features are returned
+    # the keypoints kp1/kp2 carry .pt (coordinates), .angle (orientation), .response (strength) and .size (diameter)
+    # Need to draw only good matches, so create a mask
+    matchesMask = [[0, 0] for i in range(len(matches))]  # mask used only for drawing the matches
+
+    good = []
+    # ratio test as per Lowe's paper
+    for i, (m, n) in enumerate(matches):
+        if m.distance < 0.65*n.distance:
+            good.append(m)
+            matchesMask[i] = [1, 0]
+
+
+    MIN_MATCH_COUNT = 9
+
+    print(len(good))
+
+    if len(good) > MIN_MATCH_COUNT:
+        src_pts = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
+        dst_pts = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
+        M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 3)
+
+    else:
+        print('error: not enough good matches to estimate a homography')
+        return
+
+    # print(M)
+    return M
+
+
+
+
+def match_colors(im_ref, im_q, im_test):
+
+    im_ref_mean_re = im_ref.view(*im_ref.shape[:2], -1)
+    im_q_mean_re = im_q.view(*im_q.shape[:2], -1)
+
+    # Estimate color transformation matrix by minimizing the least squares error
+    c_mat_all = []
+    for ir, iq in zip(im_ref_mean_re, im_q_mean_re):
+        c = torch.linalg.lstsq(iq.t(), ir.t())
+        c = c.solution[:im_ref_mean_re.size(1)]
+        c_mat_all.append(c)
+
+    c_mat = torch.stack(c_mat_all, dim=0)
+    # Apply the transformation to test image
+    im_test_re = im_test.view(*im_test.shape[:2], -1)
+    im_t_conv = torch.matmul(im_test_re.permute(0, 2, 1), c_mat).permute(0, 2, 1)
+    im_t_conv = im_t_conv.view(im_test.shape)
+
+    return im_t_conv
+
+def color_correction(gt, in_put, output, scale_factor=2):
+    # ds_gt = F.interpolate(gt, scale_factor=1.0 / scale_factor, mode='bilinear', align_corners=False, recompute_scale_factor=True)
+    output_cor = match_channel_colors(gt, in_put, output)
+    return output_cor
+
+def match_channel_colors(im_ref, im_q, im_test):
+
+    im_ref_reshape = im_ref.view(*im_ref.shape[:2], -1)
+    im_q_reshape = im_q.view(*im_q.shape[:2], -1)
+    im_test_reshape = im_test.view(*im_test.shape[:2], -1)
+    # Estimate color transformation matrix by minimizing the least squares error
+
+    im_t_conv_list = []
+    for i in range(im_ref.size(1)):
+        c_mat_all = []
+        for ir_batch, iq_batch in zip(im_ref_reshape[:, i:i+1, :], im_q_reshape[:, i:i+1, :]):
+            c = torch.linalg.lstsq(iq_batch.t(), ir_batch.t())
+            c = c.solution[:1]
+            c_mat_all.append(c)
+
+        c_mat = torch.stack(c_mat_all, dim=0)
+        # Apply the transformation to test image
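+        # c_mat was fit on (query -> reference) pairs for this channel, so the
+        # matmul below applies the same per-batch correction to the test image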
+ im_t_conv = torch.matmul(im_test_reshape[:, i:i+1, :].permute(0, 2, 1), c_mat).permute(0, 2, 1) + im_t_conv = im_t_conv.view(*im_t_conv.shape[:2], *im_test.shape[-2:]) + im_t_conv_list.append(im_t_conv) + + im_t_conv = torch.cat(im_t_conv_list, dim=1) + + return im_t_conv + + + + +def img2tensor(imgs, bgr2rgb=True, float32=True): + """Numpy array to tensor. + + Args: + imgs (list[ndarray] | ndarray): Input images. + bgr2rgb (bool): Whether to change bgr to rgb. + float32 (bool): Whether to change to float32. + + Returns: + list[tensor] | tensor: Tensor images. If returned results only have + one element, just return tensor. + """ + + def _totensor(img, bgr2rgb, float32): + if img.shape[2] == 3 and bgr2rgb: + if img.dtype == 'float64': + img = img.astype('float32') + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = torch.from_numpy(img.transpose(2, 0, 1)) + if float32: + img = img.float() + return img + + if isinstance(imgs, list): + return [_totensor(img, bgr2rgb, float32) for img in imgs] + else: + return _totensor(imgs, bgr2rgb, float32) + + +def tensor2img(tensor, rgb2bgr=True, out_type=np.uint8, min_max=(0, 1)): + """Convert torch Tensors into image numpy arrays. + + After clamping to [min, max], values will be normalized to [0, 1]. + + Args: + tensor (Tensor or list[Tensor]): Accept shapes: + 1) 4D mini-batch Tensor of shape (B x 3/1 x H x W); + 2) 3D Tensor of shape (3/1 x H x W); + 3) 2D Tensor of shape (H x W). + Tensor channel should be in RGB order. + rgb2bgr (bool): Whether to change rgb to bgr. + out_type (numpy type): output types. If ``np.uint8``, transform outputs + to uint8 type with range [0, 255]; otherwise, float type with + range [0, 1]. Default: ``np.uint8``. + min_max (tuple[int]): min and max values for clamp. + + Returns: + (Tensor or list): 3D ndarray of shape (H x W x C) OR 2D ndarray of + shape (H x W). The channel order is BGR. + """ + if not (torch.is_tensor(tensor) or (isinstance(tensor, list) and all(torch.is_tensor(t) for t in tensor))): + raise TypeError(f'tensor or list of tensors expected, got {type(tensor)}') + + if torch.is_tensor(tensor): + tensor = [tensor] + result = [] + for _tensor in tensor: + _tensor = _tensor.squeeze(0).float().detach().cpu().clamp_(*min_max) + _tensor = (_tensor - min_max[0]) / (min_max[1] - min_max[0]) + + n_dim = _tensor.dim() + if n_dim == 4: + img_np = make_grid(_tensor, nrow=int(math.sqrt(_tensor.size(0))), normalize=False).numpy() + img_np = img_np.transpose(1, 2, 0) + if rgb2bgr: + img_np = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR) + elif n_dim == 3: + img_np = _tensor.numpy() + img_np = img_np.transpose(1, 2, 0) + if img_np.shape[2] == 1: # gray image + img_np = np.squeeze(img_np, axis=2) + else: + if rgb2bgr: + img_np = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR) + elif n_dim == 2: + img_np = _tensor.numpy() + else: + raise TypeError(f'Only support 4D, 3D or 2D tensor. But received with dimension: {n_dim}') + if out_type == np.uint8: + # Unlike MATLAB, numpy.unit8() WILL NOT round by default. 
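+            # hence the explicit round() on the next line before the uint8 cast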
+ img_np = (img_np * 255.0).round() + img_np = img_np.astype(out_type) + result.append(img_np) + if len(result) == 1: + result = result[0] + return result \ No newline at end of file diff --git a/IIR-Lab/dataloader/__pycache__/__init__.cpython-310.pyc b/IIR-Lab/dataloader/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6df185c870745d802322a7a7d453584bf131b0e8 Binary files /dev/null and b/IIR-Lab/dataloader/__pycache__/__init__.cpython-310.pyc differ diff --git a/IIR-Lab/dataloader/__pycache__/__init__.cpython-38.pyc b/IIR-Lab/dataloader/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..114cbf4d46603f40b8d246417caee08d6cd6ee48 Binary files /dev/null and b/IIR-Lab/dataloader/__pycache__/__init__.cpython-38.pyc differ diff --git a/IIR-Lab/dataloader/__pycache__/data_sampler.cpython-310.pyc b/IIR-Lab/dataloader/__pycache__/data_sampler.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fbeeb2fdab6d18eea40dcffcee5565d80995d60d Binary files /dev/null and b/IIR-Lab/dataloader/__pycache__/data_sampler.cpython-310.pyc differ diff --git a/IIR-Lab/dataloader/__pycache__/data_sampler.cpython-38.pyc b/IIR-Lab/dataloader/__pycache__/data_sampler.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3300ba52b771594ac5b90800e8a21d74e6eb973e Binary files /dev/null and b/IIR-Lab/dataloader/__pycache__/data_sampler.cpython-38.pyc differ diff --git a/IIR-Lab/dataloader/__pycache__/data_utils.cpython-310.pyc b/IIR-Lab/dataloader/__pycache__/data_utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..96c573f64c18b124c2d5cf245248f3f6075945a0 Binary files /dev/null and b/IIR-Lab/dataloader/__pycache__/data_utils.cpython-310.pyc differ diff --git a/IIR-Lab/dataloader/__pycache__/data_utils.cpython-312.pyc b/IIR-Lab/dataloader/__pycache__/data_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..695ec5cae7a27579e8b3555204854f95d2708a96 Binary files /dev/null and b/IIR-Lab/dataloader/__pycache__/data_utils.cpython-312.pyc differ diff --git a/IIR-Lab/dataloader/__pycache__/data_utils.cpython-39.pyc b/IIR-Lab/dataloader/__pycache__/data_utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cd6899b27724f02d17f2c4a2542e4d7ba5beaeff Binary files /dev/null and b/IIR-Lab/dataloader/__pycache__/data_utils.cpython-39.pyc differ diff --git a/IIR-Lab/dataloader/__pycache__/dataset.cpython-310.pyc b/IIR-Lab/dataloader/__pycache__/dataset.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9d2c88b0f2f027069ce695ad586fb263ba106234 Binary files /dev/null and b/IIR-Lab/dataloader/__pycache__/dataset.cpython-310.pyc differ diff --git a/IIR-Lab/dataloader/__pycache__/dataset.cpython-312.pyc b/IIR-Lab/dataloader/__pycache__/dataset.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a57db53f8aa60ede1129a78186692ce15bdc4477 Binary files /dev/null and b/IIR-Lab/dataloader/__pycache__/dataset.cpython-312.pyc differ diff --git a/IIR-Lab/dataloader/__pycache__/dataset.cpython-38.pyc b/IIR-Lab/dataloader/__pycache__/dataset.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7e5f74d2cc4cb6ced63c128789d4e960a5f6c22f Binary files /dev/null and b/IIR-Lab/dataloader/__pycache__/dataset.cpython-38.pyc differ diff --git 
a/IIR-Lab/dataloader/__pycache__/dataset.cpython-39.pyc b/IIR-Lab/dataloader/__pycache__/dataset.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..20850380d2cdf0610da32892ad22b8af6a0e6451 Binary files /dev/null and b/IIR-Lab/dataloader/__pycache__/dataset.cpython-39.pyc differ diff --git a/IIR-Lab/dataloader/data_utils.py b/IIR-Lab/dataloader/data_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..8e1420805f51d6309748d44276a815e0e738ca02 --- /dev/null +++ b/IIR-Lab/dataloader/data_utils.py @@ -0,0 +1,504 @@ +from __future__ import division + +import numpy as np +import cv2 +import random +import torch +import glob +import os +from random import choices +from scipy.stats import poisson + +def Rawread(path,low=0): + if path.endswith('.raw'): + return read_img(path,low) + if path.endswith('.npy'): + return read_npy(path,low) + if path.endswith('.png'): + return read_png(path,low) + +def read_img(path,low): + w = 4000 + h = 3000 + + raw = np.fromfile(path,np.uint16) + raw = raw.reshape((h,w)) + raw = raw.astype(np.float32)-64 + raw = rggb_raw(raw) + raw = np.clip(raw, low, 959) + + return raw + + +def read_npy(path,low): + + raw = np.load(path) + + if raw.shape[0] == 4: + return raw * 959 + raw = raw.astype(np.float32)-64 + raw = rggb_raw(raw) + raw = np.clip(raw, low, 959) + return raw + +def read_rawpng(path, metadata): + + raw = cv2.imread(str(path), cv2.IMREAD_UNCHANGED) + + # if raw.shape[0] == 4: + # return raw * 959 + raw = ((raw.astype(np.float32) - 256.) / (4095. - 256.)).clip(0, 1) + + raw = bayer2raw(raw, metadata) + raw = np.clip(raw, 0., 1.) + return raw + +def read_png(path, low): + + raw = cv2.imread(str(path), cv2.IMREAD_UNCHANGED) + + if raw.shape[0] == 4: + return raw * 959 + raw = raw.astype(np.float32)-256 + raw = rggb_raw(raw) + raw = np.clip(raw, low, 4095) + return raw + +def random_crop(frames_0, frames_1=None, crop_size=128): + + F,C, H, W = frames_0.shape + + rnd_w = random.randint(0, W - crop_size) + rnd_h = random.randint(0, H - crop_size) + + patch = frames_0[..., rnd_h:rnd_h + crop_size, rnd_w:rnd_w + crop_size] + if frames_1 is not None: + patch1 = frames_1[..., rnd_h:rnd_h + crop_size, rnd_w:rnd_w + crop_size] + return np.concatenate([patch,patch1],axis=0) + + return patch + +def rggb_raw(raw): + # pack RGGB Bayer raw to 4 channels + H, W = raw.shape + raw = raw[None, ...] + raw_pack = np.concatenate((raw[:, 0:H:2, 0:W:2], + raw[:, 0:H:2, 1:W:2], + raw[:, 1:H:2, 0:W:2], + raw[:, 1:H:2, 1:W:2]), axis=0) + return raw_pack + +def bayer2raw(raw, metadata): + # pack RGGB Bayer raw to 4 channels + H, W = raw.shape + raw = raw[None, ...]
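+ # cfa_pattern[0] == 0 denotes an RGGB mosaic; any other value is treated as BGGR below. + # Both branches emit the same fixed [R, G1, G2, B] channel order, so downstream code never needs to know the original CFA layout.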
+ if metadata['cfa_pattern'][0] == 0: + # RGGB + raw_pack = np.concatenate((raw[:, 0:H:2, 0:W:2], + raw[:, 0:H:2, 1:W:2], + raw[:, 1:H:2, 0:W:2], + raw[:, 1:H:2, 1:W:2]), axis=0) + else: + # BGGR + raw_pack = np.concatenate((raw[:, 1:H:2, 1:W:2], + raw[:, 0:H:2, 1:W:2], + raw[:, 1:H:2, 0:W:2], + raw[:, 0:H:2, 0:W:2]), axis=0) + return raw_pack + +def raw_rggb(raws): + # depack 4 channels raw to RGGB Bayer + C, H, W = raws.shape + output = np.zeros((H * 2, W * 2)).astype(np.uint16) + + output[0:2 * H:2, 0:2 * W:2] = raws[0:1, :, :] + output[0:2 * H:2, 1:2 * W:2] = raws[1:2, :, :] + output[1:2 * H:2, 0:2 * W:2] = raws[2:3, :, :] + output[1:2 * H:2, 1:2 * W:2] = raws[3:4, :, :] + + return output + + +def raw_rggb_float32(raws): + # depack 4 channels raw to RGGB Bayer + C, H, W = raws.shape + output = np.zeros((H * 2, W * 2)).astype(np.float32) + + output[0:2 * H:2, 0:2 * W:2] = raws[0:1, :, :] + output[0:2 * H:2, 1:2 * W:2] = raws[1:2, :, :] + output[1:2 * H:2, 0:2 * W:2] = raws[2:3, :, :] + output[1:2 * H:2, 1:2 * W:2] = raws[3:4, :, :] + + return output + + +def depack_rggb_raws(raws): + # depack 4 channels raw to RGGB Bayer + N, C, H, W = raws.shape + output = torch.zeros((N, 1, H * 2, W * 2)) + + output[:, :, 0:2 * H:2, 0:2 * W:2] = raws[:, 0:1, :, :] + output[:, :, 0:2 * H:2, 1:2 * W:2] = raws[:, 1:2, :, :] + output[:, :, 1:2 * H:2, 0:2 * W:2] = raws[:, 2:3, :, :] + output[:, :, 1:2 * H:2, 1:2 * W:2] = raws[:, 3:4, :, :] + + return output + + + +# IMAGETYPES = ('*.bmp', '*.png', '*.jpg', '*.jpeg', '*.tif') +IMAGETYPES = ('*.npy','*.raw',) # the trailing comma is required: without it a one-element tuple collapses to a plain string, and iterating would split it into characters + +def get_imagenames(seq_dir, pattern=None): + """ Get ordered list of filenames + """ + files = [] + for typ in IMAGETYPES: + files.extend(glob.glob(os.path.join(seq_dir, typ))) + + # filter filenames + if pattern is not None: + files = [f for f in files if pattern in os.path.split(f)[-1]] + + # sort filenames numerically by the digits they contain + files.sort(key=lambda f: int(''.join(filter(str.isdigit, f)))) + return files + +def open_sequence(seq_dir, gray_mode, expand_if_needed=False, max_num_fr=100): + r""" Opens a sequence of images and expands it to even sizes if necessary + Args: + seq_dir: string, path to image sequence + gray_mode: boolean, True if the images are to be opened in grayscale mode + expand_if_needed: if True, the spatial dimensions will be expanded if + size is odd + max_num_fr: maximum number of frames to load + Returns: + seq: array of dims [num_frames, C, H, W] (C=4 here, RGGB-packed), H and W are even. + The images get normalized to the range [0, 1]. + expanded_h: True if original dim H was odd and image got expanded in this dimension. + expanded_w: True if original dim W was odd and image got expanded in this dimension.
+ """ + # Get ordered list of filenames + files = get_imagenames(seq_dir) + + seq_list_raw = [] + seq_list_raw_noise = [] + print("\tOpen sequence in folder: ", seq_dir) + for fpath in files[0:max_num_fr]: + + raw, raw_noise, expanded_h, expanded_w = open_image(fpath,\ + gray_mode=gray_mode,\ + expand_if_needed=expand_if_needed,\ + expand_axis0=False) + + raw = rggb_raw(raw) + raw_noise = rggb_raw(raw_noise) + + + seq_list_raw.append(raw) + seq_list_raw_noise.append(raw_noise) + seq_raw = np.stack(seq_list_raw, axis=0) + seq_raw_noise = np.stack(seq_list_raw_noise, axis=0) + return seq_raw, seq_raw_noise, expanded_h, expanded_w + +def open_image(fpath, gray_mode, expand_if_needed=False, expand_axis0=True, normalize_data=True): + r""" Opens an image and expands it if necesary + Args: + fpath: string, path of image file + gray_mode: boolean, True indicating if image is to be open + in grayscale mode + expand_if_needed: if True, the spatial dimensions will be expanded if + size is odd + expand_axis0: if True, output will have a fourth dimension + Returns: + img: image of dims NxCxHxW, N=1, C=1 grayscale or C=3 RGB, H and W are even. + if expand_axis0=False, the output will have a shape CxHxW. + The image gets normalized to the range [0, 1]. + expanded_h: True if original dim H was odd and image got expanded in this dimension. + expanded_w: True if original dim W was odd and image got expanded in this dimension. + """ + # if not gray_mode: + # # Open image as a CxHxW torch.Tensor + # img = cv2.imread(fpath) + # # from HxWxC to CxHxW, RGB image + # img = (cv2.cvtColor(img, cv2.COLOR_BGR2RGB)).transpose(2, 0, 1) + # else: + # # from HxWxC to CxHxW grayscale image (C=1) + # img = cv2.imread(fpath, cv2.IMREAD_GRAYSCALE) + + + + # 测试真实的图片 + # raw_img = ((np.fromfile(fpath,np.uint16).astype(np.float32))*4833)/2048 + # raw_img = np.clip(raw_img-64, 0, 1023-64) + # raw_img = raw_img.reshape((3000,4000)) + + # raw_img = np.load(fpath).astype(np.float32)-64 + w = 4000 + h = 3000 + raw_img = np.fromfile(fpath,dtype=np.uint16,count=w*h) + raw_img = raw_img.reshape((h,w)).astype(np.float32)-64 + raw_img = np.clip(raw_img, 0, 959) + + noise_fpath =fpath.replace('onlyraw_test_clean_raw','onlyraw_test_noise_raw') + raw_img_noise = np.fromfile(noise_fpath,dtype=np.uint16,count=w*h) + raw_img_noise = raw_img_noise.reshape((h,w)).astype(np.float32)-64 + raw_img_noise = np.clip(raw_img_noise, 0, 959) + + + #blc + + + # if expand_axis0: + # img = np.expand_dims(img, 0) + + # Handle odd sizes + expanded_h = False + expanded_w = False + sh_im = raw_img.shape + # if expand_if_needed: + # if sh_im[-2]%2 == 1: + # expanded_h = True + # if expand_axis0: + # img = np.concatenate((img, \ + # img[:, :, -1, :][:, :, np.newaxis, :]), axis=2) + # else: + # img = np.concatenate((img, \ + # img[:, -1, :][:, np.newaxis, :]), axis=1) + + + # if sh_im[-1]%2 == 1: + # expanded_w = True + # if expand_axis0: + # img = np.concatenate((img, \ + # img[:, :, :, -1][:, :, :, np.newaxis]), axis=3) + # else: + # img = np.concatenate((img, \ + # img[:, :, -1][:, :, np.newaxis]), axis=2) + + if normalize_data: + raw_img = normalize(raw_img) + raw_img_noise = normalize(raw_img_noise) + return raw_img, raw_img_noise, expanded_h, expanded_w + + +def normalize(data): + r"""Normalizes a unit8 image to a float32 image in the range [0, 1] + + Args: + data: a unint8 numpy array to normalize from [0, 255] to [0, 1] + """ + return np.float32(data/(959)) + + +def augment_cuda(batches, args, spynet=None): + + def _augment(img, hflip=True, rot=True): + + 
hflip = hflip and random.random() < 0.5 + vflip = rot and random.random() < 0.5 + # rot90 = rot and random.random() < 0.5 + k1 = np.random.randint(0, 4) #0,1,2,3 + if hflip: img = img.flip(-1) + if vflip: img = img.flip(-2) + + img = torch.rot90(img, k=k1, dims=[-2, -1]) + + return img + + batches_aug = _augment(batches) + + if args.pair: + noise = batches_aug[:,:args.frame,...]/959 + clean = batches_aug[:,args.frame,...]/959 #if args.scene != 'noisedata' else batches_aug[:,args.frame,...] + + + else: + clean, noise = Noise_simulation(batches_aug,args) + if not args.consistent_loss: + clean = clean[:, args.frame // 2, ...] + B, F, C , H, W = noise.shape + noise = noise.reshape(B, F*C , H, W ) + + + return clean, noise, None + + +def Noise_simulation(batches_aug,args): + batches_aug = batches_aug/959 + batches_aug = torch.clamp(batches_aug , 0, 1) + B = batches_aug.shape[0] + batch_aug_mean = batches_aug.mean(dim=(1,2,3,4)) + if args.need_Scaling: + if args.sample_gain == 'type1': + # rand_avg = torch.randint(args.luminance_low, args.luminance_high ,(B, )).cuda(args.local_rank) + rand_avg = (torch.rand((B)) * 0.12 + 0.001).cuda(args.local_rank) + if args.sample_gain == 'type2': + rand_avg = Gain_Sampler(B).cuda(args.local_rank) + + coef = (batch_aug_mean / rand_avg).unsqueeze(-1).unsqueeze(-1).unsqueeze(-1).unsqueeze(-1) + batch_aug_dark = torch.clamp(batches_aug / coef, 0, 1) + else: + batch_aug_dark = batches_aug + + a,b, again, dgain = random_noise_levels_nightimaging(B, args) + batch_aug_dark,batch_aug_dark_noise = add_noise(args, batch_aug_dark,a.cuda(args.local_rank),b.cuda(args.local_rank),dgain.cuda(args.local_rank)) + + batch_aug_dark_noise = torch.clamp(batch_aug_dark_noise, -0.1, 1) + + # print(batch_aug_dark_noise.mean()) + return batch_aug_dark.float(), batch_aug_dark_noise.float() + +def random_noise_levels_nightimaging(B, args): + # print('use new') + g = torch.FloatTensor(B).uniform_(0, 125).int().long() + noise_profile = torch.from_numpy(np.load('/data1/chengqihua/02_code/03_night_photogrphy/nightimage_v1/dataloader/json_all_2nd.npy')) + + a = noise_profile[g,0] + b = noise_profile[g,1] + + return a, b, 1, 1*torch.ones(1) + +def random_noise_levels(B, args): + ak1=0.05244803 + ak2=0.01498041 + bk1=0.00648923 + bk2 = 0.05899386 + bk3 = 0.21520193 + g = torch.FloatTensor(B).uniform_(args.min_gain, args.max_gain) + + maskA = g > 16 + + again = g.clone() + again[maskA] = 16 + + maskB = g < 16 + + dgain = g.clone() / 16 + dgain[maskB] = 1 + + + + a = ak1 * again + ak2 + b = bk1 * again*again + bk2* again + bk3 + + return a, b, again, dgain + +def add_noise(args, image, a, b, dgain): + + dgain = dgain.unsqueeze(1).unsqueeze(1).unsqueeze(1).unsqueeze(1) + a = a.unsqueeze(1).unsqueeze(1).unsqueeze(1).unsqueeze(1) + b = b.unsqueeze(1).unsqueeze(1).unsqueeze(1).unsqueeze(1) + + + B, F, C, H, W = image.size() + + image = image / dgain + + + poisson_noisy_img = torch.poisson(image/a)*a + + gaussian_noise = torch.sqrt(b)*torch.randn(B, F, C, H, W).cuda(args.local_rank) + + noiseimg = poisson_noisy_img + gaussian_noise + + if args.usedgain: + noiseimg = noiseimg * dgain + image = image * dgain + return image, noiseimg + + + +def normalize_augment(datain): + '''Normalizes and augments an input patch of dim [N, num_frames, C, H, W] in [0., 255.]: it is flattened to \ + [N, num_frames*C, H, W], scaled to [0., 1.], jointly augmented, and reshaped back. It also returns the temporal \ + patch itself (originally the central frame; edited by cjm: now all frames) as a ground truth.
+ ''' + def transform(sample): + # define transformations + do_nothing = lambda x: x + do_nothing.__name__ = 'do_nothing' + flipud = lambda x: torch.flip(x, dims=[2]) + flipud.__name__ = 'flipud' + rot90 = lambda x: torch.rot90(x, k=1, dims=[2, 3]) + rot90.__name__ = 'rot90' + rot90_flipud = lambda x: torch.flip(torch.rot90(x, k=1, dims=[2, 3]), dims=[2]) + rot90_flipud.__name__ = 'rot90_flipud' + rot180 = lambda x: torch.rot90(x, k=2, dims=[2, 3]) + rot180.__name__ = 'rot180' + rot180_flipud = lambda x: torch.flip(torch.rot90(x, k=2, dims=[2, 3]), dims=[2]) + rot180_flipud.__name__ = 'rot180_flipud' + rot270 = lambda x: torch.rot90(x, k=3, dims=[2, 3]) + rot270.__name__ = 'rot270' + rot270_flipud = lambda x: torch.flip(torch.rot90(x, k=3, dims=[2, 3]), dims=[2]) + rot270_flipud.__name__ = 'rot270_flipud' + add_csnt = lambda x: x + torch.normal(mean=torch.zeros(x.size()[0], 1, 1, 1), \ + std=(5/255.)).expand_as(x).to(x.device) + add_csnt.__name__ = 'add_csnt' + + # define transformations and their frequency, then pick one. + aug_list = [do_nothing, flipud, rot90, rot90_flipud, \ + rot180, rot180_flipud, rot270, rot270_flipud, add_csnt] + w_aug = [32, 12, 12, 12, 12, 12, 12, 12, 12] # one-fourth chance to do_nothing + transf = choices(aug_list, w_aug) + + # transform all images in array + return transf[0](sample) + + img_train = datain #torch.Size([8, 11, 3, 96, 96]) + # convert to [N, num_frames*C, H, W] in [0., 1.] from [N, num_frames, C, H, W] in [0., 255.] + N, F, C, H, W = img_train.shape + img_train = img_train.view(img_train.size()[0], -1, \ + img_train.size()[-2], img_train.size()[-1]) / 255. # torch.Size([8, 33, 96, 96]) + + # augment + img_train = transform(img_train) + img_train = img_train.view(N, F, C, H, W) + # extract ground truth (central frame) + # gt_train = img_train[:, 3*ctrl_fr_idx:3*ctrl_fr_idx+3, :, :] + return img_train, img_train + +def Gain_Sampler(B): + gain_dict = { + 'low':[5,35], + 'mid':[35,60], + 'high':[60,100] + } + + level = ['low','mid','high'] + sampled = np.random.choice(level, B, p=[0.7, 0.2, 0.1]) # p= must be passed by keyword: positionally it binds to `replace` and the probabilities are silently ignored + gains = [] + for index in sampled: + gains.append(torch.randint(gain_dict[index][0],gain_dict[index][1],(1,))) + + return torch.Tensor(gains) + +def path_replace(path,args): + for i in range(len(args.replace_left)): + path = path.replace(args.replace_left[i],args.replace_right[i]) + return path \ No newline at end of file diff --git a/IIR-Lab/dataloader/dataset.py b/IIR-Lab/dataloader/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..29dff422be75f9f356a079be9fcdea38e161375e --- /dev/null +++ b/IIR-Lab/dataloader/dataset.py @@ -0,0 +1,97 @@ +import os +import numpy as np +import random +import torch +from torch.utils.data import Dataset +from torchvision import transforms +import cv2 +import sys +from pathlib import Path +import glob +sys.path.append('..') +# from utils_ours import util +from dataloader.data_utils import rggb_raw, random_crop, Rawread, path_replace, bayer2raw, read_rawpng +from natsort import ns, natsorted +import time +from tqdm import tqdm +from multiprocessing import Pool +import pdb +import json +from fractions import Fraction +from json import JSONEncoder +from exifread.utils import Ratio + +class imageSet(Dataset): + def __init__(self,args): + super().__init__() + self.args = args + self.test_dir = args.test_dir + self.debug = args.debug + + self.paths = [] + for file in os.listdir(self.test_dir): + if '.png' in file: + self.img_path = os.path.join(self.test_dir, file) +
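# pair every .png in the test dir with its same-named .json metadata sidecar +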
self.json_path = self.img_path.replace('.png', '.json') + self.paths.append({'img_path': self.img_path, 'json_path': self.json_path}) + + def __getitem__(self, index): + img_path = self.paths[index]['img_path'] + json_path = self.paths[index]['json_path'] + + metadata = json_read(json_path, object_hook=fraction_from_json) + + input_img = read_rawpng(img_path, metadata) + + return {'input': input_img, 'json_path': json_path} + + def __len__(self): + return len(self.paths) + + +def normalize(raw_image, black_level, white_level): + if type(black_level) is list and len(black_level) == 1: + black_level = float(black_level[0]) + if type(white_level) is list and len(white_level) == 1: + white_level = float(white_level[0]) + black_level_mask = black_level + if type(black_level) is list and len(black_level) == 4: + if type(black_level[0]) is Ratio: + black_level = ratios2floats(black_level) + if type(black_level[0]) is Fraction: + black_level = fractions2floats(black_level) + black_level_mask = np.zeros(raw_image.shape) + idx2by2 = [[0, 0], [0, 1], [1, 0], [1, 1]] + step2 = 2 + for i, idx in enumerate(idx2by2): + black_level_mask[idx[0]::step2, idx[1]::step2] = black_level[i] + normalized_image = raw_image.astype(np.float32) - black_level_mask + # if some values were smaller than black level + normalized_image[normalized_image < 0] = 0 + normalized_image = normalized_image / (white_level - black_level_mask) + return normalized_image + +def ratios2floats(ratios): + floats = [] + for ratio in ratios: + floats.append(float(ratio.num) / ratio.den) + return floats + +def fractions2floats(fractions): + floats = [] + for fraction in fractions: + floats.append(float(fraction.numerator) / fraction.denominator) + return floats + +def json_read(fname, **kwargs): + with open(fname) as j: + data = json.load(j, **kwargs) + return data + +def fraction_from_json(json_object): + if 'Fraction' in json_object: + return Fraction(*json_object['Fraction']) + return json_object + + diff --git a/IIR-Lab/denoise_model/high_iso.pth b/IIR-Lab/denoise_model/high_iso.pth new file mode 100644 index 0000000000000000000000000000000000000000..da69cbfe707f91be00ad1828cfa5c9f97551c95c --- /dev/null +++ b/IIR-Lab/denoise_model/high_iso.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6bb9641b1fced9a33651132672d6dd649f5428849f6d20ddd4bb478cda3a03e +size 465804789 diff --git a/IIR-Lab/denoise_model/high_mid_iso.pth b/IIR-Lab/denoise_model/high_mid_iso.pth new file mode 100644 index 0000000000000000000000000000000000000000..b216ef32e06dc5395b282812afb1977aea5e8a1a --- /dev/null +++ b/IIR-Lab/denoise_model/high_mid_iso.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01deae3e68e447f4a4c7d4f20442941e8127bf1b4a718d451f151ead782c7254 +size 465796068 diff --git a/IIR-Lab/denoise_model/low_iso.pth b/IIR-Lab/denoise_model/low_iso.pth new file mode 100644 index 0000000000000000000000000000000000000000..18fb3c69cbe3417393f1a287ed670b09667cc5d9 --- /dev/null +++ b/IIR-Lab/denoise_model/low_iso.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab5617cf98787e7e4a9c48d9e5fe6cde729ee7ad83913a01e43abb65734a0a5e +size 465795637 diff --git a/IIR-Lab/denoise_model/mid_iso.pth b/IIR-Lab/denoise_model/mid_iso.pth new file mode 100644 index 0000000000000000000000000000000000000000..f7524f4dfcdad030d308c490c760f332bcbe60c0 --- /dev/null +++ b/IIR-Lab/denoise_model/mid_iso.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid
sha256:db9e010a52d81d5cb1953c683f301458776cbc9dd3fa0a6ad7d7d15bf285540c +size 465796068 diff --git a/IIR-Lab/final_test.py b/IIR-Lab/final_test.py new file mode 100644 index 0000000000000000000000000000000000000000..60904c7f3b134a5ae4dbb5749a72035f16ea1e46 --- /dev/null +++ b/IIR-Lab/final_test.py @@ -0,0 +1,156 @@ +import os +import time +import argparse +import torch +import torch.backends.cudnn as cudnn +from utils_ours.util import setup_logger, print_args +from torch.utils.data import DataLoader +from dataloader.dataset import imageSet +from models.archs.NAF_arch import NAF_Video +from torch.nn.parallel import DistributedDataParallel +import numpy as np +import torch.nn.functional as F +from collections import OrderedDict +import torch.nn as nn +from models.utils import chunkV3 +import pdb +from ISP_pipeline import process_pngs_isp +import json +import cv2 +from skimage import io + +ISO = [50,125,320,640,800] +a = [0.00025822882,0.000580020745,0.00141667975,0.00278965863,0.00347614807] +b = [2.32350645e-06,3.1125155625e-06,8.328992952e-06,3.3315971808e-05,5.205620595e-05] + +# fit the measured noise parameters as functions of ISO: a linearly, b quadratically +coeff_a = np.polyfit(ISO,a,1) +coeff_b = np.polyfit(ISO,b,2) + +def main(): + + parser = argparse.ArgumentParser(description='imageTest') + + parser.add_argument('--frame', default=1, type=int) + parser.add_argument('--test_dir', default = "/data/", type=str) + parser.add_argument('--model_type', type=str, default='NAF_Video') + parser.add_argument('--save_folder', default='/data/', type=str) + parser.add_argument('--resume', default='', type=str) + parser.add_argument('--testoption', default='image', type=str) + parser.add_argument('--chunk', action='store_true') + parser.add_argument('--debug', action='store_true') + + args = parser.parse_args() + args.src_save_folder = '/data/' + + print(args.src_save_folder,'**********************') + if not os.path.exists(args.src_save_folder): + os.makedirs(args.src_save_folder) + print(args.src_save_folder) + + low_iso_model = "denoise_model/low_iso.pth" + mid_iso_model = "denoise_model/mid_iso.pth" + high_mid_iso_model = "denoise_model/high_mid_iso.pth" + high_iso_model = "denoise_model/high_iso.pth" + + network = NAF_Video(args).cuda() + + load_low_iso_net = torch.load(low_iso_model, map_location=torch.device('cpu')) + load_low_iso_net_clean = OrderedDict() + for k, v in load_low_iso_net.items(): + if k.startswith('module.'): + load_low_iso_net_clean[k[7:]] = v + else: + load_low_iso_net_clean[k] = v + + load_mid_iso_net = torch.load(mid_iso_model, map_location=torch.device('cpu')) + load_mid_iso_net_clean = OrderedDict() + for k, v in load_mid_iso_net.items(): + if k.startswith('module.'): + load_mid_iso_net_clean[k[7:]] = v + else: + load_mid_iso_net_clean[k] = v + + load_high_mid_iso_net = torch.load(high_mid_iso_model, map_location=torch.device('cpu')) + load_high_mid_iso_net_clean = OrderedDict() + for k, v in load_high_mid_iso_net.items(): + if k.startswith('module.'): + load_high_mid_iso_net_clean[k[7:]] = v + else: + load_high_mid_iso_net_clean[k] = v + + load_high_iso_net = torch.load(high_iso_model, map_location=torch.device('cpu')) + load_high_iso_net_clean = OrderedDict() # strip 'module.' here too, matching the other checkpoints + for k, v in load_high_iso_net.items(): + if k.startswith('module.'): + load_high_iso_net_clean[k[7:]] = v + else: + load_high_iso_net_clean[k] = v + + cudnn.benchmark = True + + test_dataset = imageSet(args) + test_dataloader = DataLoader(test_dataset, batch_size=1, num_workers=0, shuffle=False) + inference_time = [] + with torch.no_grad(): + + for data in test_dataloader: + + noise = data['input'].cuda() + json_path = data['json_path'][0] + scene_name = os.path.splitext(os.path.basename(json_path))[0] + + # now let's process the ISP module + json_cfa =
process_pngs_isp.readjson(json_path) + num_k = json_cfa['noise_profile'] + iso = (num_k[0] - coeff_a[1])/coeff_a[0] + + if iso < 900: + network.load_state_dict(load_low_iso_net_clean, strict=True) + network.eval() + elif iso < 1800: + network.load_state_dict(load_mid_iso_net_clean, strict=True) + network.eval() + elif iso < 5600: + network.load_state_dict(load_high_mid_iso_net_clean, strict=True) + network.eval() + else: + network.load_state_dict(load_high_iso_net_clean, strict=True) + network.eval() + + t0 = time.perf_counter() + + out = chunkV3(network, noise, args.testoption, patch_h=1024, patch_w=1024) + out = torch.clamp(out, 0., 1.) + + # name_rgb = os.path.join(args.src_save_folder, scene_name + '_' + str(int(iso)) + '.jpg') + name_rgb = os.path.join(args.src_save_folder, scene_name + '.jpg') + + if not os.path.exists(os.path.dirname(name_rgb)): + os.makedirs(os.path.dirname(name_rgb)) + + out = out[0] + del noise + torch.cuda.empty_cache() + + img_pro = process_pngs_isp.isp_night_imaging(out, json_cfa, iso, + do_demosaic = True, # H/2 W/2 + + do_channel_gain_white_balance = True, + do_xyz_transform = True, + do_srgb_transform = True, + + do_gamma_correct = True, # con + + do_refinement = True, # 32 bit + do_to_uint8 = True, + + do_resize_using_pil = True, # H/8, W/8 + do_fix_orientation = True + ) + + t1 = time.perf_counter() + inference_time.append(t1-t0) + img_pro = cv2.cvtColor(img_pro, cv2.COLOR_RGB2BGR) + cv2.imwrite(name_rgb, img_pro, [cv2.IMWRITE_JPEG_QUALITY, 100]) # output is .jpg, so use the JPEG quality flag (the PNG compression flag here was silently ignored) + + print("Inference {} in {:.3f}s".format(scene_name, t1 - t0)) + print(f"Average inference time: {np.mean(inference_time)} seconds") + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/IIR-Lab/json_all.npy b/IIR-Lab/json_all.npy new file mode 100644 index 0000000000000000000000000000000000000000..f339460cc1f7300a922ea938107908841bd47550 --- /dev/null +++ b/IIR-Lab/json_all.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33de236d4d9a7e2375fd4677847670201cd913c6d28c8838be9f1fc1edeedd01 +size 3328 diff --git a/IIR-Lab/models/__init__.py b/IIR-Lab/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/IIR-Lab/models/__init__.py @@ -0,0 +1 @@ + diff --git a/IIR-Lab/models/__pycache__/Ch_loss.cpython-310.pyc b/IIR-Lab/models/__pycache__/Ch_loss.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6a467555ee4dae1a3d36b826c70e0724aae9e96f Binary files /dev/null and b/IIR-Lab/models/__pycache__/Ch_loss.cpython-310.pyc differ diff --git a/IIR-Lab/models/__pycache__/Ch_loss.cpython-312.pyc b/IIR-Lab/models/__pycache__/Ch_loss.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d700e2c5dffb9082ea3cb01f8bea95df1254d11e Binary files /dev/null and b/IIR-Lab/models/__pycache__/Ch_loss.cpython-312.pyc differ diff --git a/IIR-Lab/models/__pycache__/Ch_loss.cpython-39.pyc b/IIR-Lab/models/__pycache__/Ch_loss.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cc50c91473f69893461cb0ce041a7d53c4163d86 Binary files /dev/null and b/IIR-Lab/models/__pycache__/Ch_loss.cpython-39.pyc differ diff --git a/IIR-Lab/models/__pycache__/ISP.cpython-310.pyc b/IIR-Lab/models/__pycache__/ISP.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..59bb7df458116aa49821ebfd70ab010c6e4023e1 Binary files /dev/null and b/IIR-Lab/models/__pycache__/ISP.cpython-310.pyc differ diff
--git a/IIR-Lab/models/__pycache__/ISP.cpython-312.pyc b/IIR-Lab/models/__pycache__/ISP.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..af8f88041c3730007d807023fa779a5f98545360 Binary files /dev/null and b/IIR-Lab/models/__pycache__/ISP.cpython-312.pyc differ diff --git a/IIR-Lab/models/__pycache__/ISP.cpython-39.pyc b/IIR-Lab/models/__pycache__/ISP.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0d657e87fd3c7c5aa0acc7ebee8e8b871200e7a5 Binary files /dev/null and b/IIR-Lab/models/__pycache__/ISP.cpython-39.pyc differ diff --git a/IIR-Lab/models/__pycache__/VGG_loss.cpython-310.pyc b/IIR-Lab/models/__pycache__/VGG_loss.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e488640f478dcbb8a03d6be8e16612396ef02f1f Binary files /dev/null and b/IIR-Lab/models/__pycache__/VGG_loss.cpython-310.pyc differ diff --git a/IIR-Lab/models/__pycache__/VGG_loss.cpython-312.pyc b/IIR-Lab/models/__pycache__/VGG_loss.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e5df221b6cd0a30a2f3f693c4ef2967b579d8ef3 Binary files /dev/null and b/IIR-Lab/models/__pycache__/VGG_loss.cpython-312.pyc differ diff --git a/IIR-Lab/models/__pycache__/VGG_loss.cpython-39.pyc b/IIR-Lab/models/__pycache__/VGG_loss.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..14635998c73ff99ebd4e926a28221eb2315fe315 Binary files /dev/null and b/IIR-Lab/models/__pycache__/VGG_loss.cpython-39.pyc differ diff --git a/IIR-Lab/models/__pycache__/__init__.cpython-310.pyc b/IIR-Lab/models/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5a36f60d4c5f61f4b35528363253d9e0029ea7fb Binary files /dev/null and b/IIR-Lab/models/__pycache__/__init__.cpython-310.pyc differ diff --git a/IIR-Lab/models/__pycache__/__init__.cpython-312.pyc b/IIR-Lab/models/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3286cd00b5083de052e221c85f326f2bb8e64bef Binary files /dev/null and b/IIR-Lab/models/__pycache__/__init__.cpython-312.pyc differ diff --git a/IIR-Lab/models/__pycache__/__init__.cpython-38.pyc b/IIR-Lab/models/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..068d6a7ee1d09ae9451d092b77704e25a96691a6 Binary files /dev/null and b/IIR-Lab/models/__pycache__/__init__.cpython-38.pyc differ diff --git a/IIR-Lab/models/__pycache__/__init__.cpython-39.pyc b/IIR-Lab/models/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a80e319a88f0fa6e2607650a22f36230b71153ed Binary files /dev/null and b/IIR-Lab/models/__pycache__/__init__.cpython-39.pyc differ diff --git a/IIR-Lab/models/__pycache__/arch_util.cpython-310.pyc b/IIR-Lab/models/__pycache__/arch_util.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d8b21179565215ac001f47d46a4bea2cddc75bca Binary files /dev/null and b/IIR-Lab/models/__pycache__/arch_util.cpython-310.pyc differ diff --git a/IIR-Lab/models/__pycache__/global_queue_buffer.cpython-310.pyc b/IIR-Lab/models/__pycache__/global_queue_buffer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..71d69d04b50c0c66477fdc7f316aaeb09e2dc6da Binary files /dev/null and b/IIR-Lab/models/__pycache__/global_queue_buffer.cpython-310.pyc differ diff --git a/IIR-Lab/models/__pycache__/loss_util.cpython-310.pyc 
b/IIR-Lab/models/__pycache__/loss_util.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5669a929bc0f7a9f903dcfaaddea03704196af70 Binary files /dev/null and b/IIR-Lab/models/__pycache__/loss_util.cpython-310.pyc differ diff --git a/IIR-Lab/models/__pycache__/loss_util.cpython-38.pyc b/IIR-Lab/models/__pycache__/loss_util.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2be79af817f437f4d40f79a00c4decd0eadca90a Binary files /dev/null and b/IIR-Lab/models/__pycache__/loss_util.cpython-38.pyc differ diff --git a/IIR-Lab/models/__pycache__/losses.cpython-310.pyc b/IIR-Lab/models/__pycache__/losses.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9c865224fb9f4c2a13432239d3bdab063ca5dcdc Binary files /dev/null and b/IIR-Lab/models/__pycache__/losses.cpython-310.pyc differ diff --git a/IIR-Lab/models/__pycache__/losses.cpython-38.pyc b/IIR-Lab/models/__pycache__/losses.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..64e136b619b1ed6d7b571c22a035da780ff06f6d Binary files /dev/null and b/IIR-Lab/models/__pycache__/losses.cpython-38.pyc differ diff --git a/IIR-Lab/models/__pycache__/lr_scheduler.cpython-310.pyc b/IIR-Lab/models/__pycache__/lr_scheduler.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cdaea6b4d8460de6d61f7ad397f5c2c435f67da4 Binary files /dev/null and b/IIR-Lab/models/__pycache__/lr_scheduler.cpython-310.pyc differ diff --git a/IIR-Lab/models/__pycache__/lr_scheduler.cpython-38.pyc b/IIR-Lab/models/__pycache__/lr_scheduler.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aa212b07cd9c37c353f604d553a657d588953c3e Binary files /dev/null and b/IIR-Lab/models/__pycache__/lr_scheduler.cpython-38.pyc differ diff --git a/IIR-Lab/models/__pycache__/modules.cpython-310.pyc b/IIR-Lab/models/__pycache__/modules.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f2af8cd73162ab1c19c5b4f653d0bb98250fd106 Binary files /dev/null and b/IIR-Lab/models/__pycache__/modules.cpython-310.pyc differ diff --git a/IIR-Lab/models/__pycache__/modules.cpython-38.pyc b/IIR-Lab/models/__pycache__/modules.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..910f052ecb9c84d7480fa9f17810d437562606ef Binary files /dev/null and b/IIR-Lab/models/__pycache__/modules.cpython-38.pyc differ diff --git a/IIR-Lab/models/__pycache__/trainer.cpython-310.pyc b/IIR-Lab/models/__pycache__/trainer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..27225235a0334627c47e41ff0e0ce31002c84e27 Binary files /dev/null and b/IIR-Lab/models/__pycache__/trainer.cpython-310.pyc differ diff --git a/IIR-Lab/models/__pycache__/trainer.cpython-312.pyc b/IIR-Lab/models/__pycache__/trainer.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6ad1a0f1e0fb060c1804a16b669d776bbab1b39b Binary files /dev/null and b/IIR-Lab/models/__pycache__/trainer.cpython-312.pyc differ diff --git a/IIR-Lab/models/__pycache__/trainer.cpython-38.pyc b/IIR-Lab/models/__pycache__/trainer.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b902be0c04e9a01227c98132343fadd31de5e473 Binary files /dev/null and b/IIR-Lab/models/__pycache__/trainer.cpython-38.pyc differ diff --git a/IIR-Lab/models/__pycache__/trainer.cpython-39.pyc b/IIR-Lab/models/__pycache__/trainer.cpython-39.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..68b185a39e05e9301053a5b44dcba2c794d647bc Binary files /dev/null and b/IIR-Lab/models/__pycache__/trainer.cpython-39.pyc differ diff --git a/IIR-Lab/models/__pycache__/trainer_utils.cpython-310.pyc b/IIR-Lab/models/__pycache__/trainer_utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..63f2d3e7770a035acb044349ae48c23bc2b89775 Binary files /dev/null and b/IIR-Lab/models/__pycache__/trainer_utils.cpython-310.pyc differ diff --git a/IIR-Lab/models/__pycache__/trainer_utils.cpython-312.pyc b/IIR-Lab/models/__pycache__/trainer_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5f21b3a932b367a44eda562e45a5b84505e4e82a Binary files /dev/null and b/IIR-Lab/models/__pycache__/trainer_utils.cpython-312.pyc differ diff --git a/IIR-Lab/models/__pycache__/trainer_utils.cpython-39.pyc b/IIR-Lab/models/__pycache__/trainer_utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..435503f10dcf207cc9e7ae8907ce9f145ff93529 Binary files /dev/null and b/IIR-Lab/models/__pycache__/trainer_utils.cpython-39.pyc differ diff --git a/IIR-Lab/models/__pycache__/utils.cpython-310.pyc b/IIR-Lab/models/__pycache__/utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b7e4fa656b3a4d7ea403a499cb7774672d6843ea Binary files /dev/null and b/IIR-Lab/models/__pycache__/utils.cpython-310.pyc differ diff --git a/IIR-Lab/models/__pycache__/utils.cpython-312.pyc b/IIR-Lab/models/__pycache__/utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..71fdae49b51e29183c84fede06fd2cdb29bd38f4 Binary files /dev/null and b/IIR-Lab/models/__pycache__/utils.cpython-312.pyc differ diff --git a/IIR-Lab/models/__pycache__/utils.cpython-39.pyc b/IIR-Lab/models/__pycache__/utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..469bc5dffed2b1931c0623d08b468df385e83a15 Binary files /dev/null and b/IIR-Lab/models/__pycache__/utils.cpython-39.pyc differ diff --git a/IIR-Lab/models/__pycache__/validation_seq_infer.cpython-310.pyc b/IIR-Lab/models/__pycache__/validation_seq_infer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2505b0759eb275162fb9e1635b9eae1abea0a50e Binary files /dev/null and b/IIR-Lab/models/__pycache__/validation_seq_infer.cpython-310.pyc differ diff --git a/IIR-Lab/models/__pycache__/vgg_model.cpython-310.pyc b/IIR-Lab/models/__pycache__/vgg_model.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..40b0004d4841e6d2f098ded2b6cf9227135fd8c8 Binary files /dev/null and b/IIR-Lab/models/__pycache__/vgg_model.cpython-310.pyc differ diff --git a/IIR-Lab/models/__pycache__/vgg_model.cpython-38.pyc b/IIR-Lab/models/__pycache__/vgg_model.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b08782e8905179b9bc90ab7ee9145439c8398efb Binary files /dev/null and b/IIR-Lab/models/__pycache__/vgg_model.cpython-38.pyc differ diff --git a/IIR-Lab/models/arch_util.py b/IIR-Lab/models/arch_util.py new file mode 100644 index 0000000000000000000000000000000000000000..73b6c3f2c73459c6c7a10ae3a9ea3ccea899d045 --- /dev/null +++ b/IIR-Lab/models/arch_util.py @@ -0,0 +1,69 @@ + +import torch +import torchvision +from distutils.version import LooseVersion +from torch import nn as nn +from torch.nn import init as init +from .dcn import ModulatedDeformConvPack, modulated_deform_conv + + +class 
DCNv2Pack(ModulatedDeformConvPack): + """Modulated deformable conv for deformable alignment. + + Different from the official DCNv2Pack, which generates offsets and masks + from the preceding features, this DCNv2Pack takes separate + features to generate offsets and masks. + + ``Paper: Delving Deep into Deformable Alignment in Video Super-Resolution`` + """ + + def forward(self, x, feat): + out = self.conv_offset(feat) + o1, o2, mask = torch.chunk(out, 3, dim=1) + offset = torch.cat((o1, o2), dim=1) + mask = torch.sigmoid(mask) + + offset_absmean = torch.mean(torch.abs(offset)) + if offset_absmean > 250: + # logger = get_root_logger() + # logger.warning(f'Offset abs mean is {offset_absmean}, larger than 250.') + print(f'Offset abs mean is {offset_absmean}, larger than 250.') + + if LooseVersion(torchvision.__version__) >= LooseVersion('0.9.0'): + return torchvision.ops.deform_conv2d(x, offset, self.weight, self.bias, self.stride, self.padding, + self.dilation, mask) + else: + return modulated_deform_conv(x, offset, mask, self.weight, self.bias, self.stride, self.padding, + self.dilation, self.groups, self.deformable_groups) + + + +class FlowGuidedDCN(ModulatedDeformConvPack): + '''Use other features to generate offsets and masks''' + + + def forward(self, x, feat, flows): + '''input: input features for deformable conv: N, C, H, W. + feat: other features used for generating offsets and mask: N, C, H, W. + flows: N, 2, H, W. + ''' + out = self.conv_offset(feat) + o1, o2, mask = torch.chunk(out, 3, dim=1) + mask = torch.sigmoid(mask) + + offset = torch.tanh(torch.cat((o1, o2), dim=1)) * 15 # max_residue_magnitude + offset = offset + flows.flip(1).repeat(1, offset.size(1)//2, 1, 1) + + offset_mean = torch.mean(torch.abs(offset)) + if offset_mean > 250: + print('FlowGuidedDCN: Offset mean is {}, larger than 250.'.format(offset_mean)) + # offset = offset.clamp(-50, 50) + # return None + + + if LooseVersion(torchvision.__version__) >= LooseVersion('0.9.0'): + return torchvision.ops.deform_conv2d(x, offset, self.weight, self.bias, self.stride, self.padding, + self.dilation, mask) + else: + return modulated_deform_conv(x, offset, mask, self.weight, self.bias, self.stride, self.padding, + self.dilation, self.groups, self.deformable_groups) \ No newline at end of file diff --git a/IIR-Lab/models/archs/NAF_Tiny_arch.py b/IIR-Lab/models/archs/NAF_Tiny_arch.py new file mode 100644 index 0000000000000000000000000000000000000000..ddfebd2c10f20c5641cfd7a76d13843f42c9fc88 --- /dev/null +++ b/IIR-Lab/models/archs/NAF_Tiny_arch.py @@ -0,0 +1,227 @@ +import torch +import torch.nn as nn +# from basicsr.models.archs import recons_video81 as recons_video +# from basicsr.models.archs import flow_pwc82 as flow_pwc +import numpy as np +from torch.nn import functional as F +import torch.utils.checkpoint as checkpoint +from torch.cuda.amp import autocast as autocast +# from .StudentImage_arch import StudentImage +from torch.nn.parallel import DistributedDataParallel +from collections import OrderedDict +from .arch_util import DCNv2Pack +from .common import ResList + +class LayerNormFunction(torch.autograd.Function): + + @staticmethod + def forward(ctx, x, weight, bias, eps): + ctx.eps = eps + N, C, H, W = x.size() + mu = x.mean(1, keepdim=True) + var = (x - mu).pow(2).mean(1, keepdim=True) + y = (x - mu) / (var + eps).sqrt() + ctx.save_for_backward(y, var, weight) + y = weight.view(1, C, 1, 1) * y + bias.view(1, C, 1, 1) + return y + + @staticmethod + def backward(ctx, grad_output): + eps = ctx.eps + + N, C, H, W
= grad_output.size() + y, var, weight = ctx.saved_variables + g = grad_output * weight.view(1, C, 1, 1) + mean_g = g.mean(dim=1, keepdim=True) + + mean_gy = (g * y).mean(dim=1, keepdim=True) + gx = 1. / torch.sqrt(var + eps) * (g - y * mean_gy - mean_g) + return gx, (grad_output * y).sum(dim=3).sum(dim=2).sum(dim=0), grad_output.sum(dim=3).sum(dim=2).sum( + dim=0), None + + +class LayerNorm2d(nn.Module): + + def __init__(self, channels, eps=1e-6): + super(LayerNorm2d, self).__init__() + self.register_parameter('weight', nn.Parameter(torch.ones(channels))) + self.register_parameter('bias', nn.Parameter(torch.zeros(channels))) + self.eps = eps + + def forward(self, x): + return LayerNormFunction.apply(x, self.weight, self.bias, self.eps) + +class SimpleGate(nn.Module): + def forward(self, x): + x1, x2 = x.chunk(2, dim=1) + return x1 * x2 + +class NAFBlock(nn.Module): + def __init__(self, c, DW_Expand=2, FFN_Expand=2, drop_out_rate=0.): + super().__init__() + dw_channel = c * DW_Expand + self.conv1 = nn.Conv2d(in_channels=c, out_channels=dw_channel, kernel_size=1, padding=0, stride=1, groups=1, bias=True) + self.conv2 = nn.Conv2d(in_channels=dw_channel, out_channels=dw_channel, kernel_size=3, padding=1, stride=1, groups=dw_channel, + bias=True) + self.conv3 = nn.Conv2d(in_channels=dw_channel // 2, out_channels=c, kernel_size=1, padding=0, stride=1, groups=1, bias=True) + + # Simplified Channel Attention + self.sca = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + nn.Conv2d(in_channels=dw_channel // 2, out_channels=dw_channel // 2, kernel_size=1, padding=0, stride=1, + groups=1, bias=True), + ) + + # SimpleGate + self.sg = SimpleGate() + + ffn_channel = FFN_Expand * c + self.conv4 = nn.Conv2d(in_channels=c, out_channels=ffn_channel, kernel_size=1, padding=0, stride=1, groups=1, bias=True) + self.conv5 = nn.Conv2d(in_channels=ffn_channel // 2, out_channels=c, kernel_size=1, padding=0, stride=1, groups=1, bias=True) + + self.norm1 = LayerNorm2d(c) + self.norm2 = LayerNorm2d(c) + + self.dropout1 = nn.Dropout(drop_out_rate) if drop_out_rate > 0. else nn.Identity() + self.dropout2 = nn.Dropout(drop_out_rate) if drop_out_rate > 0. 
else nn.Identity() + + self.beta = nn.Parameter(torch.zeros((1, c, 1, 1)), requires_grad=True) + self.gamma = nn.Parameter(torch.zeros((1, c, 1, 1)), requires_grad=True) + + def forward(self, inp): + x = inp + + x = self.norm1(x) + + x = self.conv1(x) + x = self.conv2(x) + x = self.sg(x) + x = x * self.sca(x) + x = self.conv3(x) + + x = self.dropout1(x) + + y = inp + x * self.beta + + x = self.conv4(self.norm2(y)) + x = self.sg(x) + x = self.conv5(x) + + x = self.dropout2(x) + + return y + x * self.gamma + + +class NAF_Tiny(nn.Module): + + def __init__(self,args, img_channel=4, width=64, middle_blk_num=4, enc_blk_nums=[2, 2, 2, 2], dec_blk_nums=[2, 2, 2, 2]): + super().__init__() + + + self.lrelu = nn.LeakyReLU(0.2) + self.convfist = nn.Conv2d(4, 64, 3, 1, 1) + # self.feature_extraction = ResList(5, 64) + + + self.ending = nn.Conv2d(in_channels=width, out_channels=4, kernel_size=3, padding=1, stride=1, groups=1, + bias=True) + + self.encoders = nn.ModuleList() + self.decoders = nn.ModuleList() + self.middle_blks = nn.ModuleList() + self.ups = nn.ModuleList() + self.downs = nn.ModuleList() + + chan = width + for num in enc_blk_nums: + self.encoders.append( + nn.Sequential( + *[NAFBlock(chan) for _ in range(num)] + ) + ) + self.downs.append( + nn.Conv2d(chan, 2*chan, 2, 2) + ) + chan = chan * 2 + + self.middle_blks = \ + nn.Sequential( + *[NAFBlock(chan) for _ in range(middle_blk_num)] + ) + + for num in dec_blk_nums: + self.ups.append( + nn.Sequential( + nn.Conv2d(chan, chan * 2, 1, bias=False), + nn.PixelShuffle(2) + ) + ) + chan = chan // 2 + self.decoders.append( + nn.Sequential( + *[NAFBlock(chan) for _ in range(num)] + ) + ) + + self.padder_size = 2 ** len(self.encoders) # 16 + + def forward(self, x): + B,C,H,W = x.shape + x = self.check_image_size(x) + # B, FC, H, W = x.shape + # F = 1 + # C = 4 + + # three = three.reshape(B, F, C, H, W) + center = x + # three = three.reshape(-1, C, H, W) + + x = self.lrelu(self.convfist(x)) + # x = self.feature_extraction(x) + + + encs = [] + + for encoder, down in zip(self.encoders, self.downs): + x = encoder(x) + encs.append(x) + x = down(x) + + x = self.middle_blks(x) + + for decoder, up, enc_skip in zip(self.decoders, self.ups, encs[::-1]): + x = up(x) + x = x + enc_skip + x = decoder(x) + + x = self.ending(x) + + x = x + center + + return x[:, :, :H, :W] + + def check_image_size(self, x): + _, _, h, w = x.size() + mod_pad_h = (self.padder_size - h % self.padder_size) % self.padder_size + mod_pad_w = (self.padder_size - w % self.padder_size) % self.padder_size + x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h)) + return x + + + + + +def load_networks(network, resume, strict=True): + load_path = resume + + if isinstance(network, nn.DataParallel) or isinstance(network, DistributedDataParallel): # wrapped nets prefix their keys with 'module.' + network = network.module # unwrap .module so the keys of the current net match + load_net = torch.load(load_path, map_location=torch.device('cuda')) + load_net_clean = OrderedDict() # remove unnecessary 'module.'
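+ # e.g. a checkpoint key saved under DataParallel as 'module.conv1.weight' is restored as 'conv1.weight', so it matches the unwrapped single-GPU model.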
+ for k, v in load_net.items(): # strip the 'module.' prefix from the loaded checkpoint keys as well + if k.startswith('module.'): + load_net_clean[k[7:]] = v + else: + load_net_clean[k] = v + + network.load_state_dict(load_net_clean, strict=strict) # honor the caller's strict flag diff --git a/IIR-Lab/models/archs/NAF_arch.py b/IIR-Lab/models/archs/NAF_arch.py new file mode 100644 index 0000000000000000000000000000000000000000..f8edd0a98e460af16d4c5f130c69fe4a9562d00b --- /dev/null +++ b/IIR-Lab/models/archs/NAF_arch.py @@ -0,0 +1,209 @@ +import torch +import torch.nn as nn +# from basicsr.models.archs import recons_video81 as recons_video +# from basicsr.models.archs import flow_pwc82 as flow_pwc +import numpy as np +from torch.nn import functional as F +import torch.utils.checkpoint as checkpoint +from torch.cuda.amp import autocast as autocast +# from .StudentImage_arch import StudentImage +from torch.nn.parallel import DistributedDataParallel +from collections import OrderedDict +from .arch_util import DCNv2Pack +from .common import ResList + +class LayerNormFunction(torch.autograd.Function): + + @staticmethod + def forward(ctx, x, weight, bias, eps): + ctx.eps = eps + N, C, H, W = x.size() + mu = x.mean(1, keepdim=True) + var = (x - mu).pow(2).mean(1, keepdim=True) + y = (x - mu) / (var + eps).sqrt() + ctx.save_for_backward(y, var, weight) + y = weight.view(1, C, 1, 1) * y + bias.view(1, C, 1, 1) + return y + + @staticmethod + def backward(ctx, grad_output): + eps = ctx.eps + + N, C, H, W = grad_output.size() + y, var, weight = ctx.saved_variables + g = grad_output * weight.view(1, C, 1, 1) + mean_g = g.mean(dim=1, keepdim=True) + + mean_gy = (g * y).mean(dim=1, keepdim=True) + gx = 1. / torch.sqrt(var + eps) * (g - y * mean_gy - mean_g) + return gx, (grad_output * y).sum(dim=3).sum(dim=2).sum(dim=0), grad_output.sum(dim=3).sum(dim=2).sum( + dim=0), None + + +class LayerNorm2d(nn.Module): + + def __init__(self, channels, eps=1e-6): + super(LayerNorm2d, self).__init__() + self.register_parameter('weight', nn.Parameter(torch.ones(channels))) + self.register_parameter('bias', nn.Parameter(torch.zeros(channels))) + self.eps = eps + + def forward(self, x): + return LayerNormFunction.apply(x, self.weight, self.bias, self.eps) + +class SimpleGate(nn.Module): + def forward(self, x): + x1, x2 = x.chunk(2, dim=1) + return x1 * x2 + +class NAFBlock(nn.Module): + def __init__(self, c, DW_Expand=2, FFN_Expand=2, drop_out_rate=0.): + super().__init__() + dw_channel = c * DW_Expand + self.conv1 = nn.Conv2d(in_channels=c, out_channels=dw_channel, kernel_size=1, padding=0, stride=1, groups=1, bias=True) + self.conv2 = nn.Conv2d(in_channels=dw_channel, out_channels=dw_channel, kernel_size=3, padding=1, stride=1, groups=dw_channel, + bias=True) + self.conv3 = nn.Conv2d(in_channels=dw_channel // 2, out_channels=c, kernel_size=1, padding=0, stride=1, groups=1, bias=True) + + # Simplified Channel Attention + self.sca = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + nn.Conv2d(in_channels=dw_channel // 2, out_channels=dw_channel // 2, kernel_size=1, padding=0, stride=1, + groups=1, bias=True), + ) + + # SimpleGate + self.sg = SimpleGate() + + ffn_channel = FFN_Expand * c + self.conv4 = nn.Conv2d(in_channels=c, out_channels=ffn_channel, kernel_size=1, padding=0, stride=1, groups=1, bias=True) + self.conv5 = nn.Conv2d(in_channels=ffn_channel // 2, out_channels=c, kernel_size=1, padding=0, stride=1, groups=1, bias=True) + + self.norm1 = LayerNorm2d(c) + self.norm2 = LayerNorm2d(c) + + self.dropout1 = nn.Dropout(drop_out_rate) if drop_out_rate > 0.
else nn.Identity() + self.dropout2 = nn.Dropout(drop_out_rate) if drop_out_rate > 0. else nn.Identity() + + self.beta = nn.Parameter(torch.zeros((1, c, 1, 1)), requires_grad=True) + self.gamma = nn.Parameter(torch.zeros((1, c, 1, 1)), requires_grad=True) + + def forward(self, inp): + x = inp + + x = self.norm1(x) + + x = self.conv1(x) + x = self.conv2(x) + x = self.sg(x) + x = x * self.sca(x) + x = self.conv3(x) + + x = self.dropout1(x) + + y = inp + x * self.beta + + x = self.conv4(self.norm2(y)) + x = self.sg(x) + x = self.conv5(x) + + x = self.dropout2(x) + + return y + x * self.gamma + + +class NAF_Video(nn.Module): + + def __init__(self,args, img_channel=4, width=64, middle_blk_num=12, enc_blk_nums=[2, 2, 4, 8], dec_blk_nums=[2, 2, 2, 2]): + super().__init__() + + self.lrelu = nn.LeakyReLU(0.2) + self.convfist = nn.Conv2d(4, 64, 3, 1, 1) + self.feature_extraction = ResList(5, 64) + + self.ending = nn.Conv2d(in_channels=width, out_channels=4, kernel_size=3, padding=1, stride=1, groups=1, + bias=True) + + self.encoders = nn.ModuleList() + self.decoders = nn.ModuleList() + self.middle_blks = nn.ModuleList() + self.ups = nn.ModuleList() + self.downs = nn.ModuleList() + + chan = width + for num in enc_blk_nums: + self.encoders.append( + nn.Sequential( + *[NAFBlock(chan) for _ in range(num)] + ) + ) + self.downs.append( + nn.Conv2d(chan, 2*chan, 2, 2) + ) + chan = chan * 2 + + self.middle_blks = \ + nn.Sequential( + *[NAFBlock(chan) for _ in range(middle_blk_num)] + ) + + for num in dec_blk_nums: + self.ups.append( + nn.Sequential( + nn.Conv2d(chan, chan * 2, 1, bias=False), + nn.PixelShuffle(2) + ) + ) + chan = chan // 2 + self.decoders.append( + nn.Sequential( + *[NAFBlock(chan) for _ in range(num)] + ) + ) + + self.padder_size = 2 ** len(self.encoders) # 16 + + def forward(self, x): + + center = x + + x = self.lrelu(self.convfist(x)) + x = self.feature_extraction(x) + + encs = [] + + + for encoder, down in zip(self.encoders, self.downs): + x = encoder(x) + encs.append(x) + x = down(x) + + x = self.middle_blks(x) + + + for decoder, up, enc_skip in zip(self.decoders, self.ups, encs[::-1]): + x = up(x) + x = x + enc_skip + x = decoder(x) + + x = self.ending(x) + + x = x + center + + return x + + +def load_networks(network, resume, strict=True): + load_path = resume + + if isinstance(network, nn.DataParallel) or isinstance(network, DistributedDataParallel): # wrapped nets prefix their keys with 'module.' + network = network.module # unwrap .module so the keys of the current net match + load_net = torch.load(load_path, map_location=torch.device('cuda')) + load_net_clean = OrderedDict() # remove unnecessary 'module.'
+ for k, v in load_net.items(): # strip the 'module.' prefix from the loaded checkpoint keys as well + if k.startswith('module.'): + load_net_clean[k[7:]] = v + else: + load_net_clean[k] = v + + network.load_state_dict(load_net_clean, strict=strict) # honor the caller's strict flag diff --git a/IIR-Lab/models/archs/__pycache__/BSVDTset_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/BSVDTset_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..08aa4cac79f65d7d9ac637eb72d37da64447ebca Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/BSVDTset_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/BSVD_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/BSVD_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0a13d7433791847246e242da1eefb30aad30725b Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/BSVD_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/ConvTrans_Block.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/ConvTrans_Block.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..26f5e5437f0dfdb63c8ce2d8976c793fabd49023 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/ConvTrans_Block.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/ConvTrans_Block.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/ConvTrans_Block.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9972f9ae7353b6896ac305a62f8e74582c9881cd Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/ConvTrans_Block.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/ConvTrans_Encoder.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/ConvTrans_Encoder.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..29cdae5a784721e8be943a6bb3e51ecbee12b863 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/ConvTrans_Encoder.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/ConvTrans_Encoder.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/ConvTrans_Encoder.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3583b31571d924d35eae949af94bc49a83d63632 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/ConvTrans_Encoder.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/KFDLSR_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/KFDLSR_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fed2c21faf0331aba272f13449f9d8e348f0b1e0 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/KFDLSR_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/MFD_3_64_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/MFD_3_64_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..16920e3ddfff7f97208c7d362dd7c46cf19d4ab9 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/MFD_3_64_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/MFD_3_64_arch.cpython-39.pyc b/IIR-Lab/models/archs/__pycache__/MFD_3_64_arch.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..36e2d1fe4f330d30676a77c52cda7158e90dee07 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/MFD_3_64_arch.cpython-39.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/NAF_Tiny_arch.cpython-39.pyc b/IIR-Lab/models/archs/__pycache__/NAF_Tiny_arch.cpython-39.pyc new file mode 100644 index
0000000000000000000000000000000000000000..130719b76344d4aea4f724dcaca03ece9281c89b Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/NAF_Tiny_arch.cpython-39.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/NAF_Video_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/NAF_Video_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6fd8ae047091935b26293a787d315a84756d319c Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/NAF_Video_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/NAF_Video_arch.cpython-312.pyc b/IIR-Lab/models/archs/__pycache__/NAF_Video_arch.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2177257056626813a4df13eb11a5d2e0b40b83a8 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/NAF_Video_arch.cpython-312.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/NAF_Video_arch.cpython-39.pyc b/IIR-Lab/models/archs/__pycache__/NAF_Video_arch.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e6d92c66b05423a0f56e366fa5cc3beb716cd6e4 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/NAF_Video_arch.cpython-39.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/NAF_arch.cpython-312.pyc b/IIR-Lab/models/archs/__pycache__/NAF_arch.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4507054e4b006623cf51d3843d671a915dace2d0 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/NAF_arch.cpython-312.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/NAF_arch.cpython-39.pyc b/IIR-Lab/models/archs/__pycache__/NAF_arch.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fe6def88b3d2346abee3145a0472b04a405d1dc1 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/NAF_arch.cpython-39.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/RCAN.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/RCAN.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e4b8458d9a88f6fc802aef029daad638813f3435 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/RCAN.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/RCAN.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/RCAN.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0996b97c9ed07b3771f78ac0a788b30323580f8e Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/RCAN.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/RCAN_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/RCAN_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a22a5012bda0009424760f4cbfe7f79efceecc7b Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/RCAN_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/RES_CA.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/RES_CA.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b9f3b9e65f2860b17e3abb704f66d586e5c45360 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/RES_CA.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/RES_CA.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/RES_CA.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..69137931fa7d24a6aa9e1c34507c33390df4afcc Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/RES_CA.cpython-38.pyc differ diff --git 
a/IIR-Lab/models/archs/__pycache__/SelfDZSR_util.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/SelfDZSR_util.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7f1d16074c22f5464511899a829a765dc5fa33c6 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/SelfDZSR_util.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/SelfDZSR_util.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/SelfDZSR_util.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6a55108dd4ce020dc6b9172cb7db3583481698a7 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/SelfDZSR_util.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/StudentHalf_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/StudentHalf_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ed50805dbb3f8747f9b8be2bdb86b80c6732ca20 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/StudentHalf_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/StudentImage_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/StudentImage_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c416079258ea2250f6884c1122ae467ed9f1b07f Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/StudentImage_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/StudentImage_arch.cpython-39.pyc b/IIR-Lab/models/archs/__pycache__/StudentImage_arch.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d28f2931fd1581e94d79c1f5095b27cb3bd9e3fa Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/StudentImage_arch.cpython-39.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/StudentTest_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/StudentTest_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9be50db63e2f5ac5c07cfddaed23ea01d5a34acc Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/StudentTest_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/Student_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/Student_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c3750258cf10e319712e2ac7db3c6fa6c102c2a5 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/Student_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/Student_arch.cpython-39.pyc b/IIR-Lab/models/archs/__pycache__/Student_arch.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6b6f95bde963f4aec2ab45fa401d8000f6b5a18c Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/Student_arch.cpython-39.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/UNet_NAF_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/UNet_NAF_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0c996abebab142f9267604a468557e102f0f30f0 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/UNet_NAF_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/UNet_NAF_arch.cpython-312.pyc b/IIR-Lab/models/archs/__pycache__/UNet_NAF_arch.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8a36f7f0208aaed5755df1a780ce6398f5404fd3 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/UNet_NAF_arch.cpython-312.pyc differ diff --git 
a/IIR-Lab/models/archs/__pycache__/UNet_S_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/UNet_S_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..896cd1157d8f9f7024cf6b31f2c312c2b446d1ce Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/UNet_S_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/UNet_ZXS4_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/UNet_ZXS4_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7083a1d9515895052f6538fea2a1732160448912 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/UNet_ZXS4_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/UNet_ZXS4_arch.cpython-312.pyc b/IIR-Lab/models/archs/__pycache__/UNet_ZXS4_arch.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b60590f512d3b2a5441a406d3f98a2a74e80bf94 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/UNet_ZXS4_arch.cpython-312.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/UNet_ZXS4_arch.cpython-39.pyc b/IIR-Lab/models/archs/__pycache__/UNet_ZXS4_arch.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f01935b119e24fde172fbe86f78d201d8b076121 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/UNet_ZXS4_arch.cpython-39.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/UNet_ZXS5_arch.cpython-39.pyc b/IIR-Lab/models/archs/__pycache__/UNet_ZXS5_arch.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c7d413d744af9d829cbc75a5bff1f33b2f6de93c Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/UNet_ZXS5_arch.cpython-39.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/UNet_baseS1_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/UNet_baseS1_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fdb3b09b0515770147eda24f2e72eb4ee4c1eca2 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/UNet_baseS1_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/UNet_baseS2_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/UNet_baseS2_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a4e6f77572bb95dfff4602ba320a6671a74505b7 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/UNet_baseS2_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/UNet_base_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/UNet_base_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0c8079f65b0480462b36b709ddfc93ef2d1bf6fe Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/UNet_base_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/Unet_M_F_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/Unet_M_F_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2203a09fc64638104fe54179ec56dc84e2f36b85 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/Unet_M_F_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/Unet_M_F_arch.cpython-312.pyc b/IIR-Lab/models/archs/__pycache__/Unet_M_F_arch.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..520a8dccc896c296b73258d7b92416f2e4286b9d Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/Unet_M_F_arch.cpython-312.pyc differ diff --git 
a/IIR-Lab/models/archs/__pycache__/Unet_M_F_arch.cpython-39.pyc b/IIR-Lab/models/archs/__pycache__/Unet_M_F_arch.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bf8f4a1ad57c6f0c3678b39ab8dd12ae0c847f15 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/Unet_M_F_arch.cpython-39.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/Unetseeindark.cpython-39.pyc b/IIR-Lab/models/archs/__pycache__/Unetseeindark.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..784d4362183c88d65fd6eca16f138ebba645f86b Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/Unetseeindark.cpython-39.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V10_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V10_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..43f03129690c7d4baf5d9031e94499d02f2bb486 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V10_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V10_arch.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/V10_arch.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f1eb5feef31a6f50b69449c2ead7c996d47a760c Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V10_arch.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V11_unaligned_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V11_unaligned_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a03f27343d50c7fda6479683e7f626bb8351328c Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V11_unaligned_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V11_unaligned_arch.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/V11_unaligned_arch.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6a4e7278c4f54385eb6b8b35391fb520533ecee2 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V11_unaligned_arch.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V12_unaligned_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V12_unaligned_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7ba0a843290c44d75e822f8f2d8b9b33517a232c Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V12_unaligned_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V1_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V1_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4de49781cd63f8b669ae5ae78792a73d72787cc9 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V1_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V21_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V21_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fd218e945fc553dae4e1f729f733325cfb83c544 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V21_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V21_arch.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/V21_arch.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4b23580175242a3b7deea00bfbc4bd66943a2fe5 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V21_arch.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V22_arch.cpython-310.pyc 
b/IIR-Lab/models/archs/__pycache__/V22_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9557f96a2525fc76a1a2fd383dd156f350f8dc49 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V22_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V23_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V23_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..71834b0f08148a4da06e2c6057aacfcd3297900e Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V23_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V24_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V24_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..16abe6d0ea085801af2a2e72627ee072fca7cec5 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V24_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V24_arch.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/V24_arch.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..70dc983f748c59572c7953aba01db0c5b5c4b057 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V24_arch.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V25L_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V25L_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b6b3323a9eea25b6631bfd0ccc4df2acb67a0712 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V25L_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V25L_arch.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/V25L_arch.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fb363cf1ca49023838f25b44201a5d13cd0abc12 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V25L_arch.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V25_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V25_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..894aac2ce226ce03b14f14c5f7dfe2c09e4c21a2 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V25_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V25_arch.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/V25_arch.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f0c5330d8d8767624db77d76e6a6e24fb1b16259 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V25_arch.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V26_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V26_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..74d5c2901f19b8fcc18c7b89484bac5f6beece4b Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V26_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V26_arch.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/V26_arch.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..12451c8dddb6e0797fdf7f2f3bd055cc767b9c20 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V26_arch.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V27_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V27_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ed2412c7ce5172023e87ac7392e182482f3cad37 Binary files /dev/null and 
b/IIR-Lab/models/archs/__pycache__/V27_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V29_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V29_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c80fcbddbce573a94ddca7b22649616d64f264a5 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V29_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V29_arch.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/V29_arch.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1c549505cfdd51176ee7001138e28f81d63d43db Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V29_arch.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V2_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V2_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d3bebbacb1ee9270927623618d7ad3016137ace1 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V2_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V30CF_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V30CF_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..adc0cd81bfcc140035012e98016093cf2cff84b4 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V30CF_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V30_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V30_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4e6dea9c83457eb871c3fae5cc01818c4c5f256a Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V30_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V30_arch.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/V30_arch.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..acaa502c2df96d2843417313f25f5f9897c7f587 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V30_arch.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31D111_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V31D111_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..08ebfbe2ed7471f2f3b7dd1647ae746a462f31a3 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31D111_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31D111_arch.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/V31D111_arch.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5d9d1133ed519dbe3b08268402e6094b958999df Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31D111_arch.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31DD_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V31DD_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9f1b2a528ffe42103017c6def9a796301e9b7bef Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31DD_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31DD_arch.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/V31DD_arch.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5968b39f4cc4e74bd72391065e2e6a0f1d85ebca Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31DD_arch.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31DL_arch.cpython-310.pyc 
b/IIR-Lab/models/archs/__pycache__/V31DL_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4f8494cb7658a94738e5fda7265551dcf214553d Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31DL_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31DL_arch.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/V31DL_arch.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9400d23082b87d85634a55893c0a8d685de0de39 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31DL_arch.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31DS1_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V31DS1_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4977a77cee0e76c2968738e660260868e29b8860 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31DS1_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31DS28_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V31DS28_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..64975ccf1f1f6302324132ca2864c256111d7847 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31DS28_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31DS2_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V31DS2_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..464fe372bc2d44ff0860fa1146114bf3f3c92294 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31DS2_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31DS3_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V31DS3_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c648d31fb322a47776bb017da31029f64f9f7674 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31DS3_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31DS4_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V31DS4_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..50175e3440bf673611b66ce93317698a066a5aa9 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31DS4_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31DS5_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V31DS5_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..469822788184c313b661bc1ab48b8b8ac12dbfb2 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31DS5_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31D_S1_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V31D_S1_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..90a53e99dd284f1e0172762ff905e876c8409894 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31D_S1_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31D_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V31D_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d176d1e519105944ca15a83ae224104db2c81a1e Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31D_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31D_arch.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/V31D_arch.cpython-38.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..c1e192f6e63a75b40a7be25080f44c647c42f925 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31D_arch.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V31_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6756bfd2767d8269f9464eda6bbbaabb52c59eba Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31_arch.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/V31_arch.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0ce8c47be1837e6c1b547f591d3db417682d2d4c Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31_arch.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/arch_util.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/arch_util.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9d13cbf55a9d4249622ed23037f084eca595b122 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/arch_util.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/arch_util.cpython-312.pyc b/IIR-Lab/models/archs/__pycache__/arch_util.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bbe9d8c5bbefc55da258fcf8fc3df80efe05073b Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/arch_util.cpython-312.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/arch_util.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/arch_util.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2e132ceee76cb953dae93b68aa02ddf30307de27 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/arch_util.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/arch_util.cpython-39.pyc b/IIR-Lab/models/archs/__pycache__/arch_util.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..54a51066e9225609372712fe9148ea24b4647c53 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/arch_util.cpython-39.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/common.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/common.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5299e23334689edb262dda1f30dc408710e34b79 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/common.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/common.cpython-312.pyc b/IIR-Lab/models/archs/__pycache__/common.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..59e4046e3b89b17e39ca68321181d775f1bc8f72 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/common.cpython-312.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/common.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/common.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a4430df0eab097d587ee259ee488434228b1e301 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/common.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/common.cpython-39.pyc b/IIR-Lab/models/archs/__pycache__/common.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c379e132b2319d6edd446fbeacac8696ac50eda9 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/common.cpython-39.pyc differ diff --git 
a/IIR-Lab/models/archs/__pycache__/common_RCAN.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/common_RCAN.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0e7034876e3d4050b11e9c940267012798335945 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/common_RCAN.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/common_RCAN.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/common_RCAN.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2ea21f92e475d3f9017776de4f1ae197d08d7ff1 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/common_RCAN.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/component.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/component.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e3e5eb7f1266d5a1186a860e88a6e6d9e1382931 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/component.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/matching.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/matching.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f76688f174aa9f04ab6d63942a0c3909650ba39c Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/matching.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/matching.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/matching.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b4161f6dea56dafd8f69a6b2a998ad5a8145c367 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/matching.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/spynet.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/spynet.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5660697a7c368fccc0d2944ce27c76488609f4ae Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/spynet.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/spynet.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/spynet.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..855af46342764ffe3aa69d969eebe67f2092d069 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/spynet.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/spynet_flow.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/spynet_flow.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..45580bf039edf34ef2a745096c789c27bd1b935f Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/spynet_flow.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/arch_util.py b/IIR-Lab/models/archs/arch_util.py new file mode 100644 index 0000000000000000000000000000000000000000..73b6c3f2c73459c6c7a10ae3a9ea3ccea899d045 --- /dev/null +++ b/IIR-Lab/models/archs/arch_util.py @@ -0,0 +1,69 @@ + +import torch +import torchvision +from distutils.version import LooseVersion +from torch import nn as nn +from torch.nn import init as init +from .dcn import ModulatedDeformConvPack, modulated_deform_conv + + +class DCNv2Pack(ModulatedDeformConvPack): + """Modulated deformable conv for deformable alignment. + + Different from the official DCNv2Pack, which generates offsets and masks + from the preceding features, this DCNv2Pack takes separate + features to generate offsets and masks.
+ + ``Paper: Delving Deep into Deformable Alignment in Video Super-Resolution`` + """ + + def forward(self, x, feat): + out = self.conv_offset(feat) + o1, o2, mask = torch.chunk(out, 3, dim=1) + offset = torch.cat((o1, o2), dim=1) + mask = torch.sigmoid(mask) + + offset_absmean = torch.mean(torch.abs(offset)) + if offset_absmean > 250: + # logger = get_root_logger() + # logger.warning(f'Offset abs mean is {offset_absmean}, larger than 250.') + print(f'Offset abs mean is {offset_absmean}, larger than 250.') + + if LooseVersion(torchvision.__version__) >= LooseVersion('0.9.0'): + return torchvision.ops.deform_conv2d(x, offset, self.weight, self.bias, self.stride, self.padding, + self.dilation, mask) + else: + return modulated_deform_conv(x, offset, mask, self.weight, self.bias, self.stride, self.padding, + self.dilation, self.groups, self.deformable_groups) + + + +class FlowGuidedDCN(ModulatedDeformConvPack): + '''Use other features to generate offsets and masks''' + + + def forward(self, x, feat, flows): + '''input: input features for deformable conv: N, C, H, W. + feat: other features used for generating offsets and mask: N, C, H, W. + flows: N, 2, H, W. + ''' + out = self.conv_offset(feat) + o1, o2, mask = torch.chunk(out, 3, dim=1) + mask = torch.sigmoid(mask) + + offset = torch.tanh(torch.cat((o1, o2), dim=1)) * 15 # max_residue_magnitude + offset = offset + flows.flip(1).repeat(1, offset.size(1)//2, 1, 1) + + offset_mean = torch.mean(torch.abs(offset)) + if offset_mean > 250: + print('FlowGuidedDCN: Offset mean is {}, larger than 250.'.format(offset_mean)) + # offset = offset.clamp(-50, 50) + # return None + + + if LooseVersion(torchvision.__version__) >= LooseVersion('0.9.0'): + return torchvision.ops.deform_conv2d(x, offset, self.weight, self.bias, self.stride, self.padding, + self.dilation, mask) + else: + return modulated_deform_conv(x, offset, mask, self.weight, self.bias, self.stride, self.padding, + self.dilation, self.groups, self.deformable_groups) \ No newline at end of file diff --git a/IIR-Lab/models/archs/common.py b/IIR-Lab/models/archs/common.py new file mode 100644 index 0000000000000000000000000000000000000000..b2e029c48bc8c716ef34e10de96e6c3c9019bbd7 --- /dev/null +++ b/IIR-Lab/models/archs/common.py @@ -0,0 +1,498 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn import init as init +from torch.nn.modules.batchnorm import _BatchNorm +import matplotlib.pyplot as plt + + +def default_conv(in_channels, out_channels, kernel_size,stride=1, bias=True): + return nn.Conv2d( + in_channels, out_channels, kernel_size, + padding=(kernel_size//2),stride=stride, bias=bias) + +def conv1x1(in_channels, out_channels, stride=1): + return nn.Conv2d(in_channels, out_channels, kernel_size=1, + stride=stride, padding=0, bias=True) + +def conv3x3(in_channels, out_channels, stride=1): + return nn.Conv2d(in_channels, out_channels, kernel_size=3, + stride=stride, padding=1, bias=True) + +def conv5x5(in_channels, out_channels, stride=1): + return nn.Conv2d(in_channels, out_channels, kernel_size=5, + stride=stride, padding=2, bias=True) + +def make_layer(basic_block, num_basic_block, **kwarg): + """Make layers by stacking the same blocks. + + Args: + basic_block (nn.module): nn.module class for basic block. + num_basic_block (int): number of blocks. + + Returns: + nn.Sequential: Stacked blocks in nn.Sequential.
+ """ + layers = [] + for _ in range(num_basic_block): + layers.append(basic_block(**kwarg)) + return nn.Sequential(*layers) #30个 (0): ResidualBlockNoBN( + +class RBNoBN(nn.Module): + """Residual block without BN. + + Args: + num_feat (int): Channel number of intermediate features. + Default: 64. + res_scale (float): Residual scale. Default: 1. + pytorch_init (bool): If set to True, use pytorch default init, + otherwise, use default_init_weights. Default: False. + """ + + def __init__(self, num_feat=64, res_scale=1, pytorch_init=False): + super(RBNoBN, self).__init__() + self.res_scale = res_scale + self.conv1 = nn.Conv2d(num_feat, num_feat, 3, 1, 1, bias=True) + self.conv2 = nn.Conv2d(num_feat, num_feat, 3, 1, 1, bias=True) + self.relu = nn.ReLU(inplace=True) + + if not pytorch_init: + default_init_weights([self.conv1, self.conv2], 0.1) + + def forward(self, x): + identity = x + out = self.conv2(self.relu(self.conv1(x))) + return identity + out * self.res_scale + +class ResBlock(nn.Module): + def __init__(self, in_channels, out_channels, stride=1, downsample=None, res_scale=1): + super(ResBlock, self).__init__() + self.res_scale = res_scale + self.conv1 = conv3x3(in_channels, out_channels, stride) + self.relu = nn.LeakyReLU(0.2, inplace=True) + self.conv2 = conv3x3(out_channels, out_channels) + + def forward(self, x): + x1 = x + out = self.conv1(x) + out = self.relu(out) + out = self.conv2(out) + out = out * self.res_scale + x1 + return out + +# class ConvResidualBlocks(nn.Module): +# """Conv and residual block used in BasicVSR. + +# Args: +# num_in_ch (int): Number of input channels. Default: 3. +# num_out_ch (int): Number of output channels. Default: 64. +# num_block (int): Number of residual blocks. Default: 15. +# """ + +# def __init__(self, num_in_ch=3, num_out_ch=64, num_block=15): +# super().__init__() +# self.main = nn.Sequential( +# nn.Conv2d(num_in_ch, num_out_ch, 3, 1, 1, bias=True), nn.LeakyReLU(negative_slope=0.2, inplace=True), +# make_layer(RBNoBN, num_block, num_feat=num_out_ch)) + +# def forward(self, fea): +# return self.main(fea) + +class Encoder_input(nn.Module): + def __init__(self, num_res_blocks, n_feats, img_channel, res_scale=1): + super(Encoder_input, self).__init__() + self.num_res_blocks = num_res_blocks + self.conv_head = conv3x3(img_channel, n_feats) + + self.RBs = nn.ModuleList() + for i in range(self.num_res_blocks): + self.RBs.append(ResBlock(in_channels=n_feats, out_channels=n_feats, + res_scale=res_scale)) + + self.conv_tail = conv3x3(n_feats, n_feats) + self.relu = nn.LeakyReLU(0.2, inplace=True) + + def forward(self, x): + x = self.relu(self.conv_head(x)) + x1 = x + for i in range(self.num_res_blocks): + x = self.RBs[i](x) + x = self.conv_tail(x) + x = x + x1 + return x + + + + +class ResList(nn.Module): + def __init__(self, num_res_blocks, n_feats, res_scale=1): + super(ResList, self).__init__() + self.num_res_blocks = num_res_blocks + + self.RBs = nn.ModuleList() + for i in range(self.num_res_blocks): + self.RBs.append(ResBlock(in_channels=n_feats, out_channels=n_feats)) + + self.conv_tail = conv3x3(n_feats, n_feats) + + def forward(self, x): + x1 = x + for i in range(self.num_res_blocks): + x = self.RBs[i](x) + x = self.conv_tail(x) + x = x + x1 + return x + + +class Res_Attention_List(nn.Module): + def __init__(self, num_res_blocks, n_feats, res_scale=1): + super(Res_Attention_List, self).__init__() + self.num_res_blocks = num_res_blocks + + self.RBs = nn.ModuleList() + for i in range(self.num_res_blocks): + 
self.RBs.append(Res_Attention(in_channels=n_feats, out_channels=n_feats)) + + self.conv_tail = conv3x3(n_feats, n_feats) + + def forward(self, x): + x1 = x + for i in range(self.num_res_blocks): + x = self.RBs[i](x) + x = self.conv_tail(x) + x = x + x1 + return x + + +class PixelShufflePack(nn.Module): + """ Pixel Shuffle upsample layer. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + scale_factor (int): Upsample ratio. + upsample_kernel (int): Kernel size of Conv layer to expand channels. + + Returns: + Upsampled feature map. + """ + + def __init__(self, in_channels, out_channels, scale_factor, + upsample_kernel): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.scale_factor = scale_factor + self.upsample_kernel = upsample_kernel + self.upsample_conv = nn.Conv2d( + self.in_channels, + self.out_channels * scale_factor * scale_factor, + self.upsample_kernel, + padding=(self.upsample_kernel - 1) // 2) + self.init_weights() + + def init_weights(self): + """Initialize weights for PixelShufflePack. + """ + default_init_weights(self, 1) + + def forward(self, x): + """Forward function for PixelShufflePack. + + Args: + x (Tensor): Input tensor with shape (n, c, h, w). + + Returns: + Tensor: Forward results. + """ + x = self.upsample_conv(x) + x = F.pixel_shuffle(x, self.scale_factor) + return x + +class BasicBlock(nn.Sequential): + def __init__( + self, conv, in_channels, out_channels, kernel_size, stride=1, bias=True, + bn=False,In=False,act=nn.PReLU()): + + m = [conv(in_channels, out_channels, kernel_size, stride=stride, bias=bias)] + if bn: + m.append(nn.BatchNorm2d(out_channels)) + if In: + m.append(nn.InstanceNorm2d(out_channels)) + if act is not None: + m.append(act) + + super(BasicBlock, self).__init__(*m) + +class MeanShift(nn.Conv2d): + def __init__(self, rgb_range, rgb_mean, rgb_std, sign=-1): + super(MeanShift, self).__init__(3, 3, kernel_size=1) + std = torch.Tensor(rgb_std) + self.weight.data = torch.eye(3).view(3, 3, 1, 1) + self.weight.data.div_(std.view(3, 1, 1, 1)) + self.bias.data = sign * rgb_range * torch.Tensor(rgb_mean) + self.bias.data.div_(std) + + self.weight.requires_grad = False + self.bias.requires_grad = False + +def flow_warp(x, flow, interp_mode='bilinear', padding_mode='zeros', align_corners=True): + """Warp an image or feature map with optical flow. + + Args: + x (Tensor): Tensor with size (n, c, h, w). + flow (Tensor): Tensor with size (n, h, w, 2), normal value. + interp_mode (str): 'nearest' or 'bilinear'. Default: 'bilinear'. + padding_mode (str): 'zeros' or 'border' or 'reflection'. + Default: 'zeros'. + align_corners (bool): Before pytorch 1.3, the default value is + align_corners=True. After pytorch 1.3, the default value is + align_corners=False. Here, we use the True as default. + + Returns: + Tensor: Warped image or feature map. 
+ """ + assert x.size()[-2:] == flow.size()[1:3] + _, _, h, w = x.size() + # create mesh grid + grid_y, grid_x = torch.meshgrid(torch.arange(0, h).type_as(x), torch.arange(0, w).type_as(x)) + grid = torch.stack((grid_x, grid_y), 2).float() # W(x), H(y), 2 + grid.requires_grad = False + + vgrid = grid + flow + # scale grid to [-1,1] + vgrid_x = 2.0 * vgrid[:, :, :, 0] / max(w - 1, 1) - 1.0 + vgrid_y = 2.0 * vgrid[:, :, :, 1] / max(h - 1, 1) - 1.0 + vgrid_scaled = torch.stack((vgrid_x, vgrid_y), dim=3) + output = F.grid_sample(x, vgrid_scaled, mode=interp_mode, padding_mode=padding_mode, align_corners=align_corners) + + # TODO, what if align_corners=False + return output + +@torch.no_grad() +def default_init_weights(module_list, scale=1, bias_fill=0, **kwargs): + """Initialize network weights. + + Args: + module_list (list[nn.Module] | nn.Module): Modules to be initialized. + scale (float): Scale initialized weights, especially for residual + blocks. Default: 1. + bias_fill (float): The value to fill bias. Default: 0 + kwargs (dict): Other arguments for initialization function. + """ + if not isinstance(module_list, list): + module_list = [module_list] + for module in module_list: + for m in module.modules(): + if isinstance(m, nn.Conv2d): + init.kaiming_normal_(m.weight, **kwargs) + m.weight.data *= scale + if m.bias is not None: + m.bias.data.fill_(bias_fill) + elif isinstance(m, nn.Linear): + init.kaiming_normal_(m.weight, **kwargs) + m.weight.data *= scale + if m.bias is not None: + m.bias.data.fill_(bias_fill) + elif isinstance(m, _BatchNorm): + init.constant_(m.weight, 1) + if m.bias is not None: + m.bias.data.fill_(bias_fill) + + + +class ChannelPool(nn.Module): + def forward(self, x): #是一个元祖 第一个是最大值 第二个是坐标 所以要[0] + return torch.cat((torch.max(x,1)[0].unsqueeze(1), torch.mean(x,1).unsqueeze(1)), dim=1 ) + + +## Channel Attention (CA) Layer +class CALayer(nn.Module): + def __init__(self, channel, reduction=16): + super(CALayer, self).__init__() + # global average pooling: feature --> point + self.avg_pool = nn.AdaptiveAvgPool2d(1) + # feature channel downscale and upscale --> channel weight + self.conv_du = nn.Sequential( + nn.Conv2d(channel, channel // reduction, 1, padding=0, bias=True), + nn.ReLU(inplace=True), + nn.Conv2d(channel // reduction, channel, 1, padding=0, bias=True), + nn.Sigmoid() + ) + + def forward(self, x): + y = self.avg_pool(x) + y = self.conv_du(y) + return x * y + + +class SpatialGate(nn.Module): + def __init__(self): + super(SpatialGate, self).__init__() + kernel_size = 7 + self.compress = ChannelPool() + # self.spatial = BasicConv(2, 1, kernel_size, stride=1, padding=(kernel_size-1) // 2, relu=False) + self.spatial = nn.Conv2d(2, 1, 7, 1, 3) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + x_compress = self.compress(x) #torch.Size([4, 2, 64, 64]) + x_out = F.relu(self.spatial(x_compress)) + # import pdb + # pdb.set_trace() + scale = self.sigmoid(x_out)# broadcasting + return x * scale + + +class Res_Attention_Conf(nn.Module): + def __init__(self, in_channels, out_channels, stride=1, downsample=None, res_scale=1, SA=False, CA=False): + super(Res_Attention_Conf, self).__init__() + + conv=default_conv + + + self.res_scale = res_scale + self.conv1 = conv3x3(in_channels, out_channels, stride) + self.relu = nn.LeakyReLU(0.2, inplace=True) + self.conv2 = conv3x3(out_channels, out_channels) + self.channel_attention = CALayer(out_channels, reduction=16) + self.spatial_attention = SpatialGate() + # self.conv3 = conv3x3(out_channels, out_channels) + self.CA = CA 
+ self.SA = SA + + def forward(self, x): + + + x1 = x + out = self.relu(self.conv1(x)) + + if self.SA: + out = self.spatial_attention(out) + + if self.CA: + out = self.channel_attention(out) + + out = self.relu(self.conv2(out)) + # out = self.conv3(out) + + out = out * self.res_scale + x1 + return out + + + + +class Res_CA_Block(nn.Module): + def __init__(self, in_channels, out_channels, stride=1, res_scale=1, CA=False): + super(Res_CA_Block, self).__init__() + + # conv=default_conv + self.res_scale = res_scale + self.conv1 = conv3x3(in_channels, out_channels, stride) + self.relu = nn.LeakyReLU(0.2, inplace=True) + self.conv2 = conv3x3(out_channels, out_channels) + self.channel_attention = CALayer(out_channels, reduction=16) + + # self.conv3 = conv3x3(out_channels, out_channels) + self.CA = CA + + + def forward(self, x): + x1 = x + out = self.relu(self.conv1(x)) + if self.CA: + out = self.channel_attention(out) + + out = self.relu(self.conv2(out)) + # out = self.conv3(out) + + out = out * self.res_scale + x1 + return out + + +# NOTE: this second definition of Res_Attention_List overrides the one defined earlier in this file; +# this version stacks Res_CA_Block (channel attention only) instead of Res_Attention. +class Res_Attention_List(nn.Module): + def __init__(self, num_res_blocks, n_feats, res_scale=1): + super(Res_Attention_List, self).__init__() + self.num_res_blocks = num_res_blocks + + self.RBs = nn.ModuleList() + for i in range(self.num_res_blocks): + self.RBs.append(Res_CA_Block(in_channels=n_feats, out_channels=n_feats)) + + self.conv_tail = conv3x3(n_feats, n_feats) + + def forward(self, x): + x1 = x + for i in range(self.num_res_blocks): + x = self.RBs[i](x) + x = self.conv_tail(x) + x = x + x1 + return x + + + +class Res_Attention(nn.Module): + def __init__(self, in_channels, out_channels, stride=1, downsample=None, res_scale=1, SA=False, CA=False): + super(Res_Attention, self).__init__() + self.res_scale = res_scale + self.conv1 = conv3x3(in_channels, out_channels, stride) + self.relu = nn.LeakyReLU(0.2, inplace=True) + self.conv2 = conv3x3(out_channels, out_channels) + self.channel_attention = CALayer(out_channels, reduction=16) + self.spatial_attention = SpatialGate() + # self.conv3 = conv3x3(out_channels, out_channels) + self.CA = CA + self.SA = SA + + def forward(self, x): + x1 = x + out = self.relu(self.conv1(x)) + + if self.SA: + out = self.spatial_attention(out) + + if self.CA: + out = self.channel_attention(out) + + out = self.relu(self.conv2(out)) + # out = self.conv3(out) + + out = out * self.res_scale + x1 + return out + + + + + + + +def record(fea, path): + fea = fea[0][0] + mean = fea.mean() + std = fea.std() + + fea_norm = (fea- mean)/std + + # fea = (fea.cpu().numpy()*255).round().astype(np.uint8) + fea_norm = fea_norm.detach().cpu().numpy() + # cv2.imwrite(path, fea_norm) + + plt.imsave(path, fea_norm, cmap = 'gray') + + + +# NOTE: record2 is a verbatim duplicate of record, kept as-is since other code may reference it by name. +def record2(fea, path): + fea = fea[0][0] + mean = fea.mean() + std = fea.std() + + fea_norm = (fea- mean)/std + + # fea = (fea.cpu().numpy()*255).round().astype(np.uint8) + fea_norm = fea_norm.detach().cpu().numpy() + # cv2.imwrite(path, fea_norm) + + plt.imsave(path, fea_norm, cmap = 'gray') \ No newline at end of file diff --git a/IIR-Lab/models/archs/component.py b/IIR-Lab/models/archs/component.py new file mode 100644 index 0000000000000000000000000000000000000000..559994843e212daafcb13226fca679361a3a758d --- /dev/null +++ b/IIR-Lab/models/archs/component.py @@ -0,0 +1,384 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn import init as init +from torch.nn.modules.batchnorm import _BatchNorm +import matplotlib.pyplot as plt + + +class CvBlock(nn.Module): +
'''(Conv2d => BN => ReLU) x 2''' + def __init__(self, in_ch, out_ch): + super(CvBlock, self).__init__() + self.convblock = nn.Sequential( + nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(out_ch), + nn.ReLU(inplace=True), + nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(out_ch), + nn.ReLU(inplace=True) + ) + + def forward(self, x): + return self.convblock(x) + + +class InputCvBlock(nn.Module): + '''(Conv with num_in_frames groups => BN => ReLU) + (Conv => BN => ReLU)''' + def __init__(self, num_in_frames, out_ch): + super(InputCvBlock, self).__init__() + self.interm_ch = 30 + self.convblock = nn.Sequential( + nn.Conv2d(num_in_frames*(3+1), num_in_frames*self.interm_ch, \ + kernel_size=3, padding=1, groups=num_in_frames, bias=False), + nn.BatchNorm2d(num_in_frames*self.interm_ch), + nn.ReLU(inplace=True), + nn.Conv2d(num_in_frames*self.interm_ch, out_ch, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(out_ch), + nn.ReLU(inplace=True) + ) + + def forward(self, x): + return self.convblock(x) + + +class InputCvBlock_1(nn.Module): + '''(Conv with num_in_frames groups => BN => ReLU) + (Conv => BN => ReLU)''' + def __init__(self, num_in_frames, out_ch): + super(InputCvBlock_1, self).__init__() + self.interm_ch = 30 + self.convblock = nn.Sequential( + nn.Conv2d(num_in_frames*(3+1), num_in_frames*self.interm_ch, \ + kernel_size=3, padding=1, groups=num_in_frames, bias=False), + nn.BatchNorm2d(num_in_frames*self.interm_ch), + nn.ReLU(inplace=True), + nn.Conv2d(num_in_frames*self.interm_ch, out_ch, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(out_ch), + nn.ReLU(inplace=True) + ) + + # self.NAF1 = NAFBlock(out_ch) + # self.NAF2 = NAFBlock(out_ch) + + def forward(self, x): + x = self.convblock(x) + # x = self.NAF1(x) + # return self.NAF2(x) + return x + + +class DownBlock(nn.Module): + '''Downscale + (Conv2d => BN => ReLU)*2''' + def __init__(self, in_ch, out_ch): + super(DownBlock, self).__init__() + self.convblock = nn.Sequential( + nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1, stride=2, bias=False), + nn.BatchNorm2d(out_ch), + nn.ReLU(inplace=True), + CvBlock(out_ch, out_ch) + ) + + def forward(self, x): + return self.convblock(x) + + +class DownBlock_1(nn.Module): + '''Downscale + (Conv2d => BN => ReLU)*2''' + def __init__(self, in_ch, out_ch): + super(DownBlock_1, self).__init__() + self.convblock = nn.Sequential( + nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1, stride=2, bias=False), + nn.BatchNorm2d(out_ch), + nn.ReLU(inplace=True), + CvBlock(out_ch, out_ch) + ) + + self.NAF1 = NAFBlock(in_ch) + self.NAF2 = NAFBlock(in_ch) + + + def forward(self, x): + + + x = self.NAF1(x) + x = self.NAF2(x) + return self.convblock(x) + + +class UpBlock(nn.Module): + '''(Conv2d => BN => ReLU)*2 + Upscale''' + def __init__(self, in_ch, out_ch): + super(UpBlock, self).__init__() + self.convblock = nn.Sequential( + CvBlock(in_ch, in_ch), + nn.Conv2d(in_ch, out_ch*4, kernel_size=3, padding=1, bias=False), + nn.PixelShuffle(2) + ) + + def forward(self, x): + return self.convblock(x) + + +class UpBlock_1(nn.Module): + '''(Conv2d => BN => ReLU)*2 + Upscale''' + def __init__(self, in_ch, out_ch): + super(UpBlock_1, self).__init__() + self.convblock = nn.Sequential( + CvBlock(in_ch, in_ch), + nn.Conv2d(in_ch, out_ch*4, kernel_size=3, padding=1, bias=False), + nn.PixelShuffle(2) + ) + + self.NAF1 = NAFBlock(in_ch) + self.NAF2 = NAFBlock(in_ch) + + def forward(self, x): + x = self.NAF1(x) + x = self.NAF2(x) + return 
self.convblock(x) + + +class OutputCvBlock(nn.Module): + '''Conv2d => BN => ReLU => Conv2d''' + def __init__(self, in_ch, out_ch): + super(OutputCvBlock, self).__init__() + self.convblock = nn.Sequential( + nn.Conv2d(in_ch, in_ch, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(in_ch), + nn.ReLU(inplace=True), + nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1, bias=False) + ) + + def forward(self, x): + return self.convblock(x) + + +class OutputCvBlock_1(nn.Module): + '''Conv2d => BN => ReLU => Conv2d''' + def __init__(self, in_ch, out_ch): + super(OutputCvBlock_1, self).__init__() + self.convblock = nn.Sequential( + nn.Conv2d(in_ch, in_ch, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(in_ch), + nn.ReLU(inplace=True), + nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1, bias=False) + ) + + # self.NAF1 = NAFBlock(in_ch) + # self.NAF2 = NAFBlock(in_ch) + + def forward(self, x): + # x = self.NAF1(x) + # x = self.NAF2(x) + return self.convblock(x) + + +class SimpleGate(nn.Module): + def forward(self, x): + x1, x2 = x.chunk(2, dim=1) + return x1 * x2 + + +class LayerNormFunction(torch.autograd.Function): + + @staticmethod + def forward(ctx, x, weight, bias, eps): + ctx.eps = eps + N, C, H, W = x.size() + mu = x.mean(1, keepdim=True) + var = (x - mu).pow(2).mean(1, keepdim=True) + y = (x - mu) / (var + eps).sqrt() + ctx.save_for_backward(y, var, weight) + y = weight.view(1, C, 1, 1) * y + bias.view(1, C, 1, 1) + return y + + @staticmethod + def backward(ctx, grad_output): + eps = ctx.eps + + N, C, H, W = grad_output.size() + y, var, weight = ctx.saved_variables + g = grad_output * weight.view(1, C, 1, 1) + mean_g = g.mean(dim=1, keepdim=True) + + mean_gy = (g * y).mean(dim=1, keepdim=True) + gx = 1. / torch.sqrt(var + eps) * (g - y * mean_gy - mean_g) + return gx, (grad_output * y).sum(dim=3).sum(dim=2).sum(dim=0), grad_output.sum(dim=3).sum(dim=2).sum( + dim=0), None + + +class LayerNorm2d(nn.Module): + + def __init__(self, channels, eps=1e-6): + super(LayerNorm2d, self).__init__() + self.register_parameter('weight', nn.Parameter(torch.ones(channels))) + self.register_parameter('bias', nn.Parameter(torch.zeros(channels))) + self.eps = eps + + def forward(self, x): + return LayerNormFunction.apply(x, self.weight, self.bias, self.eps) + + +class NAFBlock(nn.Module): + def __init__(self, c, DW_Expand=2, FFN_Expand=2, drop_out_rate=0.): + super().__init__() + dw_channel = c * DW_Expand + self.conv1 = nn.Conv2d(in_channels=c, out_channels=dw_channel, kernel_size=1, padding=0, stride=1, groups=1, bias=True) + self.conv2 = nn.Conv2d(in_channels=dw_channel, out_channels=dw_channel, kernel_size=3, padding=1, stride=1, groups=dw_channel, + bias=True) + self.conv3 = nn.Conv2d(in_channels=dw_channel // 2, out_channels=c, kernel_size=1, padding=0, stride=1, groups=1, bias=True) + + # Simplified Channel Attention + self.sca = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + nn.Conv2d(in_channels=dw_channel // 2, out_channels=dw_channel // 2, kernel_size=1, padding=0, stride=1, + groups=1, bias=True), + ) + + # SimpleGate + self.sg = SimpleGate() + + ffn_channel = FFN_Expand * c + self.conv4 = nn.Conv2d(in_channels=c, out_channels=ffn_channel, kernel_size=1, padding=0, stride=1, groups=1, bias=True) + self.conv5 = nn.Conv2d(in_channels=ffn_channel // 2, out_channels=c, kernel_size=1, padding=0, stride=1, groups=1, bias=True) + + self.norm1 = LayerNorm2d(c) + self.norm2 = LayerNorm2d(c) + + self.dropout1 = nn.Dropout(drop_out_rate) if drop_out_rate > 0. 
else nn.Identity() + self.dropout2 = nn.Dropout(drop_out_rate) if drop_out_rate > 0. else nn.Identity() + + self.beta = nn.Parameter(torch.zeros((1, c, 1, 1)), requires_grad=True) + self.gamma = nn.Parameter(torch.zeros((1, c, 1, 1)), requires_grad=True) + + def forward(self, inp): + x = inp + + x = self.norm1(x) + + x = self.conv1(x) + x = self.conv2(x) + x = self.sg(x) + x = x * self.sca(x) + x = self.conv3(x) + + x = self.dropout1(x) + + y = inp + x * self.beta + + x = self.conv4(self.norm2(y)) + x = self.sg(x) + x = self.conv5(x) + + x = self.dropout2(x) + + return y + x * self.gamma + + + + + + + + + + + + + + + + + + + + + +class DenBlock(nn.Module): + """ Definition of the denoising block of FastDVDnet. + Inputs of constructor: + num_input_frames: int. number of input frames + Inputs of forward(): + xn: input frames of dim [N, C, H, W], (C=3 RGB) + noise_map: array with noise map of dim [N, 1, H, W] + """ + + def __init__(self, num_input_frames=3): + super(DenBlock, self).__init__() + self.chs_lyr0 = 32 + self.chs_lyr1 = 64 + self.chs_lyr2 = 128 + + self.inc = InputCvBlock(num_in_frames=num_input_frames, out_ch=self.chs_lyr0) + self.downc0 = DownBlock(in_ch=self.chs_lyr0, out_ch=self.chs_lyr1) + self.downc1 = DownBlock(in_ch=self.chs_lyr1, out_ch=self.chs_lyr2) + self.upc2 = UpBlock(in_ch=self.chs_lyr2, out_ch=self.chs_lyr1) + self.upc1 = UpBlock(in_ch=self.chs_lyr1, out_ch=self.chs_lyr0) + self.outc = OutputCvBlock(in_ch=self.chs_lyr0, out_ch=3) + + + def forward(self, in0, in1, in2, noise_map): + '''Args: + inX: Tensor, [N, C, H, W] in the [0., 1.] range + noise_map: Tensor [N, 1, H, W] in the [0., 1.] range + ''' + # Input convolution block + x0 = self.inc(torch.cat((in0, noise_map, in1, noise_map, in2, noise_map), dim=1)) + # Downsampling + x1 = self.downc0(x0) + x2 = self.downc1(x1) + # Upsampling + x2 = self.upc2(x2) + x1 = self.upc1(x1+x2) + # Estimation + x = self.outc(x0+x1) + + # Residual + x = in1 - x + + return x + + +class DenBlock_1(nn.Module): + """ Definition of the denoising block of FastDVDnet. + Inputs of constructor: + num_input_frames: int. number of input frames + Inputs of forward(): + xn: input frames of dim [N, C, H, W], (C=3 RGB) + noise_map: array with noise map of dim [N, 1, H, W] + """ + + def __init__(self, num_input_frames=3): + super(DenBlock_1, self).__init__() + self.chs_lyr0 = 32 + self.chs_lyr1 = 64 + self.chs_lyr2 = 128 + + self.inc = InputCvBlock_1(num_in_frames=num_input_frames, out_ch=self.chs_lyr0) + self.downc0 = DownBlock_1(in_ch=self.chs_lyr0, out_ch=self.chs_lyr1) + self.downc1 = DownBlock_1(in_ch=self.chs_lyr1, out_ch=self.chs_lyr2) + self.upc2 = UpBlock_1(in_ch=self.chs_lyr2, out_ch=self.chs_lyr1) + self.upc1 = UpBlock_1(in_ch=self.chs_lyr1, out_ch=self.chs_lyr0) + self.outc = OutputCvBlock_1(in_ch=self.chs_lyr0, out_ch=3) + + + def forward(self, in0, in1, in2, noise_map): + '''Args: + inX: Tensor, [N, C, H, W] in the [0., 1.] range + noise_map: Tensor [N, 1, H, W] in the [0., 1.]
range + ''' + # Input convolution block + x0 = self.inc(torch.cat((in0, noise_map, in1, noise_map, in2, noise_map), dim=1)) + # Downsampling + x1 = self.downc0(x0) + x2 = self.downc1(x1) + # Upsampling + x2 = self.upc2(x2) + x1 = self.upc1(x1+x2) + # Estimation + x = self.outc(x0+x1) + + # Residual + x = in1 - x + + return x \ No newline at end of file diff --git a/IIR-Lab/models/archs/dcn/__init__.py b/IIR-Lab/models/archs/dcn/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..32e3592f896d61b4127e09d0476381b9d55e32ff --- /dev/null +++ b/IIR-Lab/models/archs/dcn/__init__.py @@ -0,0 +1,7 @@ +from .deform_conv import (DeformConv, DeformConvPack, ModulatedDeformConv, ModulatedDeformConvPack, deform_conv, + modulated_deform_conv) + +__all__ = [ + 'DeformConv', 'DeformConvPack', 'ModulatedDeformConv', 'ModulatedDeformConvPack', 'deform_conv', + 'modulated_deform_conv' +] diff --git a/IIR-Lab/models/archs/dcn/__pycache__/__init__.cpython-310.pyc b/IIR-Lab/models/archs/dcn/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..600f6f26af983d6854c22d8f374cd46857cef71b Binary files /dev/null and b/IIR-Lab/models/archs/dcn/__pycache__/__init__.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/dcn/__pycache__/__init__.cpython-312.pyc b/IIR-Lab/models/archs/dcn/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7a9f9f962afa1933f6c50da6b72c71189378bfb4 Binary files /dev/null and b/IIR-Lab/models/archs/dcn/__pycache__/__init__.cpython-312.pyc differ diff --git a/IIR-Lab/models/archs/dcn/__pycache__/__init__.cpython-38.pyc b/IIR-Lab/models/archs/dcn/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..663b0797ac2bc7752910b7459b157ce30410c5f7 Binary files /dev/null and b/IIR-Lab/models/archs/dcn/__pycache__/__init__.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/dcn/__pycache__/__init__.cpython-39.pyc b/IIR-Lab/models/archs/dcn/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b80426dcaa53b083fbc992dd7748e2a5971da7be Binary files /dev/null and b/IIR-Lab/models/archs/dcn/__pycache__/__init__.cpython-39.pyc differ diff --git a/IIR-Lab/models/archs/dcn/__pycache__/deform_conv.cpython-310.pyc b/IIR-Lab/models/archs/dcn/__pycache__/deform_conv.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bf65111b43bc4967962f6109ec0a1fdea2c8adb2 Binary files /dev/null and b/IIR-Lab/models/archs/dcn/__pycache__/deform_conv.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/dcn/__pycache__/deform_conv.cpython-312.pyc b/IIR-Lab/models/archs/dcn/__pycache__/deform_conv.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..faff316465cc908db1f0cf6d251b0e1fbfa8e3eb Binary files /dev/null and b/IIR-Lab/models/archs/dcn/__pycache__/deform_conv.cpython-312.pyc differ diff --git a/IIR-Lab/models/archs/dcn/__pycache__/deform_conv.cpython-38.pyc b/IIR-Lab/models/archs/dcn/__pycache__/deform_conv.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..991621bdfdc40736c33b8a6e126607e36f62e0a5 Binary files /dev/null and b/IIR-Lab/models/archs/dcn/__pycache__/deform_conv.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/dcn/__pycache__/deform_conv.cpython-39.pyc b/IIR-Lab/models/archs/dcn/__pycache__/deform_conv.cpython-39.pyc new file mode 100644 index 
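(The two denoising blocks above follow the FastDVDnet pattern: three consecutive frames plus a per-pixel noise map go in, a U-Net estimates the noise, and the residual is subtracted from the middle frame. A minimal usage sketch, assuming the import path from this diff and an illustrative constant noise level:

    import torch
    from models.archs.component import DenBlock  # path assumed from this diff

    block = DenBlock(num_input_frames=3).eval()
    frames = [torch.rand(1, 3, 64, 64) for _ in range(3)]  # values in [0, 1] per the docstring
    noise_map = torch.full((1, 1, 64, 64), 0.05)           # hypothetical noise level
    with torch.no_grad():
        denoised = block(frames[0], frames[1], frames[2], noise_map)
    print(denoised.shape)  # torch.Size([1, 3, 64, 64]), same shape as the middle frame

Note that H and W must be divisible by 4 because the block downsamples twice by a factor of 2.)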
0000000000000000000000000000000000000000..22086ce145867dd3d5ef974adc8d642a05fcddff Binary files /dev/null and b/IIR-Lab/models/archs/dcn/__pycache__/deform_conv.cpython-39.pyc differ diff --git a/IIR-Lab/models/archs/dcn/deform_conv.py b/IIR-Lab/models/archs/dcn/deform_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..6268ca825d59ef4a30d4d2156c4438cbbe9b3c1e --- /dev/null +++ b/IIR-Lab/models/archs/dcn/deform_conv.py @@ -0,0 +1,379 @@ +import math +import os +import torch +from torch import nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn import functional as F +from torch.nn.modules.utils import _pair, _single + +BASICSR_JIT = os.getenv('BASICSR_JIT') +if BASICSR_JIT == 'True': + from torch.utils.cpp_extension import load + module_path = os.path.dirname(__file__) + deform_conv_ext = load( + 'deform_conv', + sources=[ + os.path.join(module_path, 'src', 'deform_conv_ext.cpp'), + os.path.join(module_path, 'src', 'deform_conv_cuda.cpp'), + os.path.join(module_path, 'src', 'deform_conv_cuda_kernel.cu'), + ], + ) +else: + try: + from . import deform_conv_ext + except ImportError: + pass + # avoid annoying print output + # print(f'Cannot import deform_conv_ext. Error: {error}. You may need to: \n ' + # '1. compile with BASICSR_EXT=True. or\n ' + # '2. set BASICSR_JIT=True during running') + + +class DeformConvFunction(Function): + + @staticmethod + def forward(ctx, + input, + offset, + weight, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + im2col_step=64): + if input is not None and input.dim() != 4: + raise ValueError(f'Expected 4D tensor as input, got {input.dim()}D tensor instead.') + ctx.stride = _pair(stride) + ctx.padding = _pair(padding) + ctx.dilation = _pair(dilation) + ctx.groups = groups + ctx.deformable_groups = deformable_groups + ctx.im2col_step = im2col_step + + ctx.save_for_backward(input, offset, weight) + + output = input.new_empty(DeformConvFunction._output_size(input, weight, ctx.padding, ctx.dilation, ctx.stride)) + + ctx.bufs_ = [input.new_empty(0), input.new_empty(0)] # columns, ones + + if not input.is_cuda: + raise NotImplementedError + else: + cur_im2col_step = min(ctx.im2col_step, input.shape[0]) + assert (input.shape[0] % cur_im2col_step) == 0, 'im2col step must divide batchsize' + deform_conv_ext.deform_conv_forward(input, weight, + offset, output, ctx.bufs_[0], ctx.bufs_[1], weight.size(3), + weight.size(2), ctx.stride[1], ctx.stride[0], ctx.padding[1], + ctx.padding[0], ctx.dilation[1], ctx.dilation[0], ctx.groups, + ctx.deformable_groups, cur_im2col_step) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, offset, weight = ctx.saved_tensors + + grad_input = grad_offset = grad_weight = None + + if not grad_output.is_cuda: + raise NotImplementedError + else: + cur_im2col_step = min(ctx.im2col_step, input.shape[0]) + assert (input.shape[0] % cur_im2col_step) == 0, 'im2col step must divide batchsize' + + if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]: + grad_input = torch.zeros_like(input) + grad_offset = torch.zeros_like(offset) + deform_conv_ext.deform_conv_backward_input(input, offset, grad_output, grad_input, + grad_offset, weight, ctx.bufs_[0], weight.size(3), + weight.size(2), ctx.stride[1], ctx.stride[0], ctx.padding[1], + ctx.padding[0], ctx.dilation[1], ctx.dilation[0], ctx.groups, + ctx.deformable_groups, cur_im2col_step) + + if ctx.needs_input_grad[2]: + grad_weight = 
torch.zeros_like(weight) + deform_conv_ext.deform_conv_backward_parameters(input, offset, grad_output, grad_weight, + ctx.bufs_[0], ctx.bufs_[1], weight.size(3), + weight.size(2), ctx.stride[1], ctx.stride[0], + ctx.padding[1], ctx.padding[0], ctx.dilation[1], + ctx.dilation[0], ctx.groups, ctx.deformable_groups, 1, + cur_im2col_step) + + return (grad_input, grad_offset, grad_weight, None, None, None, None, None) + + @staticmethod + def _output_size(input, weight, padding, dilation, stride): + channels = weight.size(0) + output_size = (input.size(0), channels) + for d in range(input.dim() - 2): + in_size = input.size(d + 2) + pad = padding[d] + kernel = dilation[d] * (weight.size(d + 2) - 1) + 1 + stride_ = stride[d] + output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, ) + if not all(map(lambda s: s > 0, output_size)): + raise ValueError(f'convolution input is too small (output would be {"x".join(map(str, output_size))})') + return output_size + + +class ModulatedDeformConvFunction(Function): + + @staticmethod + def forward(ctx, + input, + offset, + mask, + weight, + bias=None, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1): + ctx.stride = stride + ctx.padding = padding + ctx.dilation = dilation + ctx.groups = groups + ctx.deformable_groups = deformable_groups + ctx.with_bias = bias is not None + if not ctx.with_bias: + bias = input.new_empty(1) # fake tensor + if not input.is_cuda: + raise NotImplementedError + if weight.requires_grad or mask.requires_grad or offset.requires_grad or input.requires_grad: + ctx.save_for_backward(input, offset, mask, weight, bias) + output = input.new_empty(ModulatedDeformConvFunction._infer_shape(ctx, input, weight)) + ctx._bufs = [input.new_empty(0), input.new_empty(0)] + deform_conv_ext.modulated_deform_conv_forward(input, weight, bias, ctx._bufs[0], offset, mask, output, + ctx._bufs[1], weight.shape[2], weight.shape[3], ctx.stride, + ctx.stride, ctx.padding, ctx.padding, ctx.dilation, ctx.dilation, + ctx.groups, ctx.deformable_groups, ctx.with_bias) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + if not grad_output.is_cuda: + raise NotImplementedError + input, offset, mask, weight, bias = ctx.saved_tensors + grad_input = torch.zeros_like(input) + grad_offset = torch.zeros_like(offset) + grad_mask = torch.zeros_like(mask) + grad_weight = torch.zeros_like(weight) + grad_bias = torch.zeros_like(bias) + deform_conv_ext.modulated_deform_conv_backward(input, weight, bias, ctx._bufs[0], offset, mask, ctx._bufs[1], + grad_input, grad_weight, grad_bias, grad_offset, grad_mask, + grad_output, weight.shape[2], weight.shape[3], ctx.stride, + ctx.stride, ctx.padding, ctx.padding, ctx.dilation, ctx.dilation, + ctx.groups, ctx.deformable_groups, ctx.with_bias) + if not ctx.with_bias: + grad_bias = None + + return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias, None, None, None, None, None) + + @staticmethod + def _infer_shape(ctx, input, weight): + n = input.size(0) + channels_out = weight.size(0) + height, width = input.shape[2:4] + kernel_h, kernel_w = weight.shape[2:4] + height_out = (height + 2 * ctx.padding - (ctx.dilation * (kernel_h - 1) + 1)) // ctx.stride + 1 + width_out = (width + 2 * ctx.padding - (ctx.dilation * (kernel_w - 1) + 1)) // ctx.stride + 1 + return n, channels_out, height_out, width_out + + +deform_conv = DeformConvFunction.apply +modulated_deform_conv = ModulatedDeformConvFunction.apply + + +class DeformConv(nn.Module): + + def __init__(self, + 
in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + bias=False): + super(DeformConv, self).__init__() + + assert not bias + assert in_channels % groups == 0, f'in_channels {in_channels} is not divisible by groups {groups}' + assert out_channels % groups == 0, f'out_channels {out_channels} is not divisible by groups {groups}' + + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = _pair(stride) + self.padding = _pair(padding) + self.dilation = _pair(dilation) + self.groups = groups + self.deformable_groups = deformable_groups + # enable compatibility with nn.Conv2d + self.transposed = False + self.output_padding = _single(0) + + self.weight = nn.Parameter(torch.Tensor(out_channels, in_channels // self.groups, *self.kernel_size)) + + self.reset_parameters() + + def reset_parameters(self): + n = self.in_channels + for k in self.kernel_size: + n *= k + stdv = 1. / math.sqrt(n) + self.weight.data.uniform_(-stdv, stdv) + + def forward(self, x, offset): + # To fix an assert error in deform_conv_cuda.cpp:128 + # input image is smaller than kernel + input_pad = (x.size(2) < self.kernel_size[0] or x.size(3) < self.kernel_size[1]) + if input_pad: + pad_h = max(self.kernel_size[0] - x.size(2), 0) + pad_w = max(self.kernel_size[1] - x.size(3), 0) + x = F.pad(x, (0, pad_w, 0, pad_h), 'constant', 0).contiguous() + offset = F.pad(offset, (0, pad_w, 0, pad_h), 'constant', 0).contiguous() + out = deform_conv(x, offset, self.weight, self.stride, self.padding, self.dilation, self.groups, + self.deformable_groups) + if input_pad: + out = out[:, :, :out.size(2) - pad_h, :out.size(3) - pad_w].contiguous() + return out + + +class DeformConvPack(DeformConv): + """A Deformable Conv Encapsulation that acts as normal Conv layers. + + Args: + in_channels (int): Same as nn.Conv2d. + out_channels (int): Same as nn.Conv2d. + kernel_size (int or tuple[int]): Same as nn.Conv2d. + stride (int or tuple[int]): Same as nn.Conv2d. + padding (int or tuple[int]): Same as nn.Conv2d. + dilation (int or tuple[int]): Same as nn.Conv2d. + groups (int): Same as nn.Conv2d. + bias (bool or str): If specified as `auto`, it will be decided by the + norm_cfg. Bias will be set as True if norm_cfg is None, otherwise + False. 
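+
+    Example (illustrative sketch added here, not from the upstream code; it
+    assumes the compiled `deform_conv_ext` extension and a CUDA device are
+    available, since the CPU path raises NotImplementedError):
+
+        >>> dcn = DeformConvPack(16, 32, kernel_size=3, padding=1).cuda()
+        >>> x = torch.randn(2, 16, 64, 64).cuda()
+        >>> out = dcn(x)  # offsets are predicted internally by self.conv_offset
+        >>> out.shape
+        torch.Size([2, 32, 64, 64])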
+ """ + + _version = 2 + + def __init__(self, *args, **kwargs): + super(DeformConvPack, self).__init__(*args, **kwargs) + + self.conv_offset = nn.Conv2d( + self.in_channels, + self.deformable_groups * 2 * self.kernel_size[0] * self.kernel_size[1], + kernel_size=self.kernel_size, + stride=_pair(self.stride), + padding=_pair(self.padding), + dilation=_pair(self.dilation), + bias=True) + self.init_offset() + + def init_offset(self): + self.conv_offset.weight.data.zero_() + self.conv_offset.bias.data.zero_() + + def forward(self, x): + offset = self.conv_offset(x) + return deform_conv(x, offset, self.weight, self.stride, self.padding, self.dilation, self.groups, + self.deformable_groups) + + +class ModulatedDeformConv(nn.Module): + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + bias=True): + super(ModulatedDeformConv, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = stride + self.padding = padding + self.dilation = dilation + self.groups = groups + self.deformable_groups = deformable_groups + self.with_bias = bias + # enable compatibility with nn.Conv2d + self.transposed = False + self.output_padding = _single(0) + + self.weight = nn.Parameter(torch.Tensor(out_channels, in_channels // groups, *self.kernel_size)) + if bias: + self.bias = nn.Parameter(torch.Tensor(out_channels)) + else: + self.register_parameter('bias', None) + self.init_weights() + + def init_weights(self): + n = self.in_channels + for k in self.kernel_size: + n *= k + stdv = 1. / math.sqrt(n) + self.weight.data.uniform_(-stdv, stdv) + if self.bias is not None: + self.bias.data.zero_() + + def forward(self, x, offset, mask): + return modulated_deform_conv(x, offset, mask, self.weight, self.bias, self.stride, self.padding, self.dilation, + self.groups, self.deformable_groups) + + +class ModulatedDeformConvPack(ModulatedDeformConv): + """A ModulatedDeformable Conv Encapsulation that acts as normal Conv layers. + + Args: + in_channels (int): Same as nn.Conv2d. + out_channels (int): Same as nn.Conv2d. + kernel_size (int or tuple[int]): Same as nn.Conv2d. + stride (int or tuple[int]): Same as nn.Conv2d. + padding (int or tuple[int]): Same as nn.Conv2d. + dilation (int or tuple[int]): Same as nn.Conv2d. + groups (int): Same as nn.Conv2d. + bias (bool or str): If specified as `auto`, it will be decided by the + norm_cfg. Bias will be set as True if norm_cfg is None, otherwise + False. 
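+
+    Example (illustrative sketch added here, not from the upstream code;
+    assumes the compiled `deform_conv_ext` extension and a CUDA device):
+
+        >>> dcn = ModulatedDeformConvPack(16, 32, kernel_size=3, padding=1).cuda()
+        >>> x = torch.randn(2, 16, 64, 64).cuda()
+        >>> out = dcn(x)  # conv_offset predicts offsets o1, o2 and a sigmoid mask
+        >>> out.shape
+        torch.Size([2, 32, 64, 64])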
+    """
+
+    _version = 2
+
+    def __init__(self, *args, **kwargs):
+        super(ModulatedDeformConvPack, self).__init__(*args, **kwargs)
+
+        self.conv_offset = nn.Conv2d(
+            self.in_channels,
+            self.deformable_groups * 3 * self.kernel_size[0] * self.kernel_size[1],
+            kernel_size=self.kernel_size,
+            stride=_pair(self.stride),
+            padding=_pair(self.padding),
+            dilation=_pair(self.dilation),
+            bias=True)
+        self.init_weights()
+
+    def init_weights(self):
+        super(ModulatedDeformConvPack, self).init_weights()
+        if hasattr(self, 'conv_offset'):
+            self.conv_offset.weight.data.zero_()
+            self.conv_offset.bias.data.zero_()
+
+    def forward(self, x):
+        out = self.conv_offset(x)
+        o1, o2, mask = torch.chunk(out, 3, dim=1)
+        offset = torch.cat((o1, o2), dim=1)
+        mask = torch.sigmoid(mask)
+        return modulated_deform_conv(x, offset, mask, self.weight, self.bias, self.stride, self.padding, self.dilation,
+                                     self.groups, self.deformable_groups)
diff --git a/IIR-Lab/models/archs/dcn/src/deform_conv_cuda.cpp b/IIR-Lab/models/archs/dcn/src/deform_conv_cuda.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b465c493a3dd67d320b7a8997fbd501d2f89c807
--- /dev/null
+++ b/IIR-Lab/models/archs/dcn/src/deform_conv_cuda.cpp
@@ -0,0 +1,685 @@
+// modify from
+// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda.c
+
+#include <torch/extension.h>
+#include <ATen/DeviceGuard.h>
+
+#include <cmath>
+#include <vector>
+
+void deformable_im2col(const at::Tensor data_im, const at::Tensor data_offset,
+                       const int channels, const int height, const int width,
+                       const int ksize_h, const int ksize_w, const int pad_h,
+                       const int pad_w, const int stride_h, const int stride_w,
+                       const int dilation_h, const int dilation_w,
+                       const int parallel_imgs, const int deformable_group,
+                       at::Tensor data_col);
+
+void deformable_col2im(const at::Tensor data_col, const at::Tensor data_offset,
+                       const int channels, const int height, const int width,
+                       const int ksize_h, const int ksize_w, const int pad_h,
+                       const int pad_w, const int stride_h, const int stride_w,
+                       const int dilation_h, const int dilation_w,
+                       const int parallel_imgs, const int deformable_group,
+                       at::Tensor grad_im);
+
+void deformable_col2im_coord(
+    const at::Tensor data_col, const at::Tensor data_im,
+    const at::Tensor data_offset, const int channels, const int height,
+    const int width, const int ksize_h, const int ksize_w, const int pad_h,
+    const int pad_w, const int stride_h, const int stride_w,
+    const int dilation_h, const int dilation_w, const int parallel_imgs,
+    const int deformable_group, at::Tensor grad_offset);
+
+void modulated_deformable_im2col_cuda(
+    const at::Tensor data_im, const at::Tensor data_offset,
+    const at::Tensor data_mask, const int batch_size, const int channels,
+    const int height_im, const int width_im, const int height_col,
+    const int width_col, const int kernel_h, const int kenerl_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    const int dilation_h, const int dilation_w, const int deformable_group,
+    at::Tensor data_col);
+
+void modulated_deformable_col2im_cuda(
+    const at::Tensor data_col, const at::Tensor data_offset,
+    const at::Tensor data_mask, const int batch_size, const int channels,
+    const int height_im, const int width_im, const int height_col,
+    const int width_col, const int kernel_h, const int kenerl_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    const int dilation_h, const int dilation_w, const int deformable_group,
+    at::Tensor grad_im);
+
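+// Commentary added for readability (not part of the original source): the
+// CUDA helpers declared above implement deformable convolution as a GEMM.
+// The forward pass materialises bilinearly-sampled patches with
+// deformable_im2col and multiplies them by the flattened weights; the
+// backward pass reuses the same columns, with deformable_col2im scattering
+// gradients back to the input image and deformable_col2im_coord applying the
+// chain rule through the bilinear sampler to get offset gradients. The
+// modulated_* variants additionally multiply each sampled value by a
+// per-location mask (DCNv2).
+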
+void modulated_deformable_col2im_coord_cuda( + const at::Tensor data_col, const at::Tensor data_im, + const at::Tensor data_offset, const at::Tensor data_mask, + const int batch_size, const int channels, const int height_im, + const int width_im, const int height_col, const int width_col, + const int kernel_h, const int kenerl_w, const int pad_h, const int pad_w, + const int stride_h, const int stride_w, const int dilation_h, + const int dilation_w, const int deformable_group, at::Tensor grad_offset, + at::Tensor grad_mask); + +void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput, + at::Tensor weight, int kH, int kW, int dH, int dW, int padH, + int padW, int dilationH, int dilationW, int group, + int deformable_group) { + TORCH_CHECK(weight.ndimension() == 4, + "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, " + "but got: %s", + weight.ndimension()); + + TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); + + TORCH_CHECK(kW > 0 && kH > 0, + "kernel size should be greater than zero, but got kH: %d kW: %d", kH, + kW); + + TORCH_CHECK((weight.size(2) == kH && weight.size(3) == kW), + "kernel size should be consistent with weight, ", + "but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", kH, + kW, weight.size(2), weight.size(3)); + + TORCH_CHECK(dW > 0 && dH > 0, + "stride should be greater than zero, but got dH: %d dW: %d", dH, dW); + + TORCH_CHECK( + dilationW > 0 && dilationH > 0, + "dilation should be greater than 0, but got dilationH: %d dilationW: %d", + dilationH, dilationW); + + int ndim = input.ndimension(); + int dimf = 0; + int dimh = 1; + int dimw = 2; + + if (ndim == 4) { + dimf++; + dimh++; + dimw++; + } + + TORCH_CHECK(ndim == 3 || ndim == 4, "3D or 4D input tensor expected but got: %s", + ndim); + + long nInputPlane = weight.size(1) * group; + long inputHeight = input.size(dimh); + long inputWidth = input.size(dimw); + long nOutputPlane = weight.size(0); + long outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + + TORCH_CHECK(nInputPlane % deformable_group == 0, + "input channels must divide deformable group size"); + + if (outputWidth < 1 || outputHeight < 1) + AT_ERROR( + "Given input size: (%ld x %ld x %ld). " + "Calculated output size: (%ld x %ld x %ld). 
Output size is too small", + nInputPlane, inputHeight, inputWidth, nOutputPlane, outputHeight, + outputWidth); + + TORCH_CHECK(input.size(1) == nInputPlane, + "invalid number of input planes, expected: %d, but got: %d", + nInputPlane, input.size(1)); + + TORCH_CHECK((inputHeight >= kH && inputWidth >= kW), + "input image is smaller than kernel"); + + TORCH_CHECK((offset.size(2) == outputHeight && offset.size(3) == outputWidth), + "invalid spatial size of offset, expected height: %d width: %d, but " + "got height: %d width: %d", + outputHeight, outputWidth, offset.size(2), offset.size(3)); + + TORCH_CHECK((offset.size(1) == deformable_group * 2 * kH * kW), + "invalid number of channels of offset"); + + if (gradOutput != NULL) { + TORCH_CHECK(gradOutput->size(dimf) == nOutputPlane, + "invalid number of gradOutput planes, expected: %d, but got: %d", + nOutputPlane, gradOutput->size(dimf)); + + TORCH_CHECK((gradOutput->size(dimh) == outputHeight && + gradOutput->size(dimw) == outputWidth), + "invalid size of gradOutput, expected height: %d width: %d , but " + "got height: %d width: %d", + outputHeight, outputWidth, gradOutput->size(dimh), + gradOutput->size(dimw)); + } +} + +int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight, + at::Tensor offset, at::Tensor output, + at::Tensor columns, at::Tensor ones, int kW, + int kH, int dW, int dH, int padW, int padH, + int dilationW, int dilationH, int group, + int deformable_group, int im2col_step) { + // todo: resize columns to include im2col: done + // todo: add im2col_step as input + // todo: add new output buffer and transpose it to output (or directly + // transpose output) todo: possibly change data indexing because of + // parallel_imgs + + shape_check(input, offset, NULL, weight, kH, kW, dH, dW, padH, padW, + dilationH, dilationW, group, deformable_group); + at::DeviceGuard guard(input.device()); + + input = input.contiguous(); + offset = offset.contiguous(); + weight = weight.contiguous(); + + int batch = 1; + if (input.ndimension() == 3) { + // Force batch + batch = 0; + input.unsqueeze_(0); + offset.unsqueeze_(0); + } + + // todo: assert batchsize dividable by im2col_step + + long batchSize = input.size(0); + long nInputPlane = input.size(1); + long inputHeight = input.size(2); + long inputWidth = input.size(3); + + long nOutputPlane = weight.size(0); + + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); + + output = output.view({batchSize / im2col_step, im2col_step, nOutputPlane, + outputHeight, outputWidth}); + columns = at::zeros( + {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, + input.options()); + + if (ones.ndimension() != 2 || + ones.size(0) * ones.size(1) < outputHeight * outputWidth) { + ones = at::ones({outputHeight, outputWidth}, input.options()); + } + + input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, + inputHeight, inputWidth}); + offset = + offset.view({batchSize / im2col_step, im2col_step, + deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + at::Tensor output_buffer = + at::zeros({batchSize / im2col_step, nOutputPlane, + im2col_step * outputHeight, outputWidth}, + output.options()); + + output_buffer = output_buffer.view( + {output_buffer.size(0), group, output_buffer.size(1) / group, + output_buffer.size(2), output_buffer.size(3)}); + + for (int elt = 0; elt 
< batchSize / im2col_step; elt++) { + deformable_im2col(input[elt], offset[elt], nInputPlane, inputHeight, + inputWidth, kH, kW, padH, padW, dH, dW, dilationH, + dilationW, im2col_step, deformable_group, columns); + + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + weight = weight.view({group, weight.size(0) / group, weight.size(1), + weight.size(2), weight.size(3)}); + + for (int g = 0; g < group; g++) { + output_buffer[elt][g] = output_buffer[elt][g] + .flatten(1) + .addmm_(weight[g].flatten(1), columns[g]) + .view_as(output_buffer[elt][g]); + } + } + + output_buffer = output_buffer.view( + {output_buffer.size(0), output_buffer.size(1) * output_buffer.size(2), + output_buffer.size(3), output_buffer.size(4)}); + + output_buffer = output_buffer.view({batchSize / im2col_step, nOutputPlane, + im2col_step, outputHeight, outputWidth}); + output_buffer.transpose_(1, 2); + output.copy_(output_buffer); + output = output.view({batchSize, nOutputPlane, outputHeight, outputWidth}); + + input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); + offset = offset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + if (batch == 0) { + output = output.view({nOutputPlane, outputHeight, outputWidth}); + input = input.view({nInputPlane, inputHeight, inputWidth}); + offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); + } + + return 1; +} + +int deform_conv_backward_input_cuda(at::Tensor input, at::Tensor offset, + at::Tensor gradOutput, at::Tensor gradInput, + at::Tensor gradOffset, at::Tensor weight, + at::Tensor columns, int kW, int kH, int dW, + int dH, int padW, int padH, int dilationW, + int dilationH, int group, + int deformable_group, int im2col_step) { + shape_check(input, offset, &gradOutput, weight, kH, kW, dH, dW, padH, padW, + dilationH, dilationW, group, deformable_group); + at::DeviceGuard guard(input.device()); + + input = input.contiguous(); + offset = offset.contiguous(); + gradOutput = gradOutput.contiguous(); + weight = weight.contiguous(); + + int batch = 1; + + if (input.ndimension() == 3) { + // Force batch + batch = 0; + input = input.view({1, input.size(0), input.size(1), input.size(2)}); + offset = offset.view({1, offset.size(0), offset.size(1), offset.size(2)}); + gradOutput = gradOutput.view( + {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)}); + } + + long batchSize = input.size(0); + long nInputPlane = input.size(1); + long inputHeight = input.size(2); + long inputWidth = input.size(3); + + long nOutputPlane = weight.size(0); + + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + TORCH_CHECK((offset.size(0) == batchSize), 3, "invalid batch size of offset"); + gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); + columns = at::zeros( + {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, + input.options()); + + // change order of grad output + gradOutput = gradOutput.view({batchSize / im2col_step, im2col_step, + nOutputPlane, outputHeight, outputWidth}); + gradOutput.transpose_(1, 2); + + gradInput = gradInput.view({batchSize / im2col_step, im2col_step, nInputPlane, + inputHeight, inputWidth}); + input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, + inputHeight, inputWidth}); + gradOffset = gradOffset.view({batchSize / im2col_step, im2col_step, + deformable_group * 2 * kH * kW, outputHeight, 
+ outputWidth}); + offset = + offset.view({batchSize / im2col_step, im2col_step, + deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + for (int elt = 0; elt < batchSize / im2col_step; elt++) { + // divide into groups + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + weight = weight.view({group, weight.size(0) / group, weight.size(1), + weight.size(2), weight.size(3)}); + gradOutput = gradOutput.view( + {gradOutput.size(0), group, gradOutput.size(1) / group, + gradOutput.size(2), gradOutput.size(3), gradOutput.size(4)}); + + for (int g = 0; g < group; g++) { + columns[g] = columns[g].addmm_(weight[g].flatten(1).transpose(0, 1), + gradOutput[elt][g].flatten(1), 0.0f, 1.0f); + } + + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + gradOutput = gradOutput.view( + {gradOutput.size(0), gradOutput.size(1) * gradOutput.size(2), + gradOutput.size(3), gradOutput.size(4), gradOutput.size(5)}); + + deformable_col2im_coord(columns, input[elt], offset[elt], nInputPlane, + inputHeight, inputWidth, kH, kW, padH, padW, dH, dW, + dilationH, dilationW, im2col_step, deformable_group, + gradOffset[elt]); + + deformable_col2im(columns, offset[elt], nInputPlane, inputHeight, + inputWidth, kH, kW, padH, padW, dH, dW, dilationH, + dilationW, im2col_step, deformable_group, gradInput[elt]); + } + + gradOutput.transpose_(1, 2); + gradOutput = + gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); + + gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); + input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); + gradOffset = gradOffset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + offset = offset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + if (batch == 0) { + gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); + input = input.view({nInputPlane, inputHeight, inputWidth}); + gradInput = gradInput.view({nInputPlane, inputHeight, inputWidth}); + offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); + gradOffset = + gradOffset.view({offset.size(1), offset.size(2), offset.size(3)}); + } + + return 1; +} + +int deform_conv_backward_parameters_cuda( + at::Tensor input, at::Tensor offset, at::Tensor gradOutput, + at::Tensor gradWeight, // at::Tensor gradBias, + at::Tensor columns, at::Tensor ones, int kW, int kH, int dW, int dH, + int padW, int padH, int dilationW, int dilationH, int group, + int deformable_group, float scale, int im2col_step) { + // todo: transpose and reshape outGrad + // todo: reshape columns + // todo: add im2col_step as input + + shape_check(input, offset, &gradOutput, gradWeight, kH, kW, dH, dW, padH, + padW, dilationH, dilationW, group, deformable_group); + at::DeviceGuard guard(input.device()); + + input = input.contiguous(); + offset = offset.contiguous(); + gradOutput = gradOutput.contiguous(); + + int batch = 1; + + if (input.ndimension() == 3) { + // Force batch + batch = 0; + input = input.view( + at::IntList({1, input.size(0), input.size(1), input.size(2)})); + gradOutput = gradOutput.view( + {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)}); + } + + long batchSize = input.size(0); + long nInputPlane = input.size(1); + long inputHeight = input.size(2); + long inputWidth = input.size(3); + + long nOutputPlane = gradWeight.size(0); + + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight 
= + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); + + columns = at::zeros( + {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, + input.options()); + + gradOutput = gradOutput.view({batchSize / im2col_step, im2col_step, + nOutputPlane, outputHeight, outputWidth}); + gradOutput.transpose_(1, 2); + + at::Tensor gradOutputBuffer = at::zeros_like(gradOutput); + gradOutputBuffer = + gradOutputBuffer.view({batchSize / im2col_step, nOutputPlane, im2col_step, + outputHeight, outputWidth}); + gradOutputBuffer.copy_(gradOutput); + gradOutputBuffer = + gradOutputBuffer.view({batchSize / im2col_step, nOutputPlane, + im2col_step * outputHeight, outputWidth}); + + gradOutput.transpose_(1, 2); + gradOutput = + gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); + + input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, + inputHeight, inputWidth}); + offset = + offset.view({batchSize / im2col_step, im2col_step, + deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + for (int elt = 0; elt < batchSize / im2col_step; elt++) { + deformable_im2col(input[elt], offset[elt], nInputPlane, inputHeight, + inputWidth, kH, kW, padH, padW, dH, dW, dilationH, + dilationW, im2col_step, deformable_group, columns); + + // divide into group + gradOutputBuffer = gradOutputBuffer.view( + {gradOutputBuffer.size(0), group, gradOutputBuffer.size(1) / group, + gradOutputBuffer.size(2), gradOutputBuffer.size(3)}); + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + gradWeight = + gradWeight.view({group, gradWeight.size(0) / group, gradWeight.size(1), + gradWeight.size(2), gradWeight.size(3)}); + + for (int g = 0; g < group; g++) { + gradWeight[g] = gradWeight[g] + .flatten(1) + .addmm_(gradOutputBuffer[elt][g].flatten(1), + columns[g].transpose(1, 0), 1.0, scale) + .view_as(gradWeight[g]); + } + gradOutputBuffer = gradOutputBuffer.view( + {gradOutputBuffer.size(0), + gradOutputBuffer.size(1) * gradOutputBuffer.size(2), + gradOutputBuffer.size(3), gradOutputBuffer.size(4)}); + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + gradWeight = gradWeight.view({gradWeight.size(0) * gradWeight.size(1), + gradWeight.size(2), gradWeight.size(3), + gradWeight.size(4)}); + } + + input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); + offset = offset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + if (batch == 0) { + gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); + input = input.view({nInputPlane, inputHeight, inputWidth}); + } + + return 1; +} + +void modulated_deform_conv_cuda_forward( + at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, + at::Tensor offset, at::Tensor mask, at::Tensor output, at::Tensor columns, + int kernel_h, int kernel_w, const int stride_h, const int stride_w, + const int pad_h, const int pad_w, const int dilation_h, + const int dilation_w, const int group, const int deformable_group, + const bool with_bias) { + TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); + TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); + at::DeviceGuard guard(input.device()); + + const int batch = input.size(0); + const int channels = input.size(1); + const int height = input.size(2); + const int width = input.size(3); + + const int channels_out = weight.size(0); + const int 
channels_kernel = weight.size(1); + const int kernel_h_ = weight.size(2); + const int kernel_w_ = weight.size(3); + + if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) + AT_ERROR("Input shape and kernel shape won't match: (%d x %d vs %d x %d).", + kernel_h_, kernel_w, kernel_h_, kernel_w_); + if (channels != channels_kernel * group) + AT_ERROR("Input shape and kernel channels won't match: (%d vs %d).", + channels, channels_kernel * group); + + const int height_out = + (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; + const int width_out = + (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + + if (ones.ndimension() != 2 || + ones.size(0) * ones.size(1) < height_out * width_out) { + // Resize plane and fill with ones... + ones = at::ones({height_out, width_out}, input.options()); + } + + // resize output + output = output.view({batch, channels_out, height_out, width_out}).zero_(); + // resize temporary columns + columns = + at::zeros({channels * kernel_h * kernel_w, 1 * height_out * width_out}, + input.options()); + + output = output.view({output.size(0), group, output.size(1) / group, + output.size(2), output.size(3)}); + + for (int b = 0; b < batch; b++) { + modulated_deformable_im2col_cuda( + input[b], offset[b], mask[b], 1, channels, height, width, height_out, + width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, deformable_group, columns); + + // divide into group + weight = weight.view({group, weight.size(0) / group, weight.size(1), + weight.size(2), weight.size(3)}); + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + + for (int g = 0; g < group; g++) { + output[b][g] = output[b][g] + .flatten(1) + .addmm_(weight[g].flatten(1), columns[g]) + .view_as(output[b][g]); + } + + weight = weight.view({weight.size(0) * weight.size(1), weight.size(2), + weight.size(3), weight.size(4)}); + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + } + + output = output.view({output.size(0), output.size(1) * output.size(2), + output.size(3), output.size(4)}); + + if (with_bias) { + output += bias.view({1, bias.size(0), 1, 1}); + } +} + +void modulated_deform_conv_cuda_backward( + at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, + at::Tensor offset, at::Tensor mask, at::Tensor columns, + at::Tensor grad_input, at::Tensor grad_weight, at::Tensor grad_bias, + at::Tensor grad_offset, at::Tensor grad_mask, at::Tensor grad_output, + int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, + int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, + const bool with_bias) { + TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); + TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); + at::DeviceGuard guard(input.device()); + + const int batch = input.size(0); + const int channels = input.size(1); + const int height = input.size(2); + const int width = input.size(3); + + const int channels_kernel = weight.size(1); + const int kernel_h_ = weight.size(2); + const int kernel_w_ = weight.size(3); + if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) + AT_ERROR("Input shape and kernel shape won't match: (%d x %d vs %d x %d).", + kernel_h_, kernel_w, kernel_h_, kernel_w_); + if (channels != channels_kernel * group) + AT_ERROR("Input shape and kernel channels won't match: (%d vs %d).", + channels, channels_kernel * group); + + const int height_out = + (height + 2 * pad_h - (dilation_h 
* (kernel_h - 1) + 1)) / stride_h + 1; + const int width_out = + (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + + if (ones.ndimension() != 2 || + ones.size(0) * ones.size(1) < height_out * width_out) { + // Resize plane and fill with ones... + ones = at::ones({height_out, width_out}, input.options()); + } + + grad_input = grad_input.view({batch, channels, height, width}); + columns = at::zeros({channels * kernel_h * kernel_w, height_out * width_out}, + input.options()); + + grad_output = + grad_output.view({grad_output.size(0), group, grad_output.size(1) / group, + grad_output.size(2), grad_output.size(3)}); + + for (int b = 0; b < batch; b++) { + // divide int group + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + weight = weight.view({group, weight.size(0) / group, weight.size(1), + weight.size(2), weight.size(3)}); + + for (int g = 0; g < group; g++) { + columns[g].addmm_(weight[g].flatten(1).transpose(0, 1), + grad_output[b][g].flatten(1), 0.0f, 1.0f); + } + + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + weight = weight.view({weight.size(0) * weight.size(1), weight.size(2), + weight.size(3), weight.size(4)}); + + // gradient w.r.t. input coordinate data + modulated_deformable_col2im_coord_cuda( + columns, input[b], offset[b], mask[b], 1, channels, height, width, + height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, + stride_w, dilation_h, dilation_w, deformable_group, grad_offset[b], + grad_mask[b]); + // gradient w.r.t. input data + modulated_deformable_col2im_cuda( + columns, offset[b], mask[b], 1, channels, height, width, height_out, + width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, deformable_group, grad_input[b]); + + // gradient w.r.t. 
weight, dWeight should accumulate across the batch and + // group + modulated_deformable_im2col_cuda( + input[b], offset[b], mask[b], 1, channels, height, width, height_out, + width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, deformable_group, columns); + + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + grad_weight = grad_weight.view({group, grad_weight.size(0) / group, + grad_weight.size(1), grad_weight.size(2), + grad_weight.size(3)}); + if (with_bias) + grad_bias = grad_bias.view({group, grad_bias.size(0) / group}); + + for (int g = 0; g < group; g++) { + grad_weight[g] = + grad_weight[g] + .flatten(1) + .addmm_(grad_output[b][g].flatten(1), columns[g].transpose(0, 1)) + .view_as(grad_weight[g]); + if (with_bias) { + grad_bias[g] = + grad_bias[g] + .view({-1, 1}) + .addmm_(grad_output[b][g].flatten(1), ones.view({-1, 1})) + .view(-1); + } + } + + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + grad_weight = grad_weight.view({grad_weight.size(0) * grad_weight.size(1), + grad_weight.size(2), grad_weight.size(3), + grad_weight.size(4)}); + if (with_bias) + grad_bias = grad_bias.view({grad_bias.size(0) * grad_bias.size(1)}); + } + grad_output = grad_output.view({grad_output.size(0) * grad_output.size(1), + grad_output.size(2), grad_output.size(3), + grad_output.size(4)}); +} diff --git a/IIR-Lab/models/archs/dcn/src/deform_conv_cuda_kernel.cu b/IIR-Lab/models/archs/dcn/src/deform_conv_cuda_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..98752dccf8c58817ca1a952554dd3f33188a2d34 --- /dev/null +++ b/IIR-Lab/models/archs/dcn/src/deform_conv_cuda_kernel.cu @@ -0,0 +1,867 @@ +/*! + ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** + * + * COPYRIGHT + * + * All contributions by the University of California: + * Copyright (c) 2014-2017 The Regents of the University of California (Regents) + * All rights reserved. + * + * All other contributions: + * Copyright (c) 2014-2017, the respective contributors + * All rights reserved. + * + * Caffe uses a shared copyright model: each contributor holds copyright over + * their contributions to Caffe. The project versioning records all such + * contribution and copyright details. If a contributor wants to further mark + * their specific copyright on a particular contribution, they should indicate + * their copyright solely in the commit message of the change when it is + * committed. + * + * LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * CONTRIBUTION AGREEMENT
+ *
+ * By contributing to the BVLC/caffe repository through pull-request, comment,
+ * or otherwise, the contributor releases their content to the
+ * license and copyright terms herein.
+ *
+ ***************** END Caffe Copyright Notice and Disclaimer ********************
+ *
+ * Copyright (c) 2018 Microsoft
+ * Licensed under The MIT License [see LICENSE for details]
+ * \file modulated_deformable_im2col.cuh
+ * \brief Function definitions of converting an image to
+ * column matrix based on kernel, padding, dilation, and offset.
+ * These functions are mainly used in deformable convolution operators.
+ * \ref: https://arxiv.org/abs/1703.06211
+ * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng
+ */
+
+// modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <THC/THCAtomics.cuh>
+#include <stdio.h>
+#include <math.h>
+#include <float.h>
+
+using namespace at;
+
+#define CUDA_KERNEL_LOOP(i, n)                                 \
+  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
+       i += blockDim.x * gridDim.x)
+
+const int CUDA_NUM_THREADS = 1024;
+const int kMaxGridNum = 65535;
+
+inline int GET_BLOCKS(const int N)
+{
+  return std::min(kMaxGridNum, (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS);
+}
+
+template <typename scalar_t>
+__device__ scalar_t deformable_im2col_bilinear(const scalar_t *bottom_data, const int data_width,
+                                               const int height, const int width, scalar_t h, scalar_t w)
+{
+
+  int h_low = floor(h);
+  int w_low = floor(w);
+  int h_high = h_low + 1;
+  int w_high = w_low + 1;
+
+  scalar_t lh = h - h_low;
+  scalar_t lw = w - w_low;
+  scalar_t hh = 1 - lh, hw = 1 - lw;
+
+  scalar_t v1 = 0;
+  if (h_low >= 0 && w_low >= 0)
+    v1 = bottom_data[h_low * data_width + w_low];
+  scalar_t v2 = 0;
+  if (h_low >= 0 && w_high <= width - 1)
+    v2 = bottom_data[h_low * data_width + w_high];
+  scalar_t v3 = 0;
+  if (h_high <= height - 1 && w_low >= 0)
+    v3 = bottom_data[h_high * data_width + w_low];
+  scalar_t v4 = 0;
+  if (h_high <= height - 1 && w_high <= width - 1)
+    v4 = bottom_data[h_high * data_width + w_high];
+
+  scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
+
+  scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
+  return val;
+}
+
+template <typename scalar_t>
+__device__ scalar_t get_gradient_weight(scalar_t argmax_h, scalar_t argmax_w,
+                                        const int h, const int w, const int height, const int width)
+{
+
+  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)
+  {
+    //empty
+    return 0;
+  }
+
+  int argmax_h_low = floor(argmax_h);
+  int argmax_w_low = floor(argmax_w);
+  int argmax_h_high = argmax_h_low + 1;
+  int argmax_w_high = argmax_w_low + 1;
+
+  scalar_t weight = 0;
+  if (h == argmax_h_low && w == argmax_w_low)
+    weight = (h + 1 - argmax_h) * (w + 1 - argmax_w);
+  if (h == argmax_h_low && w == argmax_w_high)
+    weight = (h + 1 - argmax_h) * (argmax_w + 1 - w);
+  if (h == argmax_h_high && w == argmax_w_low)
+    weight = (argmax_h + 1 - h) * (w + 1 - argmax_w);
+  if (h == argmax_h_high && w == argmax_w_high)
+    weight = (argmax_h + 1 - h) * (argmax_w + 1 - w);
+  return weight;
+}
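+
+// Note (added commentary, not in the upstream kernel): deformable_im2col_bilinear
+// and get_gradient_weight are adjoint views of the same bilinear stencil. For a
+// fractional sample at (h, w) with lh = h - floor(h), lw = w - floor(w), the four
+// corner weights are (1-lh)(1-lw), (1-lh)lw, lh(1-lw) and lh*lw; e.g. at
+// (h, w) = (1.25, 2.75) they are 0.1875, 0.5625, 0.0625 and 0.1875, summing to 1.
+// get_gradient_weight returns exactly the weight that corner (h, w) received in
+// the forward interpolation, which is what the col2im scatter pass accumulates.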
+
+template <typename scalar_t>
+__device__ scalar_t get_coordinate_weight(scalar_t argmax_h, scalar_t argmax_w,
+                                          const int height, const int width, const scalar_t *im_data,
+                                          const int data_width, const int bp_dir)
+{
+
+  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)
+  {
+    //empty
+    return 0;
+  }
+
+  int argmax_h_low = floor(argmax_h);
+  int argmax_w_low = floor(argmax_w);
+  int argmax_h_high = argmax_h_low + 1;
+  int argmax_w_high = argmax_w_low + 1;
+
+  scalar_t weight = 0;
+
+  if (bp_dir == 0)
+  {
+    if (argmax_h_low >= 0 && argmax_w_low >= 0)
+      weight += -1 * (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_low * data_width + argmax_w_low];
+    if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
+      weight += -1 * (argmax_w - argmax_w_low) * im_data[argmax_h_low * data_width + argmax_w_high];
+    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
+      weight += (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_high * data_width + argmax_w_low];
+    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
+      weight += (argmax_w - argmax_w_low) * im_data[argmax_h_high * data_width + argmax_w_high];
+  }
+  else if (bp_dir == 1)
+  {
+    if (argmax_h_low >= 0 && argmax_w_low >= 0)
+      weight += -1 * (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_low];
+    if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
+      weight += (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_high];
+    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
+      weight += -1 * (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_low];
+    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
+      weight += (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_high];
+  }
+
+  return weight;
+}
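+
+// Added note: get_coordinate_weight backpropagates through the bilinear
+// sampler itself. With bp_dir == 0 it returns d(sampled value)/d(h) and with
+// bp_dir == 1 it returns d(sampled value)/d(w), i.e. the corner values
+// weighted by the signed derivative of the interpolation weights, which is
+// how the learned offsets receive gradient.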
+
+template <typename scalar_t>
+__global__ void deformable_im2col_gpu_kernel(const int n, const scalar_t *data_im, const scalar_t *data_offset,
+                                             const int height, const int width, const int kernel_h, const int kernel_w,
+                                             const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+                                             const int dilation_h, const int dilation_w, const int channel_per_deformable_group,
+                                             const int batch_size, const int num_channels, const int deformable_group,
+                                             const int height_col, const int width_col,
+                                             scalar_t *data_col)
+{
+  CUDA_KERNEL_LOOP(index, n)
+  {
+    // index index of output matrix
+    const int w_col = index % width_col;
+    const int h_col = (index / width_col) % height_col;
+    const int b_col = (index / width_col / height_col) % batch_size;
+    const int c_im = (index / width_col / height_col) / batch_size;
+    const int c_col = c_im * kernel_h * kernel_w;
+
+    // compute deformable group index
+    const int deformable_group_index = c_im / channel_per_deformable_group;
+
+    const int h_in = h_col * stride_h - pad_h;
+    const int w_in = w_col * stride_w - pad_w;
+    scalar_t *data_col_ptr = data_col + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col;
+    //const scalar_t* data_im_ptr = data_im + ((b_col * num_channels + c_im) * height + h_in) * width + w_in;
+    const scalar_t *data_im_ptr = data_im + (b_col * num_channels + c_im) * height * width;
+    const scalar_t *data_offset_ptr = data_offset + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;
+
+    for (int i = 0; i < kernel_h; ++i)
+    {
+      for (int j = 0; j < kernel_w; ++j)
+      {
+        const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col;
+        const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + w_col;
+        const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr];
+        const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr];
+        scalar_t val = static_cast<scalar_t>(0);
+        const scalar_t h_im = h_in + i * dilation_h + offset_h;
+        const scalar_t w_im = w_in + j * dilation_w + offset_w;
+        if (h_im > -1 && w_im > -1 && h_im < height && w_im < width)
+        {
+          //const scalar_t map_h = i * dilation_h + offset_h;
+          //const scalar_t map_w = j * dilation_w + offset_w;
+          //const int cur_height = height - h_in;
+          //const int cur_width = width - w_in;
+          //val = deformable_im2col_bilinear(data_im_ptr, width, cur_height, cur_width, map_h, map_w);
+          val = deformable_im2col_bilinear(data_im_ptr, width, height, width, h_im, w_im);
+        }
+        *data_col_ptr = val;
+        data_col_ptr += batch_size * height_col * width_col;
+      }
+    }
+  }
+}
+
+void deformable_im2col(
+    const at::Tensor data_im, const at::Tensor data_offset, const int channels,
+    const int height, const int width, const int ksize_h, const int ksize_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    const int dilation_h, const int dilation_w, const int parallel_imgs,
+    const int deformable_group, at::Tensor data_col)
+{
+  // num_axes should be smaller than block size
+  // todo: check parallel_imgs is correctly passed in
+  int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1;
+  int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1;
+  int num_kernels = channels * height_col * width_col * parallel_imgs;
+  int channel_per_deformable_group = channels / deformable_group;
+
+  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+      data_im.scalar_type(), "deformable_im2col_gpu", ([&] {
+        const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
+        const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+        scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
+
+        deformable_im2col_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
+            num_kernels, data_im_, data_offset_, height, width, ksize_h, ksize_w,
+            pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
+            channel_per_deformable_group, parallel_imgs, channels, deformable_group,
+            height_col, width_col, data_col_);
+      }));
+
+  cudaError_t err = cudaGetLastError();
+  if (err != cudaSuccess)
+  {
+    printf("error in deformable_im2col: %s\n", cudaGetErrorString(err));
+  }
+}
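+
+// Added note on the launch configuration used by the host wrappers in this
+// file: one logical thread is assigned per output element (num_kernels of
+// them), GET_BLOCKS caps the grid at kMaxGridNum blocks of CUDA_NUM_THREADS
+// threads, and CUDA_KERNEL_LOOP grid-strides so the kernels stay correct even
+// when num_kernels exceeds the capped grid capacity.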
+
+template <typename scalar_t>
+__global__ void deformable_col2im_gpu_kernel(
+    const int n, const scalar_t *data_col, const scalar_t *data_offset,
+    const int channels, const int height, const int width,
+    const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w,
+    const int stride_h, const int stride_w,
+    const int dilation_h, const int dilation_w,
+    const int channel_per_deformable_group,
+    const int batch_size, const int deformable_group,
+    const int height_col, const int width_col,
+    scalar_t *grad_im)
+{
+  CUDA_KERNEL_LOOP(index, n)
+  {
+    const int j = (index / width_col / height_col / batch_size) % kernel_w;
+    const int i = (index / width_col / height_col / batch_size / kernel_w) % kernel_h;
+    const int c = index / width_col / height_col / batch_size / kernel_w / kernel_h;
+    // compute the start and end of the output
+
+    const int deformable_group_index = c / channel_per_deformable_group;
+
+    int w_out = index % width_col;
+    int h_out = (index / width_col) % height_col;
+    int b = (index / width_col / height_col) % batch_size;
+    int w_in = w_out * stride_w - pad_w;
+    int h_in = h_out * stride_h - pad_h;
+
+    const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) *
+                                                        2 * kernel_h * kernel_w * height_col * width_col;
+    const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out;
+    const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out;
+    const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr];
+    const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr];
+    const scalar_t cur_inv_h_data = h_in + i * dilation_h + offset_h;
+    const scalar_t cur_inv_w_data = w_in + j * dilation_w + offset_w;
+
+    const scalar_t cur_top_grad = data_col[index];
+    const int cur_h = (int)cur_inv_h_data;
+    const int cur_w = (int)cur_inv_w_data;
+    for (int dy = -2; dy <= 2; dy++)
+    {
+      for (int dx = -2; dx <= 2; dx++)
+      {
+        if (cur_h + dy >= 0 && cur_h + dy < height &&
+            cur_w + dx >= 0 && cur_w + dx < width &&
+            abs(cur_inv_h_data - (cur_h + dy)) < 1 &&
+            abs(cur_inv_w_data - (cur_w + dx)) < 1)
+        {
+          int cur_bottom_grad_pos = ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx;
+          scalar_t weight = get_gradient_weight(cur_inv_h_data, cur_inv_w_data, cur_h + dy, cur_w + dx, height, width);
+          atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad);
+        }
+      }
+    }
+  }
+}
+
+void deformable_col2im(
+    const at::Tensor data_col, const at::Tensor data_offset, const int channels,
+    const int height, const int width, const int ksize_h,
+    const int ksize_w, const int pad_h, const int pad_w,
+    const int stride_h, const int stride_w,
+    const int dilation_h, const int dilation_w,
+    const int parallel_imgs, const int deformable_group,
+    at::Tensor grad_im)
+{
+
+  // todo: make sure parallel_imgs is passed in correctly
+  int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1;
+  int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1;
+  int num_kernels = channels * ksize_h * ksize_w * height_col * width_col * parallel_imgs;
+  int channel_per_deformable_group = channels / deformable_group;
+
+  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+      data_col.scalar_type(), "deformable_col2im_gpu", ([&] {
+        const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
+        const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+        scalar_t *grad_im_ = grad_im.data_ptr<scalar_t>();
+
+        deformable_col2im_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
+            num_kernels, data_col_, data_offset_, channels, height, width, ksize_h,
+            ksize_w, pad_h, pad_w, stride_h, stride_w,
+            dilation_h, dilation_w, channel_per_deformable_group,
+            parallel_imgs, deformable_group, height_col, width_col, grad_im_);
+      }));
+
+  cudaError_t err = cudaGetLastError();
+  if (err != cudaSuccess)
+  {
+    printf("error in deformable_col2im: %s\n", cudaGetErrorString(err));
+  }
+}
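+
+// Added note: deformable_col2im is a scatter, so many column elements can map
+// to the same input pixel; atomicAdd makes those accumulations race-free. The
+// dy/dx loop over [-2, 2] combined with the |coordinate distance| < 1 guard
+// visits exactly the (up to four) integer corners that received bilinear
+// weight in the forward pass.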
+
+template <typename scalar_t>
+__global__ void deformable_col2im_coord_gpu_kernel(const int n, const scalar_t *data_col,
                                                   const scalar_t *data_im, const scalar_t *data_offset,
+                                                   const int channels, const int height, const int width,
+                                                   const int kernel_h, const int kernel_w,
+                                                   const int pad_h, const int pad_w,
+                                                   const int stride_h, const int stride_w,
+                                                   const int dilation_h, const int dilation_w,
+                                                   const int channel_per_deformable_group,
+                                                   const int batch_size, const int offset_channels, const int deformable_group,
+                                                   const int height_col, const int width_col, scalar_t *grad_offset)
+{
+  CUDA_KERNEL_LOOP(index, n)
+  {
+    scalar_t val = 0;
+    int w = index % width_col;
+    int h = (index / width_col) % height_col;
+    int c = (index / width_col / height_col) % offset_channels;
+    int b = (index / width_col / height_col) / offset_channels;
+    // compute the start and end of the output
+
+    const int deformable_group_index = c / (2 * kernel_h * kernel_w);
+    const int col_step = kernel_h * kernel_w;
+    int cnt = 0;
+    const scalar_t *data_col_ptr = data_col + deformable_group_index * channel_per_deformable_group *
+                                                  batch_size * width_col * height_col;
+    const scalar_t *data_im_ptr = data_im + (b * deformable_group + deformable_group_index) *
+                                                channel_per_deformable_group / kernel_h / kernel_w * height * width;
+    const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 *
+                                                        kernel_h * kernel_w * height_col * width_col;
+
+    const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w;
+
+    for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; col_c += col_step)
+    {
+      const int col_pos = (((col_c * batch_size + b) * height_col) + h) * width_col + w;
+      const int bp_dir = offset_c % 2;
+
+      int j = (col_pos / width_col / height_col / batch_size) % kernel_w;
+      int i = (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h;
+      int w_out = col_pos % width_col;
+      int h_out = (col_pos / width_col) % height_col;
+      int w_in = w_out * stride_w - pad_w;
+      int h_in = h_out * stride_h - pad_h;
+      const int data_offset_h_ptr = (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out);
+      const int data_offset_w_ptr = (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out);
+      const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr];
+      const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr];
+      scalar_t inv_h = h_in + i * dilation_h + offset_h;
+      scalar_t inv_w = w_in + j * dilation_w + offset_w;
+      if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width)
+      {
+        inv_h = inv_w = -2;
+      }
+      const scalar_t weight = get_coordinate_weight(
+          inv_h, inv_w,
+          height, width, data_im_ptr + cnt * height * width, width, bp_dir);
+      val += weight * data_col_ptr[col_pos];
+      cnt += 1;
+    }
+
+    grad_offset[index] = val;
+  }
+}
+
+void deformable_col2im_coord(
+    const at::Tensor data_col, const at::Tensor data_im, const at::Tensor data_offset,
+    const int channels, const int height, const int width, const int ksize_h,
+    const int ksize_w, const int pad_h, const int pad_w, const int stride_h,
+    const int stride_w, const int dilation_h, const int dilation_w,
+    const int parallel_imgs, const int deformable_group, at::Tensor grad_offset)
+{
+
+  int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1;
+  int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1;
+  int num_kernels = height_col * width_col * 2 * ksize_h * ksize_w * deformable_group * parallel_imgs;
+  int channel_per_deformable_group = channels * ksize_h * ksize_w / deformable_group;
+
+  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+      data_col.scalar_type(), "deformable_col2im_coord_gpu", ([&] {
+        const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
+        const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
+        const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+        scalar_t *grad_offset_ = grad_offset.data_ptr<scalar_t>();
+
+        deformable_col2im_coord_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
+            num_kernels, data_col_, data_im_, data_offset_, channels, height, width,
+            ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w,
+            dilation_h, dilation_w, channel_per_deformable_group,
+            parallel_imgs, 2 * ksize_h * ksize_w * deformable_group, deformable_group,
+            height_col, width_col, grad_offset_);
+      }));
+}
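+
+// Added note: for offset gradients one thread is launched per offset element
+// (2 * ksize_h * ksize_w channels per deformable group); each thread walks the
+// column channels of its deformable group and accumulates
+// get_coordinate_weight-weighted contributions, with offset_c % 2 selecting
+// the h- versus w-component of each offset pair.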
+
+template <typename scalar_t>
+__device__ scalar_t dmcn_im2col_bilinear(const scalar_t *bottom_data, const int data_width,
+ const int height, const int width, scalar_t h, scalar_t w)
+{
+ int h_low = floor(h);
+ int w_low = floor(w);
+ int h_high = h_low + 1;
+ int w_high = w_low + 1;
+
+ scalar_t lh = h - h_low;
+ scalar_t lw = w - w_low;
+ scalar_t hh = 1 - lh, hw = 1 - lw;
+
+ scalar_t v1 = 0;
+ if (h_low >= 0 && w_low >= 0)
+ v1 = bottom_data[h_low * data_width + w_low];
+ scalar_t v2 = 0;
+ if (h_low >= 0 && w_high <= width - 1)
+ v2 = bottom_data[h_low * data_width + w_high];
+ scalar_t v3 = 0;
+ if (h_high <= height - 1 && w_low >= 0)
+ v3 = bottom_data[h_high * data_width + w_low];
+ scalar_t v4 = 0;
+ if (h_high <= height - 1 && w_high <= width - 1)
+ v4 = bottom_data[h_high * data_width + w_high];
+
+ scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
+
+ scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
+ return val;
+}
+
+template <typename scalar_t>
+__device__ scalar_t dmcn_get_gradient_weight(scalar_t argmax_h, scalar_t argmax_w,
+ const int h, const int w, const int height, const int width)
+{
+ if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)
+ {
+ //empty
+ return 0;
+ }
+
+ int argmax_h_low = floor(argmax_h);
+ int argmax_w_low = floor(argmax_w);
+ int argmax_h_high = argmax_h_low + 1;
+ int argmax_w_high = argmax_w_low + 1;
+
+ scalar_t weight = 0;
+ if (h == argmax_h_low && w == argmax_w_low)
+ weight = (h + 1 - argmax_h) * (w + 1 - argmax_w);
+ if (h == argmax_h_low && w == argmax_w_high)
+ weight = (h + 1 - argmax_h) * (argmax_w + 1 - w);
+ if (h == argmax_h_high && w == argmax_w_low)
+ weight = (argmax_h + 1 - h) * (w + 1 - argmax_w);
+ if (h == argmax_h_high && w == argmax_w_high)
+ weight = (argmax_h + 1 - h) * (argmax_w + 1 - w);
+ return weight;
+}
+
+template <typename scalar_t>
+__device__ scalar_t dmcn_get_coordinate_weight(scalar_t argmax_h, scalar_t argmax_w,
+ const int height, const int width, const scalar_t *im_data,
+ const int data_width, const int bp_dir)
+{
+ if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)
+ {
+ //empty
+ return 0;
+ }
+
+ int argmax_h_low = floor(argmax_h);
+ int argmax_w_low = floor(argmax_w);
+ int argmax_h_high = argmax_h_low + 1;
+ int argmax_w_high = argmax_w_low + 1;
+
+ scalar_t weight = 0;
+
+ if (bp_dir == 0)
+ {
+ if (argmax_h_low >= 0 && argmax_w_low >= 0)
+ weight += -1 * (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_low * data_width + argmax_w_low];
+ if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
+ weight += -1 * (argmax_w - argmax_w_low) * im_data[argmax_h_low * data_width + argmax_w_high];
+ if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
+ weight += (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_high * data_width + argmax_w_low];
+ if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
+ weight += (argmax_w - argmax_w_low) * im_data[argmax_h_high * data_width + argmax_w_high];
+ }
+ else if (bp_dir == 1)
+ {
+ if (argmax_h_low >= 0 && argmax_w_low >= 0)
+ weight += -1 * (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_low];
+ if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
+ weight += (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_high];
+ if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
+ weight += -1 * (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_low];
+ if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
+ weight += (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_high];
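+ // This function is the analytic derivative of dmcn_im2col_bilinear: with
+ // lh = h - floor(h), lw = w - floor(w) and corner values v1..v4,
+ // val = (1-lh)(1-lw)*v1 + (1-lh)*lw*v2 + lh*(1-lw)*v3 + lh*lw*v4, so
+ // dval/dh = -(1-lw)*v1 - lw*v2 + (1-lw)*v3 + lw*v4 (bp_dir == 0) and
+ // dval/dw = -(1-lh)*v1 + (1-lh)*v2 - lh*v3 + lh*v4 (bp_dir == 1).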
+ }
+
+ return weight;
+}
+
+template <typename scalar_t>
+__global__ void modulated_deformable_im2col_gpu_kernel(const int n,
+ const scalar_t *data_im, const scalar_t *data_offset, const scalar_t *data_mask,
+ const int height, const int width, const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w,
+ const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int channel_per_deformable_group,
+ const int batch_size, const int num_channels, const int deformable_group,
+ const int height_col, const int width_col,
+ scalar_t *data_col)
+{
+ CUDA_KERNEL_LOOP(index, n)
+ {
+ // index index of output matrix
+ const int w_col = index % width_col;
+ const int h_col = (index / width_col) % height_col;
+ const int b_col = (index / width_col / height_col) % batch_size;
+ const int c_im = (index / width_col / height_col) / batch_size;
+ const int c_col = c_im * kernel_h * kernel_w;
+
+ // compute deformable group index
+ const int deformable_group_index = c_im / channel_per_deformable_group;
+
+ const int h_in = h_col * stride_h - pad_h;
+ const int w_in = w_col * stride_w - pad_w;
+
+ scalar_t *data_col_ptr = data_col + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col;
+ //const float* data_im_ptr = data_im + ((b_col * num_channels + c_im) * height + h_in) * width + w_in;
+ const scalar_t *data_im_ptr = data_im + (b_col * num_channels + c_im) * height * width;
+ const scalar_t *data_offset_ptr = data_offset + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;
+
+ const scalar_t *data_mask_ptr = data_mask + (b_col * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;
+
+ for (int i = 0; i < kernel_h; ++i)
+ {
+ for (int j = 0; j < kernel_w; ++j)
+ {
+ const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col;
+ const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + w_col;
+ const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_col) * width_col + w_col;
+ const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr];
+ const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr];
+ const scalar_t mask = data_mask_ptr[data_mask_hw_ptr];
+ scalar_t val = static_cast<scalar_t>(0);
+ const scalar_t h_im = h_in + i * dilation_h + offset_h;
+ const scalar_t w_im = w_in + j * dilation_w + offset_w;
+ //if (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) {
+ if (h_im > -1 && w_im > -1 && h_im < height && w_im < width)
+ {
+ //const float map_h = i * dilation_h + offset_h;
+ //const float map_w = j * dilation_w + offset_w;
+ //const int cur_height = height - h_in;
+ //const int cur_width = width - w_in;
+ //val = dmcn_im2col_bilinear(data_im_ptr, width, cur_height, cur_width, map_h, map_w);
+ val = dmcn_im2col_bilinear(data_im_ptr, width, height, width, h_im, w_im);
+ }
+ *data_col_ptr = val * mask;
+ data_col_ptr += batch_size * height_col * width_col;
+ //data_col_ptr += height_col * width_col;
+ }
+ }
+ }
+}
+
+template <typename scalar_t>
+__global__ void modulated_deformable_col2im_gpu_kernel(const int n,
+ const scalar_t *data_col, const scalar_t *data_offset, const scalar_t *data_mask,
+ const int channels, const int height, const int width,
+ const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w,
+ const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int channel_per_deformable_group,
+ const int batch_size, const int deformable_group,
+ const int height_col, const int width_col,
+ scalar_t *grad_im)
+{
+ CUDA_KERNEL_LOOP(index, n)
+ {
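+ // Each thread scatters one column-gradient entry back to grad_im. A
+ // fractional sample has four integer bilinear neighbours; the +/-2 dy/dx
+ // window below, combined with the |delta| < 1 test, visits exactly those.
+ // E.g. a sample at (2.3, 4.7) touches rows 2-3 and cols 4-5 with weights
+ // 0.21, 0.49, 0.09, 0.21 (summing to 1). atomicAdd is required because
+ // different output positions can hit the same input pixel.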
+ const int j = (index / width_col / height_col / batch_size) % kernel_w;
+ const int i = (index / width_col / height_col / batch_size / kernel_w) % kernel_h;
+ const int c = index / width_col / height_col / batch_size / kernel_w / kernel_h;
+ // compute the start and end of the output
+
+ const int deformable_group_index = c / channel_per_deformable_group;
+
+ int w_out = index % width_col;
+ int h_out = (index / width_col) % height_col;
+ int b = (index / width_col / height_col) % batch_size;
+ int w_in = w_out * stride_w - pad_w;
+ int h_in = h_out * stride_h - pad_h;
+
+ const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;
+ const scalar_t *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;
+ const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out;
+ const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out;
+ const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_out) * width_col + w_out;
+ const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr];
+ const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr];
+ const scalar_t mask = data_mask_ptr[data_mask_hw_ptr];
+ const scalar_t cur_inv_h_data = h_in + i * dilation_h + offset_h;
+ const scalar_t cur_inv_w_data = w_in + j * dilation_w + offset_w;
+
+ const scalar_t cur_top_grad = data_col[index] * mask;
+ const int cur_h = (int)cur_inv_h_data;
+ const int cur_w = (int)cur_inv_w_data;
+ for (int dy = -2; dy <= 2; dy++)
+ {
+ for (int dx = -2; dx <= 2; dx++)
+ {
+ if (cur_h + dy >= 0 && cur_h + dy < height &&
+ cur_w + dx >= 0 && cur_w + dx < width &&
+ abs(cur_inv_h_data - (cur_h + dy)) < 1 &&
+ abs(cur_inv_w_data - (cur_w + dx)) < 1)
+ {
+ int cur_bottom_grad_pos = ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx;
+ scalar_t weight = dmcn_get_gradient_weight(cur_inv_h_data, cur_inv_w_data, cur_h + dy, cur_w + dx, height, width);
+ atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad);
+ }
+ }
+ }
+ }
+}
+
+template <typename scalar_t>
+__global__ void modulated_deformable_col2im_coord_gpu_kernel(const int n,
+ const scalar_t *data_col, const scalar_t *data_im,
+ const scalar_t *data_offset, const scalar_t *data_mask,
+ const int channels, const int height, const int width,
+ const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w,
+ const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int channel_per_deformable_group,
+ const int batch_size, const int offset_channels, const int deformable_group,
+ const int height_col, const int width_col,
+ scalar_t *grad_offset, scalar_t *grad_mask)
+{
+ CUDA_KERNEL_LOOP(index, n)
+ {
+ scalar_t val = 0, mval = 0;
+ int w = index % width_col;
+ int h = (index / width_col) % height_col;
+ int c = (index / width_col / height_col) % offset_channels;
+ int b = (index / width_col / height_col) / offset_channels;
+ // compute the start and end of the output
+
+ const int deformable_group_index = c / (2 * kernel_h * kernel_w);
+ const int col_step = kernel_h * kernel_w;
+ int cnt = 0;
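+ // channel_per_deformable_group is measured in column space here
+ // (channels * kernel_h * kernel_w / deformable_group). The loop below
+ // starts at this thread's tap id (offset_c / 2) and strides by
+ // kernel_h * kernel_w, visiting the column entry of every input channel
+ // sampled through this tap; val accumulates the offset gradient and mval
+ // the mask gradient.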
+ const scalar_t *data_col_ptr = data_col + deformable_group_index * channel_per_deformable_group *
+ batch_size * width_col * height_col;
+ const scalar_t *data_im_ptr = data_im + (b * deformable_group + deformable_group_index) * channel_per_deformable_group / kernel_h / kernel_w * height * width;
+ const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;
+ const scalar_t *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;
+
+ const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w;
+
+ for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; col_c += col_step)
+ {
+ const int col_pos = (((col_c * batch_size + b) * height_col) + h) * width_col + w;
+ const int bp_dir = offset_c % 2;
+
+ int j = (col_pos / width_col / height_col / batch_size) % kernel_w;
+ int i = (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h;
+ int w_out = col_pos % width_col;
+ int h_out = (col_pos / width_col) % height_col;
+ int w_in = w_out * stride_w - pad_w;
+ int h_in = h_out * stride_h - pad_h;
+ const int data_offset_h_ptr = (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out);
+ const int data_offset_w_ptr = (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out);
+ const int data_mask_hw_ptr = (((i * kernel_w + j) * height_col + h_out) * width_col + w_out);
+ const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr];
+ const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr];
+ const scalar_t mask = data_mask_ptr[data_mask_hw_ptr];
+ scalar_t inv_h = h_in + i * dilation_h + offset_h;
+ scalar_t inv_w = w_in + j * dilation_w + offset_w;
+ if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width)
+ {
+ inv_h = inv_w = -2;
+ }
+ else
+ {
+ mval += data_col_ptr[col_pos] * dmcn_im2col_bilinear(data_im_ptr + cnt * height * width, width, height, width, inv_h, inv_w);
+ }
+ const scalar_t weight = dmcn_get_coordinate_weight(
+ inv_h, inv_w,
+ height, width, data_im_ptr + cnt * height * width, width, bp_dir);
+ val += weight * data_col_ptr[col_pos] * mask;
+ cnt += 1;
+ }
+ // KERNEL_ASSIGN(grad_offset[index], offset_req, val);
+ grad_offset[index] = val;
+ if (offset_c % 2 == 0)
+ // KERNEL_ASSIGN(grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * height_col + h) * width_col + w], mask_req, mval);
+ grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * height_col + h) * width_col + w] = mval;
+ }
+}
+
+void modulated_deformable_im2col_cuda(
+ const at::Tensor data_im, const at::Tensor data_offset, const at::Tensor data_mask,
+ const int batch_size, const int channels, const int height_im, const int width_im,
+ const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
+ const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int deformable_group, at::Tensor data_col)
+{
+ // num_axes should be smaller than block size
+ const int channel_per_deformable_group = channels / deformable_group;
+ const int num_kernels = channels * batch_size * height_col * width_col;
+
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+ data_im.scalar_type(), "modulated_deformable_im2col_gpu", ([&] {
+ const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
+ const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+ const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
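+ // AT_DISPATCH_FLOATING_TYPES_AND_HALF instantiates the lambda once per
+ // dtype (scalar_t = float, double or at::Half) and the raw pointers are
+ // handed to the kernel. num_kernels = channels * batch * height_col *
+ // width_col: one thread per output column position, each writing
+ // kernel_h * kernel_w column entries.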
+ scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
+
+ modulated_deformable_im2col_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
+ num_kernels, data_im_, data_offset_, data_mask_, height_im, width_im, kernel_h, kenerl_w,
+ pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group,
+ batch_size, channels, deformable_group, height_col, width_col, data_col_);
+ }));
+
+ cudaError_t err = cudaGetLastError();
+ if (err != cudaSuccess)
+ {
+ printf("error in modulated_deformable_im2col_cuda: %s\n", cudaGetErrorString(err));
+ }
+}
+
+void modulated_deformable_col2im_cuda(
+ const at::Tensor data_col, const at::Tensor data_offset, const at::Tensor data_mask,
+ const int batch_size, const int channels, const int height_im, const int width_im,
+ const int height_col, const int width_col, const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int deformable_group, at::Tensor grad_im)
+{
+
+ const int channel_per_deformable_group = channels / deformable_group;
+ const int num_kernels = channels * kernel_h * kernel_w * batch_size * height_col * width_col;
+
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+ data_col.scalar_type(), "modulated_deformable_col2im_gpu", ([&] {
+ const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
+ const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+ const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
+ scalar_t *grad_im_ = grad_im.data_ptr<scalar_t>();
+
+ modulated_deformable_col2im_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
+ num_kernels, data_col_, data_offset_, data_mask_, channels, height_im, width_im,
+ kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
+ dilation_h, dilation_w, channel_per_deformable_group,
+ batch_size, deformable_group, height_col, width_col, grad_im_);
+ }));
+
+ cudaError_t err = cudaGetLastError();
+ if (err != cudaSuccess)
+ {
+ printf("error in modulated_deformable_col2im_cuda: %s\n", cudaGetErrorString(err));
+ }
+}
+
+void modulated_deformable_col2im_coord_cuda(
+ const at::Tensor data_col, const at::Tensor data_im, const at::Tensor data_offset, const at::Tensor data_mask,
+ const int batch_size, const int channels, const int height_im, const int width_im,
+ const int height_col, const int width_col, const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int deformable_group,
+ at::Tensor grad_offset, at::Tensor grad_mask)
+{
+ const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h * kernel_w * deformable_group;
+ const int channel_per_deformable_group = channels * kernel_h * kernel_w / deformable_group;
+
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+ data_col.scalar_type(), "modulated_deformable_col2im_coord_gpu", ([&] {
+ const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
+ const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
+ const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+ const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
+ scalar_t *grad_offset_ = grad_offset.data_ptr<scalar_t>();
+ scalar_t *grad_mask_ = grad_mask.data_ptr<scalar_t>();
+
+ modulated_deformable_col2im_coord_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
+ num_kernels, data_col_, data_im_, data_offset_, data_mask_, channels, height_im, width_im,
+ kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
+ dilation_h, dilation_w, channel_per_deformable_group,
+ batch_size, 2 * kernel_h * kernel_w * deformable_group, deformable_group, height_col, width_col,
+ grad_offset_, grad_mask_);
+ }));
+ cudaError_t err = cudaGetLastError();
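+ // Offset and mask gradients come out of one kernel launch because both
+ // need the same bilinear samples of data_im; the mask gradient is only
+ // written for even offset channels (once per tap) so it is not duplicated.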
+ if (err != cudaSuccess)
+ {
+ printf("error in modulated_deformable_col2im_coord_cuda: %s\n", cudaGetErrorString(err));
+ }
+}
diff --git a/IIR-Lab/models/archs/dcn/src/deform_conv_ext.cpp b/IIR-Lab/models/archs/dcn/src/deform_conv_ext.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..41c6df6f721bd95a525fd6a03dd9882e863de042
--- /dev/null
+++ b/IIR-Lab/models/archs/dcn/src/deform_conv_ext.cpp
@@ -0,0 +1,164 @@
+// modify from
+// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda.c
+
+#include <torch/extension.h> // assumed: standard headers for a PyTorch C++ extension
+#include <ATen/ATen.h>
+
+#include <cmath>
+#include <vector>
+
+#define WITH_CUDA // always use cuda
+#ifdef WITH_CUDA
+int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,
+ at::Tensor offset, at::Tensor output,
+ at::Tensor columns, at::Tensor ones, int kW,
+ int kH, int dW, int dH, int padW, int padH,
+ int dilationW, int dilationH, int group,
+ int deformable_group, int im2col_step);
+
+int deform_conv_backward_input_cuda(at::Tensor input, at::Tensor offset,
+ at::Tensor gradOutput, at::Tensor gradInput,
+ at::Tensor gradOffset, at::Tensor weight,
+ at::Tensor columns, int kW, int kH, int dW,
+ int dH, int padW, int padH, int dilationW,
+ int dilationH, int group,
+ int deformable_group, int im2col_step);
+
+int deform_conv_backward_parameters_cuda(
+ at::Tensor input, at::Tensor offset, at::Tensor gradOutput,
+ at::Tensor gradWeight, // at::Tensor gradBias,
+ at::Tensor columns, at::Tensor ones, int kW, int kH, int dW, int dH,
+ int padW, int padH, int dilationW, int dilationH, int group,
+ int deformable_group, float scale, int im2col_step);
+
+void modulated_deform_conv_cuda_forward(
+ at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones,
+ at::Tensor offset, at::Tensor mask, at::Tensor output, at::Tensor columns,
+ int kernel_h, int kernel_w, const int stride_h, const int stride_w,
+ const int pad_h, const int pad_w, const int dilation_h,
+ const int dilation_w, const int group, const int deformable_group,
+ const bool with_bias);
+
+void modulated_deform_conv_cuda_backward(
+ at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones,
+ at::Tensor offset, at::Tensor mask, at::Tensor columns,
+ at::Tensor grad_input, at::Tensor grad_weight, at::Tensor grad_bias,
+ at::Tensor grad_offset, at::Tensor grad_mask, at::Tensor grad_output,
+ int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h,
+ int pad_w, int dilation_h, int dilation_w, int group, int deformable_group,
+ const bool with_bias);
+#endif
+
+int deform_conv_forward(at::Tensor input, at::Tensor weight,
+ at::Tensor offset, at::Tensor output,
+ at::Tensor columns, at::Tensor ones, int kW,
+ int kH, int dW, int dH, int padW, int padH,
+ int dilationW, int dilationH, int group,
+ int deformable_group, int im2col_step) {
+ if (input.device().is_cuda()) {
+#ifdef WITH_CUDA
+ return deform_conv_forward_cuda(input, weight, offset, output, columns,
+ ones, kW, kH, dW, dH, padW, padH, dilationW, dilationH, group,
+ deformable_group, im2col_step);
+#else
+ AT_ERROR("deform conv is not compiled with GPU support");
+#endif
+ }
+ AT_ERROR("deform conv is not implemented on CPU");
+}
+
+int deform_conv_backward_input(at::Tensor input, at::Tensor offset,
+ at::Tensor gradOutput, at::Tensor gradInput,
+ at::Tensor gradOffset, at::Tensor weight,
+ at::Tensor columns, int kW, int kH, int dW,
+ int dH, int padW, int padH, int dilationW,
+ int dilationH, int group,
+ int deformable_group, int im2col_step) {
+ if (input.device().is_cuda()) {
+#ifdef 
WITH_CUDA + return deform_conv_backward_input_cuda(input, offset, gradOutput, + gradInput, gradOffset, weight, columns, kW, kH, dW, dH, padW, padH, + dilationW, dilationH, group, deformable_group, im2col_step); +#else + AT_ERROR("deform conv is not compiled with GPU support"); +#endif + } + AT_ERROR("deform conv is not implemented on CPU"); +} + +int deform_conv_backward_parameters( + at::Tensor input, at::Tensor offset, at::Tensor gradOutput, + at::Tensor gradWeight, // at::Tensor gradBias, + at::Tensor columns, at::Tensor ones, int kW, int kH, int dW, int dH, + int padW, int padH, int dilationW, int dilationH, int group, + int deformable_group, float scale, int im2col_step) { + if (input.device().is_cuda()) { +#ifdef WITH_CUDA + return deform_conv_backward_parameters_cuda(input, offset, gradOutput, + gradWeight, columns, ones, kW, kH, dW, dH, padW, padH, dilationW, + dilationH, group, deformable_group, scale, im2col_step); +#else + AT_ERROR("deform conv is not compiled with GPU support"); +#endif + } + AT_ERROR("deform conv is not implemented on CPU"); +} + +void modulated_deform_conv_forward( + at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, + at::Tensor offset, at::Tensor mask, at::Tensor output, at::Tensor columns, + int kernel_h, int kernel_w, const int stride_h, const int stride_w, + const int pad_h, const int pad_w, const int dilation_h, + const int dilation_w, const int group, const int deformable_group, + const bool with_bias) { + if (input.device().is_cuda()) { +#ifdef WITH_CUDA + return modulated_deform_conv_cuda_forward(input, weight, bias, ones, + offset, mask, output, columns, kernel_h, kernel_w, stride_h, + stride_w, pad_h, pad_w, dilation_h, dilation_w, group, + deformable_group, with_bias); +#else + AT_ERROR("modulated deform conv is not compiled with GPU support"); +#endif + } + AT_ERROR("modulated deform conv is not implemented on CPU"); +} + +void modulated_deform_conv_backward( + at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, + at::Tensor offset, at::Tensor mask, at::Tensor columns, + at::Tensor grad_input, at::Tensor grad_weight, at::Tensor grad_bias, + at::Tensor grad_offset, at::Tensor grad_mask, at::Tensor grad_output, + int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, + int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, + const bool with_bias) { + if (input.device().is_cuda()) { +#ifdef WITH_CUDA + return modulated_deform_conv_cuda_backward(input, weight, bias, ones, + offset, mask, columns, grad_input, grad_weight, grad_bias, grad_offset, + grad_mask, grad_output, kernel_h, kernel_w, stride_h, stride_w, + pad_h, pad_w, dilation_h, dilation_w, group, deformable_group, + with_bias); +#else + AT_ERROR("modulated deform conv is not compiled with GPU support"); +#endif + } + AT_ERROR("modulated deform conv is not implemented on CPU"); +} + + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("deform_conv_forward", &deform_conv_forward, + "deform forward"); + m.def("deform_conv_backward_input", &deform_conv_backward_input, + "deform_conv_backward_input"); + m.def("deform_conv_backward_parameters", + &deform_conv_backward_parameters, + "deform_conv_backward_parameters"); + m.def("modulated_deform_conv_forward", + &modulated_deform_conv_forward, + "modulated deform conv forward"); + m.def("modulated_deform_conv_backward", + &modulated_deform_conv_backward, + "modulated deform conv backward"); +} diff --git a/IIR-Lab/models/dcn/__init__.py b/IIR-Lab/models/dcn/__init__.py new file 
mode 100644 index 0000000000000000000000000000000000000000..32e3592f896d61b4127e09d0476381b9d55e32ff --- /dev/null +++ b/IIR-Lab/models/dcn/__init__.py @@ -0,0 +1,7 @@ +from .deform_conv import (DeformConv, DeformConvPack, ModulatedDeformConv, ModulatedDeformConvPack, deform_conv, + modulated_deform_conv) + +__all__ = [ + 'DeformConv', 'DeformConvPack', 'ModulatedDeformConv', 'ModulatedDeformConvPack', 'deform_conv', + 'modulated_deform_conv' +] diff --git a/IIR-Lab/models/dcn/__pycache__/__init__.cpython-310.pyc b/IIR-Lab/models/dcn/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bfbea69661d64cf4987690ec9641116b8f0f1d47 Binary files /dev/null and b/IIR-Lab/models/dcn/__pycache__/__init__.cpython-310.pyc differ diff --git a/IIR-Lab/models/dcn/__pycache__/__init__.cpython-38.pyc b/IIR-Lab/models/dcn/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..663b0797ac2bc7752910b7459b157ce30410c5f7 Binary files /dev/null and b/IIR-Lab/models/dcn/__pycache__/__init__.cpython-38.pyc differ diff --git a/IIR-Lab/models/dcn/__pycache__/deform_conv.cpython-310.pyc b/IIR-Lab/models/dcn/__pycache__/deform_conv.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..145e66f55b4c3dbf4812067a1eb9b5c82aa65d75 Binary files /dev/null and b/IIR-Lab/models/dcn/__pycache__/deform_conv.cpython-310.pyc differ diff --git a/IIR-Lab/models/dcn/__pycache__/deform_conv.cpython-38.pyc b/IIR-Lab/models/dcn/__pycache__/deform_conv.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..991621bdfdc40736c33b8a6e126607e36f62e0a5 Binary files /dev/null and b/IIR-Lab/models/dcn/__pycache__/deform_conv.cpython-38.pyc differ diff --git a/IIR-Lab/models/dcn/deform_conv.py b/IIR-Lab/models/dcn/deform_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..6268ca825d59ef4a30d4d2156c4438cbbe9b3c1e --- /dev/null +++ b/IIR-Lab/models/dcn/deform_conv.py @@ -0,0 +1,379 @@ +import math +import os +import torch +from torch import nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn import functional as F +from torch.nn.modules.utils import _pair, _single + +BASICSR_JIT = os.getenv('BASICSR_JIT') +if BASICSR_JIT == 'True': + from torch.utils.cpp_extension import load + module_path = os.path.dirname(__file__) + deform_conv_ext = load( + 'deform_conv', + sources=[ + os.path.join(module_path, 'src', 'deform_conv_ext.cpp'), + os.path.join(module_path, 'src', 'deform_conv_cuda.cpp'), + os.path.join(module_path, 'src', 'deform_conv_cuda_kernel.cu'), + ], + ) +else: + try: + from . import deform_conv_ext + except ImportError: + pass + # avoid annoying print output + # print(f'Cannot import deform_conv_ext. Error: {error}. You may need to: \n ' + # '1. compile with BASICSR_EXT=True. or\n ' + # '2. 
set BASICSR_JIT=True during running') + + +class DeformConvFunction(Function): + + @staticmethod + def forward(ctx, + input, + offset, + weight, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + im2col_step=64): + if input is not None and input.dim() != 4: + raise ValueError(f'Expected 4D tensor as input, got {input.dim()}D tensor instead.') + ctx.stride = _pair(stride) + ctx.padding = _pair(padding) + ctx.dilation = _pair(dilation) + ctx.groups = groups + ctx.deformable_groups = deformable_groups + ctx.im2col_step = im2col_step + + ctx.save_for_backward(input, offset, weight) + + output = input.new_empty(DeformConvFunction._output_size(input, weight, ctx.padding, ctx.dilation, ctx.stride)) + + ctx.bufs_ = [input.new_empty(0), input.new_empty(0)] # columns, ones + + if not input.is_cuda: + raise NotImplementedError + else: + cur_im2col_step = min(ctx.im2col_step, input.shape[0]) + assert (input.shape[0] % cur_im2col_step) == 0, 'im2col step must divide batchsize' + deform_conv_ext.deform_conv_forward(input, weight, + offset, output, ctx.bufs_[0], ctx.bufs_[1], weight.size(3), + weight.size(2), ctx.stride[1], ctx.stride[0], ctx.padding[1], + ctx.padding[0], ctx.dilation[1], ctx.dilation[0], ctx.groups, + ctx.deformable_groups, cur_im2col_step) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, offset, weight = ctx.saved_tensors + + grad_input = grad_offset = grad_weight = None + + if not grad_output.is_cuda: + raise NotImplementedError + else: + cur_im2col_step = min(ctx.im2col_step, input.shape[0]) + assert (input.shape[0] % cur_im2col_step) == 0, 'im2col step must divide batchsize' + + if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]: + grad_input = torch.zeros_like(input) + grad_offset = torch.zeros_like(offset) + deform_conv_ext.deform_conv_backward_input(input, offset, grad_output, grad_input, + grad_offset, weight, ctx.bufs_[0], weight.size(3), + weight.size(2), ctx.stride[1], ctx.stride[0], ctx.padding[1], + ctx.padding[0], ctx.dilation[1], ctx.dilation[0], ctx.groups, + ctx.deformable_groups, cur_im2col_step) + + if ctx.needs_input_grad[2]: + grad_weight = torch.zeros_like(weight) + deform_conv_ext.deform_conv_backward_parameters(input, offset, grad_output, grad_weight, + ctx.bufs_[0], ctx.bufs_[1], weight.size(3), + weight.size(2), ctx.stride[1], ctx.stride[0], + ctx.padding[1], ctx.padding[0], ctx.dilation[1], + ctx.dilation[0], ctx.groups, ctx.deformable_groups, 1, + cur_im2col_step) + + return (grad_input, grad_offset, grad_weight, None, None, None, None, None) + + @staticmethod + def _output_size(input, weight, padding, dilation, stride): + channels = weight.size(0) + output_size = (input.size(0), channels) + for d in range(input.dim() - 2): + in_size = input.size(d + 2) + pad = padding[d] + kernel = dilation[d] * (weight.size(d + 2) - 1) + 1 + stride_ = stride[d] + output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, ) + if not all(map(lambda s: s > 0, output_size)): + raise ValueError(f'convolution input is too small (output would be {"x".join(map(str, output_size))})') + return output_size + + +class ModulatedDeformConvFunction(Function): + + @staticmethod + def forward(ctx, + input, + offset, + mask, + weight, + bias=None, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1): + ctx.stride = stride + ctx.padding = padding + ctx.dilation = dilation + ctx.groups = groups + ctx.deformable_groups = deformable_groups + ctx.with_bias = bias is not None + if not 
ctx.with_bias: + bias = input.new_empty(1) # fake tensor + if not input.is_cuda: + raise NotImplementedError + if weight.requires_grad or mask.requires_grad or offset.requires_grad or input.requires_grad: + ctx.save_for_backward(input, offset, mask, weight, bias) + output = input.new_empty(ModulatedDeformConvFunction._infer_shape(ctx, input, weight)) + ctx._bufs = [input.new_empty(0), input.new_empty(0)] + deform_conv_ext.modulated_deform_conv_forward(input, weight, bias, ctx._bufs[0], offset, mask, output, + ctx._bufs[1], weight.shape[2], weight.shape[3], ctx.stride, + ctx.stride, ctx.padding, ctx.padding, ctx.dilation, ctx.dilation, + ctx.groups, ctx.deformable_groups, ctx.with_bias) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + if not grad_output.is_cuda: + raise NotImplementedError + input, offset, mask, weight, bias = ctx.saved_tensors + grad_input = torch.zeros_like(input) + grad_offset = torch.zeros_like(offset) + grad_mask = torch.zeros_like(mask) + grad_weight = torch.zeros_like(weight) + grad_bias = torch.zeros_like(bias) + deform_conv_ext.modulated_deform_conv_backward(input, weight, bias, ctx._bufs[0], offset, mask, ctx._bufs[1], + grad_input, grad_weight, grad_bias, grad_offset, grad_mask, + grad_output, weight.shape[2], weight.shape[3], ctx.stride, + ctx.stride, ctx.padding, ctx.padding, ctx.dilation, ctx.dilation, + ctx.groups, ctx.deformable_groups, ctx.with_bias) + if not ctx.with_bias: + grad_bias = None + + return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias, None, None, None, None, None) + + @staticmethod + def _infer_shape(ctx, input, weight): + n = input.size(0) + channels_out = weight.size(0) + height, width = input.shape[2:4] + kernel_h, kernel_w = weight.shape[2:4] + height_out = (height + 2 * ctx.padding - (ctx.dilation * (kernel_h - 1) + 1)) // ctx.stride + 1 + width_out = (width + 2 * ctx.padding - (ctx.dilation * (kernel_w - 1) + 1)) // ctx.stride + 1 + return n, channels_out, height_out, width_out + + +deform_conv = DeformConvFunction.apply +modulated_deform_conv = ModulatedDeformConvFunction.apply + + +class DeformConv(nn.Module): + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + bias=False): + super(DeformConv, self).__init__() + + assert not bias + assert in_channels % groups == 0, f'in_channels {in_channels} is not divisible by groups {groups}' + assert out_channels % groups == 0, f'out_channels {out_channels} is not divisible by groups {groups}' + + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = _pair(stride) + self.padding = _pair(padding) + self.dilation = _pair(dilation) + self.groups = groups + self.deformable_groups = deformable_groups + # enable compatibility with nn.Conv2d + self.transposed = False + self.output_padding = _single(0) + + self.weight = nn.Parameter(torch.Tensor(out_channels, in_channels // self.groups, *self.kernel_size)) + + self.reset_parameters() + + def reset_parameters(self): + n = self.in_channels + for k in self.kernel_size: + n *= k + stdv = 1. 
/ math.sqrt(n) + self.weight.data.uniform_(-stdv, stdv) + + def forward(self, x, offset): + # To fix an assert error in deform_conv_cuda.cpp:128 + # input image is smaller than kernel + input_pad = (x.size(2) < self.kernel_size[0] or x.size(3) < self.kernel_size[1]) + if input_pad: + pad_h = max(self.kernel_size[0] - x.size(2), 0) + pad_w = max(self.kernel_size[1] - x.size(3), 0) + x = F.pad(x, (0, pad_w, 0, pad_h), 'constant', 0).contiguous() + offset = F.pad(offset, (0, pad_w, 0, pad_h), 'constant', 0).contiguous() + out = deform_conv(x, offset, self.weight, self.stride, self.padding, self.dilation, self.groups, + self.deformable_groups) + if input_pad: + out = out[:, :, :out.size(2) - pad_h, :out.size(3) - pad_w].contiguous() + return out + + +class DeformConvPack(DeformConv): + """A Deformable Conv Encapsulation that acts as normal Conv layers. + + Args: + in_channels (int): Same as nn.Conv2d. + out_channels (int): Same as nn.Conv2d. + kernel_size (int or tuple[int]): Same as nn.Conv2d. + stride (int or tuple[int]): Same as nn.Conv2d. + padding (int or tuple[int]): Same as nn.Conv2d. + dilation (int or tuple[int]): Same as nn.Conv2d. + groups (int): Same as nn.Conv2d. + bias (bool or str): If specified as `auto`, it will be decided by the + norm_cfg. Bias will be set as True if norm_cfg is None, otherwise + False. + """ + + _version = 2 + + def __init__(self, *args, **kwargs): + super(DeformConvPack, self).__init__(*args, **kwargs) + + self.conv_offset = nn.Conv2d( + self.in_channels, + self.deformable_groups * 2 * self.kernel_size[0] * self.kernel_size[1], + kernel_size=self.kernel_size, + stride=_pair(self.stride), + padding=_pair(self.padding), + dilation=_pair(self.dilation), + bias=True) + self.init_offset() + + def init_offset(self): + self.conv_offset.weight.data.zero_() + self.conv_offset.bias.data.zero_() + + def forward(self, x): + offset = self.conv_offset(x) + return deform_conv(x, offset, self.weight, self.stride, self.padding, self.dilation, self.groups, + self.deformable_groups) + + +class ModulatedDeformConv(nn.Module): + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + bias=True): + super(ModulatedDeformConv, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = stride + self.padding = padding + self.dilation = dilation + self.groups = groups + self.deformable_groups = deformable_groups + self.with_bias = bias + # enable compatibility with nn.Conv2d + self.transposed = False + self.output_padding = _single(0) + + self.weight = nn.Parameter(torch.Tensor(out_channels, in_channels // groups, *self.kernel_size)) + if bias: + self.bias = nn.Parameter(torch.Tensor(out_channels)) + else: + self.register_parameter('bias', None) + self.init_weights() + + def init_weights(self): + n = self.in_channels + for k in self.kernel_size: + n *= k + stdv = 1. / math.sqrt(n) + self.weight.data.uniform_(-stdv, stdv) + if self.bias is not None: + self.bias.data.zero_() + + def forward(self, x, offset, mask): + return modulated_deform_conv(x, offset, mask, self.weight, self.bias, self.stride, self.padding, self.dilation, + self.groups, self.deformable_groups) + + +class ModulatedDeformConvPack(ModulatedDeformConv): + """A ModulatedDeformable Conv Encapsulation that acts as normal Conv layers. + + Args: + in_channels (int): Same as nn.Conv2d. + out_channels (int): Same as nn.Conv2d. 
+ kernel_size (int or tuple[int]): Same as nn.Conv2d.
+ stride (int or tuple[int]): Same as nn.Conv2d.
+ padding (int or tuple[int]): Same as nn.Conv2d.
+ dilation (int or tuple[int]): Same as nn.Conv2d.
+ groups (int): Same as nn.Conv2d.
+ bias (bool or str): If specified as `auto`, it will be decided by the
+ norm_cfg. Bias will be set as True if norm_cfg is None, otherwise
+ False.
+ """
+
+ _version = 2
+
+ def __init__(self, *args, **kwargs):
+ super(ModulatedDeformConvPack, self).__init__(*args, **kwargs)
+
+ self.conv_offset = nn.Conv2d(
+ self.in_channels,
+ self.deformable_groups * 3 * self.kernel_size[0] * self.kernel_size[1],
+ kernel_size=self.kernel_size,
+ stride=_pair(self.stride),
+ padding=_pair(self.padding),
+ dilation=_pair(self.dilation),
+ bias=True)
+ self.init_weights()
+
+ def init_weights(self):
+ super(ModulatedDeformConvPack, self).init_weights()
+ if hasattr(self, 'conv_offset'):
+ self.conv_offset.weight.data.zero_()
+ self.conv_offset.bias.data.zero_()
+
+ def forward(self, x):
+ out = self.conv_offset(x)
+ o1, o2, mask = torch.chunk(out, 3, dim=1)
+ offset = torch.cat((o1, o2), dim=1)
+ mask = torch.sigmoid(mask)
+ return modulated_deform_conv(x, offset, mask, self.weight, self.bias, self.stride, self.padding, self.dilation,
+ self.groups, self.deformable_groups)
diff --git a/IIR-Lab/models/dcn/src/deform_conv_cuda.cpp b/IIR-Lab/models/dcn/src/deform_conv_cuda.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b465c493a3dd67d320b7a8997fbd501d2f89c807
--- /dev/null
+++ b/IIR-Lab/models/dcn/src/deform_conv_cuda.cpp
@@ -0,0 +1,685 @@
+// modify from
+// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda.c
+
+#include <torch/extension.h> // assumed: standard headers for a PyTorch C++ extension
+#include <ATen/ATen.h>
+
+#include <cmath>
+#include <vector>
+
+void deformable_im2col(const at::Tensor data_im, const at::Tensor data_offset,
+ const int channels, const int height, const int width,
+ const int ksize_h, const int ksize_w, const int pad_h,
+ const int pad_w, const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int parallel_imgs, const int deformable_group,
+ at::Tensor data_col);
+
+void deformable_col2im(const at::Tensor data_col, const at::Tensor data_offset,
+ const int channels, const int height, const int width,
+ const int ksize_h, const int ksize_w, const int pad_h,
+ const int pad_w, const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int parallel_imgs, const int deformable_group,
+ at::Tensor grad_im);
+
+void deformable_col2im_coord(
+ const at::Tensor data_col, const at::Tensor data_im,
+ const at::Tensor data_offset, const int channels, const int height,
+ const int width, const int ksize_h, const int ksize_w, const int pad_h,
+ const int pad_w, const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w, const int parallel_imgs,
+ const int deformable_group, at::Tensor grad_offset);
+
+void modulated_deformable_im2col_cuda(
+ const at::Tensor data_im, const at::Tensor data_offset,
+ const at::Tensor data_mask, const int batch_size, const int channels,
+ const int height_im, const int width_im, const int height_col,
+ const int width_col, const int kernel_h, const int kenerl_w,
+ const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w, const int deformable_group,
+ at::Tensor data_col);
+
+void modulated_deformable_col2im_cuda(
+ const at::Tensor data_col, const at::Tensor data_offset,
+ const 
at::Tensor data_mask, const int batch_size, const int channels, + const int height_im, const int width_im, const int height_col, + const int width_col, const int kernel_h, const int kenerl_w, + const int pad_h, const int pad_w, const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, const int deformable_group, + at::Tensor grad_im); + +void modulated_deformable_col2im_coord_cuda( + const at::Tensor data_col, const at::Tensor data_im, + const at::Tensor data_offset, const at::Tensor data_mask, + const int batch_size, const int channels, const int height_im, + const int width_im, const int height_col, const int width_col, + const int kernel_h, const int kenerl_w, const int pad_h, const int pad_w, + const int stride_h, const int stride_w, const int dilation_h, + const int dilation_w, const int deformable_group, at::Tensor grad_offset, + at::Tensor grad_mask); + +void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput, + at::Tensor weight, int kH, int kW, int dH, int dW, int padH, + int padW, int dilationH, int dilationW, int group, + int deformable_group) { + TORCH_CHECK(weight.ndimension() == 4, + "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, " + "but got: %s", + weight.ndimension()); + + TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); + + TORCH_CHECK(kW > 0 && kH > 0, + "kernel size should be greater than zero, but got kH: %d kW: %d", kH, + kW); + + TORCH_CHECK((weight.size(2) == kH && weight.size(3) == kW), + "kernel size should be consistent with weight, ", + "but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", kH, + kW, weight.size(2), weight.size(3)); + + TORCH_CHECK(dW > 0 && dH > 0, + "stride should be greater than zero, but got dH: %d dW: %d", dH, dW); + + TORCH_CHECK( + dilationW > 0 && dilationH > 0, + "dilation should be greater than 0, but got dilationH: %d dilationW: %d", + dilationH, dilationW); + + int ndim = input.ndimension(); + int dimf = 0; + int dimh = 1; + int dimw = 2; + + if (ndim == 4) { + dimf++; + dimh++; + dimw++; + } + + TORCH_CHECK(ndim == 3 || ndim == 4, "3D or 4D input tensor expected but got: %s", + ndim); + + long nInputPlane = weight.size(1) * group; + long inputHeight = input.size(dimh); + long inputWidth = input.size(dimw); + long nOutputPlane = weight.size(0); + long outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + + TORCH_CHECK(nInputPlane % deformable_group == 0, + "input channels must divide deformable group size"); + + if (outputWidth < 1 || outputHeight < 1) + AT_ERROR( + "Given input size: (%ld x %ld x %ld). " + "Calculated output size: (%ld x %ld x %ld). 
Output size is too small", + nInputPlane, inputHeight, inputWidth, nOutputPlane, outputHeight, + outputWidth); + + TORCH_CHECK(input.size(1) == nInputPlane, + "invalid number of input planes, expected: %d, but got: %d", + nInputPlane, input.size(1)); + + TORCH_CHECK((inputHeight >= kH && inputWidth >= kW), + "input image is smaller than kernel"); + + TORCH_CHECK((offset.size(2) == outputHeight && offset.size(3) == outputWidth), + "invalid spatial size of offset, expected height: %d width: %d, but " + "got height: %d width: %d", + outputHeight, outputWidth, offset.size(2), offset.size(3)); + + TORCH_CHECK((offset.size(1) == deformable_group * 2 * kH * kW), + "invalid number of channels of offset"); + + if (gradOutput != NULL) { + TORCH_CHECK(gradOutput->size(dimf) == nOutputPlane, + "invalid number of gradOutput planes, expected: %d, but got: %d", + nOutputPlane, gradOutput->size(dimf)); + + TORCH_CHECK((gradOutput->size(dimh) == outputHeight && + gradOutput->size(dimw) == outputWidth), + "invalid size of gradOutput, expected height: %d width: %d , but " + "got height: %d width: %d", + outputHeight, outputWidth, gradOutput->size(dimh), + gradOutput->size(dimw)); + } +} + +int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight, + at::Tensor offset, at::Tensor output, + at::Tensor columns, at::Tensor ones, int kW, + int kH, int dW, int dH, int padW, int padH, + int dilationW, int dilationH, int group, + int deformable_group, int im2col_step) { + // todo: resize columns to include im2col: done + // todo: add im2col_step as input + // todo: add new output buffer and transpose it to output (or directly + // transpose output) todo: possibly change data indexing because of + // parallel_imgs + + shape_check(input, offset, NULL, weight, kH, kW, dH, dW, padH, padW, + dilationH, dilationW, group, deformable_group); + at::DeviceGuard guard(input.device()); + + input = input.contiguous(); + offset = offset.contiguous(); + weight = weight.contiguous(); + + int batch = 1; + if (input.ndimension() == 3) { + // Force batch + batch = 0; + input.unsqueeze_(0); + offset.unsqueeze_(0); + } + + // todo: assert batchsize dividable by im2col_step + + long batchSize = input.size(0); + long nInputPlane = input.size(1); + long inputHeight = input.size(2); + long inputWidth = input.size(3); + + long nOutputPlane = weight.size(0); + + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); + + output = output.view({batchSize / im2col_step, im2col_step, nOutputPlane, + outputHeight, outputWidth}); + columns = at::zeros( + {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, + input.options()); + + if (ones.ndimension() != 2 || + ones.size(0) * ones.size(1) < outputHeight * outputWidth) { + ones = at::ones({outputHeight, outputWidth}, input.options()); + } + + input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, + inputHeight, inputWidth}); + offset = + offset.view({batchSize / im2col_step, im2col_step, + deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + at::Tensor output_buffer = + at::zeros({batchSize / im2col_step, nOutputPlane, + im2col_step * outputHeight, outputWidth}, + output.options()); + + output_buffer = output_buffer.view( + {output_buffer.size(0), group, output_buffer.size(1) / group, + output_buffer.size(2), output_buffer.size(3)}); + + for (int elt = 0; elt 
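+ // im2col_step batches several images into one columns buffer / GEMM pass:
+ // e.g. batchSize = 8 with im2col_step = 4 runs this loop twice on columns
+ // shaped [nInputPlane * kW * kH, 4 * outputHeight * outputWidth].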
< batchSize / im2col_step; elt++) { + deformable_im2col(input[elt], offset[elt], nInputPlane, inputHeight, + inputWidth, kH, kW, padH, padW, dH, dW, dilationH, + dilationW, im2col_step, deformable_group, columns); + + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + weight = weight.view({group, weight.size(0) / group, weight.size(1), + weight.size(2), weight.size(3)}); + + for (int g = 0; g < group; g++) { + output_buffer[elt][g] = output_buffer[elt][g] + .flatten(1) + .addmm_(weight[g].flatten(1), columns[g]) + .view_as(output_buffer[elt][g]); + } + } + + output_buffer = output_buffer.view( + {output_buffer.size(0), output_buffer.size(1) * output_buffer.size(2), + output_buffer.size(3), output_buffer.size(4)}); + + output_buffer = output_buffer.view({batchSize / im2col_step, nOutputPlane, + im2col_step, outputHeight, outputWidth}); + output_buffer.transpose_(1, 2); + output.copy_(output_buffer); + output = output.view({batchSize, nOutputPlane, outputHeight, outputWidth}); + + input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); + offset = offset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + if (batch == 0) { + output = output.view({nOutputPlane, outputHeight, outputWidth}); + input = input.view({nInputPlane, inputHeight, inputWidth}); + offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); + } + + return 1; +} + +int deform_conv_backward_input_cuda(at::Tensor input, at::Tensor offset, + at::Tensor gradOutput, at::Tensor gradInput, + at::Tensor gradOffset, at::Tensor weight, + at::Tensor columns, int kW, int kH, int dW, + int dH, int padW, int padH, int dilationW, + int dilationH, int group, + int deformable_group, int im2col_step) { + shape_check(input, offset, &gradOutput, weight, kH, kW, dH, dW, padH, padW, + dilationH, dilationW, group, deformable_group); + at::DeviceGuard guard(input.device()); + + input = input.contiguous(); + offset = offset.contiguous(); + gradOutput = gradOutput.contiguous(); + weight = weight.contiguous(); + + int batch = 1; + + if (input.ndimension() == 3) { + // Force batch + batch = 0; + input = input.view({1, input.size(0), input.size(1), input.size(2)}); + offset = offset.view({1, offset.size(0), offset.size(1), offset.size(2)}); + gradOutput = gradOutput.view( + {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)}); + } + + long batchSize = input.size(0); + long nInputPlane = input.size(1); + long inputHeight = input.size(2); + long inputWidth = input.size(3); + + long nOutputPlane = weight.size(0); + + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + TORCH_CHECK((offset.size(0) == batchSize), 3, "invalid batch size of offset"); + gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); + columns = at::zeros( + {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, + input.options()); + + // change order of grad output + gradOutput = gradOutput.view({batchSize / im2col_step, im2col_step, + nOutputPlane, outputHeight, outputWidth}); + gradOutput.transpose_(1, 2); + + gradInput = gradInput.view({batchSize / im2col_step, im2col_step, nInputPlane, + inputHeight, inputWidth}); + input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, + inputHeight, inputWidth}); + gradOffset = gradOffset.view({batchSize / im2col_step, im2col_step, + deformable_group * 2 * kH * kW, outputHeight, 
+ outputWidth}); + offset = + offset.view({batchSize / im2col_step, im2col_step, + deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + for (int elt = 0; elt < batchSize / im2col_step; elt++) { + // divide into groups + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + weight = weight.view({group, weight.size(0) / group, weight.size(1), + weight.size(2), weight.size(3)}); + gradOutput = gradOutput.view( + {gradOutput.size(0), group, gradOutput.size(1) / group, + gradOutput.size(2), gradOutput.size(3), gradOutput.size(4)}); + + for (int g = 0; g < group; g++) { + columns[g] = columns[g].addmm_(weight[g].flatten(1).transpose(0, 1), + gradOutput[elt][g].flatten(1), 0.0f, 1.0f); + } + + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + gradOutput = gradOutput.view( + {gradOutput.size(0), gradOutput.size(1) * gradOutput.size(2), + gradOutput.size(3), gradOutput.size(4), gradOutput.size(5)}); + + deformable_col2im_coord(columns, input[elt], offset[elt], nInputPlane, + inputHeight, inputWidth, kH, kW, padH, padW, dH, dW, + dilationH, dilationW, im2col_step, deformable_group, + gradOffset[elt]); + + deformable_col2im(columns, offset[elt], nInputPlane, inputHeight, + inputWidth, kH, kW, padH, padW, dH, dW, dilationH, + dilationW, im2col_step, deformable_group, gradInput[elt]); + } + + gradOutput.transpose_(1, 2); + gradOutput = + gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); + + gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); + input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); + gradOffset = gradOffset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + offset = offset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + if (batch == 0) { + gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); + input = input.view({nInputPlane, inputHeight, inputWidth}); + gradInput = gradInput.view({nInputPlane, inputHeight, inputWidth}); + offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); + gradOffset = + gradOffset.view({offset.size(1), offset.size(2), offset.size(3)}); + } + + return 1; +} + +int deform_conv_backward_parameters_cuda( + at::Tensor input, at::Tensor offset, at::Tensor gradOutput, + at::Tensor gradWeight, // at::Tensor gradBias, + at::Tensor columns, at::Tensor ones, int kW, int kH, int dW, int dH, + int padW, int padH, int dilationW, int dilationH, int group, + int deformable_group, float scale, int im2col_step) { + // todo: transpose and reshape outGrad + // todo: reshape columns + // todo: add im2col_step as input + + shape_check(input, offset, &gradOutput, gradWeight, kH, kW, dH, dW, padH, + padW, dilationH, dilationW, group, deformable_group); + at::DeviceGuard guard(input.device()); + + input = input.contiguous(); + offset = offset.contiguous(); + gradOutput = gradOutput.contiguous(); + + int batch = 1; + + if (input.ndimension() == 3) { + // Force batch + batch = 0; + input = input.view( + at::IntList({1, input.size(0), input.size(1), input.size(2)})); + gradOutput = gradOutput.view( + {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)}); + } + + long batchSize = input.size(0); + long nInputPlane = input.size(1); + long inputHeight = input.size(2); + long inputWidth = input.size(3); + + long nOutputPlane = gradWeight.size(0); + + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight 
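+ // Standard conv output size: out = (in + 2*pad - (dilation*(k-1)+1)) / stride + 1,
+ // e.g. in = 64, pad = 1, k = 3, dilation = 1, stride = 1 -> (64 + 2 - 3) + 1 = 64.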
= + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); + + columns = at::zeros( + {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, + input.options()); + + gradOutput = gradOutput.view({batchSize / im2col_step, im2col_step, + nOutputPlane, outputHeight, outputWidth}); + gradOutput.transpose_(1, 2); + + at::Tensor gradOutputBuffer = at::zeros_like(gradOutput); + gradOutputBuffer = + gradOutputBuffer.view({batchSize / im2col_step, nOutputPlane, im2col_step, + outputHeight, outputWidth}); + gradOutputBuffer.copy_(gradOutput); + gradOutputBuffer = + gradOutputBuffer.view({batchSize / im2col_step, nOutputPlane, + im2col_step * outputHeight, outputWidth}); + + gradOutput.transpose_(1, 2); + gradOutput = + gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); + + input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, + inputHeight, inputWidth}); + offset = + offset.view({batchSize / im2col_step, im2col_step, + deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + for (int elt = 0; elt < batchSize / im2col_step; elt++) { + deformable_im2col(input[elt], offset[elt], nInputPlane, inputHeight, + inputWidth, kH, kW, padH, padW, dH, dW, dilationH, + dilationW, im2col_step, deformable_group, columns); + + // divide into group + gradOutputBuffer = gradOutputBuffer.view( + {gradOutputBuffer.size(0), group, gradOutputBuffer.size(1) / group, + gradOutputBuffer.size(2), gradOutputBuffer.size(3)}); + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + gradWeight = + gradWeight.view({group, gradWeight.size(0) / group, gradWeight.size(1), + gradWeight.size(2), gradWeight.size(3)}); + + for (int g = 0; g < group; g++) { + gradWeight[g] = gradWeight[g] + .flatten(1) + .addmm_(gradOutputBuffer[elt][g].flatten(1), + columns[g].transpose(1, 0), 1.0, scale) + .view_as(gradWeight[g]); + } + gradOutputBuffer = gradOutputBuffer.view( + {gradOutputBuffer.size(0), + gradOutputBuffer.size(1) * gradOutputBuffer.size(2), + gradOutputBuffer.size(3), gradOutputBuffer.size(4)}); + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + gradWeight = gradWeight.view({gradWeight.size(0) * gradWeight.size(1), + gradWeight.size(2), gradWeight.size(3), + gradWeight.size(4)}); + } + + input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); + offset = offset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + if (batch == 0) { + gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); + input = input.view({nInputPlane, inputHeight, inputWidth}); + } + + return 1; +} + +void modulated_deform_conv_cuda_forward( + at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, + at::Tensor offset, at::Tensor mask, at::Tensor output, at::Tensor columns, + int kernel_h, int kernel_w, const int stride_h, const int stride_w, + const int pad_h, const int pad_w, const int dilation_h, + const int dilation_w, const int group, const int deformable_group, + const bool with_bias) { + TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); + TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); + at::DeviceGuard guard(input.device()); + + const int batch = input.size(0); + const int channels = input.size(1); + const int height = input.size(2); + const int width = input.size(3); + + const int channels_out = weight.size(0); + const int 
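+ // weight is laid out [channels_out, channels_in / group, kernel_h, kernel_w];
+ // the checks just below enforce that the declared kernel size matches the
+ // weight tensor and that input channels == channels_kernel * group.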
channels_kernel = weight.size(1); + const int kernel_h_ = weight.size(2); + const int kernel_w_ = weight.size(3); + + if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) + AT_ERROR("Input shape and kernel shape won't match: (%d x %d vs %d x %d).", + kernel_h_, kernel_w, kernel_h_, kernel_w_); + if (channels != channels_kernel * group) + AT_ERROR("Input shape and kernel channels won't match: (%d vs %d).", + channels, channels_kernel * group); + + const int height_out = + (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; + const int width_out = + (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + + if (ones.ndimension() != 2 || + ones.size(0) * ones.size(1) < height_out * width_out) { + // Resize plane and fill with ones... + ones = at::ones({height_out, width_out}, input.options()); + } + + // resize output + output = output.view({batch, channels_out, height_out, width_out}).zero_(); + // resize temporary columns + columns = + at::zeros({channels * kernel_h * kernel_w, 1 * height_out * width_out}, + input.options()); + + output = output.view({output.size(0), group, output.size(1) / group, + output.size(2), output.size(3)}); + + for (int b = 0; b < batch; b++) { + modulated_deformable_im2col_cuda( + input[b], offset[b], mask[b], 1, channels, height, width, height_out, + width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, deformable_group, columns); + + // divide into group + weight = weight.view({group, weight.size(0) / group, weight.size(1), + weight.size(2), weight.size(3)}); + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + + for (int g = 0; g < group; g++) { + output[b][g] = output[b][g] + .flatten(1) + .addmm_(weight[g].flatten(1), columns[g]) + .view_as(output[b][g]); + } + + weight = weight.view({weight.size(0) * weight.size(1), weight.size(2), + weight.size(3), weight.size(4)}); + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + } + + output = output.view({output.size(0), output.size(1) * output.size(2), + output.size(3), output.size(4)}); + + if (with_bias) { + output += bias.view({1, bias.size(0), 1, 1}); + } +} + +void modulated_deform_conv_cuda_backward( + at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, + at::Tensor offset, at::Tensor mask, at::Tensor columns, + at::Tensor grad_input, at::Tensor grad_weight, at::Tensor grad_bias, + at::Tensor grad_offset, at::Tensor grad_mask, at::Tensor grad_output, + int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, + int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, + const bool with_bias) { + TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); + TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); + at::DeviceGuard guard(input.device()); + + const int batch = input.size(0); + const int channels = input.size(1); + const int height = input.size(2); + const int width = input.size(3); + + const int channels_kernel = weight.size(1); + const int kernel_h_ = weight.size(2); + const int kernel_w_ = weight.size(3); + if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) + AT_ERROR("Input shape and kernel shape won't match: (%d x %d vs %d x %d).", + kernel_h_, kernel_w, kernel_h_, kernel_w_); + if (channels != channels_kernel * group) + AT_ERROR("Input shape and kernel channels won't match: (%d vs %d).", + channels, channels_kernel * group); + + const int height_out = + (height + 2 * pad_h - (dilation_h 
* (kernel_h - 1) + 1)) / stride_h + 1; + const int width_out = + (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + + if (ones.ndimension() != 2 || + ones.size(0) * ones.size(1) < height_out * width_out) { + // Resize plane and fill with ones... + ones = at::ones({height_out, width_out}, input.options()); + } + + grad_input = grad_input.view({batch, channels, height, width}); + columns = at::zeros({channels * kernel_h * kernel_w, height_out * width_out}, + input.options()); + + grad_output = + grad_output.view({grad_output.size(0), group, grad_output.size(1) / group, + grad_output.size(2), grad_output.size(3)}); + + for (int b = 0; b < batch; b++) { + // divide int group + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + weight = weight.view({group, weight.size(0) / group, weight.size(1), + weight.size(2), weight.size(3)}); + + for (int g = 0; g < group; g++) { + columns[g].addmm_(weight[g].flatten(1).transpose(0, 1), + grad_output[b][g].flatten(1), 0.0f, 1.0f); + } + + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + weight = weight.view({weight.size(0) * weight.size(1), weight.size(2), + weight.size(3), weight.size(4)}); + + // gradient w.r.t. input coordinate data + modulated_deformable_col2im_coord_cuda( + columns, input[b], offset[b], mask[b], 1, channels, height, width, + height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, + stride_w, dilation_h, dilation_w, deformable_group, grad_offset[b], + grad_mask[b]); + // gradient w.r.t. input data + modulated_deformable_col2im_cuda( + columns, offset[b], mask[b], 1, channels, height, width, height_out, + width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, deformable_group, grad_input[b]); + + // gradient w.r.t. weight, dWeight should accumulate across the batch and + // group + modulated_deformable_im2col_cuda( + input[b], offset[b], mask[b], 1, channels, height, width, height_out, + width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, deformable_group, columns); + + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + grad_weight = grad_weight.view({group, grad_weight.size(0) / group, + grad_weight.size(1), grad_weight.size(2), + grad_weight.size(3)}); + if (with_bias) + grad_bias = grad_bias.view({group, grad_bias.size(0) / group}); + + for (int g = 0; g < group; g++) { + grad_weight[g] = + grad_weight[g] + .flatten(1) + .addmm_(grad_output[b][g].flatten(1), columns[g].transpose(0, 1)) + .view_as(grad_weight[g]); + if (with_bias) { + grad_bias[g] = + grad_bias[g] + .view({-1, 1}) + .addmm_(grad_output[b][g].flatten(1), ones.view({-1, 1})) + .view(-1); + } + } + + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + grad_weight = grad_weight.view({grad_weight.size(0) * grad_weight.size(1), + grad_weight.size(2), grad_weight.size(3), + grad_weight.size(4)}); + if (with_bias) + grad_bias = grad_bias.view({grad_bias.size(0) * grad_bias.size(1)}); + } + grad_output = grad_output.view({grad_output.size(0) * grad_output.size(1), + grad_output.size(2), grad_output.size(3), + grad_output.size(4)}); +} diff --git a/IIR-Lab/models/dcn/src/deform_conv_cuda_kernel.cu b/IIR-Lab/models/dcn/src/deform_conv_cuda_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..98752dccf8c58817ca1a952554dd3f33188a2d34 --- /dev/null +++ b/IIR-Lab/models/dcn/src/deform_conv_cuda_kernel.cu @@ -0,0 +1,867 @@ +/*! 
+ ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** + * + * COPYRIGHT + * + * All contributions by the University of California: + * Copyright (c) 2014-2017 The Regents of the University of California (Regents) + * All rights reserved. + * + * All other contributions: + * Copyright (c) 2014-2017, the respective contributors + * All rights reserved. + * + * Caffe uses a shared copyright model: each contributor holds copyright over + * their contributions to Caffe. The project versioning records all such + * contribution and copyright details. If a contributor wants to further mark + * their specific copyright on a particular contribution, they should indicate + * their copyright solely in the commit message of the change when it is + * committed. + * + * LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * CONTRIBUTION AGREEMENT + * + * By contributing to the BVLC/caffe repository through pull-request, comment, + * or otherwise, the contributor releases their content to the + * license and copyright terms herein. + * + ***************** END Caffe Copyright Notice and Disclaimer ******************** + * + * Copyright (c) 2018 Microsoft + * Licensed under The MIT License [see LICENSE for details] + * \file modulated_deformable_im2col.cuh + * \brief Function definitions of converting an image to + * column matrix based on kernel, padding, dilation, and offset. + * These functions are mainly used in deformable convolution operators. 
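+ *
+ * The kernels below come in three families, each with a plain and a
+ * modulated (mask-weighted, DCNv2) variant:
+ *   im2col       - gather bilinearly-sampled input values into a column matrix
+ *   col2im       - scatter column gradients back onto the input image
+ *   col2im_coord - gradients with respect to the sampling offsets (and masks)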
+ * \ref: https://arxiv.org/abs/1703.06211
+ * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng
+ */
+
+// modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <THC/THCAtomics.cuh>
+#include <stdio.h>
+#include <math.h>
+#include <float.h>
+
+using namespace at;
+
+// Grid-stride loop: each thread covers several elements, so n may exceed the grid.
+#define CUDA_KERNEL_LOOP(i, n)                                 \
+  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
+       i += blockDim.x * gridDim.x)
+
+const int CUDA_NUM_THREADS = 1024;
+const int kMaxGridNum = 65535;
+
+inline int GET_BLOCKS(const int N)
+{
+  return std::min(kMaxGridNum, (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS);
+}
+
+// Bilinear sample of bottom_data at fractional (h, w); out-of-range corners count as zero.
+template <typename scalar_t>
+__device__ scalar_t deformable_im2col_bilinear(const scalar_t *bottom_data, const int data_width,
+                                               const int height, const int width, scalar_t h, scalar_t w)
+{
+
+  int h_low = floor(h);
+  int w_low = floor(w);
+  int h_high = h_low + 1;
+  int w_high = w_low + 1;
+
+  scalar_t lh = h - h_low;
+  scalar_t lw = w - w_low;
+  scalar_t hh = 1 - lh, hw = 1 - lw;
+
+  scalar_t v1 = 0;
+  if (h_low >= 0 && w_low >= 0)
+    v1 = bottom_data[h_low * data_width + w_low];
+  scalar_t v2 = 0;
+  if (h_low >= 0 && w_high <= width - 1)
+    v2 = bottom_data[h_low * data_width + w_high];
+  scalar_t v3 = 0;
+  if (h_high <= height - 1 && w_low >= 0)
+    v3 = bottom_data[h_high * data_width + w_low];
+  scalar_t v4 = 0;
+  if (h_high <= height - 1 && w_high <= width - 1)
+    v4 = bottom_data[h_high * data_width + w_high];
+
+  scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
+
+  scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
+  return val;
+}
+
+// Weight with which the column gradient at sample (argmax_h, argmax_w) flows
+// back to the integer pixel (h, w).
+template <typename scalar_t>
+__device__ scalar_t get_gradient_weight(scalar_t argmax_h, scalar_t argmax_w,
+                                        const int h, const int w, const int height, const int width)
+{
+
+  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)
+  {
+    //empty
+    return 0;
+  }
+
+  int argmax_h_low = floor(argmax_h);
+  int argmax_w_low = floor(argmax_w);
+  int argmax_h_high = argmax_h_low + 1;
+  int argmax_w_high = argmax_w_low + 1;
+
+  scalar_t weight = 0;
+  if (h == argmax_h_low && w == argmax_w_low)
+    weight = (h + 1 - argmax_h) * (w + 1 - argmax_w);
+  if (h == argmax_h_low && w == argmax_w_high)
+    weight = (h + 1 - argmax_h) * (argmax_w + 1 - w);
+  if (h == argmax_h_high && w == argmax_w_low)
+    weight = (argmax_h + 1 - h) * (w + 1 - argmax_w);
+  if (h == argmax_h_high && w == argmax_w_high)
+    weight = (argmax_h + 1 - h) * (argmax_w + 1 - w);
+  return weight;
+}
+
+// Derivative of the bilinear sample with respect to the offset coordinate
+// (bp_dir == 0 differentiates along h, bp_dir == 1 along w).
+template <typename scalar_t>
+__device__ scalar_t get_coordinate_weight(scalar_t argmax_h, scalar_t argmax_w,
+                                          const int height, const int width, const scalar_t *im_data,
+                                          const int data_width, const int bp_dir)
+{
+
+  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)
+  {
+    //empty
+    return 0;
+  }
+
+  int argmax_h_low = floor(argmax_h);
+  int argmax_w_low = floor(argmax_w);
+  int argmax_h_high = argmax_h_low + 1;
+  int argmax_w_high = argmax_w_low + 1;
+
+  scalar_t weight = 0;
+
+  if (bp_dir == 0)
+  {
+    if (argmax_h_low >= 0 && argmax_w_low >= 0)
+      weight += -1 * (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_low * data_width + argmax_w_low];
+    if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
+      weight += -1 * (argmax_w - argmax_w_low) * im_data[argmax_h_low * data_width + argmax_w_high];
+    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
+      weight += (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_high * data_width + argmax_w_low];
+    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
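+      // Each term accumulated in this branch is d(bilinear weight)/d(offset)
+      // for one of the four integer corners around (argmax_h, argmax_w); the
+      // sign pattern comes from differentiating the interpolation weights.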
weight += (argmax_w - argmax_w_low) * im_data[argmax_h_high * data_width + argmax_w_high]; + } + else if (bp_dir == 1) + { + if (argmax_h_low >= 0 && argmax_w_low >= 0) + weight += -1 * (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_low]; + if (argmax_h_low >= 0 && argmax_w_high <= width - 1) + weight += (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_high]; + if (argmax_h_high <= height - 1 && argmax_w_low >= 0) + weight += -1 * (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_low]; + if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) + weight += (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_high]; + } + + return weight; +} + +template +__global__ void deformable_im2col_gpu_kernel(const int n, const scalar_t *data_im, const scalar_t *data_offset, + const int height, const int width, const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, const int channel_per_deformable_group, + const int batch_size, const int num_channels, const int deformable_group, + const int height_col, const int width_col, + scalar_t *data_col) +{ + CUDA_KERNEL_LOOP(index, n) + { + // index index of output matrix + const int w_col = index % width_col; + const int h_col = (index / width_col) % height_col; + const int b_col = (index / width_col / height_col) % batch_size; + const int c_im = (index / width_col / height_col) / batch_size; + const int c_col = c_im * kernel_h * kernel_w; + + // compute deformable group index + const int deformable_group_index = c_im / channel_per_deformable_group; + + const int h_in = h_col * stride_h - pad_h; + const int w_in = w_col * stride_w - pad_w; + scalar_t *data_col_ptr = data_col + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col; + //const scalar_t* data_im_ptr = data_im + ((b_col * num_channels + c_im) * height + h_in) * width + w_in; + const scalar_t *data_im_ptr = data_im + (b_col * num_channels + c_im) * height * width; + const scalar_t *data_offset_ptr = data_offset + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col; + + for (int i = 0; i < kernel_h; ++i) + { + for (int j = 0; j < kernel_w; ++j) + { + const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col; + const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + w_col; + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + scalar_t val = static_cast(0); + const scalar_t h_im = h_in + i * dilation_h + offset_h; + const scalar_t w_im = w_in + j * dilation_w + offset_w; + if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) + { + //const scalar_t map_h = i * dilation_h + offset_h; + //const scalar_t map_w = j * dilation_w + offset_w; + //const int cur_height = height - h_in; + //const int cur_width = width - w_in; + //val = deformable_im2col_bilinear(data_im_ptr, width, cur_height, cur_width, map_h, map_w); + val = deformable_im2col_bilinear(data_im_ptr, width, height, width, h_im, w_im); + } + *data_col_ptr = val; + data_col_ptr += batch_size * height_col * width_col; + } + } + } +} + +void deformable_im2col( + const at::Tensor data_im, const at::Tensor data_offset, const int channels, + const int height, const int width, const int ksize_h, const int ksize_w, 
+ const int pad_h, const int pad_w, const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, const int parallel_imgs, + const int deformable_group, at::Tensor data_col) +{ + // num_axes should be smaller than block size + // todo: check parallel_imgs is correctly passed in + int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; + int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; + int num_kernels = channels * height_col * width_col * parallel_imgs; + int channel_per_deformable_group = channels / deformable_group; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_im.scalar_type(), "deformable_im2col_gpu", ([&] { + const scalar_t *data_im_ = data_im.data_ptr(); + const scalar_t *data_offset_ = data_offset.data_ptr(); + scalar_t *data_col_ = data_col.data_ptr(); + + deformable_im2col_gpu_kernel<<>>( + num_kernels, data_im_, data_offset_, height, width, ksize_h, ksize_w, + pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, + channel_per_deformable_group, parallel_imgs, channels, deformable_group, + height_col, width_col, data_col_); + })); + + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + { + printf("error in deformable_im2col: %s\n", cudaGetErrorString(err)); + } +} + +template +__global__ void deformable_col2im_gpu_kernel( + const int n, const scalar_t *data_col, const scalar_t *data_offset, + const int channels, const int height, const int width, + const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, const int deformable_group, + const int height_col, const int width_col, + scalar_t *grad_im) +{ + CUDA_KERNEL_LOOP(index, n) + { + const int j = (index / width_col / height_col / batch_size) % kernel_w; + const int i = (index / width_col / height_col / batch_size / kernel_w) % kernel_h; + const int c = index / width_col / height_col / batch_size / kernel_w / kernel_h; + // compute the start and end of the output + + const int deformable_group_index = c / channel_per_deformable_group; + + int w_out = index % width_col; + int h_out = (index / width_col) % height_col; + int b = (index / width_col / height_col) % batch_size; + int w_in = w_out * stride_w - pad_w; + int h_in = h_out * stride_h - pad_h; + + const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * + 2 * kernel_h * kernel_w * height_col * width_col; + const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out; + const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out; + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + const scalar_t cur_inv_h_data = h_in + i * dilation_h + offset_h; + const scalar_t cur_inv_w_data = w_in + j * dilation_w + offset_w; + + const scalar_t cur_top_grad = data_col[index]; + const int cur_h = (int)cur_inv_h_data; + const int cur_w = (int)cur_inv_w_data; + for (int dy = -2; dy <= 2; dy++) + { + for (int dx = -2; dx <= 2; dx++) + { + if (cur_h + dy >= 0 && cur_h + dy < height && + cur_w + dx >= 0 && cur_w + dx < width && + abs(cur_inv_h_data - (cur_h + dy)) < 1 && + abs(cur_inv_w_data - (cur_w + dx)) < 1) + { + int cur_bottom_grad_pos = ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx; + scalar_t 
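+          // Scatter the column gradient back to the integer pixels the bilinear
+          // sample touched: the dy/dx window plus the abs(...) < 1 tests select
+          // exactly those neighbours, and atomicAdd is needed because several
+          // output positions can map to the same input pixel.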
weight = get_gradient_weight(cur_inv_h_data, cur_inv_w_data, cur_h + dy, cur_w + dx, height, width); + atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad); + } + } + } + } +} + +void deformable_col2im( + const at::Tensor data_col, const at::Tensor data_offset, const int channels, + const int height, const int width, const int ksize_h, + const int ksize_w, const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int parallel_imgs, const int deformable_group, + at::Tensor grad_im) +{ + + // todo: make sure parallel_imgs is passed in correctly + int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; + int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; + int num_kernels = channels * ksize_h * ksize_w * height_col * width_col * parallel_imgs; + int channel_per_deformable_group = channels / deformable_group; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_col.scalar_type(), "deformable_col2im_gpu", ([&] { + const scalar_t *data_col_ = data_col.data_ptr(); + const scalar_t *data_offset_ = data_offset.data_ptr(); + scalar_t *grad_im_ = grad_im.data_ptr(); + + deformable_col2im_gpu_kernel<<>>( + num_kernels, data_col_, data_offset_, channels, height, width, ksize_h, + ksize_w, pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, channel_per_deformable_group, + parallel_imgs, deformable_group, height_col, width_col, grad_im_); + })); + + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + { + printf("error in deformable_col2im: %s\n", cudaGetErrorString(err)); + } +} + +template +__global__ void deformable_col2im_coord_gpu_kernel(const int n, const scalar_t *data_col, + const scalar_t *data_im, const scalar_t *data_offset, + const int channels, const int height, const int width, + const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, const int offset_channels, const int deformable_group, + const int height_col, const int width_col, scalar_t *grad_offset) +{ + CUDA_KERNEL_LOOP(index, n) + { + scalar_t val = 0; + int w = index % width_col; + int h = (index / width_col) % height_col; + int c = (index / width_col / height_col) % offset_channels; + int b = (index / width_col / height_col) / offset_channels; + // compute the start and end of the output + + const int deformable_group_index = c / (2 * kernel_h * kernel_w); + const int col_step = kernel_h * kernel_w; + int cnt = 0; + const scalar_t *data_col_ptr = data_col + deformable_group_index * channel_per_deformable_group * + batch_size * width_col * height_col; + const scalar_t *data_im_ptr = data_im + (b * deformable_group + deformable_group_index) * + channel_per_deformable_group / kernel_h / kernel_w * height * width; + const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * + kernel_h * kernel_w * height_col * width_col; + + const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w; + + for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; col_c += col_step) + { + const int col_pos = (((col_c * batch_size + b) * height_col) + h) * width_col + w; + const int bp_dir = offset_c % 2; + + int j = (col_pos / width_col / height_col / batch_size) % kernel_w; + int i = (col_pos / width_col / height_col / batch_size / kernel_w) % 
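+      // Decode the kernel tap (i, j) from the linear column index: the column
+      // matrix is laid out with channels * kernel_h * kernel_w rows and
+      // batch * height_col * width_col entries per row.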
kernel_h; + int w_out = col_pos % width_col; + int h_out = (col_pos / width_col) % height_col; + int w_in = w_out * stride_w - pad_w; + int h_in = h_out * stride_h - pad_h; + const int data_offset_h_ptr = (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out); + const int data_offset_w_ptr = (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out); + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + scalar_t inv_h = h_in + i * dilation_h + offset_h; + scalar_t inv_w = w_in + j * dilation_w + offset_w; + if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) + { + inv_h = inv_w = -2; + } + const scalar_t weight = get_coordinate_weight( + inv_h, inv_w, + height, width, data_im_ptr + cnt * height * width, width, bp_dir); + val += weight * data_col_ptr[col_pos]; + cnt += 1; + } + + grad_offset[index] = val; + } +} + +void deformable_col2im_coord( + const at::Tensor data_col, const at::Tensor data_im, const at::Tensor data_offset, + const int channels, const int height, const int width, const int ksize_h, + const int ksize_w, const int pad_h, const int pad_w, const int stride_h, + const int stride_w, const int dilation_h, const int dilation_w, + const int parallel_imgs, const int deformable_group, at::Tensor grad_offset) +{ + + int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; + int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; + int num_kernels = height_col * width_col * 2 * ksize_h * ksize_w * deformable_group * parallel_imgs; + int channel_per_deformable_group = channels * ksize_h * ksize_w / deformable_group; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_col.scalar_type(), "deformable_col2im_coord_gpu", ([&] { + const scalar_t *data_col_ = data_col.data_ptr(); + const scalar_t *data_im_ = data_im.data_ptr(); + const scalar_t *data_offset_ = data_offset.data_ptr(); + scalar_t *grad_offset_ = grad_offset.data_ptr(); + + deformable_col2im_coord_gpu_kernel<<>>( + num_kernels, data_col_, data_im_, data_offset_, channels, height, width, + ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, channel_per_deformable_group, + parallel_imgs, 2 * ksize_h * ksize_w * deformable_group, deformable_group, + height_col, width_col, grad_offset_); + })); +} + +template +__device__ scalar_t dmcn_im2col_bilinear(const scalar_t *bottom_data, const int data_width, + const int height, const int width, scalar_t h, scalar_t w) +{ + int h_low = floor(h); + int w_low = floor(w); + int h_high = h_low + 1; + int w_high = w_low + 1; + + scalar_t lh = h - h_low; + scalar_t lw = w - w_low; + scalar_t hh = 1 - lh, hw = 1 - lw; + + scalar_t v1 = 0; + if (h_low >= 0 && w_low >= 0) + v1 = bottom_data[h_low * data_width + w_low]; + scalar_t v2 = 0; + if (h_low >= 0 && w_high <= width - 1) + v2 = bottom_data[h_low * data_width + w_high]; + scalar_t v3 = 0; + if (h_high <= height - 1 && w_low >= 0) + v3 = bottom_data[h_high * data_width + w_low]; + scalar_t v4 = 0; + if (h_high <= height - 1 && w_high <= width - 1) + v4 = bottom_data[h_high * data_width + w_high]; + + scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; + + scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + return val; +} + +template +__device__ scalar_t dmcn_get_gradient_weight(scalar_t argmax_h, scalar_t argmax_w, + const int h, const int w, const int height, const int width) +{ + if (argmax_h <= -1 || argmax_h >= 
height || argmax_w <= -1 || argmax_w >= width) + { + //empty + return 0; + } + + int argmax_h_low = floor(argmax_h); + int argmax_w_low = floor(argmax_w); + int argmax_h_high = argmax_h_low + 1; + int argmax_w_high = argmax_w_low + 1; + + scalar_t weight = 0; + if (h == argmax_h_low && w == argmax_w_low) + weight = (h + 1 - argmax_h) * (w + 1 - argmax_w); + if (h == argmax_h_low && w == argmax_w_high) + weight = (h + 1 - argmax_h) * (argmax_w + 1 - w); + if (h == argmax_h_high && w == argmax_w_low) + weight = (argmax_h + 1 - h) * (w + 1 - argmax_w); + if (h == argmax_h_high && w == argmax_w_high) + weight = (argmax_h + 1 - h) * (argmax_w + 1 - w); + return weight; +} + +template +__device__ scalar_t dmcn_get_coordinate_weight(scalar_t argmax_h, scalar_t argmax_w, + const int height, const int width, const scalar_t *im_data, + const int data_width, const int bp_dir) +{ + if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width) + { + //empty + return 0; + } + + int argmax_h_low = floor(argmax_h); + int argmax_w_low = floor(argmax_w); + int argmax_h_high = argmax_h_low + 1; + int argmax_w_high = argmax_w_low + 1; + + scalar_t weight = 0; + + if (bp_dir == 0) + { + if (argmax_h_low >= 0 && argmax_w_low >= 0) + weight += -1 * (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_low * data_width + argmax_w_low]; + if (argmax_h_low >= 0 && argmax_w_high <= width - 1) + weight += -1 * (argmax_w - argmax_w_low) * im_data[argmax_h_low * data_width + argmax_w_high]; + if (argmax_h_high <= height - 1 && argmax_w_low >= 0) + weight += (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_high * data_width + argmax_w_low]; + if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) + weight += (argmax_w - argmax_w_low) * im_data[argmax_h_high * data_width + argmax_w_high]; + } + else if (bp_dir == 1) + { + if (argmax_h_low >= 0 && argmax_w_low >= 0) + weight += -1 * (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_low]; + if (argmax_h_low >= 0 && argmax_w_high <= width - 1) + weight += (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_high]; + if (argmax_h_high <= height - 1 && argmax_w_low >= 0) + weight += -1 * (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_low]; + if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) + weight += (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_high]; + } + + return weight; +} + +template +__global__ void modulated_deformable_im2col_gpu_kernel(const int n, + const scalar_t *data_im, const scalar_t *data_offset, const scalar_t *data_mask, + const int height, const int width, const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, const int num_channels, const int deformable_group, + const int height_col, const int width_col, + scalar_t *data_col) +{ + CUDA_KERNEL_LOOP(index, n) + { + // index index of output matrix + const int w_col = index % width_col; + const int h_col = (index / width_col) % height_col; + const int b_col = (index / width_col / height_col) % batch_size; + const int c_im = (index / width_col / height_col) / batch_size; + const int c_col = c_im * kernel_h * kernel_w; + + // compute deformable group index + const int deformable_group_index = c_im / channel_per_deformable_group; + + const int h_in = h_col * stride_h - pad_h; + const int w_in = w_col 
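+    // Map the output coordinate back to input space: the patch origin sits at
+    // (h_col*stride_h - pad_h, w_col*stride_w - pad_w) before the learned
+    // offsets are added on top.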
* stride_w - pad_w; + + scalar_t *data_col_ptr = data_col + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col; + //const float* data_im_ptr = data_im + ((b_col * num_channels + c_im) * height + h_in) * width + w_in; + const scalar_t *data_im_ptr = data_im + (b_col * num_channels + c_im) * height * width; + const scalar_t *data_offset_ptr = data_offset + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col; + + const scalar_t *data_mask_ptr = data_mask + (b_col * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col; + + for (int i = 0; i < kernel_h; ++i) + { + for (int j = 0; j < kernel_w; ++j) + { + const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col; + const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + w_col; + const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_col) * width_col + w_col; + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; + scalar_t val = static_cast(0); + const scalar_t h_im = h_in + i * dilation_h + offset_h; + const scalar_t w_im = w_in + j * dilation_w + offset_w; + //if (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) { + if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) + { + //const float map_h = i * dilation_h + offset_h; + //const float map_w = j * dilation_w + offset_w; + //const int cur_height = height - h_in; + //const int cur_width = width - w_in; + //val = dmcn_im2col_bilinear(data_im_ptr, width, cur_height, cur_width, map_h, map_w); + val = dmcn_im2col_bilinear(data_im_ptr, width, height, width, h_im, w_im); + } + *data_col_ptr = val * mask; + data_col_ptr += batch_size * height_col * width_col; + //data_col_ptr += height_col * width_col; + } + } + } +} + +template +__global__ void modulated_deformable_col2im_gpu_kernel(const int n, + const scalar_t *data_col, const scalar_t *data_offset, const scalar_t *data_mask, + const int channels, const int height, const int width, + const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, const int deformable_group, + const int height_col, const int width_col, + scalar_t *grad_im) +{ + CUDA_KERNEL_LOOP(index, n) + { + const int j = (index / width_col / height_col / batch_size) % kernel_w; + const int i = (index / width_col / height_col / batch_size / kernel_w) % kernel_h; + const int c = index / width_col / height_col / batch_size / kernel_w / kernel_h; + // compute the start and end of the output + + const int deformable_group_index = c / channel_per_deformable_group; + + int w_out = index % width_col; + int h_out = (index / width_col) % height_col; + int b = (index / width_col / height_col) % batch_size; + int w_in = w_out * stride_w - pad_w; + int h_in = h_out * stride_h - pad_h; + + const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col; + const scalar_t *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col; + const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out; + const int 
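+    // Offset channels are interleaved per kernel tap: channel 2*(i*kernel_w+j)
+    // holds the h-offset and 2*(i*kernel_w+j)+1 the w-offset, while the mask
+    // has one channel per tap at (i*kernel_w+j), all read at this output position.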
data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out; + const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_out) * width_col + w_out; + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; + const scalar_t cur_inv_h_data = h_in + i * dilation_h + offset_h; + const scalar_t cur_inv_w_data = w_in + j * dilation_w + offset_w; + + const scalar_t cur_top_grad = data_col[index] * mask; + const int cur_h = (int)cur_inv_h_data; + const int cur_w = (int)cur_inv_w_data; + for (int dy = -2; dy <= 2; dy++) + { + for (int dx = -2; dx <= 2; dx++) + { + if (cur_h + dy >= 0 && cur_h + dy < height && + cur_w + dx >= 0 && cur_w + dx < width && + abs(cur_inv_h_data - (cur_h + dy)) < 1 && + abs(cur_inv_w_data - (cur_w + dx)) < 1) + { + int cur_bottom_grad_pos = ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx; + scalar_t weight = dmcn_get_gradient_weight(cur_inv_h_data, cur_inv_w_data, cur_h + dy, cur_w + dx, height, width); + atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad); + } + } + } + } +} + +template +__global__ void modulated_deformable_col2im_coord_gpu_kernel(const int n, + const scalar_t *data_col, const scalar_t *data_im, + const scalar_t *data_offset, const scalar_t *data_mask, + const int channels, const int height, const int width, + const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, const int offset_channels, const int deformable_group, + const int height_col, const int width_col, + scalar_t *grad_offset, scalar_t *grad_mask) +{ + CUDA_KERNEL_LOOP(index, n) + { + scalar_t val = 0, mval = 0; + int w = index % width_col; + int h = (index / width_col) % height_col; + int c = (index / width_col / height_col) % offset_channels; + int b = (index / width_col / height_col) / offset_channels; + // compute the start and end of the output + + const int deformable_group_index = c / (2 * kernel_h * kernel_w); + const int col_step = kernel_h * kernel_w; + int cnt = 0; + const scalar_t *data_col_ptr = data_col + deformable_group_index * channel_per_deformable_group * batch_size * width_col * height_col; + const scalar_t *data_im_ptr = data_im + (b * deformable_group + deformable_group_index) * channel_per_deformable_group / kernel_h / kernel_w * height * width; + const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col; + const scalar_t *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col; + + const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w; + + for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; col_c += col_step) + { + const int col_pos = (((col_c * batch_size + b) * height_col) + h) * width_col + w; + const int bp_dir = offset_c % 2; + + int j = (col_pos / width_col / height_col / batch_size) % kernel_w; + int i = (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h; + int w_out = col_pos % width_col; + int h_out = (col_pos / width_col) % height_col; + int w_in = w_out * stride_w - pad_w; + int h_in = h_out * stride_h - pad_h; + const int data_offset_h_ptr = (((2 * (i * kernel_w + j)) * height_col + 
h_out) * width_col + w_out); + const int data_offset_w_ptr = (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out); + const int data_mask_hw_ptr = (((i * kernel_w + j) * height_col + h_out) * width_col + w_out); + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; + scalar_t inv_h = h_in + i * dilation_h + offset_h; + scalar_t inv_w = w_in + j * dilation_w + offset_w; + if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) + { + inv_h = inv_w = -2; + } + else + { + mval += data_col_ptr[col_pos] * dmcn_im2col_bilinear(data_im_ptr + cnt * height * width, width, height, width, inv_h, inv_w); + } + const scalar_t weight = dmcn_get_coordinate_weight( + inv_h, inv_w, + height, width, data_im_ptr + cnt * height * width, width, bp_dir); + val += weight * data_col_ptr[col_pos] * mask; + cnt += 1; + } + // KERNEL_ASSIGN(grad_offset[index], offset_req, val); + grad_offset[index] = val; + if (offset_c % 2 == 0) + // KERNEL_ASSIGN(grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * height_col + h) * width_col + w], mask_req, mval); + grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * height_col + h) * width_col + w] = mval; + } +} + +void modulated_deformable_im2col_cuda( + const at::Tensor data_im, const at::Tensor data_offset, const at::Tensor data_mask, + const int batch_size, const int channels, const int height_im, const int width_im, + const int height_col, const int width_col, const int kernel_h, const int kenerl_w, + const int pad_h, const int pad_w, const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int deformable_group, at::Tensor data_col) +{ + // num_axes should be smaller than block size + const int channel_per_deformable_group = channels / deformable_group; + const int num_kernels = channels * batch_size * height_col * width_col; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_im.scalar_type(), "modulated_deformable_im2col_gpu", ([&] { + const scalar_t *data_im_ = data_im.data_ptr(); + const scalar_t *data_offset_ = data_offset.data_ptr(); + const scalar_t *data_mask_ = data_mask.data_ptr(); + scalar_t *data_col_ = data_col.data_ptr(); + + modulated_deformable_im2col_gpu_kernel<<>>( + num_kernels, data_im_, data_offset_, data_mask_, height_im, width_im, kernel_h, kenerl_w, + pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group, + batch_size, channels, deformable_group, height_col, width_col, data_col_); + })); + + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + { + printf("error in modulated_deformable_im2col_cuda: %s\n", cudaGetErrorString(err)); + } +} + +void modulated_deformable_col2im_cuda( + const at::Tensor data_col, const at::Tensor data_offset, const at::Tensor data_mask, + const int batch_size, const int channels, const int height_im, const int width_im, + const int height_col, const int width_col, const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int deformable_group, at::Tensor grad_im) +{ + + const int channel_per_deformable_group = channels / deformable_group; + const int num_kernels = channels * kernel_h * kernel_w * batch_size * height_col * width_col; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + 
data_col.scalar_type(), "modulated_deformable_col2im_gpu", ([&] {
+        const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
+        const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+        const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
+        scalar_t *grad_im_ = grad_im.data_ptr<scalar_t>();
+
+        modulated_deformable_col2im_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
+            num_kernels, data_col_, data_offset_, data_mask_, channels, height_im, width_im,
+            kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
+            dilation_h, dilation_w, channel_per_deformable_group,
+            batch_size, deformable_group, height_col, width_col, grad_im_);
+      }));
+
+  cudaError_t err = cudaGetLastError();
+  if (err != cudaSuccess)
+  {
+    printf("error in modulated_deformable_col2im_cuda: %s\n", cudaGetErrorString(err));
+  }
+}
+
+void modulated_deformable_col2im_coord_cuda(
+    const at::Tensor data_col, const at::Tensor data_im, const at::Tensor data_offset, const at::Tensor data_mask,
+    const int batch_size, const int channels, const int height_im, const int width_im,
+    const int height_col, const int width_col, const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    const int dilation_h, const int dilation_w,
+    const int deformable_group,
+    at::Tensor grad_offset, at::Tensor grad_mask)
+{
+  const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h * kernel_w * deformable_group;
+  const int channel_per_deformable_group = channels * kernel_h * kernel_w / deformable_group;
+
+  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+      data_col.scalar_type(), "modulated_deformable_col2im_coord_gpu", ([&] {
+        const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
+        const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
+        const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+        const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
+        scalar_t *grad_offset_ = grad_offset.data_ptr<scalar_t>();
+        scalar_t *grad_mask_ = grad_mask.data_ptr<scalar_t>();
+
+        modulated_deformable_col2im_coord_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
+            num_kernels, data_col_, data_im_, data_offset_, data_mask_, channels, height_im, width_im,
+            kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
+            dilation_h, dilation_w, channel_per_deformable_group,
+            batch_size, 2 * kernel_h * kernel_w * deformable_group, deformable_group, height_col, width_col,
+            grad_offset_, grad_mask_);
+      }));
+  cudaError_t err = cudaGetLastError();
+  if (err != cudaSuccess)
+  {
+    printf("error in modulated_deformable_col2im_coord_cuda: %s\n", cudaGetErrorString(err));
+  }
+} diff --git a/IIR-Lab/models/dcn/src/deform_conv_ext.cpp b/IIR-Lab/models/dcn/src/deform_conv_ext.cpp new file mode 100644 index 0000000000000000000000000000000000000000..41c6df6f721bd95a525fd6a03dd9882e863de042 --- /dev/null +++ b/IIR-Lab/models/dcn/src/deform_conv_ext.cpp @@ -0,0 +1,164 @@
+// modify from
+// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda.c
+
+#include <ATen/DeviceGuard.h>
+#include <torch/extension.h>
+
+#include <cmath>
+#include <vector>
+
+#define WITH_CUDA // always use cuda
+#ifdef WITH_CUDA
+int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,
+                             at::Tensor offset, at::Tensor output,
+                             at::Tensor columns, at::Tensor ones, int kW,
+                             int kH, int dW, int dH, int padW, int padH,
+                             int dilationW, int dilationH, int group,
+                             int deformable_group, int im2col_step);
+
+int deform_conv_backward_input_cuda(at::Tensor input, at::Tensor offset,
+                                    at::Tensor gradOutput, at::Tensor gradInput,
+                                    at::Tensor gradOffset, at::Tensor weight,
+                                    at::Tensor columns, int kW, int kH, int dW,
+                                    int dH, int padW, int padH, int dilationW,
+                                    int
dilationH, int group, + int deformable_group, int im2col_step); + +int deform_conv_backward_parameters_cuda( + at::Tensor input, at::Tensor offset, at::Tensor gradOutput, + at::Tensor gradWeight, // at::Tensor gradBias, + at::Tensor columns, at::Tensor ones, int kW, int kH, int dW, int dH, + int padW, int padH, int dilationW, int dilationH, int group, + int deformable_group, float scale, int im2col_step); + +void modulated_deform_conv_cuda_forward( + at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, + at::Tensor offset, at::Tensor mask, at::Tensor output, at::Tensor columns, + int kernel_h, int kernel_w, const int stride_h, const int stride_w, + const int pad_h, const int pad_w, const int dilation_h, + const int dilation_w, const int group, const int deformable_group, + const bool with_bias); + +void modulated_deform_conv_cuda_backward( + at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, + at::Tensor offset, at::Tensor mask, at::Tensor columns, + at::Tensor grad_input, at::Tensor grad_weight, at::Tensor grad_bias, + at::Tensor grad_offset, at::Tensor grad_mask, at::Tensor grad_output, + int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, + int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, + const bool with_bias); +#endif + +int deform_conv_forward(at::Tensor input, at::Tensor weight, + at::Tensor offset, at::Tensor output, + at::Tensor columns, at::Tensor ones, int kW, + int kH, int dW, int dH, int padW, int padH, + int dilationW, int dilationH, int group, + int deformable_group, int im2col_step) { + if (input.device().is_cuda()) { +#ifdef WITH_CUDA + return deform_conv_forward_cuda(input, weight, offset, output, columns, + ones, kW, kH, dW, dH, padW, padH, dilationW, dilationH, group, + deformable_group, im2col_step); +#else + AT_ERROR("deform conv is not compiled with GPU support"); +#endif + } + AT_ERROR("deform conv is not implemented on CPU"); +} + +int deform_conv_backward_input(at::Tensor input, at::Tensor offset, + at::Tensor gradOutput, at::Tensor gradInput, + at::Tensor gradOffset, at::Tensor weight, + at::Tensor columns, int kW, int kH, int dW, + int dH, int padW, int padH, int dilationW, + int dilationH, int group, + int deformable_group, int im2col_step) { + if (input.device().is_cuda()) { +#ifdef WITH_CUDA + return deform_conv_backward_input_cuda(input, offset, gradOutput, + gradInput, gradOffset, weight, columns, kW, kH, dW, dH, padW, padH, + dilationW, dilationH, group, deformable_group, im2col_step); +#else + AT_ERROR("deform conv is not compiled with GPU support"); +#endif + } + AT_ERROR("deform conv is not implemented on CPU"); +} + +int deform_conv_backward_parameters( + at::Tensor input, at::Tensor offset, at::Tensor gradOutput, + at::Tensor gradWeight, // at::Tensor gradBias, + at::Tensor columns, at::Tensor ones, int kW, int kH, int dW, int dH, + int padW, int padH, int dilationW, int dilationH, int group, + int deformable_group, float scale, int im2col_step) { + if (input.device().is_cuda()) { +#ifdef WITH_CUDA + return deform_conv_backward_parameters_cuda(input, offset, gradOutput, + gradWeight, columns, ones, kW, kH, dW, dH, padW, padH, dilationW, + dilationH, group, deformable_group, scale, im2col_step); +#else + AT_ERROR("deform conv is not compiled with GPU support"); +#endif + } + AT_ERROR("deform conv is not implemented on CPU"); +} + +void modulated_deform_conv_forward( + at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, + at::Tensor offset, at::Tensor 
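+    // Note: every entry point in this extension only dispatches to the CUDA
+    // implementation declared above; CPU tensors fall through to AT_ERROR, so
+    // the op is GPU-only by construction.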
mask, at::Tensor output, at::Tensor columns, + int kernel_h, int kernel_w, const int stride_h, const int stride_w, + const int pad_h, const int pad_w, const int dilation_h, + const int dilation_w, const int group, const int deformable_group, + const bool with_bias) { + if (input.device().is_cuda()) { +#ifdef WITH_CUDA + return modulated_deform_conv_cuda_forward(input, weight, bias, ones, + offset, mask, output, columns, kernel_h, kernel_w, stride_h, + stride_w, pad_h, pad_w, dilation_h, dilation_w, group, + deformable_group, with_bias); +#else + AT_ERROR("modulated deform conv is not compiled with GPU support"); +#endif + } + AT_ERROR("modulated deform conv is not implemented on CPU"); +} + +void modulated_deform_conv_backward( + at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, + at::Tensor offset, at::Tensor mask, at::Tensor columns, + at::Tensor grad_input, at::Tensor grad_weight, at::Tensor grad_bias, + at::Tensor grad_offset, at::Tensor grad_mask, at::Tensor grad_output, + int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, + int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, + const bool with_bias) { + if (input.device().is_cuda()) { +#ifdef WITH_CUDA + return modulated_deform_conv_cuda_backward(input, weight, bias, ones, + offset, mask, columns, grad_input, grad_weight, grad_bias, grad_offset, + grad_mask, grad_output, kernel_h, kernel_w, stride_h, stride_w, + pad_h, pad_w, dilation_h, dilation_w, group, deformable_group, + with_bias); +#else + AT_ERROR("modulated deform conv is not compiled with GPU support"); +#endif + } + AT_ERROR("modulated deform conv is not implemented on CPU"); +} + + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("deform_conv_forward", &deform_conv_forward, + "deform forward"); + m.def("deform_conv_backward_input", &deform_conv_backward_input, + "deform_conv_backward_input"); + m.def("deform_conv_backward_parameters", + &deform_conv_backward_parameters, + "deform_conv_backward_parameters"); + m.def("modulated_deform_conv_forward", + &modulated_deform_conv_forward, + "modulated deform conv forward"); + m.def("modulated_deform_conv_backward", + &modulated_deform_conv_backward, + "modulated deform conv backward"); +} diff --git a/IIR-Lab/models/utils.py b/IIR-Lab/models/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..5dc6140a982a46b4d1687ee0b21c4ca14d7a8d99 --- /dev/null +++ b/IIR-Lab/models/utils.py @@ -0,0 +1,186 @@ +import torch +import numpy as np +import torch.nn.functional as F +import cv2 +def padding_4x(seq_noise): + sh_im = seq_noise.size() + expanded_h = sh_im[-2]%16 + + if expanded_h: + expanded_h = 16-expanded_h + expanded_w = sh_im[-1]%16 + if expanded_w: + expanded_w = 16-expanded_w + + padexp = (0, expanded_w, 0, expanded_h) + seq_noise = F.pad(input=seq_noise, pad=padexp, mode='reflect') + return seq_noise, expanded_h, expanded_w + +def depadding(seq_denoise,expanded_h, expanded_w): + if expanded_h: + seq_denoise = seq_denoise[:, :, :-expanded_h, :] + if expanded_w: + seq_denoise = seq_denoise[:, :, :, :-expanded_w] + return seq_denoise +def chunkV3(net, input_data, option, patch_h = 516, patch_w = 516, patch_h_overlap = 128, patch_w_overlap = 128): + #input_data [1,6,4,1500, 2000] + + # H = input_data.shape[3] + # W = input_data.shape[4] + + shape_list = input_data.shape + + B, C, H, W = shape_list[0], shape_list[1], shape_list[2], shape_list[3] # 1,4,1500,2000 + + test_result = torch.zeros_like(input_data).cuda() # 和input的shape一样 + + # t0 = 
time.perf_counter()
+    # Tile the frame into overlapping patches, run `net` on each patch, and
+    # linearly cross-fade the overlapping bands so tile seams stay invisible.
+    h_index = 1
+    while (patch_h*h_index-patch_h_overlap*(h_index-1)) < H:
+
+        test_horizontal_result = torch.zeros((B,C,patch_h,W)).cuda()  # same shape as input, but only patch_h rows
+
+        h_begin = patch_h*(h_index-1)-patch_h_overlap*(h_index-1)
+        h_end = patch_h*h_index-patch_h_overlap*(h_index-1)
+        w_index = 1
+        while (patch_w*w_index-patch_w_overlap*(w_index-1)) < W:
+            w_begin = patch_w*(w_index-1)-patch_w_overlap*(w_index-1)
+            w_end = patch_w*w_index-patch_w_overlap*(w_index-1)
+            test_patch = input_data[...,h_begin:h_end,w_begin:w_end]
+
+            with torch.no_grad():
+                test_patch_result = net(test_patch).detach().cuda()
+
+            if w_index == 1:
+                test_horizontal_result[...,w_begin:w_end] = test_patch_result
+            else:
+                for i in range(patch_w_overlap):
+                    test_horizontal_result[...,w_begin+i] = test_horizontal_result[...,w_begin+i]*(patch_w_overlap-1-i)/(patch_w_overlap-1)+test_patch_result[...,i]*i/(patch_w_overlap-1)
+                test_horizontal_result[...,w_begin+patch_w_overlap:w_end] = test_patch_result[...,patch_w_overlap:]
+            w_index += 1
+
+        # rightmost column: align the last patch to the image border and blend
+        test_patch = input_data[...,h_begin:h_end,-patch_w:]
+
+        with torch.no_grad():
+            test_patch_result = net(test_patch).detach().cuda()
+        last_range = w_end-(W-patch_w)
+
+        for i in range(last_range):
+            test_horizontal_result[...,W-patch_w+i] = test_horizontal_result[...,W-patch_w+i]*(last_range-1-i)/(last_range-1)+test_patch_result[...,i]*i/(last_range-1)
+        test_horizontal_result[...,w_end:] = test_patch_result[...,last_range:]
+
+        if h_index == 1:
+            test_result[...,h_begin:h_end,:] = test_horizontal_result
+        else:
+            for i in range(patch_h_overlap):
+                test_result[...,h_begin+i,:] = test_result[...,h_begin+i,:]*(patch_h_overlap-1-i)/(patch_h_overlap-1)+test_horizontal_result[...,i,:]*i/(patch_h_overlap-1)
+            test_result[...,h_begin+patch_h_overlap:h_end,:] = test_horizontal_result[...,patch_h_overlap:,:]
+        h_index += 1
+
+    # bottom row: align the last patch band to the image border and blend
+    test_horizontal_result = torch.zeros((B,C,patch_h,W)).cuda()  # same shape as input, but only patch_h rows
+
+    w_index = 1
+    while (patch_w*w_index-patch_w_overlap*(w_index-1)) < W:
+        w_begin = patch_w*(w_index-1)-patch_w_overlap*(w_index-1)
+        w_end = patch_w*w_index-patch_w_overlap*(w_index-1)
+        test_patch = input_data[...,-patch_h:,w_begin:w_end]
+
+        with torch.no_grad():
+            test_patch_result = net(test_patch).detach().cuda()
+
+        if w_index == 1:
+            test_horizontal_result[...,w_begin:w_end] = test_patch_result
+        else:
+            for i in range(patch_w_overlap):
+                test_horizontal_result[...,w_begin+i] = test_horizontal_result[...,w_begin+i]*(patch_w_overlap-1-i)/(patch_w_overlap-1)+test_patch_result[...,i]*i/(patch_w_overlap-1)
+            test_horizontal_result[...,w_begin+patch_w_overlap:w_end] = test_patch_result[...,patch_w_overlap:]
+        w_index += 1
+
+    test_patch = input_data[...,-patch_h:,-patch_w:]
+
+    with torch.no_grad():
+        test_patch_result = net(test_patch).detach().cuda()
+    last_range = w_end-(W-patch_w)
+    for i in range(last_range):
+        test_horizontal_result[...,W-patch_w+i] = test_horizontal_result[...,W-patch_w+i]*(last_range-1-i)/(last_range-1)+test_patch_result[...,i]*i/(last_range-1)
+    test_horizontal_result[...,w_end:] = test_patch_result[...,last_range:]
+
+    # blend the final patch_h-row band into the rows it overlaps
+    last_last_range = h_end-(H-patch_h)
+    for i in range(last_last_range):
+        test_result[...,H-patch_h+i,:] = test_result[...,H-patch_h+i,:]*(last_last_range-1-i)/(last_last_range-1)+test_horizontal_result[...,i,:]*i/(last_last_range-1)
+    test_result[...,h_end:,:] = test_horizontal_result[...,last_last_range:,:]
+
+    del test_horizontal_result, test_patch, input_data, test_patch_result
+    return test_result
+
+
+def
calculate_psnr(img, img2, input_order='HWC'): + + + assert img.shape == img2.shape, (f'Image shapes are different: {img.shape}, {img2.shape}.') + if input_order not in ['HWC', 'CHW']: + raise ValueError(f'Wrong input_order {input_order}. Supported input_orders are "HWC" and "CHW"') + + img = img.transpose(1, 2, 0) + img2 = img2.transpose(1, 2, 0) + + + img = img.astype(np.float64) + img2 = img2.astype(np.float64) + + mse = np.mean((img - img2)**2) + if mse == 0: + return float('inf') + return 10. * np.log10(1. * 1. / mse) + + +def calculate_ssim(img, img2, input_order='HWC'): + + + assert img.shape == img2.shape, (f'Image shapes are different: {img.shape}, {img2.shape}.') + if input_order not in ['HWC', 'CHW']: + raise ValueError(f'Wrong input_order {input_order}. Supported input_orders are "HWC" and "CHW"') + + + img = img.transpose(1, 2, 0) + img2 = img2.transpose(1, 2, 0) + + + img = img.astype(np.float64) + img2 = img2.astype(np.float64) + + ssims = [] + for i in range(img.shape[2]): + ssims.append(_ssim(img[..., i], img2[..., i])) + return np.array(ssims).mean() + +def _ssim(img, img2): + """Calculate SSIM (structural similarity) for one channel images. + + It is called by func:`calculate_ssim`. + + Args: + img (ndarray): Images with range [0, 255] with order 'HWC'. + img2 (ndarray): Images with range [0, 255] with order 'HWC'. + + Returns: + float: SSIM result. + """ + + c1 = (0.01 * 1)**2 + c2 = (0.03 * 1)**2 + kernel = cv2.getGaussianKernel(11, 1.5) + window = np.outer(kernel, kernel.transpose()) + + mu1 = cv2.filter2D(img, -1, window)[5:-5, 5:-5] # valid mode for window size 11 + mu2 = cv2.filter2D(img2, -1, window)[5:-5, 5:-5] + mu1_sq = mu1**2 + mu2_sq = mu2**2 + mu1_mu2 = mu1 * mu2 + sigma1_sq = cv2.filter2D(img**2, -1, window)[5:-5, 5:-5] - mu1_sq + sigma2_sq = cv2.filter2D(img2**2, -1, window)[5:-5, 5:-5] - mu2_sq + sigma12 = cv2.filter2D(img * img2, -1, window)[5:-5, 5:-5] - mu1_mu2 + + ssim_map = ((2 * mu1_mu2 + c1) * (2 * sigma12 + c2)) / ((mu1_sq + mu2_sq + c1) * (sigma1_sq + sigma2_sq + c2)) + return ssim_map.mean() \ No newline at end of file diff --git a/IIR-Lab/requirements.txt b/IIR-Lab/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..ffec907057be1f60e43558610dbe12c77e567f51 --- /dev/null +++ b/IIR-Lab/requirements.txt @@ -0,0 +1,141 @@ +absl-py==2.1.0 +addict==2.4.0 +asttokens==2.4.1 +attrs==23.2.0 +backcall==0.2.0 +beautifulsoup4==4.12.3 +bleach==6.1.0 +bm3d==4.0.1 +bm4d==4.2.3 +cachetools==5.3.2 +certifi==2023.11.17 +charset-normalizer==3.3.2 +comm==0.2.1 +contourpy==1.1.1 +cycler==0.12.1 +debugpy==1.8.1 +decorator==5.1.1 +defusedxml==0.7.1 +docopt==0.6.2 +einops==0.7.0 +exceptiongroup==1.2.0 +executing==2.0.1 +ExifRead==3.0.0 +fastjsonschema==2.19.1 +fastrlock==0.8.2 +filelock==3.13.1 +fonttools==4.47.2 +fsspec==2023.12.2 +future==0.18.3 +fvcore==0.1.5.post20221221 +gitdb==4.0.11 +GitPython==3.1.41 +google-auth==2.27.0 +google-auth-oauthlib==1.2.0 +grpcio==1.60.0 +h5py==3.10.0 +hdf5storage==0.1.19 +huggingface-hub==0.20.2 +idna==3.6 +imageio==2.33.1 +importlib-metadata==7.0.1 +importlib-resources==6.1.1 +iopath==0.1.10 +ipykernel==6.29.2 +ipython==8.12.3 +jedi==0.19.1 +Jinja2==3.1.3 +joblib==1.3.2 +jsonschema==4.21.1 +jsonschema-specifications==2023.12.1 +jupyter_client==8.6.0 +jupyter_core==5.7.1 +jupyterlab_pygments==0.3.0 +kiwisolver==1.4.5 +kornia==0.7.1 +lazy_loader==0.3 +lmdb==1.4.1 +lpips==0.1.4 +Markdown==3.5.2 +MarkupSafe==2.1.3 +matplotlib==3.7.2 +matplotlib-inline==0.1.6 +mistune==3.0.2 +mpmath==1.3.0 
+natsort==8.4.0
+nbclient==0.9.0
+nbconvert==7.16.1
+nbformat==5.9.2
+nest-asyncio==1.6.0
+networkx==3.1
+numpy==1.24.4
+oauthlib==3.2.2
+opencv-python==4.9.0.80
+packaging==23.2
+pandas==2.0.3
+pandocfilters==1.5.1
+parso==0.8.3
+pexpect==4.9.0
+pickleshare==0.7.5
+pillow==10.2.0
+pipreqs==0.5.0
+platformdirs==4.1.0
+portalocker==2.8.2
+prompt-toolkit==3.0.43
+protobuf==4.23.4
+psutil==5.9.8
+ptflops==0.7.2.2
+ptyprocess==0.7.0
+pure-eval==0.2.2
+pyasn1==0.5.1
+pyasn1-modules==0.3.0
+Pygments==2.17.2
+pyparsing==3.0.0
+pypng==0.20220715.0
+python-dateutil==2.8.2
+pytorch-msssim==1.0.0
+pytz==2024.1
+PyWavelets==1.4.1
+PyYAML==6.0.1
+pyzmq==25.1.2
+rawpy==0.19.0
+referencing==0.33.0
+requests==2.31.0
+requests-oauthlib==1.3.1
+rpds-py==0.18.0
+rsa==4.9
+safetensors==0.4.1
+scikit-image==0.21.0
+scikit-learn==1.3.2
+scipy==1.11.4
+seaborn==0.13.2
+six==1.16.0
+smmap==5.0.1
+soupsieve==2.5
+stack-data==0.6.3
+sympy==1.12
+tabulate==0.9.0
+tensorboard==2.15.1
+tensorboard-data-server==0.7.2
+termcolor==2.4.0
+tf_keras-nightly==2.16.0.dev2024011811
+thop==0.1.1.post2209072238
+threadpoolctl==3.2.0
+tifffile==2023.12.9
+timm==0.9.12
+tinycss2==1.2.1
+tomli==2.0.1
+tornado==6.4
+tqdm==4.66.1
+traitlets==5.14.1
+triton==2.1.0
+typing_extensions==4.9.0
+tzdata==2023.4
+urllib3==2.1.0
+wcwidth==0.2.13
+webencodings==0.5.1
+Werkzeug==3.0.1
+yacs==0.1.8
+yapf==0.40.2
+yarg==0.1.9
+zipp==3.17.0
diff --git a/IIR-Lab/run.sh b/IIR-Lab/run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..4beafc3264e1dfa2cd0331610d2a2ab8b08fc386
--- /dev/null
+++ b/IIR-Lab/run.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+cd /nightimage
+python final_test.py --chunk
diff --git a/IIR-Lab/utils.py b/IIR-Lab/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..bd8d259b1d584a527b84a6397c67c78974674101
--- /dev/null
+++ b/IIR-Lab/utils.py
@@ -0,0 +1,207 @@
+import torch
+import numpy as np
+import torch.nn.functional as F
+import cv2
+
+
+def padding_4x(seq_noise):
+    # Reflect-pad H and W up to the next multiple of 16 so the network's
+    # downsampling stages divide evenly.
+    sh_im = seq_noise.size()
+    expanded_h = sh_im[-2]%16
+    if expanded_h:
+        expanded_h = 16-expanded_h
+    expanded_w = sh_im[-1]%16
+    if expanded_w:
+        expanded_w = 16-expanded_w
+
+    padexp = (0, expanded_w, 0, expanded_h)
+    seq_noise = F.pad(input=seq_noise, pad=padexp, mode='reflect')
+    return seq_noise, expanded_h, expanded_w
+
+
+def depadding(seq_denoise, expanded_h, expanded_w):
+    if expanded_h:
+        seq_denoise = seq_denoise[:, :, :-expanded_h, :]
+    if expanded_w:
+        seq_denoise = seq_denoise[:, :, :, :-expanded_w]
+    return seq_denoise
+
+
+def chunkV3(net, input_data, option, patch_h=516, patch_w=516, patch_h_overlap=16, patch_w_overlap=16):
+    # input_data, e.g. [1, 6, 4, 1500, 2000]
+
+    # H = input_data.shape[3]
+    # W = input_data.shape[4]
+
+    shape_list = input_data.shape
+
+    if option == 'image':
+        B, C, H, W = shape_list[0], shape_list[1], shape_list[2], shape_list[3]  # 1,4,1500,2000
+    if option == 'RViDeformer':
+        # Fn (frame count) avoids shadowing torch.nn.functional imported as F.
+        B, Fn, C, H, W = shape_list[0], shape_list[1], shape_list[2], shape_list[3], shape_list[4]  # 1,6,4,1500,2000
+    if option == 'three2one':
+        B, FC, H, W = shape_list[0], shape_list[1], shape_list[2], shape_list[3]  # 1,12,1500,2000
+
+    if option == 'image':
+        test_result = torch.zeros_like(input_data).cpu()  # same shape as the input
+    if option == 'RViDeformer':
+        test_result = torch.zeros_like(input_data).cpu()  # same shape as the input
+    if option == 'three2one':
+        test_result = torch.zeros((B, 4, H, W)).cpu()  # same spatial size, 4 output channels
+
+
+    # t0 = time.perf_counter()
+    h_index = 1
+    while (patch_h*h_index-patch_h_overlap*(h_index-1)) < H:
+        if option == 'image':
+            # Same shape as the input, except the height is patch_h.
+            test_horizontal_result = torch.zeros((B,C,patch_h,W)).cpu()
+        if option == 'RViDeformer':
+            test_horizontal_result = torch.zeros((B, Fn, C, patch_h, W)).cpu()
+        if option == 'three2one':
+            test_horizontal_result = torch.zeros((B, 4, patch_h, W)).cpu()
+
+        h_begin = patch_h*(h_index-1)-patch_h_overlap*(h_index-1)
+        h_end = patch_h*h_index-patch_h_overlap*(h_index-1)
+        w_index = 1
+        while (patch_w*w_index-patch_w_overlap*(w_index-1)) < W:
+            w_begin = patch_w*(w_index-1)-patch_w_overlap*(w_index-1)
+            w_end = patch_w*w_index-patch_w_overlap*(w_index-1)
+            test_patch = input_data[...,h_begin:h_end,w_begin:w_end]
+
+            with torch.no_grad():
+                test_patch_result = net(test_patch).detach().cpu()
+
+            if w_index == 1:
+                test_horizontal_result[...,w_begin:w_end] = test_patch_result
+            else:
+                # Linear cross-fade over the horizontal overlap region.
+                for i in range(patch_w_overlap):
+                    test_horizontal_result[...,w_begin+i] = test_horizontal_result[...,w_begin+i]*(patch_w_overlap-1-i)/(patch_w_overlap-1)+test_patch_result[...,i]*i/(patch_w_overlap-1)
+                test_horizontal_result[...,w_begin+patch_w_overlap:w_end] = test_patch_result[...,patch_w_overlap:]
+            w_index += 1
+
+        # Final column: a patch aligned to the right image edge.
+        test_patch = input_data[...,h_begin:h_end,-patch_w:]
+
+        with torch.no_grad():
+            test_patch_result = net(test_patch).detach().cpu()
+        last_range = w_end-(W-patch_w)
+
+        for i in range(last_range):
+            test_horizontal_result[...,W-patch_w+i] = test_horizontal_result[...,W-patch_w+i]*(last_range-1-i)/(last_range-1)+test_patch_result[...,i]*i/(last_range-1)
+        test_horizontal_result[...,w_end:] = test_patch_result[...,last_range:]
+
+        if h_index == 1:
+            test_result[...,h_begin:h_end,:] = test_horizontal_result
+        else:
+            # Linear cross-fade over the vertical overlap region.
+            for i in range(patch_h_overlap):
+                test_result[...,h_begin+i,:] = test_result[...,h_begin+i,:]*(patch_h_overlap-1-i)/(patch_h_overlap-1)+test_horizontal_result[...,i,:]*i/(patch_h_overlap-1)
+            test_result[...,h_begin+patch_h_overlap:h_end,:] = test_horizontal_result[...,patch_h_overlap:,:]
+        h_index += 1
+
+    if option == 'image':
+        # Same shape as the input, except the height is patch_h.
+        test_horizontal_result = torch.zeros((B,C,patch_h,W)).cpu()
+    if option == 'RViDeformer':
+        test_horizontal_result = torch.zeros((B, Fn, C, patch_h, W)).cpu()
+    if option == 'three2one':
+        test_horizontal_result = torch.zeros((B, 4, patch_h, W)).cpu()
+
+    w_index = 1
+    while (patch_w*w_index-patch_w_overlap*(w_index-1)) < W:
+        w_begin = patch_w*(w_index-1)-patch_w_overlap*(w_index-1)
+        w_end = patch_w*w_index-patch_w_overlap*(w_index-1)
+        test_patch = input_data[...,-patch_h:,w_begin:w_end]
+
+        with torch.no_grad():
+            test_patch_result = net(test_patch).detach().cpu()
+
+        if w_index == 1:
+            test_horizontal_result[...,w_begin:w_end] = test_patch_result
+        else:
+            for i in range(patch_w_overlap):
+                test_horizontal_result[...,w_begin+i] = test_horizontal_result[...,w_begin+i]*(patch_w_overlap-1-i)/(patch_w_overlap-1)+test_patch_result[...,i]*i/(patch_w_overlap-1)
+            test_horizontal_result[...,w_begin+patch_w_overlap:w_end] = test_patch_result[...,patch_w_overlap:]
+        w_index += 1
+
+    # Final patch: aligned to the bottom-right corner.
+    test_patch = input_data[...,-patch_h:,-patch_w:]
+
+    with torch.no_grad():
+        test_patch_result = net(test_patch).detach().cpu()
+    last_range = w_end-(W-patch_w)
+    for i in range(last_range):
+        test_horizontal_result[...,W-patch_w+i] = test_horizontal_result[...,W-patch_w+i]*(last_range-1-i)/(last_range-1)+test_patch_result[...,i]*i/(last_range-1)
+    test_horizontal_result[...,w_end:] = test_patch_result[...,last_range:]
+
+    last_last_range = h_end-(H-patch_h)
+    for i in range(last_last_range):
+        # Blend the bottom strip; the row offset must use patch_h (H-patch_h+i), not patch_w.
+        test_result[...,H-patch_h+i,:] = test_result[...,H-patch_h+i,:]*(last_last_range-1-i)/(last_last_range-1)+test_horizontal_result[...,i,:]*i/(last_last_range-1)
+    test_result[...,h_end:,:] = test_horizontal_result[...,last_last_range:,:]
+
+    # t1 = time.perf_counter()
+    # print('Total running time: %s s' % (str(t1 - t0)))
+
+    return test_result
+
+
+def calculate_psnr(img, img2, input_order='HWC'):
+    """Calculate PSNR (peak signal-to-noise ratio) for images in range [0, 1]."""
+
+    assert img.shape == img2.shape, (f'Image shapes are different: {img.shape}, {img2.shape}.')
+    if input_order not in ['HWC', 'CHW']:
+        raise ValueError(f'Wrong input_order {input_order}. Supported input_orders are "HWC" and "CHW"')
+
+    # Only CHW inputs need to be rearranged to HWC.
+    if input_order == 'CHW':
+        img = img.transpose(1, 2, 0)
+        img2 = img2.transpose(1, 2, 0)
+
+    img = img.astype(np.float64)
+    img2 = img2.astype(np.float64)
+
+    mse = np.mean((img - img2)**2)
+    if mse == 0:
+        return float('inf')
+    return 10. * np.log10(1. / mse)  # peak value is 1. for [0, 1] images
+
+
+def calculate_ssim(img, img2, input_order='HWC'):
+    """Calculate SSIM (structural similarity) for images in range [0, 1]."""
+
+    assert img.shape == img2.shape, (f'Image shapes are different: {img.shape}, {img2.shape}.')
+    if input_order not in ['HWC', 'CHW']:
+        raise ValueError(f'Wrong input_order {input_order}. Supported input_orders are "HWC" and "CHW"')
+
+    # Only CHW inputs need to be rearranged to HWC.
+    if input_order == 'CHW':
+        img = img.transpose(1, 2, 0)
+        img2 = img2.transpose(1, 2, 0)
+
+    img = img.astype(np.float64)
+    img2 = img2.astype(np.float64)
+
+    ssims = []
+    for i in range(img.shape[2]):
+        ssims.append(_ssim(img[..., i], img2[..., i]))
+    return np.array(ssims).mean()
+
+def _ssim(img, img2):
+    """Calculate SSIM (structural similarity) for one channel.
+
+    It is called by func:`calculate_ssim`.
+
+    Args:
+        img (ndarray): 2-D single-channel image with range [0, 1].
+        img2 (ndarray): 2-D single-channel image with range [0, 1].
+
+    Returns:
+        float: SSIM result.
+    """
+
+    c1 = (0.01 * 1)**2  # stabilising constants for a dynamic range of 1
+    c2 = (0.03 * 1)**2
+    kernel = cv2.getGaussianKernel(11, 1.5)
+    window = np.outer(kernel, kernel.transpose())
+
+    mu1 = cv2.filter2D(img, -1, window)[5:-5, 5:-5]  # valid mode for window size 11
+    mu2 = cv2.filter2D(img2, -1, window)[5:-5, 5:-5]
+    mu1_sq = mu1**2
+    mu2_sq = mu2**2
+    mu1_mu2 = mu1 * mu2
+    sigma1_sq = cv2.filter2D(img**2, -1, window)[5:-5, 5:-5] - mu1_sq
+    sigma2_sq = cv2.filter2D(img2**2, -1, window)[5:-5, 5:-5] - mu2_sq
+    sigma12 = cv2.filter2D(img * img2, -1, window)[5:-5, 5:-5] - mu1_mu2
+
+    ssim_map = ((2 * mu1_mu2 + c1) * (2 * sigma12 + c2)) / ((mu1_sq + mu2_sq + c1) * (sigma1_sq + sigma2_sq + c2))
+    return ssim_map.mean()
\ No newline at end of file
diff --git a/IIR-Lab/utils_ours/__pycache__/calculate_PSNR_SSIM.cpython-310.pyc b/IIR-Lab/utils_ours/__pycache__/calculate_PSNR_SSIM.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e21c4492d89082e490a64f96ca42e8585e9a3322
Binary files /dev/null and b/IIR-Lab/utils_ours/__pycache__/calculate_PSNR_SSIM.cpython-310.pyc differ
diff --git a/IIR-Lab/utils_ours/__pycache__/calculate_PSNR_SSIM.cpython-38.pyc b/IIR-Lab/utils_ours/__pycache__/calculate_PSNR_SSIM.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..df76eac3a07acb5f0a91673d80b350d1d8513df0
Binary files /dev/null and b/IIR-Lab/utils_ours/__pycache__/calculate_PSNR_SSIM.cpython-38.pyc differ
diff --git a/IIR-Lab/utils_ours/__pycache__/calculate_PSNR_SSIM.cpython-39.pyc b/IIR-Lab/utils_ours/__pycache__/calculate_PSNR_SSIM.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ef7aa5fbaf7f7b2d40135b460314d0f9fa7d3d1b
Binary files /dev/null and b/IIR-Lab/utils_ours/__pycache__/calculate_PSNR_SSIM.cpython-39.pyc differ
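chunkV3 above stitches patch-wise network outputs back into a full-resolution tensor, so large frames can be processed without pushing the whole image through the model at once. A minimal sketch of driving it end to end; the identity network, the import path, and the 1x4x1500x2000 input shape are illustrative assumptions, not taken from this repo:

    import torch
    import torch.nn as nn
    from utils import chunkV3   # IIR-Lab/utils.py, assuming it is on sys.path

    net = nn.Identity()                    # stand-in for the real denoiser
    x = torch.rand(1, 4, 1500, 2000)       # packed Bayer frame in [0, 1]
    out = chunkV3(net, x, option='image')  # patches blended back together
    assert out.shape == x.shape

With nn.Identity the cross-fade weights sum to 1, so the stitched output reproduces the input exactly; swapping in a real network exercises the same code path.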
diff --git a/IIR-Lab/utils_ours/__pycache__/lc.cpython-310.pyc b/IIR-Lab/utils_ours/__pycache__/lc.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..708144b77f6fa7b6f7667124bcff5a19eb57ca47
Binary files /dev/null and b/IIR-Lab/utils_ours/__pycache__/lc.cpython-310.pyc differ
diff --git a/IIR-Lab/utils_ours/__pycache__/psnr_ssim.cpython-310.pyc b/IIR-Lab/utils_ours/__pycache__/psnr_ssim.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ab93e4b91ad3568a13018c7e9cea36df5d66e8af
Binary files /dev/null and b/IIR-Lab/utils_ours/__pycache__/psnr_ssim.cpython-310.pyc differ
diff --git a/IIR-Lab/utils_ours/__pycache__/psnr_ssim.cpython-38.pyc b/IIR-Lab/utils_ours/__pycache__/psnr_ssim.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d670918d9e91b8ba4de8820531b7736aa351e9cc
Binary files /dev/null and b/IIR-Lab/utils_ours/__pycache__/psnr_ssim.cpython-38.pyc differ
diff --git a/IIR-Lab/utils_ours/__pycache__/util.cpython-310.pyc b/IIR-Lab/utils_ours/__pycache__/util.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c4c220d5bdb45721fdc1c52b8b39d7d316364708
Binary files /dev/null and b/IIR-Lab/utils_ours/__pycache__/util.cpython-310.pyc differ
diff --git a/IIR-Lab/utils_ours/__pycache__/util.cpython-311.pyc b/IIR-Lab/utils_ours/__pycache__/util.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8cbc1d8da8578bd6766881548b71e18e021a28a3
Binary files /dev/null and b/IIR-Lab/utils_ours/__pycache__/util.cpython-311.pyc differ
diff --git a/IIR-Lab/utils_ours/__pycache__/util.cpython-312.pyc b/IIR-Lab/utils_ours/__pycache__/util.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..969bf464fda6645c87c51f2fb8ebfb9fa84c87a4
Binary files /dev/null and b/IIR-Lab/utils_ours/__pycache__/util.cpython-312.pyc differ
diff --git a/IIR-Lab/utils_ours/__pycache__/util.cpython-38.pyc b/IIR-Lab/utils_ours/__pycache__/util.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1ec25a0bab3b5f98a9f799a0701b1547bbef58a0
Binary files /dev/null and b/IIR-Lab/utils_ours/__pycache__/util.cpython-38.pyc differ
diff --git a/IIR-Lab/utils_ours/__pycache__/util.cpython-39.pyc b/IIR-Lab/utils_ours/__pycache__/util.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c8132aebdf4bdf2211cc56cb16c87bd547b05cf7
Binary files /dev/null and b/IIR-Lab/utils_ours/__pycache__/util.cpython-39.pyc differ
diff --git a/IIR-Lab/utils_ours/util.py b/IIR-Lab/utils_ours/util.py
new file mode 100644
index 0000000000000000000000000000000000000000..8d9e9b4efa703ba887ab34c1d018a3054b4eb67a
--- /dev/null
+++ b/IIR-Lab/utils_ours/util.py
@@ -0,0 +1,80 @@
+import os
+import sys
+import time
+from datetime import datetime
+import logging
+import numpy as np
+import torch
+import math
+
+def get_timestamp():
+    return datetime.now().strftime('%y%m%d-%H%M%S')
+
+def mkdir_and_rename(path):
+    if os.path.exists(path):
+        new_name = path + '_archived_' + get_timestamp()
+        print('Path already exists. Rename it to [{:s}]'.format(new_name))
+        os.rename(path, new_name)
+    os.makedirs(path)
+
+
+def scandir(dir_path, suffix=None, recursive=False, full_path=False):
+    """Scan a directory to find files of interest.
+    Args:
+        dir_path (str): Path of the directory.
+        suffix (str | tuple(str), optional): File suffix that we are
+            interested in. Default: None.
+        recursive (bool, optional): If set to True, recursively scan the
+            directory. Default: False.
+        full_path (bool, optional): If set to True, include the dir_path.
+            Default: False.
+    Returns:
+        A generator for all the files of interest, as relative paths.
+    """
+
+    if (suffix is not None) and not isinstance(suffix, (str, tuple)):
+        raise TypeError('"suffix" must be a string or tuple of strings')
+
+    root = dir_path
+
+    def _scandir(dir_path, suffix, recursive):
+        for entry in os.scandir(dir_path):
+            if not entry.name.startswith('.') and entry.is_file():
+                if full_path:
+                    return_path = entry.path
+                else:
+                    return_path = os.path.relpath(entry.path, root)
+
+                if suffix is None:
+                    yield return_path
+                elif return_path.endswith(suffix):
+                    yield return_path
+            # Only recurse into directories; hidden files would otherwise
+            # fall through here and crash os.scandir.
+            elif recursive and entry.is_dir():
+                yield from _scandir(
+                    entry.path, suffix=suffix, recursive=recursive)
+            else:
+                continue
+
+    return _scandir(dir_path, suffix=suffix, recursive=recursive)
+
+
+def setup_logger(log_file_path):
+    log_formatter = logging.Formatter("%(asctime)s [%(levelname)-5.5s] %(message)s")
+    root_logger = logging.getLogger()
+    root_logger.setLevel(logging.INFO)
+
+    log_file_handler = logging.FileHandler(log_file_path, encoding='utf-8')
+    log_file_handler.setFormatter(log_formatter)
+    root_logger.addHandler(log_file_handler)
+
+    log_stream_handler = logging.StreamHandler(sys.stdout)
+    log_stream_handler.setFormatter(log_formatter)
+    root_logger.addHandler(log_stream_handler)
+
+    logging.info('Logging file is %s' % log_file_path)
+
+
+def print_args(args):
+    for arg in vars(args):
+        logging.info('%s: %s' % (arg, getattr(args, arg)))
\ No newline at end of file
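For reference, a short sketch of how the utilities in IIR-Lab/utils_ours/util.py combine in a driver script; the log file name, directory, and suffix are assumptions for illustration:

    import logging
    from utils_ours.util import scandir, setup_logger

    setup_logger('run.log')   # mirror INFO logs to the file and stdout
    for path in scandir('./data', suffix='.png', recursive=True):
        logging.info('found %s', path)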