diff --git a/IIR-Lab/Dockerfile b/IIR-Lab/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..6b4c569cf70f5b73a495a84b657d839709791861
--- /dev/null
+++ b/IIR-Lab/Dockerfile
@@ -0,0 +1,17 @@
+FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime
+
+ENV TZ=Asia
+ARG DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && apt-get install -y \
+    libpng-dev libjpeg-dev \
+    libopencv-dev ffmpeg \
+    libgl1-mesa-glx
+
+COPY requirements.txt .
+RUN python -m pip install --upgrade pip
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . /nightimage
+RUN chmod +x /nightimage/run.sh
+WORKDIR /nightimage
\ No newline at end of file
diff --git a/IIR-Lab/ISP_pipeline/.gitignore b/IIR-Lab/ISP_pipeline/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..b6e47617de110dea7ca47e087ff1347cc2646eda
--- /dev/null
+++ b/IIR-Lab/ISP_pipeline/.gitignore
@@ -0,0 +1,129 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
diff --git a/IIR-Lab/ISP_pipeline/Dockerfile b/IIR-Lab/ISP_pipeline/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..1c92a995e40704663df77e4598a31af9b9de4256
--- /dev/null
+++ b/IIR-Lab/ISP_pipeline/Dockerfile
@@ -0,0 +1,12 @@
+FROM python:3.9
+
+RUN apt-get update && apt-get install -y \
+    libpng-dev libjpeg-dev \
+    libopencv-dev ffmpeg \
+    libgl1-mesa-glx
+
+COPY requirements.txt .
+RUN python -m pip install --no-cache-dir -r requirements.txt
+
+COPY . /nightimaging
+WORKDIR /nightimaging
\ No newline at end of file
diff --git a/IIR-Lab/ISP_pipeline/LICENSE b/IIR-Lab/ISP_pipeline/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..5f5ae4a8c40d61a0d26503dce7e6c1c7234a07e6
--- /dev/null
+++ b/IIR-Lab/ISP_pipeline/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Color Reproduction and Synthesis
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/IIR-Lab/ISP_pipeline/__pycache__/debayer.cpython-312.pyc b/IIR-Lab/ISP_pipeline/__pycache__/debayer.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..62e8b3216c60b54463fe832372ac66548c9df700
Binary files /dev/null and b/IIR-Lab/ISP_pipeline/__pycache__/debayer.cpython-312.pyc differ
diff --git a/IIR-Lab/ISP_pipeline/__pycache__/debayer.cpython-39.pyc b/IIR-Lab/ISP_pipeline/__pycache__/debayer.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..10a3f15f2a8737e9d070f18872bbe033e84369a8
Binary files /dev/null and b/IIR-Lab/ISP_pipeline/__pycache__/debayer.cpython-39.pyc differ
diff --git a/IIR-Lab/ISP_pipeline/__pycache__/imaging.cpython-312.pyc b/IIR-Lab/ISP_pipeline/__pycache__/imaging.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..05d9736fbf0012d3ede06b9418fec800d07b4561
Binary files /dev/null and b/IIR-Lab/ISP_pipeline/__pycache__/imaging.cpython-312.pyc differ
diff --git a/IIR-Lab/ISP_pipeline/__pycache__/imaging.cpython-39.pyc b/IIR-Lab/ISP_pipeline/__pycache__/imaging.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0ad7f78e53609cdee6851eca2e8a5e7a7fb4e39f
Binary files /dev/null and b/IIR-Lab/ISP_pipeline/__pycache__/imaging.cpython-39.pyc differ
diff --git a/IIR-Lab/ISP_pipeline/__pycache__/process_pngs_isp.cpython-312.pyc b/IIR-Lab/ISP_pipeline/__pycache__/process_pngs_isp.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5f38a928c24365b967cd61cf376d9d8fd9fa1e82
Binary files /dev/null and b/IIR-Lab/ISP_pipeline/__pycache__/process_pngs_isp.cpython-312.pyc differ
diff --git a/IIR-Lab/ISP_pipeline/__pycache__/process_pngs_isp.cpython-39.pyc b/IIR-Lab/ISP_pipeline/__pycache__/process_pngs_isp.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1bbd415f279692aa63128c4cfdc0ec163e7e81b5
Binary files /dev/null and b/IIR-Lab/ISP_pipeline/__pycache__/process_pngs_isp.cpython-39.pyc differ
diff --git a/IIR-Lab/ISP_pipeline/__pycache__/utility.cpython-312.pyc 
b/IIR-Lab/ISP_pipeline/__pycache__/utility.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..78364817de05cfcccae13dae2f93b484804f3fff
Binary files /dev/null and b/IIR-Lab/ISP_pipeline/__pycache__/utility.cpython-312.pyc differ
diff --git a/IIR-Lab/ISP_pipeline/__pycache__/utility.cpython-39.pyc b/IIR-Lab/ISP_pipeline/__pycache__/utility.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3a0cc3db3952e096dc802ad15c4ee62debaa2c3d
Binary files /dev/null and b/IIR-Lab/ISP_pipeline/__pycache__/utility.cpython-39.pyc differ
diff --git a/IIR-Lab/ISP_pipeline/cfa_pattern_change.py b/IIR-Lab/ISP_pipeline/cfa_pattern_change.py
new file mode 100644
index 0000000000000000000000000000000000000000..abbd1d883677af0d6752c411506b1100e8af2df7
--- /dev/null
+++ b/IIR-Lab/ISP_pipeline/cfa_pattern_change.py
@@ -0,0 +1,74 @@
+import numpy as np
+import os
+import json
+import cv2
+
+def change_cfa_pattern(img):
+    # Split the mosaic into its four CFA sub-planes, then write them back
+    # in a different channel order, re-arranging the CFA pattern in place.
+    raw_colors = np.asarray([3, 1, 2, 0]).reshape((2, 2))
+    changed_raw_colors = np.asarray([0, 1, 2, 3]).reshape((2, 2))
+    subplanes = np.zeros((img.shape[0] // 2, img.shape[1] // 2, 4))
+    for i in range(2):
+        for j in range(2):
+            ch = raw_colors[i, j]
+            subplanes[:, :, ch] = img[i::2, j::2]
+    for i in range(2):
+        for j in range(2):
+            ch1 = changed_raw_colors[i, j]
+            img[i::2, j::2] = subplanes[:, :, ch1]
+
+    return img
+
+def rggb_raw(raw):
+    # pack RGGB Bayer raw to 4 channels
+    H, W = raw.shape
+    raw = raw[None, ...]
+    raw_pack = np.concatenate((raw[:, 0:H:2, 0:W:2],
+                               raw[:, 0:H:2, 1:W:2],
+                               raw[:, 1:H:2, 0:W:2],
+                               raw[:, 1:H:2, 1:W:2]), axis=0)
+    # tmp = rggb[...,0]
+    # rggb[...,0] = rggb[...,-1]
+    # rggb[...,-1] = tmp
+    return raw_pack
+
+def raw_rggb(raws):
+    # unpack 4 channels back to an RGGB Bayer mosaic
+    C, H, W = raws.shape
+    output = np.zeros((H * 2, W * 2)).astype(np.uint16)
+
+    output[0:2 * H:2, 0:2 * W:2] = raws[0:1, :, :]
+    output[0:2 * H:2, 1:2 * W:2] = raws[1:2, :, :]
+    output[1:2 * H:2, 0:2 * W:2] = raws[2:3, :, :]
+    output[1:2 * H:2, 1:2 * W:2] = raws[3:4, :, :]
+
+    return output
+
+if __name__ == "__main__":
+    json_path = "/data1/02_data/Train_Data/"
+    file_name = os.listdir(json_path)
+    json_list = []
+    for file_name_all in file_name:
+        if file_name_all.endswith(".json"):
+            json_list.append(json_path + file_name_all)
+    a = []
+    for i in range(len(json_list)):
+        with open(json_list[i], 'r', encoding='UTF-8') as f:
+            result = json.load(f)
+        # a,b = result["noise_profile"]
+        # black = result["white_level"]
+        cfa_pattern = result["cfa_pattern"]
+        if cfa_pattern[0] == 2:
+            a.append(json_list[i])
+    for j in range(len(a)):
+        pic_name, _ = os.path.splitext(a[j])
+        img = cv2.imread(pic_name + ".png", cv2.IMREAD_UNCHANGED)
+        # img1 = cv2.imread(pic_name+".png", cv2.IMREAD_UNCHANGED)
+        # test = img - img1
+        # print(test)
+        changed_img = change_cfa_pattern(img=img)
+        # cv2.imwrite(pic_name+"test1.png",changed_img)
+        np.save(pic_name + "origin.npy", img)
+        np.save(pic_name + "changed.npy", changed_img)
+        # np.save("./json_all.npy",result)
+
\ No newline at end of file
diff --git a/IIR-Lab/ISP_pipeline/debayer.py b/IIR-Lab/ISP_pipeline/debayer.py
new file mode 100644
index 0000000000000000000000000000000000000000..5e9855b53c633c5671a39d1042ec70d2dbeb80da
--- /dev/null
+++ b/IIR-Lab/ISP_pipeline/debayer.py
@@ -0,0 +1,1295 @@
+import numpy as np
+import math
+import time
+import utility
+from scipy import signal
+
+# =============================================================
+# function: debayer_mhc
+# demosaicing using
Malvar-He-Cutler algorithm +# http://www.ipol.im/pub/art/2011/g_mhcd/ +# ============================================================= +def debayer_mhc(raw, bayer_pattern="rggb", clip_range=[0, 65535], timeshow=False): + + # convert to float32 in case it was not + raw = np.float32(raw) + + # dimensions + width, height = utility.helpers(raw).get_width_height() + + # number of pixels to pad + no_of_pixel_pad = 2 + raw = np.pad(raw, \ + (no_of_pixel_pad, no_of_pixel_pad),\ + 'reflect') # reflect would not repeat the border value + + # allocate space for the R, G, B planes + R = np.empty( (height + no_of_pixel_pad * 2, width + no_of_pixel_pad * 2), dtype = np.float32 ) + G = np.empty( (height + no_of_pixel_pad * 2, width + no_of_pixel_pad * 2), dtype = np.float32 ) + B = np.empty( (height + no_of_pixel_pad * 2, width + no_of_pixel_pad * 2), dtype = np.float32 ) + + # create a RGB output + demosaic_out = np.empty( (height, width, 3), dtype = np.float32 ) + + # fill up the directly available values according to the Bayer pattern + if (bayer_pattern == "rggb"): + + G[::2, 1::2] = raw[::2, 1::2] + G[1::2, ::2] = raw[1::2, ::2] + R[::2, ::2] = raw[::2, ::2] + B[1::2, 1::2] = raw[1::2, 1::2] + + # Green channel + for i in range(no_of_pixel_pad, height + no_of_pixel_pad): + + # to display progress + t0 = time.process_time() + + for j in range(no_of_pixel_pad, width + no_of_pixel_pad): + + # G at Red location + if (((i % 2) == 0) and ((j % 2) == 0)): + G[i, j] = 0.125 * np.sum([-1. * R[i-2, j], \ + 2. * G[i-1, j], \ + -1. * R[i, j-2], 2. * G[i, j-1], 4. * R[i,j], 2. * G[i, j+1], -1. * R[i, j+2],\ + 2. * G[i+1, j], \ + -1. * R[i+2, j]]) + # G at Blue location + elif (((i % 2) != 0) and ((j % 2) != 0)): + G[i, j] = 0.125 * np.sum([-1. * B[i-2, j], \ + 2. * G[i-1, j], \ + -1. * B[i, j-2], 2. * G[i, j-1], 4. * B[i,j], 2. * G[i, j+1], -1. * B[i, j+2], \ + 2. * G[i+1, j],\ + -1. * B[i+2, j]]) + if (timeshow): + elapsed_time = time.process_time() - t0 + print("Green: row index: " + str(i-1) + " of " + str(height) + \ + " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") + + # Red and Blue channel + for i in range(no_of_pixel_pad, height + no_of_pixel_pad): + + # to display progress + t0 = time.process_time() + + for j in range(no_of_pixel_pad, width + no_of_pixel_pad): + + # Green locations in Red rows + if (((i % 2) == 0) and ((j % 2) != 0)): + # R at Green locations in Red rows + R[i, j] = 0.125 * np.sum([.5 * G[i-2, j],\ + -1. * G[i-1, j-1], -1. * G[i-1, j+1], \ + -1. * G[i, j-2], 4. * R[i, j-1], 5. * G[i,j], 4. * R[i, j+1], -1. * G[i, j+2], \ + -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ + .5 * G[i+2, j]]) + + # B at Green locations in Red rows + B[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ + -1. * G[i-1, j-1], 4. * B[i-1, j], -1. * G[i-1, j+1], \ + .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ + -1. * G[i+1, j-1], 4. * B[i+1,j], -1. * G[i+1, j+1], \ + -1. * G[i+2, j]]) + + # Green locations in Blue rows + elif (((i % 2) != 0) and ((j % 2) == 0)): + + # R at Green locations in Blue rows + R[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ + -1. * G[i-1, j-1], 4. * R[i-1, j], -1. * G[i-1, j+1], \ + .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ + -1. * G[i+1, j-1], 4. * R[i+1, j], -1. * G[i+1, j+1], \ + -1. * G[i+2, j]]) + + # B at Green locations in Blue rows + B[i, j] = 0.125 * np.sum([.5 * G[i-2, j], \ + -1. * G [i-1, j-1], -1. * G[i-1, j+1], \ + -1. * G[i, j-2], 4. * B[i, j-1], 5. * G[i,j], 4. * B[i, j+1], -1. * G[i, j+2], \ + -1. * G[i+1, j-1], -1. 
* G[i+1, j+1], \ + .5 * G[i+2, j]]) + + # R at Blue locations + elif (((i % 2) != 0) and ((j % 2) != 0)): + R[i, j] = 0.125 * np.sum([-1.5 * B[i-2, j], \ + 2. * R[i-1, j-1], 2. * R[i-1, j+1], \ + -1.5 * B[i, j-2], 6. * B[i,j], -1.5 * B[i, j+2], \ + 2. * R[i+1, j-1], 2. * R[i+1, j+1], \ + -1.5 * B[i+2, j]]) + + # B at Red locations + elif (((i % 2) == 0) and ((j % 2) == 0)): + B[i, j] = 0.125 * np.sum([-1.5 * R[i-2, j], \ + 2. * B[i-1, j-1], 2. * B[i-1, j+1], \ + -1.5 * R[i, j-2], 6. * R[i,j], -1.5 * R[i, j+2], \ + 2. * B[i+1, j-1], 2. * B[i+1, j+1], \ + -1.5 * R[i+2, j]]) + + if (timeshow): + elapsed_time = time.process_time() - t0 + print("Red/Blue: row index: " + str(i-1) + " of " + str(height) + \ + " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") + + + elif (bayer_pattern == "gbrg"): + + G[::2, ::2] = raw[::2, ::2] + G[1::2, 1::2] = raw[1::2, 1::2] + R[1::2, ::2] = raw[1::2, ::2] + B[::2, 1::2] = raw[::2, 1::2] + + # Green channel + for i in range(no_of_pixel_pad, height + no_of_pixel_pad): + + # to display progress + t0 = time.process_time() + + for j in range(no_of_pixel_pad, width + no_of_pixel_pad): + + # G at Red location + if (((i % 2) != 0) and ((j % 2) == 0)): + G[i, j] = 0.125 * np.sum([-1. * R[i-2, j], \ + 2. * G[i-1, j], \ + -1. * R[i, j-2], 2. * G[i, j-1], 4. * R[i,j], 2. * G[i, j+1], -1. * R[i, j+2],\ + 2. * G[i+1, j], \ + -1. * R[i+2, j]]) + # G at Blue location + elif (((i % 2) == 0) and ((j % 2) != 0)): + G[i, j] = 0.125 * np.sum([-1. * B[i-2, j], \ + 2. * G[i-1, j], \ + -1. * B[i, j-2], 2. * G[i, j-1], 4. * B[i,j], 2. * G[i, j+1], -1. * B[i, j+2], \ + 2. * G[i+1, j],\ + -1. * B[i+2, j]]) + if (timeshow): + elapsed_time = time.process_time() - t0 + print("Green: row index: " + str(i-1) + " of " + str(height) + \ + " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") + + # Red and Blue channel + for i in range(no_of_pixel_pad, height + no_of_pixel_pad): + + # to display progress + t0 = time.process_time() + + for j in range(no_of_pixel_pad, width + no_of_pixel_pad): + + # Green locations in Red rows + if (((i % 2) != 0) and ((j % 2) != 0)): + # R at Green locations in Red rows + R[i, j] = 0.125 * np.sum([.5 * G[i-2, j],\ + -1. * G[i-1, j-1], -1. * G[i-1, j+1], \ + -1. * G[i, j-2], 4. * R[i, j-1], 5. * G[i,j], 4. * R[i, j+1], -1. * G[i, j+2], \ + -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ + .5 * G[i+2, j]]) + + # B at Green locations in Red rows + B[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ + -1. * G[i-1, j-1], 4. * B[i-1, j], -1. * G[i-1, j+1], \ + .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ + -1. * G[i+1, j-1], 4. * B[i+1,j], -1. * G[i+1, j+1], \ + -1. * G[i+2, j]]) + + # Green locations in Blue rows + elif (((i % 2) == 0) and ((j % 2) == 0)): + + # R at Green locations in Blue rows + R[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ + -1. * G[i-1, j-1], 4. * R[i-1, j], -1. * G[i-1, j+1], \ + .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ + -1. * G[i+1, j-1], 4. * R[i+1, j], -1. * G[i+1, j+1], \ + -1. * G[i+2, j]]) + + # B at Green locations in Blue rows + B[i, j] = 0.125 * np.sum([.5 * G[i-2, j], \ + -1. * G [i-1, j-1], -1. * G[i-1, j+1], \ + -1. * G[i, j-2], 4. * B[i, j-1], 5. * G[i,j], 4. * B[i, j+1], -1. * G[i, j+2], \ + -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ + .5 * G[i+2, j]]) + + # R at Blue locations + elif (((i % 2) == 0) and ((j % 2) != 0)): + R[i, j] = 0.125 * np.sum([-1.5 * B[i-2, j], \ + 2. * R[i-1, j-1], 2. * R[i-1, j+1], \ + -1.5 * B[i, j-2], 6. * B[i,j], -1.5 * B[i, j+2], \ + 2. * R[i+1, j-1], 2. 
* R[i+1, j+1], \ + -1.5 * B[i+2, j]]) + + # B at Red locations + elif (((i % 2) != 0) and ((j % 2) == 0)): + B[i, j] = 0.125 * np.sum([-1.5 * R[i-2, j], \ + 2. * B[i-1, j-1], 2. * B[i-1, j+1], \ + -1.5 * R[i, j-2], 6. * R[i,j], -1.5 * R[i, j+2], \ + 2. * B[i+1, j-1], 2. * B[i+1, j+1], \ + -1.5 * R[i+2, j]]) + + if (timeshow): + elapsed_time = time.process_time() - t0 + print("Red/Blue: row index: " + str(i-1) + " of " + str(height) + \ + " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") + + elif (bayer_pattern == "grbg"): + + G[::2, ::2] = raw[::2, ::2] + G[1::2, 1::2] = raw[1::2, 1::2] + R[::2, 1::2] = raw[::2, 1::2] + B[1::2, ::2] = raw[1::2, ::2] + + # Green channel + for i in range(no_of_pixel_pad, height + no_of_pixel_pad): + + # to display progress + t0 = time.process_time() + + for j in range(no_of_pixel_pad, width + no_of_pixel_pad): + + # G at Red location + if (((i % 2) == 0) and ((j % 2) != 0)): + G[i, j] = 0.125 * np.sum([-1. * R[i-2, j], \ + 2. * G[i-1, j], \ + -1. * R[i, j-2], 2. * G[i, j-1], 4. * R[i,j], 2. * G[i, j+1], -1. * R[i, j+2],\ + 2. * G[i+1, j], \ + -1. * R[i+2, j]]) + # G at Blue location + elif (((i % 2) != 0) and ((j % 2) == 0)): + G[i, j] = 0.125 * np.sum([-1. * B[i-2, j], \ + 2. * G[i-1, j], \ + -1. * B[i, j-2], 2. * G[i, j-1], 4. * B[i,j], 2. * G[i, j+1], -1. * B[i, j+2], \ + 2. * G[i+1, j],\ + -1. * B[i+2, j]]) + if (timeshow): + elapsed_time = time.process_time() - t0 + print("Green: row index: " + str(i-1) + " of " + str(height) + \ + " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") + + # Red and Blue channel + for i in range(no_of_pixel_pad, height + no_of_pixel_pad): + + # to display progress + t0 = time.process_time() + + for j in range(no_of_pixel_pad, width + no_of_pixel_pad): + + # Green locations in Red rows + if (((i % 2) == 0) and ((j % 2) == 0)): + # R at Green locations in Red rows + R[i, j] = 0.125 * np.sum([.5 * G[i-2, j],\ + -1. * G[i-1, j-1], -1. * G[i-1, j+1], \ + -1. * G[i, j-2], 4. * R[i, j-1], 5. * G[i,j], 4. * R[i, j+1], -1. * G[i, j+2], \ + -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ + .5 * G[i+2, j]]) + + # B at Green locations in Red rows + B[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ + -1. * G[i-1, j-1], 4. * B[i-1, j], -1. * G[i-1, j+1], \ + .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ + -1. * G[i+1, j-1], 4. * B[i+1,j], -1. * G[i+1, j+1], \ + -1. * G[i+2, j]]) + + # Green locations in Blue rows + elif (((i % 2) != 0) and ((j % 2) != 0)): + + # R at Green locations in Blue rows + R[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ + -1. * G[i-1, j-1], 4. * R[i-1, j], -1. * G[i-1, j+1], \ + .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ + -1. * G[i+1, j-1], 4. * R[i+1, j], -1. * G[i+1, j+1], \ + -1. * G[i+2, j]]) + + # B at Green locations in Blue rows + B[i, j] = 0.125 * np.sum([.5 * G[i-2, j], \ + -1. * G [i-1, j-1], -1. * G[i-1, j+1], \ + -1. * G[i, j-2], 4. * B[i, j-1], 5. * G[i,j], 4. * B[i, j+1], -1. * G[i, j+2], \ + -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ + .5 * G[i+2, j]]) + + # R at Blue locations + elif (((i % 2) != 0) and ((j % 2) == 0)): + R[i, j] = 0.125 * np.sum([-1.5 * B[i-2, j], \ + 2. * R[i-1, j-1], 2. * R[i-1, j+1], \ + -1.5 * B[i, j-2], 6. * B[i,j], -1.5 * B[i, j+2], \ + 2. * R[i+1, j-1], 2. * R[i+1, j+1], \ + -1.5 * B[i+2, j]]) + + # B at Red locations + elif (((i % 2) == 0) and ((j % 2) != 0)): + B[i, j] = 0.125 * np.sum([-1.5 * R[i-2, j], \ + 2. * B[i-1, j-1], 2. * B[i-1, j+1], \ + -1.5 * R[i, j-2], 6. * R[i,j], -1.5 * R[i, j+2], \ + 2. * B[i+1, j-1], 2. 
* B[i+1, j+1], \ + -1.5 * R[i+2, j]]) + + if (timeshow): + elapsed_time = time.process_time() - t0 + print("Red/Blue: row index: " + str(i-1) + " of " + str(height) + \ + " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") + + elif (bayer_pattern == "bggr"): + + G[::2, 1::2] = raw[::2, 1::2] + G[1::2, ::2] = raw[1::2, ::2] + R[1::2, 1::2] = raw[1::2, 1::2] + B[::2, ::2] = raw[::2, ::2] + + # Green channel + for i in range(no_of_pixel_pad, height + no_of_pixel_pad): + + # to display progress + t0 = time.process_time() + + for j in range(no_of_pixel_pad, width + no_of_pixel_pad): + + # G at Red location + if (((i % 2) != 0) and ((j % 2) != 0)): + G[i, j] = 0.125 * np.sum([-1. * R[i-2, j], \ + 2. * G[i-1, j], \ + -1. * R[i, j-2], 2. * G[i, j-1], 4. * R[i,j], 2. * G[i, j+1], -1. * R[i, j+2],\ + 2. * G[i+1, j], \ + -1. * R[i+2, j]]) + # G at Blue location + elif (((i % 2) == 0) and ((j % 2) == 0)): + G[i, j] = 0.125 * np.sum([-1. * B[i-2, j], \ + 2. * G[i-1, j], \ + -1. * B[i, j-2], 2. * G[i, j-1], 4. * B[i,j], 2. * G[i, j+1], -1. * B[i, j+2], \ + 2. * G[i+1, j],\ + -1. * B[i+2, j]]) + if (timeshow): + elapsed_time = time.process_time() - t0 + print("Green: row index: " + str(i-1) + " of " + str(height) + \ + " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") + + # Red and Blue channel + for i in range(no_of_pixel_pad, height + no_of_pixel_pad): + + # to display progress + t0 = time.process_time() + + for j in range(no_of_pixel_pad, width + no_of_pixel_pad): + + # Green locations in Red rows + if (((i % 2) != 0) and ((j % 2) == 0)): + # R at Green locations in Red rows + R[i, j] = 0.125 * np.sum([.5 * G[i-2, j],\ + -1. * G[i-1, j-1], -1. * G[i-1, j+1], \ + -1. * G[i, j-2], 4. * R[i, j-1], 5. * G[i,j], 4. * R[i, j+1], -1. * G[i, j+2], \ + -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ + .5 * G[i+2, j]]) + + # B at Green locations in Red rows + B[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ + -1. * G[i-1, j-1], 4. * B[i-1, j], -1. * G[i-1, j+1], \ + .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ + -1. * G[i+1, j-1], 4. * B[i+1,j], -1. * G[i+1, j+1], \ + -1. * G[i+2, j]]) + + # Green locations in Blue rows + elif (((i % 2) == 0) and ((j % 2) != 0)): + + # R at Green locations in Blue rows + R[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ + -1. * G[i-1, j-1], 4. * R[i-1, j], -1. * G[i-1, j+1], \ + .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ + -1. * G[i+1, j-1], 4. * R[i+1, j], -1. * G[i+1, j+1], \ + -1. * G[i+2, j]]) + + # B at Green locations in Blue rows + B[i, j] = 0.125 * np.sum([.5 * G[i-2, j], \ + -1. * G [i-1, j-1], -1. * G[i-1, j+1], \ + -1. * G[i, j-2], 4. * B[i, j-1], 5. * G[i,j], 4. * B[i, j+1], -1. * G[i, j+2], \ + -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ + .5 * G[i+2, j]]) + + # R at Blue locations + elif (((i % 2) == 0) and ((j % 2) == 0)): + R[i, j] = 0.125 * np.sum([-1.5 * B[i-2, j], \ + 2. * R[i-1, j-1], 2. * R[i-1, j+1], \ + -1.5 * B[i, j-2], 6. * B[i,j], -1.5 * B[i, j+2], \ + 2. * R[i+1, j-1], 2. * R[i+1, j+1], \ + -1.5 * B[i+2, j]]) + + # B at Red locations + elif (((i % 2) != 0) and ((j % 2) != 0)): + B[i, j] = 0.125 * np.sum([-1.5 * R[i-2, j], \ + 2. * B[i-1, j-1], 2. * B[i-1, j+1], \ + -1.5 * R[i, j-2], 6. * R[i,j], -1.5 * R[i, j+2], \ + 2. * B[i+1, j-1], 2. * B[i+1, j+1], \ + -1.5 * R[i+2, j]]) + + if (timeshow): + elapsed_time = time.process_time() - t0 + print("Red/Blue: row index: " + str(i-1) + " of " + str(height) + \ + " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") + + else: + print("Invalid bayer pattern. 
Valid pattern can be rggb, gbrg, grbg, bggr") + return demosaic_out # This will be all zeros + + # Fill up the RGB output with interpolated values + demosaic_out[0:height, 0:width, 0] = R[no_of_pixel_pad : height + no_of_pixel_pad, \ + no_of_pixel_pad : width + no_of_pixel_pad] + demosaic_out[0:height, 0:width, 1] = G[no_of_pixel_pad : height + no_of_pixel_pad, \ + no_of_pixel_pad : width + no_of_pixel_pad] + demosaic_out[0:height, 0:width, 2] = B[no_of_pixel_pad : height + no_of_pixel_pad, \ + no_of_pixel_pad : width + no_of_pixel_pad] + + demosaic_out = np.clip(demosaic_out, clip_range[0], clip_range[1]) + return demosaic_out + + +def fill_channel_directional_weight(data, bayer_pattern): + + #== Calculate the directional weights (weight_N, weight_E, weight_S, weight_W. + # where N, E, S, W stand for north, east, south, and west.) + data = np.asarray(data) + v = np.asarray(signal.convolve2d(data, [[1],[0],[-1]], mode="same", boundary="symm")) + h = np.asarray(signal.convolve2d(data, [[1, 0, -1]], mode="same", boundary="symm")) + + weight_N = np.zeros(np.shape(data), dtype=np.float32) + weight_E = np.zeros(np.shape(data), dtype=np.float32) + weight_S = np.zeros(np.shape(data), dtype=np.float32) + weight_W = np.zeros(np.shape(data), dtype=np.float32) + + value_N = np.zeros(np.shape(data), dtype=np.float32) + value_E = np.zeros(np.shape(data), dtype=np.float32) + value_S = np.zeros(np.shape(data), dtype=np.float32) + value_W = np.zeros(np.shape(data), dtype=np.float32) + + if ((bayer_pattern == "rggb") or (bayer_pattern == "bggr")): + + + # note that in the following the locations in the comments are given + # assuming the bayer_pattern rggb + + #== CALCULATE WEIGHTS IN B LOCATIONS + weight_N[1::2, 1::2] = np.abs(v[1::2, 1::2]) + np.abs(v[::2, 1::2]) + + # repeating the column before the last to the right so that sampling + # does not cause any dimension mismatch + temp_h_b = np.hstack((h, np.atleast_2d(h[:, -2]).T)) + weight_E[1::2, 1::2] = np.abs(h[1::2, 1::2]) + np.abs(temp_h_b[1::2, 2::2]) + + # repeating the row before the last row to the bottom so that sampling + # does not cause any dimension mismatch + temp_v_b = np.vstack((v, v[-1])) + weight_S[1::2, 1::2] = np.abs(v[1::2, 1::2]) + np.abs(temp_v_b[2::2, 1::2]) + weight_W[1::2, 1::2] = np.abs(h[1::2, 1::2]) + np.abs(h[1::2, ::2]) + + #== CALCULATE WEIGHTS IN R LOCATIONS + # repeating the second row at the top of matrix so that sampling does + # not cause any dimension mismatch, also remove the bottom row + temp_v_r = np.delete(np.vstack((v[1], v)), -1, 0) + weight_N[::2, ::2] = np.abs(v[::2, ::2]) + np.abs(temp_v_r[::2, ::2]) + + weight_E[::2, ::2] = np.abs(h[::2, ::2]) + np.abs(h[::2, 1::2]) + + weight_S[::2, ::2] = np.abs(v[::2, ::2]) + np.abs(v[1::2, ::2]) + + # repeating the second column at the left of matrix so that sampling + # does not cause any dimension mismatch, also remove the rightmost + # column + temp_h_r = np.delete(np.hstack((np.atleast_2d(h[:, 1]).T, h)), -1, 1) + weight_W[::2, ::2] = np.abs(h[::2, ::2]) + np.abs(temp_h_r[::2, ::2]) + + weight_N = np.divide(1., 1. + weight_N) + weight_E = np.divide(1., 1. + weight_E) + weight_S = np.divide(1., 1. + weight_S) + weight_W = np.divide(1., 1. + weight_W) + + #== CALCULATE DIRECTIONAL ESTIMATES IN B LOCATIONS + value_N[1::2, 1::2] = data[::2, 1::2] + v[::2, 1::2] / 2. 
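+        # Each directional estimate (value_N/E/S/W) below is the nearest green
+        # neighbor in that direction plus half the difference of the two
+        # nearest same-color samples, so the interpolation tracks the local
+        # gradient instead of blurring across edges.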
+ + # repeating the column before the last to the right so that sampling + # does not cause any dimension mismatch + temp = np.hstack((data, np.atleast_2d(data[:, -2]).T)) + value_E[1::2, 1::2] = temp[1::2, 2::2] - temp_h_b[1::2, 2::2] / 2. + + # repeating the row before the last row to the bottom so that sampling + # does not cause any dimension mismatch + temp = np.vstack((data, data[-1])) + value_S[1::2, 1::2] = temp[2::2, 1::2] - temp_v_b[2::2, 1::2] / 2. + + value_W[1::2, 1::2] = data[1::2, ::2] + h[1::2, ::2] / 2. + + #== CALCULATE DIRECTIONAL ESTIMATES IN R LOCATIONS + # repeating the second row at the top of matrix so that sampling does + # not cause any dimension mismatch, also remove the bottom row + temp = np.delete(np.vstack((data[1], data)), -1, 0) + value_N[::2, ::2] = temp[::2, ::2] + temp_v_r[::2, ::2] / 2. + + value_E[::2, ::2] = data[::2, 1::2] - h[::2, 1::2] / 2. + + value_S[::2, ::2] = data[1::2, ::2] - v[1::2, ::2] / 2. + + # repeating the second column at the left of matrix so that sampling + # does not cause any dimension mismatch, also remove the rightmost + # column + temp = np.delete(np.hstack((np.atleast_2d(data[:, 1]).T, data)), -1, 1) + value_W[::2, ::2] = temp[::2, ::2] + temp_h_r[::2, ::2] / 2. + + output = np.zeros(np.shape(data), dtype=np.float32) + output = np.divide((np.multiply(value_N, weight_N) + \ + np.multiply(value_E, weight_E) + \ + np.multiply(value_S, weight_S) + \ + np.multiply(value_W, weight_W)),\ + (weight_N + weight_E + weight_S + weight_W)) + + output[::2, 1::2] = data[::2, 1::2] + output[1::2, ::2] = data[1::2, ::2] + + return output + + elif ((bayer_pattern == "gbrg") or (bayer_pattern == "grbg")): + + # note that in the following the locations in the comments are given + # assuming the bayer_pattern gbrg + + #== CALCULATE WEIGHTS IN B LOCATIONS + # repeating the second row at the top of matrix so that sampling does + # not cause any dimension mismatch, also remove the bottom row + temp_v_b = np.delete(np.vstack((v[1], v)), -1, 0) + weight_N[::2, 1::2] = np.abs(v[::2, 1::2]) + np.abs(temp_v_b[::2, 1::2]) + + # repeating the column before the last to the right so that sampling + # does not cause any dimension mismatch + temp_h_b = np.hstack((h, np.atleast_2d(h[:, -2]).T)) + weight_E[::2, 1::2] = np.abs(h[::2, 1::2]) + np.abs(temp_h_b[::2, 2::2]) + + # repeating the row before the last row to the bottom so that sampling + # does not cause any dimension mismatch + weight_S[::2, 1::2] = np.abs(v[::2, 1::2]) + np.abs(v[1::2, 1::2]) + weight_W[::2, 1::2] = np.abs(h[::2, 1::2]) + np.abs(h[::2, ::2]) + + #== CALCULATE WEIGHTS IN R LOCATIONS + weight_N[1::2, ::2] = np.abs(v[1::2, ::2]) + np.abs(v[::2, ::2]) + weight_E[1::2, ::2] = np.abs(h[1::2, ::2]) + np.abs(h[1::2, 1::2]) + + # repeating the row before the last row to the bottom so that sampling + # does not cause any dimension mismatch + temp_v_r = np.vstack((v, v[-1])) + weight_S[1::2, ::2] = np.abs(v[1::2, ::2]) + np.abs(temp_v_r[2::2, ::2]) + + # repeating the second column at the left of matrix so that sampling + # does not cause any dimension mismatch, also remove the rightmost + # column + temp_h_r = np.delete(np.hstack((np.atleast_2d(h[:, 1]).T, h)), -1, 1) + weight_W[1::2, ::2] = np.abs(h[1::2, ::2]) + np.abs(temp_h_r[1::2, ::2]) + + weight_N = np.divide(1., 1. + weight_N) + weight_E = np.divide(1., 1. + weight_E) + weight_S = np.divide(1., 1. + weight_S) + weight_W = np.divide(1., 1. 
+ weight_W) + + #== CALCULATE DIRECTIONAL ESTIMATES IN B LOCATIONS + # repeating the second row at the top of matrix so that sampling does + # not cause any dimension mismatch, also remove the bottom row + temp = np.delete(np.vstack((data[1], data)), -1, 0) + value_N[::2, 1::2] = temp[::2, 1::2] + temp_v_b[::2, 1::2] / 2. + + # repeating the column before the last to the right so that sampling + # does not cause any dimension mismatch + temp = np.hstack((data, np.atleast_2d(data[:, -2]).T)) + value_E[::2, 1::2] = temp[::2, 2::2] - temp_h_b[::2, 2::2] / 2. + + # repeating the row before the last row to the bottom so that sampling + # does not cause any dimension mismatch + value_S[::2, 1::2] = data[1::2, 1::2] - v[1::2, 1::2] / 2. + + value_W[::2, 1::2] = data[::2, ::2] + h[::2, ::2] / 2. + + #== CALCULATE DIRECTIONAL ESTIMATES IN R LOCATIONS + # repeating the second row at the top of matrix so that sampling does + # not cause any dimension mismatch, also remove the bottom row + value_N[1::2, ::2] = data[::2, ::2] + v[::2, ::2] / 2. + value_E[1::2, ::2] = data[1::2, 1::2] - h[1::2, 1::2] / 2. + + # repeating the row before the last row to the bottom so that sampling + # does not cause any dimension mismatch + temp = np.vstack((data, data[-1])) + value_S[1::2, ::2] = temp[2::2, ::2] - temp_v_r[2::2, ::2] / 2. + + # repeating the second column at the left of matrix so that sampling + # does not cause any dimension mismatch, also remove the rightmost + # column + temp = np.delete(np.hstack((np.atleast_2d(data[:, 1]).T, data)), -1, 1) + value_W[1::2, ::2] = temp[1::2, ::2] + temp_h_r[1::2, ::2] / 2. + + output = np.zeros(np.shape(data), dtype=np.float32) + output = np.divide((np.multiply(value_N, weight_N) + \ + np.multiply(value_E, weight_E) + \ + np.multiply(value_S, weight_S) + \ + np.multiply(value_W, weight_W)),\ + (weight_N + weight_E + weight_S + weight_W)) + + output[::2, ::2] = data[::2, ::2] + output[1::2, 1::2] = data[1::2, 1::2] + + return output + + +def fill_br_locations(data, G, bayer_pattern): + + # Fill up the B/R values interpolated at R/B locations + B = np.zeros(np.shape(data), dtype=np.float32) + R = np.zeros(np.shape(data), dtype=np.float32) + + data = np.asarray(data) + G = np.asarray(G) + d1 = np.asarray(signal.convolve2d(data, [[-1, 0, 0],[0, 0, 0], [0, 0, 1]], mode="same", boundary="symm")) + d2 = np.asarray(signal.convolve2d(data, [[0, 0, 1], [0, 0, 0], [-1, 0, 0]], mode="same", boundary="symm")) + + df_NE = np.asarray(signal.convolve2d(G, [[0, 0, 0], [0, 1, 0], [-1, 0, 0]], mode="same", boundary="symm")) + df_SE = np.asarray(signal.convolve2d(G, [[-1, 0, 0], [0, 1, 0], [0, 0, 0]], mode="same", boundary="symm")) + df_SW = np.asarray(signal.convolve2d(G, [[0, 0, -1], [0, 1, 0], [0, 0, 0]], mode="same", boundary="symm")) + df_NW = np.asarray(signal.convolve2d(G, [[0, 0, 0], [0, 1, 0], [0, 0, -1]], mode="same", boundary="symm")) + + weight_NE = np.zeros(np.shape(data), dtype=np.float32) + weight_SE = np.zeros(np.shape(data), dtype=np.float32) + weight_SW = np.zeros(np.shape(data), dtype=np.float32) + weight_NW = np.zeros(np.shape(data), dtype=np.float32) + + value_NE = np.zeros(np.shape(data), dtype=np.float32) + value_SE = np.zeros(np.shape(data), dtype=np.float32) + value_SW = np.zeros(np.shape(data), dtype=np.float32) + value_NW = np.zeros(np.shape(data), dtype=np.float32) + + if ((bayer_pattern == "rggb") or (bayer_pattern == "bggr")): + + #== weights for B in R locations + weight_NE[::2, ::2] = np.abs(d2[::2, ::2]) + np.abs(df_NE[::2, ::2]) + weight_SE[::2, ::2] = 
np.abs(d1[::2, ::2]) + np.abs(df_SE[::2, ::2]) + weight_SW[::2, ::2] = np.abs(d2[::2, ::2]) + np.abs(df_SW[::2, ::2]) + weight_NW[::2, ::2] = np.abs(d1[::2, ::2]) + np.abs(df_NW[::2, ::2]) + + #== weights for R in B locations + weight_NE[1::2, 1::2] = np.abs(d2[1::2, 1::2]) + np.abs(df_NE[1::2, 1::2]) + weight_SE[1::2, 1::2] = np.abs(d1[1::2, 1::2]) + np.abs(df_SE[1::2, 1::2]) + weight_SW[1::2, 1::2] = np.abs(d2[1::2, 1::2]) + np.abs(df_SW[1::2, 1::2]) + weight_NW[1::2, 1::2] = np.abs(d1[1::2, 1::2]) + np.abs(df_NW[1::2, 1::2]) + + weight_NE = np.divide(1., 1. + weight_NE) + weight_SE = np.divide(1., 1. + weight_SE) + weight_SW = np.divide(1., 1. + weight_SW) + weight_NW = np.divide(1., 1. + weight_NW) + + #== directional estimates of B in R locations + # repeating the second row at the top of matrix so that sampling does + # not cause any dimension mismatch, also remove the bottom row + temp = np.delete(np.vstack((data[1], data)), -1, 0) + value_NE[::2, ::2] = temp[::2, 1::2] + df_NE[::2, ::2] / 2. + value_SE[::2, ::2] = data[1::2, 1::2] + df_SE[::2, ::2] / 2. + # repeating the second column at the left of matrix so that sampling + # does not cause any dimension mismatch, also remove the rightmost + # column + temp = np.delete(np.hstack((np.atleast_2d(data[:, 1]).T, data)), -1, 1) + value_SW[::2, ::2] = temp[1::2, ::2] + df_SW[::2, ::2] / 2. + + # repeating the second row at the top of matrix so that sampling does + # not cause any dimension mismatch, also remove the bottom row + temp = np.delete(np.vstack((data[1], data)), -1, 0) + # repeating the second column at the left of matrix so that sampling + # does not cause any dimension mismatch, also remove the rightmost + # column + temp = np.delete(np.hstack((np.atleast_2d(temp[:, 1]).T, temp)), -1, 1) + value_NW[::2, ::2] = temp[::2, ::2] + df_NW[::2, ::2] + + #== directional estimates of R in B locations + # repeating the column before the last to the right so that sampling + # does not cause any dimension mismatch + temp = np.hstack((data, np.atleast_2d(data[:, -2]).T)) + value_NE[1::2, 1::2] = temp[::2, 2::2] + df_NE[1::2, 1::2] / 2. + # repeating the column before the last to the right so that sampling + # does not cause any dimension mismatch + temp = np.hstack((data, np.atleast_2d(data[:, -2]).T)) + # repeating the row before the last row to the bottom so that sampling + # does not cause any dimension mismatch + temp = np.vstack((temp, temp[-1])) + value_SE[1::2, 1::2] = temp[2::2, 2::2] + df_SE[1::2, 1::2] / 2. + # repeating the row before the last row to the bottom so that sampling + # does not cause any dimension mismatch + temp = np.vstack((data, data[-1])) + value_SW[1::2, 1::2] = temp[2::2, ::2] + df_SW[1::2, 1::2] / 2. + value_NW[1::2, 1::2] = data[::2, ::2] + df_NW[1::2, 1::2] / 2. 
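+        # Fuse the four diagonal estimates into one value per pixel, weighting
+        # each by its edge-adaptive weight and normalizing by the total weight.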
+ + RB = np.divide(np.multiply(weight_NE, value_NE) + \ + np.multiply(weight_SE, value_SE) + \ + np.multiply(weight_SW, value_SW) + \ + np.multiply(weight_NW, value_NW),\ + (weight_NE + weight_SE + weight_SW + weight_NW)) + + if (bayer_pattern == "rggb"): + + R[1::2, 1::2] = RB[1::2, 1::2] + R[::2, ::2] = data[::2, ::2] + B[::2, ::2] = RB[::2, ::2] + B[1::2, 1::2] = data[1::2, 1::2] + + elif (bayer_pattern == "bggr"): + R[::2, ::2] = RB[::2, ::2] + R[1::2, 1::2] = data[1::2, 1::2] + B[1::2, 1::2] = RB[1::2, 1::2] + B[::2, ::2] = data[::2, ::2] + + + R[1::2, ::2] = G[1::2, ::2] + R[::2, 1::2] = G[::2, 1::2] + R = fill_channel_directional_weight(R, "gbrg") + + B[1::2, ::2] = G[1::2, ::2] + B[::2, 1::2] = G[::2, 1::2] + B = fill_channel_directional_weight(B, "gbrg") + + + elif ((bayer_pattern == "grbg") or (bayer_pattern == "gbrg")): + #== weights for B in R locations + weight_NE[::2, 1::2] = np.abs(d2[::2, 1::2]) + np.abs(df_NE[::2, 1::2]) + weight_SE[::2, 1::2] = np.abs(d1[::2, 1::2]) + np.abs(df_SE[::2, 1::2]) + weight_SW[::2, 1::2] = np.abs(d2[::2, 1::2]) + np.abs(df_SW[::2, 1::2]) + weight_NW[::2, 1::2] = np.abs(d1[::2, 1::2]) + np.abs(df_NW[::2, 1::2]) + + #== weights for R in B locations + weight_NE[1::2, ::2] = np.abs(d2[1::2, ::2]) + np.abs(df_NE[1::2, ::2]) + weight_SE[1::2, ::2] = np.abs(d1[1::2, ::2]) + np.abs(df_SE[1::2, ::2]) + weight_SW[1::2, ::2] = np.abs(d2[1::2, ::2]) + np.abs(df_SW[1::2, ::2]) + weight_NW[1::2, ::2] = np.abs(d1[1::2, ::2]) + np.abs(df_NW[1::2, ::2]) + + weight_NE = np.divide(1., 1. + weight_NE) + weight_SE = np.divide(1., 1. + weight_SE) + weight_SW = np.divide(1., 1. + weight_SW) + weight_NW = np.divide(1., 1. + weight_NW) + + #== directional estimates of B in R locations + # repeating the second row at the top of matrix so that sampling does + # not cause any dimension mismatch, also remove the bottom row + temp = np.delete(np.vstack((data[1], data)), -1, 0) + # repeating the column before the last to the right so that sampling + # does not cause any dimension mismatch + temp = np.hstack((temp, np.atleast_2d(temp[:, -2]).T)) + value_NE[::2, 1::2] = temp[::2, 2::2] + df_NE[::2, 1::2] / 2. + # repeating the column before the last to the right so that sampling + # does not cause any dimension mismatch + temp = np.hstack((data, np.atleast_2d(data[:, -2]).T)) + value_SE[::2, 1::2] = temp[1::2, 2::2] + df_SE[::2, 1::2] / 2. + value_SW[::2, 1::2] = data[1::2, ::2] + df_SW[::2, 1::2] / 2. + + # repeating the second row at the top of matrix so that sampling does + # not cause any dimension mismatch, also remove the bottom row + temp = np.delete(np.vstack((data[1], data)), -1, 0) + value_NW[::2, 1::2] = temp[::2, ::2] + df_NW[::2, 1::2] + + #== directional estimates of R in B locations + value_NE[1::2, ::2] = data[::2, 1::2] + df_NE[1::2, ::2] / 2. + # repeating the column before the last to the right so that sampling + # does not cause any dimension mismatch + temp = np.hstack((data, np.atleast_2d(data[:, -2]).T)) + # repeating the row before the last row to the bottom so that sampling + # does not cause any dimension mismatch + temp = np.vstack((temp, temp[-1])) + value_SE[1::2, ::2] = temp[2::2, 1::2] + df_SE[1::2, ::2] / 2. 
+ # repeating the row before the last row to the bottom so that sampling + # does not cause any dimension mismatch + temp = np.vstack((data, data[-1])) + # repeating the second column at the left of matrix so that sampling + # does not cause any dimension mismatch, also remove the rightmost + # column + temp = np.delete(np.hstack((np.atleast_2d(temp[:, 1]).T, temp)), -1, 1) + value_SW[1::2, ::2] = temp[2::2, ::2] + df_SW[1::2, ::2] / 2. + # repeating the second column at the left of matrix so that sampling + # does not cause any dimension mismatch, also remove the rightmost + # column + temp = np.delete(np.hstack((np.atleast_2d(data[:, 1]).T, data)), -1, 1) + value_NW[1::2, ::2] = temp[::2, ::2] + df_NW[1::2, ::2] / 2. + + RB = np.divide(np.multiply(weight_NE, value_NE) + \ + np.multiply(weight_SE, value_SE) + \ + np.multiply(weight_SW, value_SW) + \ + np.multiply(weight_NW, value_NW),\ + (weight_NE + weight_SE + weight_SW + weight_NW)) + + if (bayer_pattern == "grbg"): + + R[1::2, ::2] = RB[1::2, ::2] + R[::2, 1::2] = data[::2, 1::2] + B[::2, 1::2] = RB[::2, 1::2] + B[1::2, ::2] = data[1::2, ::2] + + elif (bayer_pattern == "gbrg"): + R[::2, 1::2] = RB[::2, 1::2] + R[1::2, ::2] = data[1::2, ::2] + B[1::2, ::2] = RB[1::2, ::2] + B[::2, 1::2] = data[::2, 1::2] + + + R[::2, ::2] = G[::2, ::2] + R[1::2, 1::2] = G[1::2, 1::2] + R = fill_channel_directional_weight(R, "rggb") + + B[1::2, 1::2] = G[1::2, 1::2] + B[::2, ::2] = G[::2, ::2] + B = fill_channel_directional_weight(B, "rggb") + + + return B, R + +# # ============================================================= +# # function: dbayer_mhc_fast +# # demosaicing using Malvar-He-Cutler algorithm +# # http://www.ipol.im/pub/art/2011/g_mhcd/ +# # ============================================================= +# def debayer_mhc_fast(raw, bayer_pattern="rggb", clip_range=[0, 65535], timeshow=False): +# +# # convert to float32 in case it was not +# raw = np.float32(raw) +# +# # dimensions +# width, height = utility.helpers(raw).get_width_height() +# +# # allocate space for the R, G, B planes +# R = np.empty((height, width), dtype = np.float32) +# G = np.empty((height, width), dtype = np.float32) +# B = np.empty((height, width), dtype = np.float32) +# +# # create a RGB output +# demosaic_out = np.empty( (height, width, 3), dtype = np.float32 ) +# +# # define the convolution kernels +# kernel_g_at_rb = [[0., 0., -1., 0., 0.],\ +# [0., 0., 2., 0., 0.],\ +# [-1., 2., 4., 2., -1.],\ +# [0., 0., 2., 0., 0.],\ +# [0., 0., -1., 0., 0.]] * .125 +# +# kernel_r_at_gr = [[0., 0., .5, 0., 0.],\ +# [0., -1., 0., -1., 0.],\ +# [-1., 4., 5., 4., -1.],\ +# [0., -1., 0., -1., 0.],\ +# [0., 0., .5, 0., 0.]] * .125 +# +# kernel_b_at_gr = [[0., 0., -1., 0., 0.],\ +# [0., -1., 4., -1., 0.],\ +# [.5., 0., 5., 0., .5],\ +# [0., -1., 4., -1., 0],\ +# [0., 0., -1., 0., 0.]] * .125 +# +# kernel_r_at_gb = [[0., 0., -1., 0., 0.],\ +# [0., -1., 4., -1., 0.],\ +# [.5, 0., 5., 0., .5],\ +# [0., -1., 4., -1., 0.],\ +# [0., 0., -1., 0., 0.]] * .125 +# +# kernel_b_at_gb = [[0., 0., .5, 0., 0.],\ +# [0., -1., 0., -1., 0.],\ +# [-1., 4., 5., 4., -1.],\ +# [0., -1., 0., -1., 0.],\ +# [0., 0., .5, 0., 0.]] * .125 +# +# kernel_r_at_b = [[0., 0., -1.5, 0., 0.],\ +# [0., 2., 0., 2., 0.],\ +# [-1.5, 0., 6., 0., -1.5],\ +# [0., 2., 0., 2., 0.],\ +# [0., 0., -1.5, 0., 0.]] * .125 +# +# kernel_b_at_r = [[0., 0., -1.5, 0., 0.],\ +# [0., 2., 0., 2., 0.],\ +# [-1.5, 0., 6., 0., -1.5],\ +# [0., 2., 0., 2., 0.],\ +# [0., 0., -1.5, 0., 0.]] * .125 +# +# +# +# # fill up the directly available values 
according to the Bayer pattern +# if (bayer_pattern == "rggb"): +# +# G[::2, 1::2] = raw[::2, 1::2] +# G[1::2, ::2] = raw[1::2, ::2] +# R[::2, ::2] = raw[::2, ::2] +# B[1::2, 1::2] = raw[1::2, 1::2] +# +# # Green channel +# for i in range(no_of_pixel_pad, height + no_of_pixel_pad): +# +# # to display progress +# t0 = time.process_time() +# +# for j in range(no_of_pixel_pad, width + no_of_pixel_pad): +# +# # G at Red location +# if (((i % 2) == 0) and ((j % 2) == 0)): +# G[i, j] = 0.125 * np.sum([-1. * R[i-2, j], \ +# 2. * G[i-1, j], \ +# -1. * R[i, j-2], 2. * G[i, j-1], 4. * R[i,j], 2. * G[i, j+1], -1. * R[i, j+2],\ +# 2. * G[i+1, j], \ +# -1. * R[i+2, j]]) +# # G at Blue location +# elif (((i % 2) != 0) and ((j % 2) != 0)): +# G[i, j] = 0.125 * np.sum([-1. * B[i-2, j], \ +# 2. * G[i-1, j], \ +# -1. * B[i, j-2], 2. * G[i, j-1], 4. * B[i,j], 2. * G[i, j+1], -1. * B[i, j+2], \ +# 2. * G[i+1, j],\ +# -1. * B[i+2, j]]) +# if (timeshow): +# elapsed_time = time.process_time() - t0 +# print("Green: row index: " + str(i-1) + " of " + str(height) + \ +# " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") +# +# # Red and Blue channel +# for i in range(no_of_pixel_pad, height + no_of_pixel_pad): +# +# # to display progress +# t0 = time.process_time() +# +# for j in range(no_of_pixel_pad, width + no_of_pixel_pad): +# +# # Green locations in Red rows +# if (((i % 2) == 0) and ((j % 2) != 0)): +# # R at Green locations in Red rows +# R[i, j] = 0.125 * np.sum([.5 * G[i-2, j],\ +# -1. * G[i-1, j-1], -1. * G[i-1, j+1], \ +# -1. * G[i, j-2], 4. * R[i, j-1], 5. * G[i,j], 4. * R[i, j+1], -1. * G[i, j+2], \ +# -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ +# .5 * G[i+2, j]]) +# +# # B at Green locations in Red rows +# B[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ +# -1. * G[i-1, j-1], 4. * B[i-1, j], -1. * G[i-1, j+1], \ +# .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ +# -1. * G[i+1, j-1], 4. * B[i+1,j], -1. * G[i+1, j+1], \ +# -1. * G[i+2, j]]) +# +# # Green locations in Blue rows +# elif (((i % 2) != 0) and ((j % 2) == 0)): +# +# # R at Green locations in Blue rows +# R[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ +# -1. * G[i-1, j-1], 4. * R[i-1, j], -1. * G[i-1, j+1], \ +# .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ +# -1. * G[i+1, j-1], 4. * R[i+1, j], -1. * G[i+1, j+1], \ +# -1. * G[i+2, j]]) +# +# # B at Green locations in Blue rows +# B[i, j] = 0.125 * np.sum([.5 * G[i-2, j], \ +# -1. * G [i-1, j-1], -1. * G[i-1, j+1], \ +# -1. * G[i, j-2], 4. * B[i, j-1], 5. * G[i,j], 4. * B[i, j+1], -1. * G[i, j+2], \ +# -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ +# .5 * G[i+2, j]]) +# +# # R at Blue locations +# elif (((i % 2) != 0) and ((j % 2) != 0)): +# R[i, j] = 0.125 * np.sum([-1.5 * B[i-2, j], \ +# 2. * R[i-1, j-1], 2. * R[i-1, j+1], \ +# -1.5 * B[i, j-2], 6. * B[i,j], -1.5 * B[i, j+2], \ +# 2. * R[i+1, j-1], 2. * R[i+1, j+1], \ +# -1.5 * B[i+2, j]]) +# +# # B at Red locations +# elif (((i % 2) == 0) and ((j % 2) == 0)): +# B[i, j] = 0.125 * np.sum([-1.5 * R[i-2, j], \ +# 2. * B[i-1, j-1], 2. * B[i-1, j+1], \ +# -1.5 * R[i, j-2], 6. * R[i,j], -1.5 * R[i, j+2], \ +# 2. * B[i+1, j-1], 2. 
* B[i+1, j+1], \ +# -1.5 * R[i+2, j]]) +# +# if (timeshow): +# elapsed_time = time.process_time() - t0 +# print("Red/Blue: row index: " + str(i-1) + " of " + str(height) + \ +# " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") +# +# +# elif (bayer_pattern == "gbrg"): +# +# G[::2, ::2] = raw[::2, ::2] +# G[1::2, 1::2] = raw[1::2, 1::2] +# R[1::2, ::2] = raw[1::2, ::2] +# B[::2, 1::2] = raw[::2, 1::2] +# +# # Green channel +# for i in range(no_of_pixel_pad, height + no_of_pixel_pad): +# +# # to display progress +# t0 = time.process_time() +# +# for j in range(no_of_pixel_pad, width + no_of_pixel_pad): +# +# # G at Red location +# if (((i % 2) != 0) and ((j % 2) == 0)): +# G[i, j] = 0.125 * np.sum([-1. * R[i-2, j], \ +# 2. * G[i-1, j], \ +# -1. * R[i, j-2], 2. * G[i, j-1], 4. * R[i,j], 2. * G[i, j+1], -1. * R[i, j+2],\ +# 2. * G[i+1, j], \ +# -1. * R[i+2, j]]) +# # G at Blue location +# elif (((i % 2) == 0) and ((j % 2) != 0)): +# G[i, j] = 0.125 * np.sum([-1. * B[i-2, j], \ +# 2. * G[i-1, j], \ +# -1. * B[i, j-2], 2. * G[i, j-1], 4. * B[i,j], 2. * G[i, j+1], -1. * B[i, j+2], \ +# 2. * G[i+1, j],\ +# -1. * B[i+2, j]]) +# if (timeshow): +# elapsed_time = time.process_time() - t0 +# print("Green: row index: " + str(i-1) + " of " + str(height) + \ +# " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") +# +# # Red and Blue channel +# for i in range(no_of_pixel_pad, height + no_of_pixel_pad): +# +# # to display progress +# t0 = time.process_time() +# +# for j in range(no_of_pixel_pad, width + no_of_pixel_pad): +# +# # Green locations in Red rows +# if (((i % 2) != 0) and ((j % 2) != 0)): +# # R at Green locations in Red rows +# R[i, j] = 0.125 * np.sum([.5 * G[i-2, j],\ +# -1. * G[i-1, j-1], -1. * G[i-1, j+1], \ +# -1. * G[i, j-2], 4. * R[i, j-1], 5. * G[i,j], 4. * R[i, j+1], -1. * G[i, j+2], \ +# -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ +# .5 * G[i+2, j]]) +# +# # B at Green locations in Red rows +# B[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ +# -1. * G[i-1, j-1], 4. * B[i-1, j], -1. * G[i-1, j+1], \ +# .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ +# -1. * G[i+1, j-1], 4. * B[i+1,j], -1. * G[i+1, j+1], \ +# -1. * G[i+2, j]]) +# +# # Green locations in Blue rows +# elif (((i % 2) == 0) and ((j % 2) == 0)): +# +# # R at Green locations in Blue rows +# R[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ +# -1. * G[i-1, j-1], 4. * R[i-1, j], -1. * G[i-1, j+1], \ +# .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ +# -1. * G[i+1, j-1], 4. * R[i+1, j], -1. * G[i+1, j+1], \ +# -1. * G[i+2, j]]) +# +# # B at Green locations in Blue rows +# B[i, j] = 0.125 * np.sum([.5 * G[i-2, j], \ +# -1. * G [i-1, j-1], -1. * G[i-1, j+1], \ +# -1. * G[i, j-2], 4. * B[i, j-1], 5. * G[i,j], 4. * B[i, j+1], -1. * G[i, j+2], \ +# -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ +# .5 * G[i+2, j]]) +# +# # R at Blue locations +# elif (((i % 2) == 0) and ((j % 2) != 0)): +# R[i, j] = 0.125 * np.sum([-1.5 * B[i-2, j], \ +# 2. * R[i-1, j-1], 2. * R[i-1, j+1], \ +# -1.5 * B[i, j-2], 6. * B[i,j], -1.5 * B[i, j+2], \ +# 2. * R[i+1, j-1], 2. * R[i+1, j+1], \ +# -1.5 * B[i+2, j]]) +# +# # B at Red locations +# elif (((i % 2) != 0) and ((j % 2) == 0)): +# B[i, j] = 0.125 * np.sum([-1.5 * R[i-2, j], \ +# 2. * B[i-1, j-1], 2. * B[i-1, j+1], \ +# -1.5 * R[i, j-2], 6. * R[i,j], -1.5 * R[i, j+2], \ +# 2. * B[i+1, j-1], 2. 
* B[i+1, j+1], \ +# -1.5 * R[i+2, j]]) +# +# if (timeshow): +# elapsed_time = time.process_time() - t0 +# print("Red/Blue: row index: " + str(i-1) + " of " + str(height) + \ +# " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") +# +# elif (bayer_pattern == "grbg"): +# +# G[::2, ::2] = raw[::2, ::2] +# G[1::2, 1::2] = raw[1::2, 1::2] +# R[::2, 1::2] = raw[::2, 1::2] +# B[1::2, ::2] = raw[1::2, ::2] +# +# # Green channel +# for i in range(no_of_pixel_pad, height + no_of_pixel_pad): +# +# # to display progress +# t0 = time.process_time() +# +# for j in range(no_of_pixel_pad, width + no_of_pixel_pad): +# +# # G at Red location +# if (((i % 2) == 0) and ((j % 2) != 0)): +# G[i, j] = 0.125 * np.sum([-1. * R[i-2, j], \ +# 2. * G[i-1, j], \ +# -1. * R[i, j-2], 2. * G[i, j-1], 4. * R[i,j], 2. * G[i, j+1], -1. * R[i, j+2],\ +# 2. * G[i+1, j], \ +# -1. * R[i+2, j]]) +# # G at Blue location +# elif (((i % 2) != 0) and ((j % 2) == 0)): +# G[i, j] = 0.125 * np.sum([-1. * B[i-2, j], \ +# 2. * G[i-1, j], \ +# -1. * B[i, j-2], 2. * G[i, j-1], 4. * B[i,j], 2. * G[i, j+1], -1. * B[i, j+2], \ +# 2. * G[i+1, j],\ +# -1. * B[i+2, j]]) +# if (timeshow): +# elapsed_time = time.process_time() - t0 +# print("Green: row index: " + str(i-1) + " of " + str(height) + \ +# " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") +# +# # Red and Blue channel +# for i in range(no_of_pixel_pad, height + no_of_pixel_pad): +# +# # to display progress +# t0 = time.process_time() +# +# for j in range(no_of_pixel_pad, width + no_of_pixel_pad): +# +# # Green locations in Red rows +# if (((i % 2) == 0) and ((j % 2) == 0)): +# # R at Green locations in Red rows +# R[i, j] = 0.125 * np.sum([.5 * G[i-2, j],\ +# -1. * G[i-1, j-1], -1. * G[i-1, j+1], \ +# -1. * G[i, j-2], 4. * R[i, j-1], 5. * G[i,j], 4. * R[i, j+1], -1. * G[i, j+2], \ +# -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ +# .5 * G[i+2, j]]) +# +# # B at Green locations in Red rows +# B[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ +# -1. * G[i-1, j-1], 4. * B[i-1, j], -1. * G[i-1, j+1], \ +# .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ +# -1. * G[i+1, j-1], 4. * B[i+1,j], -1. * G[i+1, j+1], \ +# -1. * G[i+2, j]]) +# +# # Green locations in Blue rows +# elif (((i % 2) != 0) and ((j % 2) != 0)): +# +# # R at Green locations in Blue rows +# R[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ +# -1. * G[i-1, j-1], 4. * R[i-1, j], -1. * G[i-1, j+1], \ +# .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ +# -1. * G[i+1, j-1], 4. * R[i+1, j], -1. * G[i+1, j+1], \ +# -1. * G[i+2, j]]) +# +# # B at Green locations in Blue rows +# B[i, j] = 0.125 * np.sum([.5 * G[i-2, j], \ +# -1. * G [i-1, j-1], -1. * G[i-1, j+1], \ +# -1. * G[i, j-2], 4. * B[i, j-1], 5. * G[i,j], 4. * B[i, j+1], -1. * G[i, j+2], \ +# -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ +# .5 * G[i+2, j]]) +# +# # R at Blue locations +# elif (((i % 2) != 0) and ((j % 2) == 0)): +# R[i, j] = 0.125 * np.sum([-1.5 * B[i-2, j], \ +# 2. * R[i-1, j-1], 2. * R[i-1, j+1], \ +# -1.5 * B[i, j-2], 6. * B[i,j], -1.5 * B[i, j+2], \ +# 2. * R[i+1, j-1], 2. * R[i+1, j+1], \ +# -1.5 * B[i+2, j]]) +# +# # B at Red locations +# elif (((i % 2) == 0) and ((j % 2) != 0)): +# B[i, j] = 0.125 * np.sum([-1.5 * R[i-2, j], \ +# 2. * B[i-1, j-1], 2. * B[i-1, j+1], \ +# -1.5 * R[i, j-2], 6. * R[i,j], -1.5 * R[i, j+2], \ +# 2. * B[i+1, j-1], 2. 
* B[i+1, j+1], \ +# -1.5 * R[i+2, j]]) +# +# if (timeshow): +# elapsed_time = time.process_time() - t0 +# print("Red/Blue: row index: " + str(i-1) + " of " + str(height) + \ +# " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") +# +# elif (bayer_pattern == "bggr"): +# +# G[::2, 1::2] = raw[::2, 1::2] +# G[1::2, ::2] = raw[1::2, ::2] +# R[1::2, 1::2] = raw[1::2, 1::2] +# B[::2, ::2] = raw[::2, ::2] +# +# # Green channel +# for i in range(no_of_pixel_pad, height + no_of_pixel_pad): +# +# # to display progress +# t0 = time.process_time() +# +# for j in range(no_of_pixel_pad, width + no_of_pixel_pad): +# +# # G at Red location +# if (((i % 2) != 0) and ((j % 2) != 0)): +# G[i, j] = 0.125 * np.sum([-1. * R[i-2, j], \ +# 2. * G[i-1, j], \ +# -1. * R[i, j-2], 2. * G[i, j-1], 4. * R[i,j], 2. * G[i, j+1], -1. * R[i, j+2],\ +# 2. * G[i+1, j], \ +# -1. * R[i+2, j]]) +# # G at Blue location +# elif (((i % 2) == 0) and ((j % 2) == 0)): +# G[i, j] = 0.125 * np.sum([-1. * B[i-2, j], \ +# 2. * G[i-1, j], \ +# -1. * B[i, j-2], 2. * G[i, j-1], 4. * B[i,j], 2. * G[i, j+1], -1. * B[i, j+2], \ +# 2. * G[i+1, j],\ +# -1. * B[i+2, j]]) +# if (timeshow): +# elapsed_time = time.process_time() - t0 +# print("Green: row index: " + str(i-1) + " of " + str(height) + \ +# " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds") +# +# # Red and Blue channel +# for i in range(no_of_pixel_pad, height + no_of_pixel_pad): +# +# # to display progress +# t0 = time.process_time() +# +# for j in range(no_of_pixel_pad, width + no_of_pixel_pad): +# +# # Green locations in Red rows +# if (((i % 2) != 0) and ((j % 2) == 0)): +# # R at Green locations in Red rows +# R[i, j] = 0.125 * np.sum([.5 * G[i-2, j],\ +# -1. * G[i-1, j-1], -1. * G[i-1, j+1], \ +# -1. * G[i, j-2], 4. * R[i, j-1], 5. * G[i,j], 4. * R[i, j+1], -1. * G[i, j+2], \ +# -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ +# .5 * G[i+2, j]]) +# +# # B at Green locations in Red rows +# B[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ +# -1. * G[i-1, j-1], 4. * B[i-1, j], -1. * G[i-1, j+1], \ +# .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ +# -1. * G[i+1, j-1], 4. * B[i+1,j], -1. * G[i+1, j+1], \ +# -1. * G[i+2, j]]) +# +# # Green locations in Blue rows +# elif (((i % 2) == 0) and ((j % 2) != 0)): +# +# # R at Green locations in Blue rows +# R[i, j] = 0.125 * np.sum([-1. * G[i-2, j], \ +# -1. * G[i-1, j-1], 4. * R[i-1, j], -1. * G[i-1, j+1], \ +# .5 * G[i, j-2], 5. * G[i,j], .5 * G[i, j+2], \ +# -1. * G[i+1, j-1], 4. * R[i+1, j], -1. * G[i+1, j+1], \ +# -1. * G[i+2, j]]) +# +# # B at Green locations in Blue rows +# B[i, j] = 0.125 * np.sum([.5 * G[i-2, j], \ +# -1. * G [i-1, j-1], -1. * G[i-1, j+1], \ +# -1. * G[i, j-2], 4. * B[i, j-1], 5. * G[i,j], 4. * B[i, j+1], -1. * G[i, j+2], \ +# -1. * G[i+1, j-1], -1. * G[i+1, j+1], \ +# .5 * G[i+2, j]]) +# +# # R at Blue locations +# elif (((i % 2) == 0) and ((j % 2) == 0)): +# R[i, j] = 0.125 * np.sum([-1.5 * B[i-2, j], \ +# 2. * R[i-1, j-1], 2. * R[i-1, j+1], \ +# -1.5 * B[i, j-2], 6. * B[i,j], -1.5 * B[i, j+2], \ +# 2. * R[i+1, j-1], 2. * R[i+1, j+1], \ +# -1.5 * B[i+2, j]]) +# +# # B at Red locations +# elif (((i % 2) != 0) and ((j % 2) != 0)): +# B[i, j] = 0.125 * np.sum([-1.5 * R[i-2, j], \ +# 2. * B[i-1, j-1], 2. * B[i-1, j+1], \ +# -1.5 * R[i, j-2], 6. * R[i,j], -1.5 * R[i, j+2], \ +# 2. * B[i+1, j-1], 2. 
* B[i+1, j+1], \
+#                                        -1.5 * R[i+2, j]])
+#
+#             if (timeshow):
+#                 elapsed_time = time.process_time() - t0
+#                 print("Red/Blue: row index: " + str(i-1) + " of " + str(height) + \
+#                       " | elapsed time: " + "{:.3f}".format(elapsed_time) + " seconds")
+#
+#     else:
+#         print("Invalid bayer pattern. Valid patterns are: rggb, gbrg, grbg, bggr")
+#         return demosaic_out # This will be all zeros
+#
+#     # Fill up the RGB output with interpolated values
+#     demosaic_out[0:height, 0:width, 0] = R[no_of_pixel_pad : height + no_of_pixel_pad, \
+#                                            no_of_pixel_pad : width + no_of_pixel_pad]
+#     demosaic_out[0:height, 0:width, 1] = G[no_of_pixel_pad : height + no_of_pixel_pad, \
+#                                            no_of_pixel_pad : width + no_of_pixel_pad]
+#     demosaic_out[0:height, 0:width, 2] = B[no_of_pixel_pad : height + no_of_pixel_pad, \
+#                                            no_of_pixel_pad : width + no_of_pixel_pad]
+#
+#     demosaic_out = np.clip(demosaic_out, clip_range[0], clip_range[1])
+#     return demosaic_out
diff --git a/IIR-Lab/ISP_pipeline/docker_guidelines.md b/IIR-Lab/ISP_pipeline/docker_guidelines.md
new file mode 100644
index 0000000000000000000000000000000000000000..b691d28b1fae7775dfc59dadf2738124f00c2bf4
--- /dev/null
+++ b/IIR-Lab/ISP_pipeline/docker_guidelines.md
@@ -0,0 +1,29 @@
+# Final submission docker guidelines
+
+All final submissions should be submitted as a docker image. The docker image should be built from the dockerfile in the root of the repository, with the following command (`<image_name>` is a placeholder for the tag you choose):
+
+```bash
+docker build -t <image_name> .
+```
+
+The docker image should be run with the following command:
+
+```bash
+docker run -it --rm -v $(pwd)/data:/data <image_name> ./run.sh
+```
+
+As output, the docker image should produce images in `JPEG` format in the `/data` directory. Each produced file should have the same name as the corresponding RAW input file in `/data`, but with the `.jpg` extension. Make sure that your code does not create any other folders in the `/data` directory. The docker image should contain all the necessary dependencies to run the code and should include the `run.sh` script as the entrypoint. Take into account that inside the docker image, the `/data` directory will be mounted to the `$(pwd)/data` directory of the host machine; the image should therefore read the input files from `/data` and write the output files back to `/data`.
+
+## Example
+
+We provide an example of a docker image that can be used as a reference. It can be found in our [github repository](https://github.com/createcolor/nightimaging23)
+
+Your dockerfile may look like this:
+
+```dockerfile
+FROM tensorflow/tensorflow:2.3.0
+WORKDIR /opt/app
+COPY . .
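+# note (added for clarity): `COPY . .` above copies requirements.txt into the
+# WORKDIR (/opt/app), so it can be installed directly from there; adjust the
+# path if your requirements file lives elsewhere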
+RUN pip install -r requirements.txt
+CMD ["./run.sh"]
+```
diff --git a/IIR-Lab/ISP_pipeline/imaging.py b/IIR-Lab/ISP_pipeline/imaging.py
new file mode 100644
index 0000000000000000000000000000000000000000..786a0c896ca7b69d1ff93f18a136b7cc685accd2
--- /dev/null
+++ b/IIR-Lab/ISP_pipeline/imaging.py
@@ -0,0 +1,1293 @@
+# Note:
+# The functions try to operate in float32 data precision
+
+# =============================================================
+# Import the libraries
+# =============================================================
+import numpy as np            # array operations
+import math                   # basic math operations
+from matplotlib import pylab as plt
+import time                   # measure runtime
+import utility
+import debayer
+import sys                    # float precision
+from scipy import signal      # convolutions
+from scipy import interpolate # for interpolation
+
+
+
+# =============================================================
+# class: ImageInfo
+#   Helps set up necessary information/metadata of the image
+# =============================================================
+class ImageInfo:
+    def __init__(self, name = "unknown", data = -1, is_show = False):
+        self.name = name
+        self.data = data
+        self.size = np.shape(self.data)
+        self.is_show = is_show
+        self.color_space = "unknown"
+        self.bayer_pattern = "unknown"
+        self.channel_gain = (1.0, 1.0, 1.0, 1.0)
+        self.bit_depth = 0
+        self.black_level = (0, 0, 0, 0)
+        self.white_level = (1, 1, 1, 1)
+        self.color_matrix = [[1., .0, .0],\
+                             [.0, 1., .0],\
+                             [.0, .0, 1.]] # xyz2cam
+        self.min_value = np.min(self.data)
+        self.max_value = np.max(self.data)
+        self.data_type = self.data.dtype
+
+        # Display the image only if is_show is True
+        if (self.is_show):
+            plt.imshow(self.data)
+            plt.show()
+
+    def set_data(self, data):
+        # This function updates data and the corresponding fields
+        self.data = data
+        self.size = np.shape(self.data)
+        self.data_type = self.data.dtype
+        self.min_value = np.min(self.data)
+        self.max_value = np.max(self.data)
+
+    def get_size(self):
+        return self.size
+
+    def get_width(self):
+        return self.size[1]
+
+    def get_height(self):
+        return self.size[0]
+
+    def get_depth(self):
+        if np.ndim(self.data) > 2:
+            return self.size[2]
+        else:
+            return 0
+
+    def set_color_space(self, color_space):
+        self.color_space = color_space
+
+    def get_color_space(self):
+        return self.color_space
+
+    def set_channel_gain(self, channel_gain):
+        self.channel_gain = channel_gain
+
+    def get_channel_gain(self):
+        return self.channel_gain
+
+    def set_color_matrix(self, color_matrix):
+        self.color_matrix = color_matrix
+
+    def get_color_matrix(self):
+        return self.color_matrix
+
+    def set_bayer_pattern(self, bayer_pattern):
+        self.bayer_pattern = bayer_pattern
+
+    def get_bayer_pattern(self):
+        return self.bayer_pattern
+
+    def set_bit_depth(self, bit_depth):
+        self.bit_depth = bit_depth
+
+    def get_bit_depth(self):
+        return self.bit_depth
+
+    def set_black_level(self, black_level):
+        self.black_level = black_level
+
+    def get_black_level(self):
+        return self.black_level
+
+    def set_white_level(self, white_level):
+        self.white_level = white_level
+
+    def get_white_level(self):
+        return self.white_level
+
+    def get_min_value(self):
+        return self.min_value
+
+    def get_max_value(self):
+        return self.max_value
+
+    def get_data_type(self):
+        return self.data_type
+
+    def __str__(self):
+        return "Image " + self.name + " info:" + \
+                          "\n\tname:\t" + self.name + \
+                          "\n\tsize:\t" + str(self.size) + \
+                          "\n\tcolor space:\t" + self.color_space + \
+                          "\n\tbayer pattern:\t" + self.bayer_pattern + \
"\n\tchannel gains:\t" + str(self.channel_gain) + \ + "\n\tbit depth:\t" + str(self.bit_depth) + \ + "\n\tdata type:\t" + str(self.data_type) + \ + "\n\tblack level:\t" + str(self.black_level) + \ + "\n\tminimum value:\t" + str(self.min_value) + \ + "\n\tmaximum value:\t" + str(self.max_value) + + +# ============================================================= +# function: black_level_correction +# subtracts the black level channel wise +# ============================================================= +def black_level_correction(raw, black_level, white_level, clip_range): + + print("----------------------------------------------------") + print("Running black level correction...") + + # make float32 in case if it was not + black_level = np.float32(black_level) + white_level = np.float32(white_level) + raw = np.float32(raw) + + # create new data so that original raw data do not change + data = np.zeros(raw.shape) + + # bring data in range 0 to 1 + data[::2, ::2] = (raw[::2, ::2] - black_level[0]) / (white_level[0] - black_level[0]) + data[::2, 1::2] = (raw[::2, 1::2] - black_level[1]) / (white_level[1] - black_level[1]) + data[1::2, ::2] = (raw[1::2, ::2] - black_level[2]) / (white_level[2] - black_level[2]) + data[1::2, 1::2] = (raw[1::2, 1::2]- black_level[3]) / (white_level[3] - black_level[3]) + + # bring within the bit depth range + data = data * clip_range[1] + + # clip within the range + data = np.clip(data, clip_range[0], clip_range[1]) # upper level not necessary + data = np.float32(data) + + return data + + +# ============================================================= +# function: channel_gain_white_balance +# multiply with the white balance channel gains +# ============================================================= +def channel_gain_white_balance(data, channel_gain): + + print("----------------------------------------------------") + print("Running channel gain white balance...") + + # convert into float32 in case they were not + data = np.float32(data) + channel_gain = np.float32(channel_gain) + + # multiply with the channel gains + data[::2, ::2] = data[::2, ::2] * channel_gain[0] + data[::2, 1::2] = data[::2, 1::2] * channel_gain[1] + data[1::2, ::2] = data[1::2, ::2] * channel_gain[2] + data[1::2, 1::2] = data[1::2, 1::2] * channel_gain[3] + + # clipping within range + data = np.clip(data, 0., None) # upper level not necessary + + return data + + +# ============================================================= +# function: bad_pixel_correction +# correct for the bad (dead, stuck, or hot) pixels +# ============================================================= +def bad_pixel_correction(data, neighborhood_size): + + print("----------------------------------------------------") + print("Running bad pixel correction...") + + if ((neighborhood_size % 2) == 0): + print("neighborhood_size shoud be odd number, recommended value 3") + return data + + # convert to float32 in case they were not + # Being consistent in data format to be float32 + data = np.float32(data) + + # Separate out the quarter resolution images + D = {} # Empty dictionary + D[0] = data[::2, ::2] + D[1] = data[::2, 1::2] + D[2] = data[1::2, ::2] + D[3] = data[1::2, 1::2] + + # number of pixels to be padded at the borders + no_of_pixel_pad = math.floor(neighborhood_size / 2.) 
+ + for idx in range(0, len(D)): # perform same operation for each quarter + + # display progress + print("bad pixel correction: Quarter " + str(idx+1) + " of 4") + + img = D[idx] + width, height = utility.helpers(img).get_width_height() + + # pad pixels at the borders + img = np.pad(img, \ + (no_of_pixel_pad, no_of_pixel_pad),\ + 'reflect') # reflect would not repeat the border value + + for i in range(no_of_pixel_pad, height + no_of_pixel_pad): + for j in range(no_of_pixel_pad, width + no_of_pixel_pad): + + # save the middle pixel value + mid_pixel_val = img[i, j] + + # extract the neighborhood + neighborhood = img[i - no_of_pixel_pad : i + no_of_pixel_pad+1,\ + j - no_of_pixel_pad : j + no_of_pixel_pad+1] + + # set the center pixels value same as the left pixel + # Does not matter replace with right or left pixel + # is used to replace the center pixels value + neighborhood[no_of_pixel_pad, no_of_pixel_pad] = neighborhood[no_of_pixel_pad, no_of_pixel_pad-1] + + min_neighborhood = np.min(neighborhood) + max_neighborhood = np.max(neighborhood) + + if (mid_pixel_val < min_neighborhood): + img[i,j] = min_neighborhood + elif (mid_pixel_val > max_neighborhood): + img[i,j] = max_neighborhood + else: + img[i,j] = mid_pixel_val + + # Put the corrected image to the dictionary + D[idx] = img[no_of_pixel_pad : height + no_of_pixel_pad,\ + no_of_pixel_pad : width + no_of_pixel_pad] + + # Regrouping the data + data[::2, ::2] = D[0] + data[::2, 1::2] = D[1] + data[1::2, ::2] = D[2] + data[1::2, 1::2] = D[3] + + return data + + +# ============================================================= +# class: demosaic +# ============================================================= +class demosaic: + def __init__(self, data, bayer_pattern="rggb", clip_range=[0, 65535], name="demosaic"): + self.data = np.float32(data) + self.bayer_pattern = bayer_pattern + self.clip_range = clip_range + self.name = name + + def mhc(self, timeshow=False): + + print("----------------------------------------------------") + print("Running demosaicing using Malvar-He-Cutler algorithm...") + + return debayer.debayer_mhc(self.data, self.bayer_pattern, self.clip_range, timeshow) + + def post_process_local_color_ratio(self, beta): + # Objective is to reduce high chroma jump + # Beta is controlling parameter, higher gives more effect, + # however, too high does not make any more change + + print("----------------------------------------------------") + print("Demosaicing post process using local color ratio...") + + data = self.data + + # add beta with the data to prevent divide by zero + data_beta = self.data + beta + + # convolution kernels + # zeta1 averages the up, down, left, and right four values of a 3x3 window + zeta1 = np.multiply([[0., 1., 0.], [1., 0., 1.], [0., 1., 0.]], .25) + # zeta2 averages the four corner values of a 3x3 window + zeta2 = np.multiply([[1., 0., 1.], [0., 0., 0.], [1., 0., 1.]], .25) + + # average of color ratio + g_over_b = signal.convolve2d(np.divide(data_beta[:, :, 1], data_beta[:, :, 2]), zeta1, mode="same", boundary="symm") + g_over_r = signal.convolve2d(np.divide(data_beta[:, :, 1], data_beta[:, :, 0]), zeta1, mode="same", boundary="symm") + b_over_g_zeta2 = signal.convolve2d(np.divide(data_beta[:, :, 2], data_beta[:, :, 1]), zeta2, mode="same", boundary="symm") + r_over_g_zeta2 = signal.convolve2d(np.divide(data_beta[:, :, 0], data_beta[:, :, 1]), zeta2, mode="same", boundary="symm") + b_over_g_zeta1 = signal.convolve2d(np.divide(data_beta[:, :, 2], data_beta[:, :, 1]), zeta1, mode="same", 
boundary="symm") + r_over_g_zeta1 = signal.convolve2d(np.divide(data_beta[:, :, 0], data_beta[:, :, 1]), zeta1, mode="same", boundary="symm") + + # G at B locations and G at R locations + if self.bayer_pattern == "rggb": + # G at B locations + data[1::2, 1::2, 1] = -beta + np.multiply(data_beta[1::2, 1::2, 2], g_over_b[1::2, 1::2]) + # G at R locations + data[::2, ::2, 1] = -beta + np.multiply(data_beta[::2, ::2, 0], g_over_r[::2, ::2]) + # B at R locations + data[::2, ::2, 2] = -beta + np.multiply(data_beta[::2, ::2, 1], b_over_g_zeta2[::2, ::2]) + # R at B locations + data[1::2, 1::2, 0] = -beta + np.multiply(data_beta[1::2, 1::2, 1], r_over_g_zeta2[1::2, 1::2]) + # B at G locations + data[::2, 1::2, 2] = -beta + np.multiply(data_beta[::2, 1::2, 1], b_over_g_zeta1[::2, 1::2]) + data[1::2, ::2, 2] = -beta + np.multiply(data_beta[1::2, ::2, 1], b_over_g_zeta1[1::2, ::2]) + # R at G locations + data[::2, 1::2, 0] = -beta + np.multiply(data_beta[::2, 1::2, 1], r_over_g_zeta1[::2, 1::2]) + data[1::2, ::2, 0] = -beta + np.multiply(data_beta[1::2, ::2, 1], r_over_g_zeta1[1::2, ::2]) + + elif self.bayer_pattern == "grbg": + # G at B locations + data[1::2, ::2, 1] = -beta + np.multiply(data_beta[1::2, ::2, 2], g_over_b[1::2, 1::2]) + # G at R locations + data[::2, 1::2, 1] = -beta + np.multiply(data_beta[::2, 1::2, 0], g_over_r[::2, 1::2]) + # B at R locations + data[::2, 1::2, 2] = -beta + np.multiply(data_beta[::2, 1::2, 1], b_over_g_zeta2[::2, 1::2]) + # R at B locations + data[1::2, ::2, 0] = -beta + np.multiply(data_beta[1::2, ::2, 1], r_over_g_zeta2[1::2, ::2]) + # B at G locations + data[::2, ::2, 2] = -beta + np.multiply(data_beta[::2, ::2, 1], b_over_g_zeta1[::2, ::2]) + data[1::2, 1::2, 2] = -beta + np.multiply(data_beta[1::2, 1::2, 1], b_over_g_zeta1[1::2, 1::2]) + # R at G locations + data[::2, ::2, 0] = -beta + np.multiply(data_beta[::2, ::2, 1], r_over_g_zeta1[::2, ::2]) + data[1::2, 1::2, 0] = -beta + np.multiply(data_beta[1::2, 1::2, 1], r_over_g_zeta1[1::2, 1::2]) + + elif self.bayer_pattern == "gbrg": + # G at B locations + data[::2, 1::2, 1] = -beta + np.multiply(data_beta[::2, 1::2, 2], g_over_b[::2, 1::2]) + # G at R locations + data[1::2, ::2, 1] = -beta + np.multiply(data_beta[1::2, ::2, 0], g_over_r[1::2, ::2]) + # B at R locations + data[1::2, ::2, 2] = -beta + np.multiply(data_beta[1::2, ::2, 1], b_over_g_zeta2[1::2, ::2]) + # R at B locations + data[::2, 1::2, 0] = -beta + np.multiply(data_beta[::2, 1::2, 1], r_over_g_zeta2[::2, 1::2]) + # B at G locations + data[::2, ::2, 2] = -beta + np.multiply(data_beta[::2, ::2, 1], b_over_g_zeta1[::2, ::2]) + data[1::2, 1::2, 2] = -beta + np.multiply(data_beta[1::2, 1::2, 1], b_over_g_zeta1[1::2, 1::2]) + # R at G locations + data[::2, ::2, 0] = -beta + np.multiply(data_beta[::2, ::2, 1], r_over_g_zeta1[::2, ::2]) + data[1::2, 1::2, 0] = -beta + np.multiply(data_beta[1::2, 1::2, 1], r_over_g_zeta1[1::2, 1::2]) + + elif self.bayer_pattern == "bggr": + # G at B locations + data[::2, ::2, 1] = -beta + np.multiply(data_beta[::2, ::2, 2], g_over_b[::2, ::2]) + # G at R locations + data[1::2, 1::2, 1] = -beta + np.multiply(data_beta[1::2, 1::2, 0], g_over_r[1::2, 1::2]) + # B at R locations + data[1::2, 1::2, 2] = -beta + np.multiply(data_beta[1::2, 1::2, 1], b_over_g_zeta2[1::2, 1::2]) + # R at B locations + data[::2, ::2, 0] = -beta + np.multiply(data_beta[::2, ::2, 1], r_over_g_zeta2[::2, ::2]) + # B at G locations + data[::2, 1::2, 2] = -beta + np.multiply(data_beta[::2, 1::2, 1], b_over_g_zeta1[::2, 1::2]) + data[1::2, ::2, 2] = 
-beta + np.multiply(data_beta[1::2, ::2, 1], b_over_g_zeta1[1::2, ::2]) + # R at G locations + data[::2, 1::2, 0] = -beta + np.multiply(data_beta[::2, 1::2, 1], r_over_g_zeta1[::2, 1::2]) + data[1::2, ::2, 0] = -beta + np.multiply(data_beta[1::2, ::2, 1], r_over_g_zeta1[1::2, ::2]) + + + return np.clip(data, self.clip_range[0], self.clip_range[1]) + + + def directionally_weighted_gradient_based_interpolation(self): + # Reference: + # http://www.arl.army.mil/arlreports/2010/ARL-TR-5061.pdf + + print("----------------------------------------------------") + print("Running demosaicing using directionally weighted gradient based interpolation...") + + # Fill up the green channel + G = debayer.fill_channel_directional_weight(self.data, self.bayer_pattern) + + B, R = debayer.fill_br_locations(self.data, G, self.bayer_pattern) + + width, height = utility.helpers(self.data).get_width_height() + output = np.empty((height, width, 3), dtype=np.float32) + output[:, :, 0] = R + output[:, :, 1] = G + output[:, :, 2] = B + + return np.clip(output, self.clip_range[0], self.clip_range[1]) + + + def post_process_median_filter(self, edge_detect_kernel_size=3, edge_threshold=0, median_filter_kernel_size=3, clip_range=[0, 65535]): + # Objective is to reduce the zipper effect around the edges + # Inputs: + # edge_detect_kernel_size: the neighborhood size used to detect edges + # edge_threshold: the threshold value above which (compared against) + # the gradient_magnitude to declare if it is an edge + # median_filter_kernel_size: the neighborhood size used to perform + # median filter operation + # clip_range: used for scaling in edge_detection + # + # Output: + # output: median filtered output around the edges + # edge_location: a debug image to see where the edges were detected + # based on the threshold + + + # detect edge locations + edge_location = utility.edge_detection(self.data).sobel(edge_detect_kernel_size, "is_edge", edge_threshold, clip_range) + + # allocate space for output + output = np.empty(np.shape(self.data), dtype=np.float32) + + if (np.ndim(self.data) > 2): + + for i in range(0, np.shape(self.data)[2]): + output[:, :, i] = utility.helpers(self.data[:, :, i]).edge_wise_median(median_filter_kernel_size, edge_location[:, :, i]) + + elif (np.ndim(self.data) == 2): + output = utility.helpers(self.data).edge_wise_median(median_filter_kernel_size, edge_location) + + return output, edge_location + + def __str__(self): + return self.name + + +# ============================================================= +# class: lens_shading_correction +# Correct the lens shading / vignetting +# ============================================================= +class lens_shading_correction: + def __init__(self, data, name="lens_shading_correction"): + # convert to float32 in case it was not + self.data = np.float32(data) + self.name = name + + def flat_field_compensation(self, dark_current_image, flat_field_image): + # dark_current_image: + # is captured from the camera with cap on + # and fully dark condition, several images captured and + # temporally averaged + # flat_field_image: + # is found by capturing an image of a flat field test chart + # with certain lighting condition + # Note: flat_field_compensation is memory intensive procedure because + # both the dark_current_image and flat_field_image need to be + # saved in memory beforehand + print("----------------------------------------------------") + print("Running lens shading correction with flat field compensation...") + + # convert to float32 in case it 
was not
+        dark_current_image = np.float32(dark_current_image)
+        flat_field_image = np.float32(flat_field_image)
+        temp = flat_field_image - dark_current_image
+        return np.average(temp) * np.divide((self.data - dark_current_image), temp)
+
+    def approximate_mathematical_compensation(self, params, clip_min=0, clip_max=65535):
+        # params:
+        #   parameters of a parabolic model y = a*(x-b)^2 + c
+        #   For example, params = [0.01759, -28.37, -13.36]
+        # Note: approximate_mathematical_compensation requires less memory
+        print("----------------------------------------------------")
+        print("Running lens shading correction with approximate mathematical compensation...")
+        width, height = utility.helpers(self.data).get_width_height()
+
+        center_pixel_pos = [height/2, width/2]
+        max_distance = utility.distance_euclid(center_pixel_pos, [height, width])
+
+        # allocate memory for output
+        temp = np.empty((height, width), dtype=np.float32)
+
+        for i in range(0, height):
+            for j in range(0, width):
+                distance = utility.distance_euclid(center_pixel_pos, [i, j]) / max_distance
+                # parabolic model
+                gain = params[0] * (distance - params[1])**2 + params[2]
+                temp[i, j] = self.data[i, j] * gain
+
+        temp = np.clip(temp, clip_min, clip_max)
+        return temp
+
+    def __str__(self):
+        return "lens shading correction. There are two methods: " + \
+               "\n (1) flat_field_compensation: requires dark_current_image and flat_field_image" + \
+               "\n (2) approximate_mathematical_compensation: requires the parameters of a parabolic model"
+
+
+# =============================================================
+# class: bayer_denoising
+#   reduce noise in the Bayer domain
+# =============================================================
+class bayer_denoising:
+    def __init__(self, data, name="bayer_denoising"):
+        # convert to float32 in case it was not
+        self.data = np.float32(data)
+        self.name = name
+
+    def utilize_hvs_behavior(self, bayer_pattern, initial_noise_level, hvs_min, hvs_max, threshold_red_blue, clip_range):
+        # Objective: bayer denoising
+        # Inputs:
+        #   bayer_pattern: rggb, gbrg, grbg, bggr
+        #   initial_noise_level: starting estimate for the noise level estimator
+        # Output:
+        #   denoised bayer raw output
+        # Source: Based on the paper titled "Noise Reduction for CFA Image Sensors
+        #   Exploiting HVS Behaviour," by Angelo Bosco, Sebastiano Battiato,
+        #   Arcangelo Bruna and Rosetta Rizzo
+        #   Sensors 2009, 9, 1692-1713; doi:10.3390/s90301692
+
+        print("----------------------------------------------------")
+        print("Running bayer denoising utilizing hvs behavior...")
+
+        # copy self.data to raw and only work on raw,
+        # to make sure no changes happen to self.data
+        raw = self.data
+        raw = np.clip(raw, clip_range[0], clip_range[1])
+        width, height = utility.helpers(raw).get_width_height()
+
+        # First make the bayer_pattern rggb
+        # The algorithm is written only for the rggb pattern, thus convert all other
+        # patterns to rggb.
Furthermore, this shuffling does not affect the + # algorithm output + if (bayer_pattern != "rggb"): + raw = utility.helpers(self.data).shuffle_bayer_pattern(bayer_pattern, "rggb") + + # fixed neighborhood_size + neighborhood_size = 5 # we are keeping this fixed + # bigger size such as 9 can be declared + # however, the code need to be changed then + + # pad two pixels at the border + no_of_pixel_pad = math.floor(neighborhood_size / 2) # number of pixels to pad + + raw = np.pad(raw, \ + (no_of_pixel_pad, no_of_pixel_pad),\ + 'reflect') # reflect would not repeat the border value + + # allocating space for denoised output + denoised_out = np.empty((height, width), dtype=np.float32) + + texture_degree_debug = np.empty((height, width), dtype=np.float32) + for i in range(no_of_pixel_pad, height + no_of_pixel_pad): + for j in range(no_of_pixel_pad, width + no_of_pixel_pad): + + # center pixel + center_pixel = raw[i, j] + + # signal analyzer block + half_max = clip_range[1] / 2 + if (center_pixel <= half_max): + hvs_weight = -(((hvs_max - hvs_min) * center_pixel) / half_max) + hvs_max + else: + hvs_weight = (((center_pixel - clip_range[1]) * (hvs_max - hvs_min))/(clip_range[1] - half_max)) + hvs_max + + # noise level estimator previous value + if (j < no_of_pixel_pad+2): + noise_level_previous_red = initial_noise_level + noise_level_previous_blue = initial_noise_level + noise_level_previous_green = initial_noise_level + else: + noise_level_previous_green = noise_level_current_green + if ((i % 2) == 0): # red + noise_level_previous_red = noise_level_current_red + elif ((i % 2) != 0): # blue + noise_level_previous_blue = noise_level_current_blue + + # Processings depending on Green or Red/Blue + # Red + if (((i % 2) == 0) and ((j % 2) == 0)): + # get neighborhood + neighborhood = [raw[i-2, j-2], raw[i-2, j], raw[i-2, j+2],\ + raw[i, j-2], raw[i, j+2],\ + raw[i+2, j-2], raw[i+2, j], raw[i+2, j+2]] + + # absolute difference from the center pixel + d = np.abs(neighborhood - center_pixel) + + # maximum and minimum difference + d_max = np.max(d) + d_min = np.min(d) + + # calculate texture_threshold + texture_threshold = hvs_weight + noise_level_previous_red + + # texture degree analyzer + if (d_max <= threshold_red_blue): + texture_degree = 1. + elif ((d_max > threshold_red_blue) and (d_max <= texture_threshold)): + texture_degree = -((d_max - threshold_red_blue) / (texture_threshold - threshold_red_blue)) + 1. + elif (d_max > texture_threshold): + texture_degree = 0. + + # noise level estimator update + noise_level_current_red = texture_degree * d_max + (1 - texture_degree) * noise_level_previous_red + + # Blue + elif (((i % 2) != 0) and ((j % 2) != 0)): + + # get neighborhood + neighborhood = [raw[i-2, j-2], raw[i-2, j], raw[i-2, j+2],\ + raw[i, j-2], raw[i, j+2],\ + raw[i+2, j-2], raw[i+2, j], raw[i+2, j+2]] + + # absolute difference from the center pixel + d = np.abs(neighborhood - center_pixel) + + # maximum and minimum difference + d_max = np.max(d) + d_min = np.min(d) + + # calculate texture_threshold + texture_threshold = hvs_weight + noise_level_previous_blue + + # texture degree analyzer + if (d_max <= threshold_red_blue): + texture_degree = 1. + elif ((d_max > threshold_red_blue) and (d_max <= texture_threshold)): + texture_degree = -((d_max - threshold_red_blue) / (texture_threshold - threshold_red_blue)) + 1. + elif (d_max > texture_threshold): + texture_degree = 0. 
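+                    # Worked example (hypothetical numbers, not from the paper):
+                    # with threshold_red_blue = 219 and texture_threshold = 300,
+                    # d_max = 250 gives
+                    #     texture_degree = 1 - (250 - 219) / (300 - 219) ~= 0.62,
+                    # i.e. a mixed flat/texture region, so the update below blends
+                    # d_max with the previous noise level estimate accordingly.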
+ + # noise level estimator update + noise_level_current_blue = texture_degree * d_max + (1 - texture_degree) * noise_level_previous_blue + + # Green + elif ((((i % 2) == 0) and ((j % 2) != 0)) or (((i % 2) != 0) and ((j % 2) == 0))): + + neighborhood = [raw[i-2, j-2], raw[i-2, j], raw[i-2, j+2],\ + raw[i-1, j-1], raw[i-1, j+1],\ + raw[i, j-2], raw[i, j+2],\ + raw[i+1, j-1], raw[i+1, j+1],\ + raw[i+2, j-2], raw[i+2, j], raw[i+2, j+2]] + + # difference from the center pixel + d = np.abs(neighborhood - center_pixel) + + # maximum and minimum difference + d_max = np.max(d) + d_min = np.min(d) + + # calculate texture_threshold + texture_threshold = hvs_weight + noise_level_previous_green + + # texture degree analyzer + if (d_max == 0): + texture_degree = 1 + elif ((d_max > 0) and (d_max <= texture_threshold)): + texture_degree = -(d_max / texture_threshold) + 1. + elif (d_max > texture_threshold): + texture_degree = 0 + + # noise level estimator update + noise_level_current_green = texture_degree * d_max + (1 - texture_degree) * noise_level_previous_green + + # similarity threshold calculation + if (texture_degree == 1): + threshold_low = threshold_high = d_max + elif (texture_degree == 0): + threshold_low = d_min + threshold_high = (d_max + d_min) / 2 + elif ((texture_degree > 0) and (texture_degree < 1)): + threshold_high = (d_max + ((d_max + d_min) / 2)) / 2 + threshold_low = (d_min + threshold_high) / 2 + + # weight computation + weight = np.empty(np.size(d), dtype=np.float32) + pf = 0. + for w_i in range(0, np.size(d)): + if (d[w_i] <= threshold_low): + weight[w_i] = 1. + elif (d[w_i] > threshold_high): + weight[w_i] = 0. + elif ((d[w_i] > threshold_low) and (d[w_i] < threshold_high)): + weight[w_i] = 1. + ((d[w_i] - threshold_low) / (threshold_low - threshold_high)) + + pf += weight[w_i] * neighborhood[w_i] + (1. 
- weight[w_i]) * center_pixel
+
+                denoised_out[i - no_of_pixel_pad, j - no_of_pixel_pad] = pf / np.size(d)
+                # texture_degree_debug is a debug output
+                texture_degree_debug[i - no_of_pixel_pad, j - no_of_pixel_pad] = texture_degree
+
+        if (bayer_pattern != "rggb"):
+            denoised_out = utility.shuffle_bayer_pattern(denoised_out, "rggb", bayer_pattern)
+
+        return np.clip(denoised_out, clip_range[0], clip_range[1]), texture_degree_debug
+
+    def __str__(self):
+        return self.name
+
+
+# =============================================================
+# class: color_correction
+#   Correct the color in the linear domain
+# =============================================================
+class color_correction:
+    def __init__(self, data, color_matrix, color_space="srgb", illuminant="d65", name="color correction", clip_range=[0, 65535]):
+        # Inputs:
+        #   data: linear rgb image before nonlinearity/gamma
+        #   xyz2cam: 3x3 matrix found in the camera metadata, specifically
+        #            color matrix 2 from the metadata
+        #   color_space: output color space
+        #   illuminant: the illuminant of the lighting condition
+        #   name: name of the class
+        self.data = np.float32(data)
+        self.xyz2cam = np.float32(color_matrix)
+        self.color_space = color_space
+        self.illuminant = illuminant
+        self.name = name
+        self.clip_range = clip_range
+
+    def get_rgb2xyz(self):
+        # Objective: get the rgb2xyz matrix depending on the output color space
+        #            and the illuminant
+        # Source: http://www.brucelindbloom.com/index.html?Eqn_RGB_XYZ_Matrix.html
+        if (self.color_space == "srgb"):
+            if (self.illuminant == "d65"):
+                return [[.4124564, .3575761, .1804375],\
+                        [.2126729, .7151522, .0721750],\
+                        [.0193339, .1191920, .9503041]]
+            elif (self.illuminant == "d50"):
+                return [[.4360747, .3850649, .1430804],\
+                        [.2225045, .7168786, .0606169],\
+                        [.0139322, .0971045, .7141733]]
+            else:
+                print("for now, illuminant must be d65 or d50")
+                return
+
+        elif (self.color_space == "adobe-rgb-1998"):
+            if (self.illuminant == "d65"):
+                return [[.5767309, .1855540, .1881852],\
+                        [.2973769, .6273491, .0752741],\
+                        [.0270343, .0706872, .9911085]]
+            elif (self.illuminant == "d50"):
+                return [[.6097559, .2052401, .1492240],\
+                        [.3111242, .6256560, .0632197],\
+                        [.0194811, .0608902, .7448387]]
+            else:
+                print("for now, illuminant must be d65 or d50")
+                return
+        else:
+            print("for now, color_space must be srgb or adobe-rgb-1998")
+            return
+
+    def calculate_cam2rgb(self):
+        # Objective: Calculates the color correction matrix
+
+        # matrix multiplication
+        rgb2cam = np.dot(self.xyz2cam, self.get_rgb2xyz())
+
+        # make the sum of each row equal to 1.0, necessary to preserve white balance;
+        # basically divide each value by its row-wise sum
+        rgb2cam = np.divide(rgb2cam, np.reshape(np.sum(rgb2cam, 1), [3, 1]))
+
+        # - invert the matrix to get cam2rgb.
+        # - cam2rgb should also have the characteristic that the sum of each row
+        #   equals 1.0 to preserve white balance
+        # - check if rgb2cam is invertible by checking the condition number of
+        #   rgb2cam. If rgb2cam is singular, give a warning and
+        #   return an identity matrix
+        if (np.linalg.cond(rgb2cam) < (1 / sys.float_info.epsilon)):
+            return np.linalg.inv(rgb2cam) # this is cam2rgb / color correction matrix
+        else:
+            print("Warning! matrix not invertible.")
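+            # note (added for clarity): np.linalg.cond returns the 2-norm
+            # condition number, and comparing it against 1/eps flags matrices
+            # that are numerically singular; in that case the identity is
+            # returned below and the colors pass through uncorrected.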
+            return np.identity(3, dtype=np.float32)
+
+    def apply_cmatrix(self):
+        # Objective: Apply the color correction matrix (cam2rgb)
+
+        print("----------------------------------------------------")
+        print("Running color correction...")
+
+        # check if data is 3 dimensional
+        if (np.ndim(self.data) != 3):
+            print("data needs to be three dimensional")
+            return
+
+        # get the color correction matrix
+        cam2rgb = self.calculate_cam2rgb()
+
+        # get width and height
+        width, height = utility.helpers(self.data).get_width_height()
+
+        # apply the matrix
+        R = self.data[:, :, 0]
+        G = self.data[:, :, 1]
+        B = self.data[:, :, 2]
+
+        color_corrected = np.empty((height, width, 3), dtype=np.float32)
+        color_corrected[:, :, 0] = R * cam2rgb[0, 0] + G * cam2rgb[0, 1] + B * cam2rgb[0, 2]
+        color_corrected[:, :, 1] = R * cam2rgb[1, 0] + G * cam2rgb[1, 1] + B * cam2rgb[1, 2]
+        color_corrected[:, :, 2] = R * cam2rgb[2, 0] + G * cam2rgb[2, 1] + B * cam2rgb[2, 2]
+
+        return np.clip(color_corrected, self.clip_range[0], self.clip_range[1])
+
+    def __str__(self):
+        return self.name
+
+
+# =============================================================
+# class: nonlinearity
+#   apply gamma or degamma
+# =============================================================
+class nonlinearity:
+    def __init__(self, data, name="nonlinearity"):
+        self.data = np.float32(data)
+        self.name = name
+
+    def luma_adjustment(self, multiplier, clip_range=[0, 65535]):
+        # The multiplier is applied only on the luma channel
+        # and is given in log10 scale:
+        # a multiplier of 10 means multiplied by 1.,
+        # a multiplier of 100 means multiplied by 2., and so on
+
+        print("----------------------------------------------------")
+        print("Running brightening...")
+
+        return np.clip(np.log10(multiplier) * self.data, clip_range[0], clip_range[1])
+
+    def by_value(self, value, clip_range):
+
+        print("----------------------------------------------------")
+        print("Running nonlinearity by value...")
+
+        # clip within the range
+        data = np.clip(self.data, clip_range[0], clip_range[1])
+        # make 0 to 1
+        data = data / clip_range[1]
+        # apply nonlinearity
+        return np.clip(clip_range[1] * (data**value), clip_range[0], clip_range[1])
+
+    def by_table(self, table, nonlinearity_type="gamma", clip_range=[0, 65535]):
+
+        print("----------------------------------------------------")
+        print("Running nonlinearity by table...")
+
+        gamma_table = np.loadtxt(table)
+        gamma_table = clip_range[1] * gamma_table / np.max(gamma_table)
+        linear_table = np.linspace(clip_range[0], clip_range[1], np.size(gamma_table))
+
+        # linear interpolation, query is the self.data
+        if (nonlinearity_type == "gamma"):
+            # mapping is from linear_table to gamma_table
+            return np.clip(np.interp(self.data, linear_table, gamma_table), clip_range[0], clip_range[1])
+        elif (nonlinearity_type == "degamma"):
+            # mapping is from gamma_table to linear_table
+            return np.clip(np.interp(self.data, gamma_table, linear_table), clip_range[0], clip_range[1])
+
+    def by_equation(self, a, b, clip_range):
+
+        print("----------------------------------------------------")
+        print("Running nonlinearity by equation...")
+
+        # clip within the range
+        data = np.clip(self.data, clip_range[0], clip_range[1])
+        # make 0 to 1
+        data = data / clip_range[1]
+
+        # apply nonlinearity
+        return np.clip(clip_range[1] * (a * np.exp(b * data) + data + a * data - a * np.exp(b) * data - a), clip_range[0], clip_range[1])
+
+    def __str__(self):
+        return self.name
+
+
+# =============================================================
+# class: tone_mapping
+#   improve the overall tone of the image
+# =============================================================
+class tone_mapping:
+    def __init__(self, data, name="tone mapping"):
+        self.data = np.float32(data)
+        self.name = name
+
+    def nonlinear_masking(self, strength_multiplier=1.0, gaussian_kernel_size=[5, 5], gaussian_sigma=1.0, clip_range=[0, 65535]):
+        # Objective: improves the overall tone of the image
+        # Inputs:
+        #   strength_multiplier: >0. The higher, the more aggressive the tone mapping
+        #   gaussian_kernel_size: kernel size for calculating the mask image
+        #   gaussian_sigma: spread of the gaussian kernel for calculating the
+        #                   mask image
+        #
+        # Source:
+        # N. Moroney, “Local color correction using non-linear masking”,
+        # Proc. IS&T/SID 8th Color Imaging Conference, pp. 108-111, (2000)
+        #
+        # Note: slight changes were made by Mushfiqul Alam, specifically
+        # the introduction of the strength_multiplier
+
+        print("----------------------------------------------------")
+        print("Running tone mapping by non linear masking...")
+
+        # convert to gray image
+        if (np.ndim(self.data) == 3):
+            gray_image = utility.color_conversion(self.data).rgb2gray()
+        else:
+            gray_image = self.data
+
+        # gaussian blur the gray image
+        gaussian_kernel = utility.create_filter().gaussian(gaussian_kernel_size, gaussian_sigma)
+
+        # the mask image: (1) blur
+        #                 (2) bring within range 0 to 1
+        #                 (3) multiply with strength_multiplier
+        mask = signal.convolve2d(gray_image, gaussian_kernel, mode="same", boundary="symm")
+        mask = strength_multiplier * mask / clip_range[1]
+
+        # calculate the alpha image
+        temp = np.power(0.5, mask)
+        if (np.ndim(self.data) == 3):
+            width, height = utility.helpers(self.data).get_width_height()
+            alpha = np.empty((height, width, 3), dtype=np.float32)
+            alpha[:, :, 0] = temp
+            alpha[:, :, 1] = temp
+            alpha[:, :, 2] = temp
+        else:
+            alpha = temp
+
+        # output
+        return np.clip(clip_range[1] * np.power(self.data/clip_range[1], alpha), clip_range[0], clip_range[1])
+
+    def dynamic_range_compression(self, drc_type="normal", drc_bound=[-40., 260.], clip_range=[0, 65535]):
+
+        ycc = utility.color_conversion(self.data).rgb2ycc("bt601")
+        y = ycc[:, :, 0]
+        cb = ycc[:, :, 1]
+        cr = ycc[:, :, 2]
+
+        if (drc_type == "normal"):
+            edge = y
+        elif (drc_type == "joint"):
+            edge = utility.edge_detection(y).sobel(3, "gradient_magnitude")
+
+        y_bilateral_filtered = utility.special_function(y).bilateral_filter(edge)
+        detail = np.divide(ycc[:, :, 0], y_bilateral_filtered)
+
+        C = drc_bound[0] * clip_range[1] / 255.
+        temp = drc_bound[1] * clip_range[1] / 255.
+        F = (temp * (C + clip_range[1])) / (clip_range[1] * (temp - C))
+        y_bilateral_filtered_contrast_reduced = F * (y_bilateral_filtered - (clip_range[1] / 2.)) + (clip_range[1] / 2.)
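+        # Worked example of the bound mapping above (hypothetical 8-bit case):
+        # with clip_range[1] = 255 and drc_bound = [-40., 260.]:
+        #     C = -40, temp = 260,
+        #     F = (260 * (-40 + 255)) / (255 * (260 - (-40))) ~= 0.73,
+        # so the base layer's contrast about the mid level (127.5) is reduced
+        # to roughly 73% before the detail layer is multiplied back in below.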
+
+        y_out = np.multiply(y_bilateral_filtered_contrast_reduced, detail)
+
+        ycc_out = ycc
+        ycc_out[:, :, 0] = y_out
+        rgb_out = utility.color_conversion(ycc_out).ycc2rgb("bt601")
+
+        return np.clip(rgb_out, clip_range[0], clip_range[1])
+
+
+# =============================================================
+# class: sharpening
+#   sharpens the image
+# =============================================================
+class sharpening:
+    def __init__(self, data, name="sharpening"):
+        self.data = np.float32(data)
+        self.name = name
+
+    def unsharp_masking(self, gaussian_kernel_size=[5, 5], gaussian_sigma=2.0,\
+                        slope=1.5, tau_threshold=0.05, gamma_speed=4., clip_range=[0, 65535]):
+        # Objective: sharpen image
+        # Input:
+        #   gaussian_kernel_size: dimension of the gaussian blur filter kernel
+        #
+        #   gaussian_sigma: spread of the gaussian blur filter kernel,
+        #                   bigger sigma means more sharpening
+        #
+        #   slope: controls the boost,
+        #          i.e. the amount of sharpening; a higher slope
+        #          means more aggressive sharpening
+        #
+        #   tau_threshold: controls the amount of coring,
+        #                  the threshold value below which the image is
+        #                  not sharpened. The lower the value of
+        #                  tau_threshold, the more frequencies
+        #                  go through the sharpening process
+        #
+        #   gamma_speed: controls the speed of convergence to the slope;
+        #                a smaller value gives a slightly more
+        #                sharpened image, this may be a fine tuner
+
+        print("----------------------------------------------------")
+        print("Running sharpening by unsharp masking...")
+
+        # create gaussian kernel
+        gaussian_kernel = utility.create_filter().gaussian(gaussian_kernel_size, gaussian_sigma)
+
+        # convolve the image with the gaussian kernel:
+        # first input is the image, second input is the kernel;
+        # output shape will be the same as the first input;
+        # the boundary is padded using the symmetrical method while convolving
+        if np.ndim(self.data) > 2:
+            image_blur = np.empty(np.shape(self.data), dtype=np.float32)
+            for i in range(0, np.shape(self.data)[2]):
+                image_blur[:, :, i] = signal.convolve2d(self.data[:, :, i], gaussian_kernel, mode="same", boundary="symm")
+        else:
+            image_blur = signal.convolve2d(self.data, gaussian_kernel, mode="same", boundary="symm")
+
+        # the high frequency component image
+        image_high_pass = self.data - image_blur
+
+        # soft coring (see in utility)
+        # basically pass the high pass image via a slightly nonlinear function
+        tau_threshold = tau_threshold * clip_range[1]
+
+        # add the soft cored high pass image to the original and clip
+        # within range and return
+        return np.clip(self.data + utility.special_function(\
+                       image_high_pass).soft_coring(\
+                       slope, tau_threshold, gamma_speed), clip_range[0], clip_range[1])
+
+    def __str__(self):
+        return self.name
+
+
+# =============================================================
+# class: noise_reduction
+#   reduce noise of the nonlinear image (after gamma)
+# =============================================================
+class noise_reduction:
+    def __init__(self, data, clip_range=[0, 65535], name="noise reduction"):
+        self.data = np.float32(data)
+        self.clip_range = clip_range
+        self.name = name
+
+    def sigma_filter(self, neighborhood_size=7, sigma=[6, 6, 6]):
+
+        print("----------------------------------------------------")
+        print("Running noise reduction by sigma filter...")
+
+        if np.ndim(self.data) > 2: # if rgb image
+            output = np.empty(np.shape(self.data), dtype=np.float32)
+            for i in range(0, np.shape(self.data)[2]):
+                output[:, :, i] = utility.helpers(self.data[:, :, i]).sigma_filter_helper(neighborhood_size, sigma[i])
+            return np.clip(output, self.clip_range[0], self.clip_range[1])
+        else: # gray image
+            return np.clip(utility.helpers(self.data).sigma_filter_helper(neighborhood_size, sigma), self.clip_range[0], self.clip_range[1])
+
+    def __str__(self):
+        return self.name
+
+
+# =============================================================
+# class: distortion_correction
+#   correct the distortion
+# =============================================================
+class distortion_correction:
+    def __init__(self, data, name="distortion correction"):
+        self.data = np.float32(data)
+        self.name = name
+
+
+    def empirical_correction(self, correction_type="pincushion-1", strength=0.1, zoom_type="crop", clip_range=[0, 65535]):
+        #------------------------------------------------------
+        # Objective:
+        #   correct geometric distortion with the assumption that the distortion
+        #   is symmetric and centered at the center of the image
+        # Input:
+        #   correction_type: which type of correction needs to be carried
+        #                    out; choose one of the four:
+        #                    pincushion-1, pincushion-2, barrel-1, barrel-2
+        #                    1 and 2 differ in the power applied
+        #                    over the radius
+        #
+        #   strength: should be equal to or greater than 0.
+        #             0 means no correction will be done.
+        #             if a negative value were applied, the correction_type
+        #             would be reversed; thus, a value >= 0 is expected.
+        #
+        #   zoom_type: either "fit" or "crop";
+        #              fit will return an image with the full content
+        #              in the whole area,
+        #              crop will return an image with 0 values outside
+        #              the border
+        #
+        #   clip_range: to clip the final image within the range
+        #------------------------------------------------------
+
+        if (strength < 0):
+            print("Warning! strength should be equal to or greater than 0.")
+            return self.data
+
+        print("----------------------------------------------------")
+        print("Running distortion correction by empirical method...")
+
+        # get half_width and half_height, assume this is the center
+        width, height = utility.helpers(self.data).get_width_height()
+        half_width = width / 2
+        half_height = height / 2
+
+        # create a meshgrid of points
+        xi, yi = np.meshgrid(np.linspace(-half_width, half_width, width),\
+                             np.linspace(-half_height, half_height, height))
+
+        # cartesian to polar coordinates
+        r = np.sqrt(xi**2 + yi**2)
+        theta = np.arctan2(yi, xi)
+
+        # maximum radius
+        R = math.sqrt(width**2 + height**2)
+
+        # make r within range 0~1
+        r = r / R
+
+        # apply the radius to the desired transformation
+        s = utility.special_function(r).distortion_function(correction_type, strength)
+
+        # select a scaling_parameter based on zoom_type and the strength value
+        if ((correction_type=="barrel-1") or (correction_type=="barrel-2")):
+            if (zoom_type == "fit"):
+                scaling_parameter = r[0, 0] / s[0, 0]
+            elif (zoom_type == "crop"):
+                scaling_parameter = 1. / (1. + strength * (np.min([half_width, half_height])/R)**2)
+        elif ((correction_type=="pincushion-1") or (correction_type=="pincushion-2")):
+            if (zoom_type == "fit"):
+                scaling_parameter = 1. / (1. + strength * (np.min([half_width, half_height])/R)**2)
+            elif (zoom_type == "crop"):
+                scaling_parameter = r[0, 0] / s[0, 0]
+
+        # multiply by scaling_parameter and un-normalize
+        s = s * scaling_parameter * R
+
+        # convert back to cartesian coordinates and add back the center coordinate
+        xt = np.multiply(s, np.cos(theta))
+        yt = np.multiply(s, np.sin(theta))
+
+        # interpolation
+        if np.ndim(self.data) == 3:
+
+            output = np.empty(np.shape(self.data), dtype=np.float32)
+
+            output[:, :, 0] = utility.helpers(self.data[:, :, 0]).bilinear_interpolation(xt + half_width, yt + half_height)
+            output[:, :, 1] = utility.helpers(self.data[:, :, 1]).bilinear_interpolation(xt + half_width, yt + half_height)
+            output[:, :, 2] = utility.helpers(self.data[:, :, 2]).bilinear_interpolation(xt + half_width, yt + half_height)
+
+        elif np.ndim(self.data) == 2:
+
+            output = utility.helpers(self.data).bilinear_interpolation(xt + half_width, yt + half_height)
+
+        return np.clip(output, clip_range[0], clip_range[1])
+
+
+    def __str__(self):
+        return self.name
+
+
+# =============================================================
+# class: memory_color_enhancement
+#   enhance memory colors such as sky, grass, skin color
+# =============================================================
+class memory_color_enhancement:
+    def __init__(self, data, name="memory color enhancement"):
+        self.data = np.float32(data)
+        self.name = name
+
+    def by_hue_squeeze(self, target_hue, hue_preference, hue_sigma, is_both_side, multiplier, chroma_preference, chroma_sigma, color_space="srgb", illuminant="d65", clip_range=[0, 65535], cie_version="1931"):
+
+        # RGB to xyz
+        data = utility.color_conversion(self.data).rgb2xyz(color_space, clip_range)
+        # xyz to lab
+        data = utility.color_conversion(data).xyz2lab(cie_version, illuminant)
+        # lab to lch
+        data = utility.color_conversion(data).lab2lch()
+
+        # hue squeezing
+        # we are traversing through different color preferences
+        width, height = utility.helpers(self.data).get_width_height()
+        hue_correction = np.zeros((height, width), dtype=np.float32)
+        for i in range(0, np.size(target_hue)):
+
+            delta_hue = data[:, :, 2] - hue_preference[i]
+
+            if is_both_side[i]:
+                weight_temp = np.exp( -np.power(data[:, :, 2] - target_hue[i], 2) / (2 * hue_sigma[i]**2)) + \
+                              np.exp( -np.power(data[:, :, 2] + target_hue[i], 2) / (2 * hue_sigma[i]**2))
+            else:
+                weight_temp = np.exp( -np.power(data[:, :, 2] - target_hue[i], 2) / (2 * hue_sigma[i]**2))
+
+            weight_hue = multiplier[i] * weight_temp / np.max(weight_temp)
+
+            weight_chroma = np.exp( -np.power(data[:, :, 1] - chroma_preference[i], 2) / (2 * chroma_sigma[i]**2))
+
+            hue_correction = hue_correction + np.multiply(np.multiply(delta_hue, weight_hue), weight_chroma)
+
+        # correct the hue
+        data[:, :, 2] = data[:, :, 2] - hue_correction
+
+        # lch to lab
+        data = utility.color_conversion(data).lch2lab()
+        # lab to xyz
+        data = utility.color_conversion(data).lab2xyz(cie_version, illuminant)
+        # xyz to rgb
+        data = utility.color_conversion(data).xyz2rgb(color_space, clip_range)
+
+        return data
+
+
+    def __str__(self):
+        return self.name
+
+
+# =============================================================
+# class: chromatic_aberration_correction
+#   removes artifacts similar to those resulting from chromatic
+#   aberration
+# =============================================================
+class chromatic_aberration_correction:
+    def __init__(self, data, name="chromatic aberration correction"):
+        self.data = np.float32(data)
+        self.name = name
+
+    def purple_fringe_removal(self, nsr_threshold, cr_threshold, clip_range=[0, 65535]):
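+        # Illustrative numbers (hypothetical, not from the source): with
+        # clip_range = [0, 65535] and nsr_threshold = 90, the near-saturation
+        # cut computed below becomes 65535 * 90 / 100 ~= 58981.5, i.e. pixels
+        # whose channel mean exceeds ~90% of full scale count as near-saturated.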
+        # --------------------------------------------------------------
+        # nsr_threshold: near saturated region threshold (in percentage)
+        # cr_threshold: candidate region threshold
+        # --------------------------------------------------------------
+
+        width, height = utility.helpers(self.data).get_width_height()
+
+        r = self.data[:, :, 0]
+        g = self.data[:, :, 1]
+        b = self.data[:, :, 2]
+
+        ## Detection of purple fringe
+        # near saturated region detection
+        nsr_threshold = clip_range[1] * nsr_threshold / 100
+        temp = (r + g + b) / 3
+        temp = np.asarray(temp)
+        mask = temp > nsr_threshold
+        nsr = np.zeros((height, width)).astype(int)
+        nsr[mask] = 1
+
+        # candidate region detection
+        temp = r - b
+        temp1 = b - g
+        temp = np.asarray(temp)
+        temp1 = np.asarray(temp1)
+        mask = (temp < cr_threshold) & (temp1 > cr_threshold)
+        cr = np.zeros((height, width)).astype(int)
+        cr[mask] = 1
+
+        # quantization
+        qr = utility.helpers(r).nonuniform_quantization()
+        qg = utility.helpers(g).nonuniform_quantization()
+        qb = utility.helpers(b).nonuniform_quantization()
+
+        g_qr = utility.edge_detection(qr).sobel(5, "gradient_magnitude")
+        g_qg = utility.edge_detection(qg).sobel(5, "gradient_magnitude")
+        g_qb = utility.edge_detection(qb).sobel(5, "gradient_magnitude")
+
+        g_qr = np.asarray(g_qr)
+        g_qg = np.asarray(g_qg)
+        g_qb = np.asarray(g_qb)
+
+        # bgm: binary gradient magnitude
+        bgm = np.zeros((height, width), dtype=np.float32)
+        mask = (g_qr != 0) | (g_qg != 0) | (g_qb != 0)
+        bgm[mask] = 1
+
+        fringe_map = np.multiply(np.multiply(nsr, cr), bgm)
+        fringe_map = np.asarray(fringe_map)
+        mask = (fringe_map == 1)
+
+        # work on copies so that self.data is not modified in place
+        r1 = r.copy()
+        g1 = g.copy()
+        b1 = b.copy()
+        r1[mask] = g1[mask] = b1[mask] = (r[mask] + g[mask] + b[mask]) / 3.
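+        # Illustration (hypothetical pixel): a fringe candidate such as
+        # (r, g, b) = (180., 40., 200.) that is near-saturated on average and
+        # sits on a quantized edge is desaturated to its channel mean,
+        # (140., 140., 140.), while unmasked pixels pass through unchanged.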
+ + output = np.empty(np.shape(self.data), dtype=np.float32) + output[:, :, 0] = r1 + output[:, :, 1] = g1 + output[:, :, 2] = b1 + + return np.float32(output) + + + def __str__(self): + return self.name diff --git a/IIR-Lab/ISP_pipeline/lsc_table_r_gr_gb_b_2.npy b/IIR-Lab/ISP_pipeline/lsc_table_r_gr_gb_b_2.npy new file mode 100644 index 0000000000000000000000000000000000000000..4e644a21d2d7b4fc4f86854504df101082d4e684 --- /dev/null +++ b/IIR-Lab/ISP_pipeline/lsc_table_r_gr_gb_b_2.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0932e4b3ed5feb111880988eadae26a3cc77a5bd448150087b0983d8a62bb2b7 +size 402653312 diff --git a/IIR-Lab/ISP_pipeline/process_pngs_isp.py b/IIR-Lab/ISP_pipeline/process_pngs_isp.py new file mode 100644 index 0000000000000000000000000000000000000000..23ddd1bb3e041564c5324d8c9065e9f393e7d52b --- /dev/null +++ b/IIR-Lab/ISP_pipeline/process_pngs_isp.py @@ -0,0 +1,276 @@ +import sys +sys.path.append('ISP_pipeline') +from raw_prc_pipeline.pipeline import RawProcessingPipelineDemo +import cv2 +import numpy as np +import json +import PIL.Image as Image +import os,sys +from raw_prc_pipeline import io +from copy import deepcopy +import torch + +def resize_using_pil(img, width=1024, height=768): + img_pil = Image.fromarray(img) + out_size = (width, height) + if img_pil.size == out_size: + return img + out_img = img_pil.resize(out_size, Image.LANCZOS) + # out_img = img_pil + out_img = np.array(out_img) + return out_img + +def fix_orientation(image, orientation): + + if type(orientation) is list: + orientation = orientation[0] + + if orientation == 'Horizontal(normal)': + pass + elif orientation == "Mirror horizontal": + image = cv2.flip(image, 0) + elif orientation == "Rotate 180": + image = cv2.rotate(image, cv2.ROTATE_180) + elif orientation == "Mirror vertical": + image = cv2.flip(image, 1) + elif orientation == "Mirror horizontal and rotate 270 CW": + image = cv2.flip(image, 0) + image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE) + elif orientation == "Rotate 90 CW": + image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE) + elif orientation == "Mirror horizontal and rotate 90 CW": + image = cv2.flip(image, 0) + image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE) + elif orientation == "Rotate 270 CW": + image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE) + + return image + +def isp_night_imaging(data, meta_data, iso, + do_demosaic = True, # H/2 W/2 + + do_channel_gain_white_balance = True, + do_xyz_transform = True, + do_srgb_transform = True, + + do_gamma_correct = True, # con + + do_refinement = True, # 32 bit + do_to_uint8 = True, + + do_resize_using_pil = True, # H/8, W/8 + do_fix_orientation = True + ): + + pipeline_params = { + 'tone_mapping': 'Flash', # options: Flash, Storm, Base, Linear, Drago, Mantiuk, Reinhard + 'illumination_estimation': 'gw', # ie algorithm, options: "gw", "wp", "sog", "iwp" + 'denoise_flg': True, + 'out_landscape_width': 1024, + 'out_landscape_height': 768, + "color_matrix": [ 1.06835938, -0.29882812, -0.14257812, + -0.43164062, 1.35546875, 0.05078125, + -0.1015625, 0.24414062, 0.5859375] + } + + pipeline_demo = RawProcessingPipelineDemo(**pipeline_params) + + # =================================== + # Demosacing + # =================================== + if do_demosaic: + data = torch.stack((data[0,:,:], (data[1,:,:]+data[2,:,:])/2, data[3,:,:]), dim=0) + data = data.permute(1, 2, 0).contiguous() + # torch.cuda.empty_cache() + else: + pass + + # =================================== + # Channel gain for white 
balance + # =================================== + if do_channel_gain_white_balance: + data = pipeline_demo.white_balance(data, img_meta=meta_data) + + else: + pass + + # =================================== + # xyz_transform + # =================================== + if do_xyz_transform: + data = pipeline_demo.xyz_transform(data,img_meta=meta_data) # CCM + else: + pass + + # =================================== + # srgb_transform + # =================================== + if do_srgb_transform: + data = pipeline_demo.srgb_transform(data, img_meta=meta_data) # fix ccm + else: + pass + + # =================================== + # gamma_correct + # =================================== + if do_gamma_correct: + data = pipeline_demo.gamma_correct(data, img_meta=meta_data) + else: + pass + + # =================================== + # refinement + # =================================== + if do_refinement: + if iso < 1000: + pth1 = "Rendering_models/low_iso.pth" + data = pipeline_demo.do_refinement(data, "csrnet", pth1) + else: + pth1 = "Rendering_models/high_iso.pth" + data = pipeline_demo.do_refinement(data, "csrnet", pth1) + torch.cuda.empty_cache() + + else: + pass + + # =================================== + # to_uint8 + # =================================== + if do_to_uint8: + data = pipeline_demo.to_uint8(data, img_meta=meta_data) + torch.cuda.empty_cache() + else: + pass + + # =================================== + # resize_using_pil + # =================================== + if do_resize_using_pil: + data = resize_using_pil(data, pipeline_demo.params["out_landscape_width"], pipeline_demo.params["out_landscape_height"]) + + else: + pass + + # =================================== + # fix_orientation + # =================================== + if do_fix_orientation: + data = fix_orientation(data, meta_data["orientation"]) + else: + pass + + return data + +def readjson(json_path,): + with open(json_path,'r',encoding='UTF-8') as f: + result = json.load(f) + + return result + +def get_smooth_kernel_size(factor): + if factor == 1: + return (5, 5) + elif factor == 0.5: + return (3, 3) + elif factor == 0.375: + return (3, 3) + elif factor in [0.2, 0.25]: + return (5, 5) + elif factor == 0.125: + return (7, 7) + else: + raise Exception('Unknown factor') + +def read_rawpng(path, metadata): + + raw = cv2.imread(str(path), cv2.IMREAD_UNCHANGED) + + if raw.shape[0] == 4: + return raw * 959 + raw = (raw.astype(np.float32) - 256.) / (4095.- 256.) + + raw = bayer2raw(raw, metadata) + raw = np.clip(raw, 0., 1.) + return raw + +def bayer2raw(raw, metadata): + # pack RGGB Bayer raw to 4 channels + H, W = raw.shape + raw = raw[None, ...] 
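+    # Packing sketch (toy example): a 2x2 RGGB tile
+    #
+    #     R G      packs into 4 planes of shape (H/2, W/2),
+    #     G B      ordered [R, G (R row), G (B row), B],
+    #
+    # so raw_pack below has shape (4, H/2, W/2); the BGGR branch reorders the
+    # corner samples so that the output channel order is the same.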
+    if metadata['cfa_pattern'][0] == 0:
+        # RGGB
+        raw_pack = np.concatenate((raw[:, 0:H:2, 0:W:2],
+                                   raw[:, 0:H:2, 1:W:2],
+                                   raw[:, 1:H:2, 0:W:2],
+                                   raw[:, 1:H:2, 1:W:2]), axis=0)
+    else:
+        # BGGR
+        raw_pack = np.concatenate((raw[:, 1:H:2, 1:W:2],
+                                   raw[:, 0:H:2, 1:W:2],
+                                   raw[:, 1:H:2, 0:W:2],
+                                   raw[:, 0:H:2, 0:W:2]), axis=0)
+    return raw_pack
+
+def raw_rggb_float32(raws):
+    # unpack 4-channel raw back to an RGGB Bayer mosaic
+    C, H, W = raws.shape
+    output = np.zeros((H * 2, W * 2)).astype(np.float32)
+
+    output[0:2 * H:2, 0:2 * W:2] = raws[0:1, :, :]
+    output[0:2 * H:2, 1:2 * W:2] = raws[1:2, :, :]
+    output[1:2 * H:2, 0:2 * W:2] = raws[2:3, :, :]
+    output[1:2 * H:2, 1:2 * W:2] = raws[3:4, :, :]
+
+    return output
+
+def json_read(pth):
+    with open(pth) as j:
+        data = json.load(j)
+    return data
+
+def linear_insert_1color(img_dt, resize, fx=128, fy=128):
+    pos_0_0, pos_0_1, pos_1_1, pos_1_0, m, n = insert_linear_pos(img_dt=img_dt, resize=resize, x_scale=fx, y_scale=fy)
+    a = (pos_1_0 - pos_0_0)
+    b = (pos_0_1 - pos_0_0)
+    c = pos_1_1 + pos_0_0 - pos_1_0 - pos_0_1
+    return np.round(a * n + b * m + c * n * m + pos_0_0).astype(int)
+
+def insert_linear_pos(img_dt, resize, x_scale=128, y_scale=128):
+    m_, n_ = img_dt.shape
+    # compute the size of the new image
+    if resize is None:
+        n_new, m_new = np.round(x_scale * n_).astype(int), np.round(y_scale * m_).astype(int)
+    else:
+        n_new, m_new = resize
+
+    n_scale, m_scale = n_ / n_new, m_ / m_new # src_width/dst_width, src_height/dst_height
+    # Step 1: get the four corner points corresponding to each position
+    # 1-1: initialize the destination grid positions
+    m_indxs = np.repeat(np.arange(m_new), n_new).reshape(m_new, n_new)
+    n_indxs = np.array(list(range(n_new))*m_new).reshape(m_new, n_new)
+    # 1-2: map them to center-aligned source coordinates
+    m_indxs_c = (m_indxs + 0.5) * m_scale - 0.5
+    n_indxs_c = (n_indxs + 0.5) * n_scale - 0.5
+    ### clamp negative values to 0
+    m_indxs_c[np.where(m_indxs_c < 0)] = 0.0
+    n_indxs_c[np.where(n_indxs_c < 0)] = 0.0
+
+    # 1-3: get the corner coordinates of the surrounding square
+    m_indxs_c_down = m_indxs_c.astype(int)
+    n_indxs_c_down = n_indxs_c.astype(int)
+    m_indxs_c_up = m_indxs_c_down + 1
+    n_indxs_c_up = n_indxs_c_down + 1
+    ### correct indices that overflow the source image
+    m_max = m_ - 1
+    n_max = n_ - 1
+    m_indxs_c_up[np.where(m_indxs_c_up > m_max)] = m_max
+    n_indxs_c_up[np.where(n_indxs_c_up > n_max)] = n_max
+
+    # 1-4: fetch the pixel values at the four corners of the square
+    pos_0_0 = img_dt[m_indxs_c_down, n_indxs_c_down].astype(int)
+    pos_0_1 = img_dt[m_indxs_c_up, n_indxs_c_down].astype(int)
+    pos_1_1 = img_dt[m_indxs_c_up, n_indxs_c_up].astype(int)
+    pos_1_0 = img_dt[m_indxs_c_down, n_indxs_c_up].astype(int)
+    # 1-5: get the fractional (floating point) offsets
+    m, n = np.modf(m_indxs_c)[0], np.modf(n_indxs_c)[0]
+    return pos_0_0, pos_0_1, pos_1_1, pos_1_0, m, n
diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__init__.py b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..145944fea100721d2745c2584ec7557df753950f
--- /dev/null
+++ b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__init__.py
@@ -0,0 +1,3 @@
+expected_img_ext = '.jpg'
+expected_landscape_img_height = 866
+expected_landscape_img_width = 1300
\ No newline at end of file
diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/__init__.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fbe18bd8d1fd89d1d68b3481740d41ed88a008f8
Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/__init__.cpython-312.pyc differ
diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/__init__.cpython-39.pyc
b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5c2191acdb9f99b93c69cece093f852919acc5e8 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/__init__.cpython-39.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/arch_util.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/arch_util.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5d982291defc518df78fa3a7cd7444336838d779 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/arch_util.cpython-312.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/color.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/color.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7f6a873959f5780e4fe729c842c373af142011bf Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/color.cpython-312.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/color.cpython-39.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/color.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2c3a0741f0dfd7cc30b97afa41fe1c6bbce40eb9 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/color.cpython-39.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/csrnet_network.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/csrnet_network.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..15d4f02699b6a967b87e696c4c0f2e7b38c15a32 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/csrnet_network.cpython-312.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/csrnet_network.cpython-39.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/csrnet_network.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bf60e0892388f6cd2a5c2dc06b9dc86381cab320 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/csrnet_network.cpython-39.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/exif_data_formats.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/exif_data_formats.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..68dd5004acdd82dd1a2e078ac4b2d22fb3742141 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/exif_data_formats.cpython-312.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/exif_data_formats.cpython-39.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/exif_data_formats.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..54b2d49f26a6ec079f98fa71971895cf480d4723 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/exif_data_formats.cpython-39.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/exif_utils.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/exif_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5a8112462b25275c6be2a2ad64f409453a41ab46 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/exif_utils.cpython-312.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/exif_utils.cpython-39.pyc 
b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/exif_utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9628c90d86a561cc416d356a7a207a8a3aeb83ff Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/exif_utils.cpython-39.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/fs.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/fs.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a3c6975dce23c2cfc8f450e31129c8216d14ad2f Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/fs.cpython-312.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/fs.cpython-39.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/fs.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d48459227043bbeacec46c3efb1811cb61e5a020 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/fs.cpython-39.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/io.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/io.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f8e338eee14957aad749860343ea75a5ceb1f708 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/io.cpython-312.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/io.cpython-39.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/io.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cf3690caa1168b5531834ceb99eac2b30024df17 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/io.cpython-39.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/lut_network.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/lut_network.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..33b62ba43af43152c5c28551108a489598c24603 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/lut_network.cpython-312.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/misc.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/misc.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6b6c965995ebd6f3197aceb83053df276da1148f Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/misc.cpython-312.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/misc.cpython-39.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/misc.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9c32161d29dbb5e15723bfea06f6008f76fc2e51 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/misc.cpython-39.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/optim.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/optim.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d704966e579136719f38e6ff50b28470da8475ec Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/optim.cpython-312.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/optim.cpython-39.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/optim.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a01a80b1260b20527116b4dbdf60c01ff54e72d3 
Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/optim.cpython-39.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..247a3da50412ffad85a2d0f710b2fc27813e2686 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline.cpython-312.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline.cpython-39.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cbe8427e47b064bc745f2bdfa63403a5c2d0c8aa Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline.cpython-39.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline_bm3d.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline_bm3d.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..16a24c409b9490abbdd09e398c979ed21bf96b1a Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline_bm3d.cpython-312.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline_bm3d.cpython-39.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline_bm3d.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a67dcc4c1bf1dd19c90e3f7065f81f4124ffb2b1 Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline_bm3d.cpython-39.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline_utils.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cb2273bd40e7f1ea4b87d32cdb3c3b849d9597fa Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline_utils.cpython-312.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline_utils.cpython-39.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline_utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f8793a15091901e79e587d9903da63cce922c56c Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/pipeline_utils.cpython-39.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/refine_network.cpython-312.pyc b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/refine_network.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f2c93de8e68e9783ab30f49da50d1933563e834b Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/__pycache__/refine_network.cpython-312.pyc differ diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/arch_util.py b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/arch_util.py new file mode 100644 index 0000000000000000000000000000000000000000..3b0480f800f3ca33720886abbca6841c796c3b9c --- /dev/null +++ b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/arch_util.py @@ -0,0 +1,626 @@ +import torch +import torch.nn as nn +import torch.nn.init as init +import torch.nn.functional as F + + +def initialize_weights(net_l, scale=1): + if not isinstance(net_l, list): + net_l = [net_l] + for net in net_l: + for m in net.modules(): + if isinstance(m, nn.Conv2d): + init.kaiming_normal_(m.weight, a=0, mode='fan_in') + m.weight.data *= scale # 
for residual block + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.Linear): + init.kaiming_normal_(m.weight, a=0, mode='fan_in') + m.weight.data *= scale + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.BatchNorm2d): + init.constant_(m.weight, 1) + init.constant_(m.bias.data, 0.0) + + +def make_layer(block, n_layers): + layers = [] + for _ in range(n_layers): + layers.append(block()) + return nn.Sequential(*layers) + + +class ResidualBlock_noBN(nn.Module): + '''Residual block w/o BN + ---Conv-ReLU-Conv-+- + |________________| + ''' + + def __init__(self, nf=64): + super(ResidualBlock_noBN, self).__init__() + self.conv1 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True) + self.conv2 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True) + + # initialization + initialize_weights([self.conv1, self.conv2], 0.1) + + def forward(self, x): + identity = x + out = F.relu(self.conv1(x), inplace=True) + out = self.conv2(out) + return identity + out + + +def flow_warp(x, flow, interp_mode='bilinear', padding_mode='zeros'): + """Warp an image or feature map with optical flow + Args: + x (Tensor): size (N, C, H, W) + flow (Tensor): size (N, H, W, 2), normal value + interp_mode (str): 'nearest' or 'bilinear' + padding_mode (str): 'zeros' or 'border' or 'reflection' + + Returns: + Tensor: warped image or feature map + """ + assert x.size()[-2:] == flow.size()[1:3] + B, C, H, W = x.size() + # mesh grid + grid_y, grid_x = torch.meshgrid(torch.arange(0, H), torch.arange(0, W)) + grid = torch.stack((grid_x, grid_y), 2).float() # W(x), H(y), 2 + grid.requires_grad = False + grid = grid.type_as(x) + vgrid = grid + flow + # scale grid to [-1,1] + vgrid_x = 2.0 * vgrid[:, :, :, 0] / max(W - 1, 1) - 1.0 + vgrid_y = 2.0 * vgrid[:, :, :, 1] / max(H - 1, 1) - 1.0 + vgrid_scaled = torch.stack((vgrid_x, vgrid_y), dim=3) + output = F.grid_sample(x, vgrid_scaled, mode=interp_mode, padding_mode=padding_mode) + return output + +""" +Copyright (c) 2022 Samsung Electronics Co., Ltd. + +Author(s): +Luxi Zhao (lucy.zhao@samsung.com; lucyzhao.zlx@gmail.com) +Abdelrahman Abdelhamed (abdoukamel@gmail.com) + +Licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0) License, (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at https://creativecommons.org/licenses/by-nc-sa/4.0 +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. +For conditions of distribution and use, see the accompanying LICENSE.md file. + +""" + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +def utils_get_image_stats(image_shape, grid_size): + """ + Information about the cropped image. + :return: grid size, tile size, sizes of the 4 margins, meshgrids. 
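+    Margins absorb the residual pixels when the image size is not an exact
+    multiple of the grid, so margin_bot/margin_right can be larger than
+    margin_top/margin_left. For example, a 512x512 image with an 8x8 grid
+    yields 64x64 tiles with 32-pixel margins on every side. (Only tile size
+    and margins are computed here; grid size and meshgrids come from
+    get_image_stats.)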
+ """ + + grid_rows = grid_size[0] + grid_cols = grid_size[1] + + residual_height = image_shape[0] % grid_rows + residual_width = image_shape[1] % grid_cols + + tile_height = image_shape[0] // grid_rows + tile_width = image_shape[1] // grid_cols + + margin_top = tile_height // 2 + margin_left = tile_width // 2 + + margin_bot = tile_height + residual_height - margin_top + margin_right = tile_width + residual_width - margin_left + + return tile_height, tile_width, margin_top, margin_left, margin_bot, margin_right + +def apply_ltm_lut(imgs, luts): + + imgs = (imgs - .5) * 2. + + grids = imgs.unsqueeze(0).unsqueeze(0) + luts = luts.unsqueeze(0) + + outs = F.grid_sample(luts, grids, + mode='bilinear', padding_mode='border', align_corners=True) + + return outs.squeeze(0).squeeze(1).permute(1,2,0) + + +def apply_ltm(image, tone_curve, num_curves): + """ + Apply tone curve to an image (patch). + :param image: (h, w, 3) if num_curves == 3, else (h, w) + :param tone_curve: (num_curves, control_points) + :param num_curves: 3 for 1 curve per channel, 1 for 1 curve for all channels. + :return: tone-mapped image. + """ + + if image.shape[-1] == 3: + if type(image) == np.ndarray: + r = tone_curve[0][image[..., 0]] + g = tone_curve[1][image[..., 1]] + b = tone_curve[2][image[..., 2]] + new_image = np.stack((r, g, b), axis=-1) + else: + r = tone_curve[0][image[..., 0].reshape(-1).long()].reshape(image[..., 0].shape) + g = tone_curve[1][image[..., 1].reshape(-1).long()].reshape(image[..., 1].shape) + b = tone_curve[2][image[..., 2].reshape(-1).long()].reshape(image[..., 2].shape) + new_image = torch.stack((r, g, b), axis=-1) + # new_image = np.stack((r, g, b), axis=-1) + else: + new_image = tone_curve[0][image[..., 0].reshape(-1).long()].reshape(image[..., 0].shape).unsqueeze(dim=2) + #tone_curve[0][image[..., 0].reshape(-1).long()].reshape(image[..., 0].shape) + + return new_image + + +def apply_gtm(image, tone_curve, num_curves): + """ + Apply a single tone curve to an image. + :param image: (h, w, 3) if num_curves == 3, else (h, w) + :param tone_curve: (1, num_curves, control_points) + :param num_curves: 3 for 1 curve per channel, 1 for 1 curve for all channels. + :return: tone-mapped image. + """ + tone_curve = tone_curve[0] + out = apply_ltm(image, tone_curve, num_curves) + return out + + +def apply_ltm_center(image, tone_curves, stats, num_curves): + """ + Apply tone curves to the center region of an image. + :param image: the original image. + :param tone_curves: a list of all tone curves in row scan order. + :return: interpolated center region of an image. 
+ """ + grid_rows, grid_cols, tile_height, tile_width, margin_top, margin_left, margin_bot, margin_right, meshgrids = stats + xs_tl, ys_tl, xs_br, ys_br = meshgrids['center'] + + if torch.cuda.is_available(): + device = torch.device("cuda") + else: + device = torch.device("cpu") + xs_tl = xs_tl.to(device) + ys_tl = ys_tl.to(device) + xs_br = xs_br.to(device) + ys_br = ys_br.to(device) + + + # Get neighbourhoods + neighbourhoods = [] + for y in range(margin_top, image.shape[0]-margin_bot, tile_height): + for x in range(margin_left, image.shape[1]-margin_right, tile_width): + neighbourhoods.append(image[y:y + tile_height, x:x + tile_width, :]) + + assert len(neighbourhoods) == (grid_rows-1) * (grid_cols-1) + + # Get indices for all 4-tile neighbourhoods + tile_ids = [] + for i in range(grid_rows - 1): + for j in range(grid_cols - 1): + start = i * grid_cols + j + tile_ids.append([start, start + 1, start + grid_cols, start + grid_cols + 1]) + + # Apply LTM and interpolate + new_ns = [] + for i, n in enumerate(neighbourhoods): + n_tile_ids = tile_ids[i] # ids of the 4 tone curves (tiles) of the neighbourhood + # n_4versions = [apply_ltm(n, tone_curves[j], num_curves) for j in n_tile_ids] # tl, tr, bl, br + n_4versions = [apply_ltm_lut(n, tone_curves[j])for j in n_tile_ids] + out = ys_br * xs_br * n_4versions[0] + ys_br * xs_tl * n_4versions[1] + ys_tl * xs_br * n_4versions[2] + ys_tl * xs_tl * n_4versions[3] + out /= (tile_height-1) * (tile_width-1) + + new_ns.append(out) + + # Stack the interpolated neighbourhoods together + rows = [] + for i in range(grid_rows - 1): + cols = [new_ns[i * (grid_cols - 1) + j] for j in range(grid_cols - 1)] + row = torch.cat(cols, dim=1) + rows.append(row) + out = torch.cat(rows, dim=0) + return out + + +def apply_ltm_border(image, tone_curves, stats, num_curves=3): + """ + Apply tone curves to the border, not including corner areas. + :param image: the original image. + :param tone_curves: a list of all tone curves in row scan order. + :return: interpolated border regions of the image. In order of top, bottom, left, right. 
+ """ + grid_rows, grid_cols, tile_height, tile_width, margin_top, margin_left, margin_bot, margin_right, meshgrids = stats + (top_xs_l, top_xs_r), (bot_xs_l, bot_xs_r), (left_ys_t, left_ys_b), (right_ys_t, right_ys_b) = meshgrids['border'] + + if torch.cuda.is_available(): + device = torch.device("cuda") + else: + device = torch.device("cpu") + + top_xs_l = top_xs_l.to(device) + top_xs_r = top_xs_r.to(device) + bot_xs_l = bot_xs_l.to(device) + bot_xs_r = bot_xs_r.to(device) + + left_ys_t = left_ys_t.to(device) + left_ys_b = left_ys_b.to(device) + right_ys_t = right_ys_t.to(device) + right_ys_b = right_ys_b.to(device) + # top, bottom, left, right neighbourhoods to be interpolated + ntop = [] + nbot = [] + nleft = [] + nright = [] + + for x in range(margin_left, image.shape[1] - margin_right, tile_width): + ntop.append(image[:margin_top, x:x + tile_width, :]) + nbot.append(image[-margin_bot:, x:x + tile_width, :]) + + for y in range(margin_top, image.shape[0] - margin_bot, tile_height): + nleft.append(image[y:y + tile_height, :margin_left, :]) + nright.append(image[y:y + tile_height, -margin_right:, :]) + + def apply_ltm_two_tiles(tc1, tc2, meshgrid1, meshgrid2, nbhd, interp_length, num_curves): + """ + Apply tone curve to, and interpolate a two-tile neighbourhood, either horizontal or vertical + :param tc1: left / top tone curves + :param tc2: right / bottom tone curves + :param meshgrid1: left / top meshgrids (leftmost / topmost positions are 0) + :param meshgrid2: right / bottom meshgrids (rightmost / bottommost positions are 0) + :param nbhd: neighbourhood to interpolate + :param interp_length: normalizing factor of the meshgrid. + Example: if xs = np.meshgrid(np.arange(10)), then interp_length = 9 + :return: interpolated neighbourhood + """ + + # new_nbhd1 = apply_ltm(nbhd, tc1, num_curves) + # new_nbhd2 = apply_ltm(nbhd, tc2, num_curves) + + new_nbhd1 = apply_ltm_lut(nbhd, tc1) + new_nbhd2 = apply_ltm_lut(nbhd, tc2) + + out = meshgrid1 * new_nbhd2 + meshgrid2 * new_nbhd1 + out /= interp_length + return out + + new_ntop = [apply_ltm_two_tiles(tone_curves[i], # left tone curve + tone_curves[i + 1], # right tone curve + top_xs_l, top_xs_r, + n, tile_width - 1, num_curves) for i, n in enumerate(ntop)] + + new_nbot = [apply_ltm_two_tiles(tone_curves[(grid_rows - 1) * grid_cols + i], # left tone curve + tone_curves[(grid_rows - 1) * grid_cols + i + 1], # right tone curve + bot_xs_l, bot_xs_r, + n, tile_width - 1, num_curves) for i, n in enumerate(nbot)] + + new_nleft = [apply_ltm_two_tiles(tone_curves[i * grid_cols], # top tone curve + tone_curves[(i + 1) * grid_cols], # bottom tone curve + left_ys_t, left_ys_b, + n, tile_height - 1, num_curves) for i, n in enumerate(nleft)] + + new_nright = [apply_ltm_two_tiles(tone_curves[(i + 1) * grid_cols - 1], # top tone curve + tone_curves[(i + 2) * grid_cols - 1], # bottom tone curve + right_ys_t, right_ys_b, + n, tile_height - 1, num_curves) for i, n in enumerate(nright)] + + new_ntop = torch.cat(new_ntop, dim=1) + new_nbot = torch.cat(new_nbot, dim=1) + new_nleft = torch.cat(new_nleft, dim=0) + new_nright = torch.cat(new_nright, dim=0) + return new_ntop, new_nbot, new_nleft, new_nright + + +def apply_ltm_corner(image, tone_curves, stats, num_curves=3): + """ + tone_curves: a list of all tone curves in row scan order. 
+    return: interpolated corner tiles in the order of top left, top right, bot left, bot right
+    """
+    grid_rows, grid_cols, tile_height, tile_width, margin_top, margin_left, margin_bot, margin_right, _ = stats
+
+    # tl, tr, bl, br in row-scan order; the bottom-left tile is the first
+    # tile of the last row, i.e. index -grid_cols
+    corner_ids = [0, grid_cols - 1, -grid_cols, -1]
+    tl_tile = image[:margin_top, :margin_left]
+    tr_tile = image[:margin_top, -margin_right:]
+    bl_tile = image[-margin_bot:, :margin_left]
+    br_tile = image[-margin_bot:, -margin_right:]
+
+    corner_tiles = [tl_tile, tr_tile, bl_tile, br_tile]
+    corner_tcs = [tone_curves[i] for i in corner_ids]  # tcs: (grid_size, num_curves, control_points)
+    #new_tiles = [apply_ltm(corner_tiles[i], corner_tcs[i], num_curves) for i in range(len(corner_tcs))]
+    new_tiles = [apply_ltm_lut(corner_tiles[i], corner_tcs[i]) for i in range(len(corner_tcs))]
+
+    return new_tiles[0], new_tiles[1], new_tiles[2], new_tiles[3]
+
+
+# def get_meshgrids(height, width):
+#     """
+#     Get two meshgrids of size (height, width). One with top left corner being (0, 0),
+#     the other with bottom right corner being (0, 0).
+#     :return: top left xs, ys, bottom right xs, ys
+#     """
+#     xs, ys = np.meshgrid(np.arange(width), np.arange(height))
+#     newys, newxs = torch.meshgrid(torch.arange(height, dtype=torch.int32), torch.arange(width, dtype=torch.int32))
+#     # mesh grid for top left corner
+#     xs_tl = np.tile(np.abs(xs)[..., np.newaxis], 3)  # [0, 1, 2, ..., tile_width-1]
+#     ys_tl = np.tile(np.abs(ys)[..., np.newaxis], 3)
+#     new_xs_tl = newxs[..., None].abs().repeat(1, 1, 3)
+#     new_ys_tl = newys[..., None].abs().repeat(1, 1, 3)
+#     # mesh grid for bottom right corner
+#     xs_br = np.tile(np.abs(xs - width + 1)[..., np.newaxis], 3)  # [-(tile_width-1), ..., -2, -1, 0]
+#     ys_br = np.tile(np.abs(ys - height + 1)[..., np.newaxis], 3)
+
+#     new_xs_br = (newxs - width + 1).abs()[..., None].repeat(1, 1, 3)
+#     new_ys_br = (newys - width + 1).abs()[..., None].repeat(1, 1, 3)
+#     # return xs_tl, ys_tl, xs_br, ys_br
+#     return new_xs_tl, new_ys_tl, new_xs_br, new_ys_br
+def get_meshgrids(height, width):
+    """
+    Get two meshgrids of size (height, width). One with top left corner being (0, 0),
+    the other with bottom right corner being (0, 0).
+    :return: top left xs, ys, bottom right xs, ys
+    """
+    xs, ys = np.meshgrid(np.arange(width), np.arange(height))
+    # mesh grid for top left corner
+    xs_tl = np.tile(np.abs(xs)[..., np.newaxis], 3)  # [0, 1, 2, ..., tile_width-1]
+    ys_tl = np.tile(np.abs(ys)[..., np.newaxis], 3)
+    # mesh grid for bottom right corner
+    xs_br = np.tile(np.abs(xs - width + 1)[..., np.newaxis], 3)  # [-(tile_width-1), ..., -2, -1, 0]
+    ys_br = np.tile(np.abs(ys - height + 1)[..., np.newaxis], 3)
+
+    return torch.tensor(xs_tl), torch.tensor(ys_tl), torch.tensor(xs_br), torch.tensor(ys_br)
+
+
+
+def get_meshgrid_center(tile_height, tile_width):
+    return get_meshgrids(tile_height, tile_width)
+
+
+def get_meshgrid_border(tile_height, tile_width, margin_top, margin_left, margin_bot, margin_right):
+    """
+    :return: meshgrids for the 4 border regions, in the order of top, bottom, left, right
+    """
+    # top
+    top_xs_l, _, top_xs_r, _ = get_meshgrids(margin_top, tile_width)
+
+    # bottom
+    bot_xs_l, _, bot_xs_r, _ = get_meshgrids(margin_bot, tile_width)
+
+    # left
+    _, left_ys_t, _, left_ys_b = get_meshgrids(tile_height, margin_left)
+
+    # right
+    _, right_ys_t, _, right_ys_b = get_meshgrids(tile_height, margin_right)
+
+    return (top_xs_l, top_xs_r), (bot_xs_l, bot_xs_r), (left_ys_t, left_ys_b), (right_ys_t, right_ys_b)
+
+
+def get_image_stats(image, grid_size):
+    """
+    Information about the cropped image.
+    :param image: the original image
+    :return: grid size, tile size, sizes of the 4 margins, meshgrids.
+    """
+
+    grid_rows = grid_size[0]
+    grid_cols = grid_size[1]
+
+    tile_height, tile_width, margin_top, margin_left, margin_bot, margin_right = utils_get_image_stats(image.shape,
+                                                                                                       grid_size)
+
+    meshgrid_center = get_meshgrid_center(tile_height, tile_width)
+    meshgrid_border = get_meshgrid_border(tile_height, tile_width, margin_top, margin_left, margin_bot, margin_right)
+
+    meshgrids = {
+        'center': meshgrid_center,
+        'border': meshgrid_border
+    }
+
+    return grid_rows, grid_cols, tile_height, tile_width, margin_top, margin_left, margin_bot, margin_right, meshgrids
+
+
+# To debug: hand-construct an image tensor of shape 1 * 512 * 512 * 3 and a
+# tone_curve tensor of shape 1 * 64 * 3 * 256, then debug only in this function
+def do_interpolation_lut(image, tone_curves, grid_size, num_curves=3):
+    """
+    Perform tone mapping and interpolation on an image.
+    Center region: bilinear interpolation.
+    Border region: linear interpolation.
+    Corner region: no interpolation.
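+    The three region types are processed independently and concatenated back
+    together, so the output tensor has the same shape as the input image.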
+ :param num_curves: 3 -> 1 curve for each R,G,B channel, 1 -> 1 curve for all channels + :param image: input int8 + :param tone_curves: (grid_size, num_curves, control_points) + :param grid_size: (ncols, nrows) + :return: image: float32, between [0-1] + """ + if grid_size[0] == 1 and grid_size[1] == 1: + return apply_gtm(image, tone_curves, num_curves).astype(np.float64) + + # get image statistics + stats = get_image_stats(image, grid_size) + + + + # Center area: + center = apply_ltm_center(image, tone_curves, stats, num_curves) + + # Border area: + b_top, b_bot, b_left, b_right = apply_ltm_border(image, tone_curves, stats, num_curves) + + # Corner area: + tlc, trc, blc, brc = apply_ltm_corner(image, tone_curves, stats, num_curves) + + # stack the corners, borders, and center together + row_t = torch.cat([tlc, b_top, trc], dim=1) + row_c = torch.cat([b_left, center, b_right], dim=1) + row_b = torch.cat([blc, b_bot, brc], dim=1) + out = torch.cat([row_t, row_c, row_b], dim=0) + + assert out.shape == image.shape + + return out + + + +class Model(nn.Module): + def __init__(self): + super(Model, self).__init__() + # self.conv1 = nn.Conv2d(3, 4, kernel_size=3, padding=1) + # self.pool1 = nn.MaxPool2d(2) + + # self.conv2 = nn.Conv2d(4, 8, kernel_size=3, padding=1) + # self.pool2 = nn.MaxPool2d(2) + + # self.conv3 = nn.Conv2d(8, 16, kernel_size=3, padding=1) + # self.pool3 = nn.MaxPool2d(2) + + # self.conv4 = nn.Conv2d(16, 32, kernel_size=3, padding=1) + # self.pool4 = nn.MaxPool2d(2) + + # self.conv5 = nn.Conv2d(32, 64, kernel_size=3, padding=1) + # self.pool5 = nn.MaxPool2d(2) + + # self.conv6 = nn.Conv2d(64, 768, kernel_size=3, padding=1) + # self.pool6 = nn.MaxPool2d(2) + + self.layer_1 = nn.Sequential( + nn.Conv2d(3, 4, kernel_size=3, padding=1), + nn.BatchNorm2d(4), + nn.ReLU(), + nn.MaxPool2d(2) + ) + self.layer_2 = nn.Sequential( + nn.Conv2d(4, 8, kernel_size=3, padding=1), + nn.BatchNorm2d(8), + nn.ReLU(), + nn.MaxPool2d(2) + ) + self.layer_3 = nn.Sequential( + nn.Conv2d(8, 16, kernel_size=3, padding=1), + nn.BatchNorm2d(16), + nn.ReLU(), + nn.MaxPool2d(2) + ) + self.layer_4 = nn.Sequential( + nn.Conv2d(16, 32, kernel_size=3, padding=1), + nn.BatchNorm2d(32), + nn.ReLU(), + nn.MaxPool2d(2) + ) + self.layer_5 = nn.Sequential( + nn.Conv2d(32, 64, kernel_size=3, padding=1), + nn.BatchNorm2d(64), + nn.ReLU(), + nn.MaxPool2d(2) + ) + self.layer_6 = nn.Sequential( + nn.Conv2d(64, 768, kernel_size=3, padding=1), + nn.BatchNorm2d(768), + nn.Sigmoid(), + nn.MaxPool2d(2) + ) + + + def forward(self, x): + + ''' + + original = x + x = self.conv1(x) + x = self.pool1(x) + + x = self.conv2(x) + x = self.pool2(x) + + x = self.conv3(x) + x = self.pool3(x) + + x = self.conv4(x) + x = self.pool4(x) + + x = self.conv5(x) + x = self.pool5(x) + + x = self.conv6(x) + x = self.pool6(x) + oldres = x + x = original + ''' + + x = self.layer_1(x) + + x = self.layer_2(x) + + x = self.layer_3(x) + + x = self.layer_4(x) + + x = self.layer_5(x) + + x = self.layer_6(x) + + x = x.reshape(x.shape[0], x.shape[2] * x.shape[3], 3, int(x.shape[1] / 3)) + return x + + +def _lut_transform(imgs, luts): + # img (b, 3, h, w), lut (b, c, m, m, m) + if imgs.shape[1]==1: + + #for gray image pro-processs + luts = luts.expand(1,1,64,64,64) + # normalize pixel values + imgs = (imgs - .5) * 2. + grids = (imgs.unsqueeze(4)).repeat(1,1,1,1,3) + else: + # normalize pixel values + imgs = (imgs - .5) * 2. 
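+        # grid_sample interprets the grid as (x, y, z) sampling coordinates
+        # in [-1, 1], hence the rescaling of the pixel values above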
+ # reshape img to grid of shape (b, 1, h, w, 3) + # imgs = imgs.permute(2,0,1).unsqueeze(dim=0) + # grids = imgs.permute(0, 2, 3, 1).unsqueeze(1) + grids = imgs.unsqueeze(0).unsqueeze(0) + luts = luts.unsqueeze(0) + # after gridsampling, output is of shape (b, c, 1, h, w) + outs = F.grid_sample(luts, grids, + mode='bilinear', padding_mode='border', align_corners=True) + return outs.squeeze(2) + + +if __name__ == '__main__': + + import torch + import cv2 + + grid_size = [8,8] + + np.random.seed(42) + rand_img = np.random.random((512, 512, 3)) + luts_np = np.random.random((64, 3, 9)) + + img_torch = torch.tensor(rand_img, dtype=torch.float32).cuda() + luts_torch = torch.tensor(luts_np, dtype=torch.float32).cuda() + + + iluts = [] + for i in range(luts_torch.shape[0]): + iluts.append(torch.stack( + torch.meshgrid(*(luts_torch[i].unbind(0)[::-1])), + dim=0).flip(0)) + iluts = torch.stack(iluts, dim=0) + + + result = do_interpolation_lut(img_torch, iluts, grid_size) + print(result) + + + + + + diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/color.py b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/color.py new file mode 100644 index 0000000000000000000000000000000000000000..f18d489e83ef7f6ba8721a8226d04950846c8186 --- /dev/null +++ b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/color.py @@ -0,0 +1,306 @@ +import numpy as np + + +def rgb2gray(data): + return 0.299 * data[:, :, 0] + \ + 0.587 * data[:, :, 1] + \ + 0.114 * data[:, :, 2] + + +def rgb2ycc(data, rule="bt601"): + # map to select kr and kb + kr_kb_dict = {"bt601": [0.299, 0.114], + "bt709": [0.2126, 0.0722], + "bt2020": [0.2627, 0.0593]} + + kr = kr_kb_dict[rule][0] + kb = kr_kb_dict[rule][1] + kg = 1 - (kr + kb) + + output = np.empty(np.shape(data), dtype=np.float32) + output[:, :, 0] = kr * data[:, :, 0] + \ + kg * data[:, :, 1] + \ + kb * data[:, :, 2] + output[:, :, 1] = 0.5 * ((data[:, :, 2] - output[:, :, 0]) / (1 - kb)) + output[:, :, 2] = 0.5 * ((data[:, :, 0] - output[:, :, 0]) / (1 - kr)) + + return output + + +def ycc2rgb(data, rule="bt601"): + # map to select kr and kb + kr_kb_dict = {"bt601": [0.299, 0.114], + "bt709": [0.2126, 0.0722], + "bt2020": [0.2627, 0.0593]} + + kr = kr_kb_dict[rule][0] + kb = kr_kb_dict[rule][1] + kg = 1 - (kr + kb) + + output = np.empty(np.shape(data), dtype=np.float32) + output[:, :, 0] = 2. * data[:, :, 2] * (1 - kr) + data[:, :, 0] + output[:, :, 2] = 2. 
* data[:, :, 1] * (1 - kb) + data[:, :, 0] + output[:, :, 1] = (data[:, :, 0] - kr * output[:, :, 0] - kb * output[:, :, 2]) / kg + + return output + + +def degamma_srgb(data, clip_range=[0, 65535]): + # bring data in range 0 to 1 + data = np.clip(data, clip_range[0], clip_range[1]) + data = np.divide(data, clip_range[1]) + + data = np.asarray(data) + mask = data > 0.04045 + + # basically, if data[x, y, c] > 0.04045, data[x, y, c] = ( (data[x, y, c] + 0.055) / 1.055 ) ^ 2.4 + # else, data[x, y, c] = data[x, y, c] / 12.92 + data[mask] += 0.055 + data[mask] /= 1.055 + data[mask] **= 2.4 + + data[np.invert(mask)] /= 12.92 + + # rescale + return np.clip(data * clip_range[1], clip_range[0], clip_range[1]) + + +def degamma_adobe_rgb_1998(data, clip_range=[0, 65535]): + # bring data in range 0 to 1 + data = np.clip(data, clip_range[0], clip_range[1]) + data = np.divide(data, clip_range[1]) + + data = np.power(data, 2.2) # originally raised to 2.19921875 + + # rescale + return np.clip(data * clip_range[1], clip_range[0], clip_range[1]) + + +def rgb2xyz(data, color_space="srgb", clip_range=[0, 255]): + # input rgb in range clip_range + # output xyz is in range 0 to 1 + if color_space == "srgb": + # degamma / linearization + data = degamma_srgb(data, clip_range) + data = np.float32(data) + data = np.divide(data, clip_range[1]) + + # matrix multiplication` + output = np.empty(np.shape(data), dtype=np.float32) + output[:, :, 0] = data[:, :, 0] * 0.4124 + data[:, :, 1] * 0.3576 + data[:, :, 2] * 0.1805 + output[:, :, 1] = data[:, :, 0] * 0.2126 + data[:, :, 1] * 0.7152 + data[:, :, 2] * 0.0722 + output[:, :, 2] = data[:, :, 0] * 0.0193 + data[:, :, 1] * 0.1192 + data[:, :, 2] * 0.9505 + elif color_space == "adobe-rgb-1998": + # degamma / linearization + data = degamma_adobe_rgb_1998(data, clip_range) + data = np.float32(data) + data = np.divide(data, clip_range[1]) + + # matrix multiplication + output = np.empty(np.shape(data), dtype=np.float32) + output[:, :, 0] = data[:, :, 0] * 0.5767309 + data[:, :, 1] * 0.1855540 + data[:, :, 2] * 0.1881852 + output[:, :, 1] = data[:, :, 0] * 0.2973769 + data[:, :, 1] * 0.6273491 + data[:, :, 2] * 0.0752741 + output[:, :, 2] = data[:, :, 0] * 0.0270343 + data[:, :, 1] * 0.0706872 + data[:, :, 2] * 0.9911085 + elif color_space == "linear": + # matrix multiplication` + output = np.empty(np.shape(data), dtype=np.float32) + data = np.float32(data) + data = np.divide(data, clip_range[1]) + output[:, :, 0] = data[:, :, 0] * 0.4124 + data[:, :, 1] * 0.3576 + data[:, :, 2] * 0.1805 + output[:, :, 1] = data[:, :, 0] * 0.2126 + data[:, :, 1] * 0.7152 + data[:, :, 2] * 0.0722 + output[:, :, 2] = data[:, :, 0] * 0.0193 + data[:, :, 1] * 0.1192 + data[:, :, 2] * 0.9505 + else: + print("Warning! 
color_space must be srgb or adobe-rgb-1998.") + return + + return output + + +def gamma_srgb(data, clip_range=[0, 65535]): + # bring data in range 0 to 1 + data = np.clip(data, clip_range[0], clip_range[1]) + data = np.divide(data, clip_range[1]) + + data = np.asarray(data) + mask = data > 0.0031308 + + # basically, if data[x, y, c] > 0.0031308, data[x, y, c] = 1.055 * ( var_R(i, j) ^ ( 1 / 2.4 ) ) - 0.055 + # else, data[x, y, c] = data[x, y, c] * 12.92 + data[mask] **= 0.4167 + data[mask] *= 1.055 + data[mask] -= 0.055 + + data[np.invert(mask)] *= 12.92 + + # rescale + return np.clip(data * clip_range[1], clip_range[0], clip_range[1]) + + +def gamma_adobe_rgb_1998(data, clip_range=[0, 65535]): + # bring data in range 0 to 1 + data = np.clip(data, clip_range[0], clip_range[1]) + data = np.divide(data, clip_range[1]) + + data = np.power(data, 0.4545) + + # rescale + return np.clip(data * clip_range[1], clip_range[0], clip_range[1]) + + +def xyz2rgb(data, color_space="srgb", clip_range=[0, 255]): + # input xyz is in range 0 to 1 + # output rgb in clip_range + + # allocate space for output + output = np.empty(np.shape(data), dtype=np.float32) + + if color_space == "srgb": + # matrix multiplication + output[:, :, 0] = data[:, :, 0] * 3.2406 + data[:, :, 1] * -1.5372 + data[:, :, 2] * -0.4986 + output[:, :, 1] = data[:, :, 0] * -0.9689 + data[:, :, 1] * 1.8758 + data[:, :, 2] * 0.0415 + output[:, :, 2] = data[:, :, 0] * 0.0557 + data[:, :, 1] * -0.2040 + data[:, :, 2] * 1.0570 + + # gamma to retain nonlinearity + output = gamma_srgb(output * clip_range[1], clip_range) + elif color_space == "adobe-rgb-1998": + # matrix multiplication + output[:, :, 0] = data[:, :, 0] * 2.0413690 + data[:, :, 1] * -0.5649464 + data[:, :, 2] * -0.3446944 + output[:, :, 1] = data[:, :, 0] * -0.9692660 + data[:, :, 1] * 1.8760108 + data[:, :, 2] * 0.0415560 + output[:, :, 2] = data[:, :, 0] * 0.0134474 + data[:, :, 1] * -0.1183897 + data[:, :, 2] * 1.0154096 + + # gamma to retain nonlinearity + output = gamma_adobe_rgb_1998(output * clip_range[1], clip_range) + elif color_space == "linear": + + # matrix multiplication + output[:, :, 0] = data[:, :, 0] * 3.2406 + data[:, :, 1] * -1.5372 + data[:, :, 2] * -0.4986 + output[:, :, 1] = data[:, :, 0] * -0.9689 + data[:, :, 1] * 1.8758 + data[:, :, 2] * 0.0415 + output[:, :, 2] = data[:, :, 0] * 0.0557 + data[:, :, 1] * -0.2040 + data[:, :, 2] * 1.0570 + + # gamma to retain nonlinearity + output = output * clip_range[1] + else: + print("Warning! 
color_space must be srgb or adobe-rgb-1998.") + return + + return output + + +def get_xyz_reference(cie_version="1931", illuminant="d65"): + if cie_version == "1931": + xyz_reference_dictionary = {"A": [109.850, 100.0, 35.585], + "B": [99.0927, 100.0, 85.313], + "C": [98.074, 100.0, 118.232], + "d50": [96.422, 100.0, 82.521], + "d55": [95.682, 100.0, 92.149], + "d65": [95.047, 100.0, 108.883], + "d75": [94.972, 100.0, 122.638], + "E": [100.0, 100.0, 100.0], + "F1": [92.834, 100.0, 103.665], + "F2": [99.187, 100.0, 67.395], + "F3": [103.754, 100.0, 49.861], + "F4": [109.147, 100.0, 38.813], + "F5": [90.872, 100.0, 98.723], + "F6": [97.309, 100.0, 60.191], + "F7": [95.044, 100.0, 108.755], + "F8": [96.413, 100.0, 82.333], + "F9": [100.365, 100.0, 67.868], + "F10": [96.174, 100.0, 81.712], + "F11": [100.966, 100.0, 64.370], + "F12": [108.046, 100.0, 39.228]} + elif cie_version == "1964": + xyz_reference_dictionary = {"A": [111.144, 100.0, 35.200], + "B": [99.178, 100.0, 84.3493], + "C": [97.285, 100.0, 116.145], + "D50": [96.720, 100.0, 81.427], + "D55": [95.799, 100.0, 90.926], + "D65": [94.811, 100.0, 107.304], + "D75": [94.416, 100.0, 120.641], + "E": [100.0, 100.0, 100.0], + "F1": [94.791, 100.0, 103.191], + "F2": [103.280, 100.0, 69.026], + "F3": [108.968, 100.0, 51.965], + "F4": [114.961, 100.0, 40.963], + "F5": [93.369, 100.0, 98.636], + "F6": [102.148, 100.0, 62.074], + "F7": [95.792, 100.0, 107.687], + "F8": [97.115, 100.0, 81.135], + "F9": [102.116, 100.0, 67.826], + "F10": [99.001, 100.0, 83.134], + "F11": [103.866, 100.0, 65.627], + "F12": [111.428, 100.0, 40.353]} + else: + print("Warning! cie_version must be 1931 or 1964.") + return + return np.divide(xyz_reference_dictionary[illuminant], 100.0) + + +def xyz2lab(data, cie_version="1931", illuminant="d65"): + xyz_reference = get_xyz_reference(cie_version, illuminant) + + data = data + data[:, :, 0] = data[:, :, 0] / xyz_reference[0] + data[:, :, 1] = data[:, :, 1] / xyz_reference[1] + data[:, :, 2] = data[:, :, 2] / xyz_reference[2] + + data = np.asarray(data) + + # if data[x, y, c] > 0.008856, data[x, y, c] = data[x, y, c] ^ (1/3) + # else, data[x, y, c] = 7.787 * data[x, y, c] + 16/116 + mask = data > 0.008856 + data[mask] **= 1. / 3. + data[np.invert(mask)] *= 7.787 + data[np.invert(mask)] += 16. / 116. + + data = np.float32(data) + output = np.empty(np.shape(data), dtype=np.float32) + output[:, :, 0] = 116. * data[:, :, 1] - 16. + output[:, :, 1] = 500. * (data[:, :, 0] - data[:, :, 1]) + output[:, :, 2] = 200. * (data[:, :, 1] - data[:, :, 2]) + + return output + + +def lab2xyz(data, cie_version="1931", illuminant="d65"): + output = np.empty(np.shape(data), dtype=np.float32) + + output[:, :, 1] = (data[:, :, 0] + 16.) / 116. + output[:, :, 0] = (data[:, :, 1] / 500.) + output[:, :, 1] + output[:, :, 2] = output[:, :, 1] - (data[:, :, 2] / 200.) + + # if output[x, y, c] > 0.008856, output[x, y, c] ^ 3 + # else, output[x, y, c] = ( output[x, y, c] - 16/116 ) / 7.787 + output = np.asarray(output) + mask = output > 0.008856 + output[mask] **= 3. 
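+    # below the knee, invert the affine branch t -> 7.787 * t + 16 / 116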
+ output[np.invert(mask)] -= 16 / 116 + output[np.invert(mask)] /= 7.787 + + xyz_reference = get_xyz_reference(cie_version, illuminant) + + output = np.float32(output) + output[:, :, 0] = output[:, :, 0] * xyz_reference[0] + output[:, :, 1] = output[:, :, 1] * xyz_reference[1] + output[:, :, 2] = output[:, :, 2] * xyz_reference[2] + + return output + + +def lab2lch(data): + output = np.empty(np.shape(data), dtype=np.float32) + + output[:, :, 0] = data[:, :, 0] # L transfers directly + output[:, :, 1] = np.power(np.power(data[:, :, 1], 2) + np.power(data[:, :, 2], 2), 0.5) + output[:, :, 2] = np.arctan2(data[:, :, 2], data[:, :, 1]) * 180 / np.pi + + return output + + +def lch2lab(data): + output = np.empty(np.shape(data), dtype=np.float32) + + output[:, :, 0] = data[:, :, 0] # L transfers directly + output[:, :, 1] = np.multiply(np.cos(data[:, :, 2] * np.pi / 180), data[:, :, 1]) + output[:, :, 2] = np.multiply(np.sin(data[:, :, 2] * np.pi / 180), data[:, :, 1]) + + return output diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/csrnet_network.py b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/csrnet_network.py new file mode 100644 index 0000000000000000000000000000000000000000..c0483fa328a819378111beb9f30e09bba208c873 --- /dev/null +++ b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/csrnet_network.py @@ -0,0 +1,76 @@ +import functools +import math +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class Condition(nn.Module): + def __init__(self, in_nc=3, nf=32): + super(Condition, self).__init__() + stride = 2 + pad = 0 + self.pad = nn.ZeroPad2d(1) + self.conv1 = nn.Conv2d(in_nc, nf, 7, stride, pad, bias=True) + self.conv2 = nn.Conv2d(nf, nf, 3, stride, pad, bias=True) + self.conv3 = nn.Conv2d(nf, nf, 3, stride, pad, bias=True) + self.act = nn.ReLU(inplace=True) + + def forward(self, x): + conv1_out = self.act(self.conv1(self.pad(x))) + conv2_out = self.act(self.conv2(self.pad(conv1_out))) + conv3_out = self.act(self.conv3(self.pad(conv2_out))) + out = torch.mean(conv3_out, dim=[2, 3], keepdim=False) + + return out + + +# 3layers with control +class CSRNet(nn.Module): + def __init__(self, in_nc=3, out_nc=3, base_nf=48, cond_nf=24): + super(CSRNet, self).__init__() + + self.base_nf = base_nf + self.out_nc = out_nc + + self.cond_net = Condition(in_nc=in_nc, nf=cond_nf) + + self.cond_scale1 = nn.Linear(cond_nf, base_nf, bias=True) + self.cond_scale2 = nn.Linear(cond_nf, base_nf, bias=True) + self.cond_scale3 = nn.Linear(cond_nf, 3, bias=True) + + self.cond_shift1 = nn.Linear(cond_nf, base_nf, bias=True) + self.cond_shift2 = nn.Linear(cond_nf, base_nf, bias=True) + self.cond_shift3 = nn.Linear(cond_nf, 3, bias=True) + + self.conv1 = nn.Conv2d(in_nc, base_nf, 1, 1, bias=True) + self.conv2 = nn.Conv2d(base_nf, base_nf, 1, 1, bias=True) + self.conv3 = nn.Conv2d(base_nf, out_nc, 1, 1, bias=True) + + self.act = nn.ReLU(inplace=True) + + + def forward(self, x): + cond = self.cond_net(x) + + scale1 = self.cond_scale1(cond) + shift1 = self.cond_shift1(cond) + + scale2 = self.cond_scale2(cond) + shift2 = self.cond_shift2(cond) + + scale3 = self.cond_scale3(cond) + shift3 = self.cond_shift3(cond) + + out = self.conv1(x) + out = out * scale1.view(-1, self.base_nf, 1, 1) + shift1.view(-1, self.base_nf, 1, 1) + out + out = self.act(out) + + + out = self.conv2(out) + out = out * scale2.view(-1, self.base_nf, 1, 1) + shift2.view(-1, self.base_nf, 1, 1) + out + out = self.act(out) + + out = self.conv3(out) + out = out * scale3.view(-1, self.out_nc, 1, 1) + shift3.view(-1, self.out_nc, 1, 1) + out + 
return out \ No newline at end of file diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/exif_data_formats.py b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/exif_data_formats.py new file mode 100644 index 0000000000000000000000000000000000000000..3854a24d790348fac8b81590d8c24bb48fe80e81 --- /dev/null +++ b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/exif_data_formats.py @@ -0,0 +1,22 @@ +class ExifFormat: + def __init__(self, id, name, size, short_name): + self.id = id + self.name = name + self.size = size + self.short_name = short_name # used with struct.unpack() + + +exif_formats = { + 1: ExifFormat(1, 'unsigned byte', 1, 'B'), + 2: ExifFormat(2, 'ascii string', 1, 's'), + 3: ExifFormat(3, 'unsigned short', 2, 'H'), + 4: ExifFormat(4, 'unsigned long', 4, 'L'), + 5: ExifFormat(5, 'unsigned rational', 8, ''), + 6: ExifFormat(6, 'signed byte', 1, 'b'), + 7: ExifFormat(7, 'undefined', 1, 'B'), # consider `undefined` as `unsigned byte` + 8: ExifFormat(8, 'signed short', 2, 'h'), + 9: ExifFormat(9, 'signed long', 4, 'l'), + 10: ExifFormat(10, 'signed rational', 8, ''), + 11: ExifFormat(11, 'single float', 4, 'f'), + 12: ExifFormat(12, 'double float', 8, 'd'), +} \ No newline at end of file diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/exif_utils.py b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/exif_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..566620c1cc33d9f7ec1c75a182192176daeb1253 --- /dev/null +++ b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/exif_utils.py @@ -0,0 +1,208 @@ +""" +Manual parsing of image file directories (IFDs). +""" + + +import struct +from fractions import Fraction +from raw_prc_pipeline.exif_data_formats import exif_formats + +class Ifd: + def __init__(self): + self.offset = -1 + self.tags = {} # dict; tag number will be key. + + +class Tag: + def __init__(self): + self.offset = -1 + self.tag_num = -1 + self.data_format = -1 + self.num_values = -1 + self.values = [] + + +def parse_exif(image_path, verbose=True): + """ + Parse EXIF tags from a binary file and return IFDs. + Returned IFDs include EXIF SubIFDs, if any. + """ + + def print_(str_): + if verbose: + print(str_) + + ifds = {} # dict of pairs; using offset to IFD as key. 
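+    # A TIFF/EXIF file starts with a byte-order mark (b'II' little-endian,
+    # b'MM' big-endian), the magic number 0x002A, and the offset of IFD0.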
+
+    with open(image_path, 'rb') as fid:
+        fid.seek(0)
+        b0 = fid.read(1)
+        _ = fid.read(1)
+        # byte storage direction (endian):
+        # +1: b'M' (big-endian/Motorola)
+        # -1: b'I' (little-endian/Intel)
+        endian = 1 if b0 == b'M' else -1
+        print_("Endian = {}".format(b0))
+        endian_sign = "<" if endian == -1 else ">"  # used in struct.unpack
+        print_("Endian sign = {}".format(endian_sign))
+        _ = fid.read(2)  # 0x002A
+        b4_7 = fid.read(4)  # offset to first IFD
+        offset_ = struct.unpack(endian_sign + "I", b4_7)[0]
+        i = 0
+        ifd_offsets = [offset_]
+        while len(ifd_offsets) > 0:
+            offset_ = ifd_offsets.pop(0)
+            # check if IFD at this offset was already parsed before
+            if offset_ in ifds:
+                continue
+            print_("=========== Parsing IFD # {} ===========".format(i))
+            ifd_ = parse_exif_ifd(fid, offset_, endian_sign, verbose)
+            ifds.update({ifd_.offset: ifd_})
+            print_("=========== Finished parsing IFD # {} ===========".format(i))
+            i += 1
+            # check SubIFDs; zero or more offsets at tag 0x014a
+            sub_ifds_tag_num = int('0x014a', 16)
+            if sub_ifds_tag_num in ifd_.tags:
+                ifd_offsets.extend(ifd_.tags[sub_ifds_tag_num].values)
+            # check Exif SubIFD; usually one offset at tag 0x8769
+            exif_sub_ifd_tag_num = int('0x8769', 16)
+            if exif_sub_ifd_tag_num in ifd_.tags:
+                ifd_offsets.extend(ifd_.tags[exif_sub_ifd_tag_num].values)
+    return ifds
+
+
+def parse_exif_ifd(binary_file, offset_, endian_sign, verbose=True):
+    """
+    Parse an EXIF IFD.
+    """
+
+    def print_(str_):
+        if verbose:
+            print(str_)
+
+    ifd = Ifd()
+    ifd.offset = offset_
+    print_("IFD offset = {}".format(ifd.offset))
+    binary_file.seek(offset_)
+    num_entries = struct.unpack(endian_sign + "H", binary_file.read(2))[0]  # format H = unsigned short
+    print_("Number of entries = {}".format(num_entries))
+    for t in range(num_entries):
+        print_("---------- Tag {} / {} ----------".format(t + 1, num_entries))
+        tag_ = parse_exif_tag(binary_file, endian_sign, verbose)
+        ifd.tags.update({tag_.tag_num: tag_})  # supposedly, EXIF tag numbers won't repeat in the same IFD
+    # TODO: check for subsequent IFDs by parsing the next 4 bytes immediately after the IFD
+    return ifd
+
+
+def parse_exif_tag(binary_file, endian_sign, verbose=True):
+    """
+    Parse an EXIF tag from a binary file, starting at the current file pointer, and return the tag values.
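+    Values that do not fit in 4 bytes are stored at a separate data offset;
+    the file position is restored afterwards so the next tag reads correctly.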
+ """ + + def print_(str_): + if verbose: + print(str_) + + tag = Tag() + + # tag offset + tag.offset = binary_file.tell() + print_("Tag offset = {}".format(tag.offset)) + + # tag number + bytes_ = binary_file.read(2) + tag.tag_num = struct.unpack(endian_sign + "H", bytes_)[0] # H: unsigned 2-byte short + print_("Tag number = {} = 0x{:04x}".format(tag.tag_num, tag.tag_num)) + + # data format (some value between [1, 12]) + tag.data_format = struct.unpack(endian_sign + "H", binary_file.read(2))[0] # H: unsigned 2-byte short + exif_format = exif_formats[tag.data_format] + print_("Data format = {} = {}".format(tag.data_format, exif_format.name)) + + # number of components/values + tag.num_values = struct.unpack(endian_sign + "I", binary_file.read(4))[0] # I: unsigned 4-byte integer + print_("Number of values = {}".format(tag.num_values)) + + # total number of data bytes + total_bytes = tag.num_values * exif_format.size + print_("Total bytes = {}".format(total_bytes)) + + # seek to data offset (if needed) + data_is_offset = False + current_offset = binary_file.tell() + if total_bytes > 4: + print_("Total bytes > 4; The next 4 bytes are an offset.") + data_is_offset = True + data_offset = struct.unpack(endian_sign + "I", binary_file.read(4))[0] + current_offset = binary_file.tell() + print_("Current offset = {}".format(current_offset)) + print_("Seeking to data offset = {}".format(data_offset)) + binary_file.seek(data_offset) + + # read values + # TODO: need to distinguish between numeric and text values? + if tag.num_values == 1 and total_bytes < 4: + # special case: data is a single value that is less than 4 bytes inside 4 bytes, take care of endian + val_bytes = binary_file.read(4) + # if endian_sign == ">": + # val_bytes = val_bytes[4 - total_bytes:] + # else: + # val_bytes = val_bytes[:total_bytes][::-1] + val_bytes = val_bytes[:total_bytes] + tag.values.append(struct.unpack(endian_sign + exif_format.short_name, val_bytes)[0]) + else: + # read data values one by one + for k in range(tag.num_values): + val_bytes = binary_file.read(exif_format.size) + if exif_format.name == 'unsigned rational': + tag.values.append(eight_bytes_to_fraction(val_bytes, endian_sign, signed=False)) + elif exif_format.name == 'signed rational': + tag.values.append(eight_bytes_to_fraction(val_bytes, endian_sign, signed=True)) + else: + tag.values.append(struct.unpack(endian_sign + exif_format.short_name, val_bytes)[0]) + if total_bytes < 4: + # special case: multiple values less than 4 bytes in total, inside the 4 bytes; skip the extra bytes + binary_file.seek(4 - total_bytes, 1) + + if verbose: + if len(tag.values) > 100: + print_("Got more than 100 values; printing first 100 only:") + print_("Tag values = {}".format(tag.values[:100])) + else: + print_("Tag values = {}".format(tag.values)) + if tag.data_format == 2: + print_("Tag values (string) = {}".format(b''.join(tag.values).decode())) + + if data_is_offset: + # seek back to current position to read the next tag + print_("Seeking back to current offset = {}".format(current_offset)) + binary_file.seek(current_offset) + + return tag + + +def get_tag_values_from_ifds(tag_num, ifds): + """ + Return values of a tag, if found in ifds. Return None otherwise. + Assuming any tag exists only once in all ifds. + """ + for key, ifd in ifds.items(): + if tag_num in ifd.tags: + return ifd.tags[tag_num].values + return None + + +def eight_bytes_to_fraction(eight_bytes, endian_sign, signed): + """ + Convert 8-byte array into a Fraction. Take care of endian and sign. 
+ """ + if signed: + num = struct.unpack(endian_sign + "l", eight_bytes[:4])[0] + den = struct.unpack(endian_sign + "l", eight_bytes[4:])[0] + else: + num = struct.unpack(endian_sign + "L", eight_bytes[:4])[0] + den = struct.unpack(endian_sign + "L", eight_bytes[4:])[0] + den = den if den != 0 else 1 + return Fraction(num, den) \ No newline at end of file diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/fs.py b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/fs.py new file mode 100644 index 0000000000000000000000000000000000000000..675da56513a81eb4acfbba50d568f14191c4e3d6 --- /dev/null +++ b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/fs.py @@ -0,0 +1,43 @@ +import cv2 +import numpy as np + + +def perform_flash(source, a=5, target=-1, perform_gamma_correction=True): + rows, cols, _ = source.shape + + v = np.max(source, axis=2) + vd = np.copy(v) + vd[vd == 0] = 1e-9 + result = source / (a * np.exp(np.mean(np.log(vd))) + np.tile(np.expand_dims(vd, axis=2), (1, 1, 3))) + + if perform_gamma_correction: + result **= 1.0 / 2.2 + + if target >= 0: + result *= target / np.mean((0.299 * result[:, :, 2] + 0.587 * result[:, :, 1] + 0.114 * result[:, :, 0])) + else: + result *= 255.0 / np.max(result) + + return result + + +def perform_storm(source, a=5, target=-1, kernels=(1, 4, 16, 64, 256), perform_gamma_correction=True): + rows, cols, _ = source.shape + + v = np.max(source, axis=2) + vd = np.copy(v) + vd[vd == 0] = 1e-9 + lv = np.log(vd) + result = sum([source / np.tile( + np.expand_dims(a * np.exp(cv2.boxFilter(lv, -1, (int(min(rows // kernel, cols // kernel)),) * 2)) + vd, axis=2), + (1, 1, 3)) for kernel in kernels]) + + if perform_gamma_correction: + result **= 1.0 / 2.2 + + if target >= 0: + result *= target / np.mean((0.299 * result[:, :, 2] + 0.587 * result[:, :, 1] + 0.114 * result[:, :, 0])) + else: + result *= 255.0 / np.max(result) + + return result diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/io.py b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/io.py new file mode 100644 index 0000000000000000000000000000000000000000..e62baacff2dd91ba4a01936642a8e90c0a4544fa --- /dev/null +++ b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/io.py @@ -0,0 +1,53 @@ +import cv2 +import json +import torch +from pathlib import Path +from fractions import Fraction + + +def get_device(gpu_id=None): + cuda_device = "cuda" + if gpu_id is not None: + assert gpu_id in ["0", "1"] # for local setup with 2 GPUs + cuda_device += f":{gpu_id}" + return torch.device(cuda_device if torch.cuda.is_available() else "cpu") + + +def fraction_from_json(json_object): + if 'Fraction' in json_object: + return Fraction(*json_object['Fraction']) + return json_object + + +def json_read(fname, **kwargs): + with open(fname) as j: + data = json.load(j, **kwargs) + return data + + +def read_image(path): + png_path = Path(path) + raw_image = cv2.imread(str(png_path), cv2.IMREAD_UNCHANGED) + metadata = json_read(png_path.with_suffix('.json'), object_hook=fraction_from_json) + return raw_image, metadata + + +def write_processed_as_jpg(out, dst_path, quality=100): + cv2.imwrite(dst_path, out, [cv2.IMWRITE_JPEG_QUALITY, quality]) + + +def download_weights(url, fname): + import requests + r = requests.get(url, stream=True) + with open(fname, 'wb') as f: + total_length = int(r.headers.get('content-length')) + for chunk in r.iter_content(chunk_size=1024): + if chunk: + f.write(chunk) + f.flush() + + +def unzip(path_to_zip_file, directory_to_extract_to): + import zipfile + with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref: + 
zip_ref.extractall(directory_to_extract_to) \ No newline at end of file diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/misc.py b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..6d906066cacf95a83bda19ee93789547e808ac3b --- /dev/null +++ b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/misc.py @@ -0,0 +1,251 @@ +import numpy as np +from math import ceil +import torch +from modeling.DeepWB.utilities import imresize + + +def decode_cfa_pattern(cfa_pattern): + cfa_dict = {0: 'B', 1: 'G', 2: 'R'} + return "".join([cfa_dict[x] for x in cfa_pattern]) + + +def to_tensor(im, dims=3): + """ Converts a given ndarray image to torch tensor image. + + Args: + im: ndarray image (height x width x channel x [sample]). + dims: dimension number of the given image. If dims = 3, the image should + be in (height x width x channel) format; while if dims = 4, the image + should be in (height x width x channel x sample) format; default is 3. + + Returns: + torch tensor in the format (channel x height x width) or (sample x + channel x height x width). + """ + + assert (dims == 3 or dims == 4) + if dims == 3: + im = im.transpose((2, 0, 1)) + elif dims == 4: + im = im.transpose((0, 3, 1, 2)) + else: + raise NotImplementedError + + return torch.from_numpy(im.copy()) + + +def outOfGamutClipping(I, range=1.): + """ Clips out-of-gamut pixels. """ + if range == 1.: + I[I > 1] = 1 # any pixel is higher than 1, clip it to 1 + I[I < 0] = 0 # any pixel is below 0, clip it to 0 + else: + I[I > 255] = 255 # any pixel is higher than 255, clip it to 255 + I[I < 0] = 0 # any pixel is below 0, clip it to 0 + return I + + +def ratios2floats(ratios): + floats = [] + for ratio in ratios: + floats.append(float(ratio.num) / ratio.den) + return floats + + +def fractions2floats(fractions): + floats = [] + for fraction in fractions: + floats.append(float(fraction.numerator) / fraction.denominator) + return floats + + +def gaussian(kernel_size, sigma): + # calculate which number to where the grid should be + # remember that, kernel_size[0] is the width of the kernel + # and kernel_size[1] is the height of the kernel + temp = np.floor(np.float32(kernel_size) / 2.) + + # create the grid + # example: if kernel_size = [5, 3], then: + # x: array([[-2., -1., 0., 1., 2.], + # [-2., -1., 0., 1., 2.], + # [-2., -1., 0., 1., 2.]]) + # y: array([[-1., -1., -1., -1., -1.], + # [ 0., 0., 0., 0., 0.], + # [ 1., 1., 1., 1., 1.]]) + x, y = np.meshgrid(np.linspace(-temp[0], temp[0], kernel_size[0]), np.linspace(-temp[1], temp[1], kernel_size[1])) + + # Gaussian equation + temp = np.exp(-(x ** 2 + y ** 2) / (2. 
* sigma ** 2)) + + # make kernel sum equal to 1 + return temp / np.sum(temp) + + +def aspect_ratio_imresize(im, max_output=256): + h, w, c = im.shape + if max(h, w) > max_output: + ratio = max_output / max(h, w) + im = imresize.imresize(im, scalar_scale=ratio) + h, w, c = im.shape + + if w % (2 ** 4) == 0: + new_size_w = w + else: + new_size_w = w + (2 ** 4) - w % (2 ** 4) + + if h % (2 ** 4) == 0: + new_size_h = h + else: + new_size_h = h + (2 ** 4) - h % (2 ** 4) + + new_size = (new_size_h, new_size_w) + if not ((h, w) == new_size): + im = imresize.imresize(im, output_shape=new_size) + + return im + + +def cubic(x): + x = np.array(x).astype(np.float64) + absx = np.absolute(x) + absx2 = np.multiply(absx, absx) + absx3 = np.multiply(absx2, absx) + f = np.multiply(1.5*absx3 - 2.5*absx2 + 1, absx <= 1) + np.multiply(-0.5*absx3 + 2.5*absx2 - 4*absx + 2, (1 < absx) & (absx <= 2)) + return f + + +def triangle(x): + x = np.array(x).astype(np.float64) + lessthanzero = np.logical_and((x>=-1),x<0) + greaterthanzero = np.logical_and((x<=1),x>=0) + f = np.multiply((x+1),lessthanzero) + np.multiply((1-x),greaterthanzero) + return f + + +def deriveSizeFromScale(img_shape, scale): + output_shape = [] + for k in range(2): + output_shape.append(int(ceil(scale[k] * img_shape[k]))) + return output_shape + + +def deriveScaleFromSize(img_shape_in, img_shape_out): + scale = [] + for k in range(2): + scale.append(1.0 * img_shape_out[k] / img_shape_in[k]) + return scale + + +def contributions(in_length, out_length, scale, kernel, k_width): + if scale < 1: + h = lambda x: scale * kernel(scale * x) + kernel_width = 1.0 * k_width / scale + else: + h = kernel + kernel_width = k_width + x = np.arange(1, out_length+1).astype(np.float64) + u = x / scale + 0.5 * (1 - 1 / scale) + left = np.floor(u - kernel_width / 2) + P = int(ceil(kernel_width)) + 2 + ind = np.expand_dims(left, axis=1) + np.arange(P) - 1 # -1 because indexing from 0 + indices = ind.astype(np.int32) + weights = h(np.expand_dims(u, axis=1) - indices - 1) # -1 because indexing from 0 + weights = np.divide(weights, np.expand_dims(np.sum(weights, axis=1), axis=1)) + aux = np.concatenate((np.arange(in_length), np.arange(in_length - 1, -1, step=-1))).astype(np.int32) + indices = aux[np.mod(indices, aux.size)] + ind2store = np.nonzero(np.any(weights, axis=0)) + weights = weights[:, ind2store] + indices = indices[:, ind2store] + return weights, indices + + +def imresizemex(inimg, weights, indices, dim): + in_shape = inimg.shape + w_shape = weights.shape + out_shape = list(in_shape) + out_shape[dim] = w_shape[0] + outimg = np.zeros(out_shape) + if dim == 0: + for i_img in range(in_shape[1]): + for i_w in range(w_shape[0]): + w = weights[i_w, :] + ind = indices[i_w, :] + im_slice = inimg[ind, i_img].astype(np.float64) + outimg[i_w, i_img] = np.sum(np.multiply(np.squeeze(im_slice, axis=0), w.T), axis=0) + elif dim == 1: + for i_img in range(in_shape[0]): + for i_w in range(w_shape[0]): + w = weights[i_w, :] + ind = indices[i_w, :] + im_slice = inimg[i_img, ind].astype(np.float64) + outimg[i_img, i_w] = np.sum(np.multiply(np.squeeze(im_slice, axis=0), w.T), axis=0) + if inimg.dtype == np.uint8: + outimg = np.clip(outimg, 0, 255) + return np.around(outimg).astype(np.uint8) + else: + return outimg + + +def imresizevec(inimg, weights, indices, dim): + wshape = weights.shape + if dim == 0: + weights = weights.reshape((wshape[0], wshape[2], 1, 1)) + outimg = np.sum(weights*((inimg[indices].squeeze(axis=1)).astype(np.float64)), axis=1) + elif dim == 1: + weights = 
weights.reshape((1, wshape[0], wshape[2], 1)) + outimg = np.sum(weights*((inimg[:, indices].squeeze(axis=2)).astype(np.float64)), axis=2) + if inimg.dtype == np.uint8: + outimg = np.clip(outimg, 0, 255) + return np.around(outimg).astype(np.uint8) + else: + return outimg + + +def resizeAlongDim(A, dim, weights, indices, mode="vec"): + if mode == "org": + out = imresizemex(A, weights, indices, dim) + else: + out = imresizevec(A, weights, indices, dim) + return out + + +def imresize(I, scalar_scale=None, method='bicubic', output_shape=None, mode="vec"): + if method == 'bicubic': + kernel = cubic + elif method == 'bilinear': + kernel = triangle + else: + print ('Error: Unidentified method supplied') + + kernel_width = 4.0 + # Fill scale and output_size + if scalar_scale is not None: + scalar_scale = float(scalar_scale) + scale = [scalar_scale, scalar_scale] + output_size = deriveSizeFromScale(I.shape, scale) + elif output_shape is not None: + scale = deriveScaleFromSize(I.shape, output_shape) + output_size = list(output_shape) + else: + print ('Error: scalar_scale OR output_shape should be defined!') + return + scale_np = np.array(scale) + order = np.argsort(scale_np) + weights = [] + indices = [] + for k in range(2): + w, ind = contributions(I.shape[k], output_size[k], scale[k], kernel, kernel_width) + weights.append(w) + indices.append(ind) + B = np.copy(I) + flag2D = False + if B.ndim == 2: + B = np.expand_dims(B, axis=2) + flag2D = True + for k in range(2): + dim = order[k] + B = resizeAlongDim(B, dim, weights[dim], indices[dim], mode) + if flag2D: + B = np.squeeze(B, axis=2) + return B \ No newline at end of file diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/optim.py b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/optim.py new file mode 100644 index 0000000000000000000000000000000000000000..cc58d57d7826791df68734dbec88486fc2fa4de5 --- /dev/null +++ b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/optim.py @@ -0,0 +1,30 @@ +import numpy as np +from sklearn.linear_model import LinearRegression + + +def kernelP(I): + """ Kernel function: kernel(r, g, b) -> (r,g,b,rg,rb,gb,r^2,g^2,b^2,rgb,1) + Ref: Hong, et al., "A study of digital camera colorimetric characterization + based on polynomial modeling." Color Research & Application, 2001. """ + return (np.transpose( + (I[:, 0], I[:, 1], I[:, 2], I[:, 0] * I[:, 1], I[:, 0] * I[:, 2], + I[:, 1] * I[:, 2], I[:, 0] * I[:, 0], I[:, 1] * I[:, 1], + I[:, 2] * I[:, 2], I[:, 0] * I[:, 1] * I[:, 2], + np.repeat(1, np.shape(I)[0])))) + + +def get_mapping_func(image1, image2): + """ Computes the polynomial mapping """ + image1 = np.reshape(image1, [-1, 3]) + image2 = np.reshape(image2, [-1, 3]) + m = LinearRegression().fit(kernelP(image1), image2) + return m + + +def apply_mapping_func(image, m): + """ Applies the polynomial mapping """ + sz = image.shape + image = np.reshape(image, [-1, 3]) + result = m.predict(kernelP(image)) + result = np.reshape(result, [sz[0], sz[1], sz[2]]) + return result diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/pipeline.py b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..7cc06f37b84049a711d5da487d00ce52119723aa --- /dev/null +++ b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/pipeline.py @@ -0,0 +1,268 @@ +""" +Demo raw processing pipeline and pipeline executor. 
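A typical invocation of the executor defined below, sketched under the assumption that a raw PNG and its JSON metadata sit side by side (the file name is illustrative). The stages from `normalize` through `denoise` are NumPy-based and can run as-is:

```python
from raw_prc_pipeline.io import read_image
from raw_prc_pipeline.pipeline import RawProcessingPipelineDemo, PipelineExecutor

raw_image, metadata = read_image("night_0001.png")   # hypothetical input pair
pipeline = RawProcessingPipelineDemo(tone_mapping='Flash')
executor = PipelineExecutor(raw_image, metadata, pipeline,
                            first_stage='normalize', last_stage='denoise')
out = executor()   # runs normalize -> demosaic -> denoise
```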
+""" +import sys +sys.path.append('ISP_pipeline') +import numpy as np +from raw_prc_pipeline.pipeline_utils import * +from copy import deepcopy +import hdf5storage +import torch +import json +from fractions import Fraction +import bm3d + +class RawProcessingPipelineDemo: + """ + Demonstration pipeline of raw image processing. + + This pipeline is a baseline pipeline to process raw image. + The public methods of this class are successive steps of raw image processing pipeline. + The declaration order of the public methods must correspond to the order in which these methods (steps) are supposed to be called when processing raw image. + + It is assumed that each public method has 2 parameters: + raw_img : ndarray + Array with images data. + img_meta : Dict + Some metadata of image. + + Also each such public method must return an image (ndarray) as the result of processing. + """ + def __init__(self, illumination_estimation='', + denoise_flg=True, + tone_mapping='Flash', + out_landscape_width=None, + out_landscape_height=None, + color_matrix = [ 1.06835938, -0.29882812, -0.14257812, + -0.43164062, 1.35546875, 0.05078125, + -0.1015625, 0.24414062, 0.5859375]): + """ + RawProcessingPipelineDemo __init__ method. + + Parameters + ---------- + illumination_estimation : str, optional + Options for illumination estimation algorithms: '', 'gw', 'wp', 'sog', 'iwp', by default ''. + denoise_flg : bool, optional + Denoising flag, by default True. + If True, resulted images will be denoised with some predefined parameters. + tone_mapping : str, optional + Options for tone mapping methods, defined in function `apply_tone_map` from `pipeline_utils` module. + By default 'Flash'. + out_landscape_width : int, optional + The width of output image (when orientation is landscape). If None, the image resize will not be performed. + By default None. + out_landscape_height : int, optional + The height of output image (when orientation is landscape). If None, the image resize will not be performed. + By default None. + color_matrix : list, optional + Avg color tranformation matrix. If None, average color transformation matrix of Huawei Mate 40 Pro is used. + """ + + self.params = locals() + del self.params['self'] + + # Linearization not handled. + def linearize_raw(self, raw_img, img_meta): + return raw_img + + def normalize(self, linearized_raw, img_meta): + return normalize(linearized_raw, img_meta['black_level'], img_meta['white_level']) + + def demosaic(self, normalized, img_meta): + return simple_demosaic(normalized, [0, 1, 1, 2]) + + + def denoise(self, normalized, img_meta): + nr_bm3d = bm3d.bm3d(normalized, sigma_psd=10/4095, stage_arg=bm3d.BM3DStages.HARD_THRESHOLDING) + data = (0.3 * (normalized - nr_bm3d) + nr_bm3d).clip(0, 1.) 
+ return data + + def white_balance(self, demosaic, img_meta): + wb_params = img_meta['as_shot_neutral'] + + white_balanced = white_balance(demosaic, wb_params) + return white_balanced + + def bgr_gtm(self, raw_img, gtm_key1, gtm_key2): + raw_img = bgr_gtm(raw_img, gtm_key1, gtm_key2) + return raw_img + + def xyz_transform(self, white_balanced, img_meta): + img_meta["color_matrix_1"] = self.params["color_matrix"] + img_meta["color_matrix_2"] = self.params["color_matrix"] + + return apply_color_space_transform(white_balanced, img_meta['color_matrix_1'], img_meta['color_matrix_2']) + + def srgb_transform(self, xyz, img_meta): + return transform_xyz_to_srgb(xyz) + + def tone_mapping(self, srgb, style): + if self.params['tone_mapping'] is None: + return apply_tone_map(srgb, 'Base') + return apply_tone_map(srgb, style) + + def gamma_correct(self, srgb, img_meta): + return apply_gamma(srgb) + + def autocontrast(self, srgb, img_meta): + # return autocontrast(srgb) + return autocontrast_using_pil(srgb, style='new') + + def perform_autocontrast(self, srgb, method): + assert method in ["pil", "standard", "channel1", "channel2"] + if method == "pil": + return autocontrast_using_pil(srgb) + elif method == "standard": + return perform_autocontrast_standard(srgb, cutoff=(4, 0)) + elif method == "old": + return perform_autocontrast_channel1(srgb) + else: + return perform_autocontrast_channel2(srgb) + + def do_refinement(self, srgb, method, pth): + srgb = csrnet(srgb, pth) + torch.cuda.empty_cache() + return srgb + + def adjust_contrast_brightness(self, srgb): + # return autocontrast(srgb) + return adjust_contrast_brightness(srgb, contrast=1.2, brightness=16) + + def to_uint8(self, srgb, img_meta): + return (srgb*255).type(torch.uint8).cpu().numpy() + + def to_uint16(self, srgb, img_meta): + return (srgb*65535).astype(np.uint16) + + def process_sharpen(self, bgr, img_meta): + bgr = bgr.clip(0, 1) + maxvalue = 255 + if (np.max(bgr) <= 1): + bgr = bgr * maxvalue + + bgr = sharpen_bilateralFilter(bgr) + bgr = bgr/255. + return bgr + + def resize(self, img, img_meta): + if self.params['out_landscape_width'] is None or self.params['out_landscape_height'] is None: + return img + return resize_using_pil(img, self.params['out_landscape_width'], self.params['out_landscape_height']) + + def fix_orientation(self, img, img_meta): + return fix_orientation(img, img_meta['orientation']) + + +class PipelineExecutor: + """ + Pipeline executor class. + + This class can be used to successively execute the steps of some image pipeline class (for example `RawProcessingPipelineDemo`). + The declaration order of the public methods of pipeline class must correspond to the order in which these methods (steps) are supposed to be called when processing image. + + It is assumed that each public method of the pipeline class has 2 parameters: + raw_img : ndarray + Array with images data. + img_meta : Dict + Some meta data of image. + + Also each such public method must return an image (ndarray) as the result of processing. + """ + def __init__(self, img, img_meta, pipeline_obj, first_stage=None, last_stage=None): + """ + PipelineExecutor __init__ method. + + Parameters + ---------- + img : ndarray + Image that should be processed by pipeline. + img_meta : Dict + Some image metadata. + pipeline_obj : pipeline object + Some pipeline object such as RawProcessingPipelineDemo. + first_stage : str, optional + The name of first public method of pipeline object that should be called by PipelineExecutor. 
+ If None, the first public method from defined in pipeline object will be considered as `first_stage` method. + By default None. + last_stage : str, optional + The name of last public method of pipeline object that should be called by PipelineExecutor. + If None, the last public method from defined in pipeline object will be considered as `last_stage` method. + By default None. + """ + self.pipeline_obj = pipeline_obj + self.stages_dict = self._init_stages() + self.stages_names, self.stages = list( + self.stages_dict.keys()), list(self.stages_dict.values()) + + if first_stage is None: + self.next_stage_indx = 0 + else: + assert first_stage in self.stages_names, f"Invalid first_stage={first_stage}. Try use the following stages: {self.stages_names}" + self.next_stage_indx = self.stages_names.index(first_stage) + + if last_stage is None: + self.last_stage_indx = len(self.stages_names) - 1 + else: + assert last_stage in self.stages_names, f"Invalid last_stage={last_stage}. Try use the following stages: {self.stages_names}" + self.last_stage_indx = self.stages_names.index(last_stage) + if self.next_stage_indx > self.last_stage_indx: + print(f'Warning: the specified first_stage={first_stage} follows the specified last_stage={last_stage}, so using __call__ no image processing will be done.') + + self.current_image = img + self.img_meta = img_meta + + def _init_stages(self): + stages = {func: getattr(self.pipeline_obj, func) for func in self.pipeline_obj.__class__.__dict__ if callable( + getattr(self.pipeline_obj, func)) and not func.startswith("_")} + return stages + + @property + def next_stage(self): + if self.next_stage_indx < len(self.stages): + return self.stages_names[self.next_stage_indx] + else: + return None + + @property + def last_stage(self): + return self.stages_names[self.last_stage_indx] + + def __iter__(self): + return self + + def __next__(self): + if self.next_stage_indx < len(self.stages): + stage_func = self.stages[self.next_stage_indx] + self.current_image = stage_func(self.current_image, self.img_meta) + self.next_stage_indx += 1 + return self.current_image + else: + raise StopIteration + + def __call__(self): + """ + PipelineExecutor __call__ method. + + This method will sequentially execute the methods defined in the pipeline object from the `first_stage` to the `last_stage` inclusive. + + Returns + ------- + ndarray + Resulted processed raw image. + """ + for current_image in self: + if self.next_stage_indx > self.last_stage_indx: + return current_image + return self.current_image + +def json_read(fname, **kwargs): + with open(fname) as j: + data = json.load(j, **kwargs) + return data + + +def fraction_from_json(json_object): + if 'Fraction' in json_object: + return Fraction(*json_object['Fraction']) + return json_object \ No newline at end of file diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/pipeline_bm3d.py b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/pipeline_bm3d.py new file mode 100644 index 0000000000000000000000000000000000000000..fe7e292ca864f95bfc6d6de3cf46361f720a0dcb --- /dev/null +++ b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/pipeline_bm3d.py @@ -0,0 +1,223 @@ +""" +Demo raw processing pipeline and pipeline executor. +""" + +import numpy as np +from raw_prc_pipeline.pipeline_utils import * +from bm3d import bm3d_rgb + +class RawProcessingPipelineDemo: + """ + Demonstration pipeline of raw image processing. + + This pipeline is a baseline pipeline to process raw image. + The public methods of this class are successive steps of raw image processing pipeline. 
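The executor discovers these steps by walking the class `__dict__`, which preserves declaration order on Python 3.7+. A toy illustration of the discovery logic (names are made up for the sketch):

```python
class Demo:
    def first(self, img, meta): return img
    def _helper(self, img, meta): return img   # skipped: leading underscore
    def second(self, img, meta): return img

obj = Demo()
stages = {name: getattr(obj, name) for name in obj.__class__.__dict__
          if callable(getattr(obj, name)) and not name.startswith('_')}
print(list(stages))   # ['first', 'second'], i.e. declaration order
```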
+ The declaration order of the public methods must correspond to the order in which these methods (steps) are supposed to be called when processing raw image. + + It is assumed that each public method has 2 parameters: + raw_img : ndarray + Array with images data. + img_meta : Dict + Some metadata of image. + + Also each such public method must return an image (ndarray) as the result of processing. + """ + def __init__(self, illumination_estimation='', + denoise_flg=True, + tone_mapping='Flash', + out_landscape_width=None, + out_landscape_height=None, + color_matrix = [ 1.06835938, -0.29882812, -0.14257812, + -0.43164062, 1.35546875, 0.05078125, + -0.1015625, 0.24414062, 0.5859375]): + """ + RawProcessingPipelineDemo __init__ method. + + Parameters + ---------- + illumination_estimation : str, optional + Options for illumination estimation algorithms: '', 'gw', 'wp', 'sog', 'iwp', by default ''. + denoise_flg : bool, optional + Denoising flag, by default True. + If True, resulted images will be denoised with some predefined parameters. + tone_mapping : str, optional + Options for tone mapping methods, defined in function `apply_tone_map` from `pipeline_utils` module. + By default 'Flash'. + out_landscape_width : int, optional + The width of output image (when orientation is landscape). If None, the image resize will not be performed. + By default None. + out_landscape_height : int, optional + The height of output image (when orientation is landscape). If None, the image resize will not be performed. + By default None. + color_matrix : list, optional + Avg color tranformation matrix. If None, average color transformation matrix of Huawei Mate 40 Pro is used. + """ + + self.params = locals() + del self.params['self'] + + # Linearization not handled. + def linearize_raw(self, raw_img, img_meta): + return raw_img + + def normalize(self, linearized_raw, img_meta): + return normalize(linearized_raw, img_meta['black_level'], img_meta['white_level']) + + def demosaic(self, normalized, img_meta): + return simple_demosaic(normalized, img_meta['cfa_pattern']) + + def denoise(self, demosaic, img_meta): + if not self.params['denoise_flg']: + return demosaic + return denoise_image(demosaic) + + # def denoise(self, demosaic, img_meta): + # if not self.params['denoise_flg']: + # return demosaic + # return bm3d_rgb(demosaic, sigma_psd=0.1) + + def white_balance(self, demosaic, img_meta): + if self.params['illumination_estimation'] == '': + wb_params = img_meta['as_shot_neutral'] + else: + wb_params = illumination_parameters_estimation( + demosaic, self.params['illumination_estimation']) + + white_balanced = white_balance(demosaic, wb_params) + return white_balanced + + def xyz_transform(self, white_balanced, img_meta): + # in case of absence of color matrix we use mean color matrix + if "color_matrix_1" not in img_meta.keys(): + img_meta["color_matrix_1"] = self.params["color_matrix"] + img_meta["color_matrix_2"] = self.params["color_matrix"] + return apply_color_space_transform(white_balanced, img_meta['color_matrix_1'], img_meta['color_matrix_2']) + + def srgb_transform(self, xyz, img_meta): + return transform_xyz_to_srgb(xyz) + + def tone_mapping(self, srgb, img_meta): + if self.params['tone_mapping'] is None: + return apply_tone_map(srgb, 'Base') + return apply_tone_map(srgb, self.params['tone_mapping']) + + def gamma_correct(self, srgb, img_meta): + return apply_gamma(srgb) + + def autocontrast(self, srgb, img_meta): + # return autocontrast(srgb) + return autocontrast_using_pil(srgb) + + def 
to_uint8(self, srgb, img_meta): + return (srgb*255).astype(np.uint8) + + def resize(self, img, img_meta): + if self.params['out_landscape_width'] is None or self.params['out_landscape_height'] is None: + return img + return resize_using_pil(img, self.params['out_landscape_width'], self.params['out_landscape_height']) + + def fix_orientation(self, img, img_meta): + return fix_orientation(img, img_meta['orientation']) + + +class PipelineExecutor: + """ + Pipeline executor class. + + This class can be used to successively execute the steps of some image pipeline class (for example `RawProcessingPipelineDemo`). + The declaration order of the public methods of pipeline class must correspond to the order in which these methods (steps) are supposed to be called when processing image. + + It is assumed that each public method of the pipeline class has 2 parameters: + raw_img : ndarray + Array with images data. + img_meta : Dict + Some meta data of image. + + Also each such public method must return an image (ndarray) as the result of processing. + """ + def __init__(self, img, img_meta, pipeline_obj, first_stage=None, last_stage=None): + """ + PipelineExecutor __init__ method. + + Parameters + ---------- + img : ndarray + Image that should be processed by pipeline. + img_meta : Dict + Some image metadata. + pipeline_obj : pipeline object + Some pipeline object such as RawProcessingPipelineDemo. + first_stage : str, optional + The name of first public method of pipeline object that should be called by PipelineExecutor. + If None, the first public method from defined in pipeline object will be considered as `first_stage` method. + By default None. + last_stage : str, optional + The name of last public method of pipeline object that should be called by PipelineExecutor. + If None, the last public method from defined in pipeline object will be considered as `last_stage` method. + By default None. + """ + self.pipeline_obj = pipeline_obj + self.stages_dict = self._init_stages() + self.stages_names, self.stages = list( + self.stages_dict.keys()), list(self.stages_dict.values()) + + if first_stage is None: + self.next_stage_indx = 0 + else: + assert first_stage in self.stages_names, f"Invalid first_stage={first_stage}. Try use the following stages: {self.stages_names}" + self.next_stage_indx = self.stages_names.index(first_stage) + + if last_stage is None: + self.last_stage_indx = len(self.stages_names) - 1 + else: + assert last_stage in self.stages_names, f"Invalid last_stage={last_stage}. 
Try use the following stages: {self.stages_names}" + self.last_stage_indx = self.stages_names.index(last_stage) + if self.next_stage_indx > self.last_stage_indx: + print(f'Warning: the specified first_stage={first_stage} follows the specified last_stage={last_stage}, so using __call__ no image processing will be done.') + + self.current_image = img + self.img_meta = img_meta + + def _init_stages(self): + stages = {func: getattr(self.pipeline_obj, func) for func in self.pipeline_obj.__class__.__dict__ if callable( + getattr(self.pipeline_obj, func)) and not func.startswith("_")} + return stages + + @property + def next_stage(self): + if self.next_stage_indx < len(self.stages): + return self.stages_names[self.next_stage_indx] + else: + return None + + @property + def last_stage(self): + return self.stages_names[self.last_stage_indx] + + def __iter__(self): + return self + + def __next__(self): + if self.next_stage_indx < len(self.stages): + stage_func = self.stages[self.next_stage_indx] + self.current_image = stage_func(self.current_image, self.img_meta) + self.next_stage_indx += 1 + return self.current_image + else: + raise StopIteration + + def __call__(self): + """ + PipelineExecutor __call__ method. + + This method will sequentially execute the methods defined in the pipeline object from the `first_stage` to the `last_stage` inclusive. + + Returns + ------- + ndarray + Resulted processed raw image. + """ + for current_image in self: + if self.next_stage_indx > self.last_stage_indx: + return current_image + return self.current_image diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/pipeline_utils.py b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/pipeline_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..ae137adcd04db20bc5bcfbf6c99f2bfa10b8a7b0 --- /dev/null +++ b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/pipeline_utils.py @@ -0,0 +1,712 @@ +""" +Camera pipeline utilities. 
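A rough sketch of how the NumPy helpers defined below compose; the black/white levels and CFA pattern are illustrative stand-ins for real DNG metadata:

```python
import numpy as np

raw = np.random.randint(0, 1023, size=(8, 8)).astype(np.uint16)  # fake RGGB mosaic
img = normalize(raw, black_level=64, white_level=1023)           # floats in [0, 1]
rgb = simple_demosaic(img, cfa_pattern=[0, 1, 1, 2])             # RGGB -> half-res RGB
print(rgb.shape)                                                 # (4, 4, 3)
```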
+""" + +import os +from fractions import Fraction + +import cv2 +import numpy as np +import exifread +# from exifread import Ratio +from exifread.utils import Ratio +import rawpy +from scipy.io import loadmat +from raw_prc_pipeline.exif_utils import parse_exif, get_tag_values_from_ifds +from raw_prc_pipeline.fs import perform_storm, perform_flash +from PIL import Image, ImageOps +from skimage.restoration import denoise_bilateral +from skimage.transform import resize as skimage_resize +import torch + +from kornia.geometry.transform import resize + +# from modeling import weight_refinement +# from raw_prc_pipeline import misc, optim, color +import pdb +from copy import deepcopy +import hdf5storage +import torch +from scipy import signal + +def get_visible_raw_image(image_path): + raw_image = rawpy.imread(image_path).raw_image_visible.copy() + # raw_image = rawpy.imread(image_path).raw_image.copy() + return raw_image + + +def get_image_tags(image_path): + with open(image_path, 'rb') as f: + tags = exifread.process_file(f) + return tags + + +def get_image_ifds(image_path): + ifds = parse_exif(image_path, verbose=False) + return ifds + + +def get_metadata(image_path): + metadata = {} + tags = get_image_tags(image_path) + ifds = get_image_ifds(image_path) + metadata['linearization_table'] = get_linearization_table(tags, ifds) + metadata['black_level'] = get_black_level(tags, ifds) + metadata['white_level'] = get_white_level(tags, ifds) + metadata['cfa_pattern'] = get_cfa_pattern(tags, ifds) + metadata['as_shot_neutral'] = get_as_shot_neutral(tags, ifds) + color_matrix_1, color_matrix_2 = get_color_matrices(tags, ifds) + metadata['color_matrix_1'] = color_matrix_1 + metadata['color_matrix_2'] = color_matrix_2 + metadata['orientation'] = get_orientation(tags, ifds) + # isn't used + metadata['noise_profile'] = get_noise_profile(tags, ifds) + # ... + # fall back to default values, if necessary + if metadata['black_level'] is None: + metadata['black_level'] = 0 + print("Black level is None; using 0.") + if metadata['white_level'] is None: + metadata['white_level'] = 2 ** 16 + print("White level is None; using 2 ** 16.") + if metadata['cfa_pattern'] is None: + metadata['cfa_pattern'] = [0, 1, 1, 2] + print("CFAPattern is None; using [0, 1, 1, 2] (RGGB)") + if metadata['as_shot_neutral'] is None: + metadata['as_shot_neutral'] = [1, 1, 1] + print("AsShotNeutral is None; using [1, 1, 1]") + if metadata['color_matrix_1'] is None: + metadata['color_matrix_1'] = [1] * 9 + print("ColorMatrix1 is None; using [1, 1, 1, 1, 1, 1, 1, 1, 1]") + if metadata['color_matrix_2'] is None: + metadata['color_matrix_2'] = [1] * 9 + print("ColorMatrix2 is None; using [1, 1, 1, 1, 1, 1, 1, 1, 1]") + if metadata['orientation'] is None: + metadata['orientation'] = 0 + print("Orientation is None; using 0.") + # ... + return metadata + + +def get_linearization_table(tags, ifds): + possible_keys = ['Image Tag 0xC618', 'Image Tag 50712', + 'LinearizationTable', 'Image LinearizationTable'] + return get_values(tags, possible_keys) + + +def get_black_level(tags, ifds): + possible_keys = ['Image Tag 0xC61A', 'Image Tag 50714', + 'BlackLevel', 'Image BlackLevel'] + vals = get_values(tags, possible_keys) + if vals is None: + # print("Black level not found in exifread tags. 
Searching IFDs.") + vals = get_tag_values_from_ifds(50714, ifds) + return vals + + +def get_white_level(tags, ifds): + possible_keys = ['Image Tag 0xC61D', 'Image Tag 50717', + 'WhiteLevel', 'Image WhiteLevel'] + vals = get_values(tags, possible_keys) + if vals is None: + # print("White level not found in exifread tags. Searching IFDs.") + vals = get_tag_values_from_ifds(50717, ifds) + return vals + + +def get_cfa_pattern(tags, ifds): + possible_keys = ['CFAPattern', 'Image CFAPattern'] + vals = get_values(tags, possible_keys) + if vals is None: + # print("CFAPattern not found in exifread tags. Searching IFDs.") + vals = get_tag_values_from_ifds(33422, ifds) + return vals + + +def get_as_shot_neutral(tags, ifds): + possible_keys = ['Image Tag 0xC628', 'Image Tag 50728', + 'AsShotNeutral', 'Image AsShotNeutral'] + return get_values(tags, possible_keys) + + +def get_color_matrices(tags, ifds): + possible_keys_1 = ['Image Tag 0xC621', 'Image Tag 50721', + 'ColorMatrix1', 'Image ColorMatrix1'] + color_matrix_1 = get_values(tags, possible_keys_1) + possible_keys_2 = ['Image Tag 0xC622', 'Image Tag 50722', + 'ColorMatrix2', 'Image ColorMatrix2'] + color_matrix_2 = get_values(tags, possible_keys_2) + #print(f'Color matrix 1:{color_matrix_1}') + #print(f'Color matrix 2:{color_matrix_2}') + #print(np.sum(np.abs(np.array(color_matrix_1) - np.array(color_matrix_2)))) + return color_matrix_1, color_matrix_2 + + +def get_orientation(tags, ifds): + possible_tags = ['Orientation', 'Image Orientation'] + return get_values(tags, possible_tags) + + +def get_noise_profile(tags, ifds): + possible_keys = ['Image Tag 0xC761', 'Image Tag 51041', + 'NoiseProfile', 'Image NoiseProfile'] + vals = get_values(tags, possible_keys) + if vals is None: + # print("Noise profile not found in exifread tags. 
Searching IFDs.") + vals = get_tag_values_from_ifds(51041, ifds) + return vals + + +def get_values(tags, possible_keys): + values = None + for key in possible_keys: + if key in tags.keys(): + values = tags[key].values + return values + + +def normalize(raw_image, black_level, white_level): + if type(black_level) is list and len(black_level) == 1: + black_level = float(black_level[0]) + if type(white_level) is list and len(white_level) == 1: + white_level = float(white_level[0]) + black_level_mask = black_level + if type(black_level) is list and len(black_level) == 4: + if type(black_level[0]) is Ratio: + black_level = ratios2floats(black_level) + if type(black_level[0]) is Fraction: + black_level = fractions2floats(black_level) + black_level_mask = np.zeros(raw_image.shape) + idx2by2 = [[0, 0], [0, 1], [1, 0], [1, 1]] + step2 = 2 + for i, idx in enumerate(idx2by2): + black_level_mask[idx[0]::step2, idx[1]::step2] = black_level[i] + normalized_image = raw_image.astype(np.float32) - black_level_mask + # if some values were smaller than black level + normalized_image[normalized_image < 0] = 0 + normalized_image = normalized_image / (white_level - black_level_mask) + return normalized_image + + +def ratios2floats(ratios): + floats = [] + for ratio in ratios: + floats.append(float(ratio.num) / ratio.den) + return floats + + +def fractions2floats(fractions): + floats = [] + for fraction in fractions: + floats.append(float(fraction.numerator) / fraction.denominator) + return floats + + +def illumination_parameters_estimation(current_image, illumination_estimation_option): + ie_method = illumination_estimation_option.lower() + if ie_method == "gw": + ie = np.mean(current_image, axis=(0, 1)) + ie /= ie[1] + return ie + elif ie_method == "sog": + sog_p = 4. + ie = np.mean(current_image**sog_p, axis=(0, 1))**(1/sog_p) + ie /= ie[1] + return ie + elif ie_method == "wp": + ie = np.max(current_image, axis=(0, 1)) + ie /= ie[1] + return ie + elif ie_method == "iwp": + samples_count = 20 + sample_size = 20 + rows, cols = current_image.shape[:2] + data = np.reshape(current_image, (rows*cols, 3)) + maxima = np.zeros((samples_count, 3)) + for i in range(samples_count): + maxima[i, :] = np.max(data[np.random.randint( + low=0, high=rows*cols, size=(sample_size)), :], axis=0) + ie = np.mean(maxima, axis=0) + ie /= ie[1] + return ie + else: + raise ValueError( + 'Bad illumination_estimation_option value! 
Use the following options: "gw", "wp", "sog", "iwp"') + + +def white_balance(demosaic_img, as_shot_neutral): + if type(as_shot_neutral[0]) is Ratio: + as_shot_neutral = ratios2floats(as_shot_neutral) + + as_shot_neutral = np.asarray(as_shot_neutral) + # transform vector into matrix + if as_shot_neutral.shape == (3,): + as_shot_neutral = np.diag(1./as_shot_neutral) + + assert as_shot_neutral.shape == (3, 3) + + as_shot_neutral = torch.tensor(as_shot_neutral.T, dtype=torch.float32).cuda() + demosaic_img = demosaic_img @ as_shot_neutral + demosaic_img = torch.clamp(demosaic_img, 0.0, 1.0) + + return demosaic_img + + +def simple_demosaic(img, cfa_pattern): + raw_colors = np.asarray(cfa_pattern).reshape((2, 2)) + demosaiced_image = np.zeros((img.shape[0]//2, img.shape[1]//2, 3)) + for i in range(2): + for j in range(2): + ch = raw_colors[i, j] + if ch == 1: + demosaiced_image[:, :, ch] += img[i::2, j::2] / 2 + else: + demosaiced_image[:, :, ch] = img[i::2, j::2] + return demosaiced_image + + +def denoise_image(demosaiced_image): + current_image = denoise_bilateral( + demosaiced_image, sigma_color=None, sigma_spatial=2., channel_axis=-1, mode='reflect') + return current_image + +def apply_color_space_transform(demosaiced_image, color_matrix_1, color_matrix_2): + # pdb.set_trace() + # if isinstance(color_matrix_1[0], Fraction): + # color_matrix_1 = fractions2floats(color_matrix_1) + + # xyz2cam1 = np.reshape(np.asarray(color_matrix_1), (3, 3)) + + # # normalize rows (needed?) + # xyz2cam1 = xyz2cam1 / np.sum(xyz2cam1, axis=1, keepdims=True) + + # # inverse + # cam2xyz1 = np.linalg.inv(xyz2cam1) + + cam2xyz1 = torch.tensor([[ 0.64782996, 0.18070131, 0.17146873], + [ 0.20529524, 0.78768572, 0.00701903], + [ 0.02675084, -0.29688082, 1.27012997]], dtype=torch.float32).cuda() + + # for now, use one matrix # TODO: interpolate btween both + # simplified matrix multiplication + xyz_image = cam2xyz1.unsqueeze(0).unsqueeze(0) * \ + demosaiced_image.unsqueeze(2) + xyz_image = torch.sum(xyz_image, dim=-1) + xyz_image = torch.clamp(xyz_image, 0.0, 1.0) + del demosaiced_image, cam2xyz1 + return xyz_image + + +def transform_xyz_to_srgb(xyz_image): + # srgb2xyz = np.array([[0.4124564, 0.3575761, 0.1804375], + # [0.2126729, 0.7151522, 0.0721750], + # [0.0193339, 0.1191920, 0.9503041]]) + + # xyz2srgb = np.linalg.inv(srgb2xyz) + + # xyz2srgb = np.array( [[ 1.9712269,-0.6789218, -0.29230508], + # [-0.29104823, 1.748401 , -0.45735288], + # [ 0.02051281,-0.5380369, 1.5175241 ]]) + + # P40 ccm + # xyz2srgb = np.array([[1.521689, -0.673763, 0.152074], + # [-0.145724, 1.266507, -0.120783], + # [-0.0397583, -0.561249, 1.60100734]]) + + # xyz2srgb = np.array([[3.2404542, -1.5371385, -0.4985314], + # [-0.9692660, 1.8760108, 0.0415560], + # [0.0556434, -0.2040259, 1.0572252]]) + + # # normalize rows (needed?) 
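On the "(needed?)" above: row-normalizing a color matrix makes each output channel's coefficients sum to 1, so an all-ones input stays all-ones after the transform. A small numeric check using the commented-out sRGB matrix from this block:

```python
import numpy as np

M = np.array([[ 3.2404542, -1.5371385, -0.4985314],
              [-0.9692660,  1.8760108,  0.0415560],
              [ 0.0556434, -0.2040259,  1.0572252]])
Mn = M / np.sum(M, axis=-1, keepdims=True)   # normalize rows
print(M  @ np.ones(3))   # ~[1.205, 0.948, 0.909]: a flat input picks up a cast
print(Mn @ np.ones(3))   # [1. 1. 1.]: the white point is preserved
```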
+    # xyz2srgb = xyz2srgb / np.sum(xyz2srgb, axis=-1, keepdims=True)
+
+    xyz2srgb = torch.tensor([[ 2.68965507, -1.27586199, -0.41379307],
+                             [-1.02210817,  1.97828664,  0.04382154],
+                             [ 0.06122446, -0.22448978,  1.16326533]], dtype=torch.float32).cuda()
+
+    srgb_image = xyz2srgb.unsqueeze(0).unsqueeze(0) * xyz_image.unsqueeze(2)
+    srgb_image = torch.sum(srgb_image, dim=-1)
+    srgb_image = torch.clip(srgb_image, 0.0, 1.0)
+    del xyz_image, xyz2srgb
+    return srgb_image
+
+
+def reverse_orientation(image, orientation):
+    # 1 = Horizontal (normal)
+    # 2 = Mirror horizontal
+    # 3 = Rotate 180
+    # 4 = Mirror vertical
+    # 5 = Mirror horizontal and rotate 270 CW
+    # 6 = Rotate 90 CW
+    # 7 = Mirror horizontal and rotate 90 CW
+    # 8 = Rotate 270 CW
+    rev_orientations = np.array([1, 2, 3, 4, 5, 8, 7, 6])
+    return fix_orientation(image, rev_orientations[orientation - 1])
+
+
+def apply_gamma(x):
+    # sRGB OETF: linear segment below 0.0031308, power curve above
+    # return x ** (1.0 / 2.2)
+    # x = x.copy()
+    idx = x <= 0.0031308
+    x[idx] *= 12.92
+    x[~idx] = (x[~idx] ** (1.0 / 2.4)) * 1.055 - 0.055
+    return x
+
+def bgr_gtm(bgr, key_1, key_2):
+    min_val = 1 / (2 ** 16)
+    # luma via BT.601 weights
+    y = bgr[:, :, 0] * 0.299 + bgr[:, :, 1] * 0.587 + bgr[:, :, 2] * 0.114
+    b = y * (key_2 - key_1) + key_1
+    ld = (np.log(y + b) - np.log(b)) / (np.log(1 + b) - np.log(b))
+    y = np.maximum(y, min_val)
+    gain = ld / y
+    gain = np.stack([gain, gain, gain], axis=-1)
+    bgr = bgr * gain
+
+    return bgr
+
+def sharpen_bilateralFilter(RGB):
+    d = 3            # kernel size
+    sigmaColor = 10  # color domain sigma
+    sigmaSpace = 10  # space domain sigma
+
+    weight = 2
+    weight_ratio = 0.1
+
+    h, w, c = RGB.shape
+    ycc = rgb2ycbcr(RGB, w, h)
+    ycc_out = ycc
+    y = ycc[:, :, 0]
+    cb = ycc[:, :, 1]
+    cr = ycc[:, :, 2]
+
+    # smooth the luma channel, then re-amplify the detail layer (unsharp masking on Y only)
+    y_bilateral_filtered = cv2.bilateralFilter(y.astype(np.float32), d, sigmaColor, sigmaSpace)
+    detail = ycc[:, :, 0] - y_bilateral_filtered
+
+    y_out = y_bilateral_filtered + weight * detail
+    y_out = np.clip(y_out, 0, 255)
+
+    ycc_out[:, :, 0] = y_out
+    rgb_out = ycbcr2rgb(ycc_out, w, h)
+    return rgb_out
+
+# YCbCr conversion for values in the 0-255 range
+def rgb2ycbcr(image, width, height):
+    ycbcr_img = np.zeros(shape=(height, width, 3))
+    ycbcr_img[:,:,0] = 0.299*image[:,:,0] + 0.587*image[:,:,1] + 0.114*image[:,:,2]
+    ycbcr_img[:,:,1] = 128 - 0.168736*image[:,:,0] - 0.331264*image[:,:,1] + 0.5*image[:,:,2]
+    ycbcr_img[:,:,2] = 128 + 0.5*image[:,:,0] - 0.418688*image[:,:,1] - 0.081312*image[:,:,2]
+    ycbcr_img = np.clip(ycbcr_img, 0, 255)
+    return ycbcr_img
+
+# YCbCr conversion for values in the 0-255 range
+def ycbcr2rgb(image, width, height):
+    rgb_img = np.zeros(shape=(height, width, 3))
+    rgb_img[:,:,0] = image[:,:,0] + 1.402*(image[:,:,2]-128)                                   # R = Y + 1.402*(Cr-128)
+    rgb_img[:,:,1] = image[:,:,0] - 0.344136*(image[:,:,1]-128) - 0.714136*(image[:,:,2]-128)  # G = Y - 0.344136*(Cb-128) - 0.714136*(Cr-128)
+    rgb_img[:,:,2] = image[:,:,0] + 1.772*(image[:,:,1]-128)                                   # B = Y + 1.772*(Cb-128)
+    rgb_img = np.clip(rgb_img, 0, 255)
+    return rgb_img
+
+def apply_tone_map(x, tone_mapping='Base'):
+    if tone_mapping == 'Flash':
+        return perform_flash(x, perform_gamma_correction=0)/255.
+    elif tone_mapping == 'Storm':
+        return perform_storm(x, perform_gamma_correction=0)/255.
+ elif tone_mapping == 'Drago': + tonemap = cv2.createTonemapDrago() + return tonemap.process(x.astype(np.float32)) + elif tone_mapping == 'Mantiuk': + tonemap = cv2.createTonemapMantiuk() + return tonemap.process(x.astype(np.float32)) + elif tone_mapping == 'Reinhard': + tonemap = cv2.createTonemapReinhard() + return tonemap.process(x.astype(np.float32)) + elif tone_mapping == 'Linear': + return np.clip(x/np.sort(x.flatten())[-50000], 0, 1) + elif tone_mapping == 'Base': + # return 3 * x ** 2 - 2 * x ** 3 + # tone_curve = loadmat('tone_curve.mat') + tone_curve = loadmat(os.path.join(os.path.dirname( + os.path.realpath(__file__)), 'tone_curve.mat')) + tone_curve = tone_curve['tc'] + x = np.round(x * (len(tone_curve) - 1)).astype(int) + tone_mapped_image = np.squeeze(tone_curve[x]) + return tone_mapped_image + else: + raise ValueError( + 'Bad tone_mapping option value! Use the following options: "Base", "Flash", "Storm", "Linear", "Drago", "Mantiuk", "Reinhard"') + + +def autocontrast(output_image, cutoff_prcnt=2, preserve_tone=False): + if preserve_tone: + min_val, max_val = np.percentile(output_image, [cutoff_prcnt, 100 - cutoff_prcnt]) + output_image = (output_image - min_val)/(max_val - min_val) + else: + channels = [None]*3 + for ch in range(3): + min_val, max_val = np.percentile(output_image[...,ch], [cutoff_prcnt, 100 - cutoff_prcnt]) + channels[ch] = (output_image[...,ch] - min_val)/(max_val - min_val) + output_image = np.dstack(channels) + output_image = np.clip(output_image, 0, 1) + return output_image + + +def autocontrast_using_pil(img, style='new', cutoff=4): + img_uint8 = np.clip(255*img, 0, 255).astype(np.uint8) + img_pil = Image.fromarray(img_uint8) + img_pil = ImageOps.autocontrast(img_pil, cutoff=cutoff) + output_image = np.array(img_pil).astype(np.float32) / 255 + return output_image + +def _lut(image, lut): + if image.mode == "P": + # FIXME: apply to lookup table, not image data + raise NotImplementedError("mode P support coming soon") + elif image.mode in ("L", "RGB"): + if image.mode == "RGB" and len(lut) == 256: + lut = lut + lut + lut + return image.point(lut) + else: + raise OSError("not supported for this image mode") + + +def autocontrast(image, cutoff=(0, 0), ignore=None): + """ + Maximize (normalize) image contrast. This function calculates a + histogram of the input image, removes **cutoff** percent of the + lightest and darkest pixels from the histogram, and remaps the image + so that the darkest pixel becomes black (0), and the lightest + becomes white (255). + + :param image: The image to process. + :param cutoff: How many percent to cut off from the histogram. + :param ignore: The background pixel value (use None for no background). + :return: An image. 
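For example, illustrative usage of this local variant, which takes separate low/high cutoff percentages:

```python
from PIL import Image
import numpy as np

img = Image.fromarray(np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8))
# clip 4% of the darkest pixels, none of the brightest, then restretch
out = autocontrast(img, cutoff=(4, 0))
```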
+ """ + histogram = image.histogram() + lut = [] + for layer in range(0, len(histogram), 256): + h = histogram[layer : layer + 256] + if ignore is not None: + # get rid of outliers + try: + h[ignore] = 0 + except TypeError: + # assume sequence + for ix in ignore: + h[ix] = 0 + if cutoff: + # cut off pixels from both ends of the histogram + # get number of pixels + n = 0 + for ix in range(256): + n = n + h[ix] + # remove cutoff% pixels from the low end + cut = n * cutoff[0] // 100 + for lo in range(256): + if cut > h[lo]: + cut = cut - h[lo] + h[lo] = 0 + else: + h[lo] -= cut + cut = 0 + if cut <= 0: + break + # remove cutoff% samples from the hi end + cut = n * cutoff[1] // 100 + for hi in range(255, -1, -1): + if cut > h[hi]: + cut = cut - h[hi] + h[hi] = 0 + else: + h[hi] -= cut + cut = 0 + if cut <= 0: + break + # find lowest/highest samples after preprocessing + for lo in range(256): + if h[lo]: + break + for hi in range(255, -1, -1): + if h[hi]: + break + if hi <= lo: + # don't bother + lut.extend(list(range(256))) + else: + scale = 255.0 / (hi - lo) + offset = -lo * scale + for ix in range(256): + ix = int(ix * scale + offset) + if ix < 0: + ix = 0 + elif ix > 255: + ix = 255 + lut.append(ix) + return _lut(image, lut) + + +def perform_autocontrast_standard(img, cutoff=(2, 0)): + img_uint8 = np.clip(255*img, 0, 255).astype(np.uint8) + img_pil = Image.fromarray(img_uint8) + img_pil = autocontrast(img_pil, cutoff=cutoff) + output_image = np.array(img_pil).astype(np.float32) / 255. + return output_image + + +def perform_autocontrast_channel1(img): + + def reject_outliers(data, m=1.2): + return abs(data - np.mean(data)) < m * np.std(data) + + def get_cutoff(img_ch): + values, _ = np.histogram(img_ch, bins=32) + ratios = values / values.sum() + cutoff = 4 if reject_outliers(values)[0] else 4 - np.log(100 * np.abs(ratios[1]-ratios[0])) + if cutoff < 0: + cutoff = 0 + return int(cutoff) + + img_uint8 = np.clip(255*img, 0, 255).astype(np.uint8) + img_pil = Image.fromarray(img_uint8) + r, g, b = img_pil.split() + cutoff_r = get_cutoff(np.array(r).flatten()) + cutoff_g = get_cutoff(np.array(g).flatten()) + cutoff_b = get_cutoff(np.array(b).flatten()) + r_ = autocontrast(r, cutoff=(cutoff_r, 0)) + g_ = autocontrast(g, cutoff=(cutoff_g, 0)) + b_ = autocontrast(b, cutoff=(cutoff_b, 0)) + output_r = np.array(r_).astype(np.float32) / 255. + output_g = np.array(g_).astype(np.float32) / 255. + output_b = np.array(b_).astype(np.float32) / 255. + output_image = np.transpose(np.array([output_r, output_g, output_b]), (1, 2, 0)) + return output_image + + +def perform_autocontrast_channel2(img): + + def get_cutoff(img_uint8, base_cutoff=4): + cutoff = list() + h, w, _ = img_uint8.shape + for ch in Image.fromarray(img_uint8).split(): + values, _ = np.histogram(np.array(ch).flatten(), bins=32) + cutoff.append(np.ceil((values.cumsum() / (h * w))[0] * 100).astype(int)) + cutoff = [coff if coff > base_cutoff else base_cutoff for coff in cutoff] + return cutoff + + img_uint8 = np.clip(255*img, 0, 255).astype(np.uint8) + cutoff = get_cutoff(img_uint8) + output = np.array([ + np.array(autocontrast(ch, cutoff=(coff, 0))).astype(np.float32) / 255. 
+        for ch, coff in zip(Image.fromarray(img_uint8).split(), cutoff)
+    ])
+    return np.transpose(output, (1, 2, 0))
+
+def csrnet(img, pth):
+    device = torch.device("cuda")
+    from raw_prc_pipeline.csrnet_network import CSRNet as NET
+    checkpoint_path = pth
+    model = get_parm(NET, checkpoint_path, device)
+
+    # img = np.pad(img, ((0, 24), (0, 16), (0, 0)), 'reflect')
+    # img = img.astype(np.float32)
+
+    # HWC -> NCHW for the network
+    img = img.unsqueeze(0).permute(0, 3, 1, 2).contiguous()
+
+    with torch.no_grad():
+        output_image = model(img)
+
+    # output_image = output_image.detach().cpu().squeeze(0).numpy().transpose(1, 2, 0)
+    output_image = output_image[0].permute(1, 2, 0)
+    output_image = torch.clamp(output_image, 0, 1)
+
+    torch.cuda.empty_cache()
+
+    return output_image
+
+def get_net(NET, checkpoint_path, device):
+    net = NET()
+    load_net = torch.load(checkpoint_path, map_location=device)
+    try:
+        load_net = load_net['params']
+    except KeyError:
+        load_net = load_net['state_dict_model']
+
+    # remove the 'module.' prefix left over from DataParallel checkpoints
+    for k, v in deepcopy(load_net).items():
+        if k.startswith('module.'):
+            load_net[k[7:]] = v
+            load_net.pop(k)
+
+    net.load_state_dict(load_net, strict=True)
+    net = net.to(device)
+    net = net.eval()
+    return net
+
+def get_parm(NET, checkpoint_path, device):
+    net = NET()
+    load_net = torch.load(checkpoint_path, map_location=device)
+
+    net.load_state_dict(load_net, strict=True)
+    net = net.to(device)
+    net = net.eval()
+    return net
+
+def raw_rgb_to_cct(rawRgb, xyz2cam1, xyz2cam2):
+    """Convert a raw-RGB triplet to the corresponding correlated color temperature (CCT)."""
+    pass
+
+def resize_using_skimage(img, width=1296, height=864):
+    out_shape = (height, width) + img.shape[2:]
+    if img.shape == out_shape:
+        return img
+    out_img = skimage_resize(img, out_shape, preserve_range=True, anti_aliasing=True)
+    out_img = out_img.astype(np.uint8)
+    return out_img
+
+
+def resize_using_pil(img, width=1296, height=864):
+    img_pil = Image.fromarray(img)
+    out_size = (width, height)
+    if img_pil.size == out_size:
+        return img
+    # LANCZOS supersedes the deprecated Image.ANTIALIAS
+    out_img = img_pil.resize(out_size, Image.LANCZOS)
+    out_img = np.array(out_img)
+    return out_img
+
+
+def fix_orientation(image, orientation):
+
+    if type(orientation) is list:
+        orientation = orientation[0]
+
+    if orientation == 1:
+        pass
+    elif orientation == 2:
+        image = cv2.flip(image, 0)
+    elif orientation == 3:
+        image = cv2.rotate(image, cv2.ROTATE_180)
+    elif orientation == 4:
+        image = cv2.flip(image, 1)
+    elif orientation == 5:
+        image = cv2.flip(image, 0)
+        image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
+    elif orientation == 6:
+        image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
+    elif orientation == 7:
+        image = cv2.flip(image, 0)
+        image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
+    elif orientation == 8:
+        image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
+
+    return image
+
+def adjust_contrast_brightness(img, contrast:float=1.0, brightness:int=0):
+    """
+    Adjusts the contrast and brightness of a uint8 image.
+ contrast: (0.0, inf) with 1.0 leaving the contrast as is + brightness: [-255, 255] with 0 leaving the brightness as is + """ + brightness += int(round(255*(1-contrast)/2)) + return cv2.addWeighted(img, contrast, img, 0, brightness) \ No newline at end of file diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/refine_network.py b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/refine_network.py new file mode 100644 index 0000000000000000000000000000000000000000..38165babe0886a9eb87c0eb5dab6ea7de8813dac --- /dev/null +++ b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/refine_network.py @@ -0,0 +1,496 @@ +import math +import torch +import torch.optim as optim +import torch.nn as nn +import torch.nn.functional as F +from torch.nn import init, Module +import functools +from torch.optim import lr_scheduler +from collections import OrderedDict +import numpy as np + +''' +# =================================== +# Advanced nn.Sequential +# reform nn.Sequentials and nn.Modules +# to a single nn.Sequential +# =================================== +''' + +def seq(*args): + if len(args) == 1: + args = args[0] + if isinstance(args, nn.Module): + return args + modules = OrderedDict() + if isinstance(args, OrderedDict): + for k, v in args.items(): + modules[k] = seq(v) + return nn.Sequential(modules) + assert isinstance(args, (list, tuple)) + return nn.Sequential(*[seq(i) for i in args]) + +''' +# =================================== +# Useful blocks +# -------------------------------- +# conv (+ normaliation + relu) +# concat +# sum +# resblock (ResBlock) +# resdenseblock (ResidualDenseBlock_5C) +# resinresdenseblock (RRDB) +# =================================== +''' + +# ------------------------------------------------------- +# return nn.Sequantial of (Conv + BN + ReLU) +# ------------------------------------------------------- +def conv(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, + output_padding=0, dilation=1, groups=1, bias=True, + padding_mode='zeros', mode='CBR'): + L = [] + for t in mode: + if t == 'C': + L.append(nn.Conv2d(in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=bias, + padding_mode=padding_mode)) + elif t == 'X': + assert in_channels == out_channels + L.append(nn.Conv2d(in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=in_channels, + bias=bias, + padding_mode=padding_mode)) + elif t == 'T': + L.append(nn.ConvTranspose2d(in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + output_padding=output_padding, + groups=groups, + bias=bias, + dilation=dilation, + padding_mode=padding_mode)) + elif t == 'B': + L.append(nn.BatchNorm2d(out_channels)) + elif t == 'I': + L.append(nn.InstanceNorm2d(out_channels, affine=True)) + elif t == 'i': + L.append(nn.InstanceNorm2d(out_channels)) + elif t == 'R': + L.append(nn.ReLU(inplace=True)) + elif t == 'r': + L.append(nn.ReLU(inplace=False)) + elif t == 'P': + L.append(nn.PReLU()) + elif t == 'L': + L.append(nn.LeakyReLU(negative_slope=1e-1, inplace=True)) + elif t == 'l': + L.append(nn.LeakyReLU(negative_slope=1e-1, inplace=False)) + elif t == '2': + L.append(nn.PixelShuffle(upscale_factor=2)) + elif t == '3': + L.append(nn.PixelShuffle(upscale_factor=3)) + elif t == '4': + L.append(nn.PixelShuffle(upscale_factor=4)) + elif t == 'U': + L.append(nn.Upsample(scale_factor=2, 
mode='nearest')) + elif t == 'u': + L.append(nn.Upsample(scale_factor=3, mode='nearest')) + elif t == 'M': + L.append(nn.MaxPool2d(kernel_size=kernel_size, + stride=stride, + padding=0)) + elif t == 'A': + L.append(nn.AvgPool2d(kernel_size=kernel_size, + stride=stride, + padding=0)) + else: + raise NotImplementedError('Undefined type: '.format(t)) + return seq(*L) + +# ------------------------------------------------------- +# Concat the output of a submodule to its input +# ------------------------------------------------------- +class ConcatBlock(nn.Module): + def __init__(self, submodule): + super(ConcatBlock, self).__init__() + + self.sub = submodule + + def forward(self, x): + output = torch.cat((x, self.sub(x)), dim=1) + return output + + def __repr__(self): + return self.sub.__repr__() + '_concat' + +# ------------------------------------------------------- +# Elementwise sum the output of a submodule to its input +# ------------------------------------------------------- +class ShortcutBlock(nn.Module): + def __init__(self, submodule): + super(ShortcutBlock, self).__init__() + + self.sub = submodule + + def forward(self, x): + output = x + self.sub(x) + return output + + def __repr__(self): + tmpstr = 'Identity + \n|' + modstr = self.sub.__repr__().replace('\n', '\n|') + tmpstr = tmpstr + modstr + return tmpstr + +class DWTForward(nn.Module): + def __init__(self): + super(DWTForward, self).__init__() + ll = np.array([[0.5, 0.5], [0.5, 0.5]]) + lh = np.array([[-0.5, -0.5], [0.5, 0.5]]) + hl = np.array([[-0.5, 0.5], [-0.5, 0.5]]) + hh = np.array([[0.5, -0.5], [-0.5, 0.5]]) + filts = np.stack([ll[None,::-1,::-1], lh[None,::-1,::-1], + hl[None,::-1,::-1], hh[None,::-1,::-1]], + axis=0) + self.weight = nn.Parameter( + torch.tensor(filts).to(torch.get_default_dtype()), + requires_grad=False) + def forward(self, x): + C = x.shape[1] + filters = torch.cat([self.weight,] * C, dim=0) + y = F.conv2d(x, filters, groups=C, stride=2) + return y + +class DWTInverse(nn.Module): + def __init__(self): + super(DWTInverse, self).__init__() + ll = np.array([[0.5, 0.5], [0.5, 0.5]]) + lh = np.array([[-0.5, -0.5], [0.5, 0.5]]) + hl = np.array([[-0.5, 0.5], [-0.5, 0.5]]) + hh = np.array([[0.5, -0.5], [-0.5, 0.5]]) + filts = np.stack([ll[None, ::-1, ::-1], lh[None, ::-1, ::-1], + hl[None, ::-1, ::-1], hh[None, ::-1, ::-1]], + axis=0) + self.weight = nn.Parameter( + torch.tensor(filts).to(torch.get_default_dtype()), + requires_grad=False) + + def forward(self, x): + C = int(x.shape[1] / 4) + filters = torch.cat([self.weight, ] * C, dim=0) + y = F.conv_transpose2d(x, filters, groups=C, stride=2) + return y + +# ------------------------------------------------------- +# Channel Attention (CA) Layer +# ------------------------------------------------------- +class CALayer(nn.Module): + def __init__(self, channel=64, reduction=16): + super(CALayer, self).__init__() + + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.conv_du = nn.Sequential( + nn.Conv2d(channel, channel//reduction, 1, padding=0, bias=True), + nn.ReLU(inplace=True), + nn.Conv2d(channel//reduction, channel, 1, padding=0, bias=True), + nn.Sigmoid() + ) + + def forward(self, x): + y = self.avg_pool(x) + y = self.conv_du(y) + return x * y + +class ChannelPool(nn.Module): + def forward(self, x): + return torch.cat((torch.max(x,1)[0].unsqueeze(1), torch.mean(x,1).unsqueeze(1)), dim=1) + +class spatial_attn_layer(nn.Module): + def __init__(self, kernel_size=3): + super(spatial_attn_layer, self).__init__() + self.compress = ChannelPool() + self.spatial = 
nn.Conv2d(2, 1, 3, stride=1, padding=1, bias=True) + + def forward(self, x): + # import pdb;pdb.set_trace() + x_compress = self.compress(x) + x_out = self.spatial(x_compress) + scale = torch.sigmoid(x_out) # broadcasting + return x * scale + +# ------------------------------------------------------- +# Content Unrelated Channel Attention (CUCA) Layer +# ------------------------------------------------------- +class CUCALayer(nn.Module): + def __init__(self, channel=64, min=0, max=None): + super(CUCALayer, self).__init__() + + self.attention = nn.Conv2d(channel, channel, 1, padding=0, + groups=channel, bias=False) + self.min, self.max = min, max + nn.init.uniform_(self.attention.weight, 0, 1) + + def forward(self, x): + self.attention.weight.data.clamp_(self.min, self.max) + return self.attention(x) + + +# ------------------------------------------------------- +# Res Block: x + conv(relu(conv(x))) +# ------------------------------------------------------- +class ResBlock(nn.Module): + def __init__(self, in_channels=64, out_channels=64, kernel_size=3, stride=1, + padding=1, bias=True, mode='CRC'): + super(ResBlock, self).__init__() + + assert in_channels == out_channels + if mode[0] in ['R','L']: + mode = mode[0].lower() + mode[1:] + + self.res = conv(in_channels, out_channels, kernel_size, + stride, padding, bias=bias, mode=mode) + + def forward(self, x): + res = self.res(x) + return x + res + +# ------------------------------------------------------- +# Residual Channel Attention Block (RCAB) +# ------------------------------------------------------- +class RCABlock(nn.Module): + def __init__(self, in_channels=64, out_channels=64, kernel_size=3, stride=1, + padding=1, bias=True, mode='CRC', reduction=16): + super(RCABlock, self).__init__() + assert in_channels == out_channels + if mode[0] in ['R','L']: + mode = mode[0].lower() + mode[1:] + + self.res = conv(in_channels, out_channels, kernel_size, + stride, padding, bias=bias, mode=mode) + self.CA = CALayer(out_channels, reduction) + #self.SA = spatial_attn_layer() ## Spatial Attention + #self.conv1x1 = nn.Conv2d(in_channels*2, in_channels, kernel_size=1) + + def forward(self, x): + res = self.res(x) + #sa_branch = self.SA(res) + ca_branch = self.CA(res) + #res = torch.cat([sa_branch, ca_branch], dim=1) + #res = self.conv1x1(res) + return ca_branch + x + + +# ------------------------------------------------------- +# Residual Channel Attention Group (RG) +# ------------------------------------------------------- +class RCAGroup(nn.Module): + def __init__(self, in_channels=64, out_channels=64, kernel_size=3, stride=1, + padding=1, bias=True, mode='CRC', reduction=16, nb=12): + super(RCAGroup, self).__init__() + assert in_channels == out_channels + if mode[0] in ['R','L']: + mode = mode[0].lower() + mode[1:] + + RG = [RCABlock(in_channels, out_channels, kernel_size, stride, padding, + bias, mode, reduction) for _ in range(nb)] + RG.append(conv(out_channels, out_channels, mode='C')) + + # self.rg = ShortcutBlock(nn.Sequential(*RG)) + self.rg = nn.Sequential(*RG) + + def forward(self, x): + res = self.rg(x) + return res + x + +# ------------------------------------------------------- +# conv + subp + relu +# ------------------------------------------------------- +def upsample_pixelshuffle(in_channels=64, out_channels=3, kernel_size=3, + stride=1, padding=1, bias=True, mode='2R'): + # mode examples: 2, 2R, 2BR, 3, ..., 4BR. 
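The `mode` strings used by `conv` and the samplers in this file are a small layer DSL: 'C' conv, 'T' transposed conv, 'B' BatchNorm, 'R' ReLU, 'P' PReLU, '2'/'3'/'4' PixelShuffle, 'U' nearest upsample, 'M'/'A' max/avg pool. A hedged sketch of what two of these factories expand to:

```python
import torch

block = conv(3, 64, mode='CBR')               # Conv2d(3, 64, 3) -> BatchNorm2d -> ReLU
up = upsample_pixelshuffle(64, 3, mode='2R')  # Conv2d(64, 12, 3) -> PixelShuffle(2) -> ReLU
x = torch.randn(1, 3, 32, 32)
print(block(x).shape)        # torch.Size([1, 64, 32, 32])
print(up(block(x)).shape)    # torch.Size([1, 3, 64, 64])
```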
+ assert len(mode)<4 and mode[0] in ['2', '3', '4'] + up1 = conv(in_channels, out_channels * (int(mode[0]) ** 2), kernel_size, + stride, padding, bias=bias, mode='C'+mode) + return up1 + + +# ------------------------------------------------------- +# nearest_upsample + conv + relu +# ------------------------------------------------------- +def upsample_upconv(in_channels=64, out_channels=3, kernel_size=3, stride=1, + padding=1, bias=True, mode='2R'): + # mode examples: 2, 2R, 2BR, 3, ..., 3BR. + assert len(mode)<4 and mode[0] in ['2', '3'] + if mode[0] == '2': + uc = 'UC' + elif mode[0] == '3': + uc = 'uC' + mode = mode.replace(mode[0], uc) + up1 = conv(in_channels, out_channels, kernel_size, stride, + padding, bias=bias, mode=mode) + return up1 + + +# ------------------------------------------------------- +# convTranspose + relu +# ------------------------------------------------------- +def upsample_convtranspose(in_channels=64, out_channels=3, kernel_size=2, + stride=2, padding=0, bias=True, mode='2R'): + # mode examples: 2, 2R, 2BR, 3, ..., 4BR. + assert len(mode)<4 and mode[0] in ['2', '3', '4'] + kernel_size = int(mode[0]) + stride = int(mode[0]) + mode = mode.replace(mode[0], 'T') + up1 = conv(in_channels, out_channels, kernel_size, stride, + padding, bias=bias, mode=mode) + return up1 + + +''' +# ====================== +# Downsampler +# ====================== +''' + + +# ------------------------------------------------------- +# strideconv + relu +# ------------------------------------------------------- +def downsample_strideconv(in_channels=64, out_channels=64, kernel_size=2, + stride=2, padding=0, bias=True, mode='2R'): + # mode examples: 2, 2R, 2BR, 3, ..., 4BR. + assert len(mode)<4 and mode[0] in ['2', '3', '4'] + kernel_size = int(mode[0]) + stride = int(mode[0]) + mode = mode.replace(mode[0], 'C') + down1 = conv(in_channels, out_channels, kernel_size, stride, + padding, bias=bias, mode=mode) + return down1 + + +# ------------------------------------------------------- +# maxpooling + conv + relu +# ------------------------------------------------------- +def downsample_maxpool(in_channels=64, out_channels=64, kernel_size=3, + stride=1, padding=0, bias=True, mode='2R'): + # mode examples: 2, 2R, 2BR, 3, ..., 3BR. + assert len(mode)<4 and mode[0] in ['2', '3'] + kernel_size_pool = int(mode[0]) + stride_pool = int(mode[0]) + mode = mode.replace(mode[0], 'MC') + pool = conv(kernel_size=kernel_size_pool, stride=stride_pool, mode=mode[0]) + pool_tail = conv(in_channels, out_channels, kernel_size, stride, + padding, bias=bias, mode=mode[1:]) + return sequential(pool, pool_tail) + + +# ------------------------------------------------------- +# averagepooling + conv + relu +# ------------------------------------------------------- +def downsample_avgpool(in_channels=64, out_channels=64, kernel_size=3, + stride=1, padding=1, bias=True, mode='2R'): + # mode examples: 2, 2R, 2BR, 3, ..., 3BR. 
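+    # e.g. mode='2R' becomes 'ACR' after the replace below: conv(mode='A')
+    # builds the AvgPool2d(2) stage, conv(mode='CR') the conv + ReLU tail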
+ assert len(mode)<4 and mode[0] in ['2', '3'] + kernel_size_pool = int(mode[0]) + stride_pool = int(mode[0]) + mode = mode.replace(mode[0], 'AC') + pool = conv(kernel_size=kernel_size_pool, stride=stride_pool, mode=mode[0]) + pool_tail = conv(in_channels, out_channels, kernel_size, stride, + padding, bias=bias, mode=mode[1:]) + return sequential(pool, pool_tail) + + + + + + + + + + +class MWRCAN(nn.Module): + def __init__(self): + super(MWRCAN, self).__init__() + c1 = 64 + c2 = 96 + c3 = 128 + n_b = 20 + + self.head = seq( + nn.AvgPool2d(2), + nn.PixelUnshuffle(2), + DWTForward(), + ) + + self.down1 = seq( + nn.Conv2d(48, c1, 3, 1, 1), + nn.PReLU(), + RCAGroup(in_channels=c1, out_channels=c1, nb=n_b) + ) + + self.down2 = seq( + DWTForward(), + nn.Conv2d(c1 * 4, c2, 3, 1, 1), + nn.PReLU(), + RCAGroup(in_channels=c2, out_channels=c2, nb=n_b) + ) + + self.down3 = seq( + DWTForward(), + nn.Conv2d(c2 * 4, c3, 3, 1, 1), + nn.PReLU() + ) + + self.middle = seq( + RCAGroup(in_channels=c3, out_channels=c3, nb=n_b), + RCAGroup(in_channels=c3, out_channels=c3, nb=n_b) + ) + + self.up1 = seq( + nn.Conv2d(c3, c2 * 4, 3, 1, 1), + nn.PReLU(), + DWTInverse() + ) + + self.up2 = seq( + RCAGroup(in_channels=c2, out_channels=c2, nb=n_b), + nn.Conv2d(c2, c1 * 4, 3, 1, 1), + nn.PReLU(), + DWTInverse() + ) + + self.up3 = seq( + RCAGroup(in_channels=c1, out_channels=c1, nb=n_b), + nn.Conv2d(c1, 12, 3, 1, 1) + ) + + self.tail = seq( + DWTInverse() + ) + + def forward(self, x, c=None): + c1 = self.head(x) + c2 = self.down1(c1) + c3 = self.down2(c2) + c4 = self.down3(c3) + m = self.middle(c4) + c5 = self.up1(m) + c3 + c6 = self.up2(c5) + c2 + c7 = self.up3(c6) + out = self.tail(c7) + + return out diff --git a/IIR-Lab/ISP_pipeline/raw_prc_pipeline/tone_curve.mat b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/tone_curve.mat new file mode 100644 index 0000000000000000000000000000000000000000..af56812d14be86467529eccc094f988dd926ed0c Binary files /dev/null and b/IIR-Lab/ISP_pipeline/raw_prc_pipeline/tone_curve.mat differ diff --git a/IIR-Lab/ISP_pipeline/requirements.txt b/IIR-Lab/ISP_pipeline/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..b4fa4b6dbf76ff0c6d7ef98ef3acba823f1c7704 --- /dev/null +++ b/IIR-Lab/ISP_pipeline/requirements.txt @@ -0,0 +1,26 @@ +ExifRead==3.0.0 +imageio==2.24.0 +networkx==3.0 +numpy==1.24.1 +opencv-python==4.7.0.68 +packaging==23.0 +pandas==1.5.3 +Pillow==9.4.0 +python-dateutil==2.8.2 +pytz==2022.7.1 +PyWavelets==1.4.1 +rawpy==0.17.3 +scikit-image==0.19.3 +scipy==1.10.0 +six==1.16.0 +tifffile==2022.10.10 +tqdm==4.64.1 +colour-science==0.3.16 +cycler==0.10.0 +decorator==4.4.2 +kiwisolver==1.3.1 +matplotlib==3.4.1 +pyparsing==2.4.7 +boto3==1.17.54 +ipykernel>=5.5.3 +jupyter>=1.0.0 \ No newline at end of file diff --git a/IIR-Lab/ISP_pipeline/resize_and_orientation.py b/IIR-Lab/ISP_pipeline/resize_and_orientation.py new file mode 100644 index 0000000000000000000000000000000000000000..4a1de72ccc239e7daf13e7ed1626927f7d4d1b65 --- /dev/null +++ b/IIR-Lab/ISP_pipeline/resize_and_orientation.py @@ -0,0 +1,72 @@ +import numpy as np +import cv2 +from PIL import Image as Image +import os +import json + +def resize_using_pil(img, width=1024, height=768): + img_pil = Image.fromarray(img) + out_size = (width, height) + if img_pil.size == out_size: + return img + out_img = img_pil.resize(out_size,Image.LANCZOS) + # out_img = img_pil + out_img = np.array(out_img) + return out_img + +def readjson(json_path,): + with open(json_path,'r',encoding='UTF-8') as f: + result = 
json.load(f) + # a,b = result["noise_profile"] + # black = result["white_level"] + orientation = result["orientation"] + return orientation + +def fix_orientation(image, orientation): + # 1 = Horizontal(normal) + # 2 = Mirror horizontal + # 3 = Rotate 180 + # 4 = Mirror vertical + # 5 = Mirror horizontal and rotate 270 CW + # 6 = Rotate 90 CW + # 7 = Mirror horizontal and rotate 90 CW + # 8 = Rotate 270 CW + + if type(orientation) is list: + orientation = orientation[0] + + if orientation == 'Horizontal(normal)': + pass + elif orientation == "Mirror horizontal": + image = cv2.flip(image, 0) + elif orientation == "Rotate 180": + image = cv2.rotate(image, cv2.ROTATE_180) + elif orientation == "Mirror vertical": + image = cv2.flip(image, 1) + elif orientation == "Mirror horizontal and rotate 270 CW": + image = cv2.flip(image, 0) + image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE) + elif orientation == "Rotate 90 CW": + image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE) + elif orientation == "Mirror horizontal and rotate 90 CW": + image = cv2.flip(image, 0) + image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE) + elif orientation == "Rotate 270 CW": + image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE) + + return image + + +if __name__ == "__main__": + path_in = "/data1/03_results/2nd_validation_results/" + path_out = "/data1/03_results/BigGuy_submission_1_antialias/" + json_path = "/data1/02_data/Nightimaging/2nd_validation_data/" + pic_name = [] + name_list = os.listdir(path_in) + for i in range(len(name_list)): + pic = cv2.imread(path_in+name_list[i], cv2.IMREAD_UNCHANGED) + resized_pic = resize_using_pil(pic) + print(json_path+name_list[i][:-9]+'.json') + json_orientation = readjson(json_path=(json_path+name_list[i][:-9]+'.json')) + orientated_pic = fix_orientation(resized_pic, json_orientation) + cv2.imwrite(path_out+name_list[i][:-9]+".jpg", orientated_pic, [cv2.IMWRITE_JPEG_QUALITY, 100]) diff --git a/IIR-Lab/ISP_pipeline/run.sh b/IIR-Lab/ISP_pipeline/run.sh new file mode 100644 index 0000000000000000000000000000000000000000..5869897cb1736aa7b09f52b4970a9c05fa509cf5 --- /dev/null +++ b/IIR-Lab/ISP_pipeline/run.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +python -m demo.process_pngs -p "/data1/02_data/Train_Data/" -o "/data1/01_code/06_nightimaging/nightimaging24-develop/processed_data/" -ie gw -tm Flash \ No newline at end of file diff --git a/IIR-Lab/ISP_pipeline/utility.py b/IIR-Lab/ISP_pipeline/utility.py new file mode 100644 index 0000000000000000000000000000000000000000..2eb47354908ec9082eda38120fab024dc02ab836 --- /dev/null +++ b/IIR-Lab/ISP_pipeline/utility.py @@ -0,0 +1,1082 @@ +# ============================================================= +# This file contains helper functions and classes +# +# Mushfiqul Alam, 2017 +# +# Report bugs/suggestions: +# mushfiqulalam@gmail.com +# ============================================================= + +import png +import numpy as np +import scipy.misc +import math +from scipy import signal # for convolutions +from scipy import ndimage # for n-dimensional convolution +from scipy import interpolate + +# ============================================================= +# function: imsave +# save image in image formats +# data: is the image data +# output_dtype: output data type +# input_dtype: input data type +# is_scale: is scaling needed to go from input data type to output data type +# ============================================================= +def imsave(data, output_name, output_dtype="uint8", input_dtype="uint8", 
is_scale=False): + + dtype_dictionary = {"uint8" : np.uint8(data), "uint16" : np.uint16(data),\ + "uint32" : np.uint32(data), "uint64" : np.uint64(data),\ + "int8" : np.int8(data), "int16" : np.int16(data),\ + "int32" : np.int32(data), "int64" : np.int64(data),\ + "float16" : np.float16(data), "float32" : np.float32(data),\ + "float64" : np.float64(data)} + + min_val_dictionary = {"uint8" : 0, "uint16" : 0,\ + "uint32" : 0, "uint64" : 0,\ + "int8" : -128, "int16" : -32768,\ + "int32" : -2147483648, "int64" : -9223372036854775808} + + max_val_dictionary = {"uint8" : 255, "uint16" : 65535,\ + "uint32" : 4294967295, "uint64" : 18446744073709551615,\ + "int8" : 127, "int16" : 32767,\ + "int32" : 2147483647, "int64" : 9223372036854775807} + + # scale the data in case scaling is necessary to go from input_dtype + # to output_dtype + if (is_scale): + + # convert data into float32 + data = np.float32(data) + + # Get minimum and maximum value of the input and output data types + in_min = min_val_dictionary[input_dtype] + in_max = max_val_dictionary[input_dtype] + out_min = min_val_dictionary[output_dtype] + out_max = max_val_dictionary[output_dtype] + + # clip the input data in the input_dtype range + data = np.clip(data, in_min, in_max) + + # scale the data + data = out_min + (data - in_min) * (out_max - out_min) / (in_max - in_min) + + # clip scaled data in output_dtype range + data = np.clip(data, out_min, out_max) + + # convert the data into the output_dtype + data = dtype_dictionary[output_dtype] + + # output image type: raw, png, jpeg + output_file_type = output_name[-3:] + + # save files depending on output_file_type + if (output_file_type == "raw"): + pass # will be added later + return + + elif (output_file_type == "png"): + + # png will only save uint8 or uint16 + if ((output_dtype == "uint16") or (output_dtype == "uint8")): + if (output_dtype == "uint16"): + output_bitdepth = 16 + elif (output_dtype == "uint8"): + output_bitdepth = 8 + + pass + else: + print("For png output, output_dtype must be uint8 or uint16") + return + + with open(output_name, "wb") as f: + # rgb image + if (np.ndim(data) == 3): + # create the png writer + writer = png.Writer(width=data.shape[1], height=data.shape[0],\ + bitdepth = output_bitdepth) + # convert data to the python lists expected by the png Writer + data2list = data.reshape(-1, data.shape[1]*data.shape[2]).tolist() + # write in the file + writer.write(f, data2list) + + # greyscale image + elif (np.ndim(data) == 2): + # create the png writer + writer = png.Writer(width=data.shape[1], height=data.shape[0],\ + bitdepth = output_bitdepth,\ + greyscale = True) + # convert data to the python lists expected by the png Writer + data2list = data.tolist() + # write in the file + writer.write(f, data2list) + + elif (output_file_type == "jpg"): + pass # will be added later + return + + else: + print("output_name should contain extensions of .raw, .png, or .jpg") + return + + +# ============================================================= +# class: helpers +# a class of useful helper functions +# ============================================================= +class helpers: + def __init__(self, data=None, name="helper"): + self.data = np.float32(data) + self.name = name + + def get_width_height(self): + #------------------------------------------------------ + # returns width, height + # We assume data be in height x width x number of channel x frames format + #------------------------------------------------------ + if (np.ndim(self.data) > 1): + size = 
np.shape(self.data) + width = size[1] + height = size[0] + return width, height + else: + print("Error! data dimension must be 2 or greater") + + def bayer_channel_separation(self, pattern): + #------------------------------------------------------ + # function: bayer_channel_separation + # Objective: Outputs four channels of the bayer pattern + # Input: + # data: the bayer data + # pattern: rggb, grbg, gbrg, or bggr + # Output: + # R, G1, G2, B (Quarter resolution images) + #------------------------------------------------------ + if (pattern == "rggb"): + R = self.data[::2, ::2] + G1 = self.data[::2, 1::2] + G2 = self.data[1::2, ::2] + B = self.data[1::2, 1::2] + elif (pattern == "grbg"): + G1 = self.data[::2, ::2] + R = self.data[::2, 1::2] + B = self.data[1::2, ::2] + G2 = self.data[1::2, 1::2] + elif (pattern == "gbrg"): + G1 = self.data[::2, ::2] + B = self.data[::2, 1::2] + R = self.data[1::2, ::2] + G2 = self.data[1::2, 1::2] + elif (pattern == "bggr"): + B = self.data[::2, ::2] + G1 = self.data[::2, 1::2] + G2 = self.data[1::2, ::2] + R = self.data[1::2, 1::2] + else: + print("pattern must be one of these: rggb, grbg, gbrg, bggr") + return + + return R, G1, G2, B + + + def bayer_channel_integration(self, R, G1, G2, B, pattern): + #------------------------------------------------------ + # function: bayer_channel_integration + # Objective: combine data into a raw according to pattern + # Input: + # R, G1, G2, B: the four separate channels (Quarter resolution) + # pattern: rggb, grbg, gbrg, or bggr + # Output: + # data (Full resolution image) + #------------------------------------------------------ + size = np.shape(R) + data = np.empty((size[0]*2, size[1]*2), dtype=np.float32) + if (pattern == "rggb"): + data[::2, ::2] = R + data[::2, 1::2] = G1 + data[1::2, ::2] = G2 + data[1::2, 1::2] = B + elif (pattern == "grbg"): + data[::2, ::2] = G1 + data[::2, 1::2] = R + data[1::2, ::2] = B + data[1::2, 1::2] = G2 + elif (pattern == "gbrg"): + data[::2, ::2] = G1 + data[::2, 1::2] = B + data[1::2, ::2] = R + data[1::2, 1::2] = G2 + elif (pattern == "bggr"): + data[::2, ::2] = B + data[::2, 1::2] = G1 + data[1::2, ::2] = G2 + data[1::2, 1::2] = R + else: + print("pattern must be one of these: rggb, grbg, gbrg, bggr") + return + + return data + + + def shuffle_bayer_pattern(self, input_pattern, output_pattern): + #------------------------------------------------------ + # function: shuffle_bayer_pattern + # convert from one bayer pattern to another + #------------------------------------------------------ + + # Get separate channels + R, G1, G2, B = self.bayer_channel_separation(input_pattern) + + # return integrated data + return self.bayer_channel_integration(R, G1, G2, B, output_pattern) + + + def sigma_filter_helper(self, neighborhood_size, sigma): + + if (neighborhood_size % 2) == 0: + print("Error! neighborhood_size must be odd for example 3, 5, 7") + return + + # number of pixels to be padded at the borders + no_of_pixel_pad = math.floor(neighborhood_size / 2.) 
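+        # e.g. neighborhood_size = 5 gives no_of_pixel_pad = 2, so two
+        # reflected rows/columns are added on every border before filtering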
+ + # get width, height + width, height = self.get_width_height() + + # pad pixels at the borders + img = np.pad(self.data, \ + (no_of_pixel_pad, no_of_pixel_pad),\ + 'reflect') # reflect would not repeat the border value + + # allocate memory for output + output = np.empty((height, width), dtype=np.float32) + + for i in range(no_of_pixel_pad, height + no_of_pixel_pad): + for j in range(no_of_pixel_pad, width + no_of_pixel_pad): + + # save the middle pixel value + mid_pixel_val = img[i, j] + + # extract the neighborhood + neighborhood = img[i - no_of_pixel_pad : i + no_of_pixel_pad+1,\ + j - no_of_pixel_pad : j + no_of_pixel_pad+1] + + lower_range = mid_pixel_val - sigma + upper_range = mid_pixel_val + sigma + + temp = 0. + ctr = 0 + for ni in range (0, neighborhood_size): + for nj in range (0, neighborhood_size): + if (neighborhood[ni, nj] > lower_range) and (neighborhood[ni, nj] < upper_range): + temp += neighborhood[ni, nj] + ctr += 1 + + output[i - no_of_pixel_pad, j - no_of_pixel_pad] = temp / ctr + + return output + + def bilinear_interpolation(self, x, y): + + width, height = self.get_width_height() + + x0 = np.floor(x).astype(int) + x1 = x0 + 1 + y0 = np.floor(y).astype(int) + y1 = y0 + 1 + + x0 = np.clip(x0, 0, width-1) + x1 = np.clip(x1, 0, width-1) + y0 = np.clip(y0, 0, height-1) + y1 = np.clip(y1, 0, height-1) + + Ia = self.data[y0, x0] + Ib = self.data[y1, x0] + Ic = self.data[y0, x1] + Id = self.data[y1, x1] + + + x = np.clip(x, 0, width-1) + y = np.clip(y, 0, height-1) + + wa = (x1 - x) * (y1 - y) + wb = (x1 - x) * (y - y0) + wc = (x - x0) * (y1 - y) + wd = (x - x0) * (y - y0) + + return wa * Ia + wb * Ib + wc * Ic + wd * Id + + def degamma_srgb(self, clip_range=[0, 65535]): + + # bring data in range 0 to 1 + data = np.clip(self.data, clip_range[0], clip_range[1]) + data = np.divide(data, clip_range[1]) + + data = np.asarray(data) + mask = data > 0.04045 + + # basically, if data[x, y, c] > 0.04045, data[x, y, c] = ( (data[x, y, c] + 0.055) / 1.055 ) ^ 2.4 + # else, data[x, y, c] = data[x, y, c] / 12.92 + data[mask] += 0.055 + data[mask] /= 1.055 + data[mask] **= 2.4 + + data[np.invert(mask)] /= 12.92 + + # rescale + return np.clip(data * clip_range[1], clip_range[0], clip_range[1]) + + def gamma_srgb(self, clip_range=[0, 65535]): + + # bring data in range 0 to 1 + data = np.clip(self.data, clip_range[0], clip_range[1]) + data = np.divide(data, clip_range[1]) + + data = np.asarray(data) + mask = data > 0.0031308 + + # basically, if data[x, y, c] > 0.0031308, data[x, y, c] = 1.055 * ( var_R(i, j) ^ ( 1 / 2.4 ) ) - 0.055 + # else, data[x, y, c] = data[x, y, c] * 12.92 + data[mask] **= 0.4167 + data[mask] *= 1.055 + data[mask] -= 0.055 + + data[np.invert(mask)] *= 12.92 + + # rescale + return np.clip(data * clip_range[1], clip_range[0], clip_range[1]) + + + def degamma_adobe_rgb_1998(self, clip_range=[0, 65535]): + + # bring data in range 0 to 1 + data = np.clip(self.data, clip_range[0], clip_range[1]) + data = np.divide(data, clip_range[1]) + + data = np.power(data, 2.2) # originally raised to 2.19921875 + + # rescale + return np.clip(data * clip_range[1], clip_range[0], clip_range[1]) + + def gamma_adobe_rgb_1998(self, clip_range=[0, 65535]): + + # bring data in range 0 to 1 + data = np.clip(self.data, clip_range[0], clip_range[1]) + data = np.divide(data, clip_range[1]) + + data = np.power(data, 0.4545) + + # rescale + return np.clip(data * clip_range[1], clip_range[0], clip_range[1]) + + + def get_xyz_reference(self, cie_version="1931", illuminant="d65"): + + if (cie_version 
== "1931"): + + xyz_reference_dictionary = {"A" : [109.850, 100.0, 35.585],\ + "B" : [99.0927, 100.0, 85.313],\ + "C" : [98.074, 100.0, 118.232],\ + "d50" : [96.422, 100.0, 82.521],\ + "d55" : [95.682, 100.0, 92.149],\ + "d65" : [95.047, 100.0, 108.883],\ + "d75" : [94.972, 100.0, 122.638],\ + "E" : [100.0, 100.0, 100.0],\ + "F1" : [92.834, 100.0, 103.665],\ + "F2" : [99.187, 100.0, 67.395],\ + "F3" : [103.754, 100.0, 49.861],\ + "F4" : [109.147, 100.0, 38.813],\ + "F5" : [90.872, 100.0, 98.723],\ + "F6" : [97.309, 100.0, 60.191],\ + "F7" : [95.044, 100.0, 108.755],\ + "F8" : [96.413, 100.0, 82.333],\ + "F9" : [100.365, 100.0, 67.868],\ + "F10" : [96.174, 100.0, 81.712],\ + "F11" : [100.966, 100.0, 64.370],\ + "F12" : [108.046, 100.0, 39.228]} + + elif (cie_version == "1964"): + + xyz_reference_dictionary = {"A" : [111.144, 100.0, 35.200],\ + "B" : [99.178, 100.0, 84.3493],\ + "C" : [97.285, 100.0, 116.145],\ + "D50" : [96.720, 100.0, 81.427],\ + "D55" : [95.799, 100.0, 90.926],\ + "D65" : [94.811, 100.0, 107.304],\ + "D75" : [94.416, 100.0, 120.641],\ + "E" : [100.0, 100.0, 100.0],\ + "F1" : [94.791, 100.0, 103.191],\ + "F2" : [103.280, 100.0, 69.026],\ + "F3" : [108.968, 100.0, 51.965],\ + "F4" : [114.961, 100.0, 40.963],\ + "F5" : [93.369, 100.0, 98.636],\ + "F6" : [102.148, 100.0, 62.074],\ + "F7" : [95.792, 100.0, 107.687],\ + "F8" : [97.115, 100.0, 81.135],\ + "F9" : [102.116, 100.0, 67.826],\ + "F10" : [99.001, 100.0, 83.134],\ + "F11" : [103.866, 100.0, 65.627],\ + "F12" : [111.428, 100.0, 40.353]} + + else: + print("Warning! cie_version must be 1931 or 1964.") + return + + return np.divide(xyz_reference_dictionary[illuminant], 100.0) + + def sobel_prewitt_direction_label(self, gradient_magnitude, theta, threshold=0): + + direction_label = np.zeros(np.shape(gradient_magnitude), dtype=np.float32) + + theta = np.asarray(theta) + # vertical + mask = ((theta >= -22.5) & (theta <= 22.5)) + direction_label[mask] = 3. + + # +45 degree + mask = ((theta > 22.5) & (theta <= 67.5)) + direction_label[mask] = 2. + + # -45 degree + mask = ((theta < -22.5) & (theta >= -67.5)) + direction_label[mask] = 4. + + # horizontal + mask = ((theta > 67.5) & (theta <= 90.)) | ((theta < -67.5) & (theta >= -90.)) + direction_label[mask] = 1. + + gradient_magnitude = np.asarray(gradient_magnitude) + mask = gradient_magnitude < threshold + direction_label[mask] = 0. 
+ + return direction_label + + def edge_wise_median(self, kernel_size, edge_location): + + # pad two pixels at the border + no_of_pixel_pad = math.floor(kernel_size / 2) # number of pixels to pad + + data = self.data + data = np.pad(data, \ + (no_of_pixel_pad, no_of_pixel_pad),\ + 'reflect') # reflect would not repeat the border value + + edge_location = np.pad(edge_location,\ + (no_of_pixel_pad, no_of_pixel_pad),\ + 'reflect') # reflect would not repeat the border value + + width, height = self.get_width_height() + output = np.empty((height, width), dtype=np.float32) + + for i in range(no_of_pixel_pad, height + no_of_pixel_pad): + for j in range(no_of_pixel_pad, width + no_of_pixel_pad): + if (edge_location[i, j] == 1): + output[i - no_of_pixel_pad, j - no_of_pixel_pad] = \ + np.median(data[i - no_of_pixel_pad : i + no_of_pixel_pad + 1,\ + j - no_of_pixel_pad : j + no_of_pixel_pad + 1]) + elif (edge_location[i, j] == 0): + output[i - no_of_pixel_pad, j - no_of_pixel_pad] = data[i, j] + + return output + + + def nonuniform_quantization(self): + + output = np.zeros(np.shape(self.data), dtype=np.float32) + min_val = np.min(self.data) + max_val = np.max(self.data) + + mask = (self.data > (7./8.) * (max_val - min_val)) + output[mask] = 3. + + mask = (self.data > (3./4.) * (max_val - min_val)) & (self.data <= (7./8.) * (max_val - min_val)) + output[mask] = 2. + + mask = (self.data > (1./2.) * (max_val - min_val)) & (self.data <= (3./4.) * (max_val - min_val)) + output[mask] = 1. + + return output + + + def __str__(self): + return self.name + + +# ============================================================= +# function: distance_euclid +# returns Euclidean distance between two points +# ============================================================= +def distance_euclid(point1, point2): + return math.sqrt((point1[0] - point2[0])**2 + (point1[1]-point2[1])**2) + + +# ============================================================= +# class: special_functions +# pass input through special functions +# ============================================================= +class special_function: + def __init__(self, data, name="special function"): + self.data = np.float32(data) + self.name = name + + def soft_coring(self, slope, tau_threshold, gamma_speed): + # Usage: Used in the unsharp masking sharpening Process + # Input: + # slope: controls the boost. + # the amount of sharpening, higher slope + # means more aggresssive sharpening + # + # tau_threshold: controls the amount of coring. + # threshold value till which the image is + # not sharpened. The lower the value of + # tau_threshold the more frequencies + # goes through the sharpening process + # + # gamma_speed: controls the speed of convergence to the slope + # smaller value gives a little bit more + # sharpened image, this may be a fine tuner + return slope * self.data * ( 1. - np.exp(-((np.abs(self.data / tau_threshold))**gamma_speed))) + + + def distortion_function(self, correction_type="barrel-1", strength=0.1): + + if (correction_type == "pincushion-1"): + return np.divide(self.data, 1. + strength * self.data) + elif (correction_type == "pincushion-2"): + return np.divide(self.data, 1. + strength * np.power(self.data, 2)) + elif (correction_type == "barrel-1"): + return np.multiply(self.data, 1. + strength * self.data) + elif (correction_type == "barrel-2"): + return np.multiply(self.data, 1. + strength * np.power(self.data, 2)) + else: + print("Warning! 
Unknown correction_type.") + return + + def bilateral_filter(self, edge): + # bilateral filter based upon the work of + # Jiawen Chen, Sylvain Paris, and Fredo Durand, 2007 work + + # note: if edge data is not provided, image is served as edge + # this is called normal bilateral filter + # if edge data is provided, then it is called cross or joint + # bilateral filter + + # get width and height of the image + width, height = helpers(self.data).get_width_height() + + # sigma_spatial + sigma_spatial = min(height, width) / 16. + + # calculate edge_delta + edge_min = np.min(edge) + edge_max = np.max(edge) + edge_delta = edge_max - edge_min + + # sigma_range and sampling_range + sigma_range = 0.1 * edge_delta + sampling_range = sigma_range + sampling_spatial = sigma_spatial + + # derived_sigma_spatial and derived_sigma_range + derived_sigma_spatial = sigma_spatial / sampling_spatial + derived_sigma_range = sigma_range / sampling_range + + # paddings + padding_xy = np.floor(2. * derived_sigma_spatial) + 1. + padding_z = np.floor(2. * derived_sigma_range) + 1. + + # downsamples + downsample_width = np.uint16(np.floor((width - 1.) / sampling_spatial) + 1. + 2. * padding_xy) + downsample_height = np.uint16(np.floor((height - 1.) / sampling_spatial) + 1. + 2. * padding_xy) + downsample_depth = np.uint16(np.floor(edge_delta / sampling_range) + 1. + 2. * padding_z) + + grid_data = np.zeros((downsample_height, downsample_width, downsample_depth)) + grid_weight = np.zeros((downsample_height, downsample_width, downsample_depth)) + + jj, ii = np.meshgrid(np.arange(0, width, 1),\ + np.arange(0, height, 1)) + + di = np.uint16(np.round( ii / sampling_spatial ) + padding_xy + 1.) + dj = np.uint16(np.round( jj / sampling_spatial ) + padding_xy + 1.) + dz = np.uint16(np.round( (edge - edge_min) / sampling_range ) + padding_z + 1.) + + + for i in range(0, height): + for j in range(0, width): + + data_z = self.data[i, j] + if not np.isnan(data_z): + dik = di[i, j] + djk = dj[i, j] + dzk = dz[i, j] + + grid_data[dik, djk, dzk] = grid_data[dik, djk, dzk] + data_z + grid_weight[dik, djk, dzk] = grid_weight[dik, djk, dzk] + 1. + + + kernel_width = 2. * derived_sigma_spatial + 1. + kernel_height = kernel_width + kernel_depth = 2. * derived_sigma_range + 1. + + half_kernel_width = np.floor(kernel_width / 2.) + half_kernel_height = np.floor(kernel_height / 2.) + half_kernel_depth = np.floor(kernel_depth / 2.) + + grid_x, grid_y, grid_z = np.meshgrid(np.arange(0, kernel_width, 1),\ + np.arange(0, kernel_height, 1),\ + np.arange(0, kernel_depth, 1)) + + grid_x = grid_x - half_kernel_width + grid_y = grid_y - half_kernel_height + grid_z = grid_z - half_kernel_depth + + grid_r_squared = ( ( np.multiply(grid_x, grid_x) + \ + np.multiply(grid_y, grid_y) ) / np.multiply(derived_sigma_spatial, derived_sigma_spatial) ) + \ + ( np.multiply(grid_z, grid_z) / np.multiply(derived_sigma_range, derived_sigma_range) ) + + kernel = np.exp(-0.5 * grid_r_squared) + blurred_grid_data = ndimage.convolve(grid_data, kernel, mode='reflect') + blurred_grid_weight = ndimage.convolve(grid_weight, kernel, mode='reflect') + + # divide + blurred_grid_weight = np.asarray(blurred_grid_weight) + mask = blurred_grid_weight == 0 + blurred_grid_weight[mask] = -2. + normalized_blurred_grid = np.divide(blurred_grid_data, blurred_grid_weight) + mask = blurred_grid_weight < -1 + normalized_blurred_grid[mask] = 0. + blurred_grid_weight[mask] = 0. 
+ + # upsample + jj, ii = np.meshgrid(np.arange(0, width, 1),\ + np.arange(0, height, 1)) + + di = (ii / sampling_spatial) + padding_xy + 1. + dj = (jj / sampling_spatial) + padding_xy + 1. + dz = (edge - edge_min) / sampling_range + padding_z + 1. + + # arrange the input points + n_i, n_j, n_z = np.shape(normalized_blurred_grid) + points = (np.arange(0, n_i, 1), np.arange(0, n_j, 1), np.arange(0, n_z, 1)) + + # query points + xi = (di, dj, dz) + + # multidimensional interpolation + output = interpolate.interpn(points, normalized_blurred_grid, xi, method='linear') + + return output + + + +# ============================================================= +# class: synthetic_image_generate +# creates sysnthetic images for different purposes +# ============================================================= +class synthetic_image_generate: + def __init__(self, width, height, name="synthetic_image"): + self.name = name + self.width = width + self.height = height + + def create_lens_shading_correction_images(self, dark_current=0, flat_max=65535, flat_min=0, clip_range=[0, 65535]): + # Objective: creates two images: + # dark_current_image and flat_field_image + dark_current_image = dark_current * np.ones((self.height, self.width), dtype=np.float32) + flat_field_image = np.empty((self.height, self.width), dtype=np.float32) + + center_pixel_pos = [self.height/2, self.width/2] + max_distance = distance_euclid(center_pixel_pos, [self.height, self.width]) + + for i in range(0, self.height): + for j in range(0, self.width): + flat_field_image[i, j] = (max_distance - distance_euclid(center_pixel_pos, [i, j])) / max_distance + flat_field_image[i, j] = flat_min + flat_field_image[i, j] * (flat_max - flat_min) + + dark_current_image = np.clip(dark_current_image, clip_range[0], clip_range[1]) + flat_field_image = np.clip(flat_field_image, clip_range[0], clip_range[1]) + + return dark_current_image, flat_field_image + + def create_zone_plate_image(self): + pass + + def create_color_gradient_image(self): + pass + + def create_random_noise_image(self, mean=0, standard_deviation=1, seed=0): + # Creates normally distributed noisy image + np.random.seed(seed) + return np.random.normal(mean, standard_deviation, (self.height, self.width)) + + def create_noisy_image(self, data, mean=0, standard_deviation=1, seed=0, clip_range=[0, 65535]): + # Adds normally distributed noise to the data + return np.clip(data + self.create_random_noise_image(mean, standard_deviation, seed), clip_range[0], clip_range[1]) + + +# ============================================================= +# class: create_filter +# creates different filters, generally 2D filters +# ============================================================= +class create_filter: + def __init__(self, name="filter"): + self.name = name + + def gaussian(self, kernel_size, sigma): + + # calculate which number to where the grid should be + # remember that, kernel_size[0] is the width of the kernel + # and kernel_size[1] is the height of the kernel + temp = np.floor(np.float32(kernel_size) / 2.) + + # create the grid + # example: if kernel_size = [5, 3], then: + # x: array([[-2., -1., 0., 1., 2.], + # [-2., -1., 0., 1., 2.], + # [-2., -1., 0., 1., 2.]]) + # y: array([[-1., -1., -1., -1., -1.], + # [ 0., 0., 0., 0., 0.], + # [ 1., 1., 1., 1., 1.]]) + x, y = np.meshgrid(np.linspace(-temp[0], temp[0], kernel_size[0]),\ + np.linspace(-temp[1], temp[1], kernel_size[1])) + + # Gaussian equation + temp = np.exp( -(x**2 + y**2) / (2. 
* sigma**2) ) + + # make kernel sum equal to 1 + return temp / np.sum(temp) + + def gaussian_separable(self, kernel_size, sigma): + + # calculate which number to where the grid should be + # remember that, kernel_size[0] is the width of the kernel + # and kernel_size[1] is the height of the kernel + temp = np.floor(np.float32(kernel_size) / 2.) + + # create the horizontal kernel + x = np.linspace(-temp[0], temp[0], kernel_size[0]) + x = x.reshape((1, kernel_size[0])) # reshape to create row vector + hx = np.exp(-x**2 / (2 * sigma**2)) + hx = hx / np.sum(hx) + + # create the vertical kernel + y = np.linspace(-temp[1], temp[1], kernel_size[1]) + y = y.reshape((kernel_size[1], 1)) # reshape to create column vector + hy = np.exp(-y**2 / (2 * sigma**2)) + hy = hy / np.sum(hy) + + return hx, hy + + def sobel(self, kernel_size): + # Returns the Sobel filter kernels Sx and Sy + + Sx = .25 * np.dot([[1.], [2.], [1.]], [[1., 0., -1.]]) + + if (kernel_size > 3): + + n = (np.floor((kernel_size - 5) / 2 + 1)).astype(int) + + for i in range(0, n): + + Sx = (1./16.) * signal.convolve2d(np.dot([[1.], [2.], [1.]], [[1., 2., 1.]]), Sx) + + Sy = np.transpose(Sx) + + return Sx, Sy + + def __str__(self): + return self.name + + +# ============================================================= +# class: color_conversion +# color conversion from one color space to another +# ============================================================= +class color_conversion: + def __init__(self, data, name="color conversion"): + self.data = np.float32(data) + self.name = name + + def rgb2gray(self): + return 0.299 * self.data[:, :, 0] +\ + 0.587 * self.data[:, :, 1] +\ + 0.114 * self.data[:, :, 2] + + def rgb2ycc(self, rule="bt601"): + + # map to select kr and kb + kr_kb_dict = {"bt601" : [0.299, 0.114],\ + "bt709" : [0.2126, 0.0722],\ + "bt2020" : [0.2627, 0.0593]} + + kr = kr_kb_dict[rule][0] + kb = kr_kb_dict[rule][1] + kg = 1 - (kr + kb) + + output = np.empty(np.shape(self.data), dtype=np.float32) + output[:, :, 0] = kr * self.data[:, :, 0] + \ + kg * self.data[:, :, 1] + \ + kb * self.data[:, :, 2] + output[:, :, 1] = 0.5 * ((self.data[:, :, 2] - output[:, :, 0]) / (1 - kb)) + output[:, :, 2] = 0.5 * ((self.data[:, :, 0] - output[:, :, 0]) / (1 - kr)) + + return output + + def ycc2rgb(self, rule="bt601"): + + # map to select kr and kb + kr_kb_dict = {"bt601" : [0.299, 0.114],\ + "bt709" : [0.2126, 0.0722],\ + "bt2020" : [0.2627, 0.0593]} + + kr = kr_kb_dict[rule][0] + kb = kr_kb_dict[rule][1] + kg = 1 - (kr + kb) + + output = np.empty(np.shape(self.data), dtype=np.float32) + output[:, :, 0] = 2. * self.data[:, :, 2] * (1 - kr) + self.data[:, :, 0] + output[:, :, 2] = 2. 
* self.data[:, :, 1] * (1 - kb) + self.data[:, :, 0] + output[:, :, 1] = (self.data[:, :, 0] - kr * output[:, :, 0] - kb * output[:, :, 2]) / kg + + return output + + def rgb2xyz(self, color_space="srgb", clip_range=[0, 65535]): + # input rgb in range clip_range + # output xyz is in range 0 to 1 + + if (color_space == "srgb"): + + # degamma / linearization + data = helpers(self.data).degamma_srgb(clip_range) + data = np.float32(data) + data = np.divide(data, clip_range[1]) + + # matrix multiplication` + output = np.empty(np.shape(self.data), dtype=np.float32) + output[:, :, 0] = data[:, :, 0] * 0.4124 + data[:, :, 1] * 0.3576 + data[:, :, 2] * 0.1805 + output[:, :, 1] = data[:, :, 0] * 0.2126 + data[:, :, 1] * 0.7152 + data[:, :, 2] * 0.0722 + output[:, :, 2] = data[:, :, 0] * 0.0193 + data[:, :, 1] * 0.1192 + data[:, :, 2] * 0.9505 + + elif (color_space == "adobe-rgb-1998"): + + # degamma / linearization + data = helpers(self.data).degamma_adobe_rgb_1998(clip_range) + data = np.float32(data) + data = np.divide(data, clip_range[1]) + + # matrix multiplication + output = np.empty(np.shape(self.data), dtype=np.float32) + output[:, :, 0] = data[:, :, 0] * 0.5767309 + data[:, :, 1] * 0.1855540 + data[:, :, 2] * 0.1881852 + output[:, :, 1] = data[:, :, 0] * 0.2973769 + data[:, :, 1] * 0.6273491 + data[:, :, 2] * 0.0752741 + output[:, :, 2] = data[:, :, 0] * 0.0270343 + data[:, :, 1] * 0.0706872 + data[:, :, 2] * 0.9911085 + + elif (color_space == "linear"): + + # matrix multiplication` + output = np.empty(np.shape(self.data), dtype=np.float32) + data = np.float32(self.data) + data = np.divide(data, clip_range[1]) + output[:, :, 0] = data[:, :, 0] * 0.4124 + data[:, :, 1] * 0.3576 + data[:, :, 2] * 0.1805 + output[:, :, 1] = data[:, :, 0] * 0.2126 + data[:, :, 1] * 0.7152 + data[:, :, 2] * 0.0722 + output[:, :, 2] = data[:, :, 0] * 0.0193 + data[:, :, 1] * 0.1192 + data[:, :, 2] * 0.9505 + + else: + print("Warning! 
color_space must be srgb or adobe-rgb-1998.") + return + + return output + + + def xyz2rgb(self, color_space="srgb", clip_range=[0, 65535]): + # input xyz is in range 0 to 1 + # output rgb in clip_range + + # allocate space for output + output = np.empty(np.shape(self.data), dtype=np.float32) + + if (color_space == "srgb"): + + # matrix multiplication + output[:, :, 0] = self.data[:, :, 0] * 3.2406 + self.data[:, :, 1] * -1.5372 + self.data[:, :, 2] * -0.4986 + output[:, :, 1] = self.data[:, :, 0] * -0.9689 + self.data[:, :, 1] * 1.8758 + self.data[:, :, 2] * 0.0415 + output[:, :, 2] = self.data[:, :, 0] * 0.0557 + self.data[:, :, 1] * -0.2040 + self.data[:, :, 2] * 1.0570 + + # gamma to retain nonlinearity + output = helpers(output * clip_range[1]).gamma_srgb(clip_range) + + + elif (color_space == "adobe-rgb-1998"): + + # matrix multiplication + output[:, :, 0] = self.data[:, :, 0] * 2.0413690 + self.data[:, :, 1] * -0.5649464 + self.data[:, :, 2] * -0.3446944 + output[:, :, 1] = self.data[:, :, 0] * -0.9692660 + self.data[:, :, 1] * 1.8760108 + self.data[:, :, 2] * 0.0415560 + output[:, :, 2] = self.data[:, :, 0] * 0.0134474 + self.data[:, :, 1] * -0.1183897 + self.data[:, :, 2] * 1.0154096 + + # gamma to retain nonlinearity + output = helpers(output * clip_range[1]).gamma_adobe_rgb_1998(clip_range) + + + elif (color_space == "linear"): + + # matrix multiplication + output[:, :, 0] = self.data[:, :, 0] * 3.2406 + self.data[:, :, 1] * -1.5372 + self.data[:, :, 2] * -0.4986 + output[:, :, 1] = self.data[:, :, 0] * -0.9689 + self.data[:, :, 1] * 1.8758 + self.data[:, :, 2] * 0.0415 + output[:, :, 2] = self.data[:, :, 0] * 0.0557 + self.data[:, :, 1] * -0.2040 + self.data[:, :, 2] * 1.0570 + + # gamma to retain nonlinearity + output = output * clip_range[1] + + else: + print("Warning! color_space must be srgb or adobe-rgb-1998.") + return + + return output + + + def xyz2lab(self, cie_version="1931", illuminant="d65"): + + xyz_reference = helpers().get_xyz_reference(cie_version, illuminant) + + data = self.data + data[:, :, 0] = data[:, :, 0] / xyz_reference[0] + data[:, :, 1] = data[:, :, 1] / xyz_reference[1] + data[:, :, 2] = data[:, :, 2] / xyz_reference[2] + + data = np.asarray(data) + + # if data[x, y, c] > 0.008856, data[x, y, c] = data[x, y, c] ^ (1/3) + # else, data[x, y, c] = 7.787 * data[x, y, c] + 16/116 + mask = data > 0.008856 + data[mask] **= 1./3. + data[np.invert(mask)] *= 7.787 + data[np.invert(mask)] += 16./116. + + data = np.float32(data) + output = np.empty(np.shape(self.data), dtype=np.float32) + output[:, :, 0] = 116. * data[:, :, 1] - 16. + output[:, :, 1] = 500. * (data[:, :, 0] - data[:, :, 1]) + output[:, :, 2] = 200. * (data[:, :, 1] - data[:, :, 2]) + + return output + + + def lab2xyz(self, cie_version="1931", illuminant="d65"): + + output = np.empty(np.shape(self.data), dtype=np.float32) + + output[:, :, 1] = (self.data[:, :, 0] + 16.) / 116. + output[:, :, 0] = (self.data[:, :, 1] / 500.) + output[:, :, 1] + output[:, :, 2] = output[:, :, 1] - (self.data[:, :, 2] / 200.) + + # if output[x, y, c] > 0.008856, output[x, y, c] ^ 3 + # else, output[x, y, c] = ( output[x, y, c] - 16/116 ) / 7.787 + output = np.asarray(output) + mask = output > 0.008856 + output[mask] **= 3. 
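+        # piecewise inverse of the Lab companding used in xyz2lab above:
+        # values over the CIE threshold are cubed here, the rest go through
+        # the linear branch (f - 16/116) / 7.787 applied just below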
+ output[np.invert(mask)] -= 16/116 + output[np.invert(mask)] /= 7.787 + + xyz_reference = helpers().get_xyz_reference(cie_version, illuminant) + + output = np.float32(output) + output[:, :, 0] = output[:, :, 0] * xyz_reference[0] + output[:, :, 1] = output[:, :, 1] * xyz_reference[1] + output[:, :, 2] = output[:, :, 2] * xyz_reference[2] + + return output + + def lab2lch(self): + + output = np.empty(np.shape(self.data), dtype=np.float32) + + output[:, :, 0] = self.data[:, :, 0] # L transfers directly + output[:, :, 1] = np.power(np.power(self.data[:, :, 1], 2) + np.power(self.data[:, :, 2], 2), 0.5) + output[:, :, 2] = np.arctan2(self.data[:, :, 2], self.data[:, :, 1]) * 180 / np.pi + + return output + + def lch2lab(self): + + output = np.empty(np.shape(self.data), dtype=np.float32) + + output[:, :, 0] = self.data[:, :, 0] # L transfers directly + output[:, :, 1] = np.multiply(np.cos(self.data[:, :, 2] * np.pi / 180), self.data[:, :, 1]) + output[:, :, 2] = np.multiply(np.sin(self.data[:, :, 2] * np.pi / 180), self.data[:, :, 1]) + + return output + + def __str__(self): + return self.name + + +# ============================================================= +# class: edge_detection +# detect edges in an image +# ============================================================= +class edge_detection: + def __init__(self, data, name="edge detection"): + self.data = np.float32(data) + self.name = name + + def sobel(self, kernel_size=3, output_type="all", threshold=0., clip_range=[0, 65535]): + + Sx, Sy = create_filter().sobel(kernel_size) + + # Gradient in x direction: Gx + # Gradient in y direction: Gy + if np.ndim(self.data) > 2: + + Gx = np.empty(np.shape(self.data), dtype=np.float32) + Gy = np.empty(np.shape(self.data), dtype=np.float32) + + for dimension_idx in range(0, np.shape(self.data)[2]): + Gx[:, :, dimension_idx] = signal.convolve2d(self.data[:, :, dimension_idx], Sx, mode="same", boundary="symm") + Gy[:, :, dimension_idx] = signal.convolve2d(self.data[:, :, dimension_idx], Sy, mode="same", boundary="symm") + + elif np.ndim(self.data) == 2: + Gx = signal.convolve2d(self.data, Sx, mode="same", boundary="symm") + Gy = signal.convolve2d(self.data, Sy, mode="same", boundary="symm") + + else: + print("Warning! Data dimension must be 2 or 3.") + + # Gradient magnitude + G = np.power(np.power(Gx, 2) + np.power(Gy, 2), .5) + + if (output_type == "gradient_magnitude"): + return G + + # Gradient angle + theta = np.arctan(np.divide(Gy, Gx)) * 180. 
/ np.pi
+
+        if (output_type == "gradient_magnitude_and_angle"):
+            return G, theta
+
+        # Change the threshold according to the clip_range's maximum value
+        threshold = threshold * clip_range[1]
+
+        # calculating if the edge is a strong edge
+        is_edge = np.zeros(np.shape(self.data)).astype(int)
+        mask = G > threshold
+        is_edge[mask] = 1
+
+        if (output_type == "is_edge"):
+            return is_edge
+
+
+        # Edge direction label
+        temp = np.asarray(theta)
+        direction_label = np.zeros(np.shape(self.data), dtype=np.float32)
+
+        if np.ndim(self.data) > 2:
+            for i in range(0, np.shape(self.data)[2]):
+                direction_label[:, :, i] = helpers().sobel_prewitt_direction_label(G[:, :, i], theta[:, :, i], threshold)
+        else:
+            direction_label = helpers().sobel_prewitt_direction_label(G, theta, threshold)
+
+        if (output_type == "all"):
+            return G, Gx, Gy, theta, is_edge, direction_label
+
+
+    def __str__(self):
+        return self.name
diff --git a/IIR-Lab/ISP_pipeline/utils/__init__.py b/IIR-Lab/ISP_pipeline/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..7be7fa011f6695d63f117d3a522b0b3c15030dd7
--- /dev/null
+++ b/IIR-Lab/ISP_pipeline/utils/__init__.py
@@ -0,0 +1,36 @@
+from fractions import Fraction
+from pathlib import Path
+from json import JSONEncoder
+from .utils import *
+
+
+def rmtree(path: Path):
+    if path.is_file():
+        path.unlink()
+    else:
+        for ch in path.iterdir():
+            rmtree(ch)
+        path.rmdir()
+
+
+def safe_save(fpath, data, save_fun, rewrite=False, error_msg='File {fpath} exists! To rewrite it use `--rewrite` flag', **kwargs):
+    if not fpath.is_file() or rewrite:
+        save_fun(str(fpath), data, **kwargs)
+    else:
+        raise FileExistsError(error_msg.format(fpath=fpath))
+
+
+class FractionJSONEncoder(JSONEncoder):
+    def default(self, o):
+        if isinstance(o, Fraction):
+            return {'Fraction': [o.numerator, o.denominator]}
+        else:
+            return o.__dict__
+
+
+def fraction_from_json(json_object):
+    if 'Fraction' in json_object:
+        return Fraction(*json_object['Fraction'])
+    return json_object
+
+
diff --git a/IIR-Lab/ISP_pipeline/utils/__pycache__/__init__.cpython-39.pyc b/IIR-Lab/ISP_pipeline/utils/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..adf4478ee68497fd2ea862e48f860506392e68b2
Binary files /dev/null and b/IIR-Lab/ISP_pipeline/utils/__pycache__/__init__.cpython-39.pyc differ
diff --git a/IIR-Lab/ISP_pipeline/utils/__pycache__/utils.cpython-39.pyc b/IIR-Lab/ISP_pipeline/utils/__pycache__/utils.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6531d58abf39f6a8fb09604df275be572ca4296a
Binary files /dev/null and b/IIR-Lab/ISP_pipeline/utils/__pycache__/utils.cpython-39.pyc differ
diff --git a/IIR-Lab/ISP_pipeline/utils/utils.py b/IIR-Lab/ISP_pipeline/utils/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..343288891019bf465f42658e53952f7319241d91
--- /dev/null
+++ b/IIR-Lab/ISP_pipeline/utils/utils.py
@@ -0,0 +1,56 @@
+from PIL import Image
+import json
+import os
+
+def json_read(fname, **kwargs):
+    with open(fname) as j:
+        data = json.load(j, **kwargs)
+    return data
+
+
+def json_save(fname, data, indent_len=4, **kwargs):
+    with open(fname, "w") as f:
+        s = json.dumps(data, sort_keys=True, ensure_ascii=False,
+                       indent=" " * indent_len, **kwargs)
+        f.write(s)
+
+
+def process_wb_from_txt(txt_path):
+    with open(txt_path, 'r') as fh:
+        txt = [line.rstrip().split() for line in fh]
+
+    txt = [[float(k) for k in row] for row in txt]
+
+    assert len(txt) in [1, 3]
+
+    if len(txt) == 1:
+        # wb vector
+        txt = txt[0]
+
+    return txt
+
+
+def process_ids_from_txt(txt_path):
+    with open(txt_path, 'r') as fh:
+        temp = fh.read().splitlines()
+    return temp
+
+
+def save_txt(p, s):
+    with open(p, 'w') as text_file:
+        text_file.write(s)
+
+
+def downscale_jpg(img_path, new_shape, quality_perc=100):
+    img = Image.open(img_path)
+    if (img.size[0], img.size[1]) != new_shape:
+        new_img = img.resize(new_shape, Image.ANTIALIAS)
+        new_img.save(img_path[:-len('.jpg')] + '.jpg',
+                     'JPEG', quality=quality_perc)
+
+
+def rename_img(img_path):
+    if img_path.lower().endswith('jpeg'):
+        os.rename(img_path, img_path[:-len('jpeg')] + 'jpg')
+    else:
+        os.rename(img_path, img_path[:-len('JPG')] + 'jpg')
diff --git a/IIR-Lab/Rendering_models/high_iso.pth b/IIR-Lab/Rendering_models/high_iso.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a563b22bb8e40332bd7fcbf796e3307e16e4c5c8
--- /dev/null
+++ b/IIR-Lab/Rendering_models/high_iso.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f8b211f2939fdc7a030b7d263a4dc7791c4c385ec47ef3c8adefec5d8de8b7f
+size 93994
diff --git a/IIR-Lab/Rendering_models/low_iso.pth b/IIR-Lab/Rendering_models/low_iso.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ee2ad8ae98338dc88aea9377fea86b535222ebd8
--- /dev/null
+++ b/IIR-Lab/Rendering_models/low_iso.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c669a2f705ad7e8d912e81f89bd17eee621cffda6d8c4ca5701dbc19997449e4
+size 93994
diff --git a/IIR-Lab/aligned_utils.py b/IIR-Lab/aligned_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..db4cdd2ff4016feaaaa9163818426c8262457b00
--- /dev/null
+++ b/IIR-Lab/aligned_utils.py
@@ -0,0 +1,254 @@
+import numpy as np
+import cv2
+# from PIL import Image
+import os
+import glob
+import math  # used by tensor2img below
+from tqdm import tqdm
+from pathlib import Path
+import torch
+import torch.nn.functional as F
+from torchvision.utils import make_grid  # used by tensor2img below
+# Parameters of the motion estimation algorithms
+def warp_flow(img, flow):
+    '''
+    Applies to img the transformation described by flow.
+    '''
+    #assert len(flow.shape) == 3 and flow.shape[-1] == 2
+    hf, wf = flow.shape[:2]
+    # flow = -flow
+    flow[:, :, 0] += np.arange(wf)
+    flow[:, :, 1] += np.arange(hf)[:, np.newaxis]
+    res = cv2.remap(img, flow, None, cv2.INTER_LINEAR)
+    return res
+
+def estimate_invflow(img0, img1, me_algo):
+    '''
+    Estimates inverse optical flow by using the me_algo algorithm.
+    '''
+
+    # Create estimator object
+    if me_algo == "DeepFlow":
+        of_estim = cv2.optflow.createOptFlow_DeepFlow()
+    else:
+        raise Exception("Incorrect motion estimation algorithm")
+
+    # Run flow estimation (inverse flow)
+    flow = of_estim.calc(img1, img0, None)
+#    flow = cv.calcOpticalFlowFarneback(prvs,next, None, 0.5, 3, 15, 3, 5, 1.2, 0)
+
+    return flow
+
+def align_frames(img_to_align, img_source, mc_alg='DeepFlow'):
+    '''
+    Applies to img_to_align a transformation which converts it into img_source.
+    Args:
+        img_to_align: HxWxC image
+        img_source: HxWxC image
+        mc_alg: motion estimation algorithm; only DeepFlow is implemented in
+            estimate_invflow above, and it runs by default.
+    Returns:
+        HxWxC aligned image
+    '''
+    if img_to_align.ndim == 2:
+        img0 = img_to_align
+        img1 = img_source
+    else:
+        img0 = img_to_align[:, :, 1]
+        img1 = img_source[:, :, 1]
+    out_img = None
+
+    # Align frames according to selection in mc_alg
+    flow = estimate_invflow(img0, img1, mc_alg)
+    #print(flow.astype(np.float32))
+
+    # rectifier
+    out_img = warp_flow(img_to_align, flow.astype(np.float32))
+
+    return out_img, flow
+
+
+
+def SIFT(img1gray, img2gray):
+    # if i == 0:
+    sift = cv2.xfeatures2d.SIFT_create()  # create the SIFT detector
+    # sift = cv2.SURF_create()  # create the SURF detector
+    # find the keypoints and descriptors with SIFT
+    kp1, des1 = sift.detectAndCompute(img1gray, None)  # keypoints and descriptors of the first image
+    kp2, des2 = sift.detectAndCompute(img2gray, None)
+    # FLANN parameters
+    FLANN_INDEX_KDTREE = 1  # selects the FLANN index algorithm (here the KD-tree index)
+    index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
+    search_params = dict(checks=10)
+    flann = cv2.FlannBasedMatcher(index_params, search_params)  # create the FLANN matcher
+    matches = flann.knnMatch(des1, des2, k=2)  # k-nearest-neighbour matching of the descriptors; k=2 keeps the two closest candidates
+    # each match carries .queryIdx, .trainIdx and .distance; with k=2 the two most similar features are returned
+    # the keypoints kp1/kp2 carry .pt (coordinates), .angle (orientation), .response (strength) and .size (diameter)
+    # Need to draw only good matches, so create a mask
+    matchesMask = [[0, 0] for i in range(len(matches))]  # mask used only for drawing the matches
+
+    good = []
+    # ratio test as per Lowe's paper
+    for i, (m, n) in enumerate(matches):
+        if m.distance < 0.65*n.distance:
+            good.append(m)
+            matchesMask[i] = [1, 0]
+
+
+    MIN_MATCH_COUNT = 9
+
+    print(len(good))
+
+    if len(good) > MIN_MATCH_COUNT:
+        src_pts = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
+        dst_pts = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
+        M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 3)
+
+    else:
+        print('error: not enough good matches to estimate a homography')
+        return
+
+    # print(M)
+    return M
+
+
+
+
+def match_colors(im_ref, im_q, im_test):
+
+    im_ref_mean_re = im_ref.view(*im_ref.shape[:2], -1)
+    im_q_mean_re = im_q.view(*im_q.shape[:2], -1)
+
+    # Estimate color transformation matrix by minimizing the least squares error
+    c_mat_all = []
+    for ir, iq in zip(im_ref_mean_re, im_q_mean_re):
+        c = torch.linalg.lstsq(iq.t(), ir.t())
+        c = c.solution[:im_ref_mean_re.size(1)]
+        c_mat_all.append(c)
+
+    c_mat = torch.stack(c_mat_all, dim=0)
+    # Apply the transformation to test image
+    im_test_re = im_test.view(*im_test.shape[:2], -1)
+    im_t_conv = torch.matmul(im_test_re.permute(0, 2, 1), c_mat).permute(0, 2, 1)
+    im_t_conv = im_t_conv.view(im_test.shape)
+
+    return im_t_conv
+
+def color_correction(gt, in_put, output, scale_factor=2):
+    # ds_gt = F.interpolate(gt, scale_factor=1.0 / scale_factor, mode='bilinear', align_corners=False, recompute_scale_factor=True)
+    output_cor = match_channel_colors(gt, in_put, output)
+    return output_cor
+
+def match_channel_colors(im_ref, im_q, im_test):
+
+    im_ref_reshape = im_ref.view(*im_ref.shape[:2], -1)
+    im_q_reshape = im_q.view(*im_q.shape[:2], -1)
+    im_test_reshape = im_test.view(*im_test.shape[:2], -1)
+    # Estimate color transformation matrix by minimizing the least squares error
+
+    im_t_conv_list = []
+    for i in range(im_ref.size(1)):
+        c_mat_all = []
+        for ir_batch, iq_batch in zip(im_ref_reshape[:, i:i+1, :], im_q_reshape[:, i:i+1, :]):
+            c = torch.linalg.lstsq(iq_batch.t(), ir_batch.t())
+            c = c.solution[:1]
+            c_mat_all.append(c)
+
+        c_mat = torch.stack(c_mat_all, dim=0)
+        # Apply the transformation to test image
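+        # c_mat was fit on (query -> reference) pairs for this channel, so the
+        # matmul below applies the same per-batch correction to the test image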
+ im_t_conv = torch.matmul(im_test_reshape[:, i:i+1, :].permute(0, 2, 1), c_mat).permute(0, 2, 1) + im_t_conv = im_t_conv.view(*im_t_conv.shape[:2], *im_test.shape[-2:]) + im_t_conv_list.append(im_t_conv) + + im_t_conv = torch.cat(im_t_conv_list, dim=1) + + return im_t_conv + + + + +def img2tensor(imgs, bgr2rgb=True, float32=True): + """Numpy array to tensor. + + Args: + imgs (list[ndarray] | ndarray): Input images. + bgr2rgb (bool): Whether to change bgr to rgb. + float32 (bool): Whether to change to float32. + + Returns: + list[tensor] | tensor: Tensor images. If returned results only have + one element, just return tensor. + """ + + def _totensor(img, bgr2rgb, float32): + if img.shape[2] == 3 and bgr2rgb: + if img.dtype == 'float64': + img = img.astype('float32') + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = torch.from_numpy(img.transpose(2, 0, 1)) + if float32: + img = img.float() + return img + + if isinstance(imgs, list): + return [_totensor(img, bgr2rgb, float32) for img in imgs] + else: + return _totensor(imgs, bgr2rgb, float32) + + +def tensor2img(tensor, rgb2bgr=True, out_type=np.uint8, min_max=(0, 1)): + """Convert torch Tensors into image numpy arrays. + + After clamping to [min, max], values will be normalized to [0, 1]. + + Args: + tensor (Tensor or list[Tensor]): Accept shapes: + 1) 4D mini-batch Tensor of shape (B x 3/1 x H x W); + 2) 3D Tensor of shape (3/1 x H x W); + 3) 2D Tensor of shape (H x W). + Tensor channel should be in RGB order. + rgb2bgr (bool): Whether to change rgb to bgr. + out_type (numpy type): output types. If ``np.uint8``, transform outputs + to uint8 type with range [0, 255]; otherwise, float type with + range [0, 1]. Default: ``np.uint8``. + min_max (tuple[int]): min and max values for clamp. + + Returns: + (Tensor or list): 3D ndarray of shape (H x W x C) OR 2D ndarray of + shape (H x W). The channel order is BGR. + """ + if not (torch.is_tensor(tensor) or (isinstance(tensor, list) and all(torch.is_tensor(t) for t in tensor))): + raise TypeError(f'tensor or list of tensors expected, got {type(tensor)}') + + if torch.is_tensor(tensor): + tensor = [tensor] + result = [] + for _tensor in tensor: + _tensor = _tensor.squeeze(0).float().detach().cpu().clamp_(*min_max) + _tensor = (_tensor - min_max[0]) / (min_max[1] - min_max[0]) + + n_dim = _tensor.dim() + if n_dim == 4: + img_np = make_grid(_tensor, nrow=int(math.sqrt(_tensor.size(0))), normalize=False).numpy() + img_np = img_np.transpose(1, 2, 0) + if rgb2bgr: + img_np = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR) + elif n_dim == 3: + img_np = _tensor.numpy() + img_np = img_np.transpose(1, 2, 0) + if img_np.shape[2] == 1: # gray image + img_np = np.squeeze(img_np, axis=2) + else: + if rgb2bgr: + img_np = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR) + elif n_dim == 2: + img_np = _tensor.numpy() + else: + raise TypeError(f'Only support 4D, 3D or 2D tensor. But received with dimension: {n_dim}') + if out_type == np.uint8: + # Unlike MATLAB, numpy.unit8() WILL NOT round by default. 
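+            # hence the explicit round() on the next line before the uint8 cast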
+ img_np = (img_np * 255.0).round() + img_np = img_np.astype(out_type) + result.append(img_np) + if len(result) == 1: + result = result[0] + return result \ No newline at end of file diff --git a/IIR-Lab/dataloader/__pycache__/__init__.cpython-310.pyc b/IIR-Lab/dataloader/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6df185c870745d802322a7a7d453584bf131b0e8 Binary files /dev/null and b/IIR-Lab/dataloader/__pycache__/__init__.cpython-310.pyc differ diff --git a/IIR-Lab/dataloader/__pycache__/__init__.cpython-38.pyc b/IIR-Lab/dataloader/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..114cbf4d46603f40b8d246417caee08d6cd6ee48 Binary files /dev/null and b/IIR-Lab/dataloader/__pycache__/__init__.cpython-38.pyc differ diff --git a/IIR-Lab/dataloader/__pycache__/data_sampler.cpython-310.pyc b/IIR-Lab/dataloader/__pycache__/data_sampler.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fbeeb2fdab6d18eea40dcffcee5565d80995d60d Binary files /dev/null and b/IIR-Lab/dataloader/__pycache__/data_sampler.cpython-310.pyc differ diff --git a/IIR-Lab/dataloader/__pycache__/data_sampler.cpython-38.pyc b/IIR-Lab/dataloader/__pycache__/data_sampler.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3300ba52b771594ac5b90800e8a21d74e6eb973e Binary files /dev/null and b/IIR-Lab/dataloader/__pycache__/data_sampler.cpython-38.pyc differ diff --git a/IIR-Lab/dataloader/__pycache__/data_utils.cpython-310.pyc b/IIR-Lab/dataloader/__pycache__/data_utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..96c573f64c18b124c2d5cf245248f3f6075945a0 Binary files /dev/null and b/IIR-Lab/dataloader/__pycache__/data_utils.cpython-310.pyc differ diff --git a/IIR-Lab/dataloader/__pycache__/data_utils.cpython-312.pyc b/IIR-Lab/dataloader/__pycache__/data_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..695ec5cae7a27579e8b3555204854f95d2708a96 Binary files /dev/null and b/IIR-Lab/dataloader/__pycache__/data_utils.cpython-312.pyc differ diff --git a/IIR-Lab/dataloader/__pycache__/data_utils.cpython-39.pyc b/IIR-Lab/dataloader/__pycache__/data_utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cd6899b27724f02d17f2c4a2542e4d7ba5beaeff Binary files /dev/null and b/IIR-Lab/dataloader/__pycache__/data_utils.cpython-39.pyc differ diff --git a/IIR-Lab/dataloader/__pycache__/dataset.cpython-310.pyc b/IIR-Lab/dataloader/__pycache__/dataset.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9d2c88b0f2f027069ce695ad586fb263ba106234 Binary files /dev/null and b/IIR-Lab/dataloader/__pycache__/dataset.cpython-310.pyc differ diff --git a/IIR-Lab/dataloader/__pycache__/dataset.cpython-312.pyc b/IIR-Lab/dataloader/__pycache__/dataset.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a57db53f8aa60ede1129a78186692ce15bdc4477 Binary files /dev/null and b/IIR-Lab/dataloader/__pycache__/dataset.cpython-312.pyc differ diff --git a/IIR-Lab/dataloader/__pycache__/dataset.cpython-38.pyc b/IIR-Lab/dataloader/__pycache__/dataset.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7e5f74d2cc4cb6ced63c128789d4e960a5f6c22f Binary files /dev/null and b/IIR-Lab/dataloader/__pycache__/dataset.cpython-38.pyc differ diff --git 
a/IIR-Lab/dataloader/__pycache__/dataset.cpython-39.pyc b/IIR-Lab/dataloader/__pycache__/dataset.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..20850380d2cdf0610da32892ad22b8af6a0e6451 Binary files /dev/null and b/IIR-Lab/dataloader/__pycache__/dataset.cpython-39.pyc differ diff --git a/IIR-Lab/dataloader/data_utils.py b/IIR-Lab/dataloader/data_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..8e1420805f51d6309748d44276a815e0e738ca02 --- /dev/null +++ b/IIR-Lab/dataloader/data_utils.py @@ -0,0 +1,504 @@ +from __future__ import division + +import numpy as np +import cv2 +import random +import torch +import glob +import os +from random import choices +from scipy.stats import poisson + +def Rawread(path,low=0): + if path.endswith('.raw'): + return read_img(path,low) + if path.endswith('.npy'): + return read_npy(path,low) + if path.endswith('.png'): + return read_png(path,low) + +def read_img(path,low): + w = 4000 + h = 3000 + + raw = np.fromfile(path,np.uint16) + raw = raw.reshape((h,w)) + raw = raw.astype(np.float32)-64 + raw = rggb_raw(raw) + raw = np.clip(raw, low, 959) + + return raw + + +def read_npy(path,low): + + raw = np.load(path) + + if raw.shape[0] == 4: + return raw * 959 + raw = raw.astype(np.float32)-64 + raw = rggb_raw(raw) + raw = np.clip(raw, low, 959) + return raw + +def read_rawpng(path, metadata): + + raw = cv2.imread(str(path), cv2.IMREAD_UNCHANGED) + + # if raw.shape[0] == 4: + # return raw * 959 + raw = ((raw.astype(np.float32) - 256.) / (4095. - 256.)).clip(0, 1) + + raw = bayer2raw(raw, metadata) + raw = np.clip(raw, 0., 1.) + return raw + +def read_png(path, low): + + raw = cv2.imread(str(path), cv2.IMREAD_UNCHANGED) + + if raw.shape[0] == 4: + return raw * 959 + raw = raw.astype(np.float32)-256 + raw = rggb_raw(raw) + raw = np.clip(raw, low, 4095) + return raw + +def random_crop(frames_0, frames_1=None, crop_size=128): + + F,C, H, W = frames_0.shape + + rnd_w = random.randint(0, W - crop_size) + rnd_h = random.randint(0, H - crop_size) + + patch = frames_0[..., rnd_h:rnd_h + crop_size, rnd_w:rnd_w + crop_size] + if frames_1 is not None: + patch1 = frames_1[..., rnd_h:rnd_h + crop_size, rnd_w:rnd_w + crop_size] + return np.concatenate([patch,patch1],axis=0) + + return patch + +def rggb_raw(raw): + # pack RGGB Bayer raw to 4 channels + H, W = raw.shape + raw = raw[None, ...] + raw_pack = np.concatenate((raw[:, 0:H:2, 0:W:2], + raw[:, 0:H:2, 1:W:2], + raw[:, 1:H:2, 0:W:2], + raw[:, 1:H:2, 1:W:2]), axis=0) + return raw_pack + +def bayer2raw(raw, metadata): + # pack RGGB Bayer raw to 4 channels + H, W = raw.shape + raw = raw[None, ...]
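+ # cfa_pattern[0] == 0 denotes an RGGB mosaic; any other value is treated as BGGR below. + # Both branches emit the same fixed [R, G1, G2, B] channel order, so downstream code never needs to know the original CFA layout.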
+ if metadata['cfa_pattern'][0] == 0: + # RGGB + raw_pack = np.concatenate((raw[:, 0:H:2, 0:W:2], + raw[:, 0:H:2, 1:W:2], + raw[:, 1:H:2, 0:W:2], + raw[:, 1:H:2, 1:W:2]), axis=0) + else: + # BGGR + raw_pack = np.concatenate((raw[:, 1:H:2, 1:W:2], + raw[:, 0:H:2, 1:W:2], + raw[:, 1:H:2, 0:W:2], + raw[:, 0:H:2, 0:W:2]), axis=0) + return raw_pack + +def raw_rggb(raws): + # depack 4 channels raw to RGGB Bayer + C, H, W = raws.shape + output = np.zeros((H * 2, W * 2)).astype(np.uint16) + + output[0:2 * H:2, 0:2 * W:2] = raws[0:1, :, :] + output[0:2 * H:2, 1:2 * W:2] = raws[1:2, :, :] + output[1:2 * H:2, 0:2 * W:2] = raws[2:3, :, :] + output[1:2 * H:2, 1:2 * W:2] = raws[3:4, :, :] + + return output + + +def raw_rggb_float32(raws): + # depack 4 channels raw to RGGB Bayer + C, H, W = raws.shape + output = np.zeros((H * 2, W * 2)).astype(np.float32) + + output[0:2 * H:2, 0:2 * W:2] = raws[0:1, :, :] + output[0:2 * H:2, 1:2 * W:2] = raws[1:2, :, :] + output[1:2 * H:2, 0:2 * W:2] = raws[2:3, :, :] + output[1:2 * H:2, 1:2 * W:2] = raws[3:4, :, :] + + return output + + +def depack_rggb_raws(raws): + # depack 4 channels raw to RGGB Bayer + N, C, H, W = raws.shape + output = torch.zeros((N, 1, H * 2, W * 2)) + + output[:, :, 0:2 * H:2, 0:2 * W:2] = raws[:, 0:1, :, :] + output[:, :, 0:2 * H:2, 1:2 * W:2] = raws[:, 1:2, :, :] + output[:, :, 1:2 * H:2, 0:2 * W:2] = raws[:, 2:3, :, :] + output[:, :, 1:2 * H:2, 1:2 * W:2] = raws[:, 3:4, :, :] + + return output + + + +# IMAGETYPES = ('*.bmp', '*.png', '*.jpg', '*.jpeg', '*.tif') +IMAGETYPES = ('*.npy','*.raw',) # the trailing comma is required: without it a one-element tuple collapses to a plain string, and iterating would split it into characters + +def get_imagenames(seq_dir, pattern=None): + """ Get ordered list of filenames + """ + files = [] + for typ in IMAGETYPES: + files.extend(glob.glob(os.path.join(seq_dir, typ))) + + # filter filenames + if pattern is not None: + files = [f for f in files if pattern in os.path.split(f)[-1]] + + # sort filenames numerically by the digits they contain + files.sort(key=lambda f: int(''.join(filter(str.isdigit, f)))) + return files + +def open_sequence(seq_dir, gray_mode, expand_if_needed=False, max_num_fr=100): + r""" Opens a sequence of images and expands it to even sizes if necessary + Args: + seq_dir: string, path to image sequence + gray_mode: boolean, True if the images are to be opened in grayscale mode + expand_if_needed: if True, the spatial dimensions will be expanded if + size is odd + max_num_fr: maximum number of frames to load + Returns: + seq: array of dims [num_frames, C, H, W] (C=4 here, RGGB-packed), H and W are even. + The images get normalized to the range [0, 1]. + expanded_h: True if original dim H was odd and image got expanded in this dimension. + expanded_w: True if original dim W was odd and image got expanded in this dimension.
+ """ + # Get ordered list of filenames + files = get_imagenames(seq_dir) + + seq_list_raw = [] + seq_list_raw_noise = [] + print("\tOpen sequence in folder: ", seq_dir) + for fpath in files[0:max_num_fr]: + + raw, raw_noise, expanded_h, expanded_w = open_image(fpath,\ + gray_mode=gray_mode,\ + expand_if_needed=expand_if_needed,\ + expand_axis0=False) + + raw = rggb_raw(raw) + raw_noise = rggb_raw(raw_noise) + + + seq_list_raw.append(raw) + seq_list_raw_noise.append(raw_noise) + seq_raw = np.stack(seq_list_raw, axis=0) + seq_raw_noise = np.stack(seq_list_raw_noise, axis=0) + return seq_raw, seq_raw_noise, expanded_h, expanded_w + +def open_image(fpath, gray_mode, expand_if_needed=False, expand_axis0=True, normalize_data=True): + r""" Opens an image and expands it if necesary + Args: + fpath: string, path of image file + gray_mode: boolean, True indicating if image is to be open + in grayscale mode + expand_if_needed: if True, the spatial dimensions will be expanded if + size is odd + expand_axis0: if True, output will have a fourth dimension + Returns: + img: image of dims NxCxHxW, N=1, C=1 grayscale or C=3 RGB, H and W are even. + if expand_axis0=False, the output will have a shape CxHxW. + The image gets normalized to the range [0, 1]. + expanded_h: True if original dim H was odd and image got expanded in this dimension. + expanded_w: True if original dim W was odd and image got expanded in this dimension. + """ + # if not gray_mode: + # # Open image as a CxHxW torch.Tensor + # img = cv2.imread(fpath) + # # from HxWxC to CxHxW, RGB image + # img = (cv2.cvtColor(img, cv2.COLOR_BGR2RGB)).transpose(2, 0, 1) + # else: + # # from HxWxC to CxHxW grayscale image (C=1) + # img = cv2.imread(fpath, cv2.IMREAD_GRAYSCALE) + + + + # 测试真实的图片 + # raw_img = ((np.fromfile(fpath,np.uint16).astype(np.float32))*4833)/2048 + # raw_img = np.clip(raw_img-64, 0, 1023-64) + # raw_img = raw_img.reshape((3000,4000)) + + # raw_img = np.load(fpath).astype(np.float32)-64 + w = 4000 + h = 3000 + raw_img = np.fromfile(fpath,dtype=np.uint16,count=w*h) + raw_img = raw_img.reshape((h,w)).astype(np.float32)-64 + raw_img = np.clip(raw_img, 0, 959) + + noise_fpath =fpath.replace('onlyraw_test_clean_raw','onlyraw_test_noise_raw') + raw_img_noise = np.fromfile(noise_fpath,dtype=np.uint16,count=w*h) + raw_img_noise = raw_img_noise.reshape((h,w)).astype(np.float32)-64 + raw_img_noise = np.clip(raw_img_noise, 0, 959) + + + #blc + + + # if expand_axis0: + # img = np.expand_dims(img, 0) + + # Handle odd sizes + expanded_h = False + expanded_w = False + sh_im = raw_img.shape + # if expand_if_needed: + # if sh_im[-2]%2 == 1: + # expanded_h = True + # if expand_axis0: + # img = np.concatenate((img, \ + # img[:, :, -1, :][:, :, np.newaxis, :]), axis=2) + # else: + # img = np.concatenate((img, \ + # img[:, -1, :][:, np.newaxis, :]), axis=1) + + + # if sh_im[-1]%2 == 1: + # expanded_w = True + # if expand_axis0: + # img = np.concatenate((img, \ + # img[:, :, :, -1][:, :, :, np.newaxis]), axis=3) + # else: + # img = np.concatenate((img, \ + # img[:, :, -1][:, :, np.newaxis]), axis=2) + + if normalize_data: + raw_img = normalize(raw_img) + raw_img_noise = normalize(raw_img_noise) + return raw_img, raw_img_noise, expanded_h, expanded_w + + +def normalize(data): + r"""Normalizes a unit8 image to a float32 image in the range [0, 1] + + Args: + data: a unint8 numpy array to normalize from [0, 255] to [0, 1] + """ + return np.float32(data/(959)) + + +def augment_cuda(batches, args, spynet=None): + + def _augment(img, hflip=True, rot=True): + + 
hflip = hflip and random.random() < 0.5 + vflip = rot and random.random() < 0.5 + # rot90 = rot and random.random() < 0.5 + k1 = np.random.randint(0, 4) #0,1,2,3 + if hflip: img = img.flip(-1) + if vflip: img = img.flip(-2) + + img = torch.rot90(img, k=k1, dims=[-2, -1]) + + return img + + batches_aug = _augment(batches) + + if args.pair: + noise = batches_aug[:,:args.frame,...]/959 + clean = batches_aug[:,args.frame,...]/959 #if args.scene != 'noisedata' else batches_aug[:,args.frame,...] + + + else: + clean, noise = Noise_simulation(batches_aug,args) + if not args.consistent_loss: + clean = clean[:, args.frame // 2, ...] + B, F, C , H, W = noise.shape + noise = noise.reshape(B, F*C , H, W ) + + + return clean, noise, None + + +def Noise_simulation(batches_aug,args): + batches_aug = batches_aug/959 + batches_aug = torch.clamp(batches_aug , 0, 1) + B = batches_aug.shape[0] + batch_aug_mean = batches_aug.mean(dim=(1,2,3,4)) + if args.need_Scaling: + if args.sample_gain == 'type1': + # rand_avg = torch.randint(args.luminance_low, args.luminance_high ,(B, )).cuda(args.local_rank) + rand_avg = (torch.rand((B)) * 0.12 + 0.001).cuda(args.local_rank) + if args.sample_gain == 'type2': + rand_avg = Gain_Sampler(B).cuda(args.local_rank) + + coef = (batch_aug_mean / rand_avg).unsqueeze(-1).unsqueeze(-1).unsqueeze(-1).unsqueeze(-1) + batch_aug_dark = torch.clamp(batches_aug / coef, 0, 1) + else: + batch_aug_dark = batches_aug + + a,b, again, dgain = random_noise_levels_nightimaging(B, args) + batch_aug_dark,batch_aug_dark_noise = add_noise(args, batch_aug_dark,a.cuda(args.local_rank),b.cuda(args.local_rank),dgain.cuda(args.local_rank)) + + batch_aug_dark_noise = torch.clamp(batch_aug_dark_noise, -0.1, 1) + + # print(batch_aug_dark_noise.mean()) + return batch_aug_dark.float(), batch_aug_dark_noise.float() + +def random_noise_levels_nightimaging(B, args): + # print('use new') + g = torch.FloatTensor(B).uniform_(0, 125).int().long() + noise_profile = torch.from_numpy(np.load('/data1/chengqihua/02_code/03_night_photogrphy/nightimage_v1/dataloader/json_all_2nd.npy')) + + a = noise_profile[g,0] + b = noise_profile[g,1] + + return a, b, 1, 1*torch.ones(1) + +def random_noise_levels(B, args): + ak1=0.05244803 + ak2=0.01498041 + bk1=0.00648923 + bk2 = 0.05899386 + bk3 = 0.21520193 + g = torch.FloatTensor(B).uniform_(args.min_gain, args.max_gain) + + maskA = g > 16 + + again = g.clone() + again[maskA] = 16 + + maskB = g < 16 + + dgain = g.clone() / 16 + dgain[maskB] = 1 + + + + a = ak1 * again + ak2 + b = bk1 * again*again + bk2* again + bk3 + + return a, b, again, dgain + +def add_noise(args, image, a, b, dgain): + + dgain = dgain.unsqueeze(1).unsqueeze(1).unsqueeze(1).unsqueeze(1) + a = a.unsqueeze(1).unsqueeze(1).unsqueeze(1).unsqueeze(1) + b = b.unsqueeze(1).unsqueeze(1).unsqueeze(1).unsqueeze(1) + + + B, F, C, H, W = image.size() + + image = image / dgain + + + poisson_noisy_img = torch.poisson(image/a)*a + + gaussian_noise = torch.sqrt(b)*torch.randn(B, F, C, H, W).cuda(args.local_rank) + + noiseimg = poisson_noisy_img + gaussian_noise + + if args.usedgain: + noiseimg = noiseimg * dgain + image = image * dgain + return image, noiseimg + + + +def normalize_augment(datain): + '''Normalizes and augments an input patch of dim [N, num_frames, C, H, W] in [0., 255.]: it is flattened to \ + [N, num_frames*C, H, W], scaled to [0., 1.], jointly augmented, and reshaped back. It also returns the temporal \ + patch itself (originally the central frame; edited by cjm: now all frames) as a ground truth.
+ ''' + def transform(sample): + # define transformations + do_nothing = lambda x: x + do_nothing.__name__ = 'do_nothing' + flipud = lambda x: torch.flip(x, dims=[2]) + flipud.__name__ = 'flipud' + rot90 = lambda x: torch.rot90(x, k=1, dims=[2, 3]) + rot90.__name__ = 'rot90' + rot90_flipud = lambda x: torch.flip(torch.rot90(x, k=1, dims=[2, 3]), dims=[2]) + rot90_flipud.__name__ = 'rot90_flipud' + rot180 = lambda x: torch.rot90(x, k=2, dims=[2, 3]) + rot180.__name__ = 'rot180' + rot180_flipud = lambda x: torch.flip(torch.rot90(x, k=2, dims=[2, 3]), dims=[2]) + rot180_flipud.__name__ = 'rot180_flipud' + rot270 = lambda x: torch.rot90(x, k=3, dims=[2, 3]) + rot270.__name__ = 'rot270' + rot270_flipud = lambda x: torch.flip(torch.rot90(x, k=3, dims=[2, 3]), dims=[2]) + rot270_flipud.__name__ = 'rot270_flipud' + add_csnt = lambda x: x + torch.normal(mean=torch.zeros(x.size()[0], 1, 1, 1), \ + std=(5/255.)).expand_as(x).to(x.device) + add_csnt.__name__ = 'add_csnt' + + # define transformations and their frequency, then pick one. + aug_list = [do_nothing, flipud, rot90, rot90_flipud, \ + rot180, rot180_flipud, rot270, rot270_flipud, add_csnt] + w_aug = [32, 12, 12, 12, 12, 12, 12, 12, 12] # one-fourth chance to do_nothing + transf = choices(aug_list, w_aug) + + # transform all images in array + return transf[0](sample) + + img_train = datain #torch.Size([8, 11, 3, 96, 96]) + # convert to [N, num_frames*C, H, W] in [0., 1.] from [N, num_frames, C, H, W] in [0., 255.] + N, F, C, H, W = img_train.shape + img_train = img_train.view(img_train.size()[0], -1, \ + img_train.size()[-2], img_train.size()[-1]) / 255. # torch.Size([8, 33, 96, 96]) + + # augment + img_train = transform(img_train) + img_train = img_train.view(N, F, C, H, W) + # extract ground truth (central frame) + # gt_train = img_train[:, 3*ctrl_fr_idx:3*ctrl_fr_idx+3, :, :] + return img_train, img_train + +def Gain_Sampler(B): + gain_dict = { + 'low':[5,35], + 'mid':[35,60], + 'high':[60,100] + } + + level = ['low','mid','high'] + sampled = np.random.choice(level, B, p=[0.7, 0.2, 0.1]) # p= must be passed by keyword: positionally it binds to `replace` and the probabilities are silently ignored + gains = [] + for index in sampled: + gains.append(torch.randint(gain_dict[index][0],gain_dict[index][1],(1,))) + + return torch.Tensor(gains) + +def path_replace(path,args): + for i in range(len(args.replace_left)): + path = path.replace(args.replace_left[i],args.replace_right[i]) + return path \ No newline at end of file diff --git a/IIR-Lab/dataloader/dataset.py b/IIR-Lab/dataloader/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..29dff422be75f9f356a079be9fcdea38e161375e --- /dev/null +++ b/IIR-Lab/dataloader/dataset.py @@ -0,0 +1,97 @@ +import os +import numpy as np +import random +import torch +from torch.utils.data import Dataset +from torchvision import transforms +import cv2 +import sys +from pathlib import Path +import glob +sys.path.append('..') +# from utils_ours import util +from dataloader.data_utils import rggb_raw, random_crop, Rawread, path_replace, bayer2raw, read_rawpng +from natsort import ns, natsorted +import time +from tqdm import tqdm +from multiprocessing import Pool +import pdb +import json +from fractions import Fraction +from json import JSONEncoder +from exifread.utils import Ratio + +class imageSet(Dataset): + def __init__(self,args): + super().__init__() + self.args = args + self.test_dir = args.test_dir + self.debug = args.debug + + self.paths = [] + for file in os.listdir(self.test_dir): + if '.png' in file: + self.img_path = os.path.join(self.test_dir, file) +
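# pair every .png in the test dir with its same-named .json metadata sidecar +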
self.json_path = self.img_path.replace('.png', '.json') + self.paths.append({'img_path': self.img_path, 'json_path': self.json_path}) + + def __getitem__(self, index): + img_path = self.paths[index]['img_path'] + json_path = self.paths[index]['json_path'] + + metadata = json_read(json_path, object_hook=fraction_from_json) + + input_img = read_rawpng(img_path, metadata) + + return {'input': input_img, 'json_path': json_path} + + def __len__(self): + return len(self.paths) + + +def normalize(raw_image, black_level, white_level): + if type(black_level) is list and len(black_level) == 1: + black_level = float(black_level[0]) + if type(white_level) is list and len(white_level) == 1: + white_level = float(white_level[0]) + black_level_mask = black_level + if type(black_level) is list and len(black_level) == 4: + if type(black_level[0]) is Ratio: + black_level = ratios2floats(black_level) + if type(black_level[0]) is Fraction: + black_level = fractions2floats(black_level) + black_level_mask = np.zeros(raw_image.shape) + idx2by2 = [[0, 0], [0, 1], [1, 0], [1, 1]] + step2 = 2 + for i, idx in enumerate(idx2by2): + black_level_mask[idx[0]::step2, idx[1]::step2] = black_level[i] + normalized_image = raw_image.astype(np.float32) - black_level_mask + # if some values were smaller than black level + normalized_image[normalized_image < 0] = 0 + normalized_image = normalized_image / (white_level - black_level_mask) + return normalized_image + +def ratios2floats(ratios): + floats = [] + for ratio in ratios: + floats.append(float(ratio.num) / ratio.den) + return floats + +def fractions2floats(fractions): + floats = [] + for fraction in fractions: + floats.append(float(fraction.numerator) / fraction.denominator) + return floats + +def json_read(fname, **kwargs): + with open(fname) as j: + data = json.load(j, **kwargs) + return data + +def fraction_from_json(json_object): + if 'Fraction' in json_object: + return Fraction(*json_object['Fraction']) + return json_object + + diff --git a/IIR-Lab/denoise_model/high_iso.pth b/IIR-Lab/denoise_model/high_iso.pth new file mode 100644 index 0000000000000000000000000000000000000000..da69cbfe707f91be00ad1828cfa5c9f97551c95c --- /dev/null +++ b/IIR-Lab/denoise_model/high_iso.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6bb9641b1fced9a33651132672d6dd649f5428849f6d20ddd4bb478cda3a03e +size 465804789 diff --git a/IIR-Lab/denoise_model/high_mid_iso.pth b/IIR-Lab/denoise_model/high_mid_iso.pth new file mode 100644 index 0000000000000000000000000000000000000000..b216ef32e06dc5395b282812afb1977aea5e8a1a --- /dev/null +++ b/IIR-Lab/denoise_model/high_mid_iso.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01deae3e68e447f4a4c7d4f20442941e8127bf1b4a718d451f151ead782c7254 +size 465796068 diff --git a/IIR-Lab/denoise_model/low_iso.pth b/IIR-Lab/denoise_model/low_iso.pth new file mode 100644 index 0000000000000000000000000000000000000000..18fb3c69cbe3417393f1a287ed670b09667cc5d9 --- /dev/null +++ b/IIR-Lab/denoise_model/low_iso.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab5617cf98787e7e4a9c48d9e5fe6cde729ee7ad83913a01e43abb65734a0a5e +size 465795637 diff --git a/IIR-Lab/denoise_model/mid_iso.pth b/IIR-Lab/denoise_model/mid_iso.pth new file mode 100644 index 0000000000000000000000000000000000000000..f7524f4dfcdad030d308c490c760f332bcbe60c0 --- /dev/null +++ b/IIR-Lab/denoise_model/mid_iso.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid
sha256:db9e010a52d81d5cb1953c683f301458776cbc9dd3fa0a6ad7d7d15bf285540c +size 465796068 diff --git a/IIR-Lab/final_test.py b/IIR-Lab/final_test.py new file mode 100644 index 0000000000000000000000000000000000000000..60904c7f3b134a5ae4dbb5749a72035f16ea1e46 --- /dev/null +++ b/IIR-Lab/final_test.py @@ -0,0 +1,156 @@ +import os +import time +import argparse +import torch +import torch.backends.cudnn as cudnn +from utils_ours.util import setup_logger, print_args +from torch.utils.data import DataLoader +from dataloader.dataset import imageSet +from models.archs.NAF_arch import NAF_Video +from torch.nn.parallel import DistributedDataParallel +import numpy as np +import torch.nn.functional as F +from collections import OrderedDict +import torch.nn as nn +from models.utils import chunkV3 +import pdb +from ISP_pipeline import process_pngs_isp +import json +import cv2 +from skimage import io + +ISO = [50,125,320,640,800] +a = [0.00025822882,0.000580020745,0.00141667975,0.00278965863,0.00347614807] +b = [2.32350645e-06,3.1125155625e-06,8.328992952e-06,3.3315971808e-05,5.205620595e-05] + +# fit the measured noise parameters as functions of ISO: a linearly, b quadratically +coeff_a = np.polyfit(ISO,a,1) +coeff_b = np.polyfit(ISO,b,2) + +def main(): + + parser = argparse.ArgumentParser(description='imageTest') + + parser.add_argument('--frame', default=1, type=int) + parser.add_argument('--test_dir', default = "/data/", type=str) + parser.add_argument('--model_type', type=str, default='NAF_Video') + parser.add_argument('--save_folder', default='/data/', type=str) + parser.add_argument('--resume', default='', type=str) + parser.add_argument('--testoption', default='image', type=str) + parser.add_argument('--chunk', action='store_true') + parser.add_argument('--debug', action='store_true') + + args = parser.parse_args() + args.src_save_folder = '/data/' + + print(args.src_save_folder,'**********************') + if not os.path.exists(args.src_save_folder): + os.makedirs(args.src_save_folder) + print(args.src_save_folder) + + low_iso_model = "denoise_model/low_iso.pth" + mid_iso_model = "denoise_model/mid_iso.pth" + high_mid_iso_model = "denoise_model/high_mid_iso.pth" + high_iso_model = "denoise_model/high_iso.pth" + + network = NAF_Video(args).cuda() + + load_low_iso_net = torch.load(low_iso_model, map_location=torch.device('cpu')) + load_low_iso_net_clean = OrderedDict() + for k, v in load_low_iso_net.items(): + if k.startswith('module.'): + load_low_iso_net_clean[k[7:]] = v + else: + load_low_iso_net_clean[k] = v + + load_mid_iso_net = torch.load(mid_iso_model, map_location=torch.device('cpu')) + load_mid_iso_net_clean = OrderedDict() + for k, v in load_mid_iso_net.items(): + if k.startswith('module.'): + load_mid_iso_net_clean[k[7:]] = v + else: + load_mid_iso_net_clean[k] = v + + load_high_mid_iso_net = torch.load(high_mid_iso_model, map_location=torch.device('cpu')) + load_high_mid_iso_net_clean = OrderedDict() + for k, v in load_high_mid_iso_net.items(): + if k.startswith('module.'): + load_high_mid_iso_net_clean[k[7:]] = v + else: + load_high_mid_iso_net_clean[k] = v + + load_high_iso_net = torch.load(high_iso_model, map_location=torch.device('cpu')) + load_high_iso_net_clean = OrderedDict() # strip 'module.' here too, matching the other checkpoints + for k, v in load_high_iso_net.items(): + if k.startswith('module.'): + load_high_iso_net_clean[k[7:]] = v + else: + load_high_iso_net_clean[k] = v + + cudnn.benchmark = True + + test_dataset = imageSet(args) + test_dataloader = DataLoader(test_dataset, batch_size=1, num_workers=0, shuffle=False) + inference_time = [] + with torch.no_grad(): + + for data in test_dataloader: + + noise = data['input'].cuda() + json_path = data['json_path'][0] + scene_name = os.path.splitext(os.path.basename(json_path))[0] + + # now let's process the ISP module + json_cfa =
process_pngs_isp.readjson(json_path) + num_k = json_cfa['noise_profile'] + iso = (num_k[0] - coeff_a[1])/coeff_a[0] + + if iso < 900: + network.load_state_dict(load_low_iso_net_clean, strict=True) + network.eval() + elif iso < 1800: + network.load_state_dict(load_mid_iso_net_clean, strict=True) + network.eval() + elif iso < 5600: + network.load_state_dict(load_high_mid_iso_net_clean, strict=True) + network.eval() + else: + network.load_state_dict(load_high_iso_net_clean, strict=True) + network.eval() + + t0 = time.perf_counter() + + out = chunkV3(network, noise, args.testoption, patch_h=1024, patch_w=1024) + out = torch.clamp(out, 0., 1.) + + # name_rgb = os.path.join(args.src_save_folder, scene_name + '_' + str(int(iso)) + '.jpg') + name_rgb = os.path.join(args.src_save_folder, scene_name + '.jpg') + + if not os.path.exists(os.path.dirname(name_rgb)): + os.makedirs(os.path.dirname(name_rgb)) + + out = out[0] + del noise + torch.cuda.empty_cache() + + img_pro = process_pngs_isp.isp_night_imaging(out, json_cfa, iso, + do_demosaic = True, # H/2 W/2 + + do_channel_gain_white_balance = True, + do_xyz_transform = True, + do_srgb_transform = True, + + do_gamma_correct = True, # con + + do_refinement = True, # 32 bit + do_to_uint8 = True, + + do_resize_using_pil = True, # H/8, W/8 + do_fix_orientation = True + ) + + t1 = time.perf_counter() + inference_time.append(t1-t0) + img_pro = cv2.cvtColor(img_pro, cv2.COLOR_RGB2BGR) + cv2.imwrite(name_rgb, img_pro, [cv2.IMWRITE_JPEG_QUALITY, 100]) # output is .jpg, so use the JPEG quality flag (the PNG compression flag here was silently ignored) + + print("Inference {} in {:.3f}s".format(scene_name, t1 - t0)) + print(f"Average inference time: {np.mean(inference_time)} seconds") + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/IIR-Lab/json_all.npy b/IIR-Lab/json_all.npy new file mode 100644 index 0000000000000000000000000000000000000000..f339460cc1f7300a922ea938107908841bd47550 --- /dev/null +++ b/IIR-Lab/json_all.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33de236d4d9a7e2375fd4677847670201cd913c6d28c8838be9f1fc1edeedd01 +size 3328 diff --git a/IIR-Lab/models/__init__.py b/IIR-Lab/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/IIR-Lab/models/__init__.py @@ -0,0 +1 @@ + diff --git a/IIR-Lab/models/__pycache__/Ch_loss.cpython-310.pyc b/IIR-Lab/models/__pycache__/Ch_loss.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6a467555ee4dae1a3d36b826c70e0724aae9e96f Binary files /dev/null and b/IIR-Lab/models/__pycache__/Ch_loss.cpython-310.pyc differ diff --git a/IIR-Lab/models/__pycache__/Ch_loss.cpython-312.pyc b/IIR-Lab/models/__pycache__/Ch_loss.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d700e2c5dffb9082ea3cb01f8bea95df1254d11e Binary files /dev/null and b/IIR-Lab/models/__pycache__/Ch_loss.cpython-312.pyc differ diff --git a/IIR-Lab/models/__pycache__/Ch_loss.cpython-39.pyc b/IIR-Lab/models/__pycache__/Ch_loss.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cc50c91473f69893461cb0ce041a7d53c4163d86 Binary files /dev/null and b/IIR-Lab/models/__pycache__/Ch_loss.cpython-39.pyc differ diff --git a/IIR-Lab/models/__pycache__/ISP.cpython-310.pyc b/IIR-Lab/models/__pycache__/ISP.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..59bb7df458116aa49821ebfd70ab010c6e4023e1 Binary files /dev/null and b/IIR-Lab/models/__pycache__/ISP.cpython-310.pyc differ diff
--git a/IIR-Lab/models/__pycache__/ISP.cpython-312.pyc b/IIR-Lab/models/__pycache__/ISP.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..af8f88041c3730007d807023fa779a5f98545360 Binary files /dev/null and b/IIR-Lab/models/__pycache__/ISP.cpython-312.pyc differ diff --git a/IIR-Lab/models/__pycache__/ISP.cpython-39.pyc b/IIR-Lab/models/__pycache__/ISP.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0d657e87fd3c7c5aa0acc7ebee8e8b871200e7a5 Binary files /dev/null and b/IIR-Lab/models/__pycache__/ISP.cpython-39.pyc differ diff --git a/IIR-Lab/models/__pycache__/VGG_loss.cpython-310.pyc b/IIR-Lab/models/__pycache__/VGG_loss.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e488640f478dcbb8a03d6be8e16612396ef02f1f Binary files /dev/null and b/IIR-Lab/models/__pycache__/VGG_loss.cpython-310.pyc differ diff --git a/IIR-Lab/models/__pycache__/VGG_loss.cpython-312.pyc b/IIR-Lab/models/__pycache__/VGG_loss.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e5df221b6cd0a30a2f3f693c4ef2967b579d8ef3 Binary files /dev/null and b/IIR-Lab/models/__pycache__/VGG_loss.cpython-312.pyc differ diff --git a/IIR-Lab/models/__pycache__/VGG_loss.cpython-39.pyc b/IIR-Lab/models/__pycache__/VGG_loss.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..14635998c73ff99ebd4e926a28221eb2315fe315 Binary files /dev/null and b/IIR-Lab/models/__pycache__/VGG_loss.cpython-39.pyc differ diff --git a/IIR-Lab/models/__pycache__/__init__.cpython-310.pyc b/IIR-Lab/models/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5a36f60d4c5f61f4b35528363253d9e0029ea7fb Binary files /dev/null and b/IIR-Lab/models/__pycache__/__init__.cpython-310.pyc differ diff --git a/IIR-Lab/models/__pycache__/__init__.cpython-312.pyc b/IIR-Lab/models/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3286cd00b5083de052e221c85f326f2bb8e64bef Binary files /dev/null and b/IIR-Lab/models/__pycache__/__init__.cpython-312.pyc differ diff --git a/IIR-Lab/models/__pycache__/__init__.cpython-38.pyc b/IIR-Lab/models/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..068d6a7ee1d09ae9451d092b77704e25a96691a6 Binary files /dev/null and b/IIR-Lab/models/__pycache__/__init__.cpython-38.pyc differ diff --git a/IIR-Lab/models/__pycache__/__init__.cpython-39.pyc b/IIR-Lab/models/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a80e319a88f0fa6e2607650a22f36230b71153ed Binary files /dev/null and b/IIR-Lab/models/__pycache__/__init__.cpython-39.pyc differ diff --git a/IIR-Lab/models/__pycache__/arch_util.cpython-310.pyc b/IIR-Lab/models/__pycache__/arch_util.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d8b21179565215ac001f47d46a4bea2cddc75bca Binary files /dev/null and b/IIR-Lab/models/__pycache__/arch_util.cpython-310.pyc differ diff --git a/IIR-Lab/models/__pycache__/global_queue_buffer.cpython-310.pyc b/IIR-Lab/models/__pycache__/global_queue_buffer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..71d69d04b50c0c66477fdc7f316aaeb09e2dc6da Binary files /dev/null and b/IIR-Lab/models/__pycache__/global_queue_buffer.cpython-310.pyc differ diff --git a/IIR-Lab/models/__pycache__/loss_util.cpython-310.pyc 
b/IIR-Lab/models/__pycache__/loss_util.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5669a929bc0f7a9f903dcfaaddea03704196af70 Binary files /dev/null and b/IIR-Lab/models/__pycache__/loss_util.cpython-310.pyc differ diff --git a/IIR-Lab/models/__pycache__/loss_util.cpython-38.pyc b/IIR-Lab/models/__pycache__/loss_util.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2be79af817f437f4d40f79a00c4decd0eadca90a Binary files /dev/null and b/IIR-Lab/models/__pycache__/loss_util.cpython-38.pyc differ diff --git a/IIR-Lab/models/__pycache__/losses.cpython-310.pyc b/IIR-Lab/models/__pycache__/losses.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9c865224fb9f4c2a13432239d3bdab063ca5dcdc Binary files /dev/null and b/IIR-Lab/models/__pycache__/losses.cpython-310.pyc differ diff --git a/IIR-Lab/models/__pycache__/losses.cpython-38.pyc b/IIR-Lab/models/__pycache__/losses.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..64e136b619b1ed6d7b571c22a035da780ff06f6d Binary files /dev/null and b/IIR-Lab/models/__pycache__/losses.cpython-38.pyc differ diff --git a/IIR-Lab/models/__pycache__/lr_scheduler.cpython-310.pyc b/IIR-Lab/models/__pycache__/lr_scheduler.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cdaea6b4d8460de6d61f7ad397f5c2c435f67da4 Binary files /dev/null and b/IIR-Lab/models/__pycache__/lr_scheduler.cpython-310.pyc differ diff --git a/IIR-Lab/models/__pycache__/lr_scheduler.cpython-38.pyc b/IIR-Lab/models/__pycache__/lr_scheduler.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aa212b07cd9c37c353f604d553a657d588953c3e Binary files /dev/null and b/IIR-Lab/models/__pycache__/lr_scheduler.cpython-38.pyc differ diff --git a/IIR-Lab/models/__pycache__/modules.cpython-310.pyc b/IIR-Lab/models/__pycache__/modules.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f2af8cd73162ab1c19c5b4f653d0bb98250fd106 Binary files /dev/null and b/IIR-Lab/models/__pycache__/modules.cpython-310.pyc differ diff --git a/IIR-Lab/models/__pycache__/modules.cpython-38.pyc b/IIR-Lab/models/__pycache__/modules.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..910f052ecb9c84d7480fa9f17810d437562606ef Binary files /dev/null and b/IIR-Lab/models/__pycache__/modules.cpython-38.pyc differ diff --git a/IIR-Lab/models/__pycache__/trainer.cpython-310.pyc b/IIR-Lab/models/__pycache__/trainer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..27225235a0334627c47e41ff0e0ce31002c84e27 Binary files /dev/null and b/IIR-Lab/models/__pycache__/trainer.cpython-310.pyc differ diff --git a/IIR-Lab/models/__pycache__/trainer.cpython-312.pyc b/IIR-Lab/models/__pycache__/trainer.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6ad1a0f1e0fb060c1804a16b669d776bbab1b39b Binary files /dev/null and b/IIR-Lab/models/__pycache__/trainer.cpython-312.pyc differ diff --git a/IIR-Lab/models/__pycache__/trainer.cpython-38.pyc b/IIR-Lab/models/__pycache__/trainer.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b902be0c04e9a01227c98132343fadd31de5e473 Binary files /dev/null and b/IIR-Lab/models/__pycache__/trainer.cpython-38.pyc differ diff --git a/IIR-Lab/models/__pycache__/trainer.cpython-39.pyc b/IIR-Lab/models/__pycache__/trainer.cpython-39.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..68b185a39e05e9301053a5b44dcba2c794d647bc Binary files /dev/null and b/IIR-Lab/models/__pycache__/trainer.cpython-39.pyc differ diff --git a/IIR-Lab/models/__pycache__/trainer_utils.cpython-310.pyc b/IIR-Lab/models/__pycache__/trainer_utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..63f2d3e7770a035acb044349ae48c23bc2b89775 Binary files /dev/null and b/IIR-Lab/models/__pycache__/trainer_utils.cpython-310.pyc differ diff --git a/IIR-Lab/models/__pycache__/trainer_utils.cpython-312.pyc b/IIR-Lab/models/__pycache__/trainer_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5f21b3a932b367a44eda562e45a5b84505e4e82a Binary files /dev/null and b/IIR-Lab/models/__pycache__/trainer_utils.cpython-312.pyc differ diff --git a/IIR-Lab/models/__pycache__/trainer_utils.cpython-39.pyc b/IIR-Lab/models/__pycache__/trainer_utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..435503f10dcf207cc9e7ae8907ce9f145ff93529 Binary files /dev/null and b/IIR-Lab/models/__pycache__/trainer_utils.cpython-39.pyc differ diff --git a/IIR-Lab/models/__pycache__/utils.cpython-310.pyc b/IIR-Lab/models/__pycache__/utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b7e4fa656b3a4d7ea403a499cb7774672d6843ea Binary files /dev/null and b/IIR-Lab/models/__pycache__/utils.cpython-310.pyc differ diff --git a/IIR-Lab/models/__pycache__/utils.cpython-312.pyc b/IIR-Lab/models/__pycache__/utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..71fdae49b51e29183c84fede06fd2cdb29bd38f4 Binary files /dev/null and b/IIR-Lab/models/__pycache__/utils.cpython-312.pyc differ diff --git a/IIR-Lab/models/__pycache__/utils.cpython-39.pyc b/IIR-Lab/models/__pycache__/utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..469bc5dffed2b1931c0623d08b468df385e83a15 Binary files /dev/null and b/IIR-Lab/models/__pycache__/utils.cpython-39.pyc differ diff --git a/IIR-Lab/models/__pycache__/validation_seq_infer.cpython-310.pyc b/IIR-Lab/models/__pycache__/validation_seq_infer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2505b0759eb275162fb9e1635b9eae1abea0a50e Binary files /dev/null and b/IIR-Lab/models/__pycache__/validation_seq_infer.cpython-310.pyc differ diff --git a/IIR-Lab/models/__pycache__/vgg_model.cpython-310.pyc b/IIR-Lab/models/__pycache__/vgg_model.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..40b0004d4841e6d2f098ded2b6cf9227135fd8c8 Binary files /dev/null and b/IIR-Lab/models/__pycache__/vgg_model.cpython-310.pyc differ diff --git a/IIR-Lab/models/__pycache__/vgg_model.cpython-38.pyc b/IIR-Lab/models/__pycache__/vgg_model.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b08782e8905179b9bc90ab7ee9145439c8398efb Binary files /dev/null and b/IIR-Lab/models/__pycache__/vgg_model.cpython-38.pyc differ diff --git a/IIR-Lab/models/arch_util.py b/IIR-Lab/models/arch_util.py new file mode 100644 index 0000000000000000000000000000000000000000..73b6c3f2c73459c6c7a10ae3a9ea3ccea899d045 --- /dev/null +++ b/IIR-Lab/models/arch_util.py @@ -0,0 +1,69 @@ + +import torch +import torchvision +from distutils.version import LooseVersion +from torch import nn as nn +from torch.nn import init as init +from .dcn import ModulatedDeformConvPack, modulated_deform_conv + + +class 
DCNv2Pack(ModulatedDeformConvPack): + """Modulated deformable conv for deformable alignment. + + Different from the official DCNv2Pack, which generates offsets and masks + from the preceding features, this DCNv2Pack takes separate + features to generate offsets and masks. + + ``Paper: Delving Deep into Deformable Alignment in Video Super-Resolution`` + """ + + def forward(self, x, feat): + out = self.conv_offset(feat) + o1, o2, mask = torch.chunk(out, 3, dim=1) + offset = torch.cat((o1, o2), dim=1) + mask = torch.sigmoid(mask) + + offset_absmean = torch.mean(torch.abs(offset)) + if offset_absmean > 250: + # logger = get_root_logger() + # logger.warning(f'Offset abs mean is {offset_absmean}, larger than 250.') + print(f'Offset abs mean is {offset_absmean}, larger than 250.') + + if LooseVersion(torchvision.__version__) >= LooseVersion('0.9.0'): + return torchvision.ops.deform_conv2d(x, offset, self.weight, self.bias, self.stride, self.padding, + self.dilation, mask) + else: + return modulated_deform_conv(x, offset, mask, self.weight, self.bias, self.stride, self.padding, + self.dilation, self.groups, self.deformable_groups) + + + +class FlowGuidedDCN(ModulatedDeformConvPack): + '''Use other features to generate offsets and masks''' + + + def forward(self, x, feat, flows): + '''input: input features for deformable conv: N, C, H, W. + feat: other features used for generating offsets and mask: N, C, H, W. + flows: N, 2, H, W. + ''' + out = self.conv_offset(feat) + o1, o2, mask = torch.chunk(out, 3, dim=1) + mask = torch.sigmoid(mask) + + offset = torch.tanh(torch.cat((o1, o2), dim=1)) * 15 # max_residue_magnitude + offset = offset + flows.flip(1).repeat(1, offset.size(1)//2, 1, 1) + + offset_mean = torch.mean(torch.abs(offset)) + if offset_mean > 250: + print('FlowGuidedDCN: Offset mean is {}, larger than 250.'.format(offset_mean)) + # offset = offset.clamp(-50, 50) + # return None + + + if LooseVersion(torchvision.__version__) >= LooseVersion('0.9.0'): + return torchvision.ops.deform_conv2d(x, offset, self.weight, self.bias, self.stride, self.padding, + self.dilation, mask) + else: + return modulated_deform_conv(x, offset, mask, self.weight, self.bias, self.stride, self.padding, + self.dilation, self.groups, self.deformable_groups) \ No newline at end of file diff --git a/IIR-Lab/models/archs/NAF_Tiny_arch.py b/IIR-Lab/models/archs/NAF_Tiny_arch.py new file mode 100644 index 0000000000000000000000000000000000000000..ddfebd2c10f20c5641cfd7a76d13843f42c9fc88 --- /dev/null +++ b/IIR-Lab/models/archs/NAF_Tiny_arch.py @@ -0,0 +1,227 @@ +import torch +import torch.nn as nn +# from basicsr.models.archs import recons_video81 as recons_video +# from basicsr.models.archs import flow_pwc82 as flow_pwc +import numpy as np +from torch.nn import functional as F +import torch.utils.checkpoint as checkpoint +from torch.cuda.amp import autocast as autocast +# from .StudentImage_arch import StudentImage +from torch.nn.parallel import DistributedDataParallel +from collections import OrderedDict +from .arch_util import DCNv2Pack +from .common import ResList + +class LayerNormFunction(torch.autograd.Function): + + @staticmethod + def forward(ctx, x, weight, bias, eps): + ctx.eps = eps + N, C, H, W = x.size() + mu = x.mean(1, keepdim=True) + var = (x - mu).pow(2).mean(1, keepdim=True) + y = (x - mu) / (var + eps).sqrt() + ctx.save_for_backward(y, var, weight) + y = weight.view(1, C, 1, 1) * y + bias.view(1, C, 1, 1) + return y + + @staticmethod + def backward(ctx, grad_output): + eps = ctx.eps + + N, C, H, W
= grad_output.size() + y, var, weight = ctx.saved_variables + g = grad_output * weight.view(1, C, 1, 1) + mean_g = g.mean(dim=1, keepdim=True) + + mean_gy = (g * y).mean(dim=1, keepdim=True) + gx = 1. / torch.sqrt(var + eps) * (g - y * mean_gy - mean_g) + return gx, (grad_output * y).sum(dim=3).sum(dim=2).sum(dim=0), grad_output.sum(dim=3).sum(dim=2).sum( + dim=0), None + + +class LayerNorm2d(nn.Module): + + def __init__(self, channels, eps=1e-6): + super(LayerNorm2d, self).__init__() + self.register_parameter('weight', nn.Parameter(torch.ones(channels))) + self.register_parameter('bias', nn.Parameter(torch.zeros(channels))) + self.eps = eps + + def forward(self, x): + return LayerNormFunction.apply(x, self.weight, self.bias, self.eps) + +class SimpleGate(nn.Module): + def forward(self, x): + x1, x2 = x.chunk(2, dim=1) + return x1 * x2 + +class NAFBlock(nn.Module): + def __init__(self, c, DW_Expand=2, FFN_Expand=2, drop_out_rate=0.): + super().__init__() + dw_channel = c * DW_Expand + self.conv1 = nn.Conv2d(in_channels=c, out_channels=dw_channel, kernel_size=1, padding=0, stride=1, groups=1, bias=True) + self.conv2 = nn.Conv2d(in_channels=dw_channel, out_channels=dw_channel, kernel_size=3, padding=1, stride=1, groups=dw_channel, + bias=True) + self.conv3 = nn.Conv2d(in_channels=dw_channel // 2, out_channels=c, kernel_size=1, padding=0, stride=1, groups=1, bias=True) + + # Simplified Channel Attention + self.sca = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + nn.Conv2d(in_channels=dw_channel // 2, out_channels=dw_channel // 2, kernel_size=1, padding=0, stride=1, + groups=1, bias=True), + ) + + # SimpleGate + self.sg = SimpleGate() + + ffn_channel = FFN_Expand * c + self.conv4 = nn.Conv2d(in_channels=c, out_channels=ffn_channel, kernel_size=1, padding=0, stride=1, groups=1, bias=True) + self.conv5 = nn.Conv2d(in_channels=ffn_channel // 2, out_channels=c, kernel_size=1, padding=0, stride=1, groups=1, bias=True) + + self.norm1 = LayerNorm2d(c) + self.norm2 = LayerNorm2d(c) + + self.dropout1 = nn.Dropout(drop_out_rate) if drop_out_rate > 0. else nn.Identity() + self.dropout2 = nn.Dropout(drop_out_rate) if drop_out_rate > 0. 
else nn.Identity() + + self.beta = nn.Parameter(torch.zeros((1, c, 1, 1)), requires_grad=True) + self.gamma = nn.Parameter(torch.zeros((1, c, 1, 1)), requires_grad=True) + + def forward(self, inp): + x = inp + + x = self.norm1(x) + + x = self.conv1(x) + x = self.conv2(x) + x = self.sg(x) + x = x * self.sca(x) + x = self.conv3(x) + + x = self.dropout1(x) + + y = inp + x * self.beta + + x = self.conv4(self.norm2(y)) + x = self.sg(x) + x = self.conv5(x) + + x = self.dropout2(x) + + return y + x * self.gamma + + +class NAF_Tiny(nn.Module): + + def __init__(self,args, img_channel=4, width=64, middle_blk_num=4, enc_blk_nums=[2, 2, 2, 2], dec_blk_nums=[2, 2, 2, 2]): + super().__init__() + + + self.lrelu = nn.LeakyReLU(0.2) + self.convfist = nn.Conv2d(4, 64, 3, 1, 1) + # self.feature_extraction = ResList(5, 64) + + + self.ending = nn.Conv2d(in_channels=width, out_channels=4, kernel_size=3, padding=1, stride=1, groups=1, + bias=True) + + self.encoders = nn.ModuleList() + self.decoders = nn.ModuleList() + self.middle_blks = nn.ModuleList() + self.ups = nn.ModuleList() + self.downs = nn.ModuleList() + + chan = width + for num in enc_blk_nums: + self.encoders.append( + nn.Sequential( + *[NAFBlock(chan) for _ in range(num)] + ) + ) + self.downs.append( + nn.Conv2d(chan, 2*chan, 2, 2) + ) + chan = chan * 2 + + self.middle_blks = \ + nn.Sequential( + *[NAFBlock(chan) for _ in range(middle_blk_num)] + ) + + for num in dec_blk_nums: + self.ups.append( + nn.Sequential( + nn.Conv2d(chan, chan * 2, 1, bias=False), + nn.PixelShuffle(2) + ) + ) + chan = chan // 2 + self.decoders.append( + nn.Sequential( + *[NAFBlock(chan) for _ in range(num)] + ) + ) + + self.padder_size = 2 ** len(self.encoders) # 16 + + def forward(self, x): + B,C,H,W = x.shape + x = self.check_image_size(x) + # B, FC, H, W = x.shape + # F = 1 + # C = 4 + + # three = three.reshape(B, F, C, H, W) + center = x + # three = three.reshape(-1, C, H, W) + + x = self.lrelu(self.convfist(x)) + # x = self.feature_extraction(x) + + + encs = [] + + for encoder, down in zip(self.encoders, self.downs): + x = encoder(x) + encs.append(x) + x = down(x) + + x = self.middle_blks(x) + + for decoder, up, enc_skip in zip(self.decoders, self.ups, encs[::-1]): + x = up(x) + x = x + enc_skip + x = decoder(x) + + x = self.ending(x) + + x = x + center + + return x[:, :, :H, :W] + + def check_image_size(self, x): + _, _, h, w = x.size() + mod_pad_h = (self.padder_size - h % self.padder_size) % self.padder_size + mod_pad_w = (self.padder_size - w % self.padder_size) % self.padder_size + x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h)) + return x + + + + + +def load_networks(network, resume, strict=True): + load_path = resume + + if isinstance(network, nn.DataParallel) or isinstance(network, DistributedDataParallel): # wrapped nets prefix their keys with 'module.' + network = network.module # unwrap .module so the keys of the current net match + load_net = torch.load(load_path, map_location=torch.device('cuda')) + load_net_clean = OrderedDict() # remove unnecessary 'module.'
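+ # e.g. a checkpoint key saved under DataParallel as 'module.conv1.weight' is restored as 'conv1.weight', so it matches the unwrapped single-GPU model.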
+ for k, v in load_net.items(): # strip the 'module.' prefix from the loaded checkpoint keys as well + if k.startswith('module.'): + load_net_clean[k[7:]] = v + else: + load_net_clean[k] = v + + network.load_state_dict(load_net_clean, strict=strict) # honor the caller's strict flag diff --git a/IIR-Lab/models/archs/NAF_arch.py b/IIR-Lab/models/archs/NAF_arch.py new file mode 100644 index 0000000000000000000000000000000000000000..f8edd0a98e460af16d4c5f130c69fe4a9562d00b --- /dev/null +++ b/IIR-Lab/models/archs/NAF_arch.py @@ -0,0 +1,209 @@ +import torch +import torch.nn as nn +# from basicsr.models.archs import recons_video81 as recons_video +# from basicsr.models.archs import flow_pwc82 as flow_pwc +import numpy as np +from torch.nn import functional as F +import torch.utils.checkpoint as checkpoint +from torch.cuda.amp import autocast as autocast +# from .StudentImage_arch import StudentImage +from torch.nn.parallel import DistributedDataParallel +from collections import OrderedDict +from .arch_util import DCNv2Pack +from .common import ResList + +class LayerNormFunction(torch.autograd.Function): + + @staticmethod + def forward(ctx, x, weight, bias, eps): + ctx.eps = eps + N, C, H, W = x.size() + mu = x.mean(1, keepdim=True) + var = (x - mu).pow(2).mean(1, keepdim=True) + y = (x - mu) / (var + eps).sqrt() + ctx.save_for_backward(y, var, weight) + y = weight.view(1, C, 1, 1) * y + bias.view(1, C, 1, 1) + return y + + @staticmethod + def backward(ctx, grad_output): + eps = ctx.eps + + N, C, H, W = grad_output.size() + y, var, weight = ctx.saved_variables + g = grad_output * weight.view(1, C, 1, 1) + mean_g = g.mean(dim=1, keepdim=True) + + mean_gy = (g * y).mean(dim=1, keepdim=True) + gx = 1. / torch.sqrt(var + eps) * (g - y * mean_gy - mean_g) + return gx, (grad_output * y).sum(dim=3).sum(dim=2).sum(dim=0), grad_output.sum(dim=3).sum(dim=2).sum( + dim=0), None + + +class LayerNorm2d(nn.Module): + + def __init__(self, channels, eps=1e-6): + super(LayerNorm2d, self).__init__() + self.register_parameter('weight', nn.Parameter(torch.ones(channels))) + self.register_parameter('bias', nn.Parameter(torch.zeros(channels))) + self.eps = eps + + def forward(self, x): + return LayerNormFunction.apply(x, self.weight, self.bias, self.eps) + +class SimpleGate(nn.Module): + def forward(self, x): + x1, x2 = x.chunk(2, dim=1) + return x1 * x2 + +class NAFBlock(nn.Module): + def __init__(self, c, DW_Expand=2, FFN_Expand=2, drop_out_rate=0.): + super().__init__() + dw_channel = c * DW_Expand + self.conv1 = nn.Conv2d(in_channels=c, out_channels=dw_channel, kernel_size=1, padding=0, stride=1, groups=1, bias=True) + self.conv2 = nn.Conv2d(in_channels=dw_channel, out_channels=dw_channel, kernel_size=3, padding=1, stride=1, groups=dw_channel, + bias=True) + self.conv3 = nn.Conv2d(in_channels=dw_channel // 2, out_channels=c, kernel_size=1, padding=0, stride=1, groups=1, bias=True) + + # Simplified Channel Attention + self.sca = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + nn.Conv2d(in_channels=dw_channel // 2, out_channels=dw_channel // 2, kernel_size=1, padding=0, stride=1, + groups=1, bias=True), + ) + + # SimpleGate + self.sg = SimpleGate() + + ffn_channel = FFN_Expand * c + self.conv4 = nn.Conv2d(in_channels=c, out_channels=ffn_channel, kernel_size=1, padding=0, stride=1, groups=1, bias=True) + self.conv5 = nn.Conv2d(in_channels=ffn_channel // 2, out_channels=c, kernel_size=1, padding=0, stride=1, groups=1, bias=True) + + self.norm1 = LayerNorm2d(c) + self.norm2 = LayerNorm2d(c) + + self.dropout1 = nn.Dropout(drop_out_rate) if drop_out_rate > 0.
else nn.Identity() + self.dropout2 = nn.Dropout(drop_out_rate) if drop_out_rate > 0. else nn.Identity() + + self.beta = nn.Parameter(torch.zeros((1, c, 1, 1)), requires_grad=True) + self.gamma = nn.Parameter(torch.zeros((1, c, 1, 1)), requires_grad=True) + + def forward(self, inp): + x = inp + + x = self.norm1(x) + + x = self.conv1(x) + x = self.conv2(x) + x = self.sg(x) + x = x * self.sca(x) + x = self.conv3(x) + + x = self.dropout1(x) + + y = inp + x * self.beta + + x = self.conv4(self.norm2(y)) + x = self.sg(x) + x = self.conv5(x) + + x = self.dropout2(x) + + return y + x * self.gamma + + +class NAF_Video(nn.Module): + + def __init__(self,args, img_channel=4, width=64, middle_blk_num=12, enc_blk_nums=[2, 2, 4, 8], dec_blk_nums=[2, 2, 2, 2]): + super().__init__() + + self.lrelu = nn.LeakyReLU(0.2) + self.convfist = nn.Conv2d(4, 64, 3, 1, 1) + self.feature_extraction = ResList(5, 64) + + self.ending = nn.Conv2d(in_channels=width, out_channels=4, kernel_size=3, padding=1, stride=1, groups=1, + bias=True) + + self.encoders = nn.ModuleList() + self.decoders = nn.ModuleList() + self.middle_blks = nn.ModuleList() + self.ups = nn.ModuleList() + self.downs = nn.ModuleList() + + chan = width + for num in enc_blk_nums: + self.encoders.append( + nn.Sequential( + *[NAFBlock(chan) for _ in range(num)] + ) + ) + self.downs.append( + nn.Conv2d(chan, 2*chan, 2, 2) + ) + chan = chan * 2 + + self.middle_blks = \ + nn.Sequential( + *[NAFBlock(chan) for _ in range(middle_blk_num)] + ) + + for num in dec_blk_nums: + self.ups.append( + nn.Sequential( + nn.Conv2d(chan, chan * 2, 1, bias=False), + nn.PixelShuffle(2) + ) + ) + chan = chan // 2 + self.decoders.append( + nn.Sequential( + *[NAFBlock(chan) for _ in range(num)] + ) + ) + + self.padder_size = 2 ** len(self.encoders) # 16 + + def forward(self, x): + + center = x + + x = self.lrelu(self.convfist(x)) + x = self.feature_extraction(x) + + encs = [] + + + for encoder, down in zip(self.encoders, self.downs): + x = encoder(x) + encs.append(x) + x = down(x) + + x = self.middle_blks(x) + + + for decoder, up, enc_skip in zip(self.decoders, self.ups, encs[::-1]): + x = up(x) + x = x + enc_skip + x = decoder(x) + + x = self.ending(x) + + x = x + center + + return x + + +def load_networks(network, resume, strict=True): + load_path = resume + + if isinstance(network, nn.DataParallel) or isinstance(network, DistributedDataParallel): # wrapped nets prefix their keys with 'module.' + network = network.module # unwrap .module so the keys of the current net match + load_net = torch.load(load_path, map_location=torch.device('cuda')) + load_net_clean = OrderedDict() # remove unnecessary 'module.'
+ for k, v in load_net.items(): # strip the 'module.' prefix from the loaded checkpoint keys as well + if k.startswith('module.'): + load_net_clean[k[7:]] = v + else: + load_net_clean[k] = v + + network.load_state_dict(load_net_clean, strict=strict) # honor the caller's strict flag diff --git a/IIR-Lab/models/archs/__pycache__/BSVDTset_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/BSVDTset_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..08aa4cac79f65d7d9ac637eb72d37da64447ebca Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/BSVDTset_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/BSVD_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/BSVD_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0a13d7433791847246e242da1eefb30aad30725b Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/BSVD_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/ConvTrans_Block.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/ConvTrans_Block.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..26f5e5437f0dfdb63c8ce2d8976c793fabd49023 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/ConvTrans_Block.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/ConvTrans_Block.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/ConvTrans_Block.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9972f9ae7353b6896ac305a62f8e74582c9881cd Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/ConvTrans_Block.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/ConvTrans_Encoder.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/ConvTrans_Encoder.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..29cdae5a784721e8be943a6bb3e51ecbee12b863 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/ConvTrans_Encoder.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/ConvTrans_Encoder.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/ConvTrans_Encoder.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3583b31571d924d35eae949af94bc49a83d63632 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/ConvTrans_Encoder.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/KFDLSR_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/KFDLSR_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fed2c21faf0331aba272f13449f9d8e348f0b1e0 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/KFDLSR_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/MFD_3_64_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/MFD_3_64_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..16920e3ddfff7f97208c7d362dd7c46cf19d4ab9 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/MFD_3_64_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/MFD_3_64_arch.cpython-39.pyc b/IIR-Lab/models/archs/__pycache__/MFD_3_64_arch.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..36e2d1fe4f330d30676a77c52cda7158e90dee07 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/MFD_3_64_arch.cpython-39.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/NAF_Tiny_arch.cpython-39.pyc b/IIR-Lab/models/archs/__pycache__/NAF_Tiny_arch.cpython-39.pyc new file mode 100644 index
0000000000000000000000000000000000000000..130719b76344d4aea4f724dcaca03ece9281c89b Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/NAF_Tiny_arch.cpython-39.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/NAF_Video_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/NAF_Video_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6fd8ae047091935b26293a787d315a84756d319c Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/NAF_Video_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/NAF_Video_arch.cpython-312.pyc b/IIR-Lab/models/archs/__pycache__/NAF_Video_arch.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2177257056626813a4df13eb11a5d2e0b40b83a8 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/NAF_Video_arch.cpython-312.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/NAF_Video_arch.cpython-39.pyc b/IIR-Lab/models/archs/__pycache__/NAF_Video_arch.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e6d92c66b05423a0f56e366fa5cc3beb716cd6e4 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/NAF_Video_arch.cpython-39.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/NAF_arch.cpython-312.pyc b/IIR-Lab/models/archs/__pycache__/NAF_arch.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4507054e4b006623cf51d3843d671a915dace2d0 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/NAF_arch.cpython-312.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/NAF_arch.cpython-39.pyc b/IIR-Lab/models/archs/__pycache__/NAF_arch.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fe6def88b3d2346abee3145a0472b04a405d1dc1 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/NAF_arch.cpython-39.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/RCAN.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/RCAN.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e4b8458d9a88f6fc802aef029daad638813f3435 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/RCAN.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/RCAN.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/RCAN.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0996b97c9ed07b3771f78ac0a788b30323580f8e Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/RCAN.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/RCAN_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/RCAN_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a22a5012bda0009424760f4cbfe7f79efceecc7b Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/RCAN_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/RES_CA.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/RES_CA.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b9f3b9e65f2860b17e3abb704f66d586e5c45360 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/RES_CA.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/RES_CA.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/RES_CA.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..69137931fa7d24a6aa9e1c34507c33390df4afcc Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/RES_CA.cpython-38.pyc differ diff --git 
a/IIR-Lab/models/archs/__pycache__/SelfDZSR_util.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/SelfDZSR_util.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7f1d16074c22f5464511899a829a765dc5fa33c6 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/SelfDZSR_util.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/SelfDZSR_util.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/SelfDZSR_util.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6a55108dd4ce020dc6b9172cb7db3583481698a7 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/SelfDZSR_util.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/StudentHalf_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/StudentHalf_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ed50805dbb3f8747f9b8be2bdb86b80c6732ca20 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/StudentHalf_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/StudentImage_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/StudentImage_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c416079258ea2250f6884c1122ae467ed9f1b07f Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/StudentImage_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/StudentImage_arch.cpython-39.pyc b/IIR-Lab/models/archs/__pycache__/StudentImage_arch.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d28f2931fd1581e94d79c1f5095b27cb3bd9e3fa Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/StudentImage_arch.cpython-39.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/StudentTest_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/StudentTest_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9be50db63e2f5ac5c07cfddaed23ea01d5a34acc Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/StudentTest_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/Student_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/Student_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c3750258cf10e319712e2ac7db3c6fa6c102c2a5 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/Student_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/Student_arch.cpython-39.pyc b/IIR-Lab/models/archs/__pycache__/Student_arch.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6b6f95bde963f4aec2ab45fa401d8000f6b5a18c Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/Student_arch.cpython-39.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/UNet_NAF_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/UNet_NAF_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0c996abebab142f9267604a468557e102f0f30f0 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/UNet_NAF_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/UNet_NAF_arch.cpython-312.pyc b/IIR-Lab/models/archs/__pycache__/UNet_NAF_arch.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8a36f7f0208aaed5755df1a780ce6398f5404fd3 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/UNet_NAF_arch.cpython-312.pyc differ diff --git 
a/IIR-Lab/models/archs/__pycache__/UNet_S_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/UNet_S_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..896cd1157d8f9f7024cf6b31f2c312c2b446d1ce Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/UNet_S_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/UNet_ZXS4_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/UNet_ZXS4_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7083a1d9515895052f6538fea2a1732160448912 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/UNet_ZXS4_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/UNet_ZXS4_arch.cpython-312.pyc b/IIR-Lab/models/archs/__pycache__/UNet_ZXS4_arch.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b60590f512d3b2a5441a406d3f98a2a74e80bf94 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/UNet_ZXS4_arch.cpython-312.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/UNet_ZXS4_arch.cpython-39.pyc b/IIR-Lab/models/archs/__pycache__/UNet_ZXS4_arch.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f01935b119e24fde172fbe86f78d201d8b076121 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/UNet_ZXS4_arch.cpython-39.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/UNet_ZXS5_arch.cpython-39.pyc b/IIR-Lab/models/archs/__pycache__/UNet_ZXS5_arch.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c7d413d744af9d829cbc75a5bff1f33b2f6de93c Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/UNet_ZXS5_arch.cpython-39.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/UNet_baseS1_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/UNet_baseS1_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fdb3b09b0515770147eda24f2e72eb4ee4c1eca2 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/UNet_baseS1_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/UNet_baseS2_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/UNet_baseS2_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a4e6f77572bb95dfff4602ba320a6671a74505b7 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/UNet_baseS2_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/UNet_base_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/UNet_base_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0c8079f65b0480462b36b709ddfc93ef2d1bf6fe Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/UNet_base_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/Unet_M_F_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/Unet_M_F_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2203a09fc64638104fe54179ec56dc84e2f36b85 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/Unet_M_F_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/Unet_M_F_arch.cpython-312.pyc b/IIR-Lab/models/archs/__pycache__/Unet_M_F_arch.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..520a8dccc896c296b73258d7b92416f2e4286b9d Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/Unet_M_F_arch.cpython-312.pyc differ diff --git 
a/IIR-Lab/models/archs/__pycache__/Unet_M_F_arch.cpython-39.pyc b/IIR-Lab/models/archs/__pycache__/Unet_M_F_arch.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bf8f4a1ad57c6f0c3678b39ab8dd12ae0c847f15 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/Unet_M_F_arch.cpython-39.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/Unetseeindark.cpython-39.pyc b/IIR-Lab/models/archs/__pycache__/Unetseeindark.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..784d4362183c88d65fd6eca16f138ebba645f86b Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/Unetseeindark.cpython-39.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V10_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V10_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..43f03129690c7d4baf5d9031e94499d02f2bb486 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V10_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V10_arch.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/V10_arch.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f1eb5feef31a6f50b69449c2ead7c996d47a760c Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V10_arch.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V11_unaligned_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V11_unaligned_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a03f27343d50c7fda6479683e7f626bb8351328c Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V11_unaligned_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V11_unaligned_arch.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/V11_unaligned_arch.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6a4e7278c4f54385eb6b8b35391fb520533ecee2 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V11_unaligned_arch.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V12_unaligned_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V12_unaligned_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7ba0a843290c44d75e822f8f2d8b9b33517a232c Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V12_unaligned_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V1_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V1_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4de49781cd63f8b669ae5ae78792a73d72787cc9 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V1_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V21_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V21_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fd218e945fc553dae4e1f729f733325cfb83c544 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V21_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V21_arch.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/V21_arch.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4b23580175242a3b7deea00bfbc4bd66943a2fe5 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V21_arch.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V22_arch.cpython-310.pyc 
b/IIR-Lab/models/archs/__pycache__/V22_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9557f96a2525fc76a1a2fd383dd156f350f8dc49 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V22_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V23_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V23_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..71834b0f08148a4da06e2c6057aacfcd3297900e Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V23_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V24_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V24_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..16abe6d0ea085801af2a2e72627ee072fca7cec5 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V24_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V24_arch.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/V24_arch.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..70dc983f748c59572c7953aba01db0c5b5c4b057 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V24_arch.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V25L_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V25L_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b6b3323a9eea25b6631bfd0ccc4df2acb67a0712 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V25L_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V25L_arch.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/V25L_arch.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fb363cf1ca49023838f25b44201a5d13cd0abc12 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V25L_arch.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V25_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V25_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..894aac2ce226ce03b14f14c5f7dfe2c09e4c21a2 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V25_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V25_arch.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/V25_arch.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f0c5330d8d8767624db77d76e6a6e24fb1b16259 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V25_arch.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V26_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V26_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..74d5c2901f19b8fcc18c7b89484bac5f6beece4b Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V26_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V26_arch.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/V26_arch.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..12451c8dddb6e0797fdf7f2f3bd055cc767b9c20 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V26_arch.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V27_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V27_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ed2412c7ce5172023e87ac7392e182482f3cad37 Binary files /dev/null and 
b/IIR-Lab/models/archs/__pycache__/V27_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V29_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V29_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c80fcbddbce573a94ddca7b22649616d64f264a5 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V29_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V29_arch.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/V29_arch.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1c549505cfdd51176ee7001138e28f81d63d43db Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V29_arch.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V2_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V2_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d3bebbacb1ee9270927623618d7ad3016137ace1 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V2_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V30CF_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V30CF_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..adc0cd81bfcc140035012e98016093cf2cff84b4 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V30CF_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V30_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V30_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4e6dea9c83457eb871c3fae5cc01818c4c5f256a Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V30_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V30_arch.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/V30_arch.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..acaa502c2df96d2843417313f25f5f9897c7f587 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V30_arch.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31D111_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V31D111_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..08ebfbe2ed7471f2f3b7dd1647ae746a462f31a3 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31D111_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31D111_arch.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/V31D111_arch.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5d9d1133ed519dbe3b08268402e6094b958999df Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31D111_arch.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31DD_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V31DD_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9f1b2a528ffe42103017c6def9a796301e9b7bef Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31DD_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31DD_arch.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/V31DD_arch.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5968b39f4cc4e74bd72391065e2e6a0f1d85ebca Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31DD_arch.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31DL_arch.cpython-310.pyc 
b/IIR-Lab/models/archs/__pycache__/V31DL_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4f8494cb7658a94738e5fda7265551dcf214553d Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31DL_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31DL_arch.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/V31DL_arch.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9400d23082b87d85634a55893c0a8d685de0de39 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31DL_arch.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31DS1_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V31DS1_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4977a77cee0e76c2968738e660260868e29b8860 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31DS1_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31DS28_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V31DS28_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..64975ccf1f1f6302324132ca2864c256111d7847 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31DS28_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31DS2_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V31DS2_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..464fe372bc2d44ff0860fa1146114bf3f3c92294 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31DS2_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31DS3_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V31DS3_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c648d31fb322a47776bb017da31029f64f9f7674 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31DS3_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31DS4_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V31DS4_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..50175e3440bf673611b66ce93317698a066a5aa9 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31DS4_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31DS5_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V31DS5_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..469822788184c313b661bc1ab48b8b8ac12dbfb2 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31DS5_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31D_S1_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V31D_S1_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..90a53e99dd284f1e0172762ff905e876c8409894 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31D_S1_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31D_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V31D_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d176d1e519105944ca15a83ae224104db2c81a1e Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31D_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31D_arch.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/V31D_arch.cpython-38.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..c1e192f6e63a75b40a7be25080f44c647c42f925 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31D_arch.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31_arch.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/V31_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6756bfd2767d8269f9464eda6bbbaabb52c59eba Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31_arch.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/V31_arch.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/V31_arch.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0ce8c47be1837e6c1b547f591d3db417682d2d4c Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/V31_arch.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/arch_util.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/arch_util.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9d13cbf55a9d4249622ed23037f084eca595b122 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/arch_util.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/arch_util.cpython-312.pyc b/IIR-Lab/models/archs/__pycache__/arch_util.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bbe9d8c5bbefc55da258fcf8fc3df80efe05073b Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/arch_util.cpython-312.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/arch_util.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/arch_util.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2e132ceee76cb953dae93b68aa02ddf30307de27 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/arch_util.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/arch_util.cpython-39.pyc b/IIR-Lab/models/archs/__pycache__/arch_util.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..54a51066e9225609372712fe9148ea24b4647c53 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/arch_util.cpython-39.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/common.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/common.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5299e23334689edb262dda1f30dc408710e34b79 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/common.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/common.cpython-312.pyc b/IIR-Lab/models/archs/__pycache__/common.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..59e4046e3b89b17e39ca68321181d775f1bc8f72 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/common.cpython-312.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/common.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/common.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a4430df0eab097d587ee259ee488434228b1e301 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/common.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/common.cpython-39.pyc b/IIR-Lab/models/archs/__pycache__/common.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c379e132b2319d6edd446fbeacac8696ac50eda9 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/common.cpython-39.pyc differ diff --git 
a/IIR-Lab/models/archs/__pycache__/common_RCAN.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/common_RCAN.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0e7034876e3d4050b11e9c940267012798335945 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/common_RCAN.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/common_RCAN.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/common_RCAN.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2ea21f92e475d3f9017776de4f1ae197d08d7ff1 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/common_RCAN.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/component.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/component.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e3e5eb7f1266d5a1186a860e88a6e6d9e1382931 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/component.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/matching.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/matching.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f76688f174aa9f04ab6d63942a0c3909650ba39c Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/matching.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/matching.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/matching.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b4161f6dea56dafd8f69a6b2a998ad5a8145c367 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/matching.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/spynet.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/spynet.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5660697a7c368fccc0d2944ce27c76488609f4ae Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/spynet.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/spynet.cpython-38.pyc b/IIR-Lab/models/archs/__pycache__/spynet.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..855af46342764ffe3aa69d969eebe67f2092d069 Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/spynet.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/__pycache__/spynet_flow.cpython-310.pyc b/IIR-Lab/models/archs/__pycache__/spynet_flow.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..45580bf039edf34ef2a745096c789c27bd1b935f Binary files /dev/null and b/IIR-Lab/models/archs/__pycache__/spynet_flow.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/arch_util.py b/IIR-Lab/models/archs/arch_util.py new file mode 100644 index 0000000000000000000000000000000000000000..73b6c3f2c73459c6c7a10ae3a9ea3ccea899d045 --- /dev/null +++ b/IIR-Lab/models/archs/arch_util.py @@ -0,0 +1,69 @@ + +import torch +import torchvision +from distutils.version import LooseVersion +from torch import nn as nn +from torch.nn import init as init +from .dcn import ModulatedDeformConvPack, modulated_deform_conv + + +class DCNv2Pack(ModulatedDeformConvPack): + """Modulated deformable conv for deformable alignment. + + Different from the official DCNv2Pack, which generates offsets and masks + from the preceding features, this DCNv2Pack takes separate + features to generate offsets and masks.
+ + ``Paper: Delving Deep into Deformable Alignment in Video Super-Resolution`` + """ + + def forward(self, x, feat): + out = self.conv_offset(feat) + o1, o2, mask = torch.chunk(out, 3, dim=1) + offset = torch.cat((o1, o2), dim=1) + mask = torch.sigmoid(mask) + + offset_absmean = torch.mean(torch.abs(offset)) + if offset_absmean > 250: + # logger = get_root_logger() + # logger.warning(f'Offset abs mean is {offset_absmean}, larger than 250.') + print(f'Offset abs mean is {offset_absmean}, larger than 250.') + + if LooseVersion(torchvision.__version__) >= LooseVersion('0.9.0'): + return torchvision.ops.deform_conv2d(x, offset, self.weight, self.bias, self.stride, self.padding, + self.dilation, mask) + else: + return modulated_deform_conv(x, offset, mask, self.weight, self.bias, self.stride, self.padding, + self.dilation, self.groups, self.deformable_groups) + + + +class FlowGuidedDCN(ModulatedDeformConvPack): + '''Use other features to generate offsets and masks''' + + + def forward(self, x, feat, flows): + '''input: input features for deformable conv: N, C, H, W. + feat: other features used for generating offsets and mask: N, C, H, W. + flows: N, 2, H, W. + ''' + out = self.conv_offset(feat) + o1, o2, mask = torch.chunk(out, 3, dim=1) + mask = torch.sigmoid(mask) + + offset = torch.tanh(torch.cat((o1, o2), dim=1)) * 15 # max_residue_magnitude + offset = offset + flows.flip(1).repeat(1, offset.size(1)//2, 1, 1) + + offset_mean = torch.mean(torch.abs(offset)) + if offset_mean > 250: + print('FlowGuidedDCN: Offset mean is {}, larger than 250.'.format(offset_mean)) + # offset = offset.clamp(-50, 50) + # return None + + + if LooseVersion(torchvision.__version__) >= LooseVersion('0.9.0'): + return torchvision.ops.deform_conv2d(x, offset, self.weight, self.bias, self.stride, self.padding, + self.dilation, mask) + else: + return modulated_deform_conv(x, offset, mask, self.weight, self.bias, self.stride, self.padding, + self.dilation, self.groups, self.deformable_groups) \ No newline at end of file diff --git a/IIR-Lab/models/archs/common.py b/IIR-Lab/models/archs/common.py new file mode 100644 index 0000000000000000000000000000000000000000..b2e029c48bc8c716ef34e10de96e6c3c9019bbd7 --- /dev/null +++ b/IIR-Lab/models/archs/common.py @@ -0,0 +1,498 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn import init as init +from torch.nn.modules.batchnorm import _BatchNorm +import matplotlib.pyplot as plt + + +def default_conv(in_channels, out_channels, kernel_size,stride=1, bias=True): + return nn.Conv2d( + in_channels, out_channels, kernel_size, + padding=(kernel_size//2),stride=stride, bias=bias) + +def conv1x1(in_channels, out_channels, stride=1): + return nn.Conv2d(in_channels, out_channels, kernel_size=1, + stride=stride, padding=0, bias=True) + +def conv3x3(in_channels, out_channels, stride=1): + return nn.Conv2d(in_channels, out_channels, kernel_size=3, + stride=stride, padding=1, bias=True) + +def conv5x5(in_channels, out_channels, stride=1): + return nn.Conv2d(in_channels, out_channels, kernel_size=5, + stride=stride, padding=2, bias=True) + +def make_layer(basic_block, num_basic_block, **kwarg): + """Make layers by stacking the same blocks. + + Args: + basic_block (nn.module): nn.module class for basic block. + num_basic_block (int): number of blocks. + + Returns: + nn.Sequential: Stacked blocks in nn.Sequential.
+ """ + layers = [] + for _ in range(num_basic_block): + layers.append(basic_block(**kwarg)) + return nn.Sequential(*layers) #30个 (0): ResidualBlockNoBN( + +class RBNoBN(nn.Module): + """Residual block without BN. + + Args: + num_feat (int): Channel number of intermediate features. + Default: 64. + res_scale (float): Residual scale. Default: 1. + pytorch_init (bool): If set to True, use pytorch default init, + otherwise, use default_init_weights. Default: False. + """ + + def __init__(self, num_feat=64, res_scale=1, pytorch_init=False): + super(RBNoBN, self).__init__() + self.res_scale = res_scale + self.conv1 = nn.Conv2d(num_feat, num_feat, 3, 1, 1, bias=True) + self.conv2 = nn.Conv2d(num_feat, num_feat, 3, 1, 1, bias=True) + self.relu = nn.ReLU(inplace=True) + + if not pytorch_init: + default_init_weights([self.conv1, self.conv2], 0.1) + + def forward(self, x): + identity = x + out = self.conv2(self.relu(self.conv1(x))) + return identity + out * self.res_scale + +class ResBlock(nn.Module): + def __init__(self, in_channels, out_channels, stride=1, downsample=None, res_scale=1): + super(ResBlock, self).__init__() + self.res_scale = res_scale + self.conv1 = conv3x3(in_channels, out_channels, stride) + self.relu = nn.LeakyReLU(0.2, inplace=True) + self.conv2 = conv3x3(out_channels, out_channels) + + def forward(self, x): + x1 = x + out = self.conv1(x) + out = self.relu(out) + out = self.conv2(out) + out = out * self.res_scale + x1 + return out + +# class ConvResidualBlocks(nn.Module): +# """Conv and residual block used in BasicVSR. + +# Args: +# num_in_ch (int): Number of input channels. Default: 3. +# num_out_ch (int): Number of output channels. Default: 64. +# num_block (int): Number of residual blocks. Default: 15. +# """ + +# def __init__(self, num_in_ch=3, num_out_ch=64, num_block=15): +# super().__init__() +# self.main = nn.Sequential( +# nn.Conv2d(num_in_ch, num_out_ch, 3, 1, 1, bias=True), nn.LeakyReLU(negative_slope=0.2, inplace=True), +# make_layer(RBNoBN, num_block, num_feat=num_out_ch)) + +# def forward(self, fea): +# return self.main(fea) + +class Encoder_input(nn.Module): + def __init__(self, num_res_blocks, n_feats, img_channel, res_scale=1): + super(Encoder_input, self).__init__() + self.num_res_blocks = num_res_blocks + self.conv_head = conv3x3(img_channel, n_feats) + + self.RBs = nn.ModuleList() + for i in range(self.num_res_blocks): + self.RBs.append(ResBlock(in_channels=n_feats, out_channels=n_feats, + res_scale=res_scale)) + + self.conv_tail = conv3x3(n_feats, n_feats) + self.relu = nn.LeakyReLU(0.2, inplace=True) + + def forward(self, x): + x = self.relu(self.conv_head(x)) + x1 = x + for i in range(self.num_res_blocks): + x = self.RBs[i](x) + x = self.conv_tail(x) + x = x + x1 + return x + + + + +class ResList(nn.Module): + def __init__(self, num_res_blocks, n_feats, res_scale=1): + super(ResList, self).__init__() + self.num_res_blocks = num_res_blocks + + self.RBs = nn.ModuleList() + for i in range(self.num_res_blocks): + self.RBs.append(ResBlock(in_channels=n_feats, out_channels=n_feats)) + + self.conv_tail = conv3x3(n_feats, n_feats) + + def forward(self, x): + x1 = x + for i in range(self.num_res_blocks): + x = self.RBs[i](x) + x = self.conv_tail(x) + x = x + x1 + return x + + +class Res_Attention_List(nn.Module): + def __init__(self, num_res_blocks, n_feats, res_scale=1): + super(Res_Attention_List, self).__init__() + self.num_res_blocks = num_res_blocks + + self.RBs = nn.ModuleList() + for i in range(self.num_res_blocks): + 
self.RBs.append(Res_Attention(in_channels=n_feats, out_channels=n_feats)) + + self.conv_tail = conv3x3(n_feats, n_feats) + + def forward(self, x): + x1 = x + for i in range(self.num_res_blocks): + x = self.RBs[i](x) + x = self.conv_tail(x) + x = x + x1 + return x + + +class PixelShufflePack(nn.Module): + """ Pixel Shuffle upsample layer. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + scale_factor (int): Upsample ratio. + upsample_kernel (int): Kernel size of Conv layer to expand channels. + + Returns: + Upsampled feature map. + """ + + def __init__(self, in_channels, out_channels, scale_factor, + upsample_kernel): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.scale_factor = scale_factor + self.upsample_kernel = upsample_kernel + self.upsample_conv = nn.Conv2d( + self.in_channels, + self.out_channels * scale_factor * scale_factor, + self.upsample_kernel, + padding=(self.upsample_kernel - 1) // 2) + self.init_weights() + + def init_weights(self): + """Initialize weights for PixelShufflePack. + """ + default_init_weights(self, 1) + + def forward(self, x): + """Forward function for PixelShufflePack. + + Args: + x (Tensor): Input tensor with shape (n, c, h, w). + + Returns: + Tensor: Forward results. + """ + x = self.upsample_conv(x) + x = F.pixel_shuffle(x, self.scale_factor) + return x + +class BasicBlock(nn.Sequential): + def __init__( + self, conv, in_channels, out_channels, kernel_size, stride=1, bias=True, + bn=False,In=False,act=nn.PReLU()): + + m = [conv(in_channels, out_channels, kernel_size, stride=stride, bias=bias)] + if bn: + m.append(nn.BatchNorm2d(out_channels)) + if In: + m.append(nn.InstanceNorm2d(out_channels)) + if act is not None: + m.append(act) + + super(BasicBlock, self).__init__(*m) + +class MeanShift(nn.Conv2d): + def __init__(self, rgb_range, rgb_mean, rgb_std, sign=-1): + super(MeanShift, self).__init__(3, 3, kernel_size=1) + std = torch.Tensor(rgb_std) + self.weight.data = torch.eye(3).view(3, 3, 1, 1) + self.weight.data.div_(std.view(3, 1, 1, 1)) + self.bias.data = sign * rgb_range * torch.Tensor(rgb_mean) + self.bias.data.div_(std) + + self.weight.requires_grad = False + self.bias.requires_grad = False + +def flow_warp(x, flow, interp_mode='bilinear', padding_mode='zeros', align_corners=True): + """Warp an image or feature map with optical flow. + + Args: + x (Tensor): Tensor with size (n, c, h, w). + flow (Tensor): Tensor with size (n, h, w, 2), normal value. + interp_mode (str): 'nearest' or 'bilinear'. Default: 'bilinear'. + padding_mode (str): 'zeros' or 'border' or 'reflection'. + Default: 'zeros'. + align_corners (bool): Before pytorch 1.3, the default value is + align_corners=True. After pytorch 1.3, the default value is + align_corners=False. Here, we use the True as default. + + Returns: + Tensor: Warped image or feature map. 
+ """ + assert x.size()[-2:] == flow.size()[1:3] + _, _, h, w = x.size() + # create mesh grid + grid_y, grid_x = torch.meshgrid(torch.arange(0, h).type_as(x), torch.arange(0, w).type_as(x)) + grid = torch.stack((grid_x, grid_y), 2).float() # W(x), H(y), 2 + grid.requires_grad = False + + vgrid = grid + flow + # scale grid to [-1,1] + vgrid_x = 2.0 * vgrid[:, :, :, 0] / max(w - 1, 1) - 1.0 + vgrid_y = 2.0 * vgrid[:, :, :, 1] / max(h - 1, 1) - 1.0 + vgrid_scaled = torch.stack((vgrid_x, vgrid_y), dim=3) + output = F.grid_sample(x, vgrid_scaled, mode=interp_mode, padding_mode=padding_mode, align_corners=align_corners) + + # TODO, what if align_corners=False + return output + +@torch.no_grad() +def default_init_weights(module_list, scale=1, bias_fill=0, **kwargs): + """Initialize network weights. + + Args: + module_list (list[nn.Module] | nn.Module): Modules to be initialized. + scale (float): Scale initialized weights, especially for residual + blocks. Default: 1. + bias_fill (float): The value to fill bias. Default: 0 + kwargs (dict): Other arguments for initialization function. + """ + if not isinstance(module_list, list): + module_list = [module_list] + for module in module_list: + for m in module.modules(): + if isinstance(m, nn.Conv2d): + init.kaiming_normal_(m.weight, **kwargs) + m.weight.data *= scale + if m.bias is not None: + m.bias.data.fill_(bias_fill) + elif isinstance(m, nn.Linear): + init.kaiming_normal_(m.weight, **kwargs) + m.weight.data *= scale + if m.bias is not None: + m.bias.data.fill_(bias_fill) + elif isinstance(m, _BatchNorm): + init.constant_(m.weight, 1) + if m.bias is not None: + m.bias.data.fill_(bias_fill) + + + +class ChannelPool(nn.Module): + def forward(self, x): #是一个元祖 第一个是最大值 第二个是坐标 所以要[0] + return torch.cat((torch.max(x,1)[0].unsqueeze(1), torch.mean(x,1).unsqueeze(1)), dim=1 ) + + +## Channel Attention (CA) Layer +class CALayer(nn.Module): + def __init__(self, channel, reduction=16): + super(CALayer, self).__init__() + # global average pooling: feature --> point + self.avg_pool = nn.AdaptiveAvgPool2d(1) + # feature channel downscale and upscale --> channel weight + self.conv_du = nn.Sequential( + nn.Conv2d(channel, channel // reduction, 1, padding=0, bias=True), + nn.ReLU(inplace=True), + nn.Conv2d(channel // reduction, channel, 1, padding=0, bias=True), + nn.Sigmoid() + ) + + def forward(self, x): + y = self.avg_pool(x) + y = self.conv_du(y) + return x * y + + +class SpatialGate(nn.Module): + def __init__(self): + super(SpatialGate, self).__init__() + kernel_size = 7 + self.compress = ChannelPool() + # self.spatial = BasicConv(2, 1, kernel_size, stride=1, padding=(kernel_size-1) // 2, relu=False) + self.spatial = nn.Conv2d(2, 1, 7, 1, 3) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + x_compress = self.compress(x) #torch.Size([4, 2, 64, 64]) + x_out = F.relu(self.spatial(x_compress)) + # import pdb + # pdb.set_trace() + scale = self.sigmoid(x_out)# broadcasting + return x * scale + + +class Res_Attention_Conf(nn.Module): + def __init__(self, in_channels, out_channels, stride=1, downsample=None, res_scale=1, SA=False, CA=False): + super(Res_Attention_Conf, self).__init__() + + conv=default_conv + + + self.res_scale = res_scale + self.conv1 = conv3x3(in_channels, out_channels, stride) + self.relu = nn.LeakyReLU(0.2, inplace=True) + self.conv2 = conv3x3(out_channels, out_channels) + self.channel_attention = CALayer(out_channels, reduction=16) + self.spatial_attention = SpatialGate() + # self.conv3 = conv3x3(out_channels, out_channels) + self.CA = CA 
+ self.SA = SA + + def forward(self, x): + + + x1 = x + out = self.relu(self.conv1(x)) + + if self.SA: + out = self.spatial_attention(out) + + if self.CA: + out = self.channel_attention(out) + + out = self.relu(self.conv2(out)) + # out = self.conv3(out) + + out = out * self.res_scale + x1 + return out + + + + +class Res_CA_Block(nn.Module): + def __init__(self, in_channels, out_channels, stride=1, res_scale=1, CA=False): + super(Res_CA_Block, self).__init__() + + # conv=default_conv + self.res_scale = res_scale + self.conv1 = conv3x3(in_channels, out_channels, stride) + self.relu = nn.LeakyReLU(0.2, inplace=True) + self.conv2 = conv3x3(out_channels, out_channels) + self.channel_attention = CALayer(out_channels, reduction=16) + + # self.conv3 = conv3x3(out_channels, out_channels) + self.CA = CA + + + def forward(self, x): + x1 = x + out = self.relu(self.conv1(x)) + if self.CA: + out = self.channel_attention(out) + + out = self.relu(self.conv2(out)) + # out = self.conv3(out) + + out = out * self.res_scale + x1 + return out + + +# NOTE: this second definition of Res_Attention_List overrides the one defined earlier in this file; +# this version stacks Res_CA_Block (channel attention only) instead of Res_Attention. +class Res_Attention_List(nn.Module): + def __init__(self, num_res_blocks, n_feats, res_scale=1): + super(Res_Attention_List, self).__init__() + self.num_res_blocks = num_res_blocks + + self.RBs = nn.ModuleList() + for i in range(self.num_res_blocks): + self.RBs.append(Res_CA_Block(in_channels=n_feats, out_channels=n_feats)) + + self.conv_tail = conv3x3(n_feats, n_feats) + + def forward(self, x): + x1 = x + for i in range(self.num_res_blocks): + x = self.RBs[i](x) + x = self.conv_tail(x) + x = x + x1 + return x + + + +class Res_Attention(nn.Module): + def __init__(self, in_channels, out_channels, stride=1, downsample=None, res_scale=1, SA=False, CA=False): + super(Res_Attention, self).__init__() + self.res_scale = res_scale + self.conv1 = conv3x3(in_channels, out_channels, stride) + self.relu = nn.LeakyReLU(0.2, inplace=True) + self.conv2 = conv3x3(out_channels, out_channels) + self.channel_attention = CALayer(out_channels, reduction=16) + self.spatial_attention = SpatialGate() + # self.conv3 = conv3x3(out_channels, out_channels) + self.CA = CA + self.SA = SA + + def forward(self, x): + x1 = x + out = self.relu(self.conv1(x)) + + if self.SA: + out = self.spatial_attention(out) + + if self.CA: + out = self.channel_attention(out) + + out = self.relu(self.conv2(out)) + # out = self.conv3(out) + + out = out * self.res_scale + x1 + return out + + + + + + + +def record(fea, path): + fea = fea[0][0] + mean = fea.mean() + std = fea.std() + + fea_norm = (fea- mean)/std + + # fea = (fea.cpu().numpy()*255).round().astype(np.uint8) + fea_norm = fea_norm.detach().cpu().numpy() + # cv2.imwrite(path, fea_norm) + + plt.imsave(path, fea_norm, cmap = 'gray') + + + +# NOTE: record2 is a verbatim duplicate of record, kept as-is since other code may reference it by name. +def record2(fea, path): + fea = fea[0][0] + mean = fea.mean() + std = fea.std() + + fea_norm = (fea- mean)/std + + # fea = (fea.cpu().numpy()*255).round().astype(np.uint8) + fea_norm = fea_norm.detach().cpu().numpy() + # cv2.imwrite(path, fea_norm) + + plt.imsave(path, fea_norm, cmap = 'gray') \ No newline at end of file diff --git a/IIR-Lab/models/archs/component.py b/IIR-Lab/models/archs/component.py new file mode 100644 index 0000000000000000000000000000000000000000..559994843e212daafcb13226fca679361a3a758d --- /dev/null +++ b/IIR-Lab/models/archs/component.py @@ -0,0 +1,384 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn import init as init +from torch.nn.modules.batchnorm import _BatchNorm +import matplotlib.pyplot as plt + + +class CvBlock(nn.Module): +
'''(Conv2d => BN => ReLU) x 2''' + def __init__(self, in_ch, out_ch): + super(CvBlock, self).__init__() + self.convblock = nn.Sequential( + nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(out_ch), + nn.ReLU(inplace=True), + nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(out_ch), + nn.ReLU(inplace=True) + ) + + def forward(self, x): + return self.convblock(x) + + +class InputCvBlock(nn.Module): + '''(Conv with num_in_frames groups => BN => ReLU) + (Conv => BN => ReLU)''' + def __init__(self, num_in_frames, out_ch): + super(InputCvBlock, self).__init__() + self.interm_ch = 30 + self.convblock = nn.Sequential( + nn.Conv2d(num_in_frames*(3+1), num_in_frames*self.interm_ch, \ + kernel_size=3, padding=1, groups=num_in_frames, bias=False), + nn.BatchNorm2d(num_in_frames*self.interm_ch), + nn.ReLU(inplace=True), + nn.Conv2d(num_in_frames*self.interm_ch, out_ch, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(out_ch), + nn.ReLU(inplace=True) + ) + + def forward(self, x): + return self.convblock(x) + + +class InputCvBlock_1(nn.Module): + '''(Conv with num_in_frames groups => BN => ReLU) + (Conv => BN => ReLU)''' + def __init__(self, num_in_frames, out_ch): + super(InputCvBlock_1, self).__init__() + self.interm_ch = 30 + self.convblock = nn.Sequential( + nn.Conv2d(num_in_frames*(3+1), num_in_frames*self.interm_ch, \ + kernel_size=3, padding=1, groups=num_in_frames, bias=False), + nn.BatchNorm2d(num_in_frames*self.interm_ch), + nn.ReLU(inplace=True), + nn.Conv2d(num_in_frames*self.interm_ch, out_ch, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(out_ch), + nn.ReLU(inplace=True) + ) + + # self.NAF1 = NAFBlock(out_ch) + # self.NAF2 = NAFBlock(out_ch) + + def forward(self, x): + x = self.convblock(x) + # x = self.NAF1(x) + # return self.NAF2(x) + return x + + +class DownBlock(nn.Module): + '''Downscale + (Conv2d => BN => ReLU)*2''' + def __init__(self, in_ch, out_ch): + super(DownBlock, self).__init__() + self.convblock = nn.Sequential( + nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1, stride=2, bias=False), + nn.BatchNorm2d(out_ch), + nn.ReLU(inplace=True), + CvBlock(out_ch, out_ch) + ) + + def forward(self, x): + return self.convblock(x) + + +class DownBlock_1(nn.Module): + '''Downscale + (Conv2d => BN => ReLU)*2''' + def __init__(self, in_ch, out_ch): + super(DownBlock_1, self).__init__() + self.convblock = nn.Sequential( + nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1, stride=2, bias=False), + nn.BatchNorm2d(out_ch), + nn.ReLU(inplace=True), + CvBlock(out_ch, out_ch) + ) + + self.NAF1 = NAFBlock(in_ch) + self.NAF2 = NAFBlock(in_ch) + + + def forward(self, x): + + + x = self.NAF1(x) + x = self.NAF2(x) + return self.convblock(x) + + +class UpBlock(nn.Module): + '''(Conv2d => BN => ReLU)*2 + Upscale''' + def __init__(self, in_ch, out_ch): + super(UpBlock, self).__init__() + self.convblock = nn.Sequential( + CvBlock(in_ch, in_ch), + nn.Conv2d(in_ch, out_ch*4, kernel_size=3, padding=1, bias=False), + nn.PixelShuffle(2) + ) + + def forward(self, x): + return self.convblock(x) + + +class UpBlock_1(nn.Module): + '''(Conv2d => BN => ReLU)*2 + Upscale''' + def __init__(self, in_ch, out_ch): + super(UpBlock_1, self).__init__() + self.convblock = nn.Sequential( + CvBlock(in_ch, in_ch), + nn.Conv2d(in_ch, out_ch*4, kernel_size=3, padding=1, bias=False), + nn.PixelShuffle(2) + ) + + self.NAF1 = NAFBlock(in_ch) + self.NAF2 = NAFBlock(in_ch) + + def forward(self, x): + x = self.NAF1(x) + x = self.NAF2(x) + return 
self.convblock(x) + + +class OutputCvBlock(nn.Module): + '''Conv2d => BN => ReLU => Conv2d''' + def __init__(self, in_ch, out_ch): + super(OutputCvBlock, self).__init__() + self.convblock = nn.Sequential( + nn.Conv2d(in_ch, in_ch, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(in_ch), + nn.ReLU(inplace=True), + nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1, bias=False) + ) + + def forward(self, x): + return self.convblock(x) + + +class OutputCvBlock_1(nn.Module): + '''Conv2d => BN => ReLU => Conv2d''' + def __init__(self, in_ch, out_ch): + super(OutputCvBlock_1, self).__init__() + self.convblock = nn.Sequential( + nn.Conv2d(in_ch, in_ch, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(in_ch), + nn.ReLU(inplace=True), + nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1, bias=False) + ) + + # self.NAF1 = NAFBlock(in_ch) + # self.NAF2 = NAFBlock(in_ch) + + def forward(self, x): + # x = self.NAF1(x) + # x = self.NAF2(x) + return self.convblock(x) + + +class SimpleGate(nn.Module): + def forward(self, x): + x1, x2 = x.chunk(2, dim=1) + return x1 * x2 + + +class LayerNormFunction(torch.autograd.Function): + + @staticmethod + def forward(ctx, x, weight, bias, eps): + ctx.eps = eps + N, C, H, W = x.size() + mu = x.mean(1, keepdim=True) + var = (x - mu).pow(2).mean(1, keepdim=True) + y = (x - mu) / (var + eps).sqrt() + ctx.save_for_backward(y, var, weight) + y = weight.view(1, C, 1, 1) * y + bias.view(1, C, 1, 1) + return y + + @staticmethod + def backward(ctx, grad_output): + eps = ctx.eps + + N, C, H, W = grad_output.size() + y, var, weight = ctx.saved_variables + g = grad_output * weight.view(1, C, 1, 1) + mean_g = g.mean(dim=1, keepdim=True) + + mean_gy = (g * y).mean(dim=1, keepdim=True) + gx = 1. / torch.sqrt(var + eps) * (g - y * mean_gy - mean_g) + return gx, (grad_output * y).sum(dim=3).sum(dim=2).sum(dim=0), grad_output.sum(dim=3).sum(dim=2).sum( + dim=0), None + + +class LayerNorm2d(nn.Module): + + def __init__(self, channels, eps=1e-6): + super(LayerNorm2d, self).__init__() + self.register_parameter('weight', nn.Parameter(torch.ones(channels))) + self.register_parameter('bias', nn.Parameter(torch.zeros(channels))) + self.eps = eps + + def forward(self, x): + return LayerNormFunction.apply(x, self.weight, self.bias, self.eps) + + +class NAFBlock(nn.Module): + def __init__(self, c, DW_Expand=2, FFN_Expand=2, drop_out_rate=0.): + super().__init__() + dw_channel = c * DW_Expand + self.conv1 = nn.Conv2d(in_channels=c, out_channels=dw_channel, kernel_size=1, padding=0, stride=1, groups=1, bias=True) + self.conv2 = nn.Conv2d(in_channels=dw_channel, out_channels=dw_channel, kernel_size=3, padding=1, stride=1, groups=dw_channel, + bias=True) + self.conv3 = nn.Conv2d(in_channels=dw_channel // 2, out_channels=c, kernel_size=1, padding=0, stride=1, groups=1, bias=True) + + # Simplified Channel Attention + self.sca = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + nn.Conv2d(in_channels=dw_channel // 2, out_channels=dw_channel // 2, kernel_size=1, padding=0, stride=1, + groups=1, bias=True), + ) + + # SimpleGate + self.sg = SimpleGate() + + ffn_channel = FFN_Expand * c + self.conv4 = nn.Conv2d(in_channels=c, out_channels=ffn_channel, kernel_size=1, padding=0, stride=1, groups=1, bias=True) + self.conv5 = nn.Conv2d(in_channels=ffn_channel // 2, out_channels=c, kernel_size=1, padding=0, stride=1, groups=1, bias=True) + + self.norm1 = LayerNorm2d(c) + self.norm2 = LayerNorm2d(c) + + self.dropout1 = nn.Dropout(drop_out_rate) if drop_out_rate > 0. 
else nn.Identity() + self.dropout2 = nn.Dropout(drop_out_rate) if drop_out_rate > 0. else nn.Identity() + + self.beta = nn.Parameter(torch.zeros((1, c, 1, 1)), requires_grad=True) + self.gamma = nn.Parameter(torch.zeros((1, c, 1, 1)), requires_grad=True) + + def forward(self, inp): + x = inp + + x = self.norm1(x) + + x = self.conv1(x) + x = self.conv2(x) + x = self.sg(x) + x = x * self.sca(x) + x = self.conv3(x) + + x = self.dropout1(x) + + y = inp + x * self.beta + + x = self.conv4(self.norm2(y)) + x = self.sg(x) + x = self.conv5(x) + + x = self.dropout2(x) + + return y + x * self.gamma + + + + + + + + + + + + + + + + + + + + + +class DenBlock(nn.Module): + """ Definition of the denoising block of FastDVDnet. + Inputs of constructor: + num_input_frames: int. number of input frames + Inputs of forward(): + xn: input frames of dim [N, C, H, W], (C=3 RGB) + noise_map: array with noise map of dim [N, 1, H, W] + """ + + def __init__(self, num_input_frames=3): + super(DenBlock, self).__init__() + self.chs_lyr0 = 32 + self.chs_lyr1 = 64 + self.chs_lyr2 = 128 + + self.inc = InputCvBlock(num_in_frames=num_input_frames, out_ch=self.chs_lyr0) + self.downc0 = DownBlock(in_ch=self.chs_lyr0, out_ch=self.chs_lyr1) + self.downc1 = DownBlock(in_ch=self.chs_lyr1, out_ch=self.chs_lyr2) + self.upc2 = UpBlock(in_ch=self.chs_lyr2, out_ch=self.chs_lyr1) + self.upc1 = UpBlock(in_ch=self.chs_lyr1, out_ch=self.chs_lyr0) + self.outc = OutputCvBlock(in_ch=self.chs_lyr0, out_ch=3) + + + def forward(self, in0, in1, in2, noise_map): + '''Args: + inX: Tensor, [N, C, H, W] in the [0., 1.] range + noise_map: Tensor [N, 1, H, W] in the [0., 1.] range + ''' + # Input convolution block + x0 = self.inc(torch.cat((in0, noise_map, in1, noise_map, in2, noise_map), dim=1)) + # Downsampling + x1 = self.downc0(x0) + x2 = self.downc1(x1) + # Upsampling + x2 = self.upc2(x2) + x1 = self.upc1(x1+x2) + # Estimation + x = self.outc(x0+x1) + + # Residual + x = in1 - x + + return x + + +class DenBlock_1(nn.Module): + """ Definition of the denoising block of FastDVDnet. + Inputs of constructor: + num_input_frames: int. number of input frames + Inputs of forward(): + xn: input frames of dim [N, C, H, W], (C=3 RGB) + noise_map: array with noise map of dim [N, 1, H, W] + """ + + def __init__(self, num_input_frames=3): + super(DenBlock_1, self).__init__() + self.chs_lyr0 = 32 + self.chs_lyr1 = 64 + self.chs_lyr2 = 128 + + self.inc = InputCvBlock_1(num_in_frames=num_input_frames, out_ch=self.chs_lyr0) + self.downc0 = DownBlock_1(in_ch=self.chs_lyr0, out_ch=self.chs_lyr1) + self.downc1 = DownBlock_1(in_ch=self.chs_lyr1, out_ch=self.chs_lyr2) + self.upc2 = UpBlock_1(in_ch=self.chs_lyr2, out_ch=self.chs_lyr1) + self.upc1 = UpBlock_1(in_ch=self.chs_lyr1, out_ch=self.chs_lyr0) + self.outc = OutputCvBlock_1(in_ch=self.chs_lyr0, out_ch=3) + + + def forward(self, in0, in1, in2, noise_map): + '''Args: + inX: Tensor, [N, C, H, W] in the [0., 1.] range + noise_map: Tensor [N, 1, H, W] in the [0., 1.]
range + ''' + # Input convolution block + x0 = self.inc(torch.cat((in0, noise_map, in1, noise_map, in2, noise_map), dim=1)) + # Downsampling + x1 = self.downc0(x0) + x2 = self.downc1(x1) + # Upsampling + x2 = self.upc2(x2) + x1 = self.upc1(x1+x2) + # Estimation + x = self.outc(x0+x1) + + # Residual + x = in1 - x + + return x \ No newline at end of file diff --git a/IIR-Lab/models/archs/dcn/__init__.py b/IIR-Lab/models/archs/dcn/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..32e3592f896d61b4127e09d0476381b9d55e32ff --- /dev/null +++ b/IIR-Lab/models/archs/dcn/__init__.py @@ -0,0 +1,7 @@ +from .deform_conv import (DeformConv, DeformConvPack, ModulatedDeformConv, ModulatedDeformConvPack, deform_conv, + modulated_deform_conv) + +__all__ = [ + 'DeformConv', 'DeformConvPack', 'ModulatedDeformConv', 'ModulatedDeformConvPack', 'deform_conv', + 'modulated_deform_conv' +] diff --git a/IIR-Lab/models/archs/dcn/__pycache__/__init__.cpython-310.pyc b/IIR-Lab/models/archs/dcn/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..600f6f26af983d6854c22d8f374cd46857cef71b Binary files /dev/null and b/IIR-Lab/models/archs/dcn/__pycache__/__init__.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/dcn/__pycache__/__init__.cpython-312.pyc b/IIR-Lab/models/archs/dcn/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7a9f9f962afa1933f6c50da6b72c71189378bfb4 Binary files /dev/null and b/IIR-Lab/models/archs/dcn/__pycache__/__init__.cpython-312.pyc differ diff --git a/IIR-Lab/models/archs/dcn/__pycache__/__init__.cpython-38.pyc b/IIR-Lab/models/archs/dcn/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..663b0797ac2bc7752910b7459b157ce30410c5f7 Binary files /dev/null and b/IIR-Lab/models/archs/dcn/__pycache__/__init__.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/dcn/__pycache__/__init__.cpython-39.pyc b/IIR-Lab/models/archs/dcn/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b80426dcaa53b083fbc992dd7748e2a5971da7be Binary files /dev/null and b/IIR-Lab/models/archs/dcn/__pycache__/__init__.cpython-39.pyc differ diff --git a/IIR-Lab/models/archs/dcn/__pycache__/deform_conv.cpython-310.pyc b/IIR-Lab/models/archs/dcn/__pycache__/deform_conv.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bf65111b43bc4967962f6109ec0a1fdea2c8adb2 Binary files /dev/null and b/IIR-Lab/models/archs/dcn/__pycache__/deform_conv.cpython-310.pyc differ diff --git a/IIR-Lab/models/archs/dcn/__pycache__/deform_conv.cpython-312.pyc b/IIR-Lab/models/archs/dcn/__pycache__/deform_conv.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..faff316465cc908db1f0cf6d251b0e1fbfa8e3eb Binary files /dev/null and b/IIR-Lab/models/archs/dcn/__pycache__/deform_conv.cpython-312.pyc differ diff --git a/IIR-Lab/models/archs/dcn/__pycache__/deform_conv.cpython-38.pyc b/IIR-Lab/models/archs/dcn/__pycache__/deform_conv.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..991621bdfdc40736c33b8a6e126607e36f62e0a5 Binary files /dev/null and b/IIR-Lab/models/archs/dcn/__pycache__/deform_conv.cpython-38.pyc differ diff --git a/IIR-Lab/models/archs/dcn/__pycache__/deform_conv.cpython-39.pyc b/IIR-Lab/models/archs/dcn/__pycache__/deform_conv.cpython-39.pyc new file mode 100644 index 
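(The two denoising blocks above follow the FastDVDnet pattern: three consecutive frames plus a per-pixel noise map go in, a U-Net estimates the noise, and the residual is subtracted from the middle frame. A minimal usage sketch, assuming the import path from this diff and an illustrative constant noise level:

    import torch
    from models.archs.component import DenBlock  # path assumed from this diff

    block = DenBlock(num_input_frames=3).eval()
    frames = [torch.rand(1, 3, 64, 64) for _ in range(3)]  # values in [0, 1] per the docstring
    noise_map = torch.full((1, 1, 64, 64), 0.05)           # hypothetical noise level
    with torch.no_grad():
        denoised = block(frames[0], frames[1], frames[2], noise_map)
    print(denoised.shape)  # torch.Size([1, 3, 64, 64]), same shape as the middle frame

Note that H and W must be divisible by 4 because the block downsamples twice by a factor of 2.)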
0000000000000000000000000000000000000000..22086ce145867dd3d5ef974adc8d642a05fcddff Binary files /dev/null and b/IIR-Lab/models/archs/dcn/__pycache__/deform_conv.cpython-39.pyc differ diff --git a/IIR-Lab/models/archs/dcn/deform_conv.py b/IIR-Lab/models/archs/dcn/deform_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..6268ca825d59ef4a30d4d2156c4438cbbe9b3c1e --- /dev/null +++ b/IIR-Lab/models/archs/dcn/deform_conv.py @@ -0,0 +1,379 @@ +import math +import os +import torch +from torch import nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn import functional as F +from torch.nn.modules.utils import _pair, _single + +BASICSR_JIT = os.getenv('BASICSR_JIT') +if BASICSR_JIT == 'True': + from torch.utils.cpp_extension import load + module_path = os.path.dirname(__file__) + deform_conv_ext = load( + 'deform_conv', + sources=[ + os.path.join(module_path, 'src', 'deform_conv_ext.cpp'), + os.path.join(module_path, 'src', 'deform_conv_cuda.cpp'), + os.path.join(module_path, 'src', 'deform_conv_cuda_kernel.cu'), + ], + ) +else: + try: + from . import deform_conv_ext + except ImportError: + pass + # avoid annoying print output + # print(f'Cannot import deform_conv_ext. Error: {error}. You may need to: \n ' + # '1. compile with BASICSR_EXT=True. or\n ' + # '2. set BASICSR_JIT=True during running') + + +class DeformConvFunction(Function): + + @staticmethod + def forward(ctx, + input, + offset, + weight, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + im2col_step=64): + if input is not None and input.dim() != 4: + raise ValueError(f'Expected 4D tensor as input, got {input.dim()}D tensor instead.') + ctx.stride = _pair(stride) + ctx.padding = _pair(padding) + ctx.dilation = _pair(dilation) + ctx.groups = groups + ctx.deformable_groups = deformable_groups + ctx.im2col_step = im2col_step + + ctx.save_for_backward(input, offset, weight) + + output = input.new_empty(DeformConvFunction._output_size(input, weight, ctx.padding, ctx.dilation, ctx.stride)) + + ctx.bufs_ = [input.new_empty(0), input.new_empty(0)] # columns, ones + + if not input.is_cuda: + raise NotImplementedError + else: + cur_im2col_step = min(ctx.im2col_step, input.shape[0]) + assert (input.shape[0] % cur_im2col_step) == 0, 'im2col step must divide batchsize' + deform_conv_ext.deform_conv_forward(input, weight, + offset, output, ctx.bufs_[0], ctx.bufs_[1], weight.size(3), + weight.size(2), ctx.stride[1], ctx.stride[0], ctx.padding[1], + ctx.padding[0], ctx.dilation[1], ctx.dilation[0], ctx.groups, + ctx.deformable_groups, cur_im2col_step) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, offset, weight = ctx.saved_tensors + + grad_input = grad_offset = grad_weight = None + + if not grad_output.is_cuda: + raise NotImplementedError + else: + cur_im2col_step = min(ctx.im2col_step, input.shape[0]) + assert (input.shape[0] % cur_im2col_step) == 0, 'im2col step must divide batchsize' + + if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]: + grad_input = torch.zeros_like(input) + grad_offset = torch.zeros_like(offset) + deform_conv_ext.deform_conv_backward_input(input, offset, grad_output, grad_input, + grad_offset, weight, ctx.bufs_[0], weight.size(3), + weight.size(2), ctx.stride[1], ctx.stride[0], ctx.padding[1], + ctx.padding[0], ctx.dilation[1], ctx.dilation[0], ctx.groups, + ctx.deformable_groups, cur_im2col_step) + + if ctx.needs_input_grad[2]: + grad_weight = 
torch.zeros_like(weight) + deform_conv_ext.deform_conv_backward_parameters(input, offset, grad_output, grad_weight, + ctx.bufs_[0], ctx.bufs_[1], weight.size(3), + weight.size(2), ctx.stride[1], ctx.stride[0], + ctx.padding[1], ctx.padding[0], ctx.dilation[1], + ctx.dilation[0], ctx.groups, ctx.deformable_groups, 1, + cur_im2col_step) + + return (grad_input, grad_offset, grad_weight, None, None, None, None, None) + + @staticmethod + def _output_size(input, weight, padding, dilation, stride): + channels = weight.size(0) + output_size = (input.size(0), channels) + for d in range(input.dim() - 2): + in_size = input.size(d + 2) + pad = padding[d] + kernel = dilation[d] * (weight.size(d + 2) - 1) + 1 + stride_ = stride[d] + output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, ) + if not all(map(lambda s: s > 0, output_size)): + raise ValueError(f'convolution input is too small (output would be {"x".join(map(str, output_size))})') + return output_size + + +class ModulatedDeformConvFunction(Function): + + @staticmethod + def forward(ctx, + input, + offset, + mask, + weight, + bias=None, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1): + ctx.stride = stride + ctx.padding = padding + ctx.dilation = dilation + ctx.groups = groups + ctx.deformable_groups = deformable_groups + ctx.with_bias = bias is not None + if not ctx.with_bias: + bias = input.new_empty(1) # fake tensor + if not input.is_cuda: + raise NotImplementedError + if weight.requires_grad or mask.requires_grad or offset.requires_grad or input.requires_grad: + ctx.save_for_backward(input, offset, mask, weight, bias) + output = input.new_empty(ModulatedDeformConvFunction._infer_shape(ctx, input, weight)) + ctx._bufs = [input.new_empty(0), input.new_empty(0)] + deform_conv_ext.modulated_deform_conv_forward(input, weight, bias, ctx._bufs[0], offset, mask, output, + ctx._bufs[1], weight.shape[2], weight.shape[3], ctx.stride, + ctx.stride, ctx.padding, ctx.padding, ctx.dilation, ctx.dilation, + ctx.groups, ctx.deformable_groups, ctx.with_bias) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + if not grad_output.is_cuda: + raise NotImplementedError + input, offset, mask, weight, bias = ctx.saved_tensors + grad_input = torch.zeros_like(input) + grad_offset = torch.zeros_like(offset) + grad_mask = torch.zeros_like(mask) + grad_weight = torch.zeros_like(weight) + grad_bias = torch.zeros_like(bias) + deform_conv_ext.modulated_deform_conv_backward(input, weight, bias, ctx._bufs[0], offset, mask, ctx._bufs[1], + grad_input, grad_weight, grad_bias, grad_offset, grad_mask, + grad_output, weight.shape[2], weight.shape[3], ctx.stride, + ctx.stride, ctx.padding, ctx.padding, ctx.dilation, ctx.dilation, + ctx.groups, ctx.deformable_groups, ctx.with_bias) + if not ctx.with_bias: + grad_bias = None + + return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias, None, None, None, None, None) + + @staticmethod + def _infer_shape(ctx, input, weight): + n = input.size(0) + channels_out = weight.size(0) + height, width = input.shape[2:4] + kernel_h, kernel_w = weight.shape[2:4] + height_out = (height + 2 * ctx.padding - (ctx.dilation * (kernel_h - 1) + 1)) // ctx.stride + 1 + width_out = (width + 2 * ctx.padding - (ctx.dilation * (kernel_w - 1) + 1)) // ctx.stride + 1 + return n, channels_out, height_out, width_out + + +deform_conv = DeformConvFunction.apply +modulated_deform_conv = ModulatedDeformConvFunction.apply + + +class DeformConv(nn.Module): + + def __init__(self, + 
in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + bias=False): + super(DeformConv, self).__init__() + + assert not bias + assert in_channels % groups == 0, f'in_channels {in_channels} is not divisible by groups {groups}' + assert out_channels % groups == 0, f'out_channels {out_channels} is not divisible by groups {groups}' + + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = _pair(stride) + self.padding = _pair(padding) + self.dilation = _pair(dilation) + self.groups = groups + self.deformable_groups = deformable_groups + # enable compatibility with nn.Conv2d + self.transposed = False + self.output_padding = _single(0) + + self.weight = nn.Parameter(torch.Tensor(out_channels, in_channels // self.groups, *self.kernel_size)) + + self.reset_parameters() + + def reset_parameters(self): + n = self.in_channels + for k in self.kernel_size: + n *= k + stdv = 1. / math.sqrt(n) + self.weight.data.uniform_(-stdv, stdv) + + def forward(self, x, offset): + # To fix an assert error in deform_conv_cuda.cpp:128 + # input image is smaller than kernel + input_pad = (x.size(2) < self.kernel_size[0] or x.size(3) < self.kernel_size[1]) + if input_pad: + pad_h = max(self.kernel_size[0] - x.size(2), 0) + pad_w = max(self.kernel_size[1] - x.size(3), 0) + x = F.pad(x, (0, pad_w, 0, pad_h), 'constant', 0).contiguous() + offset = F.pad(offset, (0, pad_w, 0, pad_h), 'constant', 0).contiguous() + out = deform_conv(x, offset, self.weight, self.stride, self.padding, self.dilation, self.groups, + self.deformable_groups) + if input_pad: + out = out[:, :, :out.size(2) - pad_h, :out.size(3) - pad_w].contiguous() + return out + + +class DeformConvPack(DeformConv): + """A Deformable Conv Encapsulation that acts as normal Conv layers. + + Args: + in_channels (int): Same as nn.Conv2d. + out_channels (int): Same as nn.Conv2d. + kernel_size (int or tuple[int]): Same as nn.Conv2d. + stride (int or tuple[int]): Same as nn.Conv2d. + padding (int or tuple[int]): Same as nn.Conv2d. + dilation (int or tuple[int]): Same as nn.Conv2d. + groups (int): Same as nn.Conv2d. + bias (bool or str): If specified as `auto`, it will be decided by the + norm_cfg. Bias will be set as True if norm_cfg is None, otherwise + False. 
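+
+    Example (illustrative sketch added here, not from the upstream code; it
+    assumes the compiled `deform_conv_ext` extension and a CUDA device are
+    available, since the CPU path raises NotImplementedError):
+
+        >>> dcn = DeformConvPack(16, 32, kernel_size=3, padding=1).cuda()
+        >>> x = torch.randn(2, 16, 64, 64).cuda()
+        >>> out = dcn(x)  # offsets are predicted internally by self.conv_offset
+        >>> out.shape
+        torch.Size([2, 32, 64, 64])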
+ """ + + _version = 2 + + def __init__(self, *args, **kwargs): + super(DeformConvPack, self).__init__(*args, **kwargs) + + self.conv_offset = nn.Conv2d( + self.in_channels, + self.deformable_groups * 2 * self.kernel_size[0] * self.kernel_size[1], + kernel_size=self.kernel_size, + stride=_pair(self.stride), + padding=_pair(self.padding), + dilation=_pair(self.dilation), + bias=True) + self.init_offset() + + def init_offset(self): + self.conv_offset.weight.data.zero_() + self.conv_offset.bias.data.zero_() + + def forward(self, x): + offset = self.conv_offset(x) + return deform_conv(x, offset, self.weight, self.stride, self.padding, self.dilation, self.groups, + self.deformable_groups) + + +class ModulatedDeformConv(nn.Module): + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + bias=True): + super(ModulatedDeformConv, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = stride + self.padding = padding + self.dilation = dilation + self.groups = groups + self.deformable_groups = deformable_groups + self.with_bias = bias + # enable compatibility with nn.Conv2d + self.transposed = False + self.output_padding = _single(0) + + self.weight = nn.Parameter(torch.Tensor(out_channels, in_channels // groups, *self.kernel_size)) + if bias: + self.bias = nn.Parameter(torch.Tensor(out_channels)) + else: + self.register_parameter('bias', None) + self.init_weights() + + def init_weights(self): + n = self.in_channels + for k in self.kernel_size: + n *= k + stdv = 1. / math.sqrt(n) + self.weight.data.uniform_(-stdv, stdv) + if self.bias is not None: + self.bias.data.zero_() + + def forward(self, x, offset, mask): + return modulated_deform_conv(x, offset, mask, self.weight, self.bias, self.stride, self.padding, self.dilation, + self.groups, self.deformable_groups) + + +class ModulatedDeformConvPack(ModulatedDeformConv): + """A ModulatedDeformable Conv Encapsulation that acts as normal Conv layers. + + Args: + in_channels (int): Same as nn.Conv2d. + out_channels (int): Same as nn.Conv2d. + kernel_size (int or tuple[int]): Same as nn.Conv2d. + stride (int or tuple[int]): Same as nn.Conv2d. + padding (int or tuple[int]): Same as nn.Conv2d. + dilation (int or tuple[int]): Same as nn.Conv2d. + groups (int): Same as nn.Conv2d. + bias (bool or str): If specified as `auto`, it will be decided by the + norm_cfg. Bias will be set as True if norm_cfg is None, otherwise + False. 
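+
+    Example (illustrative sketch added here, not from the upstream code;
+    assumes the compiled `deform_conv_ext` extension and a CUDA device):
+
+        >>> dcn = ModulatedDeformConvPack(16, 32, kernel_size=3, padding=1).cuda()
+        >>> x = torch.randn(2, 16, 64, 64).cuda()
+        >>> out = dcn(x)  # conv_offset predicts offsets o1, o2 and a sigmoid mask
+        >>> out.shape
+        torch.Size([2, 32, 64, 64])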
+    """
+
+    _version = 2
+
+    def __init__(self, *args, **kwargs):
+        super(ModulatedDeformConvPack, self).__init__(*args, **kwargs)
+
+        self.conv_offset = nn.Conv2d(
+            self.in_channels,
+            self.deformable_groups * 3 * self.kernel_size[0] * self.kernel_size[1],
+            kernel_size=self.kernel_size,
+            stride=_pair(self.stride),
+            padding=_pair(self.padding),
+            dilation=_pair(self.dilation),
+            bias=True)
+        self.init_weights()
+
+    def init_weights(self):
+        super(ModulatedDeformConvPack, self).init_weights()
+        if hasattr(self, 'conv_offset'):
+            self.conv_offset.weight.data.zero_()
+            self.conv_offset.bias.data.zero_()
+
+    def forward(self, x):
+        out = self.conv_offset(x)
+        o1, o2, mask = torch.chunk(out, 3, dim=1)
+        offset = torch.cat((o1, o2), dim=1)
+        mask = torch.sigmoid(mask)
+        return modulated_deform_conv(x, offset, mask, self.weight, self.bias, self.stride, self.padding, self.dilation,
+                                     self.groups, self.deformable_groups)
diff --git a/IIR-Lab/models/archs/dcn/src/deform_conv_cuda.cpp b/IIR-Lab/models/archs/dcn/src/deform_conv_cuda.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b465c493a3dd67d320b7a8997fbd501d2f89c807
--- /dev/null
+++ b/IIR-Lab/models/archs/dcn/src/deform_conv_cuda.cpp
@@ -0,0 +1,685 @@
+// modify from
+// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda.c
+
+#include <torch/extension.h>
+#include <ATen/DeviceGuard.h>
+
+#include <cmath>
+#include <vector>
+
+void deformable_im2col(const at::Tensor data_im, const at::Tensor data_offset,
+                       const int channels, const int height, const int width,
+                       const int ksize_h, const int ksize_w, const int pad_h,
+                       const int pad_w, const int stride_h, const int stride_w,
+                       const int dilation_h, const int dilation_w,
+                       const int parallel_imgs, const int deformable_group,
+                       at::Tensor data_col);
+
+void deformable_col2im(const at::Tensor data_col, const at::Tensor data_offset,
+                       const int channels, const int height, const int width,
+                       const int ksize_h, const int ksize_w, const int pad_h,
+                       const int pad_w, const int stride_h, const int stride_w,
+                       const int dilation_h, const int dilation_w,
+                       const int parallel_imgs, const int deformable_group,
+                       at::Tensor grad_im);
+
+void deformable_col2im_coord(
+    const at::Tensor data_col, const at::Tensor data_im,
+    const at::Tensor data_offset, const int channels, const int height,
+    const int width, const int ksize_h, const int ksize_w, const int pad_h,
+    const int pad_w, const int stride_h, const int stride_w,
+    const int dilation_h, const int dilation_w, const int parallel_imgs,
+    const int deformable_group, at::Tensor grad_offset);
+
+void modulated_deformable_im2col_cuda(
+    const at::Tensor data_im, const at::Tensor data_offset,
+    const at::Tensor data_mask, const int batch_size, const int channels,
+    const int height_im, const int width_im, const int height_col,
+    const int width_col, const int kernel_h, const int kenerl_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    const int dilation_h, const int dilation_w, const int deformable_group,
+    at::Tensor data_col);
+
+void modulated_deformable_col2im_cuda(
+    const at::Tensor data_col, const at::Tensor data_offset,
+    const at::Tensor data_mask, const int batch_size, const int channels,
+    const int height_im, const int width_im, const int height_col,
+    const int width_col, const int kernel_h, const int kenerl_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    const int dilation_h, const int dilation_w, const int deformable_group,
+    at::Tensor grad_im);
+
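+// Commentary added for readability (not part of the original source): the
+// CUDA helpers declared above implement deformable convolution as a GEMM.
+// The forward pass materialises bilinearly-sampled patches with
+// deformable_im2col and multiplies them by the flattened weights; the
+// backward pass reuses the same columns, with deformable_col2im scattering
+// gradients back to the input image and deformable_col2im_coord applying the
+// chain rule through the bilinear sampler to get offset gradients. The
+// modulated_* variants additionally multiply each sampled value by a
+// per-location mask (DCNv2).
+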
+void modulated_deformable_col2im_coord_cuda( + const at::Tensor data_col, const at::Tensor data_im, + const at::Tensor data_offset, const at::Tensor data_mask, + const int batch_size, const int channels, const int height_im, + const int width_im, const int height_col, const int width_col, + const int kernel_h, const int kenerl_w, const int pad_h, const int pad_w, + const int stride_h, const int stride_w, const int dilation_h, + const int dilation_w, const int deformable_group, at::Tensor grad_offset, + at::Tensor grad_mask); + +void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput, + at::Tensor weight, int kH, int kW, int dH, int dW, int padH, + int padW, int dilationH, int dilationW, int group, + int deformable_group) { + TORCH_CHECK(weight.ndimension() == 4, + "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, " + "but got: %s", + weight.ndimension()); + + TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); + + TORCH_CHECK(kW > 0 && kH > 0, + "kernel size should be greater than zero, but got kH: %d kW: %d", kH, + kW); + + TORCH_CHECK((weight.size(2) == kH && weight.size(3) == kW), + "kernel size should be consistent with weight, ", + "but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", kH, + kW, weight.size(2), weight.size(3)); + + TORCH_CHECK(dW > 0 && dH > 0, + "stride should be greater than zero, but got dH: %d dW: %d", dH, dW); + + TORCH_CHECK( + dilationW > 0 && dilationH > 0, + "dilation should be greater than 0, but got dilationH: %d dilationW: %d", + dilationH, dilationW); + + int ndim = input.ndimension(); + int dimf = 0; + int dimh = 1; + int dimw = 2; + + if (ndim == 4) { + dimf++; + dimh++; + dimw++; + } + + TORCH_CHECK(ndim == 3 || ndim == 4, "3D or 4D input tensor expected but got: %s", + ndim); + + long nInputPlane = weight.size(1) * group; + long inputHeight = input.size(dimh); + long inputWidth = input.size(dimw); + long nOutputPlane = weight.size(0); + long outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + + TORCH_CHECK(nInputPlane % deformable_group == 0, + "input channels must divide deformable group size"); + + if (outputWidth < 1 || outputHeight < 1) + AT_ERROR( + "Given input size: (%ld x %ld x %ld). " + "Calculated output size: (%ld x %ld x %ld). 
Output size is too small", + nInputPlane, inputHeight, inputWidth, nOutputPlane, outputHeight, + outputWidth); + + TORCH_CHECK(input.size(1) == nInputPlane, + "invalid number of input planes, expected: %d, but got: %d", + nInputPlane, input.size(1)); + + TORCH_CHECK((inputHeight >= kH && inputWidth >= kW), + "input image is smaller than kernel"); + + TORCH_CHECK((offset.size(2) == outputHeight && offset.size(3) == outputWidth), + "invalid spatial size of offset, expected height: %d width: %d, but " + "got height: %d width: %d", + outputHeight, outputWidth, offset.size(2), offset.size(3)); + + TORCH_CHECK((offset.size(1) == deformable_group * 2 * kH * kW), + "invalid number of channels of offset"); + + if (gradOutput != NULL) { + TORCH_CHECK(gradOutput->size(dimf) == nOutputPlane, + "invalid number of gradOutput planes, expected: %d, but got: %d", + nOutputPlane, gradOutput->size(dimf)); + + TORCH_CHECK((gradOutput->size(dimh) == outputHeight && + gradOutput->size(dimw) == outputWidth), + "invalid size of gradOutput, expected height: %d width: %d , but " + "got height: %d width: %d", + outputHeight, outputWidth, gradOutput->size(dimh), + gradOutput->size(dimw)); + } +} + +int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight, + at::Tensor offset, at::Tensor output, + at::Tensor columns, at::Tensor ones, int kW, + int kH, int dW, int dH, int padW, int padH, + int dilationW, int dilationH, int group, + int deformable_group, int im2col_step) { + // todo: resize columns to include im2col: done + // todo: add im2col_step as input + // todo: add new output buffer and transpose it to output (or directly + // transpose output) todo: possibly change data indexing because of + // parallel_imgs + + shape_check(input, offset, NULL, weight, kH, kW, dH, dW, padH, padW, + dilationH, dilationW, group, deformable_group); + at::DeviceGuard guard(input.device()); + + input = input.contiguous(); + offset = offset.contiguous(); + weight = weight.contiguous(); + + int batch = 1; + if (input.ndimension() == 3) { + // Force batch + batch = 0; + input.unsqueeze_(0); + offset.unsqueeze_(0); + } + + // todo: assert batchsize dividable by im2col_step + + long batchSize = input.size(0); + long nInputPlane = input.size(1); + long inputHeight = input.size(2); + long inputWidth = input.size(3); + + long nOutputPlane = weight.size(0); + + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); + + output = output.view({batchSize / im2col_step, im2col_step, nOutputPlane, + outputHeight, outputWidth}); + columns = at::zeros( + {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, + input.options()); + + if (ones.ndimension() != 2 || + ones.size(0) * ones.size(1) < outputHeight * outputWidth) { + ones = at::ones({outputHeight, outputWidth}, input.options()); + } + + input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, + inputHeight, inputWidth}); + offset = + offset.view({batchSize / im2col_step, im2col_step, + deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + at::Tensor output_buffer = + at::zeros({batchSize / im2col_step, nOutputPlane, + im2col_step * outputHeight, outputWidth}, + output.options()); + + output_buffer = output_buffer.view( + {output_buffer.size(0), group, output_buffer.size(1) / group, + output_buffer.size(2), output_buffer.size(3)}); + + for (int elt = 0; elt 
< batchSize / im2col_step; elt++) { + deformable_im2col(input[elt], offset[elt], nInputPlane, inputHeight, + inputWidth, kH, kW, padH, padW, dH, dW, dilationH, + dilationW, im2col_step, deformable_group, columns); + + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + weight = weight.view({group, weight.size(0) / group, weight.size(1), + weight.size(2), weight.size(3)}); + + for (int g = 0; g < group; g++) { + output_buffer[elt][g] = output_buffer[elt][g] + .flatten(1) + .addmm_(weight[g].flatten(1), columns[g]) + .view_as(output_buffer[elt][g]); + } + } + + output_buffer = output_buffer.view( + {output_buffer.size(0), output_buffer.size(1) * output_buffer.size(2), + output_buffer.size(3), output_buffer.size(4)}); + + output_buffer = output_buffer.view({batchSize / im2col_step, nOutputPlane, + im2col_step, outputHeight, outputWidth}); + output_buffer.transpose_(1, 2); + output.copy_(output_buffer); + output = output.view({batchSize, nOutputPlane, outputHeight, outputWidth}); + + input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); + offset = offset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + if (batch == 0) { + output = output.view({nOutputPlane, outputHeight, outputWidth}); + input = input.view({nInputPlane, inputHeight, inputWidth}); + offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); + } + + return 1; +} + +int deform_conv_backward_input_cuda(at::Tensor input, at::Tensor offset, + at::Tensor gradOutput, at::Tensor gradInput, + at::Tensor gradOffset, at::Tensor weight, + at::Tensor columns, int kW, int kH, int dW, + int dH, int padW, int padH, int dilationW, + int dilationH, int group, + int deformable_group, int im2col_step) { + shape_check(input, offset, &gradOutput, weight, kH, kW, dH, dW, padH, padW, + dilationH, dilationW, group, deformable_group); + at::DeviceGuard guard(input.device()); + + input = input.contiguous(); + offset = offset.contiguous(); + gradOutput = gradOutput.contiguous(); + weight = weight.contiguous(); + + int batch = 1; + + if (input.ndimension() == 3) { + // Force batch + batch = 0; + input = input.view({1, input.size(0), input.size(1), input.size(2)}); + offset = offset.view({1, offset.size(0), offset.size(1), offset.size(2)}); + gradOutput = gradOutput.view( + {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)}); + } + + long batchSize = input.size(0); + long nInputPlane = input.size(1); + long inputHeight = input.size(2); + long inputWidth = input.size(3); + + long nOutputPlane = weight.size(0); + + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + TORCH_CHECK((offset.size(0) == batchSize), 3, "invalid batch size of offset"); + gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); + columns = at::zeros( + {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, + input.options()); + + // change order of grad output + gradOutput = gradOutput.view({batchSize / im2col_step, im2col_step, + nOutputPlane, outputHeight, outputWidth}); + gradOutput.transpose_(1, 2); + + gradInput = gradInput.view({batchSize / im2col_step, im2col_step, nInputPlane, + inputHeight, inputWidth}); + input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, + inputHeight, inputWidth}); + gradOffset = gradOffset.view({batchSize / im2col_step, im2col_step, + deformable_group * 2 * kH * kW, outputHeight, 
+ outputWidth}); + offset = + offset.view({batchSize / im2col_step, im2col_step, + deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + for (int elt = 0; elt < batchSize / im2col_step; elt++) { + // divide into groups + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + weight = weight.view({group, weight.size(0) / group, weight.size(1), + weight.size(2), weight.size(3)}); + gradOutput = gradOutput.view( + {gradOutput.size(0), group, gradOutput.size(1) / group, + gradOutput.size(2), gradOutput.size(3), gradOutput.size(4)}); + + for (int g = 0; g < group; g++) { + columns[g] = columns[g].addmm_(weight[g].flatten(1).transpose(0, 1), + gradOutput[elt][g].flatten(1), 0.0f, 1.0f); + } + + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + gradOutput = gradOutput.view( + {gradOutput.size(0), gradOutput.size(1) * gradOutput.size(2), + gradOutput.size(3), gradOutput.size(4), gradOutput.size(5)}); + + deformable_col2im_coord(columns, input[elt], offset[elt], nInputPlane, + inputHeight, inputWidth, kH, kW, padH, padW, dH, dW, + dilationH, dilationW, im2col_step, deformable_group, + gradOffset[elt]); + + deformable_col2im(columns, offset[elt], nInputPlane, inputHeight, + inputWidth, kH, kW, padH, padW, dH, dW, dilationH, + dilationW, im2col_step, deformable_group, gradInput[elt]); + } + + gradOutput.transpose_(1, 2); + gradOutput = + gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); + + gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); + input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); + gradOffset = gradOffset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + offset = offset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + if (batch == 0) { + gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); + input = input.view({nInputPlane, inputHeight, inputWidth}); + gradInput = gradInput.view({nInputPlane, inputHeight, inputWidth}); + offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); + gradOffset = + gradOffset.view({offset.size(1), offset.size(2), offset.size(3)}); + } + + return 1; +} + +int deform_conv_backward_parameters_cuda( + at::Tensor input, at::Tensor offset, at::Tensor gradOutput, + at::Tensor gradWeight, // at::Tensor gradBias, + at::Tensor columns, at::Tensor ones, int kW, int kH, int dW, int dH, + int padW, int padH, int dilationW, int dilationH, int group, + int deformable_group, float scale, int im2col_step) { + // todo: transpose and reshape outGrad + // todo: reshape columns + // todo: add im2col_step as input + + shape_check(input, offset, &gradOutput, gradWeight, kH, kW, dH, dW, padH, + padW, dilationH, dilationW, group, deformable_group); + at::DeviceGuard guard(input.device()); + + input = input.contiguous(); + offset = offset.contiguous(); + gradOutput = gradOutput.contiguous(); + + int batch = 1; + + if (input.ndimension() == 3) { + // Force batch + batch = 0; + input = input.view( + at::IntList({1, input.size(0), input.size(1), input.size(2)})); + gradOutput = gradOutput.view( + {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)}); + } + + long batchSize = input.size(0); + long nInputPlane = input.size(1); + long inputHeight = input.size(2); + long inputWidth = input.size(3); + + long nOutputPlane = gradWeight.size(0); + + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight 
= + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); + + columns = at::zeros( + {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, + input.options()); + + gradOutput = gradOutput.view({batchSize / im2col_step, im2col_step, + nOutputPlane, outputHeight, outputWidth}); + gradOutput.transpose_(1, 2); + + at::Tensor gradOutputBuffer = at::zeros_like(gradOutput); + gradOutputBuffer = + gradOutputBuffer.view({batchSize / im2col_step, nOutputPlane, im2col_step, + outputHeight, outputWidth}); + gradOutputBuffer.copy_(gradOutput); + gradOutputBuffer = + gradOutputBuffer.view({batchSize / im2col_step, nOutputPlane, + im2col_step * outputHeight, outputWidth}); + + gradOutput.transpose_(1, 2); + gradOutput = + gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); + + input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, + inputHeight, inputWidth}); + offset = + offset.view({batchSize / im2col_step, im2col_step, + deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + for (int elt = 0; elt < batchSize / im2col_step; elt++) { + deformable_im2col(input[elt], offset[elt], nInputPlane, inputHeight, + inputWidth, kH, kW, padH, padW, dH, dW, dilationH, + dilationW, im2col_step, deformable_group, columns); + + // divide into group + gradOutputBuffer = gradOutputBuffer.view( + {gradOutputBuffer.size(0), group, gradOutputBuffer.size(1) / group, + gradOutputBuffer.size(2), gradOutputBuffer.size(3)}); + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + gradWeight = + gradWeight.view({group, gradWeight.size(0) / group, gradWeight.size(1), + gradWeight.size(2), gradWeight.size(3)}); + + for (int g = 0; g < group; g++) { + gradWeight[g] = gradWeight[g] + .flatten(1) + .addmm_(gradOutputBuffer[elt][g].flatten(1), + columns[g].transpose(1, 0), 1.0, scale) + .view_as(gradWeight[g]); + } + gradOutputBuffer = gradOutputBuffer.view( + {gradOutputBuffer.size(0), + gradOutputBuffer.size(1) * gradOutputBuffer.size(2), + gradOutputBuffer.size(3), gradOutputBuffer.size(4)}); + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + gradWeight = gradWeight.view({gradWeight.size(0) * gradWeight.size(1), + gradWeight.size(2), gradWeight.size(3), + gradWeight.size(4)}); + } + + input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); + offset = offset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + if (batch == 0) { + gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); + input = input.view({nInputPlane, inputHeight, inputWidth}); + } + + return 1; +} + +void modulated_deform_conv_cuda_forward( + at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, + at::Tensor offset, at::Tensor mask, at::Tensor output, at::Tensor columns, + int kernel_h, int kernel_w, const int stride_h, const int stride_w, + const int pad_h, const int pad_w, const int dilation_h, + const int dilation_w, const int group, const int deformable_group, + const bool with_bias) { + TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); + TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); + at::DeviceGuard guard(input.device()); + + const int batch = input.size(0); + const int channels = input.size(1); + const int height = input.size(2); + const int width = input.size(3); + + const int channels_out = weight.size(0); + const int 
channels_kernel = weight.size(1); + const int kernel_h_ = weight.size(2); + const int kernel_w_ = weight.size(3); + + if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) + AT_ERROR("Input shape and kernel shape won't match: (%d x %d vs %d x %d).", + kernel_h_, kernel_w, kernel_h_, kernel_w_); + if (channels != channels_kernel * group) + AT_ERROR("Input shape and kernel channels won't match: (%d vs %d).", + channels, channels_kernel * group); + + const int height_out = + (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; + const int width_out = + (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + + if (ones.ndimension() != 2 || + ones.size(0) * ones.size(1) < height_out * width_out) { + // Resize plane and fill with ones... + ones = at::ones({height_out, width_out}, input.options()); + } + + // resize output + output = output.view({batch, channels_out, height_out, width_out}).zero_(); + // resize temporary columns + columns = + at::zeros({channels * kernel_h * kernel_w, 1 * height_out * width_out}, + input.options()); + + output = output.view({output.size(0), group, output.size(1) / group, + output.size(2), output.size(3)}); + + for (int b = 0; b < batch; b++) { + modulated_deformable_im2col_cuda( + input[b], offset[b], mask[b], 1, channels, height, width, height_out, + width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, deformable_group, columns); + + // divide into group + weight = weight.view({group, weight.size(0) / group, weight.size(1), + weight.size(2), weight.size(3)}); + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + + for (int g = 0; g < group; g++) { + output[b][g] = output[b][g] + .flatten(1) + .addmm_(weight[g].flatten(1), columns[g]) + .view_as(output[b][g]); + } + + weight = weight.view({weight.size(0) * weight.size(1), weight.size(2), + weight.size(3), weight.size(4)}); + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + } + + output = output.view({output.size(0), output.size(1) * output.size(2), + output.size(3), output.size(4)}); + + if (with_bias) { + output += bias.view({1, bias.size(0), 1, 1}); + } +} + +void modulated_deform_conv_cuda_backward( + at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, + at::Tensor offset, at::Tensor mask, at::Tensor columns, + at::Tensor grad_input, at::Tensor grad_weight, at::Tensor grad_bias, + at::Tensor grad_offset, at::Tensor grad_mask, at::Tensor grad_output, + int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, + int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, + const bool with_bias) { + TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); + TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); + at::DeviceGuard guard(input.device()); + + const int batch = input.size(0); + const int channels = input.size(1); + const int height = input.size(2); + const int width = input.size(3); + + const int channels_kernel = weight.size(1); + const int kernel_h_ = weight.size(2); + const int kernel_w_ = weight.size(3); + if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) + AT_ERROR("Input shape and kernel shape won't match: (%d x %d vs %d x %d).", + kernel_h_, kernel_w, kernel_h_, kernel_w_); + if (channels != channels_kernel * group) + AT_ERROR("Input shape and kernel channels won't match: (%d vs %d).", + channels, channels_kernel * group); + + const int height_out = + (height + 2 * pad_h - (dilation_h 
* (kernel_h - 1) + 1)) / stride_h + 1; + const int width_out = + (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + + if (ones.ndimension() != 2 || + ones.size(0) * ones.size(1) < height_out * width_out) { + // Resize plane and fill with ones... + ones = at::ones({height_out, width_out}, input.options()); + } + + grad_input = grad_input.view({batch, channels, height, width}); + columns = at::zeros({channels * kernel_h * kernel_w, height_out * width_out}, + input.options()); + + grad_output = + grad_output.view({grad_output.size(0), group, grad_output.size(1) / group, + grad_output.size(2), grad_output.size(3)}); + + for (int b = 0; b < batch; b++) { + // divide int group + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + weight = weight.view({group, weight.size(0) / group, weight.size(1), + weight.size(2), weight.size(3)}); + + for (int g = 0; g < group; g++) { + columns[g].addmm_(weight[g].flatten(1).transpose(0, 1), + grad_output[b][g].flatten(1), 0.0f, 1.0f); + } + + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + weight = weight.view({weight.size(0) * weight.size(1), weight.size(2), + weight.size(3), weight.size(4)}); + + // gradient w.r.t. input coordinate data + modulated_deformable_col2im_coord_cuda( + columns, input[b], offset[b], mask[b], 1, channels, height, width, + height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, + stride_w, dilation_h, dilation_w, deformable_group, grad_offset[b], + grad_mask[b]); + // gradient w.r.t. input data + modulated_deformable_col2im_cuda( + columns, offset[b], mask[b], 1, channels, height, width, height_out, + width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, deformable_group, grad_input[b]); + + // gradient w.r.t. 
weight, dWeight should accumulate across the batch and + // group + modulated_deformable_im2col_cuda( + input[b], offset[b], mask[b], 1, channels, height, width, height_out, + width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, deformable_group, columns); + + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + grad_weight = grad_weight.view({group, grad_weight.size(0) / group, + grad_weight.size(1), grad_weight.size(2), + grad_weight.size(3)}); + if (with_bias) + grad_bias = grad_bias.view({group, grad_bias.size(0) / group}); + + for (int g = 0; g < group; g++) { + grad_weight[g] = + grad_weight[g] + .flatten(1) + .addmm_(grad_output[b][g].flatten(1), columns[g].transpose(0, 1)) + .view_as(grad_weight[g]); + if (with_bias) { + grad_bias[g] = + grad_bias[g] + .view({-1, 1}) + .addmm_(grad_output[b][g].flatten(1), ones.view({-1, 1})) + .view(-1); + } + } + + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + grad_weight = grad_weight.view({grad_weight.size(0) * grad_weight.size(1), + grad_weight.size(2), grad_weight.size(3), + grad_weight.size(4)}); + if (with_bias) + grad_bias = grad_bias.view({grad_bias.size(0) * grad_bias.size(1)}); + } + grad_output = grad_output.view({grad_output.size(0) * grad_output.size(1), + grad_output.size(2), grad_output.size(3), + grad_output.size(4)}); +} diff --git a/IIR-Lab/models/archs/dcn/src/deform_conv_cuda_kernel.cu b/IIR-Lab/models/archs/dcn/src/deform_conv_cuda_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..98752dccf8c58817ca1a952554dd3f33188a2d34 --- /dev/null +++ b/IIR-Lab/models/archs/dcn/src/deform_conv_cuda_kernel.cu @@ -0,0 +1,867 @@ +/*! + ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** + * + * COPYRIGHT + * + * All contributions by the University of California: + * Copyright (c) 2014-2017 The Regents of the University of California (Regents) + * All rights reserved. + * + * All other contributions: + * Copyright (c) 2014-2017, the respective contributors + * All rights reserved. + * + * Caffe uses a shared copyright model: each contributor holds copyright over + * their contributions to Caffe. The project versioning records all such + * contribution and copyright details. If a contributor wants to further mark + * their specific copyright on a particular contribution, they should indicate + * their copyright solely in the commit message of the change when it is + * committed. + * + * LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * CONTRIBUTION AGREEMENT
+ *
+ * By contributing to the BVLC/caffe repository through pull-request, comment,
+ * or otherwise, the contributor releases their content to the
+ * license and copyright terms herein.
+ *
+ ***************** END Caffe Copyright Notice and Disclaimer ********************
+ *
+ * Copyright (c) 2018 Microsoft
+ * Licensed under The MIT License [see LICENSE for details]
+ * \file modulated_deformable_im2col.cuh
+ * \brief Function definitions of converting an image to
+ * column matrix based on kernel, padding, dilation, and offset.
+ * These functions are mainly used in deformable convolution operators.
+ * \ref: https://arxiv.org/abs/1703.06211
+ * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng
+ */
+
+// modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <THC/THCAtomics.cuh>
+#include <stdio.h>
+#include <math.h>
+#include <float.h>
+
+using namespace at;
+
+#define CUDA_KERNEL_LOOP(i, n)                                 \
+  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
+       i += blockDim.x * gridDim.x)
+
+const int CUDA_NUM_THREADS = 1024;
+const int kMaxGridNum = 65535;
+
+inline int GET_BLOCKS(const int N)
+{
+  return std::min(kMaxGridNum, (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS);
+}
+
+template <typename scalar_t>
+__device__ scalar_t deformable_im2col_bilinear(const scalar_t *bottom_data, const int data_width,
+                                               const int height, const int width, scalar_t h, scalar_t w)
+{
+
+  int h_low = floor(h);
+  int w_low = floor(w);
+  int h_high = h_low + 1;
+  int w_high = w_low + 1;
+
+  scalar_t lh = h - h_low;
+  scalar_t lw = w - w_low;
+  scalar_t hh = 1 - lh, hw = 1 - lw;
+
+  scalar_t v1 = 0;
+  if (h_low >= 0 && w_low >= 0)
+    v1 = bottom_data[h_low * data_width + w_low];
+  scalar_t v2 = 0;
+  if (h_low >= 0 && w_high <= width - 1)
+    v2 = bottom_data[h_low * data_width + w_high];
+  scalar_t v3 = 0;
+  if (h_high <= height - 1 && w_low >= 0)
+    v3 = bottom_data[h_high * data_width + w_low];
+  scalar_t v4 = 0;
+  if (h_high <= height - 1 && w_high <= width - 1)
+    v4 = bottom_data[h_high * data_width + w_high];
+
+  scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
+
+  scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
+  return val;
+}
+
+template <typename scalar_t>
+__device__ scalar_t get_gradient_weight(scalar_t argmax_h, scalar_t argmax_w,
+                                        const int h, const int w, const int height, const int width)
+{
+
+  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)
+  {
+    //empty
+    return 0;
+  }
+
+  int argmax_h_low = floor(argmax_h);
+  int argmax_w_low = floor(argmax_w);
+  int argmax_h_high = argmax_h_low + 1;
+  int argmax_w_high = argmax_w_low + 1;
+
+  scalar_t weight = 0;
+  if (h == argmax_h_low && w == argmax_w_low)
+    weight = (h + 1 - argmax_h) * (w + 1 - argmax_w);
+  if (h == argmax_h_low && w == argmax_w_high)
+    weight = (h + 1 - argmax_h) * (argmax_w + 1 - w);
+  if (h == argmax_h_high && w == argmax_w_low)
+    weight = (argmax_h + 1 - h) * (w + 1 - argmax_w);
+  if (h == argmax_h_high && w == argmax_w_high)
+    weight = (argmax_h + 1 - h) * (argmax_w + 1 - w);
+  return weight;
+}
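+
+// Note (added commentary, not in the upstream kernel): deformable_im2col_bilinear
+// and get_gradient_weight are adjoint views of the same bilinear stencil. For a
+// fractional sample at (h, w) with lh = h - floor(h), lw = w - floor(w), the four
+// corner weights are (1-lh)(1-lw), (1-lh)lw, lh(1-lw) and lh*lw; e.g. at
+// (h, w) = (1.25, 2.75) they are 0.1875, 0.5625, 0.0625 and 0.1875, summing to 1.
+// get_gradient_weight returns exactly the weight that corner (h, w) received in
+// the forward interpolation, which is what the col2im scatter pass accumulates.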
+
+template <typename scalar_t>
+__device__ scalar_t get_coordinate_weight(scalar_t argmax_h, scalar_t argmax_w,
+                                          const int height, const int width, const scalar_t *im_data,
+                                          const int data_width, const int bp_dir)
+{
+
+  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)
+  {
+    //empty
+    return 0;
+  }
+
+  int argmax_h_low = floor(argmax_h);
+  int argmax_w_low = floor(argmax_w);
+  int argmax_h_high = argmax_h_low + 1;
+  int argmax_w_high = argmax_w_low + 1;
+
+  scalar_t weight = 0;
+
+  if (bp_dir == 0)
+  {
+    if (argmax_h_low >= 0 && argmax_w_low >= 0)
+      weight += -1 * (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_low * data_width + argmax_w_low];
+    if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
+      weight += -1 * (argmax_w - argmax_w_low) * im_data[argmax_h_low * data_width + argmax_w_high];
+    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
+      weight += (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_high * data_width + argmax_w_low];
+    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
+      weight += (argmax_w - argmax_w_low) * im_data[argmax_h_high * data_width + argmax_w_high];
+  }
+  else if (bp_dir == 1)
+  {
+    if (argmax_h_low >= 0 && argmax_w_low >= 0)
+      weight += -1 * (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_low];
+    if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
+      weight += (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_high];
+    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
+      weight += -1 * (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_low];
+    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
+      weight += (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_high];
+  }
+
+  return weight;
+}
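+
+// Added note: get_coordinate_weight backpropagates through the bilinear
+// sampler itself. With bp_dir == 0 it returns d(sampled value)/d(h) and with
+// bp_dir == 1 it returns d(sampled value)/d(w), i.e. the corner values
+// weighted by the signed derivative of the interpolation weights, which is
+// how the learned offsets receive gradient.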
+
+template <typename scalar_t>
+__global__ void deformable_im2col_gpu_kernel(const int n, const scalar_t *data_im, const scalar_t *data_offset,
+                                             const int height, const int width, const int kernel_h, const int kernel_w,
+                                             const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+                                             const int dilation_h, const int dilation_w, const int channel_per_deformable_group,
+                                             const int batch_size, const int num_channels, const int deformable_group,
+                                             const int height_col, const int width_col,
+                                             scalar_t *data_col)
+{
+  CUDA_KERNEL_LOOP(index, n)
+  {
+    // index index of output matrix
+    const int w_col = index % width_col;
+    const int h_col = (index / width_col) % height_col;
+    const int b_col = (index / width_col / height_col) % batch_size;
+    const int c_im = (index / width_col / height_col) / batch_size;
+    const int c_col = c_im * kernel_h * kernel_w;
+
+    // compute deformable group index
+    const int deformable_group_index = c_im / channel_per_deformable_group;
+
+    const int h_in = h_col * stride_h - pad_h;
+    const int w_in = w_col * stride_w - pad_w;
+    scalar_t *data_col_ptr = data_col + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col;
+    //const scalar_t* data_im_ptr = data_im + ((b_col * num_channels + c_im) * height + h_in) * width + w_in;
+    const scalar_t *data_im_ptr = data_im + (b_col * num_channels + c_im) * height * width;
+    const scalar_t *data_offset_ptr = data_offset + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;
+
+    for (int i = 0; i < kernel_h; ++i)
+    {
+      for (int j = 0; j < kernel_w; ++j)
+      {
+        const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col;
+        const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + w_col;
+        const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr];
+        const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr];
+        scalar_t val = static_cast<scalar_t>(0);
+        const scalar_t h_im = h_in + i * dilation_h + offset_h;
+        const scalar_t w_im = w_in + j * dilation_w + offset_w;
+        if (h_im > -1 && w_im > -1 && h_im < height && w_im < width)
+        {
+          //const scalar_t map_h = i * dilation_h + offset_h;
+          //const scalar_t map_w = j * dilation_w + offset_w;
+          //const int cur_height = height - h_in;
+          //const int cur_width = width - w_in;
+          //val = deformable_im2col_bilinear(data_im_ptr, width, cur_height, cur_width, map_h, map_w);
+          val = deformable_im2col_bilinear(data_im_ptr, width, height, width, h_im, w_im);
+        }
+        *data_col_ptr = val;
+        data_col_ptr += batch_size * height_col * width_col;
+      }
+    }
+  }
+}
+
+void deformable_im2col(
+    const at::Tensor data_im, const at::Tensor data_offset, const int channels,
+    const int height, const int width, const int ksize_h, const int ksize_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    const int dilation_h, const int dilation_w, const int parallel_imgs,
+    const int deformable_group, at::Tensor data_col)
+{
+  // num_axes should be smaller than block size
+  // todo: check parallel_imgs is correctly passed in
+  int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1;
+  int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1;
+  int num_kernels = channels * height_col * width_col * parallel_imgs;
+  int channel_per_deformable_group = channels / deformable_group;
+
+  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+      data_im.scalar_type(), "deformable_im2col_gpu", ([&] {
+        const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
+        const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+        scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
+
+        deformable_im2col_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
+            num_kernels, data_im_, data_offset_, height, width, ksize_h, ksize_w,
+            pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
+            channel_per_deformable_group, parallel_imgs, channels, deformable_group,
+            height_col, width_col, data_col_);
+      }));
+
+  cudaError_t err = cudaGetLastError();
+  if (err != cudaSuccess)
+  {
+    printf("error in deformable_im2col: %s\n", cudaGetErrorString(err));
+  }
+}
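+
+// Added note on the launch configuration used by the host wrappers in this
+// file: one logical thread is assigned per output element (num_kernels of
+// them), GET_BLOCKS caps the grid at kMaxGridNum blocks of CUDA_NUM_THREADS
+// threads, and CUDA_KERNEL_LOOP grid-strides so the kernels stay correct even
+// when num_kernels exceeds the capped grid capacity.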
+
+template <typename scalar_t>
+__global__ void deformable_col2im_gpu_kernel(
+    const int n, const scalar_t *data_col, const scalar_t *data_offset,
+    const int channels, const int height, const int width,
+    const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w,
+    const int stride_h, const int stride_w,
+    const int dilation_h, const int dilation_w,
+    const int channel_per_deformable_group,
+    const int batch_size, const int deformable_group,
+    const int height_col, const int width_col,
+    scalar_t *grad_im)
+{
+  CUDA_KERNEL_LOOP(index, n)
+  {
+    const int j = (index / width_col / height_col / batch_size) % kernel_w;
+    const int i = (index / width_col / height_col / batch_size / kernel_w) % kernel_h;
+    const int c = index / width_col / height_col / batch_size / kernel_w / kernel_h;
+    // compute the start and end of the output
+
+    const int deformable_group_index = c / channel_per_deformable_group;
+
+    int w_out = index % width_col;
+    int h_out = (index / width_col) % height_col;
+    int b = (index / width_col / height_col) % batch_size;
+    int w_in = w_out * stride_w - pad_w;
+    int h_in = h_out * stride_h - pad_h;
+
+    const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) *
+                                                        2 * kernel_h * kernel_w * height_col * width_col;
+    const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out;
+    const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out;
+    const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr];
+    const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr];
+    const scalar_t cur_inv_h_data = h_in + i * dilation_h + offset_h;
+    const scalar_t cur_inv_w_data = w_in + j * dilation_w + offset_w;
+
+    const scalar_t cur_top_grad = data_col[index];
+    const int cur_h = (int)cur_inv_h_data;
+    const int cur_w = (int)cur_inv_w_data;
+    for (int dy = -2; dy <= 2; dy++)
+    {
+      for (int dx = -2; dx <= 2; dx++)
+      {
+        if (cur_h + dy >= 0 && cur_h + dy < height &&
+            cur_w + dx >= 0 && cur_w + dx < width &&
+            abs(cur_inv_h_data - (cur_h + dy)) < 1 &&
+            abs(cur_inv_w_data - (cur_w + dx)) < 1)
+        {
+          int cur_bottom_grad_pos = ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx;
+          scalar_t weight = get_gradient_weight(cur_inv_h_data, cur_inv_w_data, cur_h + dy, cur_w + dx, height, width);
+          atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad);
+        }
+      }
+    }
+  }
+}
+
+void deformable_col2im(
+    const at::Tensor data_col, const at::Tensor data_offset, const int channels,
+    const int height, const int width, const int ksize_h,
+    const int ksize_w, const int pad_h, const int pad_w,
+    const int stride_h, const int stride_w,
+    const int dilation_h, const int dilation_w,
+    const int parallel_imgs, const int deformable_group,
+    at::Tensor grad_im)
+{
+
+  // todo: make sure parallel_imgs is passed in correctly
+  int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1;
+  int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1;
+  int num_kernels = channels * ksize_h * ksize_w * height_col * width_col * parallel_imgs;
+  int channel_per_deformable_group = channels / deformable_group;
+
+  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+      data_col.scalar_type(), "deformable_col2im_gpu", ([&] {
+        const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
+        const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+        scalar_t *grad_im_ = grad_im.data_ptr<scalar_t>();
+
+        deformable_col2im_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
+            num_kernels, data_col_, data_offset_, channels, height, width, ksize_h,
+            ksize_w, pad_h, pad_w, stride_h, stride_w,
+            dilation_h, dilation_w, channel_per_deformable_group,
+            parallel_imgs, deformable_group, height_col, width_col, grad_im_);
+      }));
+
+  cudaError_t err = cudaGetLastError();
+  if (err != cudaSuccess)
+  {
+    printf("error in deformable_col2im: %s\n", cudaGetErrorString(err));
+  }
+}
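+
+// Added note: deformable_col2im is a scatter, so many column elements can map
+// to the same input pixel; atomicAdd makes those accumulations race-free. The
+// dy/dx loop over [-2, 2] combined with the |coordinate distance| < 1 guard
+// visits exactly the (up to four) integer corners that received bilinear
+// weight in the forward pass.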
+
+template <typename scalar_t>
+__global__ void deformable_col2im_coord_gpu_kernel(const int n, const scalar_t *data_col,
                                                   const scalar_t *data_im, const scalar_t *data_offset,
+                                                   const int channels, const int height, const int width,
+                                                   const int kernel_h, const int kernel_w,
+                                                   const int pad_h, const int pad_w,
+                                                   const int stride_h, const int stride_w,
+                                                   const int dilation_h, const int dilation_w,
+                                                   const int channel_per_deformable_group,
+                                                   const int batch_size, const int offset_channels, const int deformable_group,
+                                                   const int height_col, const int width_col, scalar_t *grad_offset)
+{
+  CUDA_KERNEL_LOOP(index, n)
+  {
+    scalar_t val = 0;
+    int w = index % width_col;
+    int h = (index / width_col) % height_col;
+    int c = (index / width_col / height_col) % offset_channels;
+    int b = (index / width_col / height_col) / offset_channels;
+    // compute the start and end of the output
+
+    const int deformable_group_index = c / (2 * kernel_h * kernel_w);
+    const int col_step = kernel_h * kernel_w;
+    int cnt = 0;
+    const scalar_t *data_col_ptr = data_col + deformable_group_index * channel_per_deformable_group *
+                                                  batch_size * width_col * height_col;
+    const scalar_t *data_im_ptr = data_im + (b * deformable_group + deformable_group_index) *
+                                                channel_per_deformable_group / kernel_h / kernel_w * height * width;
+    const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 *
+                                                        kernel_h * kernel_w * height_col * width_col;
+
+    const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w;
+
+    for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; col_c += col_step)
+    {
+      const int col_pos = (((col_c * batch_size + b) * height_col) + h) * width_col + w;
+      const int bp_dir = offset_c % 2;
+
+      int j = (col_pos / width_col / height_col / batch_size) % kernel_w;
+      int i = (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h;
+      int w_out = col_pos % width_col;
+      int h_out = (col_pos / width_col) % height_col;
+      int w_in = w_out * stride_w - pad_w;
+      int h_in = h_out * stride_h - pad_h;
+      const int data_offset_h_ptr = (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out);
+      const int data_offset_w_ptr = (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out);
+      const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr];
+      const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr];
+      scalar_t inv_h = h_in + i * dilation_h + offset_h;
+      scalar_t inv_w = w_in + j * dilation_w + offset_w;
+      if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width)
+      {
+        inv_h = inv_w = -2;
+      }
+      const scalar_t weight = get_coordinate_weight(
+          inv_h, inv_w,
+          height, width, data_im_ptr + cnt * height * width, width, bp_dir);
+      val += weight * data_col_ptr[col_pos];
+      cnt += 1;
+    }
+
+    grad_offset[index] = val;
+  }
+}
+
+void deformable_col2im_coord(
+    const at::Tensor data_col, const at::Tensor data_im, const at::Tensor data_offset,
+    const int channels, const int height, const int width, const int ksize_h,
+    const int ksize_w, const int pad_h, const int pad_w, const int stride_h,
+    const int stride_w, const int dilation_h, const int dilation_w,
+    const int parallel_imgs, const int deformable_group, at::Tensor grad_offset)
+{
+
+  int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1;
+  int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1;
+  int num_kernels = height_col * width_col * 2 * ksize_h * ksize_w * deformable_group * parallel_imgs;
+  int channel_per_deformable_group = channels * ksize_h * ksize_w / deformable_group;
+
+  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+      data_col.scalar_type(), "deformable_col2im_coord_gpu", ([&] {
+        const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
+        const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
+        const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+        scalar_t *grad_offset_ = grad_offset.data_ptr<scalar_t>();
+
+        deformable_col2im_coord_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
+            num_kernels, data_col_, data_im_, data_offset_, channels, height, width,
+            ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w,
+            dilation_h, dilation_w, channel_per_deformable_group,
+            parallel_imgs, 2 * ksize_h * ksize_w * deformable_group, deformable_group,
+            height_col, width_col, grad_offset_);
+      }));
+}
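+
+// Added note: for offset gradients one thread is launched per offset element
+// (2 * ksize_h * ksize_w channels per deformable group); each thread walks the
+// column channels of its deformable group and accumulates
+// get_coordinate_weight-weighted contributions, with offset_c % 2 selecting
+// the h- versus w-component of each offset pair.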
+
+template <typename scalar_t>
+__device__ scalar_t dmcn_im2col_bilinear(const scalar_t *bottom_data, const int data_width,
+ const int height, const int width, scalar_t h, scalar_t w)
+{
+ int h_low = floor(h);
+ int w_low = floor(w);
+ int h_high = h_low + 1;
+ int w_high = w_low + 1;
+
+ scalar_t lh = h - h_low;
+ scalar_t lw = w - w_low;
+ scalar_t hh = 1 - lh, hw = 1 - lw;
+
+ scalar_t v1 = 0;
+ if (h_low >= 0 && w_low >= 0)
+ v1 = bottom_data[h_low * data_width + w_low];
+ scalar_t v2 = 0;
+ if (h_low >= 0 && w_high <= width - 1)
+ v2 = bottom_data[h_low * data_width + w_high];
+ scalar_t v3 = 0;
+ if (h_high <= height - 1 && w_low >= 0)
+ v3 = bottom_data[h_high * data_width + w_low];
+ scalar_t v4 = 0;
+ if (h_high <= height - 1 && w_high <= width - 1)
+ v4 = bottom_data[h_high * data_width + w_high];
+
+ scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
+
+ scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
+ return val;
+}
+
+template <typename scalar_t>
+__device__ scalar_t dmcn_get_gradient_weight(scalar_t argmax_h, scalar_t argmax_w,
+ const int h, const int w, const int height, const int width)
+{
+ if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)
+ {
+ //empty
+ return 0;
+ }
+
+ int argmax_h_low = floor(argmax_h);
+ int argmax_w_low = floor(argmax_w);
+ int argmax_h_high = argmax_h_low + 1;
+ int argmax_w_high = argmax_w_low + 1;
+
+ scalar_t weight = 0;
+ if (h == argmax_h_low && w == argmax_w_low)
+ weight = (h + 1 - argmax_h) * (w + 1 - argmax_w);
+ if (h == argmax_h_low && w == argmax_w_high)
+ weight = (h + 1 - argmax_h) * (argmax_w + 1 - w);
+ if (h == argmax_h_high && w == argmax_w_low)
+ weight = (argmax_h + 1 - h) * (w + 1 - argmax_w);
+ if (h == argmax_h_high && w == argmax_w_high)
+ weight = (argmax_h + 1 - h) * (argmax_w + 1 - w);
+ return weight;
+}
+
+template <typename scalar_t>
+__device__ scalar_t dmcn_get_coordinate_weight(scalar_t argmax_h, scalar_t argmax_w,
+ const int height, const int width, const scalar_t *im_data,
+ const int data_width, const int bp_dir)
+{
+ if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)
+ {
+ //empty
+ return 0;
+ }
+
+ int argmax_h_low = floor(argmax_h);
+ int argmax_w_low = floor(argmax_w);
+ int argmax_h_high = argmax_h_low + 1;
+ int argmax_w_high = argmax_w_low + 1;
+
+ scalar_t weight = 0;
+
+ if (bp_dir == 0)
+ {
+ if (argmax_h_low >= 0 && argmax_w_low >= 0)
+ weight += -1 * (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_low * data_width + argmax_w_low];
+ if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
+ weight += -1 * (argmax_w - argmax_w_low) * im_data[argmax_h_low * data_width + argmax_w_high];
+ if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
+ weight += (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_high * data_width + argmax_w_low];
+ if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
+ weight += (argmax_w - argmax_w_low) * im_data[argmax_h_high * data_width + argmax_w_high];
+ }
+ else if (bp_dir == 1)
+ {
+ if (argmax_h_low >= 0 && argmax_w_low >= 0)
+ weight += -1 * (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_low];
+ if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
+ weight += (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_high];
+ if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
+ weight += -1 * (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_low];
+ if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
+ weight += (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_high];
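+ // This function is the analytic derivative of dmcn_im2col_bilinear: with
+ // lh = h - floor(h), lw = w - floor(w) and corner values v1..v4,
+ // val = (1-lh)(1-lw)*v1 + (1-lh)*lw*v2 + lh*(1-lw)*v3 + lh*lw*v4, so
+ // dval/dh = -(1-lw)*v1 - lw*v2 + (1-lw)*v3 + lw*v4 (bp_dir == 0) and
+ // dval/dw = -(1-lh)*v1 + (1-lh)*v2 - lh*v3 + lh*v4 (bp_dir == 1).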
+ }
+
+ return weight;
+}
+
+template <typename scalar_t>
+__global__ void modulated_deformable_im2col_gpu_kernel(const int n,
+ const scalar_t *data_im, const scalar_t *data_offset, const scalar_t *data_mask,
+ const int height, const int width, const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w,
+ const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int channel_per_deformable_group,
+ const int batch_size, const int num_channels, const int deformable_group,
+ const int height_col, const int width_col,
+ scalar_t *data_col)
+{
+ CUDA_KERNEL_LOOP(index, n)
+ {
+ // index index of output matrix
+ const int w_col = index % width_col;
+ const int h_col = (index / width_col) % height_col;
+ const int b_col = (index / width_col / height_col) % batch_size;
+ const int c_im = (index / width_col / height_col) / batch_size;
+ const int c_col = c_im * kernel_h * kernel_w;
+
+ // compute deformable group index
+ const int deformable_group_index = c_im / channel_per_deformable_group;
+
+ const int h_in = h_col * stride_h - pad_h;
+ const int w_in = w_col * stride_w - pad_w;
+
+ scalar_t *data_col_ptr = data_col + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col;
+ //const float* data_im_ptr = data_im + ((b_col * num_channels + c_im) * height + h_in) * width + w_in;
+ const scalar_t *data_im_ptr = data_im + (b_col * num_channels + c_im) * height * width;
+ const scalar_t *data_offset_ptr = data_offset + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;
+
+ const scalar_t *data_mask_ptr = data_mask + (b_col * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;
+
+ for (int i = 0; i < kernel_h; ++i)
+ {
+ for (int j = 0; j < kernel_w; ++j)
+ {
+ const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col;
+ const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + w_col;
+ const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_col) * width_col + w_col;
+ const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr];
+ const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr];
+ const scalar_t mask = data_mask_ptr[data_mask_hw_ptr];
+ scalar_t val = static_cast<scalar_t>(0);
+ const scalar_t h_im = h_in + i * dilation_h + offset_h;
+ const scalar_t w_im = w_in + j * dilation_w + offset_w;
+ //if (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) {
+ if (h_im > -1 && w_im > -1 && h_im < height && w_im < width)
+ {
+ //const float map_h = i * dilation_h + offset_h;
+ //const float map_w = j * dilation_w + offset_w;
+ //const int cur_height = height - h_in;
+ //const int cur_width = width - w_in;
+ //val = dmcn_im2col_bilinear(data_im_ptr, width, cur_height, cur_width, map_h, map_w);
+ val = dmcn_im2col_bilinear(data_im_ptr, width, height, width, h_im, w_im);
+ }
+ *data_col_ptr = val * mask;
+ data_col_ptr += batch_size * height_col * width_col;
+ //data_col_ptr += height_col * width_col;
+ }
+ }
+ }
+}
+
+template <typename scalar_t>
+__global__ void modulated_deformable_col2im_gpu_kernel(const int n,
+ const scalar_t *data_col, const scalar_t *data_offset, const scalar_t *data_mask,
+ const int channels, const int height, const int width,
+ const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w,
+ const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int channel_per_deformable_group,
+ const int batch_size, const int deformable_group,
+ const int height_col, const int width_col,
+ scalar_t *grad_im)
+{
+ CUDA_KERNEL_LOOP(index, n)
+ {
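+ // Each thread scatters one column-gradient entry back to grad_im. A
+ // fractional sample has four integer bilinear neighbours; the +/-2 dy/dx
+ // window below, combined with the |delta| < 1 test, visits exactly those.
+ // E.g. a sample at (2.3, 4.7) touches rows 2-3 and cols 4-5 with weights
+ // 0.21, 0.49, 0.09, 0.21 (summing to 1). atomicAdd is required because
+ // different output positions can hit the same input pixel.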
+ const int j = (index / width_col / height_col / batch_size) % kernel_w;
+ const int i = (index / width_col / height_col / batch_size / kernel_w) % kernel_h;
+ const int c = index / width_col / height_col / batch_size / kernel_w / kernel_h;
+ // compute the start and end of the output
+
+ const int deformable_group_index = c / channel_per_deformable_group;
+
+ int w_out = index % width_col;
+ int h_out = (index / width_col) % height_col;
+ int b = (index / width_col / height_col) % batch_size;
+ int w_in = w_out * stride_w - pad_w;
+ int h_in = h_out * stride_h - pad_h;
+
+ const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;
+ const scalar_t *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;
+ const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out;
+ const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out;
+ const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_out) * width_col + w_out;
+ const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr];
+ const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr];
+ const scalar_t mask = data_mask_ptr[data_mask_hw_ptr];
+ const scalar_t cur_inv_h_data = h_in + i * dilation_h + offset_h;
+ const scalar_t cur_inv_w_data = w_in + j * dilation_w + offset_w;
+
+ const scalar_t cur_top_grad = data_col[index] * mask;
+ const int cur_h = (int)cur_inv_h_data;
+ const int cur_w = (int)cur_inv_w_data;
+ for (int dy = -2; dy <= 2; dy++)
+ {
+ for (int dx = -2; dx <= 2; dx++)
+ {
+ if (cur_h + dy >= 0 && cur_h + dy < height &&
+ cur_w + dx >= 0 && cur_w + dx < width &&
+ abs(cur_inv_h_data - (cur_h + dy)) < 1 &&
+ abs(cur_inv_w_data - (cur_w + dx)) < 1)
+ {
+ int cur_bottom_grad_pos = ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx;
+ scalar_t weight = dmcn_get_gradient_weight(cur_inv_h_data, cur_inv_w_data, cur_h + dy, cur_w + dx, height, width);
+ atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad);
+ }
+ }
+ }
+ }
+}
+
+template <typename scalar_t>
+__global__ void modulated_deformable_col2im_coord_gpu_kernel(const int n,
+ const scalar_t *data_col, const scalar_t *data_im,
+ const scalar_t *data_offset, const scalar_t *data_mask,
+ const int channels, const int height, const int width,
+ const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w,
+ const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int channel_per_deformable_group,
+ const int batch_size, const int offset_channels, const int deformable_group,
+ const int height_col, const int width_col,
+ scalar_t *grad_offset, scalar_t *grad_mask)
+{
+ CUDA_KERNEL_LOOP(index, n)
+ {
+ scalar_t val = 0, mval = 0;
+ int w = index % width_col;
+ int h = (index / width_col) % height_col;
+ int c = (index / width_col / height_col) % offset_channels;
+ int b = (index / width_col / height_col) / offset_channels;
+ // compute the start and end of the output
+
+ const int deformable_group_index = c / (2 * kernel_h * kernel_w);
+ const int col_step = kernel_h * kernel_w;
+ int cnt = 0;
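+ // channel_per_deformable_group is measured in column space here
+ // (channels * kernel_h * kernel_w / deformable_group). The loop below
+ // starts at this thread's tap id (offset_c / 2) and strides by
+ // kernel_h * kernel_w, visiting the column entry of every input channel
+ // sampled through this tap; val accumulates the offset gradient and mval
+ // the mask gradient.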
+ const scalar_t *data_col_ptr = data_col + deformable_group_index * channel_per_deformable_group *
+ batch_size * width_col * height_col;
+ const scalar_t *data_im_ptr = data_im + (b * deformable_group + deformable_group_index) * channel_per_deformable_group / kernel_h / kernel_w * height * width;
+ const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;
+ const scalar_t *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;
+
+ const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w;
+
+ for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; col_c += col_step)
+ {
+ const int col_pos = (((col_c * batch_size + b) * height_col) + h) * width_col + w;
+ const int bp_dir = offset_c % 2;
+
+ int j = (col_pos / width_col / height_col / batch_size) % kernel_w;
+ int i = (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h;
+ int w_out = col_pos % width_col;
+ int h_out = (col_pos / width_col) % height_col;
+ int w_in = w_out * stride_w - pad_w;
+ int h_in = h_out * stride_h - pad_h;
+ const int data_offset_h_ptr = (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out);
+ const int data_offset_w_ptr = (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out);
+ const int data_mask_hw_ptr = (((i * kernel_w + j) * height_col + h_out) * width_col + w_out);
+ const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr];
+ const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr];
+ const scalar_t mask = data_mask_ptr[data_mask_hw_ptr];
+ scalar_t inv_h = h_in + i * dilation_h + offset_h;
+ scalar_t inv_w = w_in + j * dilation_w + offset_w;
+ if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width)
+ {
+ inv_h = inv_w = -2;
+ }
+ else
+ {
+ mval += data_col_ptr[col_pos] * dmcn_im2col_bilinear(data_im_ptr + cnt * height * width, width, height, width, inv_h, inv_w);
+ }
+ const scalar_t weight = dmcn_get_coordinate_weight(
+ inv_h, inv_w,
+ height, width, data_im_ptr + cnt * height * width, width, bp_dir);
+ val += weight * data_col_ptr[col_pos] * mask;
+ cnt += 1;
+ }
+ // KERNEL_ASSIGN(grad_offset[index], offset_req, val);
+ grad_offset[index] = val;
+ if (offset_c % 2 == 0)
+ // KERNEL_ASSIGN(grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * height_col + h) * width_col + w], mask_req, mval);
+ grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * height_col + h) * width_col + w] = mval;
+ }
+}
+
+void modulated_deformable_im2col_cuda(
+ const at::Tensor data_im, const at::Tensor data_offset, const at::Tensor data_mask,
+ const int batch_size, const int channels, const int height_im, const int width_im,
+ const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
+ const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int deformable_group, at::Tensor data_col)
+{
+ // num_axes should be smaller than block size
+ const int channel_per_deformable_group = channels / deformable_group;
+ const int num_kernels = channels * batch_size * height_col * width_col;
+
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+ data_im.scalar_type(), "modulated_deformable_im2col_gpu", ([&] {
+ const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
+ const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+ const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
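+ // AT_DISPATCH_FLOATING_TYPES_AND_HALF instantiates the lambda once per
+ // dtype (scalar_t = float, double or at::Half) and the raw pointers are
+ // handed to the kernel. num_kernels = channels * batch * height_col *
+ // width_col: one thread per output column position, each writing
+ // kernel_h * kernel_w column entries.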
+ scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
+
+ modulated_deformable_im2col_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
+ num_kernels, data_im_, data_offset_, data_mask_, height_im, width_im, kernel_h, kenerl_w,
+ pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group,
+ batch_size, channels, deformable_group, height_col, width_col, data_col_);
+ }));
+
+ cudaError_t err = cudaGetLastError();
+ if (err != cudaSuccess)
+ {
+ printf("error in modulated_deformable_im2col_cuda: %s\n", cudaGetErrorString(err));
+ }
+}
+
+void modulated_deformable_col2im_cuda(
+ const at::Tensor data_col, const at::Tensor data_offset, const at::Tensor data_mask,
+ const int batch_size, const int channels, const int height_im, const int width_im,
+ const int height_col, const int width_col, const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int deformable_group, at::Tensor grad_im)
+{
+
+ const int channel_per_deformable_group = channels / deformable_group;
+ const int num_kernels = channels * kernel_h * kernel_w * batch_size * height_col * width_col;
+
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+ data_col.scalar_type(), "modulated_deformable_col2im_gpu", ([&] {
+ const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
+ const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+ const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
+ scalar_t *grad_im_ = grad_im.data_ptr<scalar_t>();
+
+ modulated_deformable_col2im_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
+ num_kernels, data_col_, data_offset_, data_mask_, channels, height_im, width_im,
+ kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
+ dilation_h, dilation_w, channel_per_deformable_group,
+ batch_size, deformable_group, height_col, width_col, grad_im_);
+ }));
+
+ cudaError_t err = cudaGetLastError();
+ if (err != cudaSuccess)
+ {
+ printf("error in modulated_deformable_col2im_cuda: %s\n", cudaGetErrorString(err));
+ }
+}
+
+void modulated_deformable_col2im_coord_cuda(
+ const at::Tensor data_col, const at::Tensor data_im, const at::Tensor data_offset, const at::Tensor data_mask,
+ const int batch_size, const int channels, const int height_im, const int width_im,
+ const int height_col, const int width_col, const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int deformable_group,
+ at::Tensor grad_offset, at::Tensor grad_mask)
+{
+ const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h * kernel_w * deformable_group;
+ const int channel_per_deformable_group = channels * kernel_h * kernel_w / deformable_group;
+
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+ data_col.scalar_type(), "modulated_deformable_col2im_coord_gpu", ([&] {
+ const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
+ const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
+ const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+ const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
+ scalar_t *grad_offset_ = grad_offset.data_ptr<scalar_t>();
+ scalar_t *grad_mask_ = grad_mask.data_ptr<scalar_t>();
+
+ modulated_deformable_col2im_coord_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
+ num_kernels, data_col_, data_im_, data_offset_, data_mask_, channels, height_im, width_im,
+ kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
+ dilation_h, dilation_w, channel_per_deformable_group,
+ batch_size, 2 * kernel_h * kernel_w * deformable_group, deformable_group, height_col, width_col,
+ grad_offset_, grad_mask_);
+ }));
+ cudaError_t err = cudaGetLastError();
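+ // Offset and mask gradients come out of one kernel launch because both
+ // need the same bilinear samples of data_im; the mask gradient is only
+ // written for even offset channels (once per tap) so it is not duplicated.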
+ if (err != cudaSuccess)
+ {
+ printf("error in modulated_deformable_col2im_coord_cuda: %s\n", cudaGetErrorString(err));
+ }
+}
diff --git a/IIR-Lab/models/archs/dcn/src/deform_conv_ext.cpp b/IIR-Lab/models/archs/dcn/src/deform_conv_ext.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..41c6df6f721bd95a525fd6a03dd9882e863de042
--- /dev/null
+++ b/IIR-Lab/models/archs/dcn/src/deform_conv_ext.cpp
@@ -0,0 +1,164 @@
+// modify from
+// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda.c
+
+#include <torch/extension.h> // assumed: standard headers for a PyTorch C++ extension
+#include <ATen/ATen.h>
+
+#include <cmath>
+#include <vector>
+
+#define WITH_CUDA // always use cuda
+#ifdef WITH_CUDA
+int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,
+ at::Tensor offset, at::Tensor output,
+ at::Tensor columns, at::Tensor ones, int kW,
+ int kH, int dW, int dH, int padW, int padH,
+ int dilationW, int dilationH, int group,
+ int deformable_group, int im2col_step);
+
+int deform_conv_backward_input_cuda(at::Tensor input, at::Tensor offset,
+ at::Tensor gradOutput, at::Tensor gradInput,
+ at::Tensor gradOffset, at::Tensor weight,
+ at::Tensor columns, int kW, int kH, int dW,
+ int dH, int padW, int padH, int dilationW,
+ int dilationH, int group,
+ int deformable_group, int im2col_step);
+
+int deform_conv_backward_parameters_cuda(
+ at::Tensor input, at::Tensor offset, at::Tensor gradOutput,
+ at::Tensor gradWeight, // at::Tensor gradBias,
+ at::Tensor columns, at::Tensor ones, int kW, int kH, int dW, int dH,
+ int padW, int padH, int dilationW, int dilationH, int group,
+ int deformable_group, float scale, int im2col_step);
+
+void modulated_deform_conv_cuda_forward(
+ at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones,
+ at::Tensor offset, at::Tensor mask, at::Tensor output, at::Tensor columns,
+ int kernel_h, int kernel_w, const int stride_h, const int stride_w,
+ const int pad_h, const int pad_w, const int dilation_h,
+ const int dilation_w, const int group, const int deformable_group,
+ const bool with_bias);
+
+void modulated_deform_conv_cuda_backward(
+ at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones,
+ at::Tensor offset, at::Tensor mask, at::Tensor columns,
+ at::Tensor grad_input, at::Tensor grad_weight, at::Tensor grad_bias,
+ at::Tensor grad_offset, at::Tensor grad_mask, at::Tensor grad_output,
+ int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h,
+ int pad_w, int dilation_h, int dilation_w, int group, int deformable_group,
+ const bool with_bias);
+#endif
+
+int deform_conv_forward(at::Tensor input, at::Tensor weight,
+ at::Tensor offset, at::Tensor output,
+ at::Tensor columns, at::Tensor ones, int kW,
+ int kH, int dW, int dH, int padW, int padH,
+ int dilationW, int dilationH, int group,
+ int deformable_group, int im2col_step) {
+ if (input.device().is_cuda()) {
+#ifdef WITH_CUDA
+ return deform_conv_forward_cuda(input, weight, offset, output, columns,
+ ones, kW, kH, dW, dH, padW, padH, dilationW, dilationH, group,
+ deformable_group, im2col_step);
+#else
+ AT_ERROR("deform conv is not compiled with GPU support");
+#endif
+ }
+ AT_ERROR("deform conv is not implemented on CPU");
+}
+
+int deform_conv_backward_input(at::Tensor input, at::Tensor offset,
+ at::Tensor gradOutput, at::Tensor gradInput,
+ at::Tensor gradOffset, at::Tensor weight,
+ at::Tensor columns, int kW, int kH, int dW,
+ int dH, int padW, int padH, int dilationW,
+ int dilationH, int group,
+ int deformable_group, int im2col_step) {
+ if (input.device().is_cuda()) {
+#ifdef 
WITH_CUDA + return deform_conv_backward_input_cuda(input, offset, gradOutput, + gradInput, gradOffset, weight, columns, kW, kH, dW, dH, padW, padH, + dilationW, dilationH, group, deformable_group, im2col_step); +#else + AT_ERROR("deform conv is not compiled with GPU support"); +#endif + } + AT_ERROR("deform conv is not implemented on CPU"); +} + +int deform_conv_backward_parameters( + at::Tensor input, at::Tensor offset, at::Tensor gradOutput, + at::Tensor gradWeight, // at::Tensor gradBias, + at::Tensor columns, at::Tensor ones, int kW, int kH, int dW, int dH, + int padW, int padH, int dilationW, int dilationH, int group, + int deformable_group, float scale, int im2col_step) { + if (input.device().is_cuda()) { +#ifdef WITH_CUDA + return deform_conv_backward_parameters_cuda(input, offset, gradOutput, + gradWeight, columns, ones, kW, kH, dW, dH, padW, padH, dilationW, + dilationH, group, deformable_group, scale, im2col_step); +#else + AT_ERROR("deform conv is not compiled with GPU support"); +#endif + } + AT_ERROR("deform conv is not implemented on CPU"); +} + +void modulated_deform_conv_forward( + at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, + at::Tensor offset, at::Tensor mask, at::Tensor output, at::Tensor columns, + int kernel_h, int kernel_w, const int stride_h, const int stride_w, + const int pad_h, const int pad_w, const int dilation_h, + const int dilation_w, const int group, const int deformable_group, + const bool with_bias) { + if (input.device().is_cuda()) { +#ifdef WITH_CUDA + return modulated_deform_conv_cuda_forward(input, weight, bias, ones, + offset, mask, output, columns, kernel_h, kernel_w, stride_h, + stride_w, pad_h, pad_w, dilation_h, dilation_w, group, + deformable_group, with_bias); +#else + AT_ERROR("modulated deform conv is not compiled with GPU support"); +#endif + } + AT_ERROR("modulated deform conv is not implemented on CPU"); +} + +void modulated_deform_conv_backward( + at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, + at::Tensor offset, at::Tensor mask, at::Tensor columns, + at::Tensor grad_input, at::Tensor grad_weight, at::Tensor grad_bias, + at::Tensor grad_offset, at::Tensor grad_mask, at::Tensor grad_output, + int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, + int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, + const bool with_bias) { + if (input.device().is_cuda()) { +#ifdef WITH_CUDA + return modulated_deform_conv_cuda_backward(input, weight, bias, ones, + offset, mask, columns, grad_input, grad_weight, grad_bias, grad_offset, + grad_mask, grad_output, kernel_h, kernel_w, stride_h, stride_w, + pad_h, pad_w, dilation_h, dilation_w, group, deformable_group, + with_bias); +#else + AT_ERROR("modulated deform conv is not compiled with GPU support"); +#endif + } + AT_ERROR("modulated deform conv is not implemented on CPU"); +} + + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("deform_conv_forward", &deform_conv_forward, + "deform forward"); + m.def("deform_conv_backward_input", &deform_conv_backward_input, + "deform_conv_backward_input"); + m.def("deform_conv_backward_parameters", + &deform_conv_backward_parameters, + "deform_conv_backward_parameters"); + m.def("modulated_deform_conv_forward", + &modulated_deform_conv_forward, + "modulated deform conv forward"); + m.def("modulated_deform_conv_backward", + &modulated_deform_conv_backward, + "modulated deform conv backward"); +} diff --git a/IIR-Lab/models/dcn/__init__.py b/IIR-Lab/models/dcn/__init__.py new file 
mode 100644 index 0000000000000000000000000000000000000000..32e3592f896d61b4127e09d0476381b9d55e32ff --- /dev/null +++ b/IIR-Lab/models/dcn/__init__.py @@ -0,0 +1,7 @@ +from .deform_conv import (DeformConv, DeformConvPack, ModulatedDeformConv, ModulatedDeformConvPack, deform_conv, + modulated_deform_conv) + +__all__ = [ + 'DeformConv', 'DeformConvPack', 'ModulatedDeformConv', 'ModulatedDeformConvPack', 'deform_conv', + 'modulated_deform_conv' +] diff --git a/IIR-Lab/models/dcn/__pycache__/__init__.cpython-310.pyc b/IIR-Lab/models/dcn/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bfbea69661d64cf4987690ec9641116b8f0f1d47 Binary files /dev/null and b/IIR-Lab/models/dcn/__pycache__/__init__.cpython-310.pyc differ diff --git a/IIR-Lab/models/dcn/__pycache__/__init__.cpython-38.pyc b/IIR-Lab/models/dcn/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..663b0797ac2bc7752910b7459b157ce30410c5f7 Binary files /dev/null and b/IIR-Lab/models/dcn/__pycache__/__init__.cpython-38.pyc differ diff --git a/IIR-Lab/models/dcn/__pycache__/deform_conv.cpython-310.pyc b/IIR-Lab/models/dcn/__pycache__/deform_conv.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..145e66f55b4c3dbf4812067a1eb9b5c82aa65d75 Binary files /dev/null and b/IIR-Lab/models/dcn/__pycache__/deform_conv.cpython-310.pyc differ diff --git a/IIR-Lab/models/dcn/__pycache__/deform_conv.cpython-38.pyc b/IIR-Lab/models/dcn/__pycache__/deform_conv.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..991621bdfdc40736c33b8a6e126607e36f62e0a5 Binary files /dev/null and b/IIR-Lab/models/dcn/__pycache__/deform_conv.cpython-38.pyc differ diff --git a/IIR-Lab/models/dcn/deform_conv.py b/IIR-Lab/models/dcn/deform_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..6268ca825d59ef4a30d4d2156c4438cbbe9b3c1e --- /dev/null +++ b/IIR-Lab/models/dcn/deform_conv.py @@ -0,0 +1,379 @@ +import math +import os +import torch +from torch import nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn import functional as F +from torch.nn.modules.utils import _pair, _single + +BASICSR_JIT = os.getenv('BASICSR_JIT') +if BASICSR_JIT == 'True': + from torch.utils.cpp_extension import load + module_path = os.path.dirname(__file__) + deform_conv_ext = load( + 'deform_conv', + sources=[ + os.path.join(module_path, 'src', 'deform_conv_ext.cpp'), + os.path.join(module_path, 'src', 'deform_conv_cuda.cpp'), + os.path.join(module_path, 'src', 'deform_conv_cuda_kernel.cu'), + ], + ) +else: + try: + from . import deform_conv_ext + except ImportError: + pass + # avoid annoying print output + # print(f'Cannot import deform_conv_ext. Error: {error}. You may need to: \n ' + # '1. compile with BASICSR_EXT=True. or\n ' + # '2. 
set BASICSR_JIT=True during running') + + +class DeformConvFunction(Function): + + @staticmethod + def forward(ctx, + input, + offset, + weight, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + im2col_step=64): + if input is not None and input.dim() != 4: + raise ValueError(f'Expected 4D tensor as input, got {input.dim()}D tensor instead.') + ctx.stride = _pair(stride) + ctx.padding = _pair(padding) + ctx.dilation = _pair(dilation) + ctx.groups = groups + ctx.deformable_groups = deformable_groups + ctx.im2col_step = im2col_step + + ctx.save_for_backward(input, offset, weight) + + output = input.new_empty(DeformConvFunction._output_size(input, weight, ctx.padding, ctx.dilation, ctx.stride)) + + ctx.bufs_ = [input.new_empty(0), input.new_empty(0)] # columns, ones + + if not input.is_cuda: + raise NotImplementedError + else: + cur_im2col_step = min(ctx.im2col_step, input.shape[0]) + assert (input.shape[0] % cur_im2col_step) == 0, 'im2col step must divide batchsize' + deform_conv_ext.deform_conv_forward(input, weight, + offset, output, ctx.bufs_[0], ctx.bufs_[1], weight.size(3), + weight.size(2), ctx.stride[1], ctx.stride[0], ctx.padding[1], + ctx.padding[0], ctx.dilation[1], ctx.dilation[0], ctx.groups, + ctx.deformable_groups, cur_im2col_step) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, offset, weight = ctx.saved_tensors + + grad_input = grad_offset = grad_weight = None + + if not grad_output.is_cuda: + raise NotImplementedError + else: + cur_im2col_step = min(ctx.im2col_step, input.shape[0]) + assert (input.shape[0] % cur_im2col_step) == 0, 'im2col step must divide batchsize' + + if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]: + grad_input = torch.zeros_like(input) + grad_offset = torch.zeros_like(offset) + deform_conv_ext.deform_conv_backward_input(input, offset, grad_output, grad_input, + grad_offset, weight, ctx.bufs_[0], weight.size(3), + weight.size(2), ctx.stride[1], ctx.stride[0], ctx.padding[1], + ctx.padding[0], ctx.dilation[1], ctx.dilation[0], ctx.groups, + ctx.deformable_groups, cur_im2col_step) + + if ctx.needs_input_grad[2]: + grad_weight = torch.zeros_like(weight) + deform_conv_ext.deform_conv_backward_parameters(input, offset, grad_output, grad_weight, + ctx.bufs_[0], ctx.bufs_[1], weight.size(3), + weight.size(2), ctx.stride[1], ctx.stride[0], + ctx.padding[1], ctx.padding[0], ctx.dilation[1], + ctx.dilation[0], ctx.groups, ctx.deformable_groups, 1, + cur_im2col_step) + + return (grad_input, grad_offset, grad_weight, None, None, None, None, None) + + @staticmethod + def _output_size(input, weight, padding, dilation, stride): + channels = weight.size(0) + output_size = (input.size(0), channels) + for d in range(input.dim() - 2): + in_size = input.size(d + 2) + pad = padding[d] + kernel = dilation[d] * (weight.size(d + 2) - 1) + 1 + stride_ = stride[d] + output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, ) + if not all(map(lambda s: s > 0, output_size)): + raise ValueError(f'convolution input is too small (output would be {"x".join(map(str, output_size))})') + return output_size + + +class ModulatedDeformConvFunction(Function): + + @staticmethod + def forward(ctx, + input, + offset, + mask, + weight, + bias=None, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1): + ctx.stride = stride + ctx.padding = padding + ctx.dilation = dilation + ctx.groups = groups + ctx.deformable_groups = deformable_groups + ctx.with_bias = bias is not None + if not 
ctx.with_bias: + bias = input.new_empty(1) # fake tensor + if not input.is_cuda: + raise NotImplementedError + if weight.requires_grad or mask.requires_grad or offset.requires_grad or input.requires_grad: + ctx.save_for_backward(input, offset, mask, weight, bias) + output = input.new_empty(ModulatedDeformConvFunction._infer_shape(ctx, input, weight)) + ctx._bufs = [input.new_empty(0), input.new_empty(0)] + deform_conv_ext.modulated_deform_conv_forward(input, weight, bias, ctx._bufs[0], offset, mask, output, + ctx._bufs[1], weight.shape[2], weight.shape[3], ctx.stride, + ctx.stride, ctx.padding, ctx.padding, ctx.dilation, ctx.dilation, + ctx.groups, ctx.deformable_groups, ctx.with_bias) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + if not grad_output.is_cuda: + raise NotImplementedError + input, offset, mask, weight, bias = ctx.saved_tensors + grad_input = torch.zeros_like(input) + grad_offset = torch.zeros_like(offset) + grad_mask = torch.zeros_like(mask) + grad_weight = torch.zeros_like(weight) + grad_bias = torch.zeros_like(bias) + deform_conv_ext.modulated_deform_conv_backward(input, weight, bias, ctx._bufs[0], offset, mask, ctx._bufs[1], + grad_input, grad_weight, grad_bias, grad_offset, grad_mask, + grad_output, weight.shape[2], weight.shape[3], ctx.stride, + ctx.stride, ctx.padding, ctx.padding, ctx.dilation, ctx.dilation, + ctx.groups, ctx.deformable_groups, ctx.with_bias) + if not ctx.with_bias: + grad_bias = None + + return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias, None, None, None, None, None) + + @staticmethod + def _infer_shape(ctx, input, weight): + n = input.size(0) + channels_out = weight.size(0) + height, width = input.shape[2:4] + kernel_h, kernel_w = weight.shape[2:4] + height_out = (height + 2 * ctx.padding - (ctx.dilation * (kernel_h - 1) + 1)) // ctx.stride + 1 + width_out = (width + 2 * ctx.padding - (ctx.dilation * (kernel_w - 1) + 1)) // ctx.stride + 1 + return n, channels_out, height_out, width_out + + +deform_conv = DeformConvFunction.apply +modulated_deform_conv = ModulatedDeformConvFunction.apply + + +class DeformConv(nn.Module): + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + bias=False): + super(DeformConv, self).__init__() + + assert not bias + assert in_channels % groups == 0, f'in_channels {in_channels} is not divisible by groups {groups}' + assert out_channels % groups == 0, f'out_channels {out_channels} is not divisible by groups {groups}' + + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = _pair(stride) + self.padding = _pair(padding) + self.dilation = _pair(dilation) + self.groups = groups + self.deformable_groups = deformable_groups + # enable compatibility with nn.Conv2d + self.transposed = False + self.output_padding = _single(0) + + self.weight = nn.Parameter(torch.Tensor(out_channels, in_channels // self.groups, *self.kernel_size)) + + self.reset_parameters() + + def reset_parameters(self): + n = self.in_channels + for k in self.kernel_size: + n *= k + stdv = 1. 
/ math.sqrt(n) + self.weight.data.uniform_(-stdv, stdv) + + def forward(self, x, offset): + # To fix an assert error in deform_conv_cuda.cpp:128 + # input image is smaller than kernel + input_pad = (x.size(2) < self.kernel_size[0] or x.size(3) < self.kernel_size[1]) + if input_pad: + pad_h = max(self.kernel_size[0] - x.size(2), 0) + pad_w = max(self.kernel_size[1] - x.size(3), 0) + x = F.pad(x, (0, pad_w, 0, pad_h), 'constant', 0).contiguous() + offset = F.pad(offset, (0, pad_w, 0, pad_h), 'constant', 0).contiguous() + out = deform_conv(x, offset, self.weight, self.stride, self.padding, self.dilation, self.groups, + self.deformable_groups) + if input_pad: + out = out[:, :, :out.size(2) - pad_h, :out.size(3) - pad_w].contiguous() + return out + + +class DeformConvPack(DeformConv): + """A Deformable Conv Encapsulation that acts as normal Conv layers. + + Args: + in_channels (int): Same as nn.Conv2d. + out_channels (int): Same as nn.Conv2d. + kernel_size (int or tuple[int]): Same as nn.Conv2d. + stride (int or tuple[int]): Same as nn.Conv2d. + padding (int or tuple[int]): Same as nn.Conv2d. + dilation (int or tuple[int]): Same as nn.Conv2d. + groups (int): Same as nn.Conv2d. + bias (bool or str): If specified as `auto`, it will be decided by the + norm_cfg. Bias will be set as True if norm_cfg is None, otherwise + False. + """ + + _version = 2 + + def __init__(self, *args, **kwargs): + super(DeformConvPack, self).__init__(*args, **kwargs) + + self.conv_offset = nn.Conv2d( + self.in_channels, + self.deformable_groups * 2 * self.kernel_size[0] * self.kernel_size[1], + kernel_size=self.kernel_size, + stride=_pair(self.stride), + padding=_pair(self.padding), + dilation=_pair(self.dilation), + bias=True) + self.init_offset() + + def init_offset(self): + self.conv_offset.weight.data.zero_() + self.conv_offset.bias.data.zero_() + + def forward(self, x): + offset = self.conv_offset(x) + return deform_conv(x, offset, self.weight, self.stride, self.padding, self.dilation, self.groups, + self.deformable_groups) + + +class ModulatedDeformConv(nn.Module): + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + bias=True): + super(ModulatedDeformConv, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = stride + self.padding = padding + self.dilation = dilation + self.groups = groups + self.deformable_groups = deformable_groups + self.with_bias = bias + # enable compatibility with nn.Conv2d + self.transposed = False + self.output_padding = _single(0) + + self.weight = nn.Parameter(torch.Tensor(out_channels, in_channels // groups, *self.kernel_size)) + if bias: + self.bias = nn.Parameter(torch.Tensor(out_channels)) + else: + self.register_parameter('bias', None) + self.init_weights() + + def init_weights(self): + n = self.in_channels + for k in self.kernel_size: + n *= k + stdv = 1. / math.sqrt(n) + self.weight.data.uniform_(-stdv, stdv) + if self.bias is not None: + self.bias.data.zero_() + + def forward(self, x, offset, mask): + return modulated_deform_conv(x, offset, mask, self.weight, self.bias, self.stride, self.padding, self.dilation, + self.groups, self.deformable_groups) + + +class ModulatedDeformConvPack(ModulatedDeformConv): + """A ModulatedDeformable Conv Encapsulation that acts as normal Conv layers. + + Args: + in_channels (int): Same as nn.Conv2d. + out_channels (int): Same as nn.Conv2d. 
+ kernel_size (int or tuple[int]): Same as nn.Conv2d.
+ stride (int or tuple[int]): Same as nn.Conv2d.
+ padding (int or tuple[int]): Same as nn.Conv2d.
+ dilation (int or tuple[int]): Same as nn.Conv2d.
+ groups (int): Same as nn.Conv2d.
+ bias (bool or str): If specified as `auto`, it will be decided by the
+ norm_cfg. Bias will be set as True if norm_cfg is None, otherwise
+ False.
+ """
+
+ _version = 2
+
+ def __init__(self, *args, **kwargs):
+ super(ModulatedDeformConvPack, self).__init__(*args, **kwargs)
+
+ self.conv_offset = nn.Conv2d(
+ self.in_channels,
+ self.deformable_groups * 3 * self.kernel_size[0] * self.kernel_size[1],
+ kernel_size=self.kernel_size,
+ stride=_pair(self.stride),
+ padding=_pair(self.padding),
+ dilation=_pair(self.dilation),
+ bias=True)
+ self.init_weights()
+
+ def init_weights(self):
+ super(ModulatedDeformConvPack, self).init_weights()
+ if hasattr(self, 'conv_offset'):
+ self.conv_offset.weight.data.zero_()
+ self.conv_offset.bias.data.zero_()
+
+ def forward(self, x):
+ out = self.conv_offset(x)
+ o1, o2, mask = torch.chunk(out, 3, dim=1)
+ offset = torch.cat((o1, o2), dim=1)
+ mask = torch.sigmoid(mask)
+ return modulated_deform_conv(x, offset, mask, self.weight, self.bias, self.stride, self.padding, self.dilation,
+ self.groups, self.deformable_groups)
diff --git a/IIR-Lab/models/dcn/src/deform_conv_cuda.cpp b/IIR-Lab/models/dcn/src/deform_conv_cuda.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b465c493a3dd67d320b7a8997fbd501d2f89c807
--- /dev/null
+++ b/IIR-Lab/models/dcn/src/deform_conv_cuda.cpp
@@ -0,0 +1,685 @@
+// modify from
+// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda.c
+
+#include <torch/extension.h> // assumed: standard headers for a PyTorch C++ extension
+#include <ATen/ATen.h>
+
+#include <cmath>
+#include <vector>
+
+void deformable_im2col(const at::Tensor data_im, const at::Tensor data_offset,
+ const int channels, const int height, const int width,
+ const int ksize_h, const int ksize_w, const int pad_h,
+ const int pad_w, const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int parallel_imgs, const int deformable_group,
+ at::Tensor data_col);
+
+void deformable_col2im(const at::Tensor data_col, const at::Tensor data_offset,
+ const int channels, const int height, const int width,
+ const int ksize_h, const int ksize_w, const int pad_h,
+ const int pad_w, const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int parallel_imgs, const int deformable_group,
+ at::Tensor grad_im);
+
+void deformable_col2im_coord(
+ const at::Tensor data_col, const at::Tensor data_im,
+ const at::Tensor data_offset, const int channels, const int height,
+ const int width, const int ksize_h, const int ksize_w, const int pad_h,
+ const int pad_w, const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w, const int parallel_imgs,
+ const int deformable_group, at::Tensor grad_offset);
+
+void modulated_deformable_im2col_cuda(
+ const at::Tensor data_im, const at::Tensor data_offset,
+ const at::Tensor data_mask, const int batch_size, const int channels,
+ const int height_im, const int width_im, const int height_col,
+ const int width_col, const int kernel_h, const int kenerl_w,
+ const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w, const int deformable_group,
+ at::Tensor data_col);
+
+void modulated_deformable_col2im_cuda(
+ const at::Tensor data_col, const at::Tensor data_offset,
+ const 
at::Tensor data_mask, const int batch_size, const int channels, + const int height_im, const int width_im, const int height_col, + const int width_col, const int kernel_h, const int kenerl_w, + const int pad_h, const int pad_w, const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, const int deformable_group, + at::Tensor grad_im); + +void modulated_deformable_col2im_coord_cuda( + const at::Tensor data_col, const at::Tensor data_im, + const at::Tensor data_offset, const at::Tensor data_mask, + const int batch_size, const int channels, const int height_im, + const int width_im, const int height_col, const int width_col, + const int kernel_h, const int kenerl_w, const int pad_h, const int pad_w, + const int stride_h, const int stride_w, const int dilation_h, + const int dilation_w, const int deformable_group, at::Tensor grad_offset, + at::Tensor grad_mask); + +void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput, + at::Tensor weight, int kH, int kW, int dH, int dW, int padH, + int padW, int dilationH, int dilationW, int group, + int deformable_group) { + TORCH_CHECK(weight.ndimension() == 4, + "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, " + "but got: %s", + weight.ndimension()); + + TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); + + TORCH_CHECK(kW > 0 && kH > 0, + "kernel size should be greater than zero, but got kH: %d kW: %d", kH, + kW); + + TORCH_CHECK((weight.size(2) == kH && weight.size(3) == kW), + "kernel size should be consistent with weight, ", + "but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", kH, + kW, weight.size(2), weight.size(3)); + + TORCH_CHECK(dW > 0 && dH > 0, + "stride should be greater than zero, but got dH: %d dW: %d", dH, dW); + + TORCH_CHECK( + dilationW > 0 && dilationH > 0, + "dilation should be greater than 0, but got dilationH: %d dilationW: %d", + dilationH, dilationW); + + int ndim = input.ndimension(); + int dimf = 0; + int dimh = 1; + int dimw = 2; + + if (ndim == 4) { + dimf++; + dimh++; + dimw++; + } + + TORCH_CHECK(ndim == 3 || ndim == 4, "3D or 4D input tensor expected but got: %s", + ndim); + + long nInputPlane = weight.size(1) * group; + long inputHeight = input.size(dimh); + long inputWidth = input.size(dimw); + long nOutputPlane = weight.size(0); + long outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + + TORCH_CHECK(nInputPlane % deformable_group == 0, + "input channels must divide deformable group size"); + + if (outputWidth < 1 || outputHeight < 1) + AT_ERROR( + "Given input size: (%ld x %ld x %ld). " + "Calculated output size: (%ld x %ld x %ld). 
Output size is too small", + nInputPlane, inputHeight, inputWidth, nOutputPlane, outputHeight, + outputWidth); + + TORCH_CHECK(input.size(1) == nInputPlane, + "invalid number of input planes, expected: %d, but got: %d", + nInputPlane, input.size(1)); + + TORCH_CHECK((inputHeight >= kH && inputWidth >= kW), + "input image is smaller than kernel"); + + TORCH_CHECK((offset.size(2) == outputHeight && offset.size(3) == outputWidth), + "invalid spatial size of offset, expected height: %d width: %d, but " + "got height: %d width: %d", + outputHeight, outputWidth, offset.size(2), offset.size(3)); + + TORCH_CHECK((offset.size(1) == deformable_group * 2 * kH * kW), + "invalid number of channels of offset"); + + if (gradOutput != NULL) { + TORCH_CHECK(gradOutput->size(dimf) == nOutputPlane, + "invalid number of gradOutput planes, expected: %d, but got: %d", + nOutputPlane, gradOutput->size(dimf)); + + TORCH_CHECK((gradOutput->size(dimh) == outputHeight && + gradOutput->size(dimw) == outputWidth), + "invalid size of gradOutput, expected height: %d width: %d , but " + "got height: %d width: %d", + outputHeight, outputWidth, gradOutput->size(dimh), + gradOutput->size(dimw)); + } +} + +int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight, + at::Tensor offset, at::Tensor output, + at::Tensor columns, at::Tensor ones, int kW, + int kH, int dW, int dH, int padW, int padH, + int dilationW, int dilationH, int group, + int deformable_group, int im2col_step) { + // todo: resize columns to include im2col: done + // todo: add im2col_step as input + // todo: add new output buffer and transpose it to output (or directly + // transpose output) todo: possibly change data indexing because of + // parallel_imgs + + shape_check(input, offset, NULL, weight, kH, kW, dH, dW, padH, padW, + dilationH, dilationW, group, deformable_group); + at::DeviceGuard guard(input.device()); + + input = input.contiguous(); + offset = offset.contiguous(); + weight = weight.contiguous(); + + int batch = 1; + if (input.ndimension() == 3) { + // Force batch + batch = 0; + input.unsqueeze_(0); + offset.unsqueeze_(0); + } + + // todo: assert batchsize dividable by im2col_step + + long batchSize = input.size(0); + long nInputPlane = input.size(1); + long inputHeight = input.size(2); + long inputWidth = input.size(3); + + long nOutputPlane = weight.size(0); + + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); + + output = output.view({batchSize / im2col_step, im2col_step, nOutputPlane, + outputHeight, outputWidth}); + columns = at::zeros( + {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, + input.options()); + + if (ones.ndimension() != 2 || + ones.size(0) * ones.size(1) < outputHeight * outputWidth) { + ones = at::ones({outputHeight, outputWidth}, input.options()); + } + + input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, + inputHeight, inputWidth}); + offset = + offset.view({batchSize / im2col_step, im2col_step, + deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + at::Tensor output_buffer = + at::zeros({batchSize / im2col_step, nOutputPlane, + im2col_step * outputHeight, outputWidth}, + output.options()); + + output_buffer = output_buffer.view( + {output_buffer.size(0), group, output_buffer.size(1) / group, + output_buffer.size(2), output_buffer.size(3)}); + + for (int elt = 0; elt 
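+ // im2col_step batches several images into one columns buffer / GEMM pass:
+ // e.g. batchSize = 8 with im2col_step = 4 runs this loop twice on columns
+ // shaped [nInputPlane * kW * kH, 4 * outputHeight * outputWidth].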
< batchSize / im2col_step; elt++) { + deformable_im2col(input[elt], offset[elt], nInputPlane, inputHeight, + inputWidth, kH, kW, padH, padW, dH, dW, dilationH, + dilationW, im2col_step, deformable_group, columns); + + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + weight = weight.view({group, weight.size(0) / group, weight.size(1), + weight.size(2), weight.size(3)}); + + for (int g = 0; g < group; g++) { + output_buffer[elt][g] = output_buffer[elt][g] + .flatten(1) + .addmm_(weight[g].flatten(1), columns[g]) + .view_as(output_buffer[elt][g]); + } + } + + output_buffer = output_buffer.view( + {output_buffer.size(0), output_buffer.size(1) * output_buffer.size(2), + output_buffer.size(3), output_buffer.size(4)}); + + output_buffer = output_buffer.view({batchSize / im2col_step, nOutputPlane, + im2col_step, outputHeight, outputWidth}); + output_buffer.transpose_(1, 2); + output.copy_(output_buffer); + output = output.view({batchSize, nOutputPlane, outputHeight, outputWidth}); + + input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); + offset = offset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + if (batch == 0) { + output = output.view({nOutputPlane, outputHeight, outputWidth}); + input = input.view({nInputPlane, inputHeight, inputWidth}); + offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); + } + + return 1; +} + +int deform_conv_backward_input_cuda(at::Tensor input, at::Tensor offset, + at::Tensor gradOutput, at::Tensor gradInput, + at::Tensor gradOffset, at::Tensor weight, + at::Tensor columns, int kW, int kH, int dW, + int dH, int padW, int padH, int dilationW, + int dilationH, int group, + int deformable_group, int im2col_step) { + shape_check(input, offset, &gradOutput, weight, kH, kW, dH, dW, padH, padW, + dilationH, dilationW, group, deformable_group); + at::DeviceGuard guard(input.device()); + + input = input.contiguous(); + offset = offset.contiguous(); + gradOutput = gradOutput.contiguous(); + weight = weight.contiguous(); + + int batch = 1; + + if (input.ndimension() == 3) { + // Force batch + batch = 0; + input = input.view({1, input.size(0), input.size(1), input.size(2)}); + offset = offset.view({1, offset.size(0), offset.size(1), offset.size(2)}); + gradOutput = gradOutput.view( + {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)}); + } + + long batchSize = input.size(0); + long nInputPlane = input.size(1); + long inputHeight = input.size(2); + long inputWidth = input.size(3); + + long nOutputPlane = weight.size(0); + + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + TORCH_CHECK((offset.size(0) == batchSize), 3, "invalid batch size of offset"); + gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); + columns = at::zeros( + {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, + input.options()); + + // change order of grad output + gradOutput = gradOutput.view({batchSize / im2col_step, im2col_step, + nOutputPlane, outputHeight, outputWidth}); + gradOutput.transpose_(1, 2); + + gradInput = gradInput.view({batchSize / im2col_step, im2col_step, nInputPlane, + inputHeight, inputWidth}); + input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, + inputHeight, inputWidth}); + gradOffset = gradOffset.view({batchSize / im2col_step, im2col_step, + deformable_group * 2 * kH * kW, outputHeight, 
+ outputWidth}); + offset = + offset.view({batchSize / im2col_step, im2col_step, + deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + for (int elt = 0; elt < batchSize / im2col_step; elt++) { + // divide into groups + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + weight = weight.view({group, weight.size(0) / group, weight.size(1), + weight.size(2), weight.size(3)}); + gradOutput = gradOutput.view( + {gradOutput.size(0), group, gradOutput.size(1) / group, + gradOutput.size(2), gradOutput.size(3), gradOutput.size(4)}); + + for (int g = 0; g < group; g++) { + columns[g] = columns[g].addmm_(weight[g].flatten(1).transpose(0, 1), + gradOutput[elt][g].flatten(1), 0.0f, 1.0f); + } + + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + gradOutput = gradOutput.view( + {gradOutput.size(0), gradOutput.size(1) * gradOutput.size(2), + gradOutput.size(3), gradOutput.size(4), gradOutput.size(5)}); + + deformable_col2im_coord(columns, input[elt], offset[elt], nInputPlane, + inputHeight, inputWidth, kH, kW, padH, padW, dH, dW, + dilationH, dilationW, im2col_step, deformable_group, + gradOffset[elt]); + + deformable_col2im(columns, offset[elt], nInputPlane, inputHeight, + inputWidth, kH, kW, padH, padW, dH, dW, dilationH, + dilationW, im2col_step, deformable_group, gradInput[elt]); + } + + gradOutput.transpose_(1, 2); + gradOutput = + gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); + + gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); + input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); + gradOffset = gradOffset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + offset = offset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + if (batch == 0) { + gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); + input = input.view({nInputPlane, inputHeight, inputWidth}); + gradInput = gradInput.view({nInputPlane, inputHeight, inputWidth}); + offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); + gradOffset = + gradOffset.view({offset.size(1), offset.size(2), offset.size(3)}); + } + + return 1; +} + +int deform_conv_backward_parameters_cuda( + at::Tensor input, at::Tensor offset, at::Tensor gradOutput, + at::Tensor gradWeight, // at::Tensor gradBias, + at::Tensor columns, at::Tensor ones, int kW, int kH, int dW, int dH, + int padW, int padH, int dilationW, int dilationH, int group, + int deformable_group, float scale, int im2col_step) { + // todo: transpose and reshape outGrad + // todo: reshape columns + // todo: add im2col_step as input + + shape_check(input, offset, &gradOutput, gradWeight, kH, kW, dH, dW, padH, + padW, dilationH, dilationW, group, deformable_group); + at::DeviceGuard guard(input.device()); + + input = input.contiguous(); + offset = offset.contiguous(); + gradOutput = gradOutput.contiguous(); + + int batch = 1; + + if (input.ndimension() == 3) { + // Force batch + batch = 0; + input = input.view( + at::IntList({1, input.size(0), input.size(1), input.size(2)})); + gradOutput = gradOutput.view( + {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)}); + } + + long batchSize = input.size(0); + long nInputPlane = input.size(1); + long inputHeight = input.size(2); + long inputWidth = input.size(3); + + long nOutputPlane = gradWeight.size(0); + + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight 
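+ // Standard conv output size: out = (in + 2*pad - (dilation*(k-1)+1)) / stride + 1,
+ // e.g. in = 64, pad = 1, k = 3, dilation = 1, stride = 1 -> (64 + 2 - 3) + 1 = 64.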
= + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); + + columns = at::zeros( + {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, + input.options()); + + gradOutput = gradOutput.view({batchSize / im2col_step, im2col_step, + nOutputPlane, outputHeight, outputWidth}); + gradOutput.transpose_(1, 2); + + at::Tensor gradOutputBuffer = at::zeros_like(gradOutput); + gradOutputBuffer = + gradOutputBuffer.view({batchSize / im2col_step, nOutputPlane, im2col_step, + outputHeight, outputWidth}); + gradOutputBuffer.copy_(gradOutput); + gradOutputBuffer = + gradOutputBuffer.view({batchSize / im2col_step, nOutputPlane, + im2col_step * outputHeight, outputWidth}); + + gradOutput.transpose_(1, 2); + gradOutput = + gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); + + input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, + inputHeight, inputWidth}); + offset = + offset.view({batchSize / im2col_step, im2col_step, + deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + for (int elt = 0; elt < batchSize / im2col_step; elt++) { + deformable_im2col(input[elt], offset[elt], nInputPlane, inputHeight, + inputWidth, kH, kW, padH, padW, dH, dW, dilationH, + dilationW, im2col_step, deformable_group, columns); + + // divide into group + gradOutputBuffer = gradOutputBuffer.view( + {gradOutputBuffer.size(0), group, gradOutputBuffer.size(1) / group, + gradOutputBuffer.size(2), gradOutputBuffer.size(3)}); + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + gradWeight = + gradWeight.view({group, gradWeight.size(0) / group, gradWeight.size(1), + gradWeight.size(2), gradWeight.size(3)}); + + for (int g = 0; g < group; g++) { + gradWeight[g] = gradWeight[g] + .flatten(1) + .addmm_(gradOutputBuffer[elt][g].flatten(1), + columns[g].transpose(1, 0), 1.0, scale) + .view_as(gradWeight[g]); + } + gradOutputBuffer = gradOutputBuffer.view( + {gradOutputBuffer.size(0), + gradOutputBuffer.size(1) * gradOutputBuffer.size(2), + gradOutputBuffer.size(3), gradOutputBuffer.size(4)}); + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + gradWeight = gradWeight.view({gradWeight.size(0) * gradWeight.size(1), + gradWeight.size(2), gradWeight.size(3), + gradWeight.size(4)}); + } + + input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); + offset = offset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + if (batch == 0) { + gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); + input = input.view({nInputPlane, inputHeight, inputWidth}); + } + + return 1; +} + +void modulated_deform_conv_cuda_forward( + at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, + at::Tensor offset, at::Tensor mask, at::Tensor output, at::Tensor columns, + int kernel_h, int kernel_w, const int stride_h, const int stride_w, + const int pad_h, const int pad_w, const int dilation_h, + const int dilation_w, const int group, const int deformable_group, + const bool with_bias) { + TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); + TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); + at::DeviceGuard guard(input.device()); + + const int batch = input.size(0); + const int channels = input.size(1); + const int height = input.size(2); + const int width = input.size(3); + + const int channels_out = weight.size(0); + const int 
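+ // weight is laid out [channels_out, channels_in / group, kernel_h, kernel_w];
+ // the checks just below enforce that the declared kernel size matches the
+ // weight tensor and that input channels == channels_kernel * group.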
channels_kernel = weight.size(1); + const int kernel_h_ = weight.size(2); + const int kernel_w_ = weight.size(3); + + if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) + AT_ERROR("Input shape and kernel shape won't match: (%d x %d vs %d x %d).", + kernel_h_, kernel_w, kernel_h_, kernel_w_); + if (channels != channels_kernel * group) + AT_ERROR("Input shape and kernel channels won't match: (%d vs %d).", + channels, channels_kernel * group); + + const int height_out = + (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; + const int width_out = + (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + + if (ones.ndimension() != 2 || + ones.size(0) * ones.size(1) < height_out * width_out) { + // Resize plane and fill with ones... + ones = at::ones({height_out, width_out}, input.options()); + } + + // resize output + output = output.view({batch, channels_out, height_out, width_out}).zero_(); + // resize temporary columns + columns = + at::zeros({channels * kernel_h * kernel_w, 1 * height_out * width_out}, + input.options()); + + output = output.view({output.size(0), group, output.size(1) / group, + output.size(2), output.size(3)}); + + for (int b = 0; b < batch; b++) { + modulated_deformable_im2col_cuda( + input[b], offset[b], mask[b], 1, channels, height, width, height_out, + width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, deformable_group, columns); + + // divide into group + weight = weight.view({group, weight.size(0) / group, weight.size(1), + weight.size(2), weight.size(3)}); + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + + for (int g = 0; g < group; g++) { + output[b][g] = output[b][g] + .flatten(1) + .addmm_(weight[g].flatten(1), columns[g]) + .view_as(output[b][g]); + } + + weight = weight.view({weight.size(0) * weight.size(1), weight.size(2), + weight.size(3), weight.size(4)}); + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + } + + output = output.view({output.size(0), output.size(1) * output.size(2), + output.size(3), output.size(4)}); + + if (with_bias) { + output += bias.view({1, bias.size(0), 1, 1}); + } +} + +void modulated_deform_conv_cuda_backward( + at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, + at::Tensor offset, at::Tensor mask, at::Tensor columns, + at::Tensor grad_input, at::Tensor grad_weight, at::Tensor grad_bias, + at::Tensor grad_offset, at::Tensor grad_mask, at::Tensor grad_output, + int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, + int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, + const bool with_bias) { + TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); + TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); + at::DeviceGuard guard(input.device()); + + const int batch = input.size(0); + const int channels = input.size(1); + const int height = input.size(2); + const int width = input.size(3); + + const int channels_kernel = weight.size(1); + const int kernel_h_ = weight.size(2); + const int kernel_w_ = weight.size(3); + if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) + AT_ERROR("Input shape and kernel shape won't match: (%d x %d vs %d x %d).", + kernel_h_, kernel_w, kernel_h_, kernel_w_); + if (channels != channels_kernel * group) + AT_ERROR("Input shape and kernel channels won't match: (%d vs %d).", + channels, channels_kernel * group); + + const int height_out = + (height + 2 * pad_h - (dilation_h 
* (kernel_h - 1) + 1)) / stride_h + 1; + const int width_out = + (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + + if (ones.ndimension() != 2 || + ones.size(0) * ones.size(1) < height_out * width_out) { + // Resize plane and fill with ones... + ones = at::ones({height_out, width_out}, input.options()); + } + + grad_input = grad_input.view({batch, channels, height, width}); + columns = at::zeros({channels * kernel_h * kernel_w, height_out * width_out}, + input.options()); + + grad_output = + grad_output.view({grad_output.size(0), group, grad_output.size(1) / group, + grad_output.size(2), grad_output.size(3)}); + + for (int b = 0; b < batch; b++) { + // divide int group + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + weight = weight.view({group, weight.size(0) / group, weight.size(1), + weight.size(2), weight.size(3)}); + + for (int g = 0; g < group; g++) { + columns[g].addmm_(weight[g].flatten(1).transpose(0, 1), + grad_output[b][g].flatten(1), 0.0f, 1.0f); + } + + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + weight = weight.view({weight.size(0) * weight.size(1), weight.size(2), + weight.size(3), weight.size(4)}); + + // gradient w.r.t. input coordinate data + modulated_deformable_col2im_coord_cuda( + columns, input[b], offset[b], mask[b], 1, channels, height, width, + height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, + stride_w, dilation_h, dilation_w, deformable_group, grad_offset[b], + grad_mask[b]); + // gradient w.r.t. input data + modulated_deformable_col2im_cuda( + columns, offset[b], mask[b], 1, channels, height, width, height_out, + width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, deformable_group, grad_input[b]); + + // gradient w.r.t. weight, dWeight should accumulate across the batch and + // group + modulated_deformable_im2col_cuda( + input[b], offset[b], mask[b], 1, channels, height, width, height_out, + width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, deformable_group, columns); + + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + grad_weight = grad_weight.view({group, grad_weight.size(0) / group, + grad_weight.size(1), grad_weight.size(2), + grad_weight.size(3)}); + if (with_bias) + grad_bias = grad_bias.view({group, grad_bias.size(0) / group}); + + for (int g = 0; g < group; g++) { + grad_weight[g] = + grad_weight[g] + .flatten(1) + .addmm_(grad_output[b][g].flatten(1), columns[g].transpose(0, 1)) + .view_as(grad_weight[g]); + if (with_bias) { + grad_bias[g] = + grad_bias[g] + .view({-1, 1}) + .addmm_(grad_output[b][g].flatten(1), ones.view({-1, 1})) + .view(-1); + } + } + + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + grad_weight = grad_weight.view({grad_weight.size(0) * grad_weight.size(1), + grad_weight.size(2), grad_weight.size(3), + grad_weight.size(4)}); + if (with_bias) + grad_bias = grad_bias.view({grad_bias.size(0) * grad_bias.size(1)}); + } + grad_output = grad_output.view({grad_output.size(0) * grad_output.size(1), + grad_output.size(2), grad_output.size(3), + grad_output.size(4)}); +} diff --git a/IIR-Lab/models/dcn/src/deform_conv_cuda_kernel.cu b/IIR-Lab/models/dcn/src/deform_conv_cuda_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..98752dccf8c58817ca1a952554dd3f33188a2d34 --- /dev/null +++ b/IIR-Lab/models/dcn/src/deform_conv_cuda_kernel.cu @@ -0,0 +1,867 @@ +/*! 
+ ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** + * + * COPYRIGHT + * + * All contributions by the University of California: + * Copyright (c) 2014-2017 The Regents of the University of California (Regents) + * All rights reserved. + * + * All other contributions: + * Copyright (c) 2014-2017, the respective contributors + * All rights reserved. + * + * Caffe uses a shared copyright model: each contributor holds copyright over + * their contributions to Caffe. The project versioning records all such + * contribution and copyright details. If a contributor wants to further mark + * their specific copyright on a particular contribution, they should indicate + * their copyright solely in the commit message of the change when it is + * committed. + * + * LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * CONTRIBUTION AGREEMENT + * + * By contributing to the BVLC/caffe repository through pull-request, comment, + * or otherwise, the contributor releases their content to the + * license and copyright terms herein. + * + ***************** END Caffe Copyright Notice and Disclaimer ******************** + * + * Copyright (c) 2018 Microsoft + * Licensed under The MIT License [see LICENSE for details] + * \file modulated_deformable_im2col.cuh + * \brief Function definitions of converting an image to + * column matrix based on kernel, padding, dilation, and offset. + * These functions are mainly used in deformable convolution operators. 
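+ *
+ * The kernels below come in three families, each with a plain and a
+ * modulated (mask-weighted, DCNv2) variant:
+ *   im2col       - gather bilinearly-sampled input values into a column matrix
+ *   col2im       - scatter column gradients back onto the input image
+ *   col2im_coord - gradients with respect to the sampling offsets (and masks)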
+ * \ref: https://arxiv.org/abs/1703.06211
+ * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng
+ */
+
+// modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <THC/THCAtomics.cuh>
+#include <stdio.h>
+#include <math.h>
+#include <float.h>
+
+using namespace at;
+
+// Grid-stride loop: each thread covers several elements, so n may exceed the grid.
+#define CUDA_KERNEL_LOOP(i, n)                                 \
+  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
+       i += blockDim.x * gridDim.x)
+
+const int CUDA_NUM_THREADS = 1024;
+const int kMaxGridNum = 65535;
+
+inline int GET_BLOCKS(const int N)
+{
+  return std::min(kMaxGridNum, (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS);
+}
+
+// Bilinear sample of bottom_data at fractional (h, w); out-of-range corners count as zero.
+template <typename scalar_t>
+__device__ scalar_t deformable_im2col_bilinear(const scalar_t *bottom_data, const int data_width,
+                                               const int height, const int width, scalar_t h, scalar_t w)
+{
+
+  int h_low = floor(h);
+  int w_low = floor(w);
+  int h_high = h_low + 1;
+  int w_high = w_low + 1;
+
+  scalar_t lh = h - h_low;
+  scalar_t lw = w - w_low;
+  scalar_t hh = 1 - lh, hw = 1 - lw;
+
+  scalar_t v1 = 0;
+  if (h_low >= 0 && w_low >= 0)
+    v1 = bottom_data[h_low * data_width + w_low];
+  scalar_t v2 = 0;
+  if (h_low >= 0 && w_high <= width - 1)
+    v2 = bottom_data[h_low * data_width + w_high];
+  scalar_t v3 = 0;
+  if (h_high <= height - 1 && w_low >= 0)
+    v3 = bottom_data[h_high * data_width + w_low];
+  scalar_t v4 = 0;
+  if (h_high <= height - 1 && w_high <= width - 1)
+    v4 = bottom_data[h_high * data_width + w_high];
+
+  scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
+
+  scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
+  return val;
+}
+
+// Weight with which the column gradient at sample (argmax_h, argmax_w) flows
+// back to the integer pixel (h, w).
+template <typename scalar_t>
+__device__ scalar_t get_gradient_weight(scalar_t argmax_h, scalar_t argmax_w,
+                                        const int h, const int w, const int height, const int width)
+{
+
+  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)
+  {
+    //empty
+    return 0;
+  }
+
+  int argmax_h_low = floor(argmax_h);
+  int argmax_w_low = floor(argmax_w);
+  int argmax_h_high = argmax_h_low + 1;
+  int argmax_w_high = argmax_w_low + 1;
+
+  scalar_t weight = 0;
+  if (h == argmax_h_low && w == argmax_w_low)
+    weight = (h + 1 - argmax_h) * (w + 1 - argmax_w);
+  if (h == argmax_h_low && w == argmax_w_high)
+    weight = (h + 1 - argmax_h) * (argmax_w + 1 - w);
+  if (h == argmax_h_high && w == argmax_w_low)
+    weight = (argmax_h + 1 - h) * (w + 1 - argmax_w);
+  if (h == argmax_h_high && w == argmax_w_high)
+    weight = (argmax_h + 1 - h) * (argmax_w + 1 - w);
+  return weight;
+}
+
+// Derivative of the bilinear sample with respect to the offset coordinate
+// (bp_dir == 0 differentiates along h, bp_dir == 1 along w).
+template <typename scalar_t>
+__device__ scalar_t get_coordinate_weight(scalar_t argmax_h, scalar_t argmax_w,
+                                          const int height, const int width, const scalar_t *im_data,
+                                          const int data_width, const int bp_dir)
+{
+
+  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)
+  {
+    //empty
+    return 0;
+  }
+
+  int argmax_h_low = floor(argmax_h);
+  int argmax_w_low = floor(argmax_w);
+  int argmax_h_high = argmax_h_low + 1;
+  int argmax_w_high = argmax_w_low + 1;
+
+  scalar_t weight = 0;
+
+  if (bp_dir == 0)
+  {
+    if (argmax_h_low >= 0 && argmax_w_low >= 0)
+      weight += -1 * (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_low * data_width + argmax_w_low];
+    if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
+      weight += -1 * (argmax_w - argmax_w_low) * im_data[argmax_h_low * data_width + argmax_w_high];
+    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
+      weight += (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_high * data_width + argmax_w_low];
+    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
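+      // Each term accumulated in this branch is d(bilinear weight)/d(offset)
+      // for one of the four integer corners around (argmax_h, argmax_w); the
+      // sign pattern comes from differentiating the interpolation weights.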
weight += (argmax_w - argmax_w_low) * im_data[argmax_h_high * data_width + argmax_w_high]; + } + else if (bp_dir == 1) + { + if (argmax_h_low >= 0 && argmax_w_low >= 0) + weight += -1 * (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_low]; + if (argmax_h_low >= 0 && argmax_w_high <= width - 1) + weight += (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_high]; + if (argmax_h_high <= height - 1 && argmax_w_low >= 0) + weight += -1 * (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_low]; + if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) + weight += (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_high]; + } + + return weight; +} + +template +__global__ void deformable_im2col_gpu_kernel(const int n, const scalar_t *data_im, const scalar_t *data_offset, + const int height, const int width, const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, const int channel_per_deformable_group, + const int batch_size, const int num_channels, const int deformable_group, + const int height_col, const int width_col, + scalar_t *data_col) +{ + CUDA_KERNEL_LOOP(index, n) + { + // index index of output matrix + const int w_col = index % width_col; + const int h_col = (index / width_col) % height_col; + const int b_col = (index / width_col / height_col) % batch_size; + const int c_im = (index / width_col / height_col) / batch_size; + const int c_col = c_im * kernel_h * kernel_w; + + // compute deformable group index + const int deformable_group_index = c_im / channel_per_deformable_group; + + const int h_in = h_col * stride_h - pad_h; + const int w_in = w_col * stride_w - pad_w; + scalar_t *data_col_ptr = data_col + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col; + //const scalar_t* data_im_ptr = data_im + ((b_col * num_channels + c_im) * height + h_in) * width + w_in; + const scalar_t *data_im_ptr = data_im + (b_col * num_channels + c_im) * height * width; + const scalar_t *data_offset_ptr = data_offset + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col; + + for (int i = 0; i < kernel_h; ++i) + { + for (int j = 0; j < kernel_w; ++j) + { + const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col; + const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + w_col; + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + scalar_t val = static_cast(0); + const scalar_t h_im = h_in + i * dilation_h + offset_h; + const scalar_t w_im = w_in + j * dilation_w + offset_w; + if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) + { + //const scalar_t map_h = i * dilation_h + offset_h; + //const scalar_t map_w = j * dilation_w + offset_w; + //const int cur_height = height - h_in; + //const int cur_width = width - w_in; + //val = deformable_im2col_bilinear(data_im_ptr, width, cur_height, cur_width, map_h, map_w); + val = deformable_im2col_bilinear(data_im_ptr, width, height, width, h_im, w_im); + } + *data_col_ptr = val; + data_col_ptr += batch_size * height_col * width_col; + } + } + } +} + +void deformable_im2col( + const at::Tensor data_im, const at::Tensor data_offset, const int channels, + const int height, const int width, const int ksize_h, const int ksize_w, 
+ const int pad_h, const int pad_w, const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, const int parallel_imgs, + const int deformable_group, at::Tensor data_col) +{ + // num_axes should be smaller than block size + // todo: check parallel_imgs is correctly passed in + int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; + int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; + int num_kernels = channels * height_col * width_col * parallel_imgs; + int channel_per_deformable_group = channels / deformable_group; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_im.scalar_type(), "deformable_im2col_gpu", ([&] { + const scalar_t *data_im_ = data_im.data_ptr(); + const scalar_t *data_offset_ = data_offset.data_ptr(); + scalar_t *data_col_ = data_col.data_ptr(); + + deformable_im2col_gpu_kernel<<>>( + num_kernels, data_im_, data_offset_, height, width, ksize_h, ksize_w, + pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, + channel_per_deformable_group, parallel_imgs, channels, deformable_group, + height_col, width_col, data_col_); + })); + + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + { + printf("error in deformable_im2col: %s\n", cudaGetErrorString(err)); + } +} + +template +__global__ void deformable_col2im_gpu_kernel( + const int n, const scalar_t *data_col, const scalar_t *data_offset, + const int channels, const int height, const int width, + const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, const int deformable_group, + const int height_col, const int width_col, + scalar_t *grad_im) +{ + CUDA_KERNEL_LOOP(index, n) + { + const int j = (index / width_col / height_col / batch_size) % kernel_w; + const int i = (index / width_col / height_col / batch_size / kernel_w) % kernel_h; + const int c = index / width_col / height_col / batch_size / kernel_w / kernel_h; + // compute the start and end of the output + + const int deformable_group_index = c / channel_per_deformable_group; + + int w_out = index % width_col; + int h_out = (index / width_col) % height_col; + int b = (index / width_col / height_col) % batch_size; + int w_in = w_out * stride_w - pad_w; + int h_in = h_out * stride_h - pad_h; + + const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * + 2 * kernel_h * kernel_w * height_col * width_col; + const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out; + const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out; + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + const scalar_t cur_inv_h_data = h_in + i * dilation_h + offset_h; + const scalar_t cur_inv_w_data = w_in + j * dilation_w + offset_w; + + const scalar_t cur_top_grad = data_col[index]; + const int cur_h = (int)cur_inv_h_data; + const int cur_w = (int)cur_inv_w_data; + for (int dy = -2; dy <= 2; dy++) + { + for (int dx = -2; dx <= 2; dx++) + { + if (cur_h + dy >= 0 && cur_h + dy < height && + cur_w + dx >= 0 && cur_w + dx < width && + abs(cur_inv_h_data - (cur_h + dy)) < 1 && + abs(cur_inv_w_data - (cur_w + dx)) < 1) + { + int cur_bottom_grad_pos = ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx; + scalar_t 
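+          // Scatter the column gradient back to the integer pixels the bilinear
+          // sample touched: the dy/dx window plus the abs(...) < 1 tests select
+          // exactly those neighbours, and atomicAdd is needed because several
+          // output positions can map to the same input pixel.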
weight = get_gradient_weight(cur_inv_h_data, cur_inv_w_data, cur_h + dy, cur_w + dx, height, width); + atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad); + } + } + } + } +} + +void deformable_col2im( + const at::Tensor data_col, const at::Tensor data_offset, const int channels, + const int height, const int width, const int ksize_h, + const int ksize_w, const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int parallel_imgs, const int deformable_group, + at::Tensor grad_im) +{ + + // todo: make sure parallel_imgs is passed in correctly + int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; + int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; + int num_kernels = channels * ksize_h * ksize_w * height_col * width_col * parallel_imgs; + int channel_per_deformable_group = channels / deformable_group; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_col.scalar_type(), "deformable_col2im_gpu", ([&] { + const scalar_t *data_col_ = data_col.data_ptr(); + const scalar_t *data_offset_ = data_offset.data_ptr(); + scalar_t *grad_im_ = grad_im.data_ptr(); + + deformable_col2im_gpu_kernel<<>>( + num_kernels, data_col_, data_offset_, channels, height, width, ksize_h, + ksize_w, pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, channel_per_deformable_group, + parallel_imgs, deformable_group, height_col, width_col, grad_im_); + })); + + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + { + printf("error in deformable_col2im: %s\n", cudaGetErrorString(err)); + } +} + +template +__global__ void deformable_col2im_coord_gpu_kernel(const int n, const scalar_t *data_col, + const scalar_t *data_im, const scalar_t *data_offset, + const int channels, const int height, const int width, + const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, const int offset_channels, const int deformable_group, + const int height_col, const int width_col, scalar_t *grad_offset) +{ + CUDA_KERNEL_LOOP(index, n) + { + scalar_t val = 0; + int w = index % width_col; + int h = (index / width_col) % height_col; + int c = (index / width_col / height_col) % offset_channels; + int b = (index / width_col / height_col) / offset_channels; + // compute the start and end of the output + + const int deformable_group_index = c / (2 * kernel_h * kernel_w); + const int col_step = kernel_h * kernel_w; + int cnt = 0; + const scalar_t *data_col_ptr = data_col + deformable_group_index * channel_per_deformable_group * + batch_size * width_col * height_col; + const scalar_t *data_im_ptr = data_im + (b * deformable_group + deformable_group_index) * + channel_per_deformable_group / kernel_h / kernel_w * height * width; + const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * + kernel_h * kernel_w * height_col * width_col; + + const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w; + + for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; col_c += col_step) + { + const int col_pos = (((col_c * batch_size + b) * height_col) + h) * width_col + w; + const int bp_dir = offset_c % 2; + + int j = (col_pos / width_col / height_col / batch_size) % kernel_w; + int i = (col_pos / width_col / height_col / batch_size / kernel_w) % 
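+      // Decode the kernel tap (i, j) from the linear column index: the column
+      // matrix is laid out with channels * kernel_h * kernel_w rows and
+      // batch * height_col * width_col entries per row.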
kernel_h; + int w_out = col_pos % width_col; + int h_out = (col_pos / width_col) % height_col; + int w_in = w_out * stride_w - pad_w; + int h_in = h_out * stride_h - pad_h; + const int data_offset_h_ptr = (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out); + const int data_offset_w_ptr = (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out); + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + scalar_t inv_h = h_in + i * dilation_h + offset_h; + scalar_t inv_w = w_in + j * dilation_w + offset_w; + if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) + { + inv_h = inv_w = -2; + } + const scalar_t weight = get_coordinate_weight( + inv_h, inv_w, + height, width, data_im_ptr + cnt * height * width, width, bp_dir); + val += weight * data_col_ptr[col_pos]; + cnt += 1; + } + + grad_offset[index] = val; + } +} + +void deformable_col2im_coord( + const at::Tensor data_col, const at::Tensor data_im, const at::Tensor data_offset, + const int channels, const int height, const int width, const int ksize_h, + const int ksize_w, const int pad_h, const int pad_w, const int stride_h, + const int stride_w, const int dilation_h, const int dilation_w, + const int parallel_imgs, const int deformable_group, at::Tensor grad_offset) +{ + + int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; + int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; + int num_kernels = height_col * width_col * 2 * ksize_h * ksize_w * deformable_group * parallel_imgs; + int channel_per_deformable_group = channels * ksize_h * ksize_w / deformable_group; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_col.scalar_type(), "deformable_col2im_coord_gpu", ([&] { + const scalar_t *data_col_ = data_col.data_ptr(); + const scalar_t *data_im_ = data_im.data_ptr(); + const scalar_t *data_offset_ = data_offset.data_ptr(); + scalar_t *grad_offset_ = grad_offset.data_ptr(); + + deformable_col2im_coord_gpu_kernel<<>>( + num_kernels, data_col_, data_im_, data_offset_, channels, height, width, + ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, channel_per_deformable_group, + parallel_imgs, 2 * ksize_h * ksize_w * deformable_group, deformable_group, + height_col, width_col, grad_offset_); + })); +} + +template +__device__ scalar_t dmcn_im2col_bilinear(const scalar_t *bottom_data, const int data_width, + const int height, const int width, scalar_t h, scalar_t w) +{ + int h_low = floor(h); + int w_low = floor(w); + int h_high = h_low + 1; + int w_high = w_low + 1; + + scalar_t lh = h - h_low; + scalar_t lw = w - w_low; + scalar_t hh = 1 - lh, hw = 1 - lw; + + scalar_t v1 = 0; + if (h_low >= 0 && w_low >= 0) + v1 = bottom_data[h_low * data_width + w_low]; + scalar_t v2 = 0; + if (h_low >= 0 && w_high <= width - 1) + v2 = bottom_data[h_low * data_width + w_high]; + scalar_t v3 = 0; + if (h_high <= height - 1 && w_low >= 0) + v3 = bottom_data[h_high * data_width + w_low]; + scalar_t v4 = 0; + if (h_high <= height - 1 && w_high <= width - 1) + v4 = bottom_data[h_high * data_width + w_high]; + + scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; + + scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + return val; +} + +template +__device__ scalar_t dmcn_get_gradient_weight(scalar_t argmax_h, scalar_t argmax_w, + const int h, const int w, const int height, const int width) +{ + if (argmax_h <= -1 || argmax_h >= 
height || argmax_w <= -1 || argmax_w >= width) + { + //empty + return 0; + } + + int argmax_h_low = floor(argmax_h); + int argmax_w_low = floor(argmax_w); + int argmax_h_high = argmax_h_low + 1; + int argmax_w_high = argmax_w_low + 1; + + scalar_t weight = 0; + if (h == argmax_h_low && w == argmax_w_low) + weight = (h + 1 - argmax_h) * (w + 1 - argmax_w); + if (h == argmax_h_low && w == argmax_w_high) + weight = (h + 1 - argmax_h) * (argmax_w + 1 - w); + if (h == argmax_h_high && w == argmax_w_low) + weight = (argmax_h + 1 - h) * (w + 1 - argmax_w); + if (h == argmax_h_high && w == argmax_w_high) + weight = (argmax_h + 1 - h) * (argmax_w + 1 - w); + return weight; +} + +template +__device__ scalar_t dmcn_get_coordinate_weight(scalar_t argmax_h, scalar_t argmax_w, + const int height, const int width, const scalar_t *im_data, + const int data_width, const int bp_dir) +{ + if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width) + { + //empty + return 0; + } + + int argmax_h_low = floor(argmax_h); + int argmax_w_low = floor(argmax_w); + int argmax_h_high = argmax_h_low + 1; + int argmax_w_high = argmax_w_low + 1; + + scalar_t weight = 0; + + if (bp_dir == 0) + { + if (argmax_h_low >= 0 && argmax_w_low >= 0) + weight += -1 * (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_low * data_width + argmax_w_low]; + if (argmax_h_low >= 0 && argmax_w_high <= width - 1) + weight += -1 * (argmax_w - argmax_w_low) * im_data[argmax_h_low * data_width + argmax_w_high]; + if (argmax_h_high <= height - 1 && argmax_w_low >= 0) + weight += (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_high * data_width + argmax_w_low]; + if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) + weight += (argmax_w - argmax_w_low) * im_data[argmax_h_high * data_width + argmax_w_high]; + } + else if (bp_dir == 1) + { + if (argmax_h_low >= 0 && argmax_w_low >= 0) + weight += -1 * (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_low]; + if (argmax_h_low >= 0 && argmax_w_high <= width - 1) + weight += (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_high]; + if (argmax_h_high <= height - 1 && argmax_w_low >= 0) + weight += -1 * (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_low]; + if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) + weight += (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_high]; + } + + return weight; +} + +template +__global__ void modulated_deformable_im2col_gpu_kernel(const int n, + const scalar_t *data_im, const scalar_t *data_offset, const scalar_t *data_mask, + const int height, const int width, const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, const int num_channels, const int deformable_group, + const int height_col, const int width_col, + scalar_t *data_col) +{ + CUDA_KERNEL_LOOP(index, n) + { + // index index of output matrix + const int w_col = index % width_col; + const int h_col = (index / width_col) % height_col; + const int b_col = (index / width_col / height_col) % batch_size; + const int c_im = (index / width_col / height_col) / batch_size; + const int c_col = c_im * kernel_h * kernel_w; + + // compute deformable group index + const int deformable_group_index = c_im / channel_per_deformable_group; + + const int h_in = h_col * stride_h - pad_h; + const int w_in = w_col 
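+    // Map the output coordinate back to input space: the patch origin sits at
+    // (h_col*stride_h - pad_h, w_col*stride_w - pad_w) before the learned
+    // offsets are added on top.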
* stride_w - pad_w; + + scalar_t *data_col_ptr = data_col + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col; + //const float* data_im_ptr = data_im + ((b_col * num_channels + c_im) * height + h_in) * width + w_in; + const scalar_t *data_im_ptr = data_im + (b_col * num_channels + c_im) * height * width; + const scalar_t *data_offset_ptr = data_offset + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col; + + const scalar_t *data_mask_ptr = data_mask + (b_col * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col; + + for (int i = 0; i < kernel_h; ++i) + { + for (int j = 0; j < kernel_w; ++j) + { + const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col; + const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + w_col; + const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_col) * width_col + w_col; + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; + scalar_t val = static_cast(0); + const scalar_t h_im = h_in + i * dilation_h + offset_h; + const scalar_t w_im = w_in + j * dilation_w + offset_w; + //if (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) { + if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) + { + //const float map_h = i * dilation_h + offset_h; + //const float map_w = j * dilation_w + offset_w; + //const int cur_height = height - h_in; + //const int cur_width = width - w_in; + //val = dmcn_im2col_bilinear(data_im_ptr, width, cur_height, cur_width, map_h, map_w); + val = dmcn_im2col_bilinear(data_im_ptr, width, height, width, h_im, w_im); + } + *data_col_ptr = val * mask; + data_col_ptr += batch_size * height_col * width_col; + //data_col_ptr += height_col * width_col; + } + } + } +} + +template +__global__ void modulated_deformable_col2im_gpu_kernel(const int n, + const scalar_t *data_col, const scalar_t *data_offset, const scalar_t *data_mask, + const int channels, const int height, const int width, + const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, const int deformable_group, + const int height_col, const int width_col, + scalar_t *grad_im) +{ + CUDA_KERNEL_LOOP(index, n) + { + const int j = (index / width_col / height_col / batch_size) % kernel_w; + const int i = (index / width_col / height_col / batch_size / kernel_w) % kernel_h; + const int c = index / width_col / height_col / batch_size / kernel_w / kernel_h; + // compute the start and end of the output + + const int deformable_group_index = c / channel_per_deformable_group; + + int w_out = index % width_col; + int h_out = (index / width_col) % height_col; + int b = (index / width_col / height_col) % batch_size; + int w_in = w_out * stride_w - pad_w; + int h_in = h_out * stride_h - pad_h; + + const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col; + const scalar_t *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col; + const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out; + const int 
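+    // Offset channels are interleaved per kernel tap: channel 2*(i*kernel_w+j)
+    // holds the h-offset and 2*(i*kernel_w+j)+1 the w-offset, while the mask
+    // has one channel per tap at (i*kernel_w+j), all read at this output position.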
data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out; + const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_out) * width_col + w_out; + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; + const scalar_t cur_inv_h_data = h_in + i * dilation_h + offset_h; + const scalar_t cur_inv_w_data = w_in + j * dilation_w + offset_w; + + const scalar_t cur_top_grad = data_col[index] * mask; + const int cur_h = (int)cur_inv_h_data; + const int cur_w = (int)cur_inv_w_data; + for (int dy = -2; dy <= 2; dy++) + { + for (int dx = -2; dx <= 2; dx++) + { + if (cur_h + dy >= 0 && cur_h + dy < height && + cur_w + dx >= 0 && cur_w + dx < width && + abs(cur_inv_h_data - (cur_h + dy)) < 1 && + abs(cur_inv_w_data - (cur_w + dx)) < 1) + { + int cur_bottom_grad_pos = ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx; + scalar_t weight = dmcn_get_gradient_weight(cur_inv_h_data, cur_inv_w_data, cur_h + dy, cur_w + dx, height, width); + atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad); + } + } + } + } +} + +template +__global__ void modulated_deformable_col2im_coord_gpu_kernel(const int n, + const scalar_t *data_col, const scalar_t *data_im, + const scalar_t *data_offset, const scalar_t *data_mask, + const int channels, const int height, const int width, + const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, const int offset_channels, const int deformable_group, + const int height_col, const int width_col, + scalar_t *grad_offset, scalar_t *grad_mask) +{ + CUDA_KERNEL_LOOP(index, n) + { + scalar_t val = 0, mval = 0; + int w = index % width_col; + int h = (index / width_col) % height_col; + int c = (index / width_col / height_col) % offset_channels; + int b = (index / width_col / height_col) / offset_channels; + // compute the start and end of the output + + const int deformable_group_index = c / (2 * kernel_h * kernel_w); + const int col_step = kernel_h * kernel_w; + int cnt = 0; + const scalar_t *data_col_ptr = data_col + deformable_group_index * channel_per_deformable_group * batch_size * width_col * height_col; + const scalar_t *data_im_ptr = data_im + (b * deformable_group + deformable_group_index) * channel_per_deformable_group / kernel_h / kernel_w * height * width; + const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col; + const scalar_t *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col; + + const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w; + + for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; col_c += col_step) + { + const int col_pos = (((col_c * batch_size + b) * height_col) + h) * width_col + w; + const int bp_dir = offset_c % 2; + + int j = (col_pos / width_col / height_col / batch_size) % kernel_w; + int i = (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h; + int w_out = col_pos % width_col; + int h_out = (col_pos / width_col) % height_col; + int w_in = w_out * stride_w - pad_w; + int h_in = h_out * stride_h - pad_h; + const int data_offset_h_ptr = (((2 * (i * kernel_w + j)) * height_col + 
h_out) * width_col + w_out); + const int data_offset_w_ptr = (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out); + const int data_mask_hw_ptr = (((i * kernel_w + j) * height_col + h_out) * width_col + w_out); + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; + scalar_t inv_h = h_in + i * dilation_h + offset_h; + scalar_t inv_w = w_in + j * dilation_w + offset_w; + if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) + { + inv_h = inv_w = -2; + } + else + { + mval += data_col_ptr[col_pos] * dmcn_im2col_bilinear(data_im_ptr + cnt * height * width, width, height, width, inv_h, inv_w); + } + const scalar_t weight = dmcn_get_coordinate_weight( + inv_h, inv_w, + height, width, data_im_ptr + cnt * height * width, width, bp_dir); + val += weight * data_col_ptr[col_pos] * mask; + cnt += 1; + } + // KERNEL_ASSIGN(grad_offset[index], offset_req, val); + grad_offset[index] = val; + if (offset_c % 2 == 0) + // KERNEL_ASSIGN(grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * height_col + h) * width_col + w], mask_req, mval); + grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * height_col + h) * width_col + w] = mval; + } +} + +void modulated_deformable_im2col_cuda( + const at::Tensor data_im, const at::Tensor data_offset, const at::Tensor data_mask, + const int batch_size, const int channels, const int height_im, const int width_im, + const int height_col, const int width_col, const int kernel_h, const int kenerl_w, + const int pad_h, const int pad_w, const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int deformable_group, at::Tensor data_col) +{ + // num_axes should be smaller than block size + const int channel_per_deformable_group = channels / deformable_group; + const int num_kernels = channels * batch_size * height_col * width_col; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_im.scalar_type(), "modulated_deformable_im2col_gpu", ([&] { + const scalar_t *data_im_ = data_im.data_ptr(); + const scalar_t *data_offset_ = data_offset.data_ptr(); + const scalar_t *data_mask_ = data_mask.data_ptr(); + scalar_t *data_col_ = data_col.data_ptr(); + + modulated_deformable_im2col_gpu_kernel<<>>( + num_kernels, data_im_, data_offset_, data_mask_, height_im, width_im, kernel_h, kenerl_w, + pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group, + batch_size, channels, deformable_group, height_col, width_col, data_col_); + })); + + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + { + printf("error in modulated_deformable_im2col_cuda: %s\n", cudaGetErrorString(err)); + } +} + +void modulated_deformable_col2im_cuda( + const at::Tensor data_col, const at::Tensor data_offset, const at::Tensor data_mask, + const int batch_size, const int channels, const int height_im, const int width_im, + const int height_col, const int width_col, const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int deformable_group, at::Tensor grad_im) +{ + + const int channel_per_deformable_group = channels / deformable_group; + const int num_kernels = channels * kernel_h * kernel_w * batch_size * height_col * width_col; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + 
data_col.scalar_type(), "modulated_deformable_col2im_gpu", ([&] {
+        const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
+        const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+        const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
+        scalar_t *grad_im_ = grad_im.data_ptr<scalar_t>();
+
+        modulated_deformable_col2im_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
+            num_kernels, data_col_, data_offset_, data_mask_, channels, height_im, width_im,
+            kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
+            dilation_h, dilation_w, channel_per_deformable_group,
+            batch_size, deformable_group, height_col, width_col, grad_im_);
+      }));
+
+  cudaError_t err = cudaGetLastError();
+  if (err != cudaSuccess)
+  {
+    printf("error in modulated_deformable_col2im_cuda: %s\n", cudaGetErrorString(err));
+  }
+}
+
+void modulated_deformable_col2im_coord_cuda(
+    const at::Tensor data_col, const at::Tensor data_im, const at::Tensor data_offset, const at::Tensor data_mask,
+    const int batch_size, const int channels, const int height_im, const int width_im,
+    const int height_col, const int width_col, const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    const int dilation_h, const int dilation_w,
+    const int deformable_group,
+    at::Tensor grad_offset, at::Tensor grad_mask)
+{
+  const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h * kernel_w * deformable_group;
+  const int channel_per_deformable_group = channels * kernel_h * kernel_w / deformable_group;
+
+  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+      data_col.scalar_type(), "modulated_deformable_col2im_coord_gpu", ([&] {
+        const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
+        const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
+        const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+        const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
+        scalar_t *grad_offset_ = grad_offset.data_ptr<scalar_t>();
+        scalar_t *grad_mask_ = grad_mask.data_ptr<scalar_t>();
+
+        modulated_deformable_col2im_coord_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
+            num_kernels, data_col_, data_im_, data_offset_, data_mask_, channels, height_im, width_im,
+            kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
+            dilation_h, dilation_w, channel_per_deformable_group,
+            batch_size, 2 * kernel_h * kernel_w * deformable_group, deformable_group, height_col, width_col,
+            grad_offset_, grad_mask_);
+      }));
+  cudaError_t err = cudaGetLastError();
+  if (err != cudaSuccess)
+  {
+    printf("error in modulated_deformable_col2im_coord_cuda: %s\n", cudaGetErrorString(err));
+  }
+} diff --git a/IIR-Lab/models/dcn/src/deform_conv_ext.cpp b/IIR-Lab/models/dcn/src/deform_conv_ext.cpp new file mode 100644 index 0000000000000000000000000000000000000000..41c6df6f721bd95a525fd6a03dd9882e863de042 --- /dev/null +++ b/IIR-Lab/models/dcn/src/deform_conv_ext.cpp @@ -0,0 +1,164 @@
+// modify from
+// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda.c
+
+#include <ATen/DeviceGuard.h>
+#include <torch/extension.h>
+
+#include <cmath>
+#include <vector>
+
+#define WITH_CUDA // always use cuda
+#ifdef WITH_CUDA
+int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,
+                             at::Tensor offset, at::Tensor output,
+                             at::Tensor columns, at::Tensor ones, int kW,
+                             int kH, int dW, int dH, int padW, int padH,
+                             int dilationW, int dilationH, int group,
+                             int deformable_group, int im2col_step);
+
+int deform_conv_backward_input_cuda(at::Tensor input, at::Tensor offset,
+                                    at::Tensor gradOutput, at::Tensor gradInput,
+                                    at::Tensor gradOffset, at::Tensor weight,
+                                    at::Tensor columns, int kW, int kH, int dW,
+                                    int dH, int padW, int padH, int dilationW,
+                                    int
dilationH, int group, + int deformable_group, int im2col_step); + +int deform_conv_backward_parameters_cuda( + at::Tensor input, at::Tensor offset, at::Tensor gradOutput, + at::Tensor gradWeight, // at::Tensor gradBias, + at::Tensor columns, at::Tensor ones, int kW, int kH, int dW, int dH, + int padW, int padH, int dilationW, int dilationH, int group, + int deformable_group, float scale, int im2col_step); + +void modulated_deform_conv_cuda_forward( + at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, + at::Tensor offset, at::Tensor mask, at::Tensor output, at::Tensor columns, + int kernel_h, int kernel_w, const int stride_h, const int stride_w, + const int pad_h, const int pad_w, const int dilation_h, + const int dilation_w, const int group, const int deformable_group, + const bool with_bias); + +void modulated_deform_conv_cuda_backward( + at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, + at::Tensor offset, at::Tensor mask, at::Tensor columns, + at::Tensor grad_input, at::Tensor grad_weight, at::Tensor grad_bias, + at::Tensor grad_offset, at::Tensor grad_mask, at::Tensor grad_output, + int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, + int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, + const bool with_bias); +#endif + +int deform_conv_forward(at::Tensor input, at::Tensor weight, + at::Tensor offset, at::Tensor output, + at::Tensor columns, at::Tensor ones, int kW, + int kH, int dW, int dH, int padW, int padH, + int dilationW, int dilationH, int group, + int deformable_group, int im2col_step) { + if (input.device().is_cuda()) { +#ifdef WITH_CUDA + return deform_conv_forward_cuda(input, weight, offset, output, columns, + ones, kW, kH, dW, dH, padW, padH, dilationW, dilationH, group, + deformable_group, im2col_step); +#else + AT_ERROR("deform conv is not compiled with GPU support"); +#endif + } + AT_ERROR("deform conv is not implemented on CPU"); +} + +int deform_conv_backward_input(at::Tensor input, at::Tensor offset, + at::Tensor gradOutput, at::Tensor gradInput, + at::Tensor gradOffset, at::Tensor weight, + at::Tensor columns, int kW, int kH, int dW, + int dH, int padW, int padH, int dilationW, + int dilationH, int group, + int deformable_group, int im2col_step) { + if (input.device().is_cuda()) { +#ifdef WITH_CUDA + return deform_conv_backward_input_cuda(input, offset, gradOutput, + gradInput, gradOffset, weight, columns, kW, kH, dW, dH, padW, padH, + dilationW, dilationH, group, deformable_group, im2col_step); +#else + AT_ERROR("deform conv is not compiled with GPU support"); +#endif + } + AT_ERROR("deform conv is not implemented on CPU"); +} + +int deform_conv_backward_parameters( + at::Tensor input, at::Tensor offset, at::Tensor gradOutput, + at::Tensor gradWeight, // at::Tensor gradBias, + at::Tensor columns, at::Tensor ones, int kW, int kH, int dW, int dH, + int padW, int padH, int dilationW, int dilationH, int group, + int deformable_group, float scale, int im2col_step) { + if (input.device().is_cuda()) { +#ifdef WITH_CUDA + return deform_conv_backward_parameters_cuda(input, offset, gradOutput, + gradWeight, columns, ones, kW, kH, dW, dH, padW, padH, dilationW, + dilationH, group, deformable_group, scale, im2col_step); +#else + AT_ERROR("deform conv is not compiled with GPU support"); +#endif + } + AT_ERROR("deform conv is not implemented on CPU"); +} + +void modulated_deform_conv_forward( + at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, + at::Tensor offset, at::Tensor 
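+    // Note: every entry point in this extension only dispatches to the CUDA
+    // implementation declared above; CPU tensors fall through to AT_ERROR, so
+    // the op is GPU-only by construction.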
mask, at::Tensor output, at::Tensor columns, + int kernel_h, int kernel_w, const int stride_h, const int stride_w, + const int pad_h, const int pad_w, const int dilation_h, + const int dilation_w, const int group, const int deformable_group, + const bool with_bias) { + if (input.device().is_cuda()) { +#ifdef WITH_CUDA + return modulated_deform_conv_cuda_forward(input, weight, bias, ones, + offset, mask, output, columns, kernel_h, kernel_w, stride_h, + stride_w, pad_h, pad_w, dilation_h, dilation_w, group, + deformable_group, with_bias); +#else + AT_ERROR("modulated deform conv is not compiled with GPU support"); +#endif + } + AT_ERROR("modulated deform conv is not implemented on CPU"); +} + +void modulated_deform_conv_backward( + at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, + at::Tensor offset, at::Tensor mask, at::Tensor columns, + at::Tensor grad_input, at::Tensor grad_weight, at::Tensor grad_bias, + at::Tensor grad_offset, at::Tensor grad_mask, at::Tensor grad_output, + int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, + int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, + const bool with_bias) { + if (input.device().is_cuda()) { +#ifdef WITH_CUDA + return modulated_deform_conv_cuda_backward(input, weight, bias, ones, + offset, mask, columns, grad_input, grad_weight, grad_bias, grad_offset, + grad_mask, grad_output, kernel_h, kernel_w, stride_h, stride_w, + pad_h, pad_w, dilation_h, dilation_w, group, deformable_group, + with_bias); +#else + AT_ERROR("modulated deform conv is not compiled with GPU support"); +#endif + } + AT_ERROR("modulated deform conv is not implemented on CPU"); +} + + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("deform_conv_forward", &deform_conv_forward, + "deform forward"); + m.def("deform_conv_backward_input", &deform_conv_backward_input, + "deform_conv_backward_input"); + m.def("deform_conv_backward_parameters", + &deform_conv_backward_parameters, + "deform_conv_backward_parameters"); + m.def("modulated_deform_conv_forward", + &modulated_deform_conv_forward, + "modulated deform conv forward"); + m.def("modulated_deform_conv_backward", + &modulated_deform_conv_backward, + "modulated deform conv backward"); +} diff --git a/IIR-Lab/models/utils.py b/IIR-Lab/models/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..5dc6140a982a46b4d1687ee0b21c4ca14d7a8d99 --- /dev/null +++ b/IIR-Lab/models/utils.py @@ -0,0 +1,186 @@ +import torch +import numpy as np +import torch.nn.functional as F +import cv2 +def padding_4x(seq_noise): + sh_im = seq_noise.size() + expanded_h = sh_im[-2]%16 + + if expanded_h: + expanded_h = 16-expanded_h + expanded_w = sh_im[-1]%16 + if expanded_w: + expanded_w = 16-expanded_w + + padexp = (0, expanded_w, 0, expanded_h) + seq_noise = F.pad(input=seq_noise, pad=padexp, mode='reflect') + return seq_noise, expanded_h, expanded_w + +def depadding(seq_denoise,expanded_h, expanded_w): + if expanded_h: + seq_denoise = seq_denoise[:, :, :-expanded_h, :] + if expanded_w: + seq_denoise = seq_denoise[:, :, :, :-expanded_w] + return seq_denoise +def chunkV3(net, input_data, option, patch_h = 516, patch_w = 516, patch_h_overlap = 128, patch_w_overlap = 128): + #input_data [1,6,4,1500, 2000] + + # H = input_data.shape[3] + # W = input_data.shape[4] + + shape_list = input_data.shape + + B, C, H, W = shape_list[0], shape_list[1], shape_list[2], shape_list[3] # 1,4,1500,2000 + + test_result = torch.zeros_like(input_data).cuda() # 和input的shape一样 + + # t0 = 
time.perf_counter()
+    # Tile the frame into overlapping patches, run `net` on each patch, and
+    # linearly cross-fade the overlapping bands so tile seams stay invisible.
+    h_index = 1
+    while (patch_h*h_index-patch_h_overlap*(h_index-1)) < H:
+
+        test_horizontal_result = torch.zeros((B,C,patch_h,W)).cuda()  # same shape as input, but only patch_h rows
+
+        h_begin = patch_h*(h_index-1)-patch_h_overlap*(h_index-1)
+        h_end = patch_h*h_index-patch_h_overlap*(h_index-1)
+        w_index = 1
+        while (patch_w*w_index-patch_w_overlap*(w_index-1)) < W:
+            w_begin = patch_w*(w_index-1)-patch_w_overlap*(w_index-1)
+            w_end = patch_w*w_index-patch_w_overlap*(w_index-1)
+            test_patch = input_data[...,h_begin:h_end,w_begin:w_end]
+
+            with torch.no_grad():
+                test_patch_result = net(test_patch).detach().cuda()
+
+            if w_index == 1:
+                test_horizontal_result[...,w_begin:w_end] = test_patch_result
+            else:
+                for i in range(patch_w_overlap):
+                    test_horizontal_result[...,w_begin+i] = test_horizontal_result[...,w_begin+i]*(patch_w_overlap-1-i)/(patch_w_overlap-1)+test_patch_result[...,i]*i/(patch_w_overlap-1)
+                test_horizontal_result[...,w_begin+patch_w_overlap:w_end] = test_patch_result[...,patch_w_overlap:]
+            w_index += 1
+
+        # rightmost column: align the last patch to the image border and blend
+        test_patch = input_data[...,h_begin:h_end,-patch_w:]
+
+        with torch.no_grad():
+            test_patch_result = net(test_patch).detach().cuda()
+        last_range = w_end-(W-patch_w)
+
+        for i in range(last_range):
+            test_horizontal_result[...,W-patch_w+i] = test_horizontal_result[...,W-patch_w+i]*(last_range-1-i)/(last_range-1)+test_patch_result[...,i]*i/(last_range-1)
+        test_horizontal_result[...,w_end:] = test_patch_result[...,last_range:]
+
+        if h_index == 1:
+            test_result[...,h_begin:h_end,:] = test_horizontal_result
+        else:
+            for i in range(patch_h_overlap):
+                test_result[...,h_begin+i,:] = test_result[...,h_begin+i,:]*(patch_h_overlap-1-i)/(patch_h_overlap-1)+test_horizontal_result[...,i,:]*i/(patch_h_overlap-1)
+            test_result[...,h_begin+patch_h_overlap:h_end,:] = test_horizontal_result[...,patch_h_overlap:,:]
+        h_index += 1
+
+    # bottom row: align the last patch band to the image border and blend
+    test_horizontal_result = torch.zeros((B,C,patch_h,W)).cuda()  # same shape as input, but only patch_h rows
+
+    w_index = 1
+    while (patch_w*w_index-patch_w_overlap*(w_index-1)) < W:
+        w_begin = patch_w*(w_index-1)-patch_w_overlap*(w_index-1)
+        w_end = patch_w*w_index-patch_w_overlap*(w_index-1)
+        test_patch = input_data[...,-patch_h:,w_begin:w_end]
+
+        with torch.no_grad():
+            test_patch_result = net(test_patch).detach().cuda()
+
+        if w_index == 1:
+            test_horizontal_result[...,w_begin:w_end] = test_patch_result
+        else:
+            for i in range(patch_w_overlap):
+                test_horizontal_result[...,w_begin+i] = test_horizontal_result[...,w_begin+i]*(patch_w_overlap-1-i)/(patch_w_overlap-1)+test_patch_result[...,i]*i/(patch_w_overlap-1)
+            test_horizontal_result[...,w_begin+patch_w_overlap:w_end] = test_patch_result[...,patch_w_overlap:]
+        w_index += 1
+
+    test_patch = input_data[...,-patch_h:,-patch_w:]
+
+    with torch.no_grad():
+        test_patch_result = net(test_patch).detach().cuda()
+    last_range = w_end-(W-patch_w)
+    for i in range(last_range):
+        test_horizontal_result[...,W-patch_w+i] = test_horizontal_result[...,W-patch_w+i]*(last_range-1-i)/(last_range-1)+test_patch_result[...,i]*i/(last_range-1)
+    test_horizontal_result[...,w_end:] = test_patch_result[...,last_range:]
+
+    # blend the final patch_h-row band into the rows it overlaps
+    last_last_range = h_end-(H-patch_h)
+    for i in range(last_last_range):
+        test_result[...,H-patch_h+i,:] = test_result[...,H-patch_h+i,:]*(last_last_range-1-i)/(last_last_range-1)+test_horizontal_result[...,i,:]*i/(last_last_range-1)
+    test_result[...,h_end:,:] = test_horizontal_result[...,last_last_range:,:]
+
+    del test_horizontal_result, test_patch, input_data, test_patch_result
+    return test_result
+
+
+def
calculate_psnr(img, img2, input_order='HWC'): + + + assert img.shape == img2.shape, (f'Image shapes are different: {img.shape}, {img2.shape}.') + if input_order not in ['HWC', 'CHW']: + raise ValueError(f'Wrong input_order {input_order}. Supported input_orders are "HWC" and "CHW"') + + img = img.transpose(1, 2, 0) + img2 = img2.transpose(1, 2, 0) + + + img = img.astype(np.float64) + img2 = img2.astype(np.float64) + + mse = np.mean((img - img2)**2) + if mse == 0: + return float('inf') + return 10. * np.log10(1. * 1. / mse) + + +def calculate_ssim(img, img2, input_order='HWC'): + + + assert img.shape == img2.shape, (f'Image shapes are different: {img.shape}, {img2.shape}.') + if input_order not in ['HWC', 'CHW']: + raise ValueError(f'Wrong input_order {input_order}. Supported input_orders are "HWC" and "CHW"') + + + img = img.transpose(1, 2, 0) + img2 = img2.transpose(1, 2, 0) + + + img = img.astype(np.float64) + img2 = img2.astype(np.float64) + + ssims = [] + for i in range(img.shape[2]): + ssims.append(_ssim(img[..., i], img2[..., i])) + return np.array(ssims).mean() + +def _ssim(img, img2): + """Calculate SSIM (structural similarity) for one channel images. + + It is called by func:`calculate_ssim`. + + Args: + img (ndarray): Images with range [0, 255] with order 'HWC'. + img2 (ndarray): Images with range [0, 255] with order 'HWC'. + + Returns: + float: SSIM result. + """ + + c1 = (0.01 * 1)**2 + c2 = (0.03 * 1)**2 + kernel = cv2.getGaussianKernel(11, 1.5) + window = np.outer(kernel, kernel.transpose()) + + mu1 = cv2.filter2D(img, -1, window)[5:-5, 5:-5] # valid mode for window size 11 + mu2 = cv2.filter2D(img2, -1, window)[5:-5, 5:-5] + mu1_sq = mu1**2 + mu2_sq = mu2**2 + mu1_mu2 = mu1 * mu2 + sigma1_sq = cv2.filter2D(img**2, -1, window)[5:-5, 5:-5] - mu1_sq + sigma2_sq = cv2.filter2D(img2**2, -1, window)[5:-5, 5:-5] - mu2_sq + sigma12 = cv2.filter2D(img * img2, -1, window)[5:-5, 5:-5] - mu1_mu2 + + ssim_map = ((2 * mu1_mu2 + c1) * (2 * sigma12 + c2)) / ((mu1_sq + mu2_sq + c1) * (sigma1_sq + sigma2_sq + c2)) + return ssim_map.mean() \ No newline at end of file diff --git a/IIR-Lab/requirements.txt b/IIR-Lab/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..ffec907057be1f60e43558610dbe12c77e567f51 --- /dev/null +++ b/IIR-Lab/requirements.txt @@ -0,0 +1,141 @@ +absl-py==2.1.0 +addict==2.4.0 +asttokens==2.4.1 +attrs==23.2.0 +backcall==0.2.0 +beautifulsoup4==4.12.3 +bleach==6.1.0 +bm3d==4.0.1 +bm4d==4.2.3 +cachetools==5.3.2 +certifi==2023.11.17 +charset-normalizer==3.3.2 +comm==0.2.1 +contourpy==1.1.1 +cycler==0.12.1 +debugpy==1.8.1 +decorator==5.1.1 +defusedxml==0.7.1 +docopt==0.6.2 +einops==0.7.0 +exceptiongroup==1.2.0 +executing==2.0.1 +ExifRead==3.0.0 +fastjsonschema==2.19.1 +fastrlock==0.8.2 +filelock==3.13.1 +fonttools==4.47.2 +fsspec==2023.12.2 +future==0.18.3 +fvcore==0.1.5.post20221221 +gitdb==4.0.11 +GitPython==3.1.41 +google-auth==2.27.0 +google-auth-oauthlib==1.2.0 +grpcio==1.60.0 +h5py==3.10.0 +hdf5storage==0.1.19 +huggingface-hub==0.20.2 +idna==3.6 +imageio==2.33.1 +importlib-metadata==7.0.1 +importlib-resources==6.1.1 +iopath==0.1.10 +ipykernel==6.29.2 +ipython==8.12.3 +jedi==0.19.1 +Jinja2==3.1.3 +joblib==1.3.2 +jsonschema==4.21.1 +jsonschema-specifications==2023.12.1 +jupyter_client==8.6.0 +jupyter_core==5.7.1 +jupyterlab_pygments==0.3.0 +kiwisolver==1.4.5 +kornia==0.7.1 +lazy_loader==0.3 +lmdb==1.4.1 +lpips==0.1.4 +Markdown==3.5.2 +MarkupSafe==2.1.3 +matplotlib==3.7.2 +matplotlib-inline==0.1.6 +mistune==3.0.2 +mpmath==1.3.0 
+natsort==8.4.0
+nbclient==0.9.0
+nbconvert==7.16.1
+nbformat==5.9.2
+nest-asyncio==1.6.0
+networkx==3.1
+numpy==1.24.4
+oauthlib==3.2.2
+opencv-python==4.9.0.80
+packaging==23.2
+pandas==2.0.3
+pandocfilters==1.5.1
+parso==0.8.3
+pexpect==4.9.0
+pickleshare==0.7.5
+pillow==10.2.0
+pipreqs==0.5.0
+platformdirs==4.1.0
+portalocker==2.8.2
+prompt-toolkit==3.0.43
+protobuf==4.23.4
+psutil==5.9.8
+ptflops==0.7.2.2
+ptyprocess==0.7.0
+pure-eval==0.2.2
+pyasn1==0.5.1
+pyasn1-modules==0.3.0
+Pygments==2.17.2
+pyparsing==3.0.0
+pypng==0.20220715.0
+python-dateutil==2.8.2
+pytorch-msssim==1.0.0
+pytz==2024.1
+PyWavelets==1.4.1
+PyYAML==6.0.1
+pyzmq==25.1.2
+rawpy==0.19.0
+referencing==0.33.0
+requests==2.31.0
+requests-oauthlib==1.3.1
+rpds-py==0.18.0
+rsa==4.9
+safetensors==0.4.1
+scikit-image==0.21.0
+scikit-learn==1.3.2
+scipy==1.11.4
+seaborn==0.13.2
+six==1.16.0
+smmap==5.0.1
+soupsieve==2.5
+stack-data==0.6.3
+sympy==1.12
+tabulate==0.9.0
+tensorboard==2.15.1
+tensorboard-data-server==0.7.2
+termcolor==2.4.0
+tf_keras-nightly==2.16.0.dev2024011811
+thop==0.1.1.post2209072238
+threadpoolctl==3.2.0
+tifffile==2023.12.9
+timm==0.9.12
+tinycss2==1.2.1
+tomli==2.0.1
+tornado==6.4
+tqdm==4.66.1
+traitlets==5.14.1
+triton==2.1.0
+typing_extensions==4.9.0
+tzdata==2023.4
+urllib3==2.1.0
+wcwidth==0.2.13
+webencodings==0.5.1
+Werkzeug==3.0.1
+yacs==0.1.8
+yapf==0.40.2
+yarg==0.1.9
+zipp==3.17.0
diff --git a/IIR-Lab/run.sh b/IIR-Lab/run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..4beafc3264e1dfa2cd0331610d2a2ab8b08fc386
--- /dev/null
+++ b/IIR-Lab/run.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+cd /nightimage
+python final_test.py --chunk
diff --git a/IIR-Lab/utils.py b/IIR-Lab/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..bd8d259b1d584a527b84a6397c67c78974674101
--- /dev/null
+++ b/IIR-Lab/utils.py
@@ -0,0 +1,207 @@
+import torch
+import numpy as np
+import torch.nn.functional as F
+import cv2
+
+
+def padding_4x(seq_noise):
+    # Reflect-pad H and W up to the next multiple of 16 so the network's
+    # downsampling stages divide evenly.
+    sh_im = seq_noise.size()
+    expanded_h = sh_im[-2]%16
+    if expanded_h:
+        expanded_h = 16-expanded_h
+    expanded_w = sh_im[-1]%16
+    if expanded_w:
+        expanded_w = 16-expanded_w
+
+    padexp = (0, expanded_w, 0, expanded_h)
+    seq_noise = F.pad(input=seq_noise, pad=padexp, mode='reflect')
+    return seq_noise, expanded_h, expanded_w
+
+
+def depadding(seq_denoise, expanded_h, expanded_w):
+    if expanded_h:
+        seq_denoise = seq_denoise[:, :, :-expanded_h, :]
+    if expanded_w:
+        seq_denoise = seq_denoise[:, :, :, :-expanded_w]
+    return seq_denoise
+
+
+def chunkV3(net, input_data, option, patch_h=516, patch_w=516, patch_h_overlap=16, patch_w_overlap=16):
+    # input_data, e.g. [1, 6, 4, 1500, 2000]
+
+    # H = input_data.shape[3]
+    # W = input_data.shape[4]
+
+    shape_list = input_data.shape
+
+    if option == 'image':
+        B, C, H, W = shape_list[0], shape_list[1], shape_list[2], shape_list[3]  # 1,4,1500,2000
+    if option == 'RViDeformer':
+        # Fn (frame count) avoids shadowing torch.nn.functional imported as F.
+        B, Fn, C, H, W = shape_list[0], shape_list[1], shape_list[2], shape_list[3], shape_list[4]  # 1,6,4,1500,2000
+    if option == 'three2one':
+        B, FC, H, W = shape_list[0], shape_list[1], shape_list[2], shape_list[3]  # 1,12,1500,2000
+
+    if option == 'image':
+        test_result = torch.zeros_like(input_data).cpu()  # same shape as the input
+    if option == 'RViDeformer':
+        test_result = torch.zeros_like(input_data).cpu()  # same shape as the input
+    if option == 'three2one':
+        test_result = torch.zeros((B, 4, H, W)).cpu()  # same spatial size, 4 output channels
+
+
+    # t0 = time.perf_counter()
+    h_index = 1
+    while (patch_h*h_index-patch_h_overlap*(h_index-1)) < H:
+        if option == 'image':
+            # Same shape as the input, except the height is patch_h.
+            test_horizontal_result = torch.zeros((B,C,patch_h,W)).cpu()
+        if option == 'RViDeformer':
+            test_horizontal_result = torch.zeros((B, Fn, C, patch_h, W)).cpu()
+        if option == 'three2one':
+            test_horizontal_result = torch.zeros((B, 4, patch_h, W)).cpu()
+
+        h_begin = patch_h*(h_index-1)-patch_h_overlap*(h_index-1)
+        h_end = patch_h*h_index-patch_h_overlap*(h_index-1)
+        w_index = 1
+        while (patch_w*w_index-patch_w_overlap*(w_index-1)) < W:
+            w_begin = patch_w*(w_index-1)-patch_w_overlap*(w_index-1)
+            w_end = patch_w*w_index-patch_w_overlap*(w_index-1)
+            test_patch = input_data[...,h_begin:h_end,w_begin:w_end]
+
+            with torch.no_grad():
+                test_patch_result = net(test_patch).detach().cpu()
+
+            if w_index == 1:
+                test_horizontal_result[...,w_begin:w_end] = test_patch_result
+            else:
+                # Linear cross-fade over the horizontal overlap region.
+                for i in range(patch_w_overlap):
+                    test_horizontal_result[...,w_begin+i] = test_horizontal_result[...,w_begin+i]*(patch_w_overlap-1-i)/(patch_w_overlap-1)+test_patch_result[...,i]*i/(patch_w_overlap-1)
+                test_horizontal_result[...,w_begin+patch_w_overlap:w_end] = test_patch_result[...,patch_w_overlap:]
+            w_index += 1
+
+        # Final column: a patch aligned to the right image edge.
+        test_patch = input_data[...,h_begin:h_end,-patch_w:]
+
+        with torch.no_grad():
+            test_patch_result = net(test_patch).detach().cpu()
+        last_range = w_end-(W-patch_w)
+
+        for i in range(last_range):
+            test_horizontal_result[...,W-patch_w+i] = test_horizontal_result[...,W-patch_w+i]*(last_range-1-i)/(last_range-1)+test_patch_result[...,i]*i/(last_range-1)
+        test_horizontal_result[...,w_end:] = test_patch_result[...,last_range:]
+
+        if h_index == 1:
+            test_result[...,h_begin:h_end,:] = test_horizontal_result
+        else:
+            # Linear cross-fade over the vertical overlap region.
+            for i in range(patch_h_overlap):
+                test_result[...,h_begin+i,:] = test_result[...,h_begin+i,:]*(patch_h_overlap-1-i)/(patch_h_overlap-1)+test_horizontal_result[...,i,:]*i/(patch_h_overlap-1)
+            test_result[...,h_begin+patch_h_overlap:h_end,:] = test_horizontal_result[...,patch_h_overlap:,:]
+        h_index += 1
+
+    if option == 'image':
+        # Same shape as the input, except the height is patch_h.
+        test_horizontal_result = torch.zeros((B,C,patch_h,W)).cpu()
+    if option == 'RViDeformer':
+        test_horizontal_result = torch.zeros((B, Fn, C, patch_h, W)).cpu()
+    if option == 'three2one':
+        test_horizontal_result = torch.zeros((B, 4, patch_h, W)).cpu()
+
+    w_index = 1
+    while (patch_w*w_index-patch_w_overlap*(w_index-1)) < W:
+        w_begin = patch_w*(w_index-1)-patch_w_overlap*(w_index-1)
+        w_end = patch_w*w_index-patch_w_overlap*(w_index-1)
+        test_patch = input_data[...,-patch_h:,w_begin:w_end]
+
+        with torch.no_grad():
+            test_patch_result = net(test_patch).detach().cpu()
+
+        if w_index == 1:
+            test_horizontal_result[...,w_begin:w_end] = test_patch_result
+        else:
+            for i in range(patch_w_overlap):
+                test_horizontal_result[...,w_begin+i] = test_horizontal_result[...,w_begin+i]*(patch_w_overlap-1-i)/(patch_w_overlap-1)+test_patch_result[...,i]*i/(patch_w_overlap-1)
+            test_horizontal_result[...,w_begin+patch_w_overlap:w_end] = test_patch_result[...,patch_w_overlap:]
+        w_index += 1
+
+    # Final patch: aligned to the bottom-right corner.
+    test_patch = input_data[...,-patch_h:,-patch_w:]
+
+    with torch.no_grad():
+        test_patch_result = net(test_patch).detach().cpu()
+    last_range = w_end-(W-patch_w)
+    for i in range(last_range):
+        test_horizontal_result[...,W-patch_w+i] = test_horizontal_result[...,W-patch_w+i]*(last_range-1-i)/(last_range-1)+test_patch_result[...,i]*i/(last_range-1)
+    test_horizontal_result[...,w_end:] = test_patch_result[...,last_range:]
+
+    last_last_range = h_end-(H-patch_h)
+    for i in range(last_last_range):
+        # Blend the bottom strip; the row offset must use patch_h (H-patch_h+i), not patch_w.
+        test_result[...,H-patch_h+i,:] = test_result[...,H-patch_h+i,:]*(last_last_range-1-i)/(last_last_range-1)+test_horizontal_result[...,i,:]*i/(last_last_range-1)
+    test_result[...,h_end:,:] = test_horizontal_result[...,last_last_range:,:]
+
+    # t1 = time.perf_counter()
+    # print('Total running time: %s s' % (str(t1 - t0)))
+
+    return test_result
+
+
+def calculate_psnr(img, img2, input_order='HWC'):
+    """Calculate PSNR (peak signal-to-noise ratio) for images in range [0, 1]."""
+
+    assert img.shape == img2.shape, (f'Image shapes are different: {img.shape}, {img2.shape}.')
+    if input_order not in ['HWC', 'CHW']:
+        raise ValueError(f'Wrong input_order {input_order}. Supported input_orders are "HWC" and "CHW"')
+
+    # Only CHW inputs need to be rearranged to HWC.
+    if input_order == 'CHW':
+        img = img.transpose(1, 2, 0)
+        img2 = img2.transpose(1, 2, 0)
+
+    img = img.astype(np.float64)
+    img2 = img2.astype(np.float64)
+
+    mse = np.mean((img - img2)**2)
+    if mse == 0:
+        return float('inf')
+    return 10. * np.log10(1. / mse)  # peak value is 1. for [0, 1] images
+
+
+def calculate_ssim(img, img2, input_order='HWC'):
+    """Calculate SSIM (structural similarity) for images in range [0, 1]."""
+
+    assert img.shape == img2.shape, (f'Image shapes are different: {img.shape}, {img2.shape}.')
+    if input_order not in ['HWC', 'CHW']:
+        raise ValueError(f'Wrong input_order {input_order}. Supported input_orders are "HWC" and "CHW"')
+
+    # Only CHW inputs need to be rearranged to HWC.
+    if input_order == 'CHW':
+        img = img.transpose(1, 2, 0)
+        img2 = img2.transpose(1, 2, 0)
+
+    img = img.astype(np.float64)
+    img2 = img2.astype(np.float64)
+
+    ssims = []
+    for i in range(img.shape[2]):
+        ssims.append(_ssim(img[..., i], img2[..., i]))
+    return np.array(ssims).mean()
+
+def _ssim(img, img2):
+    """Calculate SSIM (structural similarity) for one channel.
+
+    It is called by func:`calculate_ssim`.
+
+    Args:
+        img (ndarray): 2-D single-channel image with range [0, 1].
+        img2 (ndarray): 2-D single-channel image with range [0, 1].
+
+    Returns:
+        float: SSIM result.
+    """
+
+    c1 = (0.01 * 1)**2  # stabilising constants for a dynamic range of 1
+    c2 = (0.03 * 1)**2
+    kernel = cv2.getGaussianKernel(11, 1.5)
+    window = np.outer(kernel, kernel.transpose())
+
+    mu1 = cv2.filter2D(img, -1, window)[5:-5, 5:-5]  # valid mode for window size 11
+    mu2 = cv2.filter2D(img2, -1, window)[5:-5, 5:-5]
+    mu1_sq = mu1**2
+    mu2_sq = mu2**2
+    mu1_mu2 = mu1 * mu2
+    sigma1_sq = cv2.filter2D(img**2, -1, window)[5:-5, 5:-5] - mu1_sq
+    sigma2_sq = cv2.filter2D(img2**2, -1, window)[5:-5, 5:-5] - mu2_sq
+    sigma12 = cv2.filter2D(img * img2, -1, window)[5:-5, 5:-5] - mu1_mu2
+
+    ssim_map = ((2 * mu1_mu2 + c1) * (2 * sigma12 + c2)) / ((mu1_sq + mu2_sq + c1) * (sigma1_sq + sigma2_sq + c2))
+    return ssim_map.mean()
\ No newline at end of file
diff --git a/IIR-Lab/utils_ours/__pycache__/calculate_PSNR_SSIM.cpython-310.pyc b/IIR-Lab/utils_ours/__pycache__/calculate_PSNR_SSIM.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e21c4492d89082e490a64f96ca42e8585e9a3322
Binary files /dev/null and b/IIR-Lab/utils_ours/__pycache__/calculate_PSNR_SSIM.cpython-310.pyc differ
diff --git a/IIR-Lab/utils_ours/__pycache__/calculate_PSNR_SSIM.cpython-38.pyc b/IIR-Lab/utils_ours/__pycache__/calculate_PSNR_SSIM.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..df76eac3a07acb5f0a91673d80b350d1d8513df0
Binary files /dev/null and b/IIR-Lab/utils_ours/__pycache__/calculate_PSNR_SSIM.cpython-38.pyc differ
diff --git a/IIR-Lab/utils_ours/__pycache__/calculate_PSNR_SSIM.cpython-39.pyc b/IIR-Lab/utils_ours/__pycache__/calculate_PSNR_SSIM.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ef7aa5fbaf7f7b2d40135b460314d0f9fa7d3d1b
Binary files /dev/null and b/IIR-Lab/utils_ours/__pycache__/calculate_PSNR_SSIM.cpython-39.pyc differ
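chunkV3 above stitches patch-wise network outputs back into a full-resolution tensor, so large frames can be processed without pushing the whole image through the model at once. A minimal sketch of driving it end to end; the identity network, the import path, and the 1x4x1500x2000 input shape are illustrative assumptions, not taken from this repo:

    import torch
    import torch.nn as nn
    from utils import chunkV3   # IIR-Lab/utils.py, assuming it is on sys.path

    net = nn.Identity()                    # stand-in for the real denoiser
    x = torch.rand(1, 4, 1500, 2000)       # packed Bayer frame in [0, 1]
    out = chunkV3(net, x, option='image')  # patches blended back together
    assert out.shape == x.shape

With nn.Identity the cross-fade weights sum to 1, so the stitched output reproduces the input exactly; swapping in a real network exercises the same code path.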
diff --git a/IIR-Lab/utils_ours/__pycache__/lc.cpython-310.pyc b/IIR-Lab/utils_ours/__pycache__/lc.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..708144b77f6fa7b6f7667124bcff5a19eb57ca47
Binary files /dev/null and b/IIR-Lab/utils_ours/__pycache__/lc.cpython-310.pyc differ
diff --git a/IIR-Lab/utils_ours/__pycache__/psnr_ssim.cpython-310.pyc b/IIR-Lab/utils_ours/__pycache__/psnr_ssim.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ab93e4b91ad3568a13018c7e9cea36df5d66e8af
Binary files /dev/null and b/IIR-Lab/utils_ours/__pycache__/psnr_ssim.cpython-310.pyc differ
diff --git a/IIR-Lab/utils_ours/__pycache__/psnr_ssim.cpython-38.pyc b/IIR-Lab/utils_ours/__pycache__/psnr_ssim.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d670918d9e91b8ba4de8820531b7736aa351e9cc
Binary files /dev/null and b/IIR-Lab/utils_ours/__pycache__/psnr_ssim.cpython-38.pyc differ
diff --git a/IIR-Lab/utils_ours/__pycache__/util.cpython-310.pyc b/IIR-Lab/utils_ours/__pycache__/util.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c4c220d5bdb45721fdc1c52b8b39d7d316364708
Binary files /dev/null and b/IIR-Lab/utils_ours/__pycache__/util.cpython-310.pyc differ
diff --git a/IIR-Lab/utils_ours/__pycache__/util.cpython-311.pyc b/IIR-Lab/utils_ours/__pycache__/util.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8cbc1d8da8578bd6766881548b71e18e021a28a3
Binary files /dev/null and b/IIR-Lab/utils_ours/__pycache__/util.cpython-311.pyc differ
diff --git a/IIR-Lab/utils_ours/__pycache__/util.cpython-312.pyc b/IIR-Lab/utils_ours/__pycache__/util.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..969bf464fda6645c87c51f2fb8ebfb9fa84c87a4
Binary files /dev/null and b/IIR-Lab/utils_ours/__pycache__/util.cpython-312.pyc differ
diff --git a/IIR-Lab/utils_ours/__pycache__/util.cpython-38.pyc b/IIR-Lab/utils_ours/__pycache__/util.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1ec25a0bab3b5f98a9f799a0701b1547bbef58a0
Binary files /dev/null and b/IIR-Lab/utils_ours/__pycache__/util.cpython-38.pyc differ
diff --git a/IIR-Lab/utils_ours/__pycache__/util.cpython-39.pyc b/IIR-Lab/utils_ours/__pycache__/util.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c8132aebdf4bdf2211cc56cb16c87bd547b05cf7
Binary files /dev/null and b/IIR-Lab/utils_ours/__pycache__/util.cpython-39.pyc differ
diff --git a/IIR-Lab/utils_ours/util.py b/IIR-Lab/utils_ours/util.py
new file mode 100644
index 0000000000000000000000000000000000000000..8d9e9b4efa703ba887ab34c1d018a3054b4eb67a
--- /dev/null
+++ b/IIR-Lab/utils_ours/util.py
@@ -0,0 +1,80 @@
+import os
+import sys
+import time
+from datetime import datetime
+import logging
+import numpy as np
+import torch
+import math
+
+def get_timestamp():
+    return datetime.now().strftime('%y%m%d-%H%M%S')
+
+def mkdir_and_rename(path):
+    if os.path.exists(path):
+        new_name = path + '_archived_' + get_timestamp()
+        print('Path already exists. Rename it to [{:s}]'.format(new_name))
+        os.rename(path, new_name)
+    os.makedirs(path)
+
+
+def scandir(dir_path, suffix=None, recursive=False, full_path=False):
+    """Scan a directory to find files of interest.
+    Args:
+        dir_path (str): Path of the directory.
+        suffix (str | tuple(str), optional): File suffix that we are
+            interested in. Default: None.
+        recursive (bool, optional): If set to True, recursively scan the
+            directory. Default: False.
+        full_path (bool, optional): If set to True, include the dir_path.
+            Default: False.
+    Returns:
+        A generator for all the files of interest, as relative paths.
+    """
+
+    if (suffix is not None) and not isinstance(suffix, (str, tuple)):
+        raise TypeError('"suffix" must be a string or tuple of strings')
+
+    root = dir_path
+
+    def _scandir(dir_path, suffix, recursive):
+        for entry in os.scandir(dir_path):
+            if not entry.name.startswith('.') and entry.is_file():
+                if full_path:
+                    return_path = entry.path
+                else:
+                    return_path = os.path.relpath(entry.path, root)
+
+                if suffix is None:
+                    yield return_path
+                elif return_path.endswith(suffix):
+                    yield return_path
+            # Only recurse into directories; hidden files would otherwise
+            # fall through here and crash os.scandir.
+            elif recursive and entry.is_dir():
+                yield from _scandir(
+                    entry.path, suffix=suffix, recursive=recursive)
+            else:
+                continue
+
+    return _scandir(dir_path, suffix=suffix, recursive=recursive)
+
+
+def setup_logger(log_file_path):
+    log_formatter = logging.Formatter("%(asctime)s [%(levelname)-5.5s] %(message)s")
+    root_logger = logging.getLogger()
+    root_logger.setLevel(logging.INFO)
+
+    log_file_handler = logging.FileHandler(log_file_path, encoding='utf-8')
+    log_file_handler.setFormatter(log_formatter)
+    root_logger.addHandler(log_file_handler)
+
+    log_stream_handler = logging.StreamHandler(sys.stdout)
+    log_stream_handler.setFormatter(log_formatter)
+    root_logger.addHandler(log_stream_handler)
+
+    logging.info('Logging file is %s' % log_file_path)
+
+
+def print_args(args):
+    for arg in vars(args):
+        logging.info('%s: %s' % (arg, getattr(args, arg)))
\ No newline at end of file
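For reference, a short sketch of how the utilities in IIR-Lab/utils_ours/util.py combine in a driver script; the log file name, directory, and suffix are assumptions for illustration:

    import logging
    from utils_ours.util import scandir, setup_logger

    setup_logger('run.log')   # mirror INFO logs to the file and stdout
    for path in scandir('./data', suffix='.png', recursive=True):
        logging.info('found %s', path)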