Artyom committed
Commit bd1c686 • Parent(s): 6721043

dh-aisp

This view is limited to 50 files because it contains too many changes.
- .gitattributes +3 -0
- DH-AISP/1/__pycache__/awb.cpython-36.pyc +0 -0
- DH-AISP/1/awb.py +184 -0
- DH-AISP/1/daylight_isp_03_3_unet_sid_5/checkpoint +2 -0
- DH-AISP/1/daylight_isp_03_3_unet_sid_5/model.ckpt.data-00000-of-00001 +3 -0
- DH-AISP/1/daylight_isp_03_3_unet_sid_5/model.ckpt.index +0 -0
- DH-AISP/1/daylight_isp_03_3_unet_sid_5/model.ckpt.meta +3 -0
- DH-AISP/1/tensorflow2to1_3_unet_bining3_7.py +451 -0
- DH-AISP/2/__pycache__/model_convnext2_hdr.cpython-37.pyc +0 -0
- DH-AISP/2/__pycache__/myFFCResblock0.cpython-37.pyc +0 -0
- DH-AISP/2/__pycache__/test_dataset_for_testing.cpython-37.pyc +0 -0
- DH-AISP/2/focal_frequency_loss/__init__.py +3 -0
- DH-AISP/2/focal_frequency_loss/__pycache__/__init__.cpython-37.pyc +0 -0
- DH-AISP/2/focal_frequency_loss/__pycache__/focal_frequency_loss.cpython-37.pyc +0 -0
- DH-AISP/2/focal_frequency_loss/focal_frequency_loss.py +114 -0
- DH-AISP/2/model_convnext2_hdr.py +592 -0
- DH-AISP/2/myFFCResblock0.py +60 -0
- DH-AISP/2/perceptual.py +30 -0
- DH-AISP/2/pytorch_msssim/__init__.py +133 -0
- DH-AISP/2/pytorch_msssim/__pycache__/__init__.cpython-36.pyc +0 -0
- DH-AISP/2/pytorch_msssim/__pycache__/__init__.cpython-37.pyc +0 -0
- DH-AISP/2/result_low_light_hdr/checkpoint_gen.pth +3 -0
- DH-AISP/2/saicinpainting/__init__.py +0 -0
- DH-AISP/2/saicinpainting/__pycache__/__init__.cpython-36.pyc +0 -0
- DH-AISP/2/saicinpainting/__pycache__/__init__.cpython-37.pyc +0 -0
- DH-AISP/2/saicinpainting/__pycache__/utils.cpython-36.pyc +0 -0
- DH-AISP/2/saicinpainting/__pycache__/utils.cpython-37.pyc +0 -0
- DH-AISP/2/saicinpainting/evaluation/__init__.py +33 -0
- DH-AISP/2/saicinpainting/evaluation/data.py +168 -0
- DH-AISP/2/saicinpainting/evaluation/evaluator.py +220 -0
- DH-AISP/2/saicinpainting/evaluation/losses/__init__.py +0 -0
- DH-AISP/2/saicinpainting/evaluation/losses/base_loss.py +528 -0
- DH-AISP/2/saicinpainting/evaluation/losses/fid/__init__.py +0 -0
- DH-AISP/2/saicinpainting/evaluation/losses/fid/fid_score.py +328 -0
- DH-AISP/2/saicinpainting/evaluation/losses/fid/inception.py +323 -0
- DH-AISP/2/saicinpainting/evaluation/losses/lpips.py +891 -0
- DH-AISP/2/saicinpainting/evaluation/losses/ssim.py +74 -0
- DH-AISP/2/saicinpainting/evaluation/masks/README.md +27 -0
- DH-AISP/2/saicinpainting/evaluation/masks/__init__.py +0 -0
- DH-AISP/2/saicinpainting/evaluation/masks/countless/README.md +25 -0
- DH-AISP/2/saicinpainting/evaluation/masks/countless/__init__.py +0 -0
- DH-AISP/2/saicinpainting/evaluation/masks/countless/countless2d.py +529 -0
- DH-AISP/2/saicinpainting/evaluation/masks/countless/countless3d.py +356 -0
- DH-AISP/2/saicinpainting/evaluation/masks/countless/images/gcim.jpg +3 -0
- DH-AISP/2/saicinpainting/evaluation/masks/countless/images/gray_segmentation.png +0 -0
- DH-AISP/2/saicinpainting/evaluation/masks/countless/images/segmentation.png +0 -0
- DH-AISP/2/saicinpainting/evaluation/masks/countless/images/sparse.png +0 -0
- DH-AISP/2/saicinpainting/evaluation/masks/countless/memprof/countless2d_gcim_N_1000.png +0 -0
- DH-AISP/2/saicinpainting/evaluation/masks/countless/memprof/countless2d_quick_gcim_N_1000.png +0 -0
- DH-AISP/2/saicinpainting/evaluation/masks/countless/memprof/countless3d.png +0 -0
.gitattributes
CHANGED
@@ -39,3 +39,6 @@ SCBC/Input/IMG_20240215_214449.png filter=lfs diff=lfs merge=lfs -text
 SCBC/Output/IMG_20240215_213330.png filter=lfs diff=lfs merge=lfs -text
 SCBC/Output/IMG_20240215_214449.png filter=lfs diff=lfs merge=lfs -text
 PolyuColor/resources/average_shading.png filter=lfs diff=lfs merge=lfs -text
+DH-AISP/1/daylight_isp_03_3_unet_sid_5/model.ckpt.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
+DH-AISP/1/daylight_isp_03_3_unet_sid_5/model.ckpt.meta filter=lfs diff=lfs merge=lfs -text
+DH-AISP/2/saicinpainting/evaluation/masks/countless/images/gcim.jpg filter=lfs diff=lfs merge=lfs -text
DH-AISP/1/__pycache__/awb.cpython-36.pyc
ADDED
Binary file (3.82 kB).
DH-AISP/1/awb.py
ADDED
@@ -0,0 +1,184 @@
import os
import cv2
import numpy as np
from glob import glob


def dynamic(rgb):

    rgb = rgb[:-1, :-1, :]  # drop the last row and column
    h, w, _ = rgb.shape
    col = 4
    row = 3
    h1 = h // row
    w1 = w // col

    r, g, b = cv2.split(rgb)
    r_mask = r < 0.95
    g_mask = g < 0.95
    b_mask = b < 0.95
    mask = r_mask * g_mask * b_mask
    r *= mask
    g *= mask
    b *= mask
    rgb = np.stack((r, g, b), axis=2)

    y, cr, cb = cv2.split(cv2.cvtColor(rgb, cv2.COLOR_RGB2YCrCb))
    cr -= 0.5
    cb -= 0.5

    mr, mb, dr, db = 0, 0, 0, 0
    for r in range(row):
        for c in range(col):
            cr_1 = cr[r * h1:(r + 1) * h1, c * w1:(c + 1) * w1]
            cb_1 = cb[r * h1:(r + 1) * h1, c * w1:(c + 1) * w1]
            mr_1 = np.mean(cr_1)
            mb_1 = np.mean(cb_1)
            dr_1 = np.mean(np.abs(cr_1 - mr))
            db_1 = np.mean(np.abs(cb_1 - mb))

            mr += mr_1
            mb += mb_1
            dr += dr_1
            db += db_1

    mr /= col * row
    mb /= col * row
    dr /= col * row
    db /= col * row

    cb_mask = np.abs(cb - (mb + db * np.sign(mb))) < 1.5 * db
    cr_mask = np.abs(cr - (1.5 * mr + dr * np.sign(mr))) < 1.5 * dr

    mask = cb_mask * cr_mask
    y_white = y * mask

    hist_y = np.zeros(256, dtype=np.int)
    y_white_uint8 = (y_white * 255).astype(np.int)

    for v in range(255):
        hist_y[v] = np.sum(y_white_uint8 == v)

    thr_sum = 0.05 * np.sum(mask)
    sum_v = 0
    thr = 0
    for v in range(255, -1, -1):
        sum_v = sum_v + hist_y[v]
        if sum_v > thr_sum:
            thr = v
            break

    white_mask = y_white_uint8 > thr
    cv2.imwrite(r'V:\Project\3_MEWDR\data\2nd_awb\t.png', (white_mask + 0) * 255)

    r, g, b = cv2.split(rgb)
    r_ave = np.sum(r[white_mask]) / np.sum(white_mask)
    g_ave = np.sum(g[white_mask]) / np.sum(white_mask)
    b_ave = np.sum(b[white_mask]) / np.sum(white_mask)

    return 1 / r_ave, 1 / g_ave, 1 / b_ave


def perf_ref(rgb, eps):
    h, w, _ = rgb.shape

    r, g, b = cv2.split(rgb)
    r_mask = r < 0.95
    g_mask = g < 0.95
    b_mask = b < 0.95
    mask = r_mask * g_mask * b_mask
    r *= mask
    g *= mask
    b *= mask
    rgb = np.stack((r, g, b), axis=2)
    rgb = np.clip(rgb * 255, 0, 255).astype(np.int)

    hist_rgb = np.zeros(255 * 3, dtype=np.int)
    rgb_sum = np.sum(rgb, axis=2)

    for v in range(255 * 3):
        hist_rgb[v] = np.sum(rgb_sum == v)

    thr_sum = eps * h * w
    sum_v = 0
    thr = 0
    for v in range(255 * 3 - 1, -1, -1):
        sum_v = sum_v + hist_rgb[v]
        if sum_v > thr_sum:
            thr = v
            break

    thr_mask = rgb_sum > thr
    r_ave = np.sum(r[thr_mask]) / np.sum(thr_mask)
    g_ave = np.sum(g[thr_mask]) / np.sum(thr_mask)
    b_ave = np.sum(b[thr_mask]) / np.sum(thr_mask)

    # k = (r_ave + g_ave + b_ave) / 3.
    # k = 255

    # print(k)

    # r = np.clip(r * k / r_ave, 0, 255)
    # g = np.clip(g * k / g_ave, 0, 255)
    # b = np.clip(b * k / b_ave, 0, 255)

    return 1 / r_ave, 1 / g_ave, 1 / b_ave


def awb_v(in_image, bayer, eps):

    assert bayer in ['GBRG', 'RGGB']

    if bayer == 'GBRG':
        g = in_image[0::2, 0::2]  # [0,0]
        b = in_image[0::2, 1::2]  # [0,1]
        r = in_image[1::2, 0::2]  # [1,0]
    else:
        r = in_image[0::2, 0::2]  # [0,0]
        g = in_image[0::2, 1::2]  # [0,1]
        b = in_image[1::2, 1::2]  # [1,1]

    rgb = cv2.merge((r, g, b)) * 1

    r_gain, g_gain, b_gain = perf_ref(rgb, eps)

    return r_gain / g_gain, b_gain / g_gain


def main():
    path = r'V:\Project\3_MEWDR\data\2nd_raw'
    # out_path = r'V:\Project\3_MEWDR\data\2nd_awb'

    files = glob(os.path.join(path, '*.png'))

    for f in files:
        img = cv2.imread(f, cv2.CV_16UC1)
        img = (img.astype(np.float) - 2048) / (15400 - 2048) * 4

        g = img[0::2, 0::2]  # [0,0]
        b = img[0::2, 1::2]  # [0,1]
        r = img[1::2, 0::2]  # [1,0]
        # g_ = img[1::2, 1::2]

        rgb = cv2.merge((r, g, b))

        # save_name = f.replace('.png', '_rgb.png').replace('2nd_raw', '2nd_awb')

        r_gain, g_gain, b_gain = perf_ref(rgb, eps=0.1)
        # r_gain, g_gain, b_gain = dynamic(rgb.astype(np.float32))

        r *= r_gain / g_gain
        b *= b_gain / g_gain
        print(r_gain / g_gain, b_gain / g_gain)

        out_rgb = np.clip(cv2.merge((r, g, b)) * 255, 0, 255)

        save_name = f.replace('.png', '_awb4_dyn.png').replace('2nd_raw', '2nd_awb')

        cv2.imwrite(save_name, cv2.cvtColor(out_rgb.astype(np.uint8), cv2.COLOR_RGB2BGR))

        # break


if __name__ == '__main__':
    main()
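For context: perf_ref implements a perfect-reflector style white balance — it masks out near-clipped pixels, keeps roughly the brightest eps fraction of the remainder (ranked by R+G+B via a histogram sweep), averages each channel over that near-white set, and returns the reciprocals as gains; awb_v then normalizes the red and blue gains by the green gain. Below is a minimal, self-contained sketch of the same idea (a reimplementation for illustration, not the committed code; it avoids the np.int/np.float aliases used above, which were removed in NumPy 1.24).

# Sketch only: perfect-reflector white-balance gains, in the spirit of perf_ref/awb_v above.
import numpy as np

def perfect_reflector_gains(rgb, eps=0.1):
    """rgb: float image in [0, 1], shape (H, W, 3). Returns (r_gain, g_gain, b_gain) normalized to G."""
    brightness = rgb.sum(axis=2)              # R + G + B as a "whiteness" proxy
    thr = np.quantile(brightness, 1.0 - eps)  # keep roughly the brightest eps fraction
    mask = brightness > thr
    if not mask.any():
        return 1.0, 1.0, 1.0
    r_ave, g_ave, b_ave = (rgb[..., c][mask].mean() for c in range(3))
    return g_ave / r_ave, 1.0, g_ave / b_ave  # gains that pull the bright region toward gray

if __name__ == '__main__':
    rng = np.random.default_rng(0)
    img = np.clip(rng.random((64, 64, 3)) * [1.0, 1.0, 0.7], 0, 1)  # simulate a blue-deficient cast
    print(perfect_reflector_gains(img))       # blue gain should come out greater than 1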
DH-AISP/1/daylight_isp_03_3_unet_sid_5/checkpoint
ADDED
@@ -0,0 +1,2 @@
model_checkpoint_path: "model.ckpt"
all_model_checkpoint_paths: "model.ckpt"
DH-AISP/1/daylight_isp_03_3_unet_sid_5/model.ckpt.data-00000-of-00001
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6997bfa5624aba66e2497088cc8f379db63bac343a0a648e08f6a5840a48259f
size 175070404
DH-AISP/1/daylight_isp_03_3_unet_sid_5/model.ckpt.index
ADDED
Binary file (6.36 kB).
DH-AISP/1/daylight_isp_03_3_unet_sid_5/model.ckpt.meta
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:79f84947bf3a5a9e851539308b85b43ecc6a8e93ed2c7ab9adb23f0fd6796286
size 124053471
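Both model.ckpt payloads are committed as Git LFS pointer stubs: the repository stores only the three-line text above (spec version, sha256 oid, byte size), while the 175 MB data shard and 124 MB meta graph live in LFS storage. A small illustrative sketch (not part of the commit) that parses such a stub, assuming it is checked out as the plain-text pointer rather than the smudged binary:

# Sketch: read a Git LFS pointer stub like the ones above into a dict with 'version', 'oid', 'size'.
def parse_lfs_pointer(path):
    fields = {}
    with open(path) as fh:
        for line in fh:
            key, _, value = line.strip().partition(' ')
            if key:
                fields[key] = value
    fields['size'] = int(fields.get('size', 0))
    return fields

if __name__ == '__main__':
    info = parse_lfs_pointer('DH-AISP/1/daylight_isp_03_3_unet_sid_5/model.ckpt.meta')
    print(info['oid'], info['size'])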
DH-AISP/1/tensorflow2to1_3_unet_bining3_7.py
ADDED
@@ -0,0 +1,451 @@
# uniform content loss + adaptive threshold + per_class_input + recursive G
# improvement upon cqf37
from __future__ import division
import os
import tensorflow.compat.v1 as tf
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tf_slim as slim
import tensorflow as tf2
tf2.test.is_gpu_available()
import numpy as np
import glob
# import scipy.io as sio
import cv2
import json
from fractions import Fraction
import pdb
import sys
import argparse

from awb import awb_v

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
input_dir = '../data/'
cha1 = 32

# get train IDs
train_fns = glob.glob(input_dir + '*.png')
train_ids = [os.path.basename(train_fn) for train_fn in train_fns]

result_dir = './mid/'
checkpoint_dir = './daylight_isp_03_3_unet_sid_5/'

if not os.path.exists(result_dir):
    os.mkdir(result_dir)

#run python tensorflow2to1_1214_5202x3464_01_unetpp3.py ./data/ ./result/ ./daylight_isp_03/

# DEBUG = 0
# if DEBUG == 1:
#     save_freq = 2
#     test_ids = test_ids[0:5]

def json_read(fname, **kwargs):
    with open(fname) as j:
        data = json.load(j, **kwargs)
    return data

def fraction_from_json(json_object):
    if 'Fraction' in json_object:
        return Fraction(*json_object['Fraction'])
    return json_object

def fractions2floats(fractions):
    floats = []
    for fraction in fractions:
        floats.append(float(fraction.numerator) / fraction.denominator)
    return floats

def tv_loss(input_, output):
    I = tf.image.rgb_to_grayscale(input_)
    L = tf.log(I+0.0001)
    dx = L[:, :-1, :-1, :] - L[:, :-1, 1:, :]
    dy = L[:, :-1, :-1, :] - L[:, 1:, :-1, :]

    alpha = tf.constant(1.2)
    lamda = tf.constant(1.5)
    dx = tf.divide(lamda, tf.pow(tf.abs(dx),alpha)+ tf.constant(0.0001))
    dy = tf.divide(lamda, tf.pow(tf.abs(dy),alpha)+ tf.constant(0.0001))
    shape = output.get_shape()
    x_loss = dx *((output[:, :-1, :-1, :] - output[:, :-1, 1:, :])**2)
    y_loss = dy *((output[:, :-1, :-1, :] - output[:, 1:, :-1, :])**2)
    tvloss = tf.reduce_mean(x_loss + y_loss)/2.0
    return tvloss

def lrelu(x):
    return tf.maximum(x * 0.2, x)


def upsample_and_concat_3(x1, x2, output_channels, in_channels, name):
    with tf.variable_scope(name):
        x1 = slim.conv2d(x1, output_channels, [3, 3], rate=1, activation_fn=lrelu, scope='conv_2to1')
        deconv = tf.image.resize_images(x1, [x1.shape[1] * 2, x1.shape[2] * 2])
        deconv_output = tf.concat([deconv, x2], 3)
        deconv_output.set_shape([None, None, None, output_channels * 2])
        return deconv_output


def upsample_and_concat_h(x1, x2, output_channels, in_channels, name):
    with tf.variable_scope(name):
        #deconv = tf.image.resize_images(x1, [x1.shape[1].value*2, x1.shape[2].value*2])
        pool_size = 2
        deconv_filter = tf.Variable(tf.truncated_normal([pool_size, pool_size, output_channels, in_channels], stddev=0.02))
        deconv = tf.nn.conv2d_transpose(x1, deconv_filter, tf.shape(x2), strides=[1, pool_size, pool_size, 1])

        deconv_output = tf.concat([deconv, x2], 3)
        deconv_output.set_shape([None, None, None, output_channels * 2])

        return deconv_output

def upsample_and_concat_h_only(x1, output_channels, in_channels, name):
    with tf.variable_scope(name):
        x1 = tf.image.resize_images(x1, [x1.shape[1] * 2, x1.shape[2] * 2])
        x1.set_shape([None, None, None, output_channels])
        return x1


def conv_block(input, output_channels, name):
    with tf.variable_scope(name):
        conv = slim.conv2d(input, output_channels, [3, 3], activation_fn=lrelu, scope='conv1')
        conv = slim.conv2d(conv, output_channels, [3, 3], activation_fn=lrelu, scope='conv2')
        return conv


def conv_block_up(input, output_channels, name):
    with tf.variable_scope(name):
        conv = slim.conv2d(input, output_channels, [1, 1], scope='conv0')
        conv = slim.conv2d(conv, output_channels, [3, 3], activation_fn=lrelu, scope='conv1')
        conv = slim.conv2d(conv, output_channels, [3, 3], activation_fn=lrelu, scope='conv2')

        return conv


def upsample_and_concat(x1, x2, output_channels, in_channels, p, name):
    with tf.variable_scope(name):
        pool_size = 2

        deconv_filter = tf.Variable(tf.truncated_normal([pool_size, pool_size, output_channels, in_channels], stddev=0.02))
        deconv_filter = tf.cast(deconv_filter, x1.dtype)
        deconv = tf.nn.conv2d_transpose(x1, deconv_filter, tf.shape(x2[0]), strides=[1, pool_size, pool_size, 1])
        # x2.append(deconv)
        x2 = tf.concat(x2, axis=3)
        deconv_output = tf.concat([x2, deconv], axis=3)
        deconv_output.set_shape([None, None, None, output_channels * (p + 1)])

        return deconv_output


def network(input):
    with tf.variable_scope("generator_h"):
        conv1_h = slim.conv2d(input, 32, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv1_1')
        conv1_h = slim.conv2d(conv1_h, 32, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv1_2')
        pool1_h = slim.max_pool2d(conv1_h, [2, 2], padding='SAME')

        conv2_h = slim.conv2d(pool1_h, cha1*2, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv2_1')
        conv2_h = slim.conv2d(conv2_h, cha1*2, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv2_2')
        pool2_h = slim.max_pool2d(conv2_h, [2, 2], padding='SAME')

        conv3_h = slim.conv2d(pool2_h, cha1*4, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv3_1')
        conv3_h = slim.conv2d(conv3_h, cha1*4, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv3_2')
        pool3_h = slim.max_pool2d(conv3_h, [2, 2], padding='SAME')

        conv4_h = slim.conv2d(pool3_h, cha1*8, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv4_1')
        conv4_h = slim.conv2d(conv4_h, cha1*8, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv4_2')
        conv6_h = slim.conv2d(conv4_h, cha1*8, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv6_1')
        conv6_h = slim.conv2d(conv6_h, cha1*8, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv6_2')

        up7_h = upsample_and_concat_3(conv6_h, conv3_h, cha1*4,cha1*8, name='up7')
        conv7_h = slim.conv2d(up7_h, cha1*4, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv7_1')
        conv7_h = slim.conv2d(conv7_h, cha1*4, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv7_2')

        up8_h = upsample_and_concat_3(conv7_h, conv2_h, cha1*2,cha1*4, name='up8')
        conv8_h = slim.conv2d(up8_h, cha1*2, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv8_1')
        conv8_h = slim.conv2d(conv8_h, cha1*2, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv8_2')

        up9_h = upsample_and_concat_3(conv8_h, conv1_h, cha1,cha1*2, name='up9')
        conv9_h = slim.conv2d(up9_h, cha1, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv9_1')
        conv9_h = slim.conv2d(conv9_h, cha1, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv9_2')

        up10_h = upsample_and_concat_h_only(conv9_h, cha1,cha1, name='up10')
        conv10_h = slim.conv2d(up10_h, cha1, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv10_1')
        out = slim.conv2d(conv10_h, 3, [3, 3], rate=1, activation_fn=None, scope='g_conv10_2')
        return out



def fix_orientation(image, orientation):
    # 1 = Horizontal(normal)
    # 2 = Mirror horizontal
    # 3 = Rotate 180
    # 4 = Mirror vertical
    # 5 = Mirror horizontal and rotate 270 CW
    # 6 = Rotate 90 CW
    # 7 = Mirror horizontal and rotate 90 CW
    # 8 = Rotate 270 CW

    if type(orientation) is list:
        orientation = orientation[0]

    if orientation == 'Horizontal (normal)':
        pass
    elif orientation == 'Mirror horizontal':
        image = cv2.flip(image, 0)
    elif orientation == 'Rotate 180':
        image = cv2.rotate(image, cv2.ROTATE_180)
    elif orientation == 'Mirror vertical':
        image = cv2.flip(image, 1)
    elif orientation == 'Mirror horizontal and rotate 270 CW':
        image = cv2.flip(image, 0)
        image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
    elif orientation == 'Rotate 90 CW':
        image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
    elif orientation == 'Mirror horizontal and rotate 90 CW':
        image = cv2.flip(image, 0)
        image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
    elif orientation == 'Rotate 270 CW':
        image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)

    return image

class ExposureFusion(object):
    def __init__(self, sequence, best_exposedness=0.5, sigma=0.2, eps=1e-12, exponents=(1.0, 1.0, 1.0), layers=11):
        self.sequence = sequence  # [N, H, W, 3], (0..1), float32
        self.img_num = sequence.shape[0]
        self.best_exposedness = best_exposedness
        self.sigma = sigma
        self.eps = eps
        self.exponents = exponents
        self.layers = layers

    @staticmethod
    def cal_contrast(src):
        gray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
        laplace_kernel = np.array([[0, 1, 0], [1, -4, 1], [0, 1, 0]], dtype=np.float32)
        contrast = cv2.filter2D(gray, -1, laplace_kernel, borderType=cv2.BORDER_REPLICATE)
        return np.abs(contrast)

    @staticmethod
    def cal_saturation(src):
        mean = np.mean(src, axis=-1)
        channels = [(src[:, :, c] - mean)**2 for c in range(3)]
        saturation = np.sqrt(np.mean(channels, axis=0))
        return saturation

    @staticmethod
    def cal_exposedness(src, best_exposedness, sigma):
        exposedness = [gauss_curve(src[:, :, c], best_exposedness, sigma) for c in range(3)]
        exposedness = np.prod(exposedness, axis=0)
        return exposedness

    def cal_weight_map(self):
        #pdb.set_trace()
        weights = []
        for idx in range(self.sequence.shape[0]):
            contrast = self.cal_contrast(self.sequence[idx])
            saturation = self.cal_saturation(self.sequence[idx])
            exposedness = self.cal_exposedness(self.sequence[idx], self.best_exposedness, self.sigma)
            weight = np.power(contrast, self.exponents[0]) * np.power(saturation, self.exponents[1]) * np.power(exposedness, self.exponents[2])
            # Gauss Blur
            # weight = cv2.GaussianBlur(weight, (21, 21), 2.1)
            weights.append(weight)
        #pdb.set_trace()
        weights = np.stack(weights, 0) + self.eps
        # normalize
        weights = weights / np.expand_dims(np.sum(weights, axis=0), axis=0)
        return weights

    def naive_fusion(self):
        weights = self.cal_weight_map()  # [N, H, W]
        weights = np.stack([weights, weights, weights], axis=-1)  # [N, H, W, 3]
        naive_fusion = np.sum(weights * self.sequence * 255, axis=0)
        naive_fusion = np.clip(naive_fusion, 0, 255).astype(np.uint8)
        return naive_fusion

    def build_gaussian_pyramid(self, high_res):
        #pdb.set_trace()
        gaussian_pyramid = [high_res]
        for idx in range(1, self.layers):
            kernel1=np.array([[0.0039,0.0156,0.0234,0.0156,0.0039],[0.0156,0.0625,0.0938,0.0625,0.0156],[0.0234,0.0938,0.1406,0.0938,0.0234],[0.0156,0.0625,0.0938,0.0625,0.0156],[0.0039,0.0156,0.0234,0.0156,0.0039]],dtype='float32')
            gaussian_pyramid.append(cv2.filter2D(gaussian_pyramid[-1], -1,kernel=kernel1)[::2, ::2])
            #gaussian_pyramid.append(cv2.GaussianBlur(gaussian_pyramid[-1], (5, 5), 0.83)[::2, ::2])
        return gaussian_pyramid

    def build_laplace_pyramid(self, gaussian_pyramid):
        laplace_pyramid = [gaussian_pyramid[-1]]
        for idx in range(1, self.layers):
            size = (gaussian_pyramid[self.layers - idx - 1].shape[1], gaussian_pyramid[self.layers - idx - 1].shape[0])
            upsampled = cv2.resize(gaussian_pyramid[self.layers - idx], size, interpolation=cv2.INTER_LINEAR)
            laplace_pyramid.append(gaussian_pyramid[self.layers - idx - 1] - upsampled)
        laplace_pyramid.reverse()
        return laplace_pyramid

    def multi_resolution_fusion(self):
        #pdb.set_trace()
        weights = self.cal_weight_map()  # [N, H, W]
        weights = np.stack([weights, weights, weights], axis=-1)  # [N, H, W, 3]

        image_gaussian_pyramid = [self.build_gaussian_pyramid(self.sequence[i] * 255) for i in range(self.img_num)]
        image_laplace_pyramid = [self.build_laplace_pyramid(image_gaussian_pyramid[i]) for i in range(self.img_num)]
        weights_gaussian_pyramid = [self.build_gaussian_pyramid(weights[i]) for i in range(self.img_num)]

        fused_laplace_pyramid = [np.sum([image_laplace_pyramid[n][l] *
                                         weights_gaussian_pyramid[n][l] for n in range(self.img_num)], axis=0) for l in range(self.layers)]

        result = fused_laplace_pyramid[-1]
        for k in range(1, self.layers):
            size = (fused_laplace_pyramid[self.layers - k - 1].shape[1], fused_laplace_pyramid[self.layers - k - 1].shape[0])
            upsampled = cv2.resize(result, size, interpolation=cv2.INTER_LINEAR)
            result = upsampled + fused_laplace_pyramid[self.layers - k - 1]
        #pdb.set_trace()
        #result = np.clip(result, 0, 255).astype(np.uint8)


        return result

h_pre1, w_pre1 = 6144, 8192
pad_1 = 0
pad_2 = 0
h_exp1, w_exp1 = h_pre1 // 2, w_pre1 // 2

sess = tf.Session()
in_image = tf.placeholder(tf.float32, [None, h_exp1, w_exp1, 4])

in_image1 = tf.nn.avg_pool(in_image,ksize=[1,4,4,1],strides=[1,4,4,1],padding='SAME')
in_image2 = tf.nn.avg_pool(in_image,ksize=[1,8,8,1],strides=[1,8,8,1],padding='SAME')

out_image1 = network(in_image1)
out_image2 = network(in_image2, reuse=True)

t_vars = tf.trainable_variables()
for ele1 in t_vars:
    print("variable: ", ele1)

saver = tf.train.Saver()
sess.run(tf.global_variables_initializer())

ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
if ckpt:
    print('loaded ' + ckpt.model_checkpoint_path)
    saver.restore(sess, ckpt.model_checkpoint_path)

in_pic4 = np.zeros([h_exp1, w_exp1, 4])
for k in range(len(train_ids)):

    print(k)
    train_id = train_ids[k]
    in_path = input_dir + train_id[:-4] + '.png'
    #raw_image = cv2.imread(in_path, cv2.IMREAD_UNCHANGED).astype(np.float32)
    raw_image = cv2.imread(in_path, cv2.IMREAD_UNCHANGED).astype(np.float32)
    #meta = np.load(input_dir1 + train_id[:-4] + '.npy').astype(np.float32)
    #meta = scipy.io.loadmat(input_dir2 + train_id[:-4] + '.mat')
    metadata = json_read(in_path[:-4] + '.json', object_hook=fraction_from_json)

    white_level = float(metadata['white_level'])
    black_level = float(metadata['black_level'][0].numerator)

    orientation = metadata['orientation']

    in_pic2 = np.clip((raw_image - black_level) /(white_level-black_level),0,1)

    mean = np.mean(np.mean(in_pic2))
    var = np.var(in_pic2)

    bining = 4

    if (mean < 0.01):
        ratio = 6
    elif (mean < 0.02):
        ratio = 4
    elif (mean < 0.037):
        ratio = 3
    else:
        ratio = 2

    if (var > 0.015):
        ratio = ratio + 1

    noise_profile = float(metadata['noise_profile'][0]) * ratio
    if (noise_profile > 0.02):
        bining = 8
        ratio = np.clip(ratio - 1,2,4)

    #r_gain, b_gain = awb_v(in_pic2, bayer='RGGB', eps=1)
    r_gain1 = 1./metadata['as_shot_neutral'][0]
    b_gain1 = 1./metadata['as_shot_neutral'][2]

    #in_pic3 = np.pad(in_pic2, ((top_pad, btm_pad), (left_pad, right_pad)), mode='reflect') # GBRG to RGGB + reflect padding
    h_pre,w_pre = in_pic2.shape

    if (metadata['cfa_pattern'][0].numerator == 2):
        in_pic2[0:h_pre-1,0:w_pre-1] = in_pic2[1:h_pre,1:w_pre]

    r_gain, b_gain = awb_v(in_pic2 * (ratio**2), bayer='RGGB', eps=1)
    in_pic3 = in_pic2

    in_pic4[0:h_pre//2, 0:w_pre//2, 0] = in_pic3[0::2, 0::2] * r_gain
    in_pic4[0:h_pre//2, 0:w_pre//2, 1] = in_pic3[0::2, 1::2]
    in_pic4[0:h_pre//2, 0:w_pre//2, 2] = in_pic3[1::2, 1::2] * b_gain
    in_pic4[0:h_pre//2, 0:w_pre//2, 3] = in_pic3[1::2, 0::2]

    im1=np.clip(in_pic4*1,0,1)
    in_np1 = np.expand_dims(im1,axis = 0)
    if (bining == 4):
        out_np1 =sess.run(out_image1,feed_dict={in_image: in_np1})
    else:
        out_np1 =sess.run(out_image2,feed_dict={in_image: in_np1})

    out_np2 = fix_orientation(out_np1[0,0:h_pre//bining,0:w_pre//bining,:], orientation)
    h_pre2,w_pre2,cc = out_np2.shape

    if h_pre2 > w_pre2:
        out_np_1 = cv2.resize(out_np2, (768, 1024), cv2.INTER_CUBIC)
    if w_pre2 > h_pre2:
        out_np_1 = cv2.resize(out_np2, (1024, 768), cv2.INTER_CUBIC)

    im1=np.clip(in_pic4*ratio,0,1)
    in_np1 = np.expand_dims(im1,axis = 0)
    if (bining == 4):
        out_np1 =sess.run(out_image1,feed_dict={in_image: in_np1})
    else:
        out_np1 =sess.run(out_image2,feed_dict={in_image: in_np1})

    out_np2 = fix_orientation(out_np1[0,0:h_pre//bining,0:w_pre//bining,:], orientation)
    h_pre2,w_pre2,cc = out_np2.shape

    if h_pre2 > w_pre2:
        out_np_2 = cv2.resize(out_np2, (768, 1024), cv2.INTER_CUBIC)
    if w_pre2 > h_pre2:
        out_np_2 = cv2.resize(out_np2, (1024, 768), cv2.INTER_CUBIC)


    im1=np.clip(in_pic4*(ratio**2),0,1)
    in_np1 = np.expand_dims(im1,axis = 0)

    if (bining == 4):
        out_np1 =sess.run(out_image1,feed_dict={in_image: in_np1})
    else:
        out_np1 =sess.run(out_image2,feed_dict={in_image: in_np1})

    out_np2 = fix_orientation(out_np1[0,0:h_pre//bining,0:w_pre//bining,:], orientation)
    h_pre2,w_pre2,cc = out_np2.shape

    if h_pre2 > w_pre2:
        out_np_3 = cv2.resize(out_np2, (768, 1024), cv2.INTER_CUBIC)
    if w_pre2 > h_pre2:
        out_np_3 = cv2.resize(out_np2, (1024, 768), cv2.INTER_CUBIC)

    #pdb.set_trace()
    '''sequence = np.stack([out_np_1, out_np_2, out_np_3], axis=0)
    #sequence0 = sequence[0]
    mef = ExposureFusion(sequence.astype(np.float32))
    multi_res_fusion = mef.multi_resolution_fusion()
    #pdb.set_trace()
    result = reprocessing(multi_res_fusion)'''

    #out_crop = multi_res_fusion

    #np.save(result_dir + train_id[0:-4] + '_gray_{:}.npy'.format(gain), out_crop)
    cv2.imwrite(result_dir + train_id[0:-4] + '_1.png', out_np_1[:,:,::-1]*255)
    cv2.imwrite(result_dir + train_id[0:-4] + '_2.png', out_np_2[:,:,::-1]*255)
    cv2.imwrite(result_dir + train_id[0:-4] + '_3.png', out_np_3[:,:,::-1]*255)
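One detail worth flagging in the script above: ExposureFusion.cal_exposedness calls gauss_curve, which is neither defined nor imported in this file, so the Mertens-style weight map cannot run as committed (the fusion block in the main loop is also left inside a string literal). In Mertens exposure fusion the well-exposedness weight is a Gaussian centred on a target intensity; the sketch below shows the usual definition such a gauss_curve would have (an assumption, not taken from this commit), together with the per-pixel weight it feeds.

# Assumed helper (not in the commit): Mertens-style well-exposedness weight.
import numpy as np

def gauss_curve(x, best_exposedness=0.5, sigma=0.2):
    # Gaussian bump centred on the "ideal" intensity; near zero for under/over-exposed pixels
    return np.exp(-((x - best_exposedness) ** 2) / (2.0 * sigma ** 2))

def exposedness_weight(rgb, best_exposedness=0.5, sigma=0.2):
    # Product over channels, mirroring ExposureFusion.cal_exposedness above
    return np.prod([gauss_curve(rgb[:, :, c], best_exposedness, sigma) for c in range(3)], axis=0)

if __name__ == '__main__':
    img = np.random.default_rng(0).random((8, 8, 3)).astype(np.float32)
    print(exposedness_weight(img).shape)  # (8, 8)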
DH-AISP/2/__pycache__/model_convnext2_hdr.cpython-37.pyc
ADDED
Binary file (18.5 kB).
DH-AISP/2/__pycache__/myFFCResblock0.cpython-37.pyc
ADDED
Binary file (1.55 kB).
DH-AISP/2/__pycache__/test_dataset_for_testing.cpython-37.pyc
ADDED
Binary file (1.99 kB).
DH-AISP/2/focal_frequency_loss/__init__.py
ADDED
@@ -0,0 +1,3 @@
from .focal_frequency_loss import FocalFrequencyLoss

__all__ = ['FocalFrequencyLoss']
DH-AISP/2/focal_frequency_loss/__pycache__/__init__.cpython-37.pyc
ADDED
Binary file (263 Bytes).
DH-AISP/2/focal_frequency_loss/__pycache__/focal_frequency_loss.cpython-37.pyc
ADDED
Binary file (4.01 kB).
DH-AISP/2/focal_frequency_loss/focal_frequency_loss.py
ADDED
@@ -0,0 +1,114 @@
import torch
import torch.nn as nn

# version adaptation for PyTorch > 1.7.1
IS_HIGH_VERSION = tuple(map(int, torch.__version__.split('+')[0].split('.'))) > (1, 7, 1)
if IS_HIGH_VERSION:
    import torch.fft


class FocalFrequencyLoss(nn.Module):
    """The torch.nn.Module class that implements focal frequency loss - a
    frequency domain loss function for optimizing generative models.

    Ref:
    Focal Frequency Loss for Image Reconstruction and Synthesis. In ICCV 2021.
    <https://arxiv.org/pdf/2012.12821.pdf>

    Args:
        loss_weight (float): weight for focal frequency loss. Default: 1.0
        alpha (float): the scaling factor alpha of the spectrum weight matrix for flexibility. Default: 1.0
        patch_factor (int): the factor to crop image patches for patch-based focal frequency loss. Default: 1
        ave_spectrum (bool): whether to use minibatch average spectrum. Default: False
        log_matrix (bool): whether to adjust the spectrum weight matrix by logarithm. Default: False
        batch_matrix (bool): whether to calculate the spectrum weight matrix using batch-based statistics. Default: False
    """

    def __init__(self, loss_weight=1.0, alpha=1.0, patch_factor=1, ave_spectrum=False, log_matrix=False, batch_matrix=False):
        super(FocalFrequencyLoss, self).__init__()
        self.loss_weight = loss_weight
        self.alpha = alpha
        self.patch_factor = patch_factor
        self.ave_spectrum = ave_spectrum
        self.log_matrix = log_matrix
        self.batch_matrix = batch_matrix

    def tensor2freq(self, x):
        # crop image patches
        patch_factor = self.patch_factor
        _, _, h, w = x.shape
        assert h % patch_factor == 0 and w % patch_factor == 0, (
            'Patch factor should be divisible by image height and width')
        patch_list = []
        patch_h = h // patch_factor
        patch_w = w // patch_factor
        for i in range(patch_factor):
            for j in range(patch_factor):
                patch_list.append(x[:, :, i * patch_h:(i + 1) * patch_h, j * patch_w:(j + 1) * patch_w])

        # stack to patch tensor
        y = torch.stack(patch_list, 1)

        # perform 2D DFT (real-to-complex, orthonormalization)
        if IS_HIGH_VERSION:
            freq = torch.fft.fft2(y, norm='ortho')
            freq = torch.stack([freq.real, freq.imag], -1)
        else:
            freq = torch.rfft(y, 2, onesided=False, normalized=True)
        return freq

    def loss_formulation(self, recon_freq, real_freq, matrix=None):
        # spectrum weight matrix
        if matrix is not None:
            # if the matrix is predefined
            weight_matrix = matrix.detach()
        else:
            # if the matrix is calculated online: continuous, dynamic, based on current Euclidean distance
            matrix_tmp = (recon_freq - real_freq) ** 2
            matrix_tmp = torch.sqrt(matrix_tmp[..., 0] + matrix_tmp[..., 1]) ** self.alpha

            # whether to adjust the spectrum weight matrix by logarithm
            if self.log_matrix:
                matrix_tmp = torch.log(matrix_tmp + 1.0)

            # whether to calculate the spectrum weight matrix using batch-based statistics
            if self.batch_matrix:
                matrix_tmp = matrix_tmp / matrix_tmp.max()
            else:
                matrix_tmp = matrix_tmp / matrix_tmp.max(-1).values.max(-1).values[:, :, :, None, None]

            matrix_tmp[torch.isnan(matrix_tmp)] = 0.0
            matrix_tmp = torch.clamp(matrix_tmp, min=0.0, max=1.0)
            weight_matrix = matrix_tmp.clone().detach()

        assert weight_matrix.min().item() >= 0 and weight_matrix.max().item() <= 1, (
            'The values of spectrum weight matrix should be in the range [0, 1], '
            'but got Min: %.10f Max: %.10f' % (weight_matrix.min().item(), weight_matrix.max().item()))

        # frequency distance using (squared) Euclidean distance
        tmp = (recon_freq - real_freq) ** 2
        freq_distance = tmp[..., 0] + tmp[..., 1]

        # dynamic spectrum weighting (Hadamard product)
        loss = weight_matrix * freq_distance
        return torch.mean(loss)

    def forward(self, pred, target, matrix=None, **kwargs):
        """Forward function to calculate focal frequency loss.

        Args:
            pred (torch.Tensor): of shape (N, C, H, W). Predicted tensor.
            target (torch.Tensor): of shape (N, C, H, W). Target tensor.
            matrix (torch.Tensor, optional): Element-wise spectrum weight matrix.
                Default: None (If set to None: calculated online, dynamic).
        """
        pred_freq = self.tensor2freq(pred)
        target_freq = self.tensor2freq(target)

        # whether to use minibatch average spectrum
        if self.ave_spectrum:
            pred_freq = torch.mean(pred_freq, 0, keepdim=True)
            target_freq = torch.mean(target_freq, 0, keepdim=True)

        # calculate focal frequency loss
        return self.loss_formulation(pred_freq, target_freq, matrix) * self.loss_weight
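Usage of the module above is a plain forward call on (N, C, H, W) tensors; a minimal sketch based on the forward signature, assuming it runs from DH-AISP/2 so the focal_frequency_loss package added in this commit is importable (shapes are illustrative):

# Minimal usage sketch for FocalFrequencyLoss as defined above.
import torch
from focal_frequency_loss import FocalFrequencyLoss

ffl = FocalFrequencyLoss(loss_weight=1.0, alpha=1.0)

pred = torch.randn(4, 3, 64, 64, requires_grad=True)  # e.g. a network output
target = torch.randn(4, 3, 64, 64)                     # e.g. the ground-truth image

loss = ffl(pred, target)   # scalar; the spectrum weight matrix is computed online when matrix=None
loss.backward()
print(loss.item())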
DH-AISP/2/model_convnext2_hdr.py
ADDED
@@ -0,0 +1,592 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from timm.models.layers import trunc_normal_, DropPath
from timm.models.registry import register_model

#import Convnext as PreConv
from myFFCResblock0 import myFFCResblock


# A ConvNet for the 2020s
# original implementation https://github.com/facebookresearch/ConvNeXt/blob/main/models/convnext.py
# paper https://arxiv.org/pdf/2201.03545.pdf

class ConvNeXt0(nn.Module):
    r""" ConvNeXt
        A PyTorch impl of : `A ConvNet for the 2020s` -
        https://arxiv.org/pdf/2201.03545.pdf
    Args:
        in_chans (int): Number of input image channels. Default: 3
        num_classes (int): Number of classes for classification head. Default: 1000
        depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3]
        dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768]
        drop_path_rate (float): Stochastic depth rate. Default: 0.
        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
        head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1.
    """
    def __init__(self, block, in_chans=3, num_classes=1000,
                 depths=[3, 3, 27, 3], dims=[256, 512, 1024, 2048], drop_path_rate=0.,
                 layer_scale_init_value=1e-6, head_init_scale=1.,
                 ):
        super().__init__()

        self.downsample_layers = nn.ModuleList()  # stem and 3 intermediate downsampling conv layers
        stem = nn.Sequential(
            nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4),
            LayerNorm(dims[0], eps=1e-6, data_format="channels_first")
        )
        self.downsample_layers.append(stem)
        for i in range(3):
            downsample_layer = nn.Sequential(
                LayerNorm(dims[i], eps=1e-6, data_format="channels_first"),
                nn.Conv2d(dims[i], dims[i+1], kernel_size=2, stride=2),
            )
            self.downsample_layers.append(downsample_layer)

        self.stages = nn.ModuleList()  # 4 feature resolution stages, each consisting of multiple residual blocks
        dp_rates=[x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]
        cur = 0
        for i in range(4):
            stage = nn.Sequential(
                *[block(dim=dims[i], drop_path=dp_rates[cur + j],
                        layer_scale_init_value=layer_scale_init_value) for j in range(depths[i])]
            )
            self.stages.append(stage)
            cur += depths[i]

        self.norm = nn.LayerNorm(dims[-1], eps=1e-6)  # final norm layer
        self.head = nn.Linear(dims[-1], num_classes)

        self.apply(self._init_weights)
        self.head.weight.data.mul_(head_init_scale)
        self.head.bias.data.mul_(head_init_scale)

    def _init_weights(self, m):
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            trunc_normal_(m.weight, std=.02)
            nn.init.constant_(m.bias, 0)

    def forward_features(self, x):
        for i in range(4):
            x = self.downsample_layers[i](x)
            x = self.stages[i](x)
        return self.norm(x.mean([-2, -1]))  # global average pooling, (N, C, H, W) -> (N, C)

    def forward(self, x):
        x = self.forward_features(x)
        x = self.head(x)
        return x




def dwt_init(x):
    x01 = x[:, :, 0::2, :] / 2  # x01.shape=[4,3,128,256]
    x02 = x[:, :, 1::2, :] / 2  # x02.shape=[4,3,128,256]
    x1 = x01[:, :, :, 0::2]  # x1.shape=[4,3,128,128]
    x2 = x02[:, :, :, 0::2]  # x2.shape=[4,3,128,128]
    x3 = x01[:, :, :, 1::2]  # x3.shape=[4,3,128,128]
    x4 = x02[:, :, :, 1::2]  # x4.shape=[4,3,128,128]
    x_LL = x1 + x2 + x3 + x4
    x_HL = -x1 - x2 + x3 + x4
    x_LH = -x1 + x2 - x3 + x4
    x_HH = x1 - x2 - x3 + x4
    return x_LL, torch.cat((x_HL, x_LH, x_HH), 1)

class DWT(nn.Module):
    def __init__(self):
        super(DWT, self).__init__()
        self.requires_grad = False
    def forward(self, x):
        return dwt_init(x)

class DWT_transform(nn.Module):
    def __init__(self, in_channels,out_channels):
        super().__init__()
        self.dwt = DWT()
        self.conv1x1_low = nn.Conv2d(in_channels, out_channels, kernel_size=1, padding=0)
        self.conv1x1_high = nn.Conv2d(in_channels*3, out_channels, kernel_size=1, padding=0)
    def forward(self, x):
        dwt_low_frequency,dwt_high_frequency = self.dwt(x)
        dwt_low_frequency = self.conv1x1_low(dwt_low_frequency)
        dwt_high_frequency = self.conv1x1_high(dwt_high_frequency)
        return dwt_low_frequency,dwt_high_frequency

def blockUNet(in_c, out_c, name, transposed=False, bn=False, relu=True, dropout=False):
    block = nn.Sequential()
    if relu:
        block.add_module('%s_relu' % name, nn.ReLU(inplace=True))
    else:
        block.add_module('%s_leakyrelu' % name, nn.LeakyReLU(0.2, inplace=True))
    if not transposed:
        block.add_module('%s_conv' % name, nn.Conv2d(in_c, out_c, 4, 2, 1, bias=False))
    else:
        block.add_module('%s_conv' % name, nn.Conv2d(in_c, out_c, kernel_size=3, stride=1, padding=1))
        block.add_module('%s_bili' % name, nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True))
    if bn:
        block.add_module('%s_bn' % name, nn.BatchNorm2d(out_c))
    if dropout:
        block.add_module('%s_dropout' % name, nn.Dropout2d(0.5, inplace=True))
    return block

# DW-GAN: A Discrete Wavelet Transform GAN for NonHomogeneous Dehazing 2021
# original implementation https://github.com/liuh127/NTIRE-2021-Dehazing-DWGAN/blob/main/model.py
# paper https://openaccess.thecvf.com/content/CVPR2021W/NTIRE/papers/Fu_DW-GAN_A_Discrete_Wavelet_Transform_GAN_for_NonHomogeneous_Dehazing_CVPRW_2021_paper.pdf
class dwt_ffc_UNet2(nn.Module):
    def __init__(self,output_nc=3, nf=16):
        super(dwt_ffc_UNet2, self).__init__()
        layer_idx = 1
        name = 'layer%d' % layer_idx
        layer1 = nn.Sequential()
        layer1.add_module(name, nn.Conv2d(16, nf-1, 4, 2, 1, bias=False))
        layer_idx += 1
        name = 'layer%d' % layer_idx
        layer2 = blockUNet(nf, nf*2-2, name, transposed=False, bn=True, relu=False, dropout=False)
        layer_idx += 1
        name = 'layer%d' % layer_idx
        layer3 = blockUNet(nf*2, nf*4-4, name, transposed=False, bn=True, relu=False, dropout=False)
        layer_idx += 1
        name = 'layer%d' % layer_idx
        layer4 = blockUNet(nf*4, nf*8-8, name, transposed=False, bn=True, relu=False, dropout=False)
        layer_idx += 1
        name = 'layer%d' % layer_idx
        layer5 = blockUNet(nf*8, nf*8-16, name, transposed=False, bn=True, relu=False, dropout=False)
        layer_idx += 1
        name = 'layer%d' % layer_idx
        layer6 = blockUNet(nf*4, nf*4, name, transposed=False, bn=False, relu=False, dropout=False)

        layer_idx -= 1
        name = 'dlayer%d' % layer_idx
        dlayer6 = blockUNet(nf * 4, nf * 2, name, transposed=True, bn=True, relu=True, dropout=False)
        layer_idx -= 1
        name = 'dlayer%d' % layer_idx
        dlayer5 = blockUNet(nf * 16+16, nf * 8, name, transposed=True, bn=True, relu=True, dropout=False)
        layer_idx -= 1
        name = 'dlayer%d' % layer_idx
        dlayer4 = blockUNet(nf * 16+8, nf * 4, name, transposed=True, bn=True, relu=True, dropout=False)
        layer_idx -= 1
        name = 'dlayer%d' % layer_idx
        dlayer3 = blockUNet(nf * 8+4, nf * 2, name, transposed=True, bn=True, relu=True, dropout=False)
        layer_idx -= 1
        name = 'dlayer%d' % layer_idx
        dlayer2 = blockUNet(nf * 4+2, nf, name, transposed=True, bn=True, relu=True, dropout=False)
        layer_idx -= 1
        name = 'dlayer%d' % layer_idx
        dlayer1 = blockUNet(nf * 2+1, nf * 2, name, transposed=True, bn=True, relu=True, dropout=False)

        self.initial_conv=nn.Conv2d(9,16,3,padding=1)
        self.bn1=nn.BatchNorm2d(16)
        self.layer1 = layer1
        self.DWT_down_0= DWT_transform(9,1)
        self.layer2 = layer2
        self.DWT_down_1 = DWT_transform(16, 2)
        self.layer3 = layer3
        self.DWT_down_2 = DWT_transform(32, 4)
        self.layer4 = layer4
        self.DWT_down_3 = DWT_transform(64, 8)
        self.layer5 = layer5
        self.DWT_down_4 = DWT_transform(128, 16)
        self.layer6 = layer6
        self.dlayer6 = dlayer6
        self.dlayer5 = dlayer5
        self.dlayer4 = dlayer4
        self.dlayer3 = dlayer3
        self.dlayer2 = dlayer2
        self.dlayer1 = dlayer1
        self.tail_conv1 = nn.Conv2d(48, 32, 3, padding=1, bias=True)
        self.bn2=nn.BatchNorm2d(32)
        self.tail_conv2 = nn.Conv2d(nf*2, output_nc, 3,padding=1, bias=True)


        self.FFCResNet = myFFCResblock(input_nc=64, output_nc=64)

    def forward(self, x):
        conv_start=self.initial_conv(x)
        conv_start=self.bn1(conv_start)
        conv_out1 = self.layer1(conv_start)
        dwt_low_0,dwt_high_0=self.DWT_down_0(x)
        out1=torch.cat([conv_out1, dwt_low_0], 1)
        conv_out2 = self.layer2(out1)
        dwt_low_1,dwt_high_1= self.DWT_down_1(out1)
        out2 = torch.cat([conv_out2, dwt_low_1], 1)
        conv_out3 = self.layer3(out2)

        dwt_low_2,dwt_high_2 = self.DWT_down_2(out2)
        out3 = torch.cat([conv_out3, dwt_low_2], 1)

        # conv_out4 = self.layer4(out3)
        # dwt_low_3,dwt_high_3 = self.DWT_down_3(out3)
        # out4 = torch.cat([conv_out4, dwt_low_3], 1)

        # conv_out5 = self.layer5(out4)
        # dwt_low_4,dwt_high_4 = self.DWT_down_4(out4)
        # out5 = torch.cat([conv_out5, dwt_low_4], 1)

        # out6 = self.layer6(out5)


        out3_ffc= self.FFCResNet(out3)


        dout3 = self.dlayer6(out3_ffc)


        Tout3_out2 = torch.cat([dout3, out2,dwt_high_1], 1)
        Tout2 = self.dlayer2(Tout3_out2)
        Tout2_out1 = torch.cat([Tout2, out1,dwt_high_0], 1)
        Tout1 = self.dlayer1(Tout2_out1)

        Tout1_outinit = torch.cat([Tout1, conv_start], 1)
        tail1=self.tail_conv1(Tout1_outinit)
        tail2=self.bn2(tail1)
        dout1 = self.tail_conv2(tail2)


        return dout1




class Block(nn.Module):
    r""" ConvNeXt Block. There are two equivalent implementations:
    (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)
    (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
    We use (2) as we find it slightly faster in PyTorch

    Args:
        dim (int): Number of input channels.
        drop_path (float): Stochastic depth rate. Default: 0.0
        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
    """
    def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6):
        super().__init__()
        self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim)  # depthwise conv
        self.norm = LayerNorm(dim, eps=1e-6)
        self.pwconv1 = nn.Linear(dim, 4 * dim)  # pointwise/1x1 convs, implemented with linear layers
        self.act = nn.GELU()
        self.pwconv2 = nn.Linear(4 * dim, dim)
        self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((dim)),
                                  requires_grad=True) if layer_scale_init_value > 0 else None
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()

    def forward(self, x):
        input = x
        x = self.dwconv(x)
        x = x.permute(0, 2, 3, 1)  # (N, C, H, W) -> (N, H, W, C)
        x = self.norm(x)
        x = self.pwconv1(x)
        x = self.act(x)
        x = self.pwconv2(x)
        if self.gamma is not None:
            x = self.gamma * x
        x = x.permute(0, 3, 1, 2)  # (N, H, W, C) -> (N, C, H, W)

        x = input + self.drop_path(x)
        return x


class ConvNeXt(nn.Module):
    def __init__(self, block, in_chans=3, num_classes=1000,
                 depths=[3, 3, 27, 3], dims=[256, 512, 1024,2048], drop_path_rate=0.,
                 layer_scale_init_value=1e-6, head_init_scale=1.,
                 ):
        super().__init__()

        self.downsample_layers = nn.ModuleList()  # stem and 3 intermediate downsampling conv layers
        stem = nn.Sequential(
            nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4),
            LayerNorm(dims[0], eps=1e-6, data_format="channels_first")
        )
        self.downsample_layers.append(stem)
        for i in range(3):
            downsample_layer = nn.Sequential(
                LayerNorm(dims[i], eps=1e-6, data_format="channels_first"),
                nn.Conv2d(dims[i], dims[i+1], kernel_size=2, stride=2),
            )
            self.downsample_layers.append(downsample_layer)

        self.stages = nn.ModuleList()  # 4 feature resolution stages, each consisting of multiple residual blocks
        dp_rates=[x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]
        cur = 0
        for i in range(4):
            stage = nn.Sequential(
                *[block(dim=dims[i], drop_path=dp_rates[cur + j],
                        layer_scale_init_value=layer_scale_init_value) for j in range(depths[i])]
            )
            self.stages.append(stage)
            cur += depths[i]

        self.norm = nn.LayerNorm(dims[-1], eps=1e-6)  # final norm layer
        self.head = nn.Linear(dims[-1], num_classes)

        self.head.weight.data.mul_(head_init_scale)
        self.head.bias.data.mul_(head_init_scale)


    def forward(self, x):
        x_layer1 = self.downsample_layers[0](x)
        x_layer1 = self.stages[0](x_layer1)

        x_layer2 = self.downsample_layers[1](x_layer1)
        x_layer2 = self.stages[1](x_layer2)

        x_layer3 = self.downsample_layers[2](x_layer2)
        out = self.stages[2](x_layer3)

        return x_layer1, x_layer2, out

class LayerNorm(nn.Module):
    r""" LayerNorm that supports two data formats: channels_last (default) or channels_first.
    The ordering of the dimensions in the inputs. channels_last corresponds to inputs with
    shape (batch_size, height, width, channels) while channels_first corresponds to inputs
    with shape (batch_size, channels, height, width).
    """
    def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(normalized_shape))
        self.bias = nn.Parameter(torch.zeros(normalized_shape))
        self.eps = eps
        self.data_format = data_format
        if self.data_format not in ["channels_last", "channels_first"]:
            raise NotImplementedError
        self.normalized_shape = (normalized_shape, )

    def forward(self, x):
        if self.data_format == "channels_last":
            return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
        elif self.data_format == "channels_first":
            u = x.mean(1, keepdim=True)
            s = (x - u).pow(2).mean(1, keepdim=True)
            x = (x - u) / torch.sqrt(s + self.eps)
            x = self.weight[:, None, None] * x + self.bias[:, None, None]
            return x


class PALayer(nn.Module):
    def __init__(self, channel):
        super(PALayer, self).__init__()
        self.pa = nn.Sequential(
            nn.Conv2d(channel, channel // 8, 1, padding=0, bias=True),
            nn.ReLU(inplace=True),
            nn.Conv2d(channel // 8, 1, 1, padding=0, bias=True),
            nn.Sigmoid()
        )
    def forward(self, x):
        y = self.pa(x)
        return x * y

class CALayer(nn.Module):
    def __init__(self, channel):
        super(CALayer, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.ca = nn.Sequential(
            nn.Conv2d(channel, channel // 8, 1, padding=0, bias=True),
            nn.ReLU(inplace=True),
            nn.Conv2d(channel // 8, channel, 1, padding=0, bias=True),
            nn.Sigmoid()
        )
    def forward(self, x):
        y = self.avg_pool(x)
        y = self.ca(y)
        return x * y

class CP_Attention_block(nn.Module):
    def __init__(self, conv, dim, kernel_size):
        super(CP_Attention_block, self).__init__()
        self.conv1 = conv(dim, dim, kernel_size, bias=True)
        self.act1 = nn.ReLU(inplace=True)
|
409 |
+
self.conv2 = conv(dim, dim, kernel_size, bias=True)
|
410 |
+
self.calayer = CALayer(dim)
|
411 |
+
self.palayer = PALayer(dim)
|
412 |
+
def forward(self, x):
|
413 |
+
res = self.act1(self.conv1(x))
|
414 |
+
res = res + x
|
415 |
+
res = self.conv2(res)
|
416 |
+
res = self.calayer(res)
|
417 |
+
res = self.palayer(res)
|
418 |
+
res += x
|
419 |
+
return res
|
420 |
+
|
421 |
+
def default_conv(in_channels, out_channels, kernel_size, bias=True):
|
422 |
+
return nn.Conv2d(in_channels, out_channels, kernel_size, padding=(kernel_size // 2), bias=bias)
|
423 |
+
|
424 |
+
class knowledge_adaptation_convnext(nn.Module):
|
425 |
+
def __init__(self):
|
426 |
+
super(knowledge_adaptation_convnext, self).__init__()
|
427 |
+
self.encoder = ConvNeXt(Block, in_chans=9,num_classes=1000, depths=[3, 3, 27, 3], dims=[256, 512, 1024,2048], drop_path_rate=0., layer_scale_init_value=1e-6, head_init_scale=1.)
|
428 |
+
'''pretrained_model = ConvNeXt0(Block, in_chans=3,num_classes=1000, depths=[3, 3, 27, 3], dims=[256, 512, 1024, 2048], drop_path_rate=0., layer_scale_init_value=1e-6, head_init_scale=1.)
|
429 |
+
#pretrained_model=nn.DataParallel(pretrained_model)
|
430 |
+
checkpoint=torch.load('./weights/convnext_xlarge_22k_1k_384_ema.pth')
|
431 |
+
#for k,v in checkpoint["model"].items():
|
432 |
+
#print(k)
|
433 |
+
#url="https://dl.fbaipublicfiles.com/convnext/convnext_large_1k_384.pth"
|
434 |
+
|
435 |
+
#checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cuda:0")
|
436 |
+
pretrained_model.load_state_dict(checkpoint["model"])
|
437 |
+
|
438 |
+
pretrained_dict = pretrained_model.state_dict()
|
439 |
+
model_dict = self.encoder.state_dict()
|
440 |
+
key_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
|
441 |
+
model_dict.update(key_dict)
|
442 |
+
self.encoder.load_state_dict(model_dict)'''
|
443 |
+
|
444 |
+
|
445 |
+
self.up_block= nn.PixelShuffle(2)
|
446 |
+
self.attention0 = CP_Attention_block(default_conv, 1024, 3)
|
447 |
+
self.attention1 = CP_Attention_block(default_conv, 256, 3)
|
448 |
+
self.attention2 = CP_Attention_block(default_conv, 192, 3)
|
449 |
+
self.attention3 = CP_Attention_block(default_conv, 112, 3)
|
450 |
+
self.attention4 = CP_Attention_block(default_conv, 28, 3)
|
451 |
+
self.conv_process_1 = nn.Conv2d(28, 28, kernel_size=3,padding=1)
|
452 |
+
self.conv_process_2 = nn.Conv2d(28, 28, kernel_size=3,padding=1)
|
453 |
+
self.tail = nn.Sequential(nn.ReflectionPad2d(3), nn.Conv2d(28, 3, kernel_size=7, padding=0), nn.Tanh())
|
454 |
+
def forward(self, input):
|
455 |
+
x_layer1, x_layer2, x_output = self.encoder(input)
|
456 |
+
|
457 |
+
x_mid = self.attention0(x_output) #[1024,24,24]
|
458 |
+
|
459 |
+
x = self.up_block(x_mid) #[256,48,48]
|
460 |
+
x = self.attention1(x)
|
461 |
+
|
462 |
+
x = torch.cat((x, x_layer2), 1) #[768,48,48]
|
463 |
+
|
464 |
+
x = self.up_block(x) #[192,96,96]
|
465 |
+
x = self.attention2(x)
|
466 |
+
x = torch.cat((x, x_layer1), 1) #[448,96,96]
|
467 |
+
x = self.up_block(x) #[112,192,192]
|
468 |
+
x = self.attention3(x)
|
469 |
+
|
470 |
+
x = self.up_block(x) #[28,384,384]
|
471 |
+
x = self.attention4(x)
|
472 |
+
|
473 |
+
x=self.conv_process_1(x)
|
474 |
+
out=self.conv_process_2(x)
|
475 |
+
return out
|
476 |
+
|
477 |
+
|
478 |
+
class fusion_net(nn.Module):
|
479 |
+
def __init__(self):
|
480 |
+
super(fusion_net, self).__init__()
|
481 |
+
self.dwt_branch=dwt_ffc_UNet2()
|
482 |
+
self.knowledge_adaptation_branch=knowledge_adaptation_convnext()
|
483 |
+
self.fusion = nn.Sequential(nn.ReflectionPad2d(3), nn.Conv2d(31, 3, kernel_size=7, padding=0), nn.Tanh())
|
484 |
+
def forward(self, input):
|
485 |
+
dwt_branch=self.dwt_branch(input)
|
486 |
+
knowledge_adaptation_branch=self.knowledge_adaptation_branch(input)
|
487 |
+
x = torch.cat([dwt_branch, knowledge_adaptation_branch], 1)
|
488 |
+
x = self.fusion(x)
|
489 |
+
return x
|
490 |
+
|
491 |
+
|
492 |
+
|
493 |
+
class Discriminator(nn.Module):
|
494 |
+
def __init__(self):
|
495 |
+
super(Discriminator, self).__init__()
|
496 |
+
self.net = nn.Sequential(
|
497 |
+
nn.Conv2d(3, 64, kernel_size=3, padding=1),
|
498 |
+
nn.LeakyReLU(0.2),
|
499 |
+
|
500 |
+
nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1),
|
501 |
+
nn.BatchNorm2d(64),
|
502 |
+
nn.LeakyReLU(0.2),
|
503 |
+
|
504 |
+
nn.Conv2d(64, 128, kernel_size=3, padding=1),
|
505 |
+
nn.BatchNorm2d(128),
|
506 |
+
nn.LeakyReLU(0.2),
|
507 |
+
|
508 |
+
nn.Conv2d(128, 128, kernel_size=3, stride=2, padding=1),
|
509 |
+
nn.BatchNorm2d(128),
|
510 |
+
nn.LeakyReLU(0.2),
|
511 |
+
|
512 |
+
nn.Conv2d(128, 256, kernel_size=3, padding=1),
|
513 |
+
nn.BatchNorm2d(256),
|
514 |
+
nn.LeakyReLU(0.2),
|
515 |
+
|
516 |
+
nn.Conv2d(256, 256, kernel_size=3, stride=2, padding=1),
|
517 |
+
nn.BatchNorm2d(256),
|
518 |
+
nn.LeakyReLU(0.2),
|
519 |
+
|
520 |
+
nn.Conv2d(256, 512, kernel_size=3, padding=1),
|
521 |
+
nn.BatchNorm2d(512),
|
522 |
+
nn.LeakyReLU(0.2),
|
523 |
+
|
524 |
+
nn.Conv2d(512, 512, kernel_size=3, stride=2, padding=1),
|
525 |
+
nn.BatchNorm2d(512),
|
526 |
+
nn.LeakyReLU(0.2),
|
527 |
+
|
528 |
+
nn.AdaptiveAvgPool2d(1),
|
529 |
+
nn.Conv2d(512, 1024, kernel_size=1),
|
530 |
+
nn.LeakyReLU(0.2),
|
531 |
+
nn.Conv2d(1024, 1, kernel_size=1)
|
532 |
+
)
|
533 |
+
|
534 |
+
def forward(self, x):
|
535 |
+
batch_size = x.size(0)
|
536 |
+
return torch.sigmoid(self.net(x).view(batch_size))
|
537 |
+
|
538 |
+
|
539 |
+
class Discriminator2(nn.Module):
|
540 |
+
def __init__(self):
|
541 |
+
super(Discriminator2, self).__init__()
|
542 |
+
self.net = nn.Sequential(
|
543 |
+
nn.Conv2d(3, 64, kernel_size=3, padding=1),
|
544 |
+
nn.LeakyReLU(0.2),
|
545 |
+
|
546 |
+
nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1),
|
547 |
+
nn.BatchNorm2d(64),
|
548 |
+
nn.LeakyReLU(0.2),
|
549 |
+
|
550 |
+
nn.Conv2d(64, 128, kernel_size=3, padding=1),
|
551 |
+
nn.BatchNorm2d(128),
|
552 |
+
nn.LeakyReLU(0.2),
|
553 |
+
|
554 |
+
nn.Conv2d(128, 128, kernel_size=3, stride=2, padding=1),
|
555 |
+
nn.BatchNorm2d(128),
|
556 |
+
nn.LeakyReLU(0.2),
|
557 |
+
|
558 |
+
nn.Conv2d(128, 256, kernel_size=3, padding=1),
|
559 |
+
nn.BatchNorm2d(256),
|
560 |
+
nn.LeakyReLU(0.2),
|
561 |
+
|
562 |
+
nn.Conv2d(256, 256, kernel_size=3, stride=2, padding=1),
|
563 |
+
nn.BatchNorm2d(256),
|
564 |
+
nn.LeakyReLU(0.2),
|
565 |
+
|
566 |
+
nn.Conv2d(256, 512, kernel_size=3, padding=1),
|
567 |
+
nn.BatchNorm2d(512),
|
568 |
+
nn.LeakyReLU(0.2),
|
569 |
+
|
570 |
+
nn.Conv2d(512, 512, kernel_size=3, stride=2, padding=1),
|
571 |
+
nn.BatchNorm2d(512),
|
572 |
+
nn.LeakyReLU(0.2),
|
573 |
+
|
574 |
+
nn.Conv2d(512, 512, kernel_size=3, stride=2, padding=1),
|
575 |
+
nn.BatchNorm2d(512),
|
576 |
+
nn.LeakyReLU(0.2),
|
577 |
+
|
578 |
+
nn.Conv2d(512, 1, kernel_size=3, padding=1),
|
579 |
+
)
|
580 |
+
|
581 |
+
def forward(self, x):
|
582 |
+
return self.net(x)
|
583 |
+
|
584 |
+
if __name__ == '__main__':
|
585 |
+
|
586 |
+
device = torch.device("cuda:0")
|
587 |
+
|
588 |
+
# Create model
|
589 |
+
im = torch.rand(1, 3, 640, 640).to(device)
|
590 |
+
model_g = fusion_net().to(device)
|
591 |
+
|
592 |
+
out_data = model_g(im)
|
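Reviewer note: the LayerNorm class above implements the channels_first variant by normalizing over the channel dimension by hand. A minimal sketch (assuming only torch and the LayerNorm class from this file) that checks it against F.layer_norm applied to a permuted tensor:

# --- Reviewer sketch, not part of the committed file ---
# Checks that the hand-written channels_first LayerNorm above matches
# F.layer_norm applied on an (N, H, W, C) permutation of the same tensor.
import torch
import torch.nn.functional as F

ln = LayerNorm(16, eps=1e-6, data_format="channels_first")
x = torch.randn(2, 16, 8, 8)
ref = F.layer_norm(x.permute(0, 2, 3, 1), (16,), ln.weight, ln.bias, ln.eps).permute(0, 3, 1, 2)
print(torch.allclose(ln(x), ref, atol=1e-5))  # expected: True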
DH-AISP/2/myFFCResblock0.py
ADDED
@@ -0,0 +1,60 @@
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from saicinpainting.training.modules.ffc0 import FFCResnetBlock
from saicinpainting.training.modules.ffc0 import FFC_BN_ACT


class myFFCResblock(nn.Module):
    def __init__(self, input_nc, output_nc, n_blocks=2, norm_layer=nn.BatchNorm2d,  # 128 ---> 64
                 padding_type='reflect', activation_layer=nn.ReLU,
                 resnet_conv_kwargs={},
                 spatial_transform_layers=None, spatial_transform_kwargs={},
                 add_out_act=True, max_features=1024, out_ffc=False, out_ffc_kwargs={}):
        assert (n_blocks >= 0)

        super().__init__()
        self.initial = FFC_BN_ACT(input_nc, input_nc, kernel_size=3, padding=1, dilation=1,
                                  norm_layer=norm_layer, activation_layer=activation_layer,
                                  padding_type=padding_type,
                                  **resnet_conv_kwargs)

        self.ffcresblock = FFCResnetBlock(input_nc, padding_type=padding_type, activation_layer=activation_layer,
                                          norm_layer=norm_layer, **resnet_conv_kwargs)

        self.final = FFC_BN_ACT(input_nc, output_nc, kernel_size=3, padding=1, dilation=1,
                                norm_layer=norm_layer,
                                activation_layer=activation_layer,
                                padding_type=padding_type,
                                **resnet_conv_kwargs)

    def forward(self, x):
        x_l, x_g = self.initial(x)

        x_l, x_g = self.ffcresblock(x_l, x_g)
        x_l, x_g = self.ffcresblock(x_l, x_g)

        out_ = torch.cat([x_l, x_g], 1)

        x_lout, x_gout = self.final(out_)

        out = torch.cat([x_lout, x_gout], 1)
        return out
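Reviewer note: myFFCResblock.forward keeps a local/global pair of feature streams and merges them with torch.cat. A minimal sketch of that split-and-merge pattern, using plain convolutions as stand-ins rather than the real FFC_BN_ACT / FFCResnetBlock modules from saicinpainting.training.modules.ffc0:

# --- Reviewer sketch with plain convs, not the real FFC modules ---
import torch
import torch.nn as nn

x = torch.randn(1, 64, 32, 32)
x_l, x_g = torch.split(x, 32, dim=1)                  # local / global halves
conv_l = nn.Conv2d(32, 32, kernel_size=3, padding=1)  # stand-in for the local branch
conv_g = nn.Conv2d(32, 32, kernel_size=3, padding=1)  # stand-in for the global (spectral) branch
x_l, x_g = conv_l(x_l), conv_g(x_g)                   # each stream processed separately
out = torch.cat([x_l, x_g], dim=1)                    # merged back, as in forward() above
print(out.shape)                                      # torch.Size([1, 64, 32, 32])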
DH-AISP/2/perceptual.py
ADDED
@@ -0,0 +1,30 @@
# --- Imports --- #
import torch
import torch.nn.functional as F


# --- Perceptual loss network --- #
class LossNetwork(torch.nn.Module):
    def __init__(self, vgg_model):
        super(LossNetwork, self).__init__()
        self.vgg_layers = vgg_model
        self.layer_name_mapping = {
            '3': "relu1_2",
            '8': "relu2_2",
            '15': "relu3_3"
        }

    def output_features(self, x):
        output = {}
        for name, module in self.vgg_layers._modules.items():
            x = module(x)
            if name in self.layer_name_mapping:
                output[self.layer_name_mapping[name]] = x
        return list(output.values())

    def forward(self, dehaze, gt):
        loss = []
        dehaze_features = self.output_features(dehaze)
        gt_features = self.output_features(gt)
        for dehaze_feature, gt_feature in zip(dehaze_features, gt_features):
            loss.append(F.mse_loss(dehaze_feature, gt_feature))
        return sum(loss) / len(loss)
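Reviewer note: LossNetwork indexes vgg_model._modules by the string keys '3', '8' and '15', which correspond to relu1_2, relu2_2 and relu3_3 in torchvision's VGG-16 feature extractor. A usage sketch follows; the weights= argument assumes torchvision >= 0.13 (older versions use pretrained=True instead):

# --- Reviewer sketch, not part of the committed file ---
import torch
from torchvision import models

vgg = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1).features.eval()
for p in vgg.parameters():
    p.requires_grad = False  # the perceptual loss network stays frozen

loss_network = LossNetwork(vgg)
pred = torch.rand(1, 3, 224, 224)
gt = torch.rand(1, 3, 224, 224)
print(loss_network(pred, gt))  # mean MSE over relu1_2 / relu2_2 / relu3_3 features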
DH-AISP/2/pytorch_msssim/__init__.py
ADDED
@@ -0,0 +1,133 @@
import torch
import torch.nn.functional as F
from math import exp
import numpy as np


def gaussian(window_size, sigma):
    gauss = torch.Tensor([exp(-(x - window_size // 2) ** 2 / float(2 * sigma ** 2)) for x in range(window_size)])
    return gauss / gauss.sum()


def create_window(window_size, channel=1):
    _1D_window = gaussian(window_size, 1.5).unsqueeze(1)
    _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0)
    window = _2D_window.expand(channel, 1, window_size, window_size).contiguous()
    return window


def ssim(img1, img2, window_size=11, window=None, size_average=True, full=False, val_range=None):
    # Value range can be different from 255. Other common ranges are 1 (sigmoid) and 2 (tanh).
    if val_range is None:
        if torch.max(img1) > 128:
            max_val = 255
        else:
            max_val = 1

        if torch.min(img1) < -0.5:
            min_val = -1
        else:
            min_val = 0
        L = max_val - min_val
    else:
        L = val_range

    padd = 0
    (_, channel, height, width) = img1.size()
    if window is None:
        real_size = min(window_size, height, width)
        window = create_window(real_size, channel=channel).to(img1.device)

    mu1 = F.conv2d(img1, window, padding=padd, groups=channel)
    mu2 = F.conv2d(img2, window, padding=padd, groups=channel)

    mu1_sq = mu1.pow(2)
    mu2_sq = mu2.pow(2)
    mu1_mu2 = mu1 * mu2

    sigma1_sq = F.conv2d(img1 * img1, window, padding=padd, groups=channel) - mu1_sq
    sigma2_sq = F.conv2d(img2 * img2, window, padding=padd, groups=channel) - mu2_sq
    sigma12 = F.conv2d(img1 * img2, window, padding=padd, groups=channel) - mu1_mu2

    C1 = (0.01 * L) ** 2
    C2 = (0.03 * L) ** 2

    v1 = 2.0 * sigma12 + C2
    v2 = sigma1_sq + sigma2_sq + C2
    cs = torch.mean(v1 / v2)  # contrast sensitivity

    ssim_map = ((2 * mu1_mu2 + C1) * v1) / ((mu1_sq + mu2_sq + C1) * v2)

    if size_average:
        ret = ssim_map.mean()
    else:
        ret = ssim_map.mean(1).mean(1).mean(1)

    if full:
        return ret, cs
    return ret


def msssim(img1, img2, window_size=11, size_average=True, val_range=None, normalize=False):
    device = img1.device
    weights = torch.FloatTensor([0.0448, 0.2856, 0.3001, 0.2363, 0.1333]).to(device)
    levels = weights.size()[0]
    mssim = []
    mcs = []
    for _ in range(levels):
        sim, cs = ssim(img1, img2, window_size=window_size, size_average=size_average, full=True, val_range=val_range)
        mssim.append(sim)
        mcs.append(cs)

        img1 = F.avg_pool2d(img1, (2, 2))
        img2 = F.avg_pool2d(img2, (2, 2))

    mssim = torch.stack(mssim)
    mcs = torch.stack(mcs)

    # Normalize (to avoid NaNs during training unstable models, not compliant with original definition)
    if normalize:
        mssim = (mssim + 1) / 2
        mcs = (mcs + 1) / 2

    pow1 = mcs ** weights
    pow2 = mssim ** weights
    # From Matlab implementation https://ece.uwaterloo.ca/~z70wang/research/iwssim/
    output = torch.prod(pow1[:-1] * pow2[-1])
    return output


# Classes to re-use window
class SSIM(torch.nn.Module):
    def __init__(self, window_size=11, size_average=True, val_range=None):
        super(SSIM, self).__init__()
        self.window_size = window_size
        self.size_average = size_average
        self.val_range = val_range

        # Assume 1 channel for SSIM
        self.channel = 1
        self.window = create_window(window_size)

    def forward(self, img1, img2):
        (_, channel, _, _) = img1.size()

        if channel == self.channel and self.window.dtype == img1.dtype:
            window = self.window
        else:
            window = create_window(self.window_size, channel).to(img1.device).type(img1.dtype)
            self.window = window
            self.channel = channel

        return ssim(img1, img2, window=window, window_size=self.window_size, size_average=self.size_average)


class MSSSIM(torch.nn.Module):
    def __init__(self, window_size=11, size_average=True, channel=3):
        super(MSSSIM, self).__init__()
        self.window_size = window_size
        self.size_average = size_average
        self.channel = channel

    def forward(self, img1, img2):
        # TODO: store window between calls if possible
        return msssim(img1, img2, window_size=self.window_size, size_average=self.size_average)
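Reviewer note: ssim returns a similarity in (0, 1], so a training loss is usually 1 - ssim(...) (or 1 - msssim(...)). A small sketch with random tensors in [0, 1], relying only on the functions defined in this file:

# --- Reviewer sketch, not part of the committed file ---
import torch

img1 = torch.rand(1, 3, 256, 256)
img2 = (img1 + 0.05 * torch.randn_like(img1)).clamp(0, 1)

print(ssim(img1, img2))          # close to 1 for nearly identical images
print(1 - MSSSIM()(img1, img2))  # multi-scale SSIM used as a loss term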
DH-AISP/2/pytorch_msssim/__pycache__/__init__.cpython-36.pyc
ADDED
Binary file (3.9 kB)
DH-AISP/2/pytorch_msssim/__pycache__/__init__.cpython-37.pyc
ADDED
Binary file (3.88 kB)
DH-AISP/2/result_low_light_hdr/checkpoint_gen.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e5952db983eb66b04c6a39348a0916164d9148ec99c4a3b8a77bf4e240657022
size 1491472482
DH-AISP/2/saicinpainting/__init__.py
ADDED
File without changes
DH-AISP/2/saicinpainting/__pycache__/__init__.cpython-36.pyc
ADDED
Binary file (168 Bytes)
DH-AISP/2/saicinpainting/__pycache__/__init__.cpython-37.pyc
ADDED
Binary file (155 Bytes)
DH-AISP/2/saicinpainting/__pycache__/utils.cpython-36.pyc
ADDED
Binary file (6.1 kB)
DH-AISP/2/saicinpainting/__pycache__/utils.cpython-37.pyc
ADDED
Binary file (6.08 kB)
DH-AISP/2/saicinpainting/evaluation/__init__.py
ADDED
@@ -0,0 +1,33 @@
import logging

import torch

from saicinpainting.evaluation.evaluator import InpaintingEvaluatorOnline, ssim_fid100_f1, lpips_fid100_f1
from saicinpainting.evaluation.losses.base_loss import SSIMScore, LPIPSScore, FIDScore


def make_evaluator(kind='default', ssim=True, lpips=True, fid=True, integral_kind=None, **kwargs):
    logging.info(f'Make evaluator {kind}')
    device = "cuda" if torch.cuda.is_available() else "cpu"
    metrics = {}
    if ssim:
        metrics['ssim'] = SSIMScore()
    if lpips:
        metrics['lpips'] = LPIPSScore()
    if fid:
        metrics['fid'] = FIDScore().to(device)

    if integral_kind is None:
        integral_func = None
    elif integral_kind == 'ssim_fid100_f1':
        integral_func = ssim_fid100_f1
    elif integral_kind == 'lpips_fid100_f1':
        integral_func = lpips_fid100_f1
    else:
        raise ValueError(f'Unexpected integral_kind={integral_kind}')

    if kind == 'default':
        return InpaintingEvaluatorOnline(scores=metrics,
                                         integral_func=integral_func,
                                         integral_title=integral_kind,
                                         **kwargs)
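Reviewer note: a sketch of how the returned evaluator is meant to be driven, following InpaintingEvaluatorOnline further down in this diff. Illustrative only; the LPIPS and FID metrics pull in pretrained weights, so only SSIM is enabled here.

# --- Reviewer sketch, not part of the committed file ---
evaluator = make_evaluator(kind='default', ssim=True, lpips=False, fid=False)
# for batch in dataloader:               # dicts with 'image', 'mask', 'inpainted' tensors
#     evaluator(batch)                   # accumulates per-batch scores
# results = evaluator.evaluation_end()   # {('ssim', 'total'): {'mean': ..., 'std': ...}, ...}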
DH-AISP/2/saicinpainting/evaluation/data.py
ADDED
@@ -0,0 +1,168 @@
import glob
import os

import cv2
import PIL.Image as Image
import numpy as np

from torch.utils.data import Dataset
import torch.nn.functional as F


def load_image(fname, mode='RGB', return_orig=False):
    img = np.array(Image.open(fname).convert(mode))
    if img.ndim == 3:
        img = np.transpose(img, (2, 0, 1))
    out_img = img.astype('float32') / 255
    if return_orig:
        return out_img, img
    else:
        return out_img


def ceil_modulo(x, mod):
    if x % mod == 0:
        return x
    return (x // mod + 1) * mod


def pad_img_to_modulo(img, mod):
    channels, height, width = img.shape
    out_height = ceil_modulo(height, mod)
    out_width = ceil_modulo(width, mod)
    return np.pad(img, ((0, 0), (0, out_height - height), (0, out_width - width)), mode='symmetric')


def pad_tensor_to_modulo(img, mod):
    batch_size, channels, height, width = img.shape
    out_height = ceil_modulo(height, mod)
    out_width = ceil_modulo(width, mod)
    return F.pad(img, pad=(0, out_width - width, 0, out_height - height), mode='reflect')


def scale_image(img, factor, interpolation=cv2.INTER_AREA):
    if img.shape[0] == 1:
        img = img[0]
    else:
        img = np.transpose(img, (1, 2, 0))

    img = cv2.resize(img, dsize=None, fx=factor, fy=factor, interpolation=interpolation)

    if img.ndim == 2:
        img = img[None, ...]
    else:
        img = np.transpose(img, (2, 0, 1))
    return img


class InpaintingDataset(Dataset):
    def __init__(self, datadir, img_suffix='.jpg', pad_out_to_modulo=None, scale_factor=None):
        self.datadir = datadir
        self.mask_filenames = sorted(list(glob.glob(os.path.join(self.datadir, '**', '*mask*.png'), recursive=True)))
        self.img_filenames = [fname.rsplit('_mask', 1)[0] + img_suffix for fname in self.mask_filenames]
        self.pad_out_to_modulo = pad_out_to_modulo
        self.scale_factor = scale_factor

    def __len__(self):
        return len(self.mask_filenames)

    def __getitem__(self, i):
        image = load_image(self.img_filenames[i], mode='RGB')
        mask = load_image(self.mask_filenames[i], mode='L')
        result = dict(image=image, mask=mask[None, ...])

        if self.scale_factor is not None:
            result['image'] = scale_image(result['image'], self.scale_factor)
            result['mask'] = scale_image(result['mask'], self.scale_factor, interpolation=cv2.INTER_NEAREST)

        if self.pad_out_to_modulo is not None and self.pad_out_to_modulo > 1:
            result['unpad_to_size'] = result['image'].shape[1:]
            result['image'] = pad_img_to_modulo(result['image'], self.pad_out_to_modulo)
            result['mask'] = pad_img_to_modulo(result['mask'], self.pad_out_to_modulo)

        return result


class OurInpaintingDataset(Dataset):
    def __init__(self, datadir, img_suffix='.jpg', pad_out_to_modulo=None, scale_factor=None):
        self.datadir = datadir
        self.mask_filenames = sorted(list(glob.glob(os.path.join(self.datadir, 'mask', '**', '*mask*.png'), recursive=True)))
        self.img_filenames = [os.path.join(self.datadir, 'img', os.path.basename(fname.rsplit('-', 1)[0].rsplit('_', 1)[0]) + '.png') for fname in self.mask_filenames]
        self.pad_out_to_modulo = pad_out_to_modulo
        self.scale_factor = scale_factor

    def __len__(self):
        return len(self.mask_filenames)

    def __getitem__(self, i):
        result = dict(image=load_image(self.img_filenames[i], mode='RGB'),
                      mask=load_image(self.mask_filenames[i], mode='L')[None, ...])

        if self.scale_factor is not None:
            result['image'] = scale_image(result['image'], self.scale_factor)
            result['mask'] = scale_image(result['mask'], self.scale_factor)

        if self.pad_out_to_modulo is not None and self.pad_out_to_modulo > 1:
            result['image'] = pad_img_to_modulo(result['image'], self.pad_out_to_modulo)
            result['mask'] = pad_img_to_modulo(result['mask'], self.pad_out_to_modulo)

        return result


class PrecomputedInpaintingResultsDataset(InpaintingDataset):
    def __init__(self, datadir, predictdir, inpainted_suffix='_inpainted.jpg', **kwargs):
        super().__init__(datadir, **kwargs)
        if not datadir.endswith('/'):
            datadir += '/'
        self.predictdir = predictdir
        self.pred_filenames = [os.path.join(predictdir, os.path.splitext(fname[len(datadir):])[0] + inpainted_suffix)
                               for fname in self.mask_filenames]

    def __getitem__(self, i):
        result = super().__getitem__(i)
        result['inpainted'] = load_image(self.pred_filenames[i])
        if self.pad_out_to_modulo is not None and self.pad_out_to_modulo > 1:
            result['inpainted'] = pad_img_to_modulo(result['inpainted'], self.pad_out_to_modulo)
        return result


class OurPrecomputedInpaintingResultsDataset(OurInpaintingDataset):
    def __init__(self, datadir, predictdir, inpainted_suffix="png", **kwargs):
        super().__init__(datadir, **kwargs)
        if not datadir.endswith('/'):
            datadir += '/'
        self.predictdir = predictdir
        self.pred_filenames = [os.path.join(predictdir, os.path.basename(os.path.splitext(fname)[0]) + f'_inpainted.{inpainted_suffix}')
                               for fname in self.mask_filenames]
        # self.pred_filenames = [os.path.join(predictdir, os.path.splitext(fname[len(datadir):])[0] + inpainted_suffix)
        #                        for fname in self.mask_filenames]

    def __getitem__(self, i):
        result = super().__getitem__(i)
        result['inpainted'] = self.file_loader(self.pred_filenames[i])

        if self.pad_out_to_modulo is not None and self.pad_out_to_modulo > 1:
            result['inpainted'] = pad_img_to_modulo(result['inpainted'], self.pad_out_to_modulo)
        return result


class InpaintingEvalOnlineDataset(Dataset):
    def __init__(self, indir, mask_generator, img_suffix='.jpg', pad_out_to_modulo=None, scale_factor=None, **kwargs):
        self.indir = indir
        self.mask_generator = mask_generator
        self.img_filenames = sorted(list(glob.glob(os.path.join(self.indir, '**', f'*{img_suffix}'), recursive=True)))
        self.pad_out_to_modulo = pad_out_to_modulo
        self.scale_factor = scale_factor

    def __len__(self):
        return len(self.img_filenames)

    def __getitem__(self, i):
        img, raw_image = load_image(self.img_filenames[i], mode='RGB', return_orig=True)
        mask = self.mask_generator(img, raw_image=raw_image)
        result = dict(image=img, mask=mask)

        if self.scale_factor is not None:
            result['image'] = scale_image(result['image'], self.scale_factor)
            result['mask'] = scale_image(result['mask'], self.scale_factor, interpolation=cv2.INTER_NEAREST)

        if self.pad_out_to_modulo is not None and self.pad_out_to_modulo > 1:
            result['image'] = pad_img_to_modulo(result['image'], self.pad_out_to_modulo)
            result['mask'] = pad_img_to_modulo(result['mask'], self.pad_out_to_modulo)
        return result
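Reviewer note: pad_img_to_modulo pads a (C, H, W) array on the bottom/right so both spatial sizes become multiples of mod, which is what the datasets above use for pad_out_to_modulo. A quick sketch using only the functions defined in this file:

# --- Reviewer sketch, not part of the committed file ---
import numpy as np

img = np.zeros((3, 250, 333), dtype='float32')   # (C, H, W), as load_image returns
print(ceil_modulo(250, 8), ceil_modulo(333, 8))  # 256 336
print(pad_img_to_modulo(img, 8).shape)           # (3, 256, 336)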
DH-AISP/2/saicinpainting/evaluation/evaluator.py
ADDED
@@ -0,0 +1,220 @@
import logging
import math
from typing import Dict

import numpy as np
import torch
import torch.nn as nn
import tqdm
from torch.utils.data import DataLoader

from saicinpainting.evaluation.utils import move_to_device

LOGGER = logging.getLogger(__name__)


class InpaintingEvaluator():
    def __init__(self, dataset, scores, area_grouping=True, bins=10, batch_size=32, device='cuda',
                 integral_func=None, integral_title=None, clamp_image_range=None):
        """
        :param dataset: torch.utils.data.Dataset which contains images and masks
        :param scores: dict {score_name: EvaluatorScore object}
        :param area_grouping: in addition to the overall scores, allows to compute score for the groups of samples
            which are defined by share of area occluded by mask
        :param bins: number of groups, partition is generated by np.linspace(0., 1., bins + 1)
        :param batch_size: batch_size for the dataloader
        :param device: device to use
        """
        self.scores = scores
        self.dataset = dataset

        self.area_grouping = area_grouping
        self.bins = bins

        self.device = torch.device(device)

        self.dataloader = DataLoader(self.dataset, shuffle=False, batch_size=batch_size)

        self.integral_func = integral_func
        self.integral_title = integral_title
        self.clamp_image_range = clamp_image_range

    def _get_bin_edges(self):
        bin_edges = np.linspace(0, 1, self.bins + 1)

        num_digits = max(0, math.ceil(math.log10(self.bins)) - 1)
        interval_names = []
        for idx_bin in range(self.bins):
            start_percent, end_percent = round(100 * bin_edges[idx_bin], num_digits), \
                                         round(100 * bin_edges[idx_bin + 1], num_digits)
            start_percent = '{:.{n}f}'.format(start_percent, n=num_digits)
            end_percent = '{:.{n}f}'.format(end_percent, n=num_digits)
            interval_names.append("{0}-{1}%".format(start_percent, end_percent))

        groups = []
        for batch in self.dataloader:
            mask = batch['mask']
            batch_size = mask.shape[0]
            area = mask.to(self.device).reshape(batch_size, -1).mean(dim=-1)
            bin_indices = np.searchsorted(bin_edges, area.detach().cpu().numpy(), side='right') - 1
            # corner case: when area is equal to 1, bin_indices should return bins - 1, not bins for that element
            bin_indices[bin_indices == self.bins] = self.bins - 1
            groups.append(bin_indices)
        groups = np.hstack(groups)

        return groups, interval_names

    def evaluate(self, model=None):
        """
        :param model: callable with signature (image_batch, mask_batch); should return inpainted_batch
        :return: dict with (score_name, group_type) as keys, where group_type can be either 'overall' or
            name of the particular group arranged by area of mask (e.g. '10-20%')
            and score statistics for the group as values.
        """
        results = dict()
        if self.area_grouping:
            groups, interval_names = self._get_bin_edges()
        else:
            groups = None

        for score_name, score in tqdm.auto.tqdm(self.scores.items(), desc='scores'):
            score.to(self.device)
            with torch.no_grad():
                score.reset()
                for batch in tqdm.auto.tqdm(self.dataloader, desc=score_name, leave=False):
                    batch = move_to_device(batch, self.device)
                    image_batch, mask_batch = batch['image'], batch['mask']
                    if self.clamp_image_range is not None:
                        image_batch = torch.clamp(image_batch,
                                                  min=self.clamp_image_range[0],
                                                  max=self.clamp_image_range[1])
                    if model is None:
                        assert 'inpainted' in batch, \
                            'Model is None, so we expected precomputed inpainting results at key "inpainted"'
                        inpainted_batch = batch['inpainted']
                    else:
                        inpainted_batch = model(image_batch, mask_batch)
                    score(inpainted_batch, image_batch, mask_batch)
                total_results, group_results = score.get_value(groups=groups)

            results[(score_name, 'total')] = total_results
            if groups is not None:
                for group_index, group_values in group_results.items():
                    group_name = interval_names[group_index]
                    results[(score_name, group_name)] = group_values

        if self.integral_func is not None:
            results[(self.integral_title, 'total')] = dict(mean=self.integral_func(results))

        return results


def ssim_fid100_f1(metrics, fid_scale=100):
    ssim = metrics[('ssim', 'total')]['mean']
    fid = metrics[('fid', 'total')]['mean']
    fid_rel = max(0, fid_scale - fid) / fid_scale
    f1 = 2 * ssim * fid_rel / (ssim + fid_rel + 1e-3)
    return f1


def lpips_fid100_f1(metrics, fid_scale=100):
    neg_lpips = 1 - metrics[('lpips', 'total')]['mean']  # invert, so bigger is better
    fid = metrics[('fid', 'total')]['mean']
    fid_rel = max(0, fid_scale - fid) / fid_scale
    f1 = 2 * neg_lpips * fid_rel / (neg_lpips + fid_rel + 1e-3)
    return f1


class InpaintingEvaluatorOnline(nn.Module):
    def __init__(self, scores, bins=10, image_key='image', inpainted_key='inpainted',
                 integral_func=None, integral_title=None, clamp_image_range=None):
        """
        :param scores: dict {score_name: EvaluatorScore object}
        :param bins: number of groups, partition is generated by np.linspace(0., 1., bins + 1)
        :param device: device to use
        """
        super().__init__()
        LOGGER.info(f'{type(self)} init called')
        self.scores = nn.ModuleDict(scores)
        self.image_key = image_key
        self.inpainted_key = inpainted_key
        self.bins_num = bins
        self.bin_edges = np.linspace(0, 1, self.bins_num + 1)

        num_digits = max(0, math.ceil(math.log10(self.bins_num)) - 1)
        self.interval_names = []
        for idx_bin in range(self.bins_num):
            start_percent, end_percent = round(100 * self.bin_edges[idx_bin], num_digits), \
                                         round(100 * self.bin_edges[idx_bin + 1], num_digits)
            start_percent = '{:.{n}f}'.format(start_percent, n=num_digits)
            end_percent = '{:.{n}f}'.format(end_percent, n=num_digits)
            self.interval_names.append("{0}-{1}%".format(start_percent, end_percent))

        self.groups = []

        self.integral_func = integral_func
        self.integral_title = integral_title
        self.clamp_image_range = clamp_image_range

        LOGGER.info(f'{type(self)} init done')

    def _get_bins(self, mask_batch):
        batch_size = mask_batch.shape[0]
        area = mask_batch.view(batch_size, -1).mean(dim=-1).detach().cpu().numpy()
        bin_indices = np.clip(np.searchsorted(self.bin_edges, area) - 1, 0, self.bins_num - 1)
        return bin_indices

    def forward(self, batch: Dict[str, torch.Tensor]):
        """
        Calculate and accumulate metrics for batch. To finalize evaluation and obtain final metrics, call evaluation_end
        :param batch: batch dict with mandatory fields mask, image, inpainted (can be overriden by self.inpainted_key)
        """
        result = {}
        with torch.no_grad():
            image_batch, mask_batch, inpainted_batch = batch[self.image_key], batch['mask'], batch[self.inpainted_key]
            if self.clamp_image_range is not None:
                image_batch = torch.clamp(image_batch,
                                          min=self.clamp_image_range[0],
                                          max=self.clamp_image_range[1])
            self.groups.extend(self._get_bins(mask_batch))

            for score_name, score in self.scores.items():
                result[score_name] = score(inpainted_batch, image_batch, mask_batch)
        return result

    def process_batch(self, batch: Dict[str, torch.Tensor]):
        return self(batch)

    def evaluation_end(self, states=None):
        """:return: dict with (score_name, group_type) as keys, where group_type can be either 'overall' or
            name of the particular group arranged by area of mask (e.g. '10-20%')
            and score statistics for the group as values.
        """
        LOGGER.info(f'{type(self)}: evaluation_end called')

        self.groups = np.array(self.groups)

        results = {}
        for score_name, score in self.scores.items():
            LOGGER.info(f'Getting value of {score_name}')
            cur_states = [s[score_name] for s in states] if states is not None else None
            total_results, group_results = score.get_value(groups=self.groups, states=cur_states)
            LOGGER.info(f'Getting value of {score_name} done')
            results[(score_name, 'total')] = total_results

            for group_index, group_values in group_results.items():
                group_name = self.interval_names[group_index]
                results[(score_name, group_name)] = group_values

        if self.integral_func is not None:
            results[(self.integral_title, 'total')] = dict(mean=self.integral_func(results))

        LOGGER.info(f'{type(self)}: reset scores')
        self.groups = []
        for sc in self.scores.values():
            sc.reset()
        LOGGER.info(f'{type(self)}: reset scores done')

        LOGGER.info(f'{type(self)}: evaluation_end done')
        return results
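Reviewer note: ssim_fid100_f1 rescales FID to (100 - FID) / 100 and combines it with SSIM through a smoothed harmonic mean. A toy sketch with a hand-built metrics dict, using only the function defined above:

# --- Reviewer sketch, not part of the committed file ---
metrics = {('ssim', 'total'): {'mean': 0.85},
           ('fid', 'total'): {'mean': 20.0}}
print(ssim_fid100_f1(metrics))  # 2 * 0.85 * 0.8 / (0.85 + 0.8 + 1e-3) ~= 0.824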
DH-AISP/2/saicinpainting/evaluation/losses/__init__.py
ADDED
File without changes
DH-AISP/2/saicinpainting/evaluation/losses/base_loss.py
ADDED
@@ -0,0 +1,528 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
from abc import abstractmethod, ABC
|
3 |
+
|
4 |
+
import numpy as np
|
5 |
+
import sklearn
|
6 |
+
import sklearn.svm
|
7 |
+
import torch
|
8 |
+
import torch.nn as nn
|
9 |
+
import torch.nn.functional as F
|
10 |
+
from joblib import Parallel, delayed
|
11 |
+
from scipy import linalg
|
12 |
+
|
13 |
+
from models.ade20k import SegmentationModule, NUM_CLASS, segm_options
|
14 |
+
from .fid.inception import InceptionV3
|
15 |
+
from .lpips import PerceptualLoss
|
16 |
+
from .ssim import SSIM
|
17 |
+
|
18 |
+
LOGGER = logging.getLogger(__name__)
|
19 |
+
|
20 |
+
|
21 |
+
def get_groupings(groups):
|
22 |
+
"""
|
23 |
+
:param groups: group numbers for respective elements
|
24 |
+
:return: dict of kind {group_idx: indices of the corresponding group elements}
|
25 |
+
"""
|
26 |
+
label_groups, count_groups = np.unique(groups, return_counts=True)
|
27 |
+
|
28 |
+
indices = np.argsort(groups)
|
29 |
+
|
30 |
+
grouping = dict()
|
31 |
+
cur_start = 0
|
32 |
+
for label, count in zip(label_groups, count_groups):
|
33 |
+
cur_end = cur_start + count
|
34 |
+
cur_indices = indices[cur_start:cur_end]
|
35 |
+
grouping[label] = cur_indices
|
36 |
+
cur_start = cur_end
|
37 |
+
return grouping
|
38 |
+
|
39 |
+
|
40 |
+
class EvaluatorScore(nn.Module):
|
41 |
+
@abstractmethod
|
42 |
+
def forward(self, pred_batch, target_batch, mask):
|
43 |
+
pass
|
44 |
+
|
45 |
+
@abstractmethod
|
46 |
+
def get_value(self, groups=None, states=None):
|
47 |
+
pass
|
48 |
+
|
49 |
+
@abstractmethod
|
50 |
+
def reset(self):
|
51 |
+
pass
|
52 |
+
|
53 |
+
|
54 |
+
class PairwiseScore(EvaluatorScore, ABC):
|
55 |
+
def __init__(self):
|
56 |
+
super().__init__()
|
57 |
+
self.individual_values = None
|
58 |
+
|
59 |
+
def get_value(self, groups=None, states=None):
|
60 |
+
"""
|
61 |
+
:param groups:
|
62 |
+
:return:
|
63 |
+
total_results: dict of kind {'mean': score mean, 'std': score std}
|
64 |
+
group_results: None, if groups is None;
|
65 |
+
else dict {group_idx: {'mean': score mean among group, 'std': score std among group}}
|
66 |
+
"""
|
67 |
+
individual_values = torch.cat(states, dim=-1).reshape(-1).cpu().numpy() if states is not None \
|
68 |
+
else self.individual_values
|
69 |
+
|
70 |
+
total_results = {
|
71 |
+
'mean': individual_values.mean(),
|
72 |
+
'std': individual_values.std()
|
73 |
+
}
|
74 |
+
|
75 |
+
if groups is None:
|
76 |
+
return total_results, None
|
77 |
+
|
78 |
+
group_results = dict()
|
79 |
+
grouping = get_groupings(groups)
|
80 |
+
for label, index in grouping.items():
|
81 |
+
group_scores = individual_values[index]
|
82 |
+
group_results[label] = {
|
83 |
+
'mean': group_scores.mean(),
|
84 |
+
'std': group_scores.std()
|
85 |
+
}
|
86 |
+
return total_results, group_results
|
87 |
+
|
88 |
+
def reset(self):
|
89 |
+
self.individual_values = []
|
90 |
+
|
91 |
+
|
92 |
+
class SSIMScore(PairwiseScore):
|
93 |
+
def __init__(self, window_size=11):
|
94 |
+
super().__init__()
|
95 |
+
self.score = SSIM(window_size=window_size, size_average=False).eval()
|
96 |
+
self.reset()
|
97 |
+
|
98 |
+
def forward(self, pred_batch, target_batch, mask=None):
|
99 |
+
batch_values = self.score(pred_batch, target_batch)
|
100 |
+
self.individual_values = np.hstack([
|
101 |
+
self.individual_values, batch_values.detach().cpu().numpy()
|
102 |
+
])
|
103 |
+
return batch_values
|
104 |
+
|
105 |
+
|
106 |
+
class LPIPSScore(PairwiseScore):
|
107 |
+
def __init__(self, model='net-lin', net='vgg', model_path=None, use_gpu=True):
|
108 |
+
super().__init__()
|
109 |
+
self.score = PerceptualLoss(model=model, net=net, model_path=model_path,
|
110 |
+
use_gpu=use_gpu, spatial=False).eval()
|
111 |
+
self.reset()
|
112 |
+
|
113 |
+
def forward(self, pred_batch, target_batch, mask=None):
|
114 |
+
batch_values = self.score(pred_batch, target_batch).flatten()
|
115 |
+
self.individual_values = np.hstack([
|
116 |
+
self.individual_values, batch_values.detach().cpu().numpy()
|
117 |
+
])
|
118 |
+
return batch_values
|
119 |
+
|
120 |
+
|
121 |
+
def fid_calculate_activation_statistics(act):
|
122 |
+
mu = np.mean(act, axis=0)
|
123 |
+
sigma = np.cov(act, rowvar=False)
|
124 |
+
return mu, sigma
|
125 |
+
|
126 |
+
|
127 |
+
def calculate_frechet_distance(activations_pred, activations_target, eps=1e-6):
|
128 |
+
mu1, sigma1 = fid_calculate_activation_statistics(activations_pred)
|
129 |
+
mu2, sigma2 = fid_calculate_activation_statistics(activations_target)
|
130 |
+
|
131 |
+
diff = mu1 - mu2
|
132 |
+
|
133 |
+
# Product might be almost singular
|
134 |
+
covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
|
135 |
+
if not np.isfinite(covmean).all():
|
136 |
+
msg = ('fid calculation produces singular product; '
|
137 |
+
'adding %s to diagonal of cov estimates') % eps
|
138 |
+
LOGGER.warning(msg)
|
139 |
+
offset = np.eye(sigma1.shape[0]) * eps
|
140 |
+
covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))
|
141 |
+
|
142 |
+
# Numerical error might give slight imaginary component
|
143 |
+
if np.iscomplexobj(covmean):
|
144 |
+
# if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
|
145 |
+
if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-2):
|
146 |
+
m = np.max(np.abs(covmean.imag))
|
147 |
+
raise ValueError('Imaginary component {}'.format(m))
|
148 |
+
covmean = covmean.real
|
149 |
+
|
150 |
+
tr_covmean = np.trace(covmean)
|
151 |
+
|
152 |
+
return (diff.dot(diff) + np.trace(sigma1) +
|
153 |
+
np.trace(sigma2) - 2 * tr_covmean)
|
154 |
+
|
155 |
+
|
156 |
+
class FIDScore(EvaluatorScore):
|
157 |
+
def __init__(self, dims=2048, eps=1e-6):
|
158 |
+
LOGGER.info("FIDscore init called")
|
159 |
+
super().__init__()
|
160 |
+
if getattr(FIDScore, '_MODEL', None) is None:
|
161 |
+
block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
|
162 |
+
FIDScore._MODEL = InceptionV3([block_idx]).eval()
|
163 |
+
self.model = FIDScore._MODEL
|
164 |
+
self.eps = eps
|
165 |
+
self.reset()
|
166 |
+
LOGGER.info("FIDscore init done")
|
167 |
+
|
168 |
+
def forward(self, pred_batch, target_batch, mask=None):
|
169 |
+
activations_pred = self._get_activations(pred_batch)
|
170 |
+
activations_target = self._get_activations(target_batch)
|
171 |
+
|
172 |
+
self.activations_pred.append(activations_pred.detach().cpu())
|
173 |
+
self.activations_target.append(activations_target.detach().cpu())
|
174 |
+
|
175 |
+
return activations_pred, activations_target
|
176 |
+
|
177 |
+
def get_value(self, groups=None, states=None):
|
178 |
+
LOGGER.info("FIDscore get_value called")
|
179 |
+
activations_pred, activations_target = zip(*states) if states is not None \
|
180 |
+
else (self.activations_pred, self.activations_target)
|
181 |
+
activations_pred = torch.cat(activations_pred).cpu().numpy()
|
182 |
+
activations_target = torch.cat(activations_target).cpu().numpy()
|
183 |
+
|
184 |
+
total_distance = calculate_frechet_distance(activations_pred, activations_target, eps=self.eps)
|
185 |
+
total_results = dict(mean=total_distance)
|
186 |
+
|
187 |
+
if groups is None:
|
188 |
+
group_results = None
|
189 |
+
else:
|
190 |
+
group_results = dict()
|
191 |
+
grouping = get_groupings(groups)
|
192 |
+
for label, index in grouping.items():
|
193 |
+
if len(index) > 1:
|
194 |
+
group_distance = calculate_frechet_distance(activations_pred[index], activations_target[index],
|
195 |
+
eps=self.eps)
|
196 |
+
group_results[label] = dict(mean=group_distance)
|
197 |
+
|
198 |
+
else:
|
199 |
+
group_results[label] = dict(mean=float('nan'))
|
200 |
+
|
201 |
+
self.reset()
|
202 |
+
|
203 |
+
LOGGER.info("FIDscore get_value done")
|
204 |
+
|
205 |
+
return total_results, group_results
|
206 |
+
|
207 |
+
def reset(self):
|
208 |
+
self.activations_pred = []
|
209 |
+
self.activations_target = []
|
210 |
+
|
211 |
+
def _get_activations(self, batch):
|
212 |
+
activations = self.model(batch)[0]
|
213 |
+
if activations.shape[2] != 1 or activations.shape[3] != 1:
|
214 |
+
assert False, \
|
215 |
+
'We should not have got here, because Inception always scales inputs to 299x299'
|
216 |
+
# activations = F.adaptive_avg_pool2d(activations, output_size=(1, 1))
|
217 |
+
activations = activations.squeeze(-1).squeeze(-1)
|
218 |
+
return activations
|
219 |
+
|
220 |
+
|
221 |
+
class SegmentationAwareScore(EvaluatorScore):
|
222 |
+
def __init__(self, weights_path):
|
223 |
+
super().__init__()
|
224 |
+
self.segm_network = SegmentationModule(weights_path=weights_path, use_default_normalization=True).eval()
|
225 |
+
self.target_class_freq_by_image_total = []
|
226 |
+
self.target_class_freq_by_image_mask = []
|
227 |
+
self.pred_class_freq_by_image_mask = []
|
228 |
+
|
229 |
+
def forward(self, pred_batch, target_batch, mask):
|
230 |
+
pred_segm_flat = self.segm_network.predict(pred_batch)[0].view(pred_batch.shape[0], -1).long().detach().cpu().numpy()
|
231 |
+
target_segm_flat = self.segm_network.predict(target_batch)[0].view(pred_batch.shape[0], -1).long().detach().cpu().numpy()
|
232 |
+
mask_flat = (mask.view(mask.shape[0], -1) > 0.5).detach().cpu().numpy()
|
233 |
+
|
234 |
+
batch_target_class_freq_total = []
|
235 |
+
batch_target_class_freq_mask = []
|
236 |
+
batch_pred_class_freq_mask = []
|
237 |
+
|
238 |
+
for cur_pred_segm, cur_target_segm, cur_mask in zip(pred_segm_flat, target_segm_flat, mask_flat):
|
239 |
+
cur_target_class_freq_total = np.bincount(cur_target_segm, minlength=NUM_CLASS)[None, ...]
|
240 |
+
cur_target_class_freq_mask = np.bincount(cur_target_segm[cur_mask], minlength=NUM_CLASS)[None, ...]
|
241 |
+
cur_pred_class_freq_mask = np.bincount(cur_pred_segm[cur_mask], minlength=NUM_CLASS)[None, ...]
|
242 |
+
|
243 |
+
self.target_class_freq_by_image_total.append(cur_target_class_freq_total)
|
244 |
+
self.target_class_freq_by_image_mask.append(cur_target_class_freq_mask)
|
245 |
+
self.pred_class_freq_by_image_mask.append(cur_pred_class_freq_mask)
|
246 |
+
|
247 |
+
batch_target_class_freq_total.append(cur_target_class_freq_total)
|
248 |
+
batch_target_class_freq_mask.append(cur_target_class_freq_mask)
|
249 |
+
batch_pred_class_freq_mask.append(cur_pred_class_freq_mask)
|
250 |
+
|
251 |
+
batch_target_class_freq_total = np.concatenate(batch_target_class_freq_total, axis=0)
|
252 |
+
batch_target_class_freq_mask = np.concatenate(batch_target_class_freq_mask, axis=0)
|
253 |
+
batch_pred_class_freq_mask = np.concatenate(batch_pred_class_freq_mask, axis=0)
|
254 |
+
return batch_target_class_freq_total, batch_target_class_freq_mask, batch_pred_class_freq_mask
|
255 |
+
|
256 |
+
def reset(self):
|
257 |
+
super().reset()
|
258 |
+
self.target_class_freq_by_image_total = []
|
259 |
+
self.target_class_freq_by_image_mask = []
|
260 |
+
self.pred_class_freq_by_image_mask = []
|
261 |
+
|
262 |
+
|
263 |
+
def distribute_values_to_classes(target_class_freq_by_image_mask, values, idx2name):
|
264 |
+
assert target_class_freq_by_image_mask.ndim == 2 and target_class_freq_by_image_mask.shape[0] == values.shape[0]
|
265 |
+
total_class_freq = target_class_freq_by_image_mask.sum(0)
|
266 |
+
distr_values = (target_class_freq_by_image_mask * values[..., None]).sum(0)
|
267 |
+
result = distr_values / (total_class_freq + 1e-3)
|
268 |
+
return {idx2name[i]: val for i, val in enumerate(result) if total_class_freq[i] > 0}
|
269 |
+
|
270 |
+
|
271 |
+
def get_segmentation_idx2name():
|
272 |
+
return {i - 1: name for i, name in segm_options['classes'].set_index('Idx', drop=True)['Name'].to_dict().items()}
|
273 |
+
|
274 |
+
|
275 |
+
class SegmentationAwarePairwiseScore(SegmentationAwareScore):
|
276 |
+
def __init__(self, *args, **kwargs):
|
277 |
+
super().__init__(*args, **kwargs)
|
278 |
+
self.individual_values = []
|
279 |
+
self.segm_idx2name = get_segmentation_idx2name()
|
280 |
+
|
281 |
+
def forward(self, pred_batch, target_batch, mask):
|
282 |
+
cur_class_stats = super().forward(pred_batch, target_batch, mask)
|
283 |
+
score_values = self.calc_score(pred_batch, target_batch, mask)
|
284 |
+
self.individual_values.append(score_values)
|
285 |
+
return cur_class_stats + (score_values,)
|
286 |
+
|
287 |
+
@abstractmethod
|
288 |
+
def calc_score(self, pred_batch, target_batch, mask):
|
289 |
+
raise NotImplementedError()
|
290 |
+
|
291 |
+
def get_value(self, groups=None, states=None):
|
292 |
+
"""
|
293 |
+
:param groups:
|
294 |
+
:return:
|
295 |
+
total_results: dict of kind {'mean': score mean, 'std': score std}
|
296 |
+
group_results: None, if groups is None;
|
297 |
+
else dict {group_idx: {'mean': score mean among group, 'std': score std among group}}
|
298 |
+
"""
|
299 |
+
if states is not None:
|
300 |
+
(target_class_freq_by_image_total,
|
301 |
+
target_class_freq_by_image_mask,
|
302 |
+
pred_class_freq_by_image_mask,
|
303 |
+
individual_values) = states
|
304 |
+
else:
|
305 |
+
target_class_freq_by_image_total = self.target_class_freq_by_image_total
|
306 |
+
target_class_freq_by_image_mask = self.target_class_freq_by_image_mask
|
307 |
+
pred_class_freq_by_image_mask = self.pred_class_freq_by_image_mask
|
308 |
+
individual_values = self.individual_values
|
309 |
+
|
310 |
+
target_class_freq_by_image_total = np.concatenate(target_class_freq_by_image_total, axis=0)
|
311 |
+
target_class_freq_by_image_mask = np.concatenate(target_class_freq_by_image_mask, axis=0)
|
312 |
+
pred_class_freq_by_image_mask = np.concatenate(pred_class_freq_by_image_mask, axis=0)
|
313 |
+
individual_values = np.concatenate(individual_values, axis=0)
|
314 |
+
|
315 |
+
total_results = {
|
316 |
+
'mean': individual_values.mean(),
|
317 |
+
'std': individual_values.std(),
|
318 |
+
**distribute_values_to_classes(target_class_freq_by_image_mask, individual_values, self.segm_idx2name)
|
319 |
+
}
|
320 |
+
|
321 |
+
if groups is None:
|
322 |
+
return total_results, None
|
323 |
+
|
324 |
+
group_results = dict()
|
325 |
+
grouping = get_groupings(groups)
|
326 |
+
for label, index in grouping.items():
|
327 |
+
group_class_freq = target_class_freq_by_image_mask[index]
|
328 |
+
group_scores = individual_values[index]
|
329 |
+
group_results[label] = {
|
330 |
+
'mean': group_scores.mean(),
|
331 |
+
'std': group_scores.std(),
|
332 |
+
**distribute_values_to_classes(group_class_freq, group_scores, self.segm_idx2name)
|
333 |
+
}
|
334 |
+
return total_results, group_results
|
335 |
+
|
336 |
+
def reset(self):
|
337 |
+
super().reset()
|
338 |
+
self.individual_values = []
|
339 |
+
|
340 |
+
|
341 |
+
class SegmentationClassStats(SegmentationAwarePairwiseScore):
|
342 |
+
def calc_score(self, pred_batch, target_batch, mask):
|
343 |
+
return 0
|
344 |
+
|
345 |
+
def get_value(self, groups=None, states=None):
|
346 |
+
"""
|
347 |
+
:param groups:
|
348 |
+
:return:
|
349 |
+
total_results: dict of kind {'mean': score mean, 'std': score std}
|
350 |
+
group_results: None, if groups is None;
|
351 |
+
else dict {group_idx: {'mean': score mean among group, 'std': score std among group}}
|
352 |
+
"""
|
353 |
+
if states is not None:
|
354 |
+
(target_class_freq_by_image_total,
|
355 |
+
target_class_freq_by_image_mask,
|
356 |
+
pred_class_freq_by_image_mask,
|
357 |
+
_) = states
|
358 |
+
else:
|
359 |
+
target_class_freq_by_image_total = self.target_class_freq_by_image_total
|
360 |
+
target_class_freq_by_image_mask = self.target_class_freq_by_image_mask
|
361 |
+
pred_class_freq_by_image_mask = self.pred_class_freq_by_image_mask
|
362 |
+
|
363 |
+
target_class_freq_by_image_total = np.concatenate(target_class_freq_by_image_total, axis=0)
|
364 |
+
target_class_freq_by_image_mask = np.concatenate(target_class_freq_by_image_mask, axis=0)
|
365 |
+
pred_class_freq_by_image_mask = np.concatenate(pred_class_freq_by_image_mask, axis=0)
|
366 |
+
|
367 |
+
target_class_freq_by_image_total_marginal = target_class_freq_by_image_total.sum(0).astype('float32')
|
368 |
+
target_class_freq_by_image_total_marginal /= target_class_freq_by_image_total_marginal.sum()
|
369 |
+
|
370 |
+
target_class_freq_by_image_mask_marginal = target_class_freq_by_image_mask.sum(0).astype('float32')
|
371 |
+
target_class_freq_by_image_mask_marginal /= target_class_freq_by_image_mask_marginal.sum()
|
372 |
+
|
373 |
+
pred_class_freq_diff = (pred_class_freq_by_image_mask - target_class_freq_by_image_mask).sum(0) / (target_class_freq_by_image_mask.sum(0) + 1e-3)
|
374 |
+
|
375 |
+
total_results = dict()
|
376 |
+
total_results.update({f'total_freq/{self.segm_idx2name[i]}': v
|
377 |
+
for i, v in enumerate(target_class_freq_by_image_total_marginal)
|
378 |
+
if v > 0})
|
379 |
+
total_results.update({f'mask_freq/{self.segm_idx2name[i]}': v
|
380 |
+
for i, v in enumerate(target_class_freq_by_image_mask_marginal)
|
381 |
+
if v > 0})
|
382 |
+
total_results.update({f'mask_freq_diff/{self.segm_idx2name[i]}': v
|
383 |
+
for i, v in enumerate(pred_class_freq_diff)
|
384 |
+
if target_class_freq_by_image_total_marginal[i] > 0})
|
385 |
+
|
386 |
+
if groups is None:
|
387 |
+
return total_results, None
|
388 |
+
|
389 |
+
group_results = dict()
|
390 |
+
grouping = get_groupings(groups)
|
391 |
+
for label, index in grouping.items():
|
392 |
+
group_target_class_freq_by_image_total = target_class_freq_by_image_total[index]
|
393 |
+
group_target_class_freq_by_image_mask = target_class_freq_by_image_mask[index]
|
394 |
+
group_pred_class_freq_by_image_mask = pred_class_freq_by_image_mask[index]
|
395 |
+
|
396 |
+
group_target_class_freq_by_image_total_marginal = group_target_class_freq_by_image_total.sum(0).astype('float32')
|
397 |
+
group_target_class_freq_by_image_total_marginal /= group_target_class_freq_by_image_total_marginal.sum()
|
398 |
+
|
399 |
+
group_target_class_freq_by_image_mask_marginal = group_target_class_freq_by_image_mask.sum(0).astype('float32')
|
400 |
+
group_target_class_freq_by_image_mask_marginal /= group_target_class_freq_by_image_mask_marginal.sum()
|
401 |
+
|
402 |
+
group_pred_class_freq_diff = (group_pred_class_freq_by_image_mask - group_target_class_freq_by_image_mask).sum(0) / (
|
403 |
+
group_target_class_freq_by_image_mask.sum(0) + 1e-3)
|
404 |
+
|
405 |
+
cur_group_results = dict()
|
406 |
+
cur_group_results.update({f'total_freq/{self.segm_idx2name[i]}': v
|
407 |
+
for i, v in enumerate(group_target_class_freq_by_image_total_marginal)
|
408 |
+
if v > 0})
|
409 |
+
cur_group_results.update({f'mask_freq/{self.segm_idx2name[i]}': v
|
410 |
+
for i, v in enumerate(group_target_class_freq_by_image_mask_marginal)
|
411 |
+
if v > 0})
|
412 |
+
cur_group_results.update({f'mask_freq_diff/{self.segm_idx2name[i]}': v
|
413 |
+
for i, v in enumerate(group_pred_class_freq_diff)
|
414 |
+
if group_target_class_freq_by_image_total_marginal[i] > 0})
|
415 |
+
|
416 |
+
group_results[label] = cur_group_results
|
417 |
+
return total_results, group_results
|
418 |
+
|
419 |
+
|
420 |
+
class SegmentationAwareSSIM(SegmentationAwarePairwiseScore):
|
421 |
+
def __init__(self, *args, window_size=11, **kwargs):
|
422 |
+
super().__init__(*args, **kwargs)
|
423 |
+
self.score_impl = SSIM(window_size=window_size, size_average=False).eval()
|
424 |
+
|
425 |
+
def calc_score(self, pred_batch, target_batch, mask):
|
426 |
+
return self.score_impl(pred_batch, target_batch).detach().cpu().numpy()
|
427 |
+
|
428 |
+
|
429 |
+
class SegmentationAwareLPIPS(SegmentationAwarePairwiseScore):
|
430 |
+
def __init__(self, *args, model='net-lin', net='vgg', model_path=None, use_gpu=True, **kwargs):
|
431 |
+
super().__init__(*args, **kwargs)
|
432 |
+
self.score_impl = PerceptualLoss(model=model, net=net, model_path=model_path,
|
433 |
+
use_gpu=use_gpu, spatial=False).eval()
|
434 |
+
|
435 |
+
def calc_score(self, pred_batch, target_batch, mask):
|
436 |
+
return self.score_impl(pred_batch, target_batch).flatten().detach().cpu().numpy()
|
437 |
+
|
438 |
+
|
439 |
+
def calculade_fid_no_img(img_i, activations_pred, activations_target, eps=1e-6):
|
440 |
+
activations_pred = activations_pred.copy()
|
441 |
+
activations_pred[img_i] = activations_target[img_i]
|
442 |
+
return calculate_frechet_distance(activations_pred, activations_target, eps=eps)
|
443 |
+
|
444 |
+
|
445 |
+
class SegmentationAwareFID(SegmentationAwarePairwiseScore):
|
446 |
+
def __init__(self, *args, dims=2048, eps=1e-6, n_jobs=-1, **kwargs):
|
447 |
+
super().__init__(*args, **kwargs)
|
448 |
+
if getattr(FIDScore, '_MODEL', None) is None:
|
449 |
+
block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
|
450 |
+
FIDScore._MODEL = InceptionV3([block_idx]).eval()
|
451 |
+
self.model = FIDScore._MODEL
|
452 |
+
self.eps = eps
|
453 |
+
self.n_jobs = n_jobs
|
454 |
+
|
455 |
+
def calc_score(self, pred_batch, target_batch, mask):
|
456 |
+
activations_pred = self._get_activations(pred_batch)
|
457 |
+
activations_target = self._get_activations(target_batch)
|
458 |
+
return activations_pred, activations_target
|
459 |
+
|
460 |
+
def get_value(self, groups=None, states=None):
|
461 |
+
"""
|
462 |
+
:param groups:
|
463 |
+
:return:
|
464 |
+
total_results: dict of kind {'mean': score mean, 'std': score std}
|
465 |
+
group_results: None, if groups is None;
|
466 |
+
else dict {group_idx: {'mean': score mean among group, 'std': score std among group}}
|
467 |
+
"""
|
468 |
+
if states is not None:
|
469 |
+
(target_class_freq_by_image_total,
|
470 |
+
target_class_freq_by_image_mask,
|
471 |
+
pred_class_freq_by_image_mask,
|
472 |
+
activation_pairs) = states
|
473 |
+
else:
|
474 |
+
target_class_freq_by_image_total = self.target_class_freq_by_image_total
|
475 |
+
target_class_freq_by_image_mask = self.target_class_freq_by_image_mask
|
476 |
+
pred_class_freq_by_image_mask = self.pred_class_freq_by_image_mask
|
477 |
+
activation_pairs = self.individual_values
|
478 |
+
|
479 |
+
target_class_freq_by_image_total = np.concatenate(target_class_freq_by_image_total, axis=0)
|
480 |
+
target_class_freq_by_image_mask = np.concatenate(target_class_freq_by_image_mask, axis=0)
|
481 |
+
pred_class_freq_by_image_mask = np.concatenate(pred_class_freq_by_image_mask, axis=0)
|
482 |
+
activations_pred, activations_target = zip(*activation_pairs)
|
483 |
+
activations_pred = np.concatenate(activations_pred, axis=0)
|
484 |
+
activations_target = np.concatenate(activations_target, axis=0)
|
485 |
+
|
486 |
+
total_results = {
|
487 |
+
'mean': calculate_frechet_distance(activations_pred, activations_target, eps=self.eps),
|
488 |
+
'std': 0,
|
489 |
+
**self.distribute_fid_to_classes(target_class_freq_by_image_mask, activations_pred, activations_target)
|
490 |
+
}
|
491 |
+
|
492 |
+
if groups is None:
|
493 |
+
return total_results, None
|
494 |
+
|
495 |
+
group_results = dict()
|
496 |
+
grouping = get_groupings(groups)
|
497 |
+
for label, index in grouping.items():
|
498 |
+
if len(index) > 1:
|
499 |
+
group_activations_pred = activations_pred[index]
|
500 |
+
group_activations_target = activations_target[index]
|
501 |
+
group_class_freq = target_class_freq_by_image_mask[index]
|
502 |
+
group_results[label] = {
|
503 |
+
'mean': calculate_frechet_distance(group_activations_pred, group_activations_target, eps=self.eps),
|
504 |
+
'std': 0,
|
505 |
+
**self.distribute_fid_to_classes(group_class_freq,
|
506 |
+
group_activations_pred,
|
507 |
+
group_activations_target)
|
508 |
+
}
|
509 |
+
else:
|
510 |
+
group_results[label] = dict(mean=float('nan'), std=0)
|
511 |
+
return total_results, group_results
|
512 |
+
|
513 |
+
def distribute_fid_to_classes(self, class_freq, activations_pred, activations_target):
|
514 |
+
real_fid = calculate_frechet_distance(activations_pred, activations_target, eps=self.eps)
|
515 |
+
|
516 |
+
fid_no_images = Parallel(n_jobs=self.n_jobs)(
|
517 |
+
delayed(calculade_fid_no_img)(img_i, activations_pred, activations_target, eps=self.eps)
|
518 |
+
for img_i in range(activations_pred.shape[0])
|
519 |
+
)
|
520 |
+
errors = real_fid - fid_no_images
|
521 |
+
return distribute_values_to_classes(class_freq, errors, self.segm_idx2name)
|
522 |
+
|
523 |
+
def _get_activations(self, batch):
|
524 |
+
activations = self.model(batch)[0]
|
525 |
+
if activations.shape[2] != 1 or activations.shape[3] != 1:
|
526 |
+
activations = F.adaptive_avg_pool2d(activations, output_size=(1, 1))
|
527 |
+
activations = activations.squeeze(-1).squeeze(-1).detach().cpu().numpy()
|
528 |
+
return activations
|
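For illustration, here is a minimal standalone sketch of how distribute_values_to_classes (defined above) spreads per-image scores across segmentation classes; the class names and pixel counts below are invented for the example.

import numpy as np

def distribute_values_to_classes(target_class_freq_by_image_mask, values, idx2name):
    # same logic as above: average per-image scores, weighting each image by how
    # often the class occurs inside its mask
    total_class_freq = target_class_freq_by_image_mask.sum(0)
    distr_values = (target_class_freq_by_image_mask * values[..., None]).sum(0)
    result = distr_values / (total_class_freq + 1e-3)
    return {idx2name[i]: val for i, val in enumerate(result) if total_class_freq[i] > 0}

# two images, three invented classes
class_freq = np.array([[120., 30., 0.],
                       [ 60.,  0., 90.]])   # per-image pixel counts of each class inside the mask
scores = np.array([0.8, 0.4])               # e.g. per-image SSIM values
print(distribute_values_to_classes(class_freq, scores, {0: 'sky', 1: 'tree', 2: 'wall'}))
# -> roughly {'sky': 0.67, 'tree': 0.8, 'wall': 0.4}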
DH-AISP/2/saicinpainting/evaluation/losses/fid/__init__.py
ADDED
File without changes
|
DH-AISP/2/saicinpainting/evaluation/losses/fid/fid_score.py
ADDED
@@ -0,0 +1,328 @@
1 |
+
#!/usr/bin/env python3
|
2 |
+
"""Calculates the Frechet Inception Distance (FID) to evalulate GANs
|
3 |
+
|
4 |
+
The FID metric calculates the distance between two distributions of images.
|
5 |
+
Typically, we have summary statistics (mean & covariance matrix) of one
|
6 |
+
of these distributions, while the 2nd distribution is given by a GAN.
|
7 |
+
|
8 |
+
When run as a stand-alone program, it compares the distribution of
|
9 |
+
images that are stored as PNG/JPEG at a specified location with a
|
10 |
+
distribution given by summary statistics (in pickle format).
|
11 |
+
|
12 |
+
The FID is calculated by assuming that X_1 and X_2 are the activations of
|
13 |
+
the pool_3 layer of the inception net for generated samples and real world
|
14 |
+
samples respectively.
|
15 |
+
|
16 |
+
See --help to see further details.
|
17 |
+
|
18 |
+
Code adapted from https://github.com/bioinf-jku/TTUR to use PyTorch instead
|
19 |
+
of Tensorflow
|
20 |
+
|
21 |
+
Copyright 2018 Institute of Bioinformatics, JKU Linz
|
22 |
+
|
23 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
24 |
+
you may not use this file except in compliance with the License.
|
25 |
+
You may obtain a copy of the License at
|
26 |
+
|
27 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
28 |
+
|
29 |
+
Unless required by applicable law or agreed to in writing, software
|
30 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
31 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
32 |
+
See the License for the specific language governing permissions and
|
33 |
+
limitations under the License.
|
34 |
+
"""
|
35 |
+
import os
|
36 |
+
import pathlib
|
37 |
+
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
|
38 |
+
|
39 |
+
import numpy as np
|
40 |
+
import torch
|
41 |
+
# from scipy.misc import imread
|
42 |
+
from imageio import imread
|
43 |
+
from PIL import Image, JpegImagePlugin
|
44 |
+
from scipy import linalg
|
45 |
+
from torch.nn.functional import adaptive_avg_pool2d
|
46 |
+
from torchvision.transforms import CenterCrop, Compose, Resize, ToTensor
|
47 |
+
|
48 |
+
try:
|
49 |
+
from tqdm import tqdm
|
50 |
+
except ImportError:
|
51 |
+
# If tqdm is not available, provide a mock version of it
|
52 |
+
def tqdm(x): return x
|
53 |
+
|
54 |
+
try:
|
55 |
+
from .inception import InceptionV3
|
56 |
+
except ModuleNotFoundError:
|
57 |
+
from inception import InceptionV3
|
58 |
+
|
59 |
+
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
|
60 |
+
parser.add_argument('path', type=str, nargs=2,
|
61 |
+
help=('Path to the generated images or '
|
62 |
+
'to .npz statistic files'))
|
63 |
+
parser.add_argument('--batch-size', type=int, default=50,
|
64 |
+
help='Batch size to use')
|
65 |
+
parser.add_argument('--dims', type=int, default=2048,
|
66 |
+
choices=list(InceptionV3.BLOCK_INDEX_BY_DIM),
|
67 |
+
help=('Dimensionality of Inception features to use. '
|
68 |
+
'By default, uses pool3 features'))
|
69 |
+
parser.add_argument('-c', '--gpu', default='', type=str,
|
70 |
+
help='GPU to use (leave blank for CPU only)')
|
71 |
+
parser.add_argument('--resize', default=256)
|
72 |
+
|
73 |
+
transform = Compose([Resize(256), CenterCrop(256), ToTensor()])
|
74 |
+
|
75 |
+
|
76 |
+
def get_activations(files, model, batch_size=50, dims=2048,
|
77 |
+
cuda=False, verbose=False, keep_size=False):
|
78 |
+
"""Calculates the activations of the pool_3 layer for all images.
|
79 |
+
|
80 |
+
Params:
|
81 |
+
-- files : List of image files paths
|
82 |
+
-- model : Instance of inception model
|
83 |
+
-- batch_size : Batch size of images for the model to process at once.
|
84 |
+
Make sure that the number of samples is a multiple of
|
85 |
+
the batch size, otherwise some samples are ignored. This
|
86 |
+
behavior is retained to match the original FID score
|
87 |
+
implementation.
|
88 |
+
-- dims : Dimensionality of features returned by Inception
|
89 |
+
-- cuda : If set to True, use GPU
|
90 |
+
-- verbose : If set to True and parameter out_step is given, the number
|
91 |
+
of calculated batches is reported.
|
92 |
+
Returns:
|
93 |
+
-- A numpy array of dimension (num images, dims) that contains the
|
94 |
+
activations of the given tensor when feeding inception with the
|
95 |
+
query tensor.
|
96 |
+
"""
|
97 |
+
model.eval()
|
98 |
+
|
99 |
+
if len(files) % batch_size != 0:
|
100 |
+
print(('Warning: number of images is not a multiple of the '
|
101 |
+
'batch size. Some samples are going to be ignored.'))
|
102 |
+
if batch_size > len(files):
|
103 |
+
print(('Warning: batch size is bigger than the data size. '
|
104 |
+
'Setting batch size to data size'))
|
105 |
+
batch_size = len(files)
|
106 |
+
|
107 |
+
n_batches = len(files) // batch_size
|
108 |
+
n_used_imgs = n_batches * batch_size
|
109 |
+
|
110 |
+
pred_arr = np.empty((n_used_imgs, dims))
|
111 |
+
|
112 |
+
for i in tqdm(range(n_batches)):
|
113 |
+
if verbose:
|
114 |
+
print('\rPropagating batch %d/%d' % (i + 1, n_batches),
|
115 |
+
end='', flush=True)
|
116 |
+
start = i * batch_size
|
117 |
+
end = start + batch_size
|
118 |
+
|
119 |
+
# # Official code goes below
|
120 |
+
# images = np.array([imread(str(f)).astype(np.float32)
|
121 |
+
# for f in files[start:end]])
|
122 |
+
|
123 |
+
# # Reshape to (n_images, 3, height, width)
|
124 |
+
# images = images.transpose((0, 3, 1, 2))
|
125 |
+
# images /= 255
|
126 |
+
# batch = torch.from_numpy(images).type(torch.FloatTensor)
|
127 |
+
# #
|
128 |
+
|
129 |
+
t = transform if not keep_size else ToTensor()
|
130 |
+
|
131 |
+
if isinstance(files[0], pathlib.PosixPath):
|
132 |
+
images = [t(Image.open(str(f))) for f in files[start:end]]
|
133 |
+
|
134 |
+
elif isinstance(files[0], Image.Image):
|
135 |
+
images = [t(f) for f in files[start:end]]
|
136 |
+
|
137 |
+
else:
|
138 |
+
raise ValueError(f"Unknown data type for image: {type(files[0])}")
|
139 |
+
|
140 |
+
batch = torch.stack(images)
|
141 |
+
|
142 |
+
if cuda:
|
143 |
+
batch = batch.cuda()
|
144 |
+
|
145 |
+
pred = model(batch)[0]
|
146 |
+
|
147 |
+
# If model output is not scalar, apply global spatial average pooling.
|
148 |
+
# This happens if you choose a dimensionality not equal 2048.
|
149 |
+
if pred.shape[2] != 1 or pred.shape[3] != 1:
|
150 |
+
pred = adaptive_avg_pool2d(pred, output_size=(1, 1))
|
151 |
+
|
152 |
+
pred_arr[start:end] = pred.cpu().data.numpy().reshape(batch_size, -1)
|
153 |
+
|
154 |
+
if verbose:
|
155 |
+
print(' done')
|
156 |
+
|
157 |
+
return pred_arr
|
158 |
+
|
159 |
+
|
160 |
+
def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
|
161 |
+
"""Numpy implementation of the Frechet Distance.
|
162 |
+
The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1)
|
163 |
+
and X_2 ~ N(mu_2, C_2) is
|
164 |
+
d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).
|
165 |
+
|
166 |
+
Stable version by Dougal J. Sutherland.
|
167 |
+
|
168 |
+
Params:
|
169 |
+
-- mu1 : Numpy array containing the activations of a layer of the
|
170 |
+
inception net (like returned by the function 'get_predictions')
|
171 |
+
for generated samples.
|
172 |
+
-- mu2   : The sample mean over activations, precalculated on a
|
173 |
+
representative data set.
|
174 |
+
-- sigma1: The covariance matrix over activations for generated samples.
|
175 |
+
-- sigma2: The covariance matrix over activations, precalculated on a
|
176 |
+
representative data set.
|
177 |
+
|
178 |
+
Returns:
|
179 |
+
-- : The Frechet Distance.
|
180 |
+
"""
|
181 |
+
|
182 |
+
mu1 = np.atleast_1d(mu1)
|
183 |
+
mu2 = np.atleast_1d(mu2)
|
184 |
+
|
185 |
+
sigma1 = np.atleast_2d(sigma1)
|
186 |
+
sigma2 = np.atleast_2d(sigma2)
|
187 |
+
|
188 |
+
assert mu1.shape == mu2.shape, \
|
189 |
+
'Training and test mean vectors have different lengths'
|
190 |
+
assert sigma1.shape == sigma2.shape, \
|
191 |
+
'Training and test covariances have different dimensions'
|
192 |
+
|
193 |
+
diff = mu1 - mu2
|
194 |
+
|
195 |
+
# Product might be almost singular
|
196 |
+
covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
|
197 |
+
if not np.isfinite(covmean).all():
|
198 |
+
msg = ('fid calculation produces singular product; '
|
199 |
+
'adding %s to diagonal of cov estimates') % eps
|
200 |
+
print(msg)
|
201 |
+
offset = np.eye(sigma1.shape[0]) * eps
|
202 |
+
covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))
|
203 |
+
|
204 |
+
# Numerical error might give slight imaginary component
|
205 |
+
if np.iscomplexobj(covmean):
|
206 |
+
# if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
|
207 |
+
if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-2):
|
208 |
+
m = np.max(np.abs(covmean.imag))
|
209 |
+
raise ValueError('Imaginary component {}'.format(m))
|
210 |
+
covmean = covmean.real
|
211 |
+
|
212 |
+
tr_covmean = np.trace(covmean)
|
213 |
+
|
214 |
+
return (diff.dot(diff) + np.trace(sigma1) +
|
215 |
+
np.trace(sigma2) - 2 * tr_covmean)
|
216 |
+
|
217 |
+
|
218 |
+
def calculate_activation_statistics(files, model, batch_size=50,
|
219 |
+
dims=2048, cuda=False, verbose=False, keep_size=False):
|
220 |
+
"""Calculation of the statistics used by the FID.
|
221 |
+
Params:
|
222 |
+
-- files : List of image files paths
|
223 |
+
-- model : Instance of inception model
|
224 |
+
-- batch_size : The images numpy array is split into batches with
|
225 |
+
batch size batch_size. A reasonable batch size
|
226 |
+
depends on the hardware.
|
227 |
+
-- dims : Dimensionality of features returned by Inception
|
228 |
+
-- cuda : If set to True, use GPU
|
229 |
+
-- verbose : If set to True and parameter out_step is given, the
|
230 |
+
number of calculated batches is reported.
|
231 |
+
Returns:
|
232 |
+
-- mu : The mean over samples of the activations of the pool_3 layer of
|
233 |
+
the inception model.
|
234 |
+
-- sigma : The covariance matrix of the activations of the pool_3 layer of
|
235 |
+
the inception model.
|
236 |
+
"""
|
237 |
+
act = get_activations(files, model, batch_size, dims, cuda, verbose, keep_size=keep_size)
|
238 |
+
mu = np.mean(act, axis=0)
|
239 |
+
sigma = np.cov(act, rowvar=False)
|
240 |
+
return mu, sigma
|
241 |
+
|
242 |
+
|
243 |
+
def _compute_statistics_of_path(path, model, batch_size, dims, cuda):
|
244 |
+
if path.endswith('.npz'):
|
245 |
+
f = np.load(path)
|
246 |
+
m, s = f['mu'][:], f['sigma'][:]
|
247 |
+
f.close()
|
248 |
+
else:
|
249 |
+
path = pathlib.Path(path)
|
250 |
+
files = list(path.glob('*.jpg')) + list(path.glob('*.png'))
|
251 |
+
m, s = calculate_activation_statistics(files, model, batch_size,
|
252 |
+
dims, cuda)
|
253 |
+
|
254 |
+
return m, s
|
255 |
+
|
256 |
+
|
257 |
+
def _compute_statistics_of_images(images, model, batch_size, dims, cuda, keep_size=False):
|
258 |
+
if isinstance(images, list): # exact paths to files are provided
|
259 |
+
m, s = calculate_activation_statistics(images, model, batch_size,
|
260 |
+
dims, cuda, keep_size=keep_size)
|
261 |
+
|
262 |
+
return m, s
|
263 |
+
|
264 |
+
else:
|
265 |
+
raise ValueError
|
266 |
+
|
267 |
+
|
268 |
+
def calculate_fid_given_paths(paths, batch_size, cuda, dims):
|
269 |
+
"""Calculates the FID of two paths"""
|
270 |
+
for p in paths:
|
271 |
+
if not os.path.exists(p):
|
272 |
+
raise RuntimeError('Invalid path: %s' % p)
|
273 |
+
|
274 |
+
block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
|
275 |
+
|
276 |
+
model = InceptionV3([block_idx])
|
277 |
+
if cuda:
|
278 |
+
model.cuda()
|
279 |
+
|
280 |
+
m1, s1 = _compute_statistics_of_path(paths[0], model, batch_size,
|
281 |
+
dims, cuda)
|
282 |
+
m2, s2 = _compute_statistics_of_path(paths[1], model, batch_size,
|
283 |
+
dims, cuda)
|
284 |
+
fid_value = calculate_frechet_distance(m1, s1, m2, s2)
|
285 |
+
|
286 |
+
return fid_value
|
287 |
+
|
288 |
+
|
289 |
+
def calculate_fid_given_images(images, batch_size, cuda, dims, use_globals=False, keep_size=False):
|
290 |
+
if use_globals:
|
291 |
+
global FID_MODEL # for multiprocessing
|
292 |
+
|
293 |
+
for imgs in images:
|
294 |
+
if isinstance(imgs, list) and isinstance(imgs[0], (Image.Image, JpegImagePlugin.JpegImageFile)):
|
295 |
+
pass
|
296 |
+
else:
|
297 |
+
raise RuntimeError('Invalid images')
|
298 |
+
|
299 |
+
block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
|
300 |
+
|
301 |
+
if 'FID_MODEL' not in globals() or not use_globals:
|
302 |
+
model = InceptionV3([block_idx])
|
303 |
+
if cuda:
|
304 |
+
model.cuda()
|
305 |
+
|
306 |
+
if use_globals:
|
307 |
+
FID_MODEL = model
|
308 |
+
|
309 |
+
else:
|
310 |
+
model = FID_MODEL
|
311 |
+
|
312 |
+
m1, s1 = _compute_statistics_of_images(images[0], model, batch_size,
|
313 |
+
dims, cuda, keep_size=False)
|
314 |
+
m2, s2 = _compute_statistics_of_images(images[1], model, batch_size,
|
315 |
+
dims, cuda, keep_size=False)
|
316 |
+
fid_value = calculate_frechet_distance(m1, s1, m2, s2)
|
317 |
+
return fid_value
|
318 |
+
|
319 |
+
|
320 |
+
if __name__ == '__main__':
|
321 |
+
args = parser.parse_args()
|
322 |
+
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
|
323 |
+
|
324 |
+
fid_value = calculate_fid_given_paths(args.path,
|
325 |
+
args.batch_size,
|
326 |
+
args.gpu != '',
|
327 |
+
args.dims)
|
328 |
+
print('FID: ', fid_value)
|
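A minimal usage sketch for the script above, assuming DH-AISP/2 is on PYTHONPATH; the two image directories are placeholders.

from saicinpainting.evaluation.losses.fid.fid_score import calculate_fid_given_paths

fid_value = calculate_fid_given_paths(['/path/to/real_images', '/path/to/generated_images'],
                                      batch_size=50,   # the script's default
                                      cuda=False,      # set True when a GPU is available
                                      dims=2048)       # pool_3 features, the standard FID choice
print('FID: ', fid_value)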
DH-AISP/2/saicinpainting/evaluation/losses/fid/inception.py
ADDED
@@ -0,0 +1,323 @@
1 |
+
import logging
|
2 |
+
|
3 |
+
import torch
|
4 |
+
import torch.nn as nn
|
5 |
+
import torch.nn.functional as F
|
6 |
+
from torchvision import models
|
7 |
+
|
8 |
+
try:
|
9 |
+
from torchvision.models.utils import load_state_dict_from_url
|
10 |
+
except ImportError:
|
11 |
+
from torch.utils.model_zoo import load_url as load_state_dict_from_url
|
12 |
+
|
13 |
+
# Inception weights ported to Pytorch from
|
14 |
+
# http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz
|
15 |
+
FID_WEIGHTS_URL = 'https://github.com/mseitzer/pytorch-fid/releases/download/fid_weights/pt_inception-2015-12-05-6726825d.pth'
|
16 |
+
|
17 |
+
|
18 |
+
LOGGER = logging.getLogger(__name__)
|
19 |
+
|
20 |
+
|
21 |
+
class InceptionV3(nn.Module):
|
22 |
+
"""Pretrained InceptionV3 network returning feature maps"""
|
23 |
+
|
24 |
+
# Index of default block of inception to return,
|
25 |
+
# corresponds to output of final average pooling
|
26 |
+
DEFAULT_BLOCK_INDEX = 3
|
27 |
+
|
28 |
+
# Maps feature dimensionality to their output blocks indices
|
29 |
+
BLOCK_INDEX_BY_DIM = {
|
30 |
+
64: 0, # First max pooling features
|
31 |
+
192: 1,   # Second max pooling features
|
32 |
+
768: 2, # Pre-aux classifier features
|
33 |
+
2048: 3 # Final average pooling features
|
34 |
+
}
|
35 |
+
|
36 |
+
def __init__(self,
|
37 |
+
output_blocks=[DEFAULT_BLOCK_INDEX],
|
38 |
+
resize_input=True,
|
39 |
+
normalize_input=True,
|
40 |
+
requires_grad=False,
|
41 |
+
use_fid_inception=True):
|
42 |
+
"""Build pretrained InceptionV3
|
43 |
+
|
44 |
+
Parameters
|
45 |
+
----------
|
46 |
+
output_blocks : list of int
|
47 |
+
Indices of blocks to return features of. Possible values are:
|
48 |
+
- 0: corresponds to output of first max pooling
|
49 |
+
- 1: corresponds to output of second max pooling
|
50 |
+
- 2: corresponds to output which is fed to aux classifier
|
51 |
+
- 3: corresponds to output of final average pooling
|
52 |
+
resize_input : bool
|
53 |
+
If true, bilinearly resizes input to width and height 299 before
|
54 |
+
feeding input to model. As the network without fully connected
|
55 |
+
layers is fully convolutional, it should be able to handle inputs
|
56 |
+
of arbitrary size, so resizing might not be strictly needed
|
57 |
+
normalize_input : bool
|
58 |
+
If true, scales the input from range (0, 1) to the range the
|
59 |
+
pretrained Inception network expects, namely (-1, 1)
|
60 |
+
requires_grad : bool
|
61 |
+
If true, parameters of the model require gradients. Possibly useful
|
62 |
+
for finetuning the network
|
63 |
+
use_fid_inception : bool
|
64 |
+
If true, uses the pretrained Inception model used in Tensorflow's
|
65 |
+
FID implementation. If false, uses the pretrained Inception model
|
66 |
+
available in torchvision. The FID Inception model has different
|
67 |
+
weights and a slightly different structure from torchvision's
|
68 |
+
Inception model. If you want to compute FID scores, you are
|
69 |
+
strongly advised to set this parameter to true to get comparable
|
70 |
+
results.
|
71 |
+
"""
|
72 |
+
super(InceptionV3, self).__init__()
|
73 |
+
|
74 |
+
self.resize_input = resize_input
|
75 |
+
self.normalize_input = normalize_input
|
76 |
+
self.output_blocks = sorted(output_blocks)
|
77 |
+
self.last_needed_block = max(output_blocks)
|
78 |
+
|
79 |
+
assert self.last_needed_block <= 3, \
|
80 |
+
'Last possible output block index is 3'
|
81 |
+
|
82 |
+
self.blocks = nn.ModuleList()
|
83 |
+
|
84 |
+
if use_fid_inception:
|
85 |
+
inception = fid_inception_v3()
|
86 |
+
else:
|
87 |
+
inception = models.inception_v3(pretrained=True)
|
88 |
+
|
89 |
+
# Block 0: input to maxpool1
|
90 |
+
block0 = [
|
91 |
+
inception.Conv2d_1a_3x3,
|
92 |
+
inception.Conv2d_2a_3x3,
|
93 |
+
inception.Conv2d_2b_3x3,
|
94 |
+
nn.MaxPool2d(kernel_size=3, stride=2)
|
95 |
+
]
|
96 |
+
self.blocks.append(nn.Sequential(*block0))
|
97 |
+
|
98 |
+
# Block 1: maxpool1 to maxpool2
|
99 |
+
if self.last_needed_block >= 1:
|
100 |
+
block1 = [
|
101 |
+
inception.Conv2d_3b_1x1,
|
102 |
+
inception.Conv2d_4a_3x3,
|
103 |
+
nn.MaxPool2d(kernel_size=3, stride=2)
|
104 |
+
]
|
105 |
+
self.blocks.append(nn.Sequential(*block1))
|
106 |
+
|
107 |
+
# Block 2: maxpool2 to aux classifier
|
108 |
+
if self.last_needed_block >= 2:
|
109 |
+
block2 = [
|
110 |
+
inception.Mixed_5b,
|
111 |
+
inception.Mixed_5c,
|
112 |
+
inception.Mixed_5d,
|
113 |
+
inception.Mixed_6a,
|
114 |
+
inception.Mixed_6b,
|
115 |
+
inception.Mixed_6c,
|
116 |
+
inception.Mixed_6d,
|
117 |
+
inception.Mixed_6e,
|
118 |
+
]
|
119 |
+
self.blocks.append(nn.Sequential(*block2))
|
120 |
+
|
121 |
+
# Block 3: aux classifier to final avgpool
|
122 |
+
if self.last_needed_block >= 3:
|
123 |
+
block3 = [
|
124 |
+
inception.Mixed_7a,
|
125 |
+
inception.Mixed_7b,
|
126 |
+
inception.Mixed_7c,
|
127 |
+
nn.AdaptiveAvgPool2d(output_size=(1, 1))
|
128 |
+
]
|
129 |
+
self.blocks.append(nn.Sequential(*block3))
|
130 |
+
|
131 |
+
for param in self.parameters():
|
132 |
+
param.requires_grad = requires_grad
|
133 |
+
|
134 |
+
def forward(self, inp):
|
135 |
+
"""Get Inception feature maps
|
136 |
+
|
137 |
+
Parameters
|
138 |
+
----------
|
139 |
+
inp : torch.autograd.Variable
|
140 |
+
Input tensor of shape Bx3xHxW. Values are expected to be in
|
141 |
+
range (0, 1)
|
142 |
+
|
143 |
+
Returns
|
144 |
+
-------
|
145 |
+
List of torch.autograd.Variable, corresponding to the selected output
|
146 |
+
block, sorted ascending by index
|
147 |
+
"""
|
148 |
+
outp = []
|
149 |
+
x = inp
|
150 |
+
|
151 |
+
if self.resize_input:
|
152 |
+
x = F.interpolate(x,
|
153 |
+
size=(299, 299),
|
154 |
+
mode='bilinear',
|
155 |
+
align_corners=False)
|
156 |
+
|
157 |
+
if self.normalize_input:
|
158 |
+
x = 2 * x - 1 # Scale from range (0, 1) to range (-1, 1)
|
159 |
+
|
160 |
+
for idx, block in enumerate(self.blocks):
|
161 |
+
x = block(x)
|
162 |
+
if idx in self.output_blocks:
|
163 |
+
outp.append(x)
|
164 |
+
|
165 |
+
if idx == self.last_needed_block:
|
166 |
+
break
|
167 |
+
|
168 |
+
return outp
|
169 |
+
|
170 |
+
|
171 |
+
def fid_inception_v3():
|
172 |
+
"""Build pretrained Inception model for FID computation
|
173 |
+
|
174 |
+
The Inception model for FID computation uses a different set of weights
|
175 |
+
and has a slightly different structure than torchvision's Inception.
|
176 |
+
|
177 |
+
This method first constructs torchvision's Inception and then patches the
|
178 |
+
necessary parts that are different in the FID Inception model.
|
179 |
+
"""
|
180 |
+
LOGGER.info('fid_inception_v3 called')
|
181 |
+
inception = models.inception_v3(num_classes=1008,
|
182 |
+
aux_logits=False,
|
183 |
+
pretrained=False)
|
184 |
+
LOGGER.info('models.inception_v3 done')
|
185 |
+
inception.Mixed_5b = FIDInceptionA(192, pool_features=32)
|
186 |
+
inception.Mixed_5c = FIDInceptionA(256, pool_features=64)
|
187 |
+
inception.Mixed_5d = FIDInceptionA(288, pool_features=64)
|
188 |
+
inception.Mixed_6b = FIDInceptionC(768, channels_7x7=128)
|
189 |
+
inception.Mixed_6c = FIDInceptionC(768, channels_7x7=160)
|
190 |
+
inception.Mixed_6d = FIDInceptionC(768, channels_7x7=160)
|
191 |
+
inception.Mixed_6e = FIDInceptionC(768, channels_7x7=192)
|
192 |
+
inception.Mixed_7b = FIDInceptionE_1(1280)
|
193 |
+
inception.Mixed_7c = FIDInceptionE_2(2048)
|
194 |
+
|
195 |
+
LOGGER.info('fid_inception_v3 patching done')
|
196 |
+
|
197 |
+
state_dict = load_state_dict_from_url(FID_WEIGHTS_URL, progress=True)
|
198 |
+
LOGGER.info('fid_inception_v3 weights downloaded')
|
199 |
+
|
200 |
+
inception.load_state_dict(state_dict)
|
201 |
+
LOGGER.info('fid_inception_v3 weights loaded into model')
|
202 |
+
|
203 |
+
return inception
|
204 |
+
|
205 |
+
|
206 |
+
class FIDInceptionA(models.inception.InceptionA):
|
207 |
+
"""InceptionA block patched for FID computation"""
|
208 |
+
def __init__(self, in_channels, pool_features):
|
209 |
+
super(FIDInceptionA, self).__init__(in_channels, pool_features)
|
210 |
+
|
211 |
+
def forward(self, x):
|
212 |
+
branch1x1 = self.branch1x1(x)
|
213 |
+
|
214 |
+
branch5x5 = self.branch5x5_1(x)
|
215 |
+
branch5x5 = self.branch5x5_2(branch5x5)
|
216 |
+
|
217 |
+
branch3x3dbl = self.branch3x3dbl_1(x)
|
218 |
+
branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
|
219 |
+
branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl)
|
220 |
+
|
221 |
+
# Patch: Tensorflow's average pool does not use the padded zeros in
|
222 |
+
# its average calculation
|
223 |
+
branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1,
|
224 |
+
count_include_pad=False)
|
225 |
+
branch_pool = self.branch_pool(branch_pool)
|
226 |
+
|
227 |
+
outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool]
|
228 |
+
return torch.cat(outputs, 1)
|
229 |
+
|
230 |
+
|
231 |
+
class FIDInceptionC(models.inception.InceptionC):
|
232 |
+
"""InceptionC block patched for FID computation"""
|
233 |
+
def __init__(self, in_channels, channels_7x7):
|
234 |
+
super(FIDInceptionC, self).__init__(in_channels, channels_7x7)
|
235 |
+
|
236 |
+
def forward(self, x):
|
237 |
+
branch1x1 = self.branch1x1(x)
|
238 |
+
|
239 |
+
branch7x7 = self.branch7x7_1(x)
|
240 |
+
branch7x7 = self.branch7x7_2(branch7x7)
|
241 |
+
branch7x7 = self.branch7x7_3(branch7x7)
|
242 |
+
|
243 |
+
branch7x7dbl = self.branch7x7dbl_1(x)
|
244 |
+
branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl)
|
245 |
+
branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl)
|
246 |
+
branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl)
|
247 |
+
branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl)
|
248 |
+
|
249 |
+
# Patch: Tensorflow's average pool does not use the padded zeros in
|
250 |
+
# its average calculation
|
251 |
+
branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1,
|
252 |
+
count_include_pad=False)
|
253 |
+
branch_pool = self.branch_pool(branch_pool)
|
254 |
+
|
255 |
+
outputs = [branch1x1, branch7x7, branch7x7dbl, branch_pool]
|
256 |
+
return torch.cat(outputs, 1)
|
257 |
+
|
258 |
+
|
259 |
+
class FIDInceptionE_1(models.inception.InceptionE):
|
260 |
+
"""First InceptionE block patched for FID computation"""
|
261 |
+
def __init__(self, in_channels):
|
262 |
+
super(FIDInceptionE_1, self).__init__(in_channels)
|
263 |
+
|
264 |
+
def forward(self, x):
|
265 |
+
branch1x1 = self.branch1x1(x)
|
266 |
+
|
267 |
+
branch3x3 = self.branch3x3_1(x)
|
268 |
+
branch3x3 = [
|
269 |
+
self.branch3x3_2a(branch3x3),
|
270 |
+
self.branch3x3_2b(branch3x3),
|
271 |
+
]
|
272 |
+
branch3x3 = torch.cat(branch3x3, 1)
|
273 |
+
|
274 |
+
branch3x3dbl = self.branch3x3dbl_1(x)
|
275 |
+
branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
|
276 |
+
branch3x3dbl = [
|
277 |
+
self.branch3x3dbl_3a(branch3x3dbl),
|
278 |
+
self.branch3x3dbl_3b(branch3x3dbl),
|
279 |
+
]
|
280 |
+
branch3x3dbl = torch.cat(branch3x3dbl, 1)
|
281 |
+
|
282 |
+
# Patch: Tensorflow's average pool does not use the padded zeros in
|
283 |
+
# its average calculation
|
284 |
+
branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1,
|
285 |
+
count_include_pad=False)
|
286 |
+
branch_pool = self.branch_pool(branch_pool)
|
287 |
+
|
288 |
+
outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool]
|
289 |
+
return torch.cat(outputs, 1)
|
290 |
+
|
291 |
+
|
292 |
+
class FIDInceptionE_2(models.inception.InceptionE):
|
293 |
+
"""Second InceptionE block patched for FID computation"""
|
294 |
+
def __init__(self, in_channels):
|
295 |
+
super(FIDInceptionE_2, self).__init__(in_channels)
|
296 |
+
|
297 |
+
def forward(self, x):
|
298 |
+
branch1x1 = self.branch1x1(x)
|
299 |
+
|
300 |
+
branch3x3 = self.branch3x3_1(x)
|
301 |
+
branch3x3 = [
|
302 |
+
self.branch3x3_2a(branch3x3),
|
303 |
+
self.branch3x3_2b(branch3x3),
|
304 |
+
]
|
305 |
+
branch3x3 = torch.cat(branch3x3, 1)
|
306 |
+
|
307 |
+
branch3x3dbl = self.branch3x3dbl_1(x)
|
308 |
+
branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
|
309 |
+
branch3x3dbl = [
|
310 |
+
self.branch3x3dbl_3a(branch3x3dbl),
|
311 |
+
self.branch3x3dbl_3b(branch3x3dbl),
|
312 |
+
]
|
313 |
+
branch3x3dbl = torch.cat(branch3x3dbl, 1)
|
314 |
+
|
315 |
+
# Patch: The FID Inception model uses max pooling instead of average
|
316 |
+
# pooling. This is likely an error in this specific Inception
|
317 |
+
# implementation, as other Inception models use average pooling here
|
318 |
+
# (which matches the description in the paper).
|
319 |
+
branch_pool = F.max_pool2d(x, kernel_size=3, stride=1, padding=1)
|
320 |
+
branch_pool = self.branch_pool(branch_pool)
|
321 |
+
|
322 |
+
outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool]
|
323 |
+
return torch.cat(outputs, 1)
|
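A short sketch of how this wrapper is typically driven; the random tensors stand in for real image batches scaled to [0, 1], and the FID weights are downloaded on first use.

import torch
from saicinpainting.evaluation.losses.fid.inception import InceptionV3

block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[2048]   # final average pooling features
model = InceptionV3([block_idx]).eval()
with torch.no_grad():
    batch = torch.rand(4, 3, 299, 299)             # placeholder images in [0, 1]
    feats = model(batch)[0]                        # (4, 2048, 1, 1) pool_3 activations
print(feats.squeeze(-1).squeeze(-1).shape)         # torch.Size([4, 2048]); fid_score.py turns these into (mu, sigma)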
DH-AISP/2/saicinpainting/evaluation/losses/lpips.py
ADDED
@@ -0,0 +1,891 @@
1 |
+
############################################################
|
2 |
+
# The contents below have been combined using files in the #
|
3 |
+
# following repository: #
|
4 |
+
# https://github.com/richzhang/PerceptualSimilarity #
|
5 |
+
############################################################
|
6 |
+
|
7 |
+
############################################################
|
8 |
+
# __init__.py #
|
9 |
+
############################################################
|
10 |
+
|
11 |
+
import numpy as np
|
12 |
+
from skimage.metrics import structural_similarity
|
13 |
+
import torch
|
14 |
+
|
15 |
+
from saicinpainting.utils import get_shape
|
16 |
+
|
17 |
+
|
18 |
+
class PerceptualLoss(torch.nn.Module):
|
19 |
+
def __init__(self, model='net-lin', net='alex', colorspace='rgb', model_path=None, spatial=False, use_gpu=True):
|
20 |
+
# VGG using our perceptually-learned weights (LPIPS metric)
|
21 |
+
# def __init__(self, model='net', net='vgg', use_gpu=True): # "default" way of using VGG as a perceptual loss
|
22 |
+
super(PerceptualLoss, self).__init__()
|
23 |
+
self.use_gpu = use_gpu
|
24 |
+
self.spatial = spatial
|
25 |
+
self.model = DistModel()
|
26 |
+
self.model.initialize(model=model, net=net, use_gpu=use_gpu, colorspace=colorspace,
|
27 |
+
model_path=model_path, spatial=self.spatial)
|
28 |
+
|
29 |
+
def forward(self, pred, target, normalize=True):
|
30 |
+
"""
|
31 |
+
Pred and target are Variables.
|
32 |
+
If normalize is True, assumes the images are between [0,1] and then scales them between [-1,+1]
|
33 |
+
If normalize is False, assumes the images are already between [-1,+1]
|
34 |
+
Inputs pred and target are Nx3xHxW
|
35 |
+
Output pytorch Variable N long
|
36 |
+
"""
|
37 |
+
|
38 |
+
if normalize:
|
39 |
+
target = 2 * target - 1
|
40 |
+
pred = 2 * pred - 1
|
41 |
+
|
42 |
+
return self.model(target, pred)
|
43 |
+
|
44 |
+
|
45 |
+
def normalize_tensor(in_feat, eps=1e-10):
|
46 |
+
norm_factor = torch.sqrt(torch.sum(in_feat ** 2, dim=1, keepdim=True))
|
47 |
+
return in_feat / (norm_factor + eps)
|
48 |
+
|
49 |
+
|
50 |
+
def l2(p0, p1, range=255.):
|
51 |
+
return .5 * np.mean((p0 / range - p1 / range) ** 2)
|
52 |
+
|
53 |
+
|
54 |
+
def psnr(p0, p1, peak=255.):
|
55 |
+
return 10 * np.log10(peak ** 2 / np.mean((1. * p0 - 1. * p1) ** 2))
|
56 |
+
|
57 |
+
|
58 |
+
def dssim(p0, p1, range=255.):
|
59 |
+
return (1 - structural_similarity(p0, p1, data_range=range, multichannel=True)) / 2.
|
60 |
+
|
61 |
+
|
62 |
+
def rgb2lab(in_img, mean_cent=False):
|
63 |
+
from skimage import color
|
64 |
+
img_lab = color.rgb2lab(in_img)
|
65 |
+
if (mean_cent):
|
66 |
+
img_lab[:, :, 0] = img_lab[:, :, 0] - 50
|
67 |
+
return img_lab
|
68 |
+
|
69 |
+
|
70 |
+
def tensor2np(tensor_obj):
|
71 |
+
# change dimension of a tensor object into a numpy array
|
72 |
+
return tensor_obj[0].cpu().float().numpy().transpose((1, 2, 0))
|
73 |
+
|
74 |
+
|
75 |
+
def np2tensor(np_obj):
|
76 |
+
# change dimension of np array into tensor array
|
77 |
+
return torch.Tensor(np_obj[:, :, :, np.newaxis].transpose((3, 2, 0, 1)))
|
78 |
+
|
79 |
+
|
80 |
+
def tensor2tensorlab(image_tensor, to_norm=True, mc_only=False):
|
81 |
+
# image tensor to lab tensor
|
82 |
+
from skimage import color
|
83 |
+
|
84 |
+
img = tensor2im(image_tensor)
|
85 |
+
img_lab = color.rgb2lab(img)
|
86 |
+
if (mc_only):
|
87 |
+
img_lab[:, :, 0] = img_lab[:, :, 0] - 50
|
88 |
+
if (to_norm and not mc_only):
|
89 |
+
img_lab[:, :, 0] = img_lab[:, :, 0] - 50
|
90 |
+
img_lab = img_lab / 100.
|
91 |
+
|
92 |
+
return np2tensor(img_lab)
|
93 |
+
|
94 |
+
|
95 |
+
def tensorlab2tensor(lab_tensor, return_inbnd=False):
|
96 |
+
from skimage import color
|
97 |
+
import warnings
|
98 |
+
warnings.filterwarnings("ignore")
|
99 |
+
|
100 |
+
lab = tensor2np(lab_tensor) * 100.
|
101 |
+
lab[:, :, 0] = lab[:, :, 0] + 50
|
102 |
+
|
103 |
+
rgb_back = 255. * np.clip(color.lab2rgb(lab.astype('float')), 0, 1)
|
104 |
+
if (return_inbnd):
|
105 |
+
# convert back to lab, see if we match
|
106 |
+
lab_back = color.rgb2lab(rgb_back.astype('uint8'))
|
107 |
+
mask = 1. * np.isclose(lab_back, lab, atol=2.)
|
108 |
+
mask = np2tensor(np.prod(mask, axis=2)[:, :, np.newaxis])
|
109 |
+
return (im2tensor(rgb_back), mask)
|
110 |
+
else:
|
111 |
+
return im2tensor(rgb_back)
|
112 |
+
|
113 |
+
|
114 |
+
def rgb2lab(input):
|
115 |
+
from skimage import color
|
116 |
+
return color.rgb2lab(input / 255.)
|
117 |
+
|
118 |
+
|
119 |
+
def tensor2im(image_tensor, imtype=np.uint8, cent=1., factor=255. / 2.):
|
120 |
+
image_numpy = image_tensor[0].cpu().float().numpy()
|
121 |
+
image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + cent) * factor
|
122 |
+
return image_numpy.astype(imtype)
|
123 |
+
|
124 |
+
|
125 |
+
def im2tensor(image, imtype=np.uint8, cent=1., factor=255. / 2.):
|
126 |
+
return torch.Tensor((image / factor - cent)
|
127 |
+
[:, :, :, np.newaxis].transpose((3, 2, 0, 1)))
|
128 |
+
|
129 |
+
|
130 |
+
def tensor2vec(vector_tensor):
|
131 |
+
return vector_tensor.data.cpu().numpy()[:, :, 0, 0]
|
132 |
+
|
133 |
+
|
134 |
+
def voc_ap(rec, prec, use_07_metric=False):
|
135 |
+
""" ap = voc_ap(rec, prec, [use_07_metric])
|
136 |
+
Compute VOC AP given precision and recall.
|
137 |
+
If use_07_metric is true, uses the
|
138 |
+
VOC 07 11 point method (default:False).
|
139 |
+
"""
|
140 |
+
if use_07_metric:
|
141 |
+
# 11 point metric
|
142 |
+
ap = 0.
|
143 |
+
for t in np.arange(0., 1.1, 0.1):
|
144 |
+
if np.sum(rec >= t) == 0:
|
145 |
+
p = 0
|
146 |
+
else:
|
147 |
+
p = np.max(prec[rec >= t])
|
148 |
+
ap = ap + p / 11.
|
149 |
+
else:
|
150 |
+
# correct AP calculation
|
151 |
+
# first append sentinel values at the end
|
152 |
+
mrec = np.concatenate(([0.], rec, [1.]))
|
153 |
+
mpre = np.concatenate(([0.], prec, [0.]))
|
154 |
+
|
155 |
+
# compute the precision envelope
|
156 |
+
for i in range(mpre.size - 1, 0, -1):
|
157 |
+
mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
|
158 |
+
|
159 |
+
# to calculate area under PR curve, look for points
|
160 |
+
# where X axis (recall) changes value
|
161 |
+
i = np.where(mrec[1:] != mrec[:-1])[0]
|
162 |
+
|
163 |
+
# and sum (\Delta recall) * prec
|
164 |
+
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
|
165 |
+
return ap
|
166 |
+
|
167 |
+
|
168 |
+
def tensor2im(image_tensor, imtype=np.uint8, cent=1., factor=255. / 2.):
|
169 |
+
# def tensor2im(image_tensor, imtype=np.uint8, cent=1., factor=1.):
|
170 |
+
image_numpy = image_tensor[0].cpu().float().numpy()
|
171 |
+
image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + cent) * factor
|
172 |
+
return image_numpy.astype(imtype)
|
173 |
+
|
174 |
+
|
175 |
+
def im2tensor(image, imtype=np.uint8, cent=1., factor=255. / 2.):
|
176 |
+
# def im2tensor(image, imtype=np.uint8, cent=1., factor=1.):
|
177 |
+
return torch.Tensor((image / factor - cent)
|
178 |
+
[:, :, :, np.newaxis].transpose((3, 2, 0, 1)))
|
179 |
+
|
180 |
+
|
181 |
+
############################################################
|
182 |
+
# base_model.py #
|
183 |
+
############################################################
|
184 |
+
|
185 |
+
|
186 |
+
class BaseModel(torch.nn.Module):
|
187 |
+
def __init__(self):
|
188 |
+
super().__init__()
|
189 |
+
|
190 |
+
def name(self):
|
191 |
+
return 'BaseModel'
|
192 |
+
|
193 |
+
def initialize(self, use_gpu=True):
|
194 |
+
self.use_gpu = use_gpu
|
195 |
+
|
196 |
+
def forward(self):
|
197 |
+
pass
|
198 |
+
|
199 |
+
def get_image_paths(self):
|
200 |
+
pass
|
201 |
+
|
202 |
+
def optimize_parameters(self):
|
203 |
+
pass
|
204 |
+
|
205 |
+
def get_current_visuals(self):
|
206 |
+
return self.input
|
207 |
+
|
208 |
+
def get_current_errors(self):
|
209 |
+
return {}
|
210 |
+
|
211 |
+
def save(self, label):
|
212 |
+
pass
|
213 |
+
|
214 |
+
# helper saving function that can be used by subclasses
|
215 |
+
def save_network(self, network, path, network_label, epoch_label):
|
216 |
+
save_filename = '%s_net_%s.pth' % (epoch_label, network_label)
|
217 |
+
save_path = os.path.join(path, save_filename)
|
218 |
+
torch.save(network.state_dict(), save_path)
|
219 |
+
|
220 |
+
# helper loading function that can be used by subclasses
|
221 |
+
def load_network(self, network, network_label, epoch_label):
|
222 |
+
save_filename = '%s_net_%s.pth' % (epoch_label, network_label)
|
223 |
+
save_path = os.path.join(self.save_dir, save_filename)
|
224 |
+
print('Loading network from %s' % save_path)
|
225 |
+
network.load_state_dict(torch.load(save_path, map_location='cpu'))
|
226 |
+
|
227 |
+
def update_learning_rate():
|
228 |
+
pass
|
229 |
+
|
230 |
+
def get_image_paths(self):
|
231 |
+
return self.image_paths
|
232 |
+
|
233 |
+
def save_done(self, flag=False):
|
234 |
+
np.save(os.path.join(self.save_dir, 'done_flag'), flag)
|
235 |
+
np.savetxt(os.path.join(self.save_dir, 'done_flag'), [flag, ], fmt='%i')
|
236 |
+
|
237 |
+
|
238 |
+
############################################################
|
239 |
+
# dist_model.py #
|
240 |
+
############################################################
|
241 |
+
|
242 |
+
import os
|
243 |
+
from collections import OrderedDict
|
244 |
+
from scipy.ndimage import zoom
|
245 |
+
from tqdm import tqdm
|
246 |
+
|
247 |
+
|
248 |
+
class DistModel(BaseModel):
|
249 |
+
def name(self):
|
250 |
+
return self.model_name
|
251 |
+
|
252 |
+
def initialize(self, model='net-lin', net='alex', colorspace='Lab', pnet_rand=False, pnet_tune=False,
|
253 |
+
model_path=None,
|
254 |
+
use_gpu=True, printNet=False, spatial=False,
|
255 |
+
is_train=False, lr=.0001, beta1=0.5, version='0.1'):
|
256 |
+
'''
|
257 |
+
INPUTS
|
258 |
+
model - ['net-lin'] for linearly calibrated network
|
259 |
+
['net'] for off-the-shelf network
|
260 |
+
['L2'] for L2 distance in Lab colorspace
|
261 |
+
['SSIM'] for ssim in RGB colorspace
|
262 |
+
net - ['squeeze','alex','vgg']
|
263 |
+
model_path - if None, will look in weights/[NET_NAME].pth
|
264 |
+
colorspace - ['Lab','RGB'] colorspace to use for L2 and SSIM
|
265 |
+
use_gpu - bool - whether or not to use a GPU
|
266 |
+
printNet - bool - whether or not to print network architecture out
|
267 |
+
spatial - bool - whether to output an array containing varying distances across spatial dimensions
|
268 |
+
spatial_shape - if given, output spatial shape. if None then spatial shape is determined automatically via spatial_factor (see below).
|
269 |
+
spatial_factor - if given, specifies upsampling factor relative to the largest spatial extent of a convolutional layer. if None then resized to size of input images.
|
270 |
+
spatial_order - spline order of filter for upsampling in spatial mode, by default 1 (bilinear).
|
271 |
+
is_train - bool - [True] for training mode
|
272 |
+
lr - float - initial learning rate
|
273 |
+
beta1 - float - initial momentum term for adam
|
274 |
+
version - 0.1 for latest, 0.0 was original (with a bug)
|
275 |
+
'''
|
276 |
+
BaseModel.initialize(self, use_gpu=use_gpu)
|
277 |
+
|
278 |
+
self.model = model
|
279 |
+
self.net = net
|
280 |
+
self.is_train = is_train
|
281 |
+
self.spatial = spatial
|
282 |
+
self.model_name = '%s [%s]' % (model, net)
|
283 |
+
|
284 |
+
if (self.model == 'net-lin'): # pretrained net + linear layer
|
285 |
+
self.net = PNetLin(pnet_rand=pnet_rand, pnet_tune=pnet_tune, pnet_type=net,
|
286 |
+
use_dropout=True, spatial=spatial, version=version, lpips=True)
|
287 |
+
kw = dict(map_location='cpu')
|
288 |
+
if (model_path is None):
|
289 |
+
import inspect
|
290 |
+
model_path = os.path.abspath(
|
291 |
+
os.path.join(os.path.dirname(__file__), '..', '..', '..', 'models', 'lpips_models', f'{net}.pth'))
|
292 |
+
|
293 |
+
if (not is_train):
|
294 |
+
self.net.load_state_dict(torch.load(model_path, **kw), strict=False)
|
295 |
+
|
296 |
+
elif (self.model == 'net'): # pretrained network
|
297 |
+
self.net = PNetLin(pnet_rand=pnet_rand, pnet_type=net, lpips=False)
|
298 |
+
elif (self.model in ['L2', 'l2']):
|
299 |
+
self.net = L2(use_gpu=use_gpu, colorspace=colorspace) # not really a network, only for testing
|
300 |
+
self.model_name = 'L2'
|
301 |
+
elif (self.model in ['DSSIM', 'dssim', 'SSIM', 'ssim']):
|
302 |
+
self.net = DSSIM(use_gpu=use_gpu, colorspace=colorspace)
|
303 |
+
self.model_name = 'SSIM'
|
304 |
+
else:
|
305 |
+
raise ValueError("Model [%s] not recognized." % self.model)
|
306 |
+
|
307 |
+
self.trainable_parameters = list(self.net.parameters())
|
308 |
+
|
309 |
+
if self.is_train: # training mode
|
310 |
+
# extra network on top to go from distances (d0,d1) => predicted human judgment (h*)
|
311 |
+
self.rankLoss = BCERankingLoss()
|
312 |
+
self.trainable_parameters += list(self.rankLoss.net.parameters())
|
313 |
+
self.lr = lr
|
314 |
+
self.old_lr = lr
|
315 |
+
self.optimizer_net = torch.optim.Adam(self.trainable_parameters, lr=lr, betas=(beta1, 0.999))
|
316 |
+
else: # test mode
|
317 |
+
self.net.eval()
|
318 |
+
|
319 |
+
# if (use_gpu):
|
320 |
+
# self.net.to(gpu_ids[0])
|
321 |
+
# self.net = torch.nn.DataParallel(self.net, device_ids=gpu_ids)
|
322 |
+
# if (self.is_train):
|
323 |
+
# self.rankLoss = self.rankLoss.to(device=gpu_ids[0]) # just put this on GPU0
|
324 |
+
|
325 |
+
if (printNet):
|
326 |
+
print('---------- Networks initialized -------------')
|
327 |
+
print_network(self.net)
|
328 |
+
print('-----------------------------------------------')
|
329 |
+
|
330 |
+
def forward(self, in0, in1, retPerLayer=False):
|
331 |
+
''' Function computes the distance between image patches in0 and in1
|
332 |
+
INPUTS
|
333 |
+
in0, in1 - torch.Tensor object of shape Nx3xXxY - image patch scaled to [-1,1]
|
334 |
+
OUTPUT
|
335 |
+
computed distances between in0 and in1
|
336 |
+
'''
|
337 |
+
|
338 |
+
return self.net(in0, in1, retPerLayer=retPerLayer)
|
339 |
+
|
340 |
+
# ***** TRAINING FUNCTIONS *****
|
341 |
+
def optimize_parameters(self):
|
342 |
+
self.forward_train()
|
343 |
+
self.optimizer_net.zero_grad()
|
344 |
+
self.backward_train()
|
345 |
+
self.optimizer_net.step()
|
346 |
+
self.clamp_weights()
|
347 |
+
|
348 |
+
def clamp_weights(self):
|
349 |
+
for module in self.net.modules():
|
350 |
+
if (hasattr(module, 'weight') and module.kernel_size == (1, 1)):
|
351 |
+
module.weight.data = torch.clamp(module.weight.data, min=0)
|
352 |
+
|
353 |
+
def set_input(self, data):
|
354 |
+
self.input_ref = data['ref']
|
355 |
+
self.input_p0 = data['p0']
|
356 |
+
self.input_p1 = data['p1']
|
357 |
+
self.input_judge = data['judge']
|
358 |
+
|
359 |
+
# if (self.use_gpu):
|
360 |
+
# self.input_ref = self.input_ref.to(device=self.gpu_ids[0])
|
361 |
+
# self.input_p0 = self.input_p0.to(device=self.gpu_ids[0])
|
362 |
+
# self.input_p1 = self.input_p1.to(device=self.gpu_ids[0])
|
363 |
+
# self.input_judge = self.input_judge.to(device=self.gpu_ids[0])
|
364 |
+
|
365 |
+
# self.var_ref = Variable(self.input_ref, requires_grad=True)
|
366 |
+
# self.var_p0 = Variable(self.input_p0, requires_grad=True)
|
367 |
+
# self.var_p1 = Variable(self.input_p1, requires_grad=True)
|
368 |
+
|
369 |
+
def forward_train(self): # run forward pass
|
370 |
+
# print(self.net.module.scaling_layer.shift)
|
371 |
+
# print(torch.norm(self.net.module.net.slice1[0].weight).item(), torch.norm(self.net.module.lin0.model[1].weight).item())
|
372 |
+
|
373 |
+
assert False, "We shouldn't have gotten here when using LPIPS as a metric"
|
374 |
+
|
375 |
+
self.d0 = self(self.var_ref, self.var_p0)
|
376 |
+
self.d1 = self(self.var_ref, self.var_p1)
|
377 |
+
self.acc_r = self.compute_accuracy(self.d0, self.d1, self.input_judge)
|
378 |
+
|
379 |
+
self.var_judge = Variable(1. * self.input_judge).view(self.d0.size())
|
380 |
+
|
381 |
+
self.loss_total = self.rankLoss(self.d0, self.d1, self.var_judge * 2. - 1.)
|
382 |
+
|
383 |
+
return self.loss_total
|
384 |
+
|
385 |
+
def backward_train(self):
|
386 |
+
torch.mean(self.loss_total).backward()
|
387 |
+
|
388 |
+
def compute_accuracy(self, d0, d1, judge):
|
389 |
+
''' d0, d1 are Variables, judge is a Tensor '''
|
390 |
+
d1_lt_d0 = (d1 < d0).cpu().data.numpy().flatten()
|
391 |
+
judge_per = judge.cpu().numpy().flatten()
|
392 |
+
return d1_lt_d0 * judge_per + (1 - d1_lt_d0) * (1 - judge_per)
|
393 |
+
|
394 |
+
def get_current_errors(self):
|
395 |
+
retDict = OrderedDict([('loss_total', self.loss_total.data.cpu().numpy()),
|
396 |
+
('acc_r', self.acc_r)])
|
397 |
+
|
398 |
+
for key in retDict.keys():
|
399 |
+
retDict[key] = np.mean(retDict[key])
|
400 |
+
|
401 |
+
return retDict
|
402 |
+
|
403 |
+
def get_current_visuals(self):
|
404 |
+
zoom_factor = 256 / self.var_ref.data.size()[2]
|
405 |
+
|
406 |
+
ref_img = tensor2im(self.var_ref.data)
|
407 |
+
p0_img = tensor2im(self.var_p0.data)
|
408 |
+
p1_img = tensor2im(self.var_p1.data)
|
409 |
+
|
410 |
+
ref_img_vis = zoom(ref_img, [zoom_factor, zoom_factor, 1], order=0)
|
411 |
+
p0_img_vis = zoom(p0_img, [zoom_factor, zoom_factor, 1], order=0)
|
412 |
+
p1_img_vis = zoom(p1_img, [zoom_factor, zoom_factor, 1], order=0)
|
413 |
+
|
414 |
+
return OrderedDict([('ref', ref_img_vis),
|
415 |
+
('p0', p0_img_vis),
|
416 |
+
('p1', p1_img_vis)])
|
417 |
+
|
418 |
+
def save(self, path, label):
|
419 |
+
if (self.use_gpu):
|
420 |
+
self.save_network(self.net.module, path, '', label)
|
421 |
+
else:
|
422 |
+
self.save_network(self.net, path, '', label)
|
423 |
+
self.save_network(self.rankLoss.net, path, 'rank', label)
|
424 |
+
|
425 |
+
def update_learning_rate(self, nepoch_decay):
|
426 |
+
lrd = self.lr / nepoch_decay
|
427 |
+
lr = self.old_lr - lrd
|
428 |
+
|
429 |
+
for param_group in self.optimizer_net.param_groups:
|
430 |
+
param_group['lr'] = lr
|
431 |
+
|
432 |
+
print('update lr decay: %f -> %f' % (self.old_lr, lr))
|
433 |
+
self.old_lr = lr
|
434 |
+
|
435 |
+
|
436 |
+
def score_2afc_dataset(data_loader, func, name=''):
|
437 |
+
''' Function computes Two Alternative Forced Choice (2AFC) score using
|
438 |
+
distance function 'func' in dataset 'data_loader'
|
439 |
+
INPUTS
|
440 |
+
data_loader - CustomDatasetDataLoader object - contains a TwoAFCDataset inside
|
441 |
+
func - callable distance function - calling d=func(in0,in1) should take 2
|
442 |
+
pytorch tensors with shape Nx3xXxY, and return numpy array of length N
|
443 |
+
OUTPUTS
|
444 |
+
[0] - 2AFC score in [0,1], fraction of time func agrees with human evaluators
|
445 |
+
[1] - dictionary with following elements
|
446 |
+
d0s,d1s - N arrays containing distances between reference patch to perturbed patches
|
447 |
+
gts - N array in [0,1], preferred patch selected by human evaluators
|
448 |
+
(closer to "0" for left patch p0, "1" for right patch p1,
|
449 |
+
"0.6" means 60pct people preferred right patch, 40pct preferred left)
|
450 |
+
scores - N array in [0,1], corresponding to what percentage function agreed with humans
|
451 |
+
CONSTS
|
452 |
+
N - number of test triplets in data_loader
|
453 |
+
'''
|
454 |
+
|
455 |
+
d0s = []
|
456 |
+
d1s = []
|
457 |
+
gts = []
|
458 |
+
|
459 |
+
for data in tqdm(data_loader.load_data(), desc=name):
|
460 |
+
d0s += func(data['ref'], data['p0']).data.cpu().numpy().flatten().tolist()
|
461 |
+
d1s += func(data['ref'], data['p1']).data.cpu().numpy().flatten().tolist()
|
462 |
+
gts += data['judge'].cpu().numpy().flatten().tolist()
|
463 |
+
|
464 |
+
d0s = np.array(d0s)
|
465 |
+
d1s = np.array(d1s)
|
466 |
+
gts = np.array(gts)
|
467 |
+
scores = (d0s < d1s) * (1. - gts) + (d1s < d0s) * gts + (d1s == d0s) * .5
|
468 |
+
|
469 |
+
return (np.mean(scores), dict(d0s=d0s, d1s=d1s, gts=gts, scores=scores))
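# Worked example of the agreement score above (editor's illustration, not part of the
# original code): suppose one triplet where the model finds the left patch p0 closer to
# the reference (d0 < d1) and humans also preferred p0 (judge ~ 0):
#   d0s = [0.2], d1s = [0.5], gts = [0.0]
#   scores = (d0s < d1s) * (1 - gts) + (d1s < d0s) * gts + (d1s == d0s) * 0.5
#          = 1 * 1 + 0 * 0 + 0 * 0.5 = [1.0]   -> the metric agrees with the humans here.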
|
470 |
+
|
471 |
+
|
472 |
+
def score_jnd_dataset(data_loader, func, name=''):
|
473 |
+
''' Function computes JND score using distance function 'func' in dataset 'data_loader'
|
474 |
+
INPUTS
|
475 |
+
data_loader - CustomDatasetDataLoader object - contains a JNDDataset inside
|
476 |
+
func - callable distance function - calling d=func(in0,in1) should take 2
|
477 |
+
pytorch tensors with shape Nx3xXxY, and return pytorch array of length N
|
478 |
+
OUTPUTS
|
479 |
+
[0] - JND score in [0,1], mAP score (area under precision-recall curve)
|
480 |
+
[1] - dictionary with following elements
|
481 |
+
ds - N array containing distances between two patches shown to human evaluator
|
482 |
+
sames - N array containing fraction of people who thought the two patches were identical
|
483 |
+
CONSTS
|
484 |
+
N - number of test triplets in data_loader
|
485 |
+
'''
|
486 |
+
|
487 |
+
ds = []
|
488 |
+
gts = []
|
489 |
+
|
490 |
+
for data in tqdm(data_loader.load_data(), desc=name):
|
491 |
+
ds += func(data['p0'], data['p1']).data.cpu().numpy().tolist()
|
492 |
+
gts += data['same'].cpu().numpy().flatten().tolist()
|
493 |
+
|
494 |
+
sames = np.array(gts)
|
495 |
+
ds = np.array(ds)
|
496 |
+
|
497 |
+
sorted_inds = np.argsort(ds)
|
498 |
+
ds_sorted = ds[sorted_inds]
|
499 |
+
sames_sorted = sames[sorted_inds]
|
500 |
+
|
501 |
+
TPs = np.cumsum(sames_sorted)
|
502 |
+
FPs = np.cumsum(1 - sames_sorted)
|
503 |
+
FNs = np.sum(sames_sorted) - TPs
|
504 |
+
|
505 |
+
precs = TPs / (TPs + FPs)
|
506 |
+
recs = TPs / (TPs + FNs)
|
507 |
+
score = voc_ap(recs, precs)
|
508 |
+
|
509 |
+
return (score, dict(ds=ds, sames=sames))
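# Worked example of the precision/recall sweep above (editor's illustration): with
#   ds    = [0.1, 0.4, 0.2]   distances between the two patches
#   sames = [1,   0,   1  ]   fraction of people calling the pair identical
# sorting by distance gives sames_sorted = [1, 1, 0], so
#   TPs = [1, 2, 2], FPs = [0, 0, 1], FNs = [1, 0, 0]
#   precs = [1, 1, 2/3], recs = [0.5, 1, 1]
# and voc_ap(recs, precs) integrates this precision-recall curve into the mAP score.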
|
510 |
+
|
511 |
+
|
512 |
+
############################################################
|
513 |
+
# networks_basic.py #
|
514 |
+
############################################################
|
515 |
+
|
516 |
+
import torch.nn as nn
|
517 |
+
from torch.autograd import Variable
|
518 |
+
import numpy as np
|
519 |
+
|
520 |
+
|
521 |
+
def spatial_average(in_tens, keepdim=True):
|
522 |
+
return in_tens.mean([2, 3], keepdim=keepdim)
|
523 |
+
|
524 |
+
|
525 |
+
def upsample(in_tens, out_H=64): # assumes scale factor is same for H and W
|
526 |
+
in_H = in_tens.shape[2]
|
527 |
+
scale_factor = 1. * out_H / in_H
|
528 |
+
|
529 |
+
return nn.Upsample(scale_factor=scale_factor, mode='bilinear', align_corners=False)(in_tens)
|
530 |
+
|
531 |
+
|
532 |
+
# Learned perceptual metric
|
533 |
+
class PNetLin(nn.Module):
|
534 |
+
def __init__(self, pnet_type='vgg', pnet_rand=False, pnet_tune=False, use_dropout=True, spatial=False,
|
535 |
+
version='0.1', lpips=True):
|
536 |
+
super(PNetLin, self).__init__()
|
537 |
+
|
538 |
+
self.pnet_type = pnet_type
|
539 |
+
self.pnet_tune = pnet_tune
|
540 |
+
self.pnet_rand = pnet_rand
|
541 |
+
self.spatial = spatial
|
542 |
+
self.lpips = lpips
|
543 |
+
self.version = version
|
544 |
+
self.scaling_layer = ScalingLayer()
|
545 |
+
|
546 |
+
if (self.pnet_type in ['vgg', 'vgg16']):
|
547 |
+
net_type = vgg16
|
548 |
+
self.chns = [64, 128, 256, 512, 512]
|
549 |
+
elif (self.pnet_type == 'alex'):
|
550 |
+
net_type = alexnet
|
551 |
+
self.chns = [64, 192, 384, 256, 256]
|
552 |
+
elif (self.pnet_type == 'squeeze'):
|
553 |
+
net_type = squeezenet
|
554 |
+
self.chns = [64, 128, 256, 384, 384, 512, 512]
|
555 |
+
self.L = len(self.chns)
|
556 |
+
|
557 |
+
self.net = net_type(pretrained=not self.pnet_rand, requires_grad=self.pnet_tune)
|
558 |
+
|
559 |
+
if (lpips):
|
560 |
+
self.lin0 = NetLinLayer(self.chns[0], use_dropout=use_dropout)
|
561 |
+
self.lin1 = NetLinLayer(self.chns[1], use_dropout=use_dropout)
|
562 |
+
self.lin2 = NetLinLayer(self.chns[2], use_dropout=use_dropout)
|
563 |
+
self.lin3 = NetLinLayer(self.chns[3], use_dropout=use_dropout)
|
564 |
+
self.lin4 = NetLinLayer(self.chns[4], use_dropout=use_dropout)
|
565 |
+
self.lins = [self.lin0, self.lin1, self.lin2, self.lin3, self.lin4]
|
566 |
+
if (self.pnet_type == 'squeeze'): # 7 layers for squeezenet
|
567 |
+
self.lin5 = NetLinLayer(self.chns[5], use_dropout=use_dropout)
|
568 |
+
self.lin6 = NetLinLayer(self.chns[6], use_dropout=use_dropout)
|
569 |
+
self.lins += [self.lin5, self.lin6]
|
570 |
+
|
571 |
+
def forward(self, in0, in1, retPerLayer=False):
|
572 |
+
# v0.0 - original release had a bug, where input was not scaled
|
573 |
+
in0_input, in1_input = (self.scaling_layer(in0), self.scaling_layer(in1)) if self.version == '0.1' else (
|
574 |
+
in0, in1)
|
575 |
+
outs0, outs1 = self.net(in0_input), self.net(in1_input)
|
576 |
+
feats0, feats1, diffs = {}, {}, {}
|
577 |
+
|
578 |
+
for kk in range(self.L):
|
579 |
+
feats0[kk], feats1[kk] = normalize_tensor(outs0[kk]), normalize_tensor(outs1[kk])
|
580 |
+
diffs[kk] = (feats0[kk] - feats1[kk]) ** 2
|
581 |
+
|
582 |
+
if (self.lpips):
|
583 |
+
if (self.spatial):
|
584 |
+
res = [upsample(self.lins[kk].model(diffs[kk]), out_H=in0.shape[2]) for kk in range(self.L)]
|
585 |
+
else:
|
586 |
+
res = [spatial_average(self.lins[kk].model(diffs[kk]), keepdim=True) for kk in range(self.L)]
|
587 |
+
else:
|
588 |
+
if (self.spatial):
|
589 |
+
res = [upsample(diffs[kk].sum(dim=1, keepdim=True), out_H=in0.shape[2]) for kk in range(self.L)]
|
590 |
+
else:
|
591 |
+
res = [spatial_average(diffs[kk].sum(dim=1, keepdim=True), keepdim=True) for kk in range(self.L)]
|
592 |
+
|
593 |
+
val = res[0]
|
594 |
+
for l in range(1, self.L):
|
595 |
+
val += res[l]
|
596 |
+
|
597 |
+
if (retPerLayer):
|
598 |
+
return (val, res)
|
599 |
+
else:
|
600 |
+
return val
|
601 |
+
|
602 |
+
|
603 |
+
class ScalingLayer(nn.Module):
|
604 |
+
def __init__(self):
|
605 |
+
super(ScalingLayer, self).__init__()
|
606 |
+
self.register_buffer('shift', torch.Tensor([-.030, -.088, -.188])[None, :, None, None])
|
607 |
+
self.register_buffer('scale', torch.Tensor([.458, .448, .450])[None, :, None, None])
|
608 |
+
|
609 |
+
def forward(self, inp):
|
610 |
+
return (inp - self.shift) / self.scale
|
611 |
+
|
612 |
+
|
613 |
+
class NetLinLayer(nn.Module):
|
614 |
+
''' A single linear layer which does a 1x1 conv '''
|
615 |
+
|
616 |
+
def __init__(self, chn_in, chn_out=1, use_dropout=False):
|
617 |
+
super(NetLinLayer, self).__init__()
|
618 |
+
|
619 |
+
layers = [nn.Dropout(), ] if (use_dropout) else []
|
620 |
+
layers += [nn.Conv2d(chn_in, chn_out, 1, stride=1, padding=0, bias=False), ]
|
621 |
+
self.model = nn.Sequential(*layers)
|
622 |
+
|
623 |
+
|
624 |
+
class Dist2LogitLayer(nn.Module):
|
625 |
+
''' takes 2 distances, puts through fc layers, spits out value between [0,1] (if use_sigmoid is True) '''
|
626 |
+
|
627 |
+
def __init__(self, chn_mid=32, use_sigmoid=True):
|
628 |
+
super(Dist2LogitLayer, self).__init__()
|
629 |
+
|
630 |
+
layers = [nn.Conv2d(5, chn_mid, 1, stride=1, padding=0, bias=True), ]
|
631 |
+
layers += [nn.LeakyReLU(0.2, True), ]
|
632 |
+
layers += [nn.Conv2d(chn_mid, chn_mid, 1, stride=1, padding=0, bias=True), ]
|
633 |
+
layers += [nn.LeakyReLU(0.2, True), ]
|
634 |
+
layers += [nn.Conv2d(chn_mid, 1, 1, stride=1, padding=0, bias=True), ]
|
635 |
+
if (use_sigmoid):
|
636 |
+
layers += [nn.Sigmoid(), ]
|
637 |
+
self.model = nn.Sequential(*layers)
|
638 |
+
|
639 |
+
def forward(self, d0, d1, eps=0.1):
|
640 |
+
return self.model(torch.cat((d0, d1, d0 - d1, d0 / (d1 + eps), d1 / (d0 + eps)), dim=1))
|
641 |
+
|
642 |
+
|
643 |
+
class BCERankingLoss(nn.Module):
|
644 |
+
def __init__(self, chn_mid=32):
|
645 |
+
super(BCERankingLoss, self).__init__()
|
646 |
+
self.net = Dist2LogitLayer(chn_mid=chn_mid)
|
647 |
+
# self.parameters = list(self.net.parameters())
|
648 |
+
self.loss = torch.nn.BCELoss()
|
649 |
+
|
650 |
+
def forward(self, d0, d1, judge):
|
651 |
+
per = (judge + 1.) / 2.
|
652 |
+
self.logit = self.net(d0, d1)
|
653 |
+
return self.loss(self.logit, per)
|
654 |
+
|
655 |
+
|
656 |
+
# L2, DSSIM metrics
|
657 |
+
class FakeNet(nn.Module):
|
658 |
+
def __init__(self, use_gpu=True, colorspace='Lab'):
|
659 |
+
super(FakeNet, self).__init__()
|
660 |
+
self.use_gpu = use_gpu
|
661 |
+
self.colorspace = colorspace
|
662 |
+
|
663 |
+
|
664 |
+
class L2(FakeNet):
|
665 |
+
|
666 |
+
def forward(self, in0, in1, retPerLayer=None):
|
667 |
+
assert (in0.size()[0] == 1) # currently only supports batchSize 1
|
668 |
+
|
669 |
+
if (self.colorspace == 'RGB'):
|
670 |
+
(N, C, X, Y) = in0.size()
|
671 |
+
value = torch.mean(torch.mean(torch.mean((in0 - in1) ** 2, dim=1).view(N, 1, X, Y), dim=2).view(N, 1, 1, Y),
|
672 |
+
dim=3).view(N)
|
673 |
+
return value
|
674 |
+
elif (self.colorspace == 'Lab'):
|
675 |
+
value = l2(tensor2np(tensor2tensorlab(in0.data, to_norm=False)),
|
676 |
+
tensor2np(tensor2tensorlab(in1.data, to_norm=False)), range=100.).astype('float')
|
677 |
+
ret_var = Variable(torch.Tensor((value,)))
|
678 |
+
# if (self.use_gpu):
|
679 |
+
# ret_var = ret_var.cuda()
|
680 |
+
return ret_var
|
681 |
+
|
682 |
+
|
683 |
+
class DSSIM(FakeNet):
|
684 |
+
|
685 |
+
def forward(self, in0, in1, retPerLayer=None):
|
686 |
+
assert (in0.size()[0] == 1) # currently only supports batchSize 1
|
687 |
+
|
688 |
+
if (self.colorspace == 'RGB'):
|
689 |
+
value = dssim(1. * tensor2im(in0.data), 1. * tensor2im(in1.data), range=255.).astype('float')
|
690 |
+
elif (self.colorspace == 'Lab'):
|
691 |
+
value = dssim(tensor2np(tensor2tensorlab(in0.data, to_norm=False)),
|
692 |
+
tensor2np(tensor2tensorlab(in1.data, to_norm=False)), range=100.).astype('float')
|
693 |
+
ret_var = Variable(torch.Tensor((value,)))
|
694 |
+
# if (self.use_gpu):
|
695 |
+
# ret_var = ret_var.cuda()
|
696 |
+
return ret_var
|
697 |
+
|
698 |
+
|
699 |
+
def print_network(net):
|
700 |
+
num_params = 0
|
701 |
+
for param in net.parameters():
|
702 |
+
num_params += param.numel()
|
703 |
+
print('Network', net)
|
704 |
+
print('Total number of parameters: %d' % num_params)
|
705 |
+
|
706 |
+
|
707 |
+
############################################################
|
708 |
+
# pretrained_networks.py #
|
709 |
+
############################################################
|
710 |
+
|
711 |
+
from collections import namedtuple
|
712 |
+
import torch
|
713 |
+
from torchvision import models as tv
|
714 |
+
|
715 |
+
|
716 |
+
class squeezenet(torch.nn.Module):
|
717 |
+
def __init__(self, requires_grad=False, pretrained=True):
|
718 |
+
super(squeezenet, self).__init__()
|
719 |
+
pretrained_features = tv.squeezenet1_1(pretrained=pretrained).features
|
720 |
+
self.slice1 = torch.nn.Sequential()
|
721 |
+
self.slice2 = torch.nn.Sequential()
|
722 |
+
self.slice3 = torch.nn.Sequential()
|
723 |
+
self.slice4 = torch.nn.Sequential()
|
724 |
+
self.slice5 = torch.nn.Sequential()
|
725 |
+
self.slice6 = torch.nn.Sequential()
|
726 |
+
self.slice7 = torch.nn.Sequential()
|
727 |
+
self.N_slices = 7
|
728 |
+
for x in range(2):
|
729 |
+
self.slice1.add_module(str(x), pretrained_features[x])
|
730 |
+
for x in range(2, 5):
|
731 |
+
self.slice2.add_module(str(x), pretrained_features[x])
|
732 |
+
for x in range(5, 8):
|
733 |
+
self.slice3.add_module(str(x), pretrained_features[x])
|
734 |
+
for x in range(8, 10):
|
735 |
+
self.slice4.add_module(str(x), pretrained_features[x])
|
736 |
+
for x in range(10, 11):
|
737 |
+
self.slice5.add_module(str(x), pretrained_features[x])
|
738 |
+
for x in range(11, 12):
|
739 |
+
self.slice6.add_module(str(x), pretrained_features[x])
|
740 |
+
for x in range(12, 13):
|
741 |
+
self.slice7.add_module(str(x), pretrained_features[x])
|
742 |
+
if not requires_grad:
|
743 |
+
for param in self.parameters():
|
744 |
+
param.requires_grad = False
|
745 |
+
|
746 |
+
def forward(self, X):
|
747 |
+
h = self.slice1(X)
|
748 |
+
h_relu1 = h
|
749 |
+
h = self.slice2(h)
|
750 |
+
h_relu2 = h
|
751 |
+
h = self.slice3(h)
|
752 |
+
h_relu3 = h
|
753 |
+
h = self.slice4(h)
|
754 |
+
h_relu4 = h
|
755 |
+
h = self.slice5(h)
|
756 |
+
h_relu5 = h
|
757 |
+
h = self.slice6(h)
|
758 |
+
h_relu6 = h
|
759 |
+
h = self.slice7(h)
|
760 |
+
h_relu7 = h
|
761 |
+
vgg_outputs = namedtuple("SqueezeOutputs", ['relu1', 'relu2', 'relu3', 'relu4', 'relu5', 'relu6', 'relu7'])
|
762 |
+
out = vgg_outputs(h_relu1, h_relu2, h_relu3, h_relu4, h_relu5, h_relu6, h_relu7)
|
763 |
+
|
764 |
+
return out
|
765 |
+
|
766 |
+
|
767 |
+
class alexnet(torch.nn.Module):
|
768 |
+
def __init__(self, requires_grad=False, pretrained=True):
|
769 |
+
super(alexnet, self).__init__()
|
770 |
+
alexnet_pretrained_features = tv.alexnet(pretrained=pretrained).features
|
771 |
+
self.slice1 = torch.nn.Sequential()
|
772 |
+
self.slice2 = torch.nn.Sequential()
|
773 |
+
self.slice3 = torch.nn.Sequential()
|
774 |
+
self.slice4 = torch.nn.Sequential()
|
775 |
+
self.slice5 = torch.nn.Sequential()
|
776 |
+
self.N_slices = 5
|
777 |
+
for x in range(2):
|
778 |
+
self.slice1.add_module(str(x), alexnet_pretrained_features[x])
|
779 |
+
for x in range(2, 5):
|
780 |
+
self.slice2.add_module(str(x), alexnet_pretrained_features[x])
|
781 |
+
for x in range(5, 8):
|
782 |
+
self.slice3.add_module(str(x), alexnet_pretrained_features[x])
|
783 |
+
for x in range(8, 10):
|
784 |
+
self.slice4.add_module(str(x), alexnet_pretrained_features[x])
|
785 |
+
for x in range(10, 12):
|
786 |
+
self.slice5.add_module(str(x), alexnet_pretrained_features[x])
|
787 |
+
if not requires_grad:
|
788 |
+
for param in self.parameters():
|
789 |
+
param.requires_grad = False
|
790 |
+
|
791 |
+
def forward(self, X):
|
792 |
+
h = self.slice1(X)
|
793 |
+
h_relu1 = h
|
794 |
+
h = self.slice2(h)
|
795 |
+
h_relu2 = h
|
796 |
+
h = self.slice3(h)
|
797 |
+
h_relu3 = h
|
798 |
+
h = self.slice4(h)
|
799 |
+
h_relu4 = h
|
800 |
+
h = self.slice5(h)
|
801 |
+
h_relu5 = h
|
802 |
+
alexnet_outputs = namedtuple("AlexnetOutputs", ['relu1', 'relu2', 'relu3', 'relu4', 'relu5'])
|
803 |
+
out = alexnet_outputs(h_relu1, h_relu2, h_relu3, h_relu4, h_relu5)
|
804 |
+
|
805 |
+
return out
|
806 |
+
|
807 |
+
|
808 |
+
class vgg16(torch.nn.Module):
|
809 |
+
def __init__(self, requires_grad=False, pretrained=True):
|
810 |
+
super(vgg16, self).__init__()
|
811 |
+
vgg_pretrained_features = tv.vgg16(pretrained=pretrained).features
|
812 |
+
self.slice1 = torch.nn.Sequential()
|
813 |
+
self.slice2 = torch.nn.Sequential()
|
814 |
+
self.slice3 = torch.nn.Sequential()
|
815 |
+
self.slice4 = torch.nn.Sequential()
|
816 |
+
self.slice5 = torch.nn.Sequential()
|
817 |
+
self.N_slices = 5
|
818 |
+
for x in range(4):
|
819 |
+
self.slice1.add_module(str(x), vgg_pretrained_features[x])
|
820 |
+
for x in range(4, 9):
|
821 |
+
self.slice2.add_module(str(x), vgg_pretrained_features[x])
|
822 |
+
for x in range(9, 16):
|
823 |
+
self.slice3.add_module(str(x), vgg_pretrained_features[x])
|
824 |
+
for x in range(16, 23):
|
825 |
+
self.slice4.add_module(str(x), vgg_pretrained_features[x])
|
826 |
+
for x in range(23, 30):
|
827 |
+
self.slice5.add_module(str(x), vgg_pretrained_features[x])
|
828 |
+
if not requires_grad:
|
829 |
+
for param in self.parameters():
|
830 |
+
param.requires_grad = False
|
831 |
+
|
832 |
+
def forward(self, X):
|
833 |
+
h = self.slice1(X)
|
834 |
+
h_relu1_2 = h
|
835 |
+
h = self.slice2(h)
|
836 |
+
h_relu2_2 = h
|
837 |
+
h = self.slice3(h)
|
838 |
+
h_relu3_3 = h
|
839 |
+
h = self.slice4(h)
|
840 |
+
h_relu4_3 = h
|
841 |
+
h = self.slice5(h)
|
842 |
+
h_relu5_3 = h
|
843 |
+
vgg_outputs = namedtuple("VggOutputs", ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3', 'relu5_3'])
|
844 |
+
out = vgg_outputs(h_relu1_2, h_relu2_2, h_relu3_3, h_relu4_3, h_relu5_3)
|
845 |
+
|
846 |
+
return out
|
847 |
+
|
848 |
+
|
849 |
+
class resnet(torch.nn.Module):
|
850 |
+
def __init__(self, requires_grad=False, pretrained=True, num=18):
|
851 |
+
super(resnet, self).__init__()
|
852 |
+
if (num == 18):
|
853 |
+
self.net = tv.resnet18(pretrained=pretrained)
|
854 |
+
elif (num == 34):
|
855 |
+
self.net = tv.resnet34(pretrained=pretrained)
|
856 |
+
elif (num == 50):
|
857 |
+
self.net = tv.resnet50(pretrained=pretrained)
|
858 |
+
elif (num == 101):
|
859 |
+
self.net = tv.resnet101(pretrained=pretrained)
|
860 |
+
elif (num == 152):
|
861 |
+
self.net = tv.resnet152(pretrained=pretrained)
|
862 |
+
self.N_slices = 5
|
863 |
+
|
864 |
+
self.conv1 = self.net.conv1
|
865 |
+
self.bn1 = self.net.bn1
|
866 |
+
self.relu = self.net.relu
|
867 |
+
self.maxpool = self.net.maxpool
|
868 |
+
self.layer1 = self.net.layer1
|
869 |
+
self.layer2 = self.net.layer2
|
870 |
+
self.layer3 = self.net.layer3
|
871 |
+
self.layer4 = self.net.layer4
|
872 |
+
|
873 |
+
def forward(self, X):
|
874 |
+
h = self.conv1(X)
|
875 |
+
h = self.bn1(h)
|
876 |
+
h = self.relu(h)
|
877 |
+
h_relu1 = h
|
878 |
+
h = self.maxpool(h)
|
879 |
+
h = self.layer1(h)
|
880 |
+
h_conv2 = h
|
881 |
+
h = self.layer2(h)
|
882 |
+
h_conv3 = h
|
883 |
+
h = self.layer3(h)
|
884 |
+
h_conv4 = h
|
885 |
+
h = self.layer4(h)
|
886 |
+
h_conv5 = h
|
887 |
+
|
888 |
+
outputs = namedtuple("Outputs", ['relu1', 'conv2', 'conv3', 'conv4', 'conv5'])
|
889 |
+
out = outputs(h_relu1, h_conv2, h_conv3, h_conv4, h_conv5)
|
890 |
+
|
891 |
+
return out
|
DH-AISP/2/saicinpainting/evaluation/losses/ssim.py
ADDED
@@ -0,0 +1,74 @@
import numpy as np
import torch
import torch.nn.functional as F


class SSIM(torch.nn.Module):
    """SSIM. Modified from:
    https://github.com/Po-Hsun-Su/pytorch-ssim/blob/master/pytorch_ssim/__init__.py
    """

    def __init__(self, window_size=11, size_average=True):
        super().__init__()
        self.window_size = window_size
        self.size_average = size_average
        self.channel = 1
        self.register_buffer('window', self._create_window(window_size, self.channel))

    def forward(self, img1, img2):
        assert len(img1.shape) == 4

        channel = img1.size()[1]

        if channel == self.channel and self.window.data.type() == img1.data.type():
            window = self.window
        else:
            window = self._create_window(self.window_size, channel)

            # window = window.to(img1.get_device())
            window = window.type_as(img1)

            self.window = window
            self.channel = channel

        return self._ssim(img1, img2, window, self.window_size, channel, self.size_average)

    def _gaussian(self, window_size, sigma):
        gauss = torch.Tensor([
            np.exp(-(x - (window_size // 2)) ** 2 / float(2 * sigma ** 2)) for x in range(window_size)
        ])
        return gauss / gauss.sum()

    def _create_window(self, window_size, channel):
        _1D_window = self._gaussian(window_size, 1.5).unsqueeze(1)
        _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0)
        return _2D_window.expand(channel, 1, window_size, window_size).contiguous()

    def _ssim(self, img1, img2, window, window_size, channel, size_average=True):
        mu1 = F.conv2d(img1, window, padding=(window_size // 2), groups=channel)
        mu2 = F.conv2d(img2, window, padding=(window_size // 2), groups=channel)

        mu1_sq = mu1.pow(2)
        mu2_sq = mu2.pow(2)
        mu1_mu2 = mu1 * mu2

        sigma1_sq = F.conv2d(
            img1 * img1, window, padding=(window_size // 2), groups=channel) - mu1_sq
        sigma2_sq = F.conv2d(
            img2 * img2, window, padding=(window_size // 2), groups=channel) - mu2_sq
        sigma12 = F.conv2d(
            img1 * img2, window, padding=(window_size // 2), groups=channel) - mu1_mu2

        C1 = 0.01 ** 2
        C2 = 0.03 ** 2

        ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / \
                   ((mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2))

        if size_average:
            return ssim_map.mean()

        return ssim_map.mean(1).mean(1).mean(1)

    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs):
        return
DH-AISP/2/saicinpainting/evaluation/masks/README.md
ADDED
@@ -0,0 +1,27 @@
# Current algorithm

## Choice of mask objects

To identify objects that are suitable for mask generation, a panoptic segmentation model
from [detectron2](https://github.com/facebookresearch/detectron2), trained on COCO, is used. Categories of the detected instances
belong either to the "stuff" or the "things" type. We require that object instances have a category belonging
to "things". Besides, we set an upper bound on the area taken by the object: too large an
area indicates that the instance is either the background or a main object which should not be removed.

## Choice of position for mask

We assume that the input image has size 2^n x 2^m. We downsample it using the
[COUNTLESS](https://github.com/william-silversmith/countless) algorithm so that the width equals
2^{downsample_levels} (here 64); a minimal sketch of this step is given after this README.

### Augmentation

There are several parameters for augmentation:
- Scaling factor. We limit scaling to the case when a mask, after scaling with the pivot point in its center, fits inside the
image completely.
-

### Shift


## Select
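A minimal sketch (editor's illustration, not part of the repository) of the repeated COUNTLESS downsampling described in the README above, assuming the `simplest_countless` reducer from `countless/countless2d.py` added later in this commit is importable:

import numpy as np
from countless2d import simplest_countless  # module added later in this commit

def downsample_to_width(mask, target_width=64):
    """Halve a 2^n x 2^m label mask with COUNTLESS until its width reaches target_width."""
    while mask.shape[1] > target_width:
        mask = simplest_countless(mask)
    return mask

labels = (np.random.rand(512, 512) > 0.5).astype(np.uint8) + 1  # labels in {1, 2}; zero-free, so raw COUNTLESS is safe
print(downsample_to_width(labels, 64).shape)  # (64, 64)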
DH-AISP/2/saicinpainting/evaluation/masks/__init__.py
ADDED
File without changes
|
DH-AISP/2/saicinpainting/evaluation/masks/countless/README.md
ADDED
@@ -0,0 +1,25 @@
[![Build Status](https://travis-ci.org/william-silversmith/countless.svg?branch=master)](https://travis-ci.org/william-silversmith/countless)

Python COUNTLESS Downsampling
=============================

To install:

`pip install -r requirements.txt`

To test:

`python test.py`

To benchmark countless2d:

`python python/countless2d.py python/images/gray_segmentation.png`

To benchmark countless3d:

`python python/countless3d.py`

Adjust N and the list of algorithms inside each script to modify the run parameters.


Python3 is slightly faster than Python2.
DH-AISP/2/saicinpainting/evaluation/masks/countless/__init__.py
ADDED
File without changes
|
DH-AISP/2/saicinpainting/evaluation/masks/countless/countless2d.py
ADDED
@@ -0,0 +1,529 @@
1 |
+
from __future__ import print_function, division
|
2 |
+
|
3 |
+
"""
|
4 |
+
COUNTLESS performance test in Python.
|
5 |
+
|
6 |
+
python countless2d.py ./images/NAMEOFIMAGE
|
7 |
+
"""
|
8 |
+
|
9 |
+
import six
|
10 |
+
from six.moves import range
|
11 |
+
from collections import defaultdict
|
12 |
+
from functools import reduce
|
13 |
+
import operator
|
14 |
+
import io
|
15 |
+
import os
|
16 |
+
from PIL import Image
|
17 |
+
import math
|
18 |
+
import numpy as np
|
19 |
+
import random
|
20 |
+
import sys
|
21 |
+
import time
|
22 |
+
from tqdm import tqdm
|
23 |
+
from scipy import ndimage
|
24 |
+
|
25 |
+
def simplest_countless(data):
|
26 |
+
"""
|
27 |
+
Vectorized implementation of downsampling a 2D
|
28 |
+
image by 2 on each side using the COUNTLESS algorithm.
|
29 |
+
|
30 |
+
data is a 2D numpy array with even dimensions.
|
31 |
+
"""
|
32 |
+
sections = []
|
33 |
+
|
34 |
+
# This loop splits the 2D array apart into four arrays that are
|
35 |
+
# all the result of striding by 2 and offset by (0,0), (0,1), (1,0),
|
36 |
+
# and (1,1) representing the A, B, C, and D positions from Figure 1.
|
37 |
+
factor = (2,2)
|
38 |
+
for offset in np.ndindex(factor):
|
39 |
+
part = data[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
40 |
+
sections.append(part)
|
41 |
+
|
42 |
+
a, b, c, d = sections
|
43 |
+
|
44 |
+
ab = a * (a == b) # PICK(A,B)
|
45 |
+
ac = a * (a == c) # PICK(A,C)
|
46 |
+
bc = b * (b == c) # PICK(B,C)
|
47 |
+
|
48 |
+
a = ab | ac | bc # Bitwise OR, safe b/c non-matches are zeroed
|
49 |
+
|
50 |
+
return a + (a == 0) * d # AB || AC || BC || D
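# Worked example (editor's illustration): for the 2x2 block [[5, 5], [3, 7]] the strided
# sections are a=5, b=5, c=3, d=7. PICK(A,B)=5 while PICK(A,C) and PICK(B,C) are 0, so the
# bitwise OR yields 5 and the fallback to d is never taken: the block downsamples to its mode, 5.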
|
51 |
+
|
52 |
+
def quick_countless(data):
|
53 |
+
"""
|
54 |
+
Vectorized implementation of downsampling a 2D
|
55 |
+
image by 2 on each side using the COUNTLESS algorithm.
|
56 |
+
|
57 |
+
data is a 2D numpy array with even dimensions.
|
58 |
+
"""
|
59 |
+
sections = []
|
60 |
+
|
61 |
+
# This loop splits the 2D array apart into four arrays that are
|
62 |
+
# all the result of striding by 2 and offset by (0,0), (0,1), (1,0),
|
63 |
+
# and (1,1) representing the A, B, C, and D positions from Figure 1.
|
64 |
+
factor = (2,2)
|
65 |
+
for offset in np.ndindex(factor):
|
66 |
+
part = data[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
67 |
+
sections.append(part)
|
68 |
+
|
69 |
+
a, b, c, d = sections
|
70 |
+
|
71 |
+
ab_ac = a * ((a == b) | (a == c)) # PICK(A,B) || PICK(A,C) w/ optimization
|
72 |
+
bc = b * (b == c) # PICK(B,C)
|
73 |
+
|
74 |
+
a = ab_ac | bc # (PICK(A,B) || PICK(A,C)) or PICK(B,C)
|
75 |
+
return a + (a == 0) * d # AB || AC || BC || D
|
76 |
+
|
77 |
+
def quickest_countless(data):
|
78 |
+
"""
|
79 |
+
Vectorized implementation of downsampling a 2D
|
80 |
+
image by 2 on each side using the COUNTLESS algorithm.
|
81 |
+
|
82 |
+
data is a 2D numpy array with even dimensions.
|
83 |
+
"""
|
84 |
+
sections = []
|
85 |
+
|
86 |
+
# This loop splits the 2D array apart into four arrays that are
|
87 |
+
# all the result of striding by 2 and offset by (0,0), (0,1), (1,0),
|
88 |
+
# and (1,1) representing the A, B, C, and D positions from Figure 1.
|
89 |
+
factor = (2,2)
|
90 |
+
for offset in np.ndindex(factor):
|
91 |
+
part = data[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
92 |
+
sections.append(part)
|
93 |
+
|
94 |
+
a, b, c, d = sections
|
95 |
+
|
96 |
+
ab_ac = a * ((a == b) | (a == c)) # PICK(A,B) || PICK(A,C) w/ optimization
|
97 |
+
ab_ac |= b * (b == c) # PICK(B,C)
|
98 |
+
return ab_ac + (ab_ac == 0) * d # AB || AC || BC || D
|
99 |
+
|
100 |
+
def quick_countless_xor(data):
|
101 |
+
"""
|
102 |
+
Vectorized implementation of downsampling a 2D
|
103 |
+
image by 2 on each side using the COUNTLESS algorithm.
|
104 |
+
|
105 |
+
data is a 2D numpy array with even dimensions.
|
106 |
+
"""
|
107 |
+
sections = []
|
108 |
+
|
109 |
+
# This loop splits the 2D array apart into four arrays that are
|
110 |
+
# all the result of striding by 2 and offset by (0,0), (0,1), (1,0),
|
111 |
+
# and (1,1) representing the A, B, C, and D positions from Figure 1.
|
112 |
+
factor = (2,2)
|
113 |
+
for offset in np.ndindex(factor):
|
114 |
+
part = data[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
115 |
+
sections.append(part)
|
116 |
+
|
117 |
+
a, b, c, d = sections
|
118 |
+
|
119 |
+
ab = a ^ (a ^ b) # a or b
|
120 |
+
ab += (ab != a) * ((ab ^ (ab ^ c)) - b) # b or c
|
121 |
+
ab += (ab == c) * ((ab ^ (ab ^ d)) - c) # c or d
|
122 |
+
return ab
|
123 |
+
|
124 |
+
def stippled_countless(data):
|
125 |
+
"""
|
126 |
+
Vectorized implementation of downsampling a 2D
|
127 |
+
image by 2 on each side using the COUNTLESS algorithm
|
128 |
+
that treats zero as "background" and inflates lone
|
129 |
+
pixels.
|
130 |
+
|
131 |
+
data is a 2D numpy array with even dimensions.
|
132 |
+
"""
|
133 |
+
sections = []
|
134 |
+
|
135 |
+
# This loop splits the 2D array apart into four arrays that are
|
136 |
+
# all the result of striding by 2 and offset by (0,0), (0,1), (1,0),
|
137 |
+
# and (1,1) representing the A, B, C, and D positions from Figure 1.
|
138 |
+
factor = (2,2)
|
139 |
+
for offset in np.ndindex(factor):
|
140 |
+
part = data[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
141 |
+
sections.append(part)
|
142 |
+
|
143 |
+
a, b, c, d = sections
|
144 |
+
|
145 |
+
ab_ac = a * ((a == b) | (a == c)) # PICK(A,B) || PICK(A,C) w/ optimization
|
146 |
+
ab_ac |= b * (b == c) # PICK(B,C)
|
147 |
+
|
148 |
+
nonzero = a + (a == 0) * (b + (b == 0) * c)
|
149 |
+
return ab_ac + (ab_ac == 0) * (d + (d == 0) * nonzero) # AB || AC || BC || D
|
150 |
+
|
151 |
+
def zero_corrected_countless(data):
|
152 |
+
"""
|
153 |
+
Vectorized implementation of downsampling a 2D
|
154 |
+
image by 2 on each side using the COUNTLESS algorithm.
|
155 |
+
|
156 |
+
data is a 2D numpy array with even dimensions.
|
157 |
+
"""
|
158 |
+
# allows us to prevent losing 1/2 a bit of information
|
159 |
+
# at the top end by using a bigger type. Without this 255 is handled incorrectly.
|
160 |
+
data, upgraded = upgrade_type(data)
|
161 |
+
|
162 |
+
# offset from zero, raw countless doesn't handle 0 correctly
|
163 |
+
# we'll remove the extra 1 at the end.
|
164 |
+
data += 1
|
165 |
+
|
166 |
+
sections = []
|
167 |
+
|
168 |
+
# This loop splits the 2D array apart into four arrays that are
|
169 |
+
# all the result of striding by 2 and offset by (0,0), (0,1), (1,0),
|
170 |
+
# and (1,1) representing the A, B, C, and D positions from Figure 1.
|
171 |
+
factor = (2,2)
|
172 |
+
for offset in np.ndindex(factor):
|
173 |
+
part = data[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
174 |
+
sections.append(part)
|
175 |
+
|
176 |
+
a, b, c, d = sections
|
177 |
+
|
178 |
+
ab = a * (a == b) # PICK(A,B)
|
179 |
+
ac = a * (a == c) # PICK(A,C)
|
180 |
+
bc = b * (b == c) # PICK(B,C)
|
181 |
+
|
182 |
+
a = ab | ac | bc # Bitwise OR, safe b/c non-matches are zeroed
|
183 |
+
|
184 |
+
result = a + (a == 0) * d - 1 # a or d - 1
|
185 |
+
|
186 |
+
if upgraded:
|
187 |
+
return downgrade_type(result)
|
188 |
+
|
189 |
+
# only need to reset data if we weren't upgraded
|
190 |
+
# b/c no copy was made in that case
|
191 |
+
data -= 1
|
192 |
+
|
193 |
+
return result
|
194 |
+
|
195 |
+
def countless_extreme(data):
|
196 |
+
nonzeros = np.count_nonzero(data)
|
197 |
+
# print("nonzeros", nonzeros)
|
198 |
+
|
199 |
+
N = reduce(operator.mul, data.shape)
|
200 |
+
|
201 |
+
if nonzeros == N:
|
202 |
+
print("quick")
|
203 |
+
return quick_countless(data)
|
204 |
+
elif np.count_nonzero(data + 1) == N:
|
205 |
+
print("quick")
|
206 |
+
# print("upper", nonzeros)
|
207 |
+
return quick_countless(data)
|
208 |
+
else:
|
209 |
+
return countless(data)
|
210 |
+
|
211 |
+
|
212 |
+
def countless(data):
|
213 |
+
"""
|
214 |
+
Vectorized implementation of downsampling a 2D
|
215 |
+
image by 2 on each side using the COUNTLESS algorithm.
|
216 |
+
|
217 |
+
data is a 2D numpy array with even dimensions.
|
218 |
+
"""
|
219 |
+
# allows us to prevent losing 1/2 a bit of information
|
220 |
+
# at the top end by using a bigger type. Without this 255 is handled incorrectly.
|
221 |
+
data, upgraded = upgrade_type(data)
|
222 |
+
|
223 |
+
# offset from zero, raw countless doesn't handle 0 correctly
|
224 |
+
# we'll remove the extra 1 at the end.
|
225 |
+
data += 1
|
226 |
+
|
227 |
+
sections = []
|
228 |
+
|
229 |
+
# This loop splits the 2D array apart into four arrays that are
|
230 |
+
# all the result of striding by 2 and offset by (0,0), (0,1), (1,0),
|
231 |
+
# and (1,1) representing the A, B, C, and D positions from Figure 1.
|
232 |
+
factor = (2,2)
|
233 |
+
for offset in np.ndindex(factor):
|
234 |
+
part = data[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
235 |
+
sections.append(part)
|
236 |
+
|
237 |
+
a, b, c, d = sections
|
238 |
+
|
239 |
+
ab_ac = a * ((a == b) | (a == c)) # PICK(A,B) || PICK(A,C) w/ optimization
|
240 |
+
ab_ac |= b * (b == c) # PICK(B,C)
|
241 |
+
result = ab_ac + (ab_ac == 0) * d - 1 # (matches or d) - 1
|
242 |
+
|
243 |
+
if upgraded:
|
244 |
+
return downgrade_type(result)
|
245 |
+
|
246 |
+
# only need to reset data if we weren't upgraded
|
247 |
+
# b/c no copy was made in that case
|
248 |
+
data -= 1
|
249 |
+
|
250 |
+
return result
|
251 |
+
|
252 |
+
def upgrade_type(arr):
|
253 |
+
dtype = arr.dtype
|
254 |
+
|
255 |
+
if dtype == np.uint8:
|
256 |
+
return arr.astype(np.uint16), True
|
257 |
+
elif dtype == np.uint16:
|
258 |
+
return arr.astype(np.uint32), True
|
259 |
+
elif dtype == np.uint32:
|
260 |
+
return arr.astype(np.uint64), True
|
261 |
+
|
262 |
+
return arr, False
|
263 |
+
|
264 |
+
def downgrade_type(arr):
|
265 |
+
dtype = arr.dtype
|
266 |
+
|
267 |
+
if dtype == np.uint64:
|
268 |
+
return arr.astype(np.uint32)
|
269 |
+
elif dtype == np.uint32:
|
270 |
+
return arr.astype(np.uint16)
|
271 |
+
elif dtype == np.uint16:
|
272 |
+
return arr.astype(np.uint8)
|
273 |
+
|
274 |
+
return arr
|
275 |
+
|
276 |
+
def odd_to_even(image):
|
277 |
+
"""
|
278 |
+
To facilitate 2x2 downsampling segmentation, change an odd sized image into an even sized one.
|
279 |
+
Works by mirroring the starting 1 pixel edge of the image on odd shaped sides.
|
280 |
+
|
281 |
+
e.g. turn a 3x3x5 image into a 4x4x5 (the x and y are what are getting downsampled)
|
282 |
+
|
283 |
+
For example: [ 3, 2, 4 ] => [ 3, 3, 2, 4 ] which is now easy to downsample.
|
284 |
+
|
285 |
+
"""
|
286 |
+
shape = np.array(image.shape)
|
287 |
+
|
288 |
+
offset = (shape % 2)[:2] # x,y offset
|
289 |
+
|
290 |
+
# detect if we're dealing with an even
|
291 |
+
# image. if so it's fine, just return.
|
292 |
+
if not np.any(offset):
|
293 |
+
return image
|
294 |
+
|
295 |
+
oddshape = image.shape[:2] + offset
|
296 |
+
oddshape = np.append(oddshape, shape[2:])
|
297 |
+
oddshape = oddshape.astype(int)
|
298 |
+
|
299 |
+
newimg = np.empty(shape=oddshape, dtype=image.dtype)
|
300 |
+
|
301 |
+
ox,oy = offset
|
302 |
+
sx,sy = oddshape
|
303 |
+
|
304 |
+
newimg[0,0] = image[0,0] # corner
|
305 |
+
newimg[ox:sx,0] = image[:,0] # x axis line
|
306 |
+
newimg[0,oy:sy] = image[0,:] # y axis line
|
307 |
+
|
308 |
+
return newimg
|
309 |
+
|
310 |
+
def counting(array):
|
311 |
+
factor = (2, 2, 1)
|
312 |
+
shape = array.shape
|
313 |
+
|
314 |
+
while len(shape) < 4:
|
315 |
+
array = np.expand_dims(array, axis=-1)
|
316 |
+
shape = array.shape
|
317 |
+
|
318 |
+
output_shape = tuple(int(math.ceil(s / f)) for s, f in zip(shape, factor))
|
319 |
+
output = np.zeros(output_shape, dtype=array.dtype)
|
320 |
+
|
321 |
+
for chan in range(0, shape[3]):
|
322 |
+
for z in range(0, shape[2]):
|
323 |
+
for x in range(0, shape[0], 2):
|
324 |
+
for y in range(0, shape[1], 2):
|
325 |
+
block = array[ x:x+2, y:y+2, z, chan ] # 2x2 block
|
326 |
+
|
327 |
+
hashtable = defaultdict(int)
|
328 |
+
for subx, suby in np.ndindex(block.shape[0], block.shape[1]):
|
329 |
+
hashtable[block[subx, suby]] += 1
|
330 |
+
|
331 |
+
best = (0, 0)
|
332 |
+
for segid, val in six.iteritems(hashtable):
|
333 |
+
if best[1] < val:
|
334 |
+
best = (segid, val)
|
335 |
+
|
336 |
+
output[ x // 2, y // 2, chan ] = best[0]
|
337 |
+
|
338 |
+
return output
|
339 |
+
|
340 |
+
def ndzoom(array):
|
341 |
+
if len(array.shape) == 3:
|
342 |
+
ratio = ( 1 / 2.0, 1 / 2.0, 1.0 )
|
343 |
+
else:
|
344 |
+
ratio = ( 1 / 2.0, 1 / 2.0)
|
345 |
+
return ndimage.interpolation.zoom(array, ratio, order=1)
|
346 |
+
|
347 |
+
def countless_if(array):
|
348 |
+
factor = (2, 2, 1)
|
349 |
+
shape = array.shape
|
350 |
+
|
351 |
+
if len(shape) < 3:
|
352 |
+
array = array[ :,:, np.newaxis ]
|
353 |
+
shape = array.shape
|
354 |
+
|
355 |
+
output_shape = tuple(int(math.ceil(s / f)) for s, f in zip(shape, factor))
|
356 |
+
output = np.zeros(output_shape, dtype=array.dtype)
|
357 |
+
|
358 |
+
for chan in range(0, shape[2]):
|
359 |
+
for x in range(0, shape[0], 2):
|
360 |
+
for y in range(0, shape[1], 2):
|
361 |
+
block = array[ x:x+2, y:y+2, chan ] # 2x2 block
|
362 |
+
|
363 |
+
if block[0,0] == block[1,0]:
|
364 |
+
pick = block[0,0]
|
365 |
+
elif block[0,0] == block[0,1]:
|
366 |
+
pick = block[0,0]
|
367 |
+
elif block[1,0] == block[0,1]:
|
368 |
+
pick = block[1,0]
|
369 |
+
else:
|
370 |
+
pick = block[1,1]
|
371 |
+
|
372 |
+
output[ x // 2, y // 2, chan ] = pick
|
373 |
+
|
374 |
+
return np.squeeze(output)
|
375 |
+
|
376 |
+
def downsample_with_averaging(array):
|
377 |
+
"""
|
378 |
+
Downsample x by factor using averaging.
|
379 |
+
|
380 |
+
@return: The downsampled array, of the same type as x.
|
381 |
+
"""
|
382 |
+
|
383 |
+
if len(array.shape) == 3:
|
384 |
+
factor = (2,2,1)
|
385 |
+
else:
|
386 |
+
factor = (2,2)
|
387 |
+
|
388 |
+
if np.array_equal(factor[:3], np.array([1,1,1])):
|
389 |
+
return array
|
390 |
+
|
391 |
+
output_shape = tuple(int(math.ceil(s / f)) for s, f in zip(array.shape, factor))
|
392 |
+
temp = np.zeros(output_shape, float)
|
393 |
+
counts = np.zeros(output_shape, dtype=int)
|
394 |
+
for offset in np.ndindex(factor):
|
395 |
+
part = array[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
396 |
+
indexing_expr = tuple(np.s_[:s] for s in part.shape)
|
397 |
+
temp[indexing_expr] += part
|
398 |
+
counts[indexing_expr] += 1
|
399 |
+
return np.cast[array.dtype](temp / counts)
|
400 |
+
|
401 |
+
def downsample_with_max_pooling(array):
|
402 |
+
|
403 |
+
factor = (2,2)
|
404 |
+
|
405 |
+
if np.all(np.array(factor, int) == 1):
|
406 |
+
return array
|
407 |
+
|
408 |
+
sections = []
|
409 |
+
|
410 |
+
for offset in np.ndindex(factor):
|
411 |
+
part = array[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
412 |
+
sections.append(part)
|
413 |
+
|
414 |
+
output = sections[0].copy()
|
415 |
+
|
416 |
+
for section in sections[1:]:
|
417 |
+
np.maximum(output, section, output)
|
418 |
+
|
419 |
+
return output
|
420 |
+
|
421 |
+
def striding(array):
|
422 |
+
"""Downsample x by factor using striding.
|
423 |
+
|
424 |
+
@return: The downsampled array, of the same type as x.
|
425 |
+
"""
|
426 |
+
factor = (2,2)
|
427 |
+
if np.all(np.array(factor, int) == 1):
|
428 |
+
return array
|
429 |
+
return array[tuple(np.s_[::f] for f in factor)]
|
430 |
+
|
431 |
+
def benchmark():
|
432 |
+
filename = sys.argv[1]
|
433 |
+
img = Image.open(filename)
|
434 |
+
data = np.array(img.getdata(), dtype=np.uint8)
|
435 |
+
|
436 |
+
if len(data.shape) == 1:
|
437 |
+
n_channels = 1
|
438 |
+
reshape = (img.height, img.width)
|
439 |
+
else:
|
440 |
+
n_channels = min(data.shape[1], 3)
|
441 |
+
data = data[:, :n_channels]
|
442 |
+
reshape = (img.height, img.width, n_channels)
|
443 |
+
|
444 |
+
data = data.reshape(reshape).astype(np.uint8)
|
445 |
+
|
446 |
+
methods = [
|
447 |
+
simplest_countless,
|
448 |
+
quick_countless,
|
449 |
+
quick_countless_xor,
|
450 |
+
quickest_countless,
|
451 |
+
stippled_countless,
|
452 |
+
zero_corrected_countless,
|
453 |
+
countless,
|
454 |
+
downsample_with_averaging,
|
455 |
+
downsample_with_max_pooling,
|
456 |
+
ndzoom,
|
457 |
+
striding,
|
458 |
+
# countless_if,
|
459 |
+
# counting,
|
460 |
+
]
|
461 |
+
|
462 |
+
formats = {
|
463 |
+
1: 'L',
|
464 |
+
3: 'RGB',
|
465 |
+
4: 'RGBA'
|
466 |
+
}
|
467 |
+
|
468 |
+
if not os.path.exists('./results'):
|
469 |
+
os.mkdir('./results')
|
470 |
+
|
471 |
+
N = 500
|
472 |
+
img_size = float(img.width * img.height) / 1024.0 / 1024.0
|
473 |
+
print("N = %d, %dx%d (%.2f MPx) %d chan, %s" % (N, img.width, img.height, img_size, n_channels, filename))
|
474 |
+
print("Algorithm\tMPx/sec\tMB/sec\tSec")
|
475 |
+
for fn in methods:
|
476 |
+
print(fn.__name__, end='')
|
477 |
+
sys.stdout.flush()
|
478 |
+
|
479 |
+
start = time.time()
|
480 |
+
# tqdm is here to show you what's going on the first time you run it.
|
481 |
+
# Feel free to remove it to get slightly more accurate timing results.
|
482 |
+
for _ in tqdm(range(N), desc=fn.__name__, disable=True):
|
483 |
+
result = fn(data)
|
484 |
+
end = time.time()
|
485 |
+
print("\r", end='')
|
486 |
+
|
487 |
+
total_time = (end - start)
|
488 |
+
mpx = N * img_size / total_time
|
489 |
+
mbytes = N * img_size * n_channels / total_time
|
490 |
+
# Output in tab separated format to enable copy-paste into excel/numbers
|
491 |
+
print("%s\t%.3f\t%.3f\t%.2f" % (fn.__name__, mpx, mbytes, total_time))
|
492 |
+
outimg = Image.fromarray(np.squeeze(result), formats[n_channels])
|
493 |
+
outimg.save('./results/{}.png'.format(fn.__name__, "PNG"))
|
494 |
+
|
495 |
+
if __name__ == '__main__':
|
496 |
+
benchmark()
|
497 |
+
|
498 |
+
|
499 |
+
# Example results:
|
500 |
+
# N = 5, 1024x1024 (1.00 MPx) 1 chan, images/gray_segmentation.png
|
501 |
+
# Function MPx/sec MB/sec Sec
|
502 |
+
# simplest_countless 752.855 752.855 0.01
|
503 |
+
# quick_countless 920.328 920.328 0.01
|
504 |
+
# zero_corrected_countless 534.143 534.143 0.01
|
505 |
+
# countless 644.247 644.247 0.01
|
506 |
+
# downsample_with_averaging 372.575 372.575 0.01
|
507 |
+
# downsample_with_max_pooling 974.060 974.060 0.01
|
508 |
+
# ndzoom 137.517 137.517 0.04
|
509 |
+
# striding 38550.588 38550.588 0.00
|
510 |
+
# countless_if 4.377 4.377 1.14
|
511 |
+
# counting 0.117 0.117 42.85
|
512 |
+
|
513 |
+
# Run without non-numpy implementations:
|
514 |
+
# N = 2000, 1024x1024 (1.00 MPx) 1 chan, images/gray_segmentation.png
|
515 |
+
# Algorithm MPx/sec MB/sec Sec
|
516 |
+
# simplest_countless 800.522 800.522 2.50
|
517 |
+
# quick_countless 945.420 945.420 2.12
|
518 |
+
# quickest_countless 947.256 947.256 2.11
|
519 |
+
# stippled_countless 544.049 544.049 3.68
|
520 |
+
# zero_corrected_countless 575.310 575.310 3.48
|
521 |
+
# countless 646.684 646.684 3.09
|
522 |
+
# downsample_with_averaging 385.132 385.132 5.19
|
523 |
+
# downsample_with_max_poolin 988.361 988.361 2.02
|
524 |
+
# ndzoom 163.104 163.104 12.26
|
525 |
+
# striding 81589.340 81589.340 0.02
|
526 |
+
|
527 |
+
|
528 |
+
|
529 |
+
|
DH-AISP/2/saicinpainting/evaluation/masks/countless/countless3d.py
ADDED
@@ -0,0 +1,356 @@
1 |
+
from six.moves import range
|
2 |
+
from PIL import Image
|
3 |
+
import numpy as np
|
4 |
+
import io
|
5 |
+
import time
|
6 |
+
import math
|
7 |
+
import random
|
8 |
+
import sys
|
9 |
+
from collections import defaultdict
|
10 |
+
from copy import deepcopy
|
11 |
+
from itertools import combinations
|
12 |
+
from functools import reduce
|
13 |
+
from tqdm import tqdm
|
14 |
+
|
15 |
+
from memory_profiler import profile
|
16 |
+
|
17 |
+
def countless5(a,b,c,d,e):
|
18 |
+
"""First stage of generalizing from countless2d.
|
19 |
+
|
20 |
+
You have five slots: A, B, C, D, E
|
21 |
+
|
22 |
+
You can decide if something is the winner by first checking for
|
23 |
+
matches of three, then matches of two, then picking just one if
|
24 |
+
the other two tries fail. In countless2d, you just check for matches
|
25 |
+
of two and then pick one of them otherwise.
|
26 |
+
|
27 |
+
Unfortunately, you need to check every triple: ABC, ABD, ABE, ACD, ACE, ADE, BCD, BCE, BDE, & CDE.
|
28 |
+
Then you need to check AB, AC, AD, BC, BD, & CD.
|
29 |
+
We skip checking E because if none of these match, we pick E. We can
|
30 |
+
skip checking AE, BE, CE, DE since if any of those match, E is our boy
|
31 |
+
so it's redundant.
|
32 |
+
|
33 |
+
So countless grows combinatorially in complexity.
|
34 |
+
"""
|
35 |
+
sections = [ a,b,c,d,e ]
|
36 |
+
|
37 |
+
p2 = lambda q,r: q * (q == r) # q if p == q else 0
|
38 |
+
p3 = lambda q,r,s: q * ( (q == r) & (r == s) ) # q if q == r == s else 0
|
39 |
+
|
40 |
+
lor = lambda x,y: x + (x == 0) * y
|
41 |
+
|
42 |
+
results3 = ( p3(x,y,z) for x,y,z in combinations(sections, 3) )
|
43 |
+
results3 = reduce(lor, results3)
|
44 |
+
|
45 |
+
results2 = ( p2(x,y) for x,y in combinations(sections[:-1], 2) )
|
46 |
+
results2 = reduce(lor, results2)
|
47 |
+
|
48 |
+
return reduce(lor, (results3, results2, e))
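# Worked example (editor's illustration): for slots (a, b, c, d, e) = (2, 2, 2, 5, 7) the
# triple check already matches (a == b == c), so 2 wins; for (2, 3, 3, 5, 7) only the pair
# b == c matches, so 3 wins; if all five values differ, every check is zero and e is returned.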
|
49 |
+
|
50 |
+
def countless8(a,b,c,d,e,f,g,h):
|
51 |
+
"""Extend countless5 to countless8. Same deal, except we also
|
52 |
+
need to check for matches of length 4."""
|
53 |
+
sections = [ a, b, c, d, e, f, g, h ]
|
54 |
+
|
55 |
+
p2 = lambda q,r: q * (q == r)
|
56 |
+
p3 = lambda q,r,s: q * ( (q == r) & (r == s) )
|
57 |
+
p4 = lambda p,q,r,s: p * ( (p == q) & (q == r) & (r == s) )
|
58 |
+
|
59 |
+
lor = lambda x,y: x + (x == 0) * y
|
60 |
+
|
61 |
+
results4 = ( p4(x,y,z,w) for x,y,z,w in combinations(sections, 4) )
|
62 |
+
results4 = reduce(lor, results4)
|
63 |
+
|
64 |
+
results3 = ( p3(x,y,z) for x,y,z in combinations(sections, 3) )
|
65 |
+
results3 = reduce(lor, results3)
|
66 |
+
|
67 |
+
# We can always use our shortcut of omitting the last element
|
68 |
+
# for N choose 2
|
69 |
+
results2 = ( p2(x,y) for x,y in combinations(sections[:-1], 2) )
|
70 |
+
results2 = reduce(lor, results2)
|
71 |
+
|
72 |
+
return reduce(lor, [ results4, results3, results2, h ])
|
73 |
+
|
74 |
+
def dynamic_countless3d(data):
|
75 |
+
"""countless8 + dynamic programming. ~2x faster"""
|
76 |
+
sections = []
|
77 |
+
|
78 |
+
# shift zeros up one so they don't interfere with bitwise operators
|
79 |
+
# we'll shift down at the end
|
80 |
+
data += 1
|
81 |
+
|
82 |
+
# This loop splits the 2D array apart into four arrays that are
|
83 |
+
# all the result of striding by 2 and offset by (0,0), (0,1), (1,0),
|
84 |
+
# and (1,1) representing the A, B, C, and D positions from Figure 1.
|
85 |
+
factor = (2,2,2)
|
86 |
+
for offset in np.ndindex(factor):
|
87 |
+
part = data[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
88 |
+
sections.append(part)
|
89 |
+
|
90 |
+
pick = lambda a,b: a * (a == b)
|
91 |
+
lor = lambda x,y: x + (x == 0) * y
|
92 |
+
|
93 |
+
subproblems2 = {}
|
94 |
+
|
95 |
+
results2 = None
|
96 |
+
for x,y in combinations(range(7), 2):
|
97 |
+
res = pick(sections[x], sections[y])
|
98 |
+
subproblems2[(x,y)] = res
|
99 |
+
if results2 is not None:
|
100 |
+
results2 += (results2 == 0) * res
|
101 |
+
else:
|
102 |
+
results2 = res
|
103 |
+
|
104 |
+
subproblems3 = {}
|
105 |
+
|
106 |
+
results3 = None
|
107 |
+
for x,y,z in combinations(range(8), 3):
|
108 |
+
res = pick(subproblems2[(x,y)], sections[z])
|
109 |
+
|
110 |
+
if z != 7:
|
111 |
+
subproblems3[(x,y,z)] = res
|
112 |
+
|
113 |
+
if results3 is not None:
|
114 |
+
results3 += (results3 == 0) * res
|
115 |
+
else:
|
116 |
+
results3 = res
|
117 |
+
|
118 |
+
results3 = reduce(lor, (results3, results2, sections[-1]))
|
119 |
+
|
120 |
+
# free memory
|
121 |
+
results2 = None
|
122 |
+
subproblems2 = None
|
123 |
+
res = None
|
124 |
+
|
125 |
+
results4 = ( pick(subproblems3[(x,y,z)], sections[w]) for x,y,z,w in combinations(range(8), 4) )
|
126 |
+
results4 = reduce(lor, results4)
|
127 |
+
subproblems3 = None # free memory
|
128 |
+
|
129 |
+
final_result = lor(results4, results3) - 1
|
130 |
+
data -= 1
|
131 |
+
return final_result
|
132 |
+
|
133 |
+
def countless3d(data):
|
134 |
+
"""Now write countless8 in such a way that it could be used
|
135 |
+
to process an image."""
|
136 |
+
sections = []
|
137 |
+
|
138 |
+
# shift zeros up one so they don't interfere with bitwise operators
|
139 |
+
# we'll shift down at the end
|
140 |
+
data += 1
|
141 |
+
|
142 |
+
# This loop splits the 2D array apart into four arrays that are
|
143 |
+
# all the result of striding by 2 and offset by (0,0), (0,1), (1,0),
|
144 |
+
# and (1,1) representing the A, B, C, and D positions from Figure 1.
|
145 |
+
factor = (2,2,2)
|
146 |
+
for offset in np.ndindex(factor):
|
147 |
+
part = data[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
148 |
+
sections.append(part)
|
149 |
+
|
150 |
+
p2 = lambda q,r: q * (q == r)
|
151 |
+
p3 = lambda q,r,s: q * ( (q == r) & (r == s) )
|
152 |
+
p4 = lambda p,q,r,s: p * ( (p == q) & (q == r) & (r == s) )
|
153 |
+
|
154 |
+
lor = lambda x,y: x + (x == 0) * y
|
155 |
+
|
156 |
+
results4 = ( p4(x,y,z,w) for x,y,z,w in combinations(sections, 4) )
|
157 |
+
results4 = reduce(lor, results4)
|
158 |
+
|
159 |
+
results3 = ( p3(x,y,z) for x,y,z in combinations(sections, 3) )
|
160 |
+
results3 = reduce(lor, results3)
|
161 |
+
|
162 |
+
results2 = ( p2(x,y) for x,y in combinations(sections[:-1], 2) )
|
163 |
+
results2 = reduce(lor, results2)
|
164 |
+
|
165 |
+
final_result = reduce(lor, (results4, results3, results2, sections[-1])) - 1
|
166 |
+
data -= 1
|
167 |
+
return final_result
|
168 |
+
|
169 |
+
def countless_generalized(data, factor):
|
170 |
+
assert len(data.shape) == len(factor)
|
171 |
+
|
172 |
+
sections = []
|
173 |
+
|
174 |
+
mode_of = reduce(lambda x,y: x * y, factor)
|
175 |
+
majority = int(math.ceil(float(mode_of) / 2))
|
176 |
+
|
177 |
+
data += 1
|
178 |
+
|
179 |
+
# This loop splits the 2D array apart into four arrays that are
|
180 |
+
# all the result of striding by 2 and offset by (0,0), (0,1), (1,0),
|
181 |
+
# and (1,1) representing the A, B, C, and D positions from Figure 1.
|
182 |
+
for offset in np.ndindex(factor):
|
183 |
+
part = data[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
184 |
+
sections.append(part)
|
185 |
+
|
186 |
+
def pick(elements):
|
187 |
+
eq = ( elements[i] == elements[i+1] for i in range(len(elements) - 1) )
|
188 |
+
anded = reduce(lambda p,q: p & q, eq)
|
189 |
+
return elements[0] * anded
|
190 |
+
|
191 |
+
def logical_or(x,y):
|
192 |
+
return x + (x == 0) * y
|
193 |
+
|
194 |
+
result = ( pick(combo) for combo in combinations(sections, majority) )
|
195 |
+
result = reduce(logical_or, result)
|
196 |
+
for i in range(majority - 1, 3-1, -1): # 3-1 b/c of exclusive bounds
|
197 |
+
partial_result = ( pick(combo) for combo in combinations(sections, i) )
|
198 |
+
partial_result = reduce(logical_or, partial_result)
|
199 |
+
result = logical_or(result, partial_result)
|
200 |
+
|
201 |
+
partial_result = ( pick(combo) for combo in combinations(sections[:-1], 2) )
|
202 |
+
partial_result = reduce(logical_or, partial_result)
|
203 |
+
result = logical_or(result, partial_result)
|
204 |
+
|
205 |
+
result = logical_or(result, sections[-1]) - 1
|
206 |
+
data -= 1
|
207 |
+
return result
|
208 |
+
|
209 |
+
def dynamic_countless_generalized(data, factor):
|
210 |
+
assert len(data.shape) == len(factor)
|
211 |
+
|
212 |
+
sections = []
|
213 |
+
|
214 |
+
mode_of = reduce(lambda x,y: x * y, factor)
|
215 |
+
majority = int(math.ceil(float(mode_of) / 2))
|
216 |
+
|
217 |
+
data += 1 # offset from zero
|
218 |
+
|
219 |
+
# This loop splits the 2D array apart into four arrays that are
|
220 |
+
# all the result of striding by 2 and offset by (0,0), (0,1), (1,0),
|
221 |
+
# and (1,1) representing the A, B, C, and D positions from Figure 1.
|
222 |
+
for offset in np.ndindex(factor):
|
223 |
+
part = data[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
224 |
+
sections.append(part)
|
225 |
+
|
226 |
+
pick = lambda a,b: a * (a == b)
|
227 |
+
lor = lambda x,y: x + (x == 0) * y # logical or
|
228 |
+
|
229 |
+
subproblems = [ {}, {} ]
|
230 |
+
results2 = None
|
231 |
+
for x,y in combinations(range(len(sections) - 1), 2):
|
232 |
+
res = pick(sections[x], sections[y])
|
233 |
+
subproblems[0][(x,y)] = res
|
234 |
+
if results2 is not None:
|
235 |
+
results2 = lor(results2, res)
|
236 |
+
else:
|
237 |
+
results2 = res
|
238 |
+
|
239 |
+
results = [ results2 ]
|
240 |
+
for r in range(3, majority+1):
|
241 |
+
r_results = None
|
242 |
+
for combo in combinations(range(len(sections)), r):
|
243 |
+
res = pick(subproblems[0][combo[:-1]], sections[combo[-1]])
|
244 |
+
|
245 |
+
if combo[-1] != len(sections) - 1:
|
246 |
+
subproblems[1][combo] = res
|
247 |
+
|
248 |
+
if r_results is not None:
|
249 |
+
r_results = lor(r_results, res)
|
250 |
+
else:
|
251 |
+
r_results = res
|
252 |
+
results.append(r_results)
|
253 |
+
subproblems[0] = subproblems[1]
|
254 |
+
subproblems[1] = {}
|
255 |
+
|
256 |
+
results.reverse()
|
257 |
+
final_result = lor(reduce(lor, results), sections[-1]) - 1
|
258 |
+
data -= 1
|
259 |
+
return final_result
|
260 |
+
|
261 |
+
def downsample_with_averaging(array):
|
262 |
+
"""
|
263 |
+
Downsample x by factor using averaging.
|
264 |
+
|
265 |
+
@return: The downsampled array, of the same type as x.
|
266 |
+
"""
|
267 |
+
factor = (2,2,2)
|
268 |
+
|
269 |
+
if np.array_equal(factor[:3], np.array([1,1,1])):
|
270 |
+
return array
|
271 |
+
|
272 |
+
output_shape = tuple(int(math.ceil(s / f)) for s, f in zip(array.shape, factor))
|
273 |
+
temp = np.zeros(output_shape, float)
|
274 |
+
counts = np.zeros(output_shape, np.int)
|
275 |
+
for offset in np.ndindex(factor):
|
276 |
+
part = array[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
277 |
+
indexing_expr = tuple(np.s_[:s] for s in part.shape)
|
278 |
+
temp[indexing_expr] += part
|
279 |
+
counts[indexing_expr] += 1
|
280 |
+
return np.cast[array.dtype](temp / counts)
|
281 |
+
|
282 |
+
def downsample_with_max_pooling(array):
|
283 |
+
|
284 |
+
factor = (2,2,2)
|
285 |
+
|
286 |
+
sections = []
|
287 |
+
|
288 |
+
for offset in np.ndindex(factor):
|
289 |
+
part = array[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
290 |
+
sections.append(part)
|
291 |
+
|
292 |
+
output = sections[0].copy()
|
293 |
+
|
294 |
+
for section in sections[1:]:
|
295 |
+
np.maximum(output, section, output)
|
296 |
+
|
297 |
+
return output
|
298 |
+
|
299 |
+
def striding(array):
|
300 |
+
"""Downsample x by factor using striding.
|
301 |
+
|
302 |
+
@return: The downsampled array, of the same type as x.
|
303 |
+
"""
|
304 |
+
factor = (2,2,2)
|
305 |
+
if np.all(np.array(factor, int) == 1):
|
306 |
+
return array
|
307 |
+
return array[tuple(np.s_[::f] for f in factor)]
|
308 |
+
|
309 |
+
def benchmark():
|
310 |
+
def countless3d_generalized(img):
|
311 |
+
return countless_generalized(img, (2,8,1))
|
312 |
+
def countless3d_dynamic_generalized(img):
|
313 |
+
return dynamic_countless_generalized(img, (8,8,1))
|
314 |
+
|
315 |
+
methods = [
|
316 |
+
# countless3d,
|
317 |
+
# dynamic_countless3d,
|
318 |
+
countless3d_generalized,
|
319 |
+
# countless3d_dynamic_generalized,
|
320 |
+
# striding,
|
321 |
+
# downsample_with_averaging,
|
322 |
+
# downsample_with_max_pooling
|
323 |
+
]
|
324 |
+
|
325 |
+
data = np.zeros(shape=(16**2, 16**2, 16**2), dtype=np.uint8) + 1
|
326 |
+
|
327 |
+
N = 5
|
328 |
+
|
329 |
+
print('Algorithm\tMPx\tMB/sec\tSec\tN=%d' % N)
|
330 |
+
|
331 |
+
for fn in methods:
|
332 |
+
start = time.time()
|
333 |
+
for _ in range(N):
|
334 |
+
result = fn(data)
|
335 |
+
end = time.time()
|
336 |
+
|
337 |
+
total_time = (end - start)
|
338 |
+
mpx = N * float(data.shape[0] * data.shape[1] * data.shape[2]) / total_time / 1024.0 / 1024.0
|
339 |
+
mbytes = mpx * np.dtype(data.dtype).itemsize
|
340 |
+
# Output in tab separated format to enable copy-paste into excel/numbers
|
341 |
+
print("%s\t%.3f\t%.3f\t%.2f" % (fn.__name__, mpx, mbytes, total_time))
|
342 |
+
|
343 |
+
if __name__ == '__main__':
|
344 |
+
benchmark()
|
345 |
+
|
346 |
+
# Algorithm MPx MB/sec Sec N=5
|
347 |
+
# countless3d 10.564 10.564 60.58
|
348 |
+
# dynamic_countless3d 22.717 22.717 28.17
|
349 |
+
# countless3d_generalized 9.702 9.702 65.96
|
350 |
+
# countless3d_dynamic_generalized 22.720 22.720 28.17
|
351 |
+
# striding 253360.506 253360.506 0.00
|
352 |
+
# downsample_with_averaging 224.098 224.098 2.86
|
353 |
+
# downsample_with_max_pooling 690.474 690.474 0.93
|
354 |
+
|
355 |
+
|
356 |
+
|
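As a usage note for the countless3d entry point above: the function reduces every non-overlapping 2x2x2 block of a label volume to a modal (most frequently occurring) label of that block, falling back to the block's last corner when all eight values differ. It also temporarily shifts the data by +1 in place, so the input must be writable and its integer dtype must have headroom for that increment. A minimal sketch, assuming the DH-AISP/2 directory is on PYTHONPATH so the module imports under the saicinpainting package:

import numpy as np

from saicinpainting.evaluation.masks.countless.countless3d import countless3d

# a single 2x2x2 block of segmentation labels; label 1 occurs four times
labels = np.array(
    [[[1, 1],
      [1, 2]],
     [[1, 3],
      [4, 4]]],
    dtype=np.uint8)

reduced = countless3d(labels)  # result has shape (1, 1, 1)
print(reduced)                 # [[[1]]] -- the most frequent label wins

The sketch also assumes every axis of the input is divisible by 2; odd-sized volumes would need padding first, since the eight strided sections must have identical shapes.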
DH-AISP/2/saicinpainting/evaluation/masks/countless/images/gcim.jpg
ADDED
Git LFS Details
DH-AISP/2/saicinpainting/evaluation/masks/countless/images/gray_segmentation.png
ADDED
DH-AISP/2/saicinpainting/evaluation/masks/countless/images/segmentation.png
ADDED
DH-AISP/2/saicinpainting/evaluation/masks/countless/images/sparse.png
ADDED
DH-AISP/2/saicinpainting/evaluation/masks/countless/memprof/countless2d_gcim_N_1000.png
ADDED
DH-AISP/2/saicinpainting/evaluation/masks/countless/memprof/countless2d_quick_gcim_N_1000.png
ADDED
DH-AISP/2/saicinpainting/evaluation/masks/countless/memprof/countless3d.png
ADDED