# ============================================================= # This file contains helper functions and classes # # Mushfiqul Alam, 2017 # # Report bugs/suggestions: # mushfiqulalam@gmail.com # ============================================================= import png import numpy as np import scipy.misc import math from scipy import signal # for convolutions from scipy import ndimage # for n-dimensional convolution from scipy import interpolate # ============================================================= # function: imsave # save image in image formats # data: is the image data # output_dtype: output data type # input_dtype: input data type # is_scale: is scaling needed to go from input data type to output data type # ============================================================= def imsave(data, output_name, output_dtype="uint8", input_dtype="uint8", is_scale=False): dtype_dictionary = {"uint8" : np.uint8(data), "uint16" : np.uint16(data),\ "uint32" : np.uint32(data), "uint64" : np.uint64(data),\ "int8" : np.int8(data), "int16" : np.int16(data),\ "int32" : np.int32(data), "int64" : np.int64(data),\ "float16" : np.float16(data), "float32" : np.float32(data),\ "float64" : np.float64(data)} min_val_dictionary = {"uint8" : 0, "uint16" : 0,\ "uint32" : 0, "uint64" : 0,\ "int8" : -128, "int16" : -32768,\ "int32" : -2147483648, "int64" : -9223372036854775808} max_val_dictionary = {"uint8" : 255, "uint16" : 65535,\ "uint32" : 4294967295, "uint64" : 18446744073709551615,\ "int8" : 127, "int16" : 32767,\ "int32" : 2147483647, "int64" : 9223372036854775807} # scale the data in case scaling is necessary to go from input_dtype # to output_dtype if (is_scale): # convert data into float32 data = np.float32(data) # Get minimum and maximum value of the input and output data types in_min = min_val_dictionary[input_dtype] in_max = max_val_dictionary[input_dtype] out_min = min_val_dictionary[output_dtype] out_max = max_val_dictionary[output_dtype] # clip the input data in the input_dtype range data = np.clip(data, in_min, in_max) # scale the data data = out_min + (data - in_min) * (out_max - out_min) / (in_max - in_min) # clip scaled data in output_dtype range data = np.clip(data, out_min, out_max) # convert the data into the output_dtype data = dtype_dictionary[output_dtype] # output image type: raw, png, jpeg output_file_type = output_name[-3:] # save files depending on output_file_type if (output_file_type == "raw"): pass # will be added later return elif (output_file_type == "png"): # png will only save uint8 or uint16 if ((output_dtype == "uint16") or (output_dtype == "uint8")): if (output_dtype == "uint16"): output_bitdepth = 16 elif (output_dtype == "uint8"): output_bitdepth = 8 pass else: print("For png output, output_dtype must be uint8 or uint16") return with open(output_name, "wb") as f: # rgb image if (np.ndim(data) == 3): # create the png writer writer = png.Writer(width=data.shape[1], height=data.shape[0],\ bitdepth = output_bitdepth) # convert data to the python lists expected by the png Writer data2list = data.reshape(-1, data.shape[1]*data.shape[2]).tolist() # write in the file writer.write(f, data2list) # greyscale image elif (np.ndim(data) == 2): # create the png writer writer = png.Writer(width=data.shape[1], height=data.shape[0],\ bitdepth = output_bitdepth,\ greyscale = True) # convert data to the python lists expected by the png Writer data2list = data.tolist() # write in the file writer.write(f, data2list) elif (output_file_type == "jpg"): pass # will be added later return else: print("output_name should contain extensions of .raw, .png, or .jpg") return # ============================================================= # class: helpers # a class of useful helper functions # ============================================================= class helpers: def __init__(self, data=None, name="helper"): self.data = np.float32(data) self.name = name def get_width_height(self): #------------------------------------------------------ # returns width, height # We assume data be in height x width x number of channel x frames format #------------------------------------------------------ if (np.ndim(self.data) > 1): size = np.shape(self.data) width = size[1] height = size[0] return width, height else: print("Error! data dimension must be 2 or greater") def bayer_channel_separation(self, pattern): #------------------------------------------------------ # function: bayer_channel_separation # Objective: Outputs four channels of the bayer pattern # Input: # data: the bayer data # pattern: rggb, grbg, gbrg, or bggr # Output: # R, G1, G2, B (Quarter resolution images) #------------------------------------------------------ if (pattern == "rggb"): R = self.data[::2, ::2] G1 = self.data[::2, 1::2] G2 = self.data[1::2, ::2] B = self.data[1::2, 1::2] elif (pattern == "grbg"): G1 = self.data[::2, ::2] R = self.data[::2, 1::2] B = self.data[1::2, ::2] G2 = self.data[1::2, 1::2] elif (pattern == "gbrg"): G1 = self.data[::2, ::2] B = self.data[::2, 1::2] R = self.data[1::2, ::2] G2 = self.data[1::2, 1::2] elif (pattern == "bggr"): B = self.data[::2, ::2] G1 = self.data[::2, 1::2] G2 = self.data[1::2, ::2] R = self.data[1::2, 1::2] else: print("pattern must be one of these: rggb, grbg, gbrg, bggr") return return R, G1, G2, B def bayer_channel_integration(self, R, G1, G2, B, pattern): #------------------------------------------------------ # function: bayer_channel_integration # Objective: combine data into a raw according to pattern # Input: # R, G1, G2, B: the four separate channels (Quarter resolution) # pattern: rggb, grbg, gbrg, or bggr # Output: # data (Full resolution image) #------------------------------------------------------ size = np.shape(R) data = np.empty((size[0]*2, size[1]*2), dtype=np.float32) if (pattern == "rggb"): data[::2, ::2] = R data[::2, 1::2] = G1 data[1::2, ::2] = G2 data[1::2, 1::2] = B elif (pattern == "grbg"): data[::2, ::2] = G1 data[::2, 1::2] = R data[1::2, ::2] = B data[1::2, 1::2] = G2 elif (pattern == "gbrg"): data[::2, ::2] = G1 data[::2, 1::2] = B data[1::2, ::2] = R data[1::2, 1::2] = G2 elif (pattern == "bggr"): data[::2, ::2] = B data[::2, 1::2] = G1 data[1::2, ::2] = G2 data[1::2, 1::2] = R else: print("pattern must be one of these: rggb, grbg, gbrg, bggr") return return data def shuffle_bayer_pattern(self, input_pattern, output_pattern): #------------------------------------------------------ # function: shuffle_bayer_pattern # convert from one bayer pattern to another #------------------------------------------------------ # Get separate channels R, G1, G2, B = self.bayer_channel_separation(input_pattern) # return integrated data return self.bayer_channel_integration(R, G1, G2, B, output_pattern) def sigma_filter_helper(self, neighborhood_size, sigma): if (neighborhood_size % 2) == 0: print("Error! neighborhood_size must be odd for example 3, 5, 7") return # number of pixels to be padded at the borders no_of_pixel_pad = math.floor(neighborhood_size / 2.) # get width, height width, height = self.get_width_height() # pad pixels at the borders img = np.pad(self.data, \ (no_of_pixel_pad, no_of_pixel_pad),\ 'reflect') # reflect would not repeat the border value # allocate memory for output output = np.empty((height, width), dtype=np.float32) for i in range(no_of_pixel_pad, height + no_of_pixel_pad): for j in range(no_of_pixel_pad, width + no_of_pixel_pad): # save the middle pixel value mid_pixel_val = img[i, j] # extract the neighborhood neighborhood = img[i - no_of_pixel_pad : i + no_of_pixel_pad+1,\ j - no_of_pixel_pad : j + no_of_pixel_pad+1] lower_range = mid_pixel_val - sigma upper_range = mid_pixel_val + sigma temp = 0. ctr = 0 for ni in range (0, neighborhood_size): for nj in range (0, neighborhood_size): if (neighborhood[ni, nj] > lower_range) and (neighborhood[ni, nj] < upper_range): temp += neighborhood[ni, nj] ctr += 1 output[i - no_of_pixel_pad, j - no_of_pixel_pad] = temp / ctr return output def bilinear_interpolation(self, x, y): width, height = self.get_width_height() x0 = np.floor(x).astype(int) x1 = x0 + 1 y0 = np.floor(y).astype(int) y1 = y0 + 1 x0 = np.clip(x0, 0, width-1) x1 = np.clip(x1, 0, width-1) y0 = np.clip(y0, 0, height-1) y1 = np.clip(y1, 0, height-1) Ia = self.data[y0, x0] Ib = self.data[y1, x0] Ic = self.data[y0, x1] Id = self.data[y1, x1] x = np.clip(x, 0, width-1) y = np.clip(y, 0, height-1) wa = (x1 - x) * (y1 - y) wb = (x1 - x) * (y - y0) wc = (x - x0) * (y1 - y) wd = (x - x0) * (y - y0) return wa * Ia + wb * Ib + wc * Ic + wd * Id def degamma_srgb(self, clip_range=[0, 65535]): # bring data in range 0 to 1 data = np.clip(self.data, clip_range[0], clip_range[1]) data = np.divide(data, clip_range[1]) data = np.asarray(data) mask = data > 0.04045 # basically, if data[x, y, c] > 0.04045, data[x, y, c] = ( (data[x, y, c] + 0.055) / 1.055 ) ^ 2.4 # else, data[x, y, c] = data[x, y, c] / 12.92 data[mask] += 0.055 data[mask] /= 1.055 data[mask] **= 2.4 data[np.invert(mask)] /= 12.92 # rescale return np.clip(data * clip_range[1], clip_range[0], clip_range[1]) def gamma_srgb(self, clip_range=[0, 65535]): # bring data in range 0 to 1 data = np.clip(self.data, clip_range[0], clip_range[1]) data = np.divide(data, clip_range[1]) data = np.asarray(data) mask = data > 0.0031308 # basically, if data[x, y, c] > 0.0031308, data[x, y, c] = 1.055 * ( var_R(i, j) ^ ( 1 / 2.4 ) ) - 0.055 # else, data[x, y, c] = data[x, y, c] * 12.92 data[mask] **= 0.4167 data[mask] *= 1.055 data[mask] -= 0.055 data[np.invert(mask)] *= 12.92 # rescale return np.clip(data * clip_range[1], clip_range[0], clip_range[1]) def degamma_adobe_rgb_1998(self, clip_range=[0, 65535]): # bring data in range 0 to 1 data = np.clip(self.data, clip_range[0], clip_range[1]) data = np.divide(data, clip_range[1]) data = np.power(data, 2.2) # originally raised to 2.19921875 # rescale return np.clip(data * clip_range[1], clip_range[0], clip_range[1]) def gamma_adobe_rgb_1998(self, clip_range=[0, 65535]): # bring data in range 0 to 1 data = np.clip(self.data, clip_range[0], clip_range[1]) data = np.divide(data, clip_range[1]) data = np.power(data, 0.4545) # rescale return np.clip(data * clip_range[1], clip_range[0], clip_range[1]) def get_xyz_reference(self, cie_version="1931", illuminant="d65"): if (cie_version == "1931"): xyz_reference_dictionary = {"A" : [109.850, 100.0, 35.585],\ "B" : [99.0927, 100.0, 85.313],\ "C" : [98.074, 100.0, 118.232],\ "d50" : [96.422, 100.0, 82.521],\ "d55" : [95.682, 100.0, 92.149],\ "d65" : [95.047, 100.0, 108.883],\ "d75" : [94.972, 100.0, 122.638],\ "E" : [100.0, 100.0, 100.0],\ "F1" : [92.834, 100.0, 103.665],\ "F2" : [99.187, 100.0, 67.395],\ "F3" : [103.754, 100.0, 49.861],\ "F4" : [109.147, 100.0, 38.813],\ "F5" : [90.872, 100.0, 98.723],\ "F6" : [97.309, 100.0, 60.191],\ "F7" : [95.044, 100.0, 108.755],\ "F8" : [96.413, 100.0, 82.333],\ "F9" : [100.365, 100.0, 67.868],\ "F10" : [96.174, 100.0, 81.712],\ "F11" : [100.966, 100.0, 64.370],\ "F12" : [108.046, 100.0, 39.228]} elif (cie_version == "1964"): xyz_reference_dictionary = {"A" : [111.144, 100.0, 35.200],\ "B" : [99.178, 100.0, 84.3493],\ "C" : [97.285, 100.0, 116.145],\ "D50" : [96.720, 100.0, 81.427],\ "D55" : [95.799, 100.0, 90.926],\ "D65" : [94.811, 100.0, 107.304],\ "D75" : [94.416, 100.0, 120.641],\ "E" : [100.0, 100.0, 100.0],\ "F1" : [94.791, 100.0, 103.191],\ "F2" : [103.280, 100.0, 69.026],\ "F3" : [108.968, 100.0, 51.965],\ "F4" : [114.961, 100.0, 40.963],\ "F5" : [93.369, 100.0, 98.636],\ "F6" : [102.148, 100.0, 62.074],\ "F7" : [95.792, 100.0, 107.687],\ "F8" : [97.115, 100.0, 81.135],\ "F9" : [102.116, 100.0, 67.826],\ "F10" : [99.001, 100.0, 83.134],\ "F11" : [103.866, 100.0, 65.627],\ "F12" : [111.428, 100.0, 40.353]} else: print("Warning! cie_version must be 1931 or 1964.") return return np.divide(xyz_reference_dictionary[illuminant], 100.0) def sobel_prewitt_direction_label(self, gradient_magnitude, theta, threshold=0): direction_label = np.zeros(np.shape(gradient_magnitude), dtype=np.float32) theta = np.asarray(theta) # vertical mask = ((theta >= -22.5) & (theta <= 22.5)) direction_label[mask] = 3. # +45 degree mask = ((theta > 22.5) & (theta <= 67.5)) direction_label[mask] = 2. # -45 degree mask = ((theta < -22.5) & (theta >= -67.5)) direction_label[mask] = 4. # horizontal mask = ((theta > 67.5) & (theta <= 90.)) | ((theta < -67.5) & (theta >= -90.)) direction_label[mask] = 1. gradient_magnitude = np.asarray(gradient_magnitude) mask = gradient_magnitude < threshold direction_label[mask] = 0. return direction_label def edge_wise_median(self, kernel_size, edge_location): # pad two pixels at the border no_of_pixel_pad = math.floor(kernel_size / 2) # number of pixels to pad data = self.data data = np.pad(data, \ (no_of_pixel_pad, no_of_pixel_pad),\ 'reflect') # reflect would not repeat the border value edge_location = np.pad(edge_location,\ (no_of_pixel_pad, no_of_pixel_pad),\ 'reflect') # reflect would not repeat the border value width, height = self.get_width_height() output = np.empty((height, width), dtype=np.float32) for i in range(no_of_pixel_pad, height + no_of_pixel_pad): for j in range(no_of_pixel_pad, width + no_of_pixel_pad): if (edge_location[i, j] == 1): output[i - no_of_pixel_pad, j - no_of_pixel_pad] = \ np.median(data[i - no_of_pixel_pad : i + no_of_pixel_pad + 1,\ j - no_of_pixel_pad : j + no_of_pixel_pad + 1]) elif (edge_location[i, j] == 0): output[i - no_of_pixel_pad, j - no_of_pixel_pad] = data[i, j] return output def nonuniform_quantization(self): output = np.zeros(np.shape(self.data), dtype=np.float32) min_val = np.min(self.data) max_val = np.max(self.data) mask = (self.data > (7./8.) * (max_val - min_val)) output[mask] = 3. mask = (self.data > (3./4.) * (max_val - min_val)) & (self.data <= (7./8.) * (max_val - min_val)) output[mask] = 2. mask = (self.data > (1./2.) * (max_val - min_val)) & (self.data <= (3./4.) * (max_val - min_val)) output[mask] = 1. return output def __str__(self): return self.name # ============================================================= # function: distance_euclid # returns Euclidean distance between two points # ============================================================= def distance_euclid(point1, point2): return math.sqrt((point1[0] - point2[0])**2 + (point1[1]-point2[1])**2) # ============================================================= # class: special_functions # pass input through special functions # ============================================================= class special_function: def __init__(self, data, name="special function"): self.data = np.float32(data) self.name = name def soft_coring(self, slope, tau_threshold, gamma_speed): # Usage: Used in the unsharp masking sharpening Process # Input: # slope: controls the boost. # the amount of sharpening, higher slope # means more aggresssive sharpening # # tau_threshold: controls the amount of coring. # threshold value till which the image is # not sharpened. The lower the value of # tau_threshold the more frequencies # goes through the sharpening process # # gamma_speed: controls the speed of convergence to the slope # smaller value gives a little bit more # sharpened image, this may be a fine tuner return slope * self.data * ( 1. - np.exp(-((np.abs(self.data / tau_threshold))**gamma_speed))) def distortion_function(self, correction_type="barrel-1", strength=0.1): if (correction_type == "pincushion-1"): return np.divide(self.data, 1. + strength * self.data) elif (correction_type == "pincushion-2"): return np.divide(self.data, 1. + strength * np.power(self.data, 2)) elif (correction_type == "barrel-1"): return np.multiply(self.data, 1. + strength * self.data) elif (correction_type == "barrel-2"): return np.multiply(self.data, 1. + strength * np.power(self.data, 2)) else: print("Warning! Unknown correction_type.") return def bilateral_filter(self, edge): # bilateral filter based upon the work of # Jiawen Chen, Sylvain Paris, and Fredo Durand, 2007 work # note: if edge data is not provided, image is served as edge # this is called normal bilateral filter # if edge data is provided, then it is called cross or joint # bilateral filter # get width and height of the image width, height = helpers(self.data).get_width_height() # sigma_spatial sigma_spatial = min(height, width) / 16. # calculate edge_delta edge_min = np.min(edge) edge_max = np.max(edge) edge_delta = edge_max - edge_min # sigma_range and sampling_range sigma_range = 0.1 * edge_delta sampling_range = sigma_range sampling_spatial = sigma_spatial # derived_sigma_spatial and derived_sigma_range derived_sigma_spatial = sigma_spatial / sampling_spatial derived_sigma_range = sigma_range / sampling_range # paddings padding_xy = np.floor(2. * derived_sigma_spatial) + 1. padding_z = np.floor(2. * derived_sigma_range) + 1. # downsamples downsample_width = np.uint16(np.floor((width - 1.) / sampling_spatial) + 1. + 2. * padding_xy) downsample_height = np.uint16(np.floor((height - 1.) / sampling_spatial) + 1. + 2. * padding_xy) downsample_depth = np.uint16(np.floor(edge_delta / sampling_range) + 1. + 2. * padding_z) grid_data = np.zeros((downsample_height, downsample_width, downsample_depth)) grid_weight = np.zeros((downsample_height, downsample_width, downsample_depth)) jj, ii = np.meshgrid(np.arange(0, width, 1),\ np.arange(0, height, 1)) di = np.uint16(np.round( ii / sampling_spatial ) + padding_xy + 1.) dj = np.uint16(np.round( jj / sampling_spatial ) + padding_xy + 1.) dz = np.uint16(np.round( (edge - edge_min) / sampling_range ) + padding_z + 1.) for i in range(0, height): for j in range(0, width): data_z = self.data[i, j] if not np.isnan(data_z): dik = di[i, j] djk = dj[i, j] dzk = dz[i, j] grid_data[dik, djk, dzk] = grid_data[dik, djk, dzk] + data_z grid_weight[dik, djk, dzk] = grid_weight[dik, djk, dzk] + 1. kernel_width = 2. * derived_sigma_spatial + 1. kernel_height = kernel_width kernel_depth = 2. * derived_sigma_range + 1. half_kernel_width = np.floor(kernel_width / 2.) half_kernel_height = np.floor(kernel_height / 2.) half_kernel_depth = np.floor(kernel_depth / 2.) grid_x, grid_y, grid_z = np.meshgrid(np.arange(0, kernel_width, 1),\ np.arange(0, kernel_height, 1),\ np.arange(0, kernel_depth, 1)) grid_x = grid_x - half_kernel_width grid_y = grid_y - half_kernel_height grid_z = grid_z - half_kernel_depth grid_r_squared = ( ( np.multiply(grid_x, grid_x) + \ np.multiply(grid_y, grid_y) ) / np.multiply(derived_sigma_spatial, derived_sigma_spatial) ) + \ ( np.multiply(grid_z, grid_z) / np.multiply(derived_sigma_range, derived_sigma_range) ) kernel = np.exp(-0.5 * grid_r_squared) blurred_grid_data = ndimage.convolve(grid_data, kernel, mode='reflect') blurred_grid_weight = ndimage.convolve(grid_weight, kernel, mode='reflect') # divide blurred_grid_weight = np.asarray(blurred_grid_weight) mask = blurred_grid_weight == 0 blurred_grid_weight[mask] = -2. normalized_blurred_grid = np.divide(blurred_grid_data, blurred_grid_weight) mask = blurred_grid_weight < -1 normalized_blurred_grid[mask] = 0. blurred_grid_weight[mask] = 0. # upsample jj, ii = np.meshgrid(np.arange(0, width, 1),\ np.arange(0, height, 1)) di = (ii / sampling_spatial) + padding_xy + 1. dj = (jj / sampling_spatial) + padding_xy + 1. dz = (edge - edge_min) / sampling_range + padding_z + 1. # arrange the input points n_i, n_j, n_z = np.shape(normalized_blurred_grid) points = (np.arange(0, n_i, 1), np.arange(0, n_j, 1), np.arange(0, n_z, 1)) # query points xi = (di, dj, dz) # multidimensional interpolation output = interpolate.interpn(points, normalized_blurred_grid, xi, method='linear') return output # ============================================================= # class: synthetic_image_generate # creates sysnthetic images for different purposes # ============================================================= class synthetic_image_generate: def __init__(self, width, height, name="synthetic_image"): self.name = name self.width = width self.height = height def create_lens_shading_correction_images(self, dark_current=0, flat_max=65535, flat_min=0, clip_range=[0, 65535]): # Objective: creates two images: # dark_current_image and flat_field_image dark_current_image = dark_current * np.ones((self.height, self.width), dtype=np.float32) flat_field_image = np.empty((self.height, self.width), dtype=np.float32) center_pixel_pos = [self.height/2, self.width/2] max_distance = distance_euclid(center_pixel_pos, [self.height, self.width]) for i in range(0, self.height): for j in range(0, self.width): flat_field_image[i, j] = (max_distance - distance_euclid(center_pixel_pos, [i, j])) / max_distance flat_field_image[i, j] = flat_min + flat_field_image[i, j] * (flat_max - flat_min) dark_current_image = np.clip(dark_current_image, clip_range[0], clip_range[1]) flat_field_image = np.clip(flat_field_image, clip_range[0], clip_range[1]) return dark_current_image, flat_field_image def create_zone_plate_image(self): pass def create_color_gradient_image(self): pass def create_random_noise_image(self, mean=0, standard_deviation=1, seed=0): # Creates normally distributed noisy image np.random.seed(seed) return np.random.normal(mean, standard_deviation, (self.height, self.width)) def create_noisy_image(self, data, mean=0, standard_deviation=1, seed=0, clip_range=[0, 65535]): # Adds normally distributed noise to the data return np.clip(data + self.create_random_noise_image(mean, standard_deviation, seed), clip_range[0], clip_range[1]) # ============================================================= # class: create_filter # creates different filters, generally 2D filters # ============================================================= class create_filter: def __init__(self, name="filter"): self.name = name def gaussian(self, kernel_size, sigma): # calculate which number to where the grid should be # remember that, kernel_size[0] is the width of the kernel # and kernel_size[1] is the height of the kernel temp = np.floor(np.float32(kernel_size) / 2.) # create the grid # example: if kernel_size = [5, 3], then: # x: array([[-2., -1., 0., 1., 2.], # [-2., -1., 0., 1., 2.], # [-2., -1., 0., 1., 2.]]) # y: array([[-1., -1., -1., -1., -1.], # [ 0., 0., 0., 0., 0.], # [ 1., 1., 1., 1., 1.]]) x, y = np.meshgrid(np.linspace(-temp[0], temp[0], kernel_size[0]),\ np.linspace(-temp[1], temp[1], kernel_size[1])) # Gaussian equation temp = np.exp( -(x**2 + y**2) / (2. * sigma**2) ) # make kernel sum equal to 1 return temp / np.sum(temp) def gaussian_separable(self, kernel_size, sigma): # calculate which number to where the grid should be # remember that, kernel_size[0] is the width of the kernel # and kernel_size[1] is the height of the kernel temp = np.floor(np.float32(kernel_size) / 2.) # create the horizontal kernel x = np.linspace(-temp[0], temp[0], kernel_size[0]) x = x.reshape((1, kernel_size[0])) # reshape to create row vector hx = np.exp(-x**2 / (2 * sigma**2)) hx = hx / np.sum(hx) # create the vertical kernel y = np.linspace(-temp[1], temp[1], kernel_size[1]) y = y.reshape((kernel_size[1], 1)) # reshape to create column vector hy = np.exp(-y**2 / (2 * sigma**2)) hy = hy / np.sum(hy) return hx, hy def sobel(self, kernel_size): # Returns the Sobel filter kernels Sx and Sy Sx = .25 * np.dot([[1.], [2.], [1.]], [[1., 0., -1.]]) if (kernel_size > 3): n = (np.floor((kernel_size - 5) / 2 + 1)).astype(int) for i in range(0, n): Sx = (1./16.) * signal.convolve2d(np.dot([[1.], [2.], [1.]], [[1., 2., 1.]]), Sx) Sy = np.transpose(Sx) return Sx, Sy def __str__(self): return self.name # ============================================================= # class: color_conversion # color conversion from one color space to another # ============================================================= class color_conversion: def __init__(self, data, name="color conversion"): self.data = np.float32(data) self.name = name def rgb2gray(self): return 0.299 * self.data[:, :, 0] +\ 0.587 * self.data[:, :, 1] +\ 0.114 * self.data[:, :, 2] def rgb2ycc(self, rule="bt601"): # map to select kr and kb kr_kb_dict = {"bt601" : [0.299, 0.114],\ "bt709" : [0.2126, 0.0722],\ "bt2020" : [0.2627, 0.0593]} kr = kr_kb_dict[rule][0] kb = kr_kb_dict[rule][1] kg = 1 - (kr + kb) output = np.empty(np.shape(self.data), dtype=np.float32) output[:, :, 0] = kr * self.data[:, :, 0] + \ kg * self.data[:, :, 1] + \ kb * self.data[:, :, 2] output[:, :, 1] = 0.5 * ((self.data[:, :, 2] - output[:, :, 0]) / (1 - kb)) output[:, :, 2] = 0.5 * ((self.data[:, :, 0] - output[:, :, 0]) / (1 - kr)) return output def ycc2rgb(self, rule="bt601"): # map to select kr and kb kr_kb_dict = {"bt601" : [0.299, 0.114],\ "bt709" : [0.2126, 0.0722],\ "bt2020" : [0.2627, 0.0593]} kr = kr_kb_dict[rule][0] kb = kr_kb_dict[rule][1] kg = 1 - (kr + kb) output = np.empty(np.shape(self.data), dtype=np.float32) output[:, :, 0] = 2. * self.data[:, :, 2] * (1 - kr) + self.data[:, :, 0] output[:, :, 2] = 2. * self.data[:, :, 1] * (1 - kb) + self.data[:, :, 0] output[:, :, 1] = (self.data[:, :, 0] - kr * output[:, :, 0] - kb * output[:, :, 2]) / kg return output def rgb2xyz(self, color_space="srgb", clip_range=[0, 65535]): # input rgb in range clip_range # output xyz is in range 0 to 1 if (color_space == "srgb"): # degamma / linearization data = helpers(self.data).degamma_srgb(clip_range) data = np.float32(data) data = np.divide(data, clip_range[1]) # matrix multiplication` output = np.empty(np.shape(self.data), dtype=np.float32) output[:, :, 0] = data[:, :, 0] * 0.4124 + data[:, :, 1] * 0.3576 + data[:, :, 2] * 0.1805 output[:, :, 1] = data[:, :, 0] * 0.2126 + data[:, :, 1] * 0.7152 + data[:, :, 2] * 0.0722 output[:, :, 2] = data[:, :, 0] * 0.0193 + data[:, :, 1] * 0.1192 + data[:, :, 2] * 0.9505 elif (color_space == "adobe-rgb-1998"): # degamma / linearization data = helpers(self.data).degamma_adobe_rgb_1998(clip_range) data = np.float32(data) data = np.divide(data, clip_range[1]) # matrix multiplication output = np.empty(np.shape(self.data), dtype=np.float32) output[:, :, 0] = data[:, :, 0] * 0.5767309 + data[:, :, 1] * 0.1855540 + data[:, :, 2] * 0.1881852 output[:, :, 1] = data[:, :, 0] * 0.2973769 + data[:, :, 1] * 0.6273491 + data[:, :, 2] * 0.0752741 output[:, :, 2] = data[:, :, 0] * 0.0270343 + data[:, :, 1] * 0.0706872 + data[:, :, 2] * 0.9911085 elif (color_space == "linear"): # matrix multiplication` output = np.empty(np.shape(self.data), dtype=np.float32) data = np.float32(self.data) data = np.divide(data, clip_range[1]) output[:, :, 0] = data[:, :, 0] * 0.4124 + data[:, :, 1] * 0.3576 + data[:, :, 2] * 0.1805 output[:, :, 1] = data[:, :, 0] * 0.2126 + data[:, :, 1] * 0.7152 + data[:, :, 2] * 0.0722 output[:, :, 2] = data[:, :, 0] * 0.0193 + data[:, :, 1] * 0.1192 + data[:, :, 2] * 0.9505 else: print("Warning! color_space must be srgb or adobe-rgb-1998.") return return output def xyz2rgb(self, color_space="srgb", clip_range=[0, 65535]): # input xyz is in range 0 to 1 # output rgb in clip_range # allocate space for output output = np.empty(np.shape(self.data), dtype=np.float32) if (color_space == "srgb"): # matrix multiplication output[:, :, 0] = self.data[:, :, 0] * 3.2406 + self.data[:, :, 1] * -1.5372 + self.data[:, :, 2] * -0.4986 output[:, :, 1] = self.data[:, :, 0] * -0.9689 + self.data[:, :, 1] * 1.8758 + self.data[:, :, 2] * 0.0415 output[:, :, 2] = self.data[:, :, 0] * 0.0557 + self.data[:, :, 1] * -0.2040 + self.data[:, :, 2] * 1.0570 # gamma to retain nonlinearity output = helpers(output * clip_range[1]).gamma_srgb(clip_range) elif (color_space == "adobe-rgb-1998"): # matrix multiplication output[:, :, 0] = self.data[:, :, 0] * 2.0413690 + self.data[:, :, 1] * -0.5649464 + self.data[:, :, 2] * -0.3446944 output[:, :, 1] = self.data[:, :, 0] * -0.9692660 + self.data[:, :, 1] * 1.8760108 + self.data[:, :, 2] * 0.0415560 output[:, :, 2] = self.data[:, :, 0] * 0.0134474 + self.data[:, :, 1] * -0.1183897 + self.data[:, :, 2] * 1.0154096 # gamma to retain nonlinearity output = helpers(output * clip_range[1]).gamma_adobe_rgb_1998(clip_range) elif (color_space == "linear"): # matrix multiplication output[:, :, 0] = self.data[:, :, 0] * 3.2406 + self.data[:, :, 1] * -1.5372 + self.data[:, :, 2] * -0.4986 output[:, :, 1] = self.data[:, :, 0] * -0.9689 + self.data[:, :, 1] * 1.8758 + self.data[:, :, 2] * 0.0415 output[:, :, 2] = self.data[:, :, 0] * 0.0557 + self.data[:, :, 1] * -0.2040 + self.data[:, :, 2] * 1.0570 # gamma to retain nonlinearity output = output * clip_range[1] else: print("Warning! color_space must be srgb or adobe-rgb-1998.") return return output def xyz2lab(self, cie_version="1931", illuminant="d65"): xyz_reference = helpers().get_xyz_reference(cie_version, illuminant) data = self.data data[:, :, 0] = data[:, :, 0] / xyz_reference[0] data[:, :, 1] = data[:, :, 1] / xyz_reference[1] data[:, :, 2] = data[:, :, 2] / xyz_reference[2] data = np.asarray(data) # if data[x, y, c] > 0.008856, data[x, y, c] = data[x, y, c] ^ (1/3) # else, data[x, y, c] = 7.787 * data[x, y, c] + 16/116 mask = data > 0.008856 data[mask] **= 1./3. data[np.invert(mask)] *= 7.787 data[np.invert(mask)] += 16./116. data = np.float32(data) output = np.empty(np.shape(self.data), dtype=np.float32) output[:, :, 0] = 116. * data[:, :, 1] - 16. output[:, :, 1] = 500. * (data[:, :, 0] - data[:, :, 1]) output[:, :, 2] = 200. * (data[:, :, 1] - data[:, :, 2]) return output def lab2xyz(self, cie_version="1931", illuminant="d65"): output = np.empty(np.shape(self.data), dtype=np.float32) output[:, :, 1] = (self.data[:, :, 0] + 16.) / 116. output[:, :, 0] = (self.data[:, :, 1] / 500.) + output[:, :, 1] output[:, :, 2] = output[:, :, 1] - (self.data[:, :, 2] / 200.) # if output[x, y, c] > 0.008856, output[x, y, c] ^ 3 # else, output[x, y, c] = ( output[x, y, c] - 16/116 ) / 7.787 output = np.asarray(output) mask = output > 0.008856 output[mask] **= 3. output[np.invert(mask)] -= 16/116 output[np.invert(mask)] /= 7.787 xyz_reference = helpers().get_xyz_reference(cie_version, illuminant) output = np.float32(output) output[:, :, 0] = output[:, :, 0] * xyz_reference[0] output[:, :, 1] = output[:, :, 1] * xyz_reference[1] output[:, :, 2] = output[:, :, 2] * xyz_reference[2] return output def lab2lch(self): output = np.empty(np.shape(self.data), dtype=np.float32) output[:, :, 0] = self.data[:, :, 0] # L transfers directly output[:, :, 1] = np.power(np.power(self.data[:, :, 1], 2) + np.power(self.data[:, :, 2], 2), 0.5) output[:, :, 2] = np.arctan2(self.data[:, :, 2], self.data[:, :, 1]) * 180 / np.pi return output def lch2lab(self): output = np.empty(np.shape(self.data), dtype=np.float32) output[:, :, 0] = self.data[:, :, 0] # L transfers directly output[:, :, 1] = np.multiply(np.cos(self.data[:, :, 2] * np.pi / 180), self.data[:, :, 1]) output[:, :, 2] = np.multiply(np.sin(self.data[:, :, 2] * np.pi / 180), self.data[:, :, 1]) return output def __str__(self): return self.name # ============================================================= # class: edge_detection # detect edges in an image # ============================================================= class edge_detection: def __init__(self, data, name="edge detection"): self.data = np.float32(data) self.name = name def sobel(self, kernel_size=3, output_type="all", threshold=0., clip_range=[0, 65535]): Sx, Sy = create_filter().sobel(kernel_size) # Gradient in x direction: Gx # Gradient in y direction: Gy if np.ndim(self.data) > 2: Gx = np.empty(np.shape(self.data), dtype=np.float32) Gy = np.empty(np.shape(self.data), dtype=np.float32) for dimension_idx in range(0, np.shape(self.data)[2]): Gx[:, :, dimension_idx] = signal.convolve2d(self.data[:, :, dimension_idx], Sx, mode="same", boundary="symm") Gy[:, :, dimension_idx] = signal.convolve2d(self.data[:, :, dimension_idx], Sy, mode="same", boundary="symm") elif np.ndim(self.data) == 2: Gx = signal.convolve2d(self.data, Sx, mode="same", boundary="symm") Gy = signal.convolve2d(self.data, Sy, mode="same", boundary="symm") else: print("Warning! Data dimension must be 2 or 3.") # Gradient magnitude G = np.power(np.power(Gx, 2) + np.power(Gy, 2), .5) if (output_type == "gradient_magnitude"): return G # Gradient angle theta = np.arctan(np.divide(Gy, Gx)) * 180. / np.pi if (output_type == "gradient_magnitude_and_angle"): return G, theta # Change the threshold according to the clip_range's maximum value threshold = threshold * clip_range[1] # calculating if the edge is a strong edge is_edge = np.zeros(np.shape(self.data)).astype(int) mask = G > threshold is_edge[mask] = 1 if (output_type == "is_edge"): return is_edge # Edge direction label temp = np.asarray(theta) direction_label = np.zeros(np.shape(self.data), dtype=np.float32) if np.ndim(self.data > 2): for i in range(0, np.shape(self.data)[2]): direction_label[:, :, i] = helpers().sobel_prewitt_direction_label(G[:, :, i], theta[:, :, i], threshold) else: direction_label = helpers().sobel_prewitt_direction_label(G, theta, threshold) if (output_type == "all"): return G, Gx, Gy, theta, is_edge, direction_label def __str__(self): return self.name