Commit a8ada82f authored by chenych's avatar chenych
Browse files

First commit

parent 537691da
#!/bin/bash
echo "Export params ..."
export HIP_VISIBLE_DEVICES=0,1,2,3 # 自行修改为训练的卡号和数量
export HSA_FORCE_FINE_GRAIN_PCIE=1
export USE_MIOPEN_BATCHNORM=1
echo "Training start ..."
python main_train_psnr.py --opt options/masked_denoising/input_mask_80_90.json
\ No newline at end of file
#!/bin/bash
echo "Export params ..."
export HIP_VISIBLE_DEVICES=0,1,2,3 # 自行修改为训练的卡号和数量
export HSA_FORCE_FINE_GRAIN_PCIE=1
export USE_MIOPEN_BATCHNORM=1
echo "Training start ..."
# 分布式
python -m torch.distributed.launch --nproc_per_node=4 --master_port=1234 main_train_psnr.py --opt options/masked_denoising/input_mask_80_90.json --dist True
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Created on Mon Apr 24 15:43:29 2017
@author: zhaoy
"""
import cv2
import numpy as np
from skimage import transform as trans
# reference facial points, a list of coordinates (x,y)
REFERENCE_FACIAL_POINTS = [
[30.29459953, 51.69630051],
[65.53179932, 51.50139999],
[48.02519989, 71.73660278],
[33.54930115, 92.3655014],
[62.72990036, 92.20410156]
]
DEFAULT_CROP_SIZE = (96, 112)
def _umeyama(src, dst, estimate_scale=True, scale=1.0):
"""Estimate N-D similarity transformation with or without scaling.
Parameters
----------
src : (M, N) array
Source coordinates.
dst : (M, N) array
Destination coordinates.
estimate_scale : bool
Whether to estimate scaling factor.
Returns
-------
T : (N + 1, N + 1)
The homogeneous similarity transformation matrix. The matrix contains
NaN values only if the problem is not well-conditioned.
References
----------
.. [1] "Least-squares estimation of transformation parameters between two
point patterns", Shinji Umeyama, PAMI 1991, :DOI:`10.1109/34.88573`
"""
num = src.shape[0]
dim = src.shape[1]
# Compute mean of src and dst.
src_mean = src.mean(axis=0)
dst_mean = dst.mean(axis=0)
# Subtract mean from src and dst.
src_demean = src - src_mean
dst_demean = dst - dst_mean
# Eq. (38).
A = dst_demean.T @ src_demean / num
# Eq. (39).
d = np.ones((dim,), dtype=np.double)
if np.linalg.det(A) < 0:
d[dim - 1] = -1
T = np.eye(dim + 1, dtype=np.double)
U, S, V = np.linalg.svd(A)
# Eq. (40) and (43).
rank = np.linalg.matrix_rank(A)
if rank == 0:
return np.nan * T
elif rank == dim - 1:
if np.linalg.det(U) * np.linalg.det(V) > 0:
T[:dim, :dim] = U @ V
else:
s = d[dim - 1]
d[dim - 1] = -1
T[:dim, :dim] = U @ np.diag(d) @ V
d[dim - 1] = s
else:
T[:dim, :dim] = U @ np.diag(d) @ V
if estimate_scale:
# Eq. (41) and (42).
scale = 1.0 / src_demean.var(axis=0).sum() * (S @ d)
else:
scale = scale
T[:dim, dim] = dst_mean - scale * (T[:dim, :dim] @ src_mean.T)
T[:dim, :dim] *= scale
return T, scale
class FaceWarpException(Exception):
def __str__(self):
return 'In File {}:{}'.format(
__file__, super.__str__(self))
def get_reference_facial_points(output_size=None,
inner_padding_factor=0.0,
outer_padding=(0, 0),
default_square=False):
tmp_5pts = np.array(REFERENCE_FACIAL_POINTS)
tmp_crop_size = np.array(DEFAULT_CROP_SIZE)
# 0) make the inner region a square
if default_square:
size_diff = max(tmp_crop_size) - tmp_crop_size
tmp_5pts += size_diff / 2
tmp_crop_size += size_diff
if (output_size and
output_size[0] == tmp_crop_size[0] and
output_size[1] == tmp_crop_size[1]):
print('output_size == DEFAULT_CROP_SIZE {}: return default reference points'.format(tmp_crop_size))
return tmp_5pts
if (inner_padding_factor == 0 and
outer_padding == (0, 0)):
if output_size is None:
print('No paddings to do: return default reference points')
return tmp_5pts
else:
raise FaceWarpException(
'No paddings to do, output_size must be None or {}'.format(tmp_crop_size))
# check output size
if not (0 <= inner_padding_factor <= 1.0):
raise FaceWarpException('Not (0 <= inner_padding_factor <= 1.0)')
if ((inner_padding_factor > 0 or outer_padding[0] > 0 or outer_padding[1] > 0)
and output_size is None):
output_size = tmp_crop_size * \
(1 + inner_padding_factor * 2).astype(np.int32)
output_size += np.array(outer_padding)
print(' deduced from paddings, output_size = ', output_size)
if not (outer_padding[0] < output_size[0]
and outer_padding[1] < output_size[1]):
raise FaceWarpException('Not (outer_padding[0] < output_size[0]'
'and outer_padding[1] < output_size[1])')
# 1) pad the inner region according inner_padding_factor
# print('---> STEP1: pad the inner region according inner_padding_factor')
if inner_padding_factor > 0:
size_diff = tmp_crop_size * inner_padding_factor * 2
tmp_5pts += size_diff / 2
tmp_crop_size += np.round(size_diff).astype(np.int32)
# print(' crop_size = ', tmp_crop_size)
# print(' reference_5pts = ', tmp_5pts)
# 2) resize the padded inner region
# print('---> STEP2: resize the padded inner region')
size_bf_outer_pad = np.array(output_size) - np.array(outer_padding) * 2
# print(' crop_size = ', tmp_crop_size)
# print(' size_bf_outer_pad = ', size_bf_outer_pad)
if size_bf_outer_pad[0] * tmp_crop_size[1] != size_bf_outer_pad[1] * tmp_crop_size[0]:
raise FaceWarpException('Must have (output_size - outer_padding)'
'= some_scale * (crop_size * (1.0 + inner_padding_factor)')
scale_factor = size_bf_outer_pad[0].astype(np.float32) / tmp_crop_size[0]
# print(' resize scale_factor = ', scale_factor)
tmp_5pts = tmp_5pts * scale_factor
# size_diff = tmp_crop_size * (scale_factor - min(scale_factor))
# tmp_5pts = tmp_5pts + size_diff / 2
tmp_crop_size = size_bf_outer_pad
# print(' crop_size = ', tmp_crop_size)
# print(' reference_5pts = ', tmp_5pts)
# 3) add outer_padding to make output_size
reference_5point = tmp_5pts + np.array(outer_padding)
tmp_crop_size = output_size
# print('---> STEP3: add outer_padding to make output_size')
# print(' crop_size = ', tmp_crop_size)
# print(' reference_5pts = ', tmp_5pts)
#
# print('===> end get_reference_facial_points\n')
return reference_5point
def get_affine_transform_matrix(src_pts, dst_pts):
tfm = np.float32([[1, 0, 0], [0, 1, 0]])
n_pts = src_pts.shape[0]
ones = np.ones((n_pts, 1), src_pts.dtype)
src_pts_ = np.hstack([src_pts, ones])
dst_pts_ = np.hstack([dst_pts, ones])
A, res, rank, s = np.linalg.lstsq(src_pts_, dst_pts_)
if rank == 3:
tfm = np.float32([
[A[0, 0], A[1, 0], A[2, 0]],
[A[0, 1], A[1, 1], A[2, 1]]
])
elif rank == 2:
tfm = np.float32([
[A[0, 0], A[1, 0], 0],
[A[0, 1], A[1, 1], 0]
])
return tfm
def warp_and_crop_face(src_img,
facial_pts,
reference_pts=None,
crop_size=(96, 112),
align_type='smilarity'): #smilarity cv2_affine affine
if reference_pts is None:
if crop_size[0] == 96 and crop_size[1] == 112:
reference_pts = REFERENCE_FACIAL_POINTS
else:
default_square = False
inner_padding_factor = 0
outer_padding = (0, 0)
output_size = crop_size
reference_pts = get_reference_facial_points(output_size,
inner_padding_factor,
outer_padding,
default_square)
ref_pts = np.float32(reference_pts)
ref_pts_shp = ref_pts.shape
if max(ref_pts_shp) < 3 or min(ref_pts_shp) != 2:
raise FaceWarpException(
'reference_pts.shape must be (K,2) or (2,K) and K>2')
if ref_pts_shp[0] == 2:
ref_pts = ref_pts.T
src_pts = np.float32(facial_pts)
src_pts_shp = src_pts.shape
if max(src_pts_shp) < 3 or min(src_pts_shp) != 2:
raise FaceWarpException(
'facial_pts.shape must be (K,2) or (2,K) and K>2')
if src_pts_shp[0] == 2:
src_pts = src_pts.T
if src_pts.shape != ref_pts.shape:
raise FaceWarpException(
'facial_pts and reference_pts must have the same shape')
if align_type is 'cv2_affine':
tfm = cv2.getAffineTransform(src_pts[0:3], ref_pts[0:3])
tfm_inv = cv2.getAffineTransform(ref_pts[0:3], src_pts[0:3])
elif align_type is 'affine':
tfm = get_affine_transform_matrix(src_pts, ref_pts)
tfm_inv = get_affine_transform_matrix(ref_pts, src_pts)
else:
params, scale = _umeyama(src_pts, ref_pts)
tfm = params[:2, :]
params, _ = _umeyama(ref_pts, src_pts, False, scale=1.0/scale)
tfm_inv = params[:2, :]
face_img = cv2.warpAffine(src_img, tfm, (crop_size[0], crop_size[1]), flags=3)
return face_img, tfm_inv
# -*- coding: utf-8 -*-
import numpy as np
import cv2
import torch
from utils import utils_image as util
import random
from scipy import ndimage
import scipy
import scipy.stats as ss
from scipy.interpolate import interp2d
from scipy.linalg import orth
"""
# --------------------------------------------
# Super-Resolution
# --------------------------------------------
#
# Kai Zhang (cskaizhang@gmail.com)
# https://github.com/cszn
# From 2019/03--2021/08
# --------------------------------------------
"""
def modcrop_np(img, sf):
'''
Args:
img: numpy image, WxH or WxHxC
sf: scale factor
Return:
cropped image
'''
w, h = img.shape[:2]
im = np.copy(img)
return im[:w - w % sf, :h - h % sf, ...]
"""
# --------------------------------------------
# anisotropic Gaussian kernels
# --------------------------------------------
"""
def analytic_kernel(k):
"""Calculate the X4 kernel from the X2 kernel (for proof see appendix in paper)"""
k_size = k.shape[0]
# Calculate the big kernels size
big_k = np.zeros((3 * k_size - 2, 3 * k_size - 2))
# Loop over the small kernel to fill the big one
for r in range(k_size):
for c in range(k_size):
big_k[2 * r:2 * r + k_size, 2 * c:2 * c + k_size] += k[r, c] * k
# Crop the edges of the big kernel to ignore very small values and increase run time of SR
crop = k_size // 2
cropped_big_k = big_k[crop:-crop, crop:-crop]
# Normalize to 1
return cropped_big_k / cropped_big_k.sum()
def anisotropic_Gaussian(ksize=15, theta=np.pi, l1=6, l2=6):
""" generate an anisotropic Gaussian kernel
Args:
ksize : e.g., 15, kernel size
theta : [0, pi], rotation angle range
l1 : [0.1,50], scaling of eigenvalues
l2 : [0.1,l1], scaling of eigenvalues
If l1 = l2, will get an isotropic Gaussian kernel.
Returns:
k : kernel
"""
v = np.dot(np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]]), np.array([1., 0.]))
V = np.array([[v[0], v[1]], [v[1], -v[0]]])
D = np.array([[l1, 0], [0, l2]])
Sigma = np.dot(np.dot(V, D), np.linalg.inv(V))
k = gm_blur_kernel(mean=[0, 0], cov=Sigma, size=ksize)
return k
def gm_blur_kernel(mean, cov, size=15):
center = size / 2.0 + 0.5
k = np.zeros([size, size])
for y in range(size):
for x in range(size):
cy = y - center + 1
cx = x - center + 1
k[y, x] = ss.multivariate_normal.pdf([cx, cy], mean=mean, cov=cov)
k = k / np.sum(k)
return k
def shift_pixel(x, sf, upper_left=True):
"""shift pixel for super-resolution with different scale factors
Args:
x: WxHxC or WxH
sf: scale factor
upper_left: shift direction
"""
h, w = x.shape[:2]
shift = (sf-1)*0.5
xv, yv = np.arange(0, w, 1.0), np.arange(0, h, 1.0)
if upper_left:
x1 = xv + shift
y1 = yv + shift
else:
x1 = xv - shift
y1 = yv - shift
x1 = np.clip(x1, 0, w-1)
y1 = np.clip(y1, 0, h-1)
if x.ndim == 2:
x = interp2d(xv, yv, x)(x1, y1)
if x.ndim == 3:
for i in range(x.shape[-1]):
x[:, :, i] = interp2d(xv, yv, x[:, :, i])(x1, y1)
return x
def blur(x, k):
'''
x: image, NxcxHxW
k: kernel, Nx1xhxw
'''
n, c = x.shape[:2]
p1, p2 = (k.shape[-2]-1)//2, (k.shape[-1]-1)//2
x = torch.nn.functional.pad(x, pad=(p1, p2, p1, p2), mode='replicate')
k = k.repeat(1,c,1,1)
k = k.view(-1, 1, k.shape[2], k.shape[3])
x = x.view(1, -1, x.shape[2], x.shape[3])
x = torch.nn.functional.conv2d(x, k, bias=None, stride=1, padding=0, groups=n*c)
x = x.view(n, c, x.shape[2], x.shape[3])
return x
def gen_kernel(k_size=np.array([15, 15]), scale_factor=np.array([4, 4]), min_var=0.6, max_var=10., noise_level=0):
""""
# modified version of https://github.com/assafshocher/BlindSR_dataset_generator
# Kai Zhang
# min_var = 0.175 * sf # variance of the gaussian kernel will be sampled between min_var and max_var
# max_var = 2.5 * sf
"""
# Set random eigen-vals (lambdas) and angle (theta) for COV matrix
lambda_1 = min_var + np.random.rand() * (max_var - min_var)
lambda_2 = min_var + np.random.rand() * (max_var - min_var)
theta = np.random.rand() * np.pi # random theta
noise = -noise_level + np.random.rand(*k_size) * noise_level * 2
# Set COV matrix using Lambdas and Theta
LAMBDA = np.diag([lambda_1, lambda_2])
Q = np.array([[np.cos(theta), -np.sin(theta)],
[np.sin(theta), np.cos(theta)]])
SIGMA = Q @ LAMBDA @ Q.T
INV_SIGMA = np.linalg.inv(SIGMA)[None, None, :, :]
# Set expectation position (shifting kernel for aligned image)
MU = k_size // 2 - 0.5*(scale_factor - 1) # - 0.5 * (scale_factor - k_size % 2)
MU = MU[None, None, :, None]
# Create meshgrid for Gaussian
[X,Y] = np.meshgrid(range(k_size[0]), range(k_size[1]))
Z = np.stack([X, Y], 2)[:, :, :, None]
# Calcualte Gaussian for every pixel of the kernel
ZZ = Z-MU
ZZ_t = ZZ.transpose(0,1,3,2)
raw_kernel = np.exp(-0.5 * np.squeeze(ZZ_t @ INV_SIGMA @ ZZ)) * (1 + noise)
# shift the kernel so it will be centered
#raw_kernel_centered = kernel_shift(raw_kernel, scale_factor)
# Normalize the kernel and return
#kernel = raw_kernel_centered / np.sum(raw_kernel_centered)
kernel = raw_kernel / np.sum(raw_kernel)
return kernel
def fspecial_gaussian(hsize, sigma):
hsize = [hsize, hsize]
siz = [(hsize[0]-1.0)/2.0, (hsize[1]-1.0)/2.0]
std = sigma
[x, y] = np.meshgrid(np.arange(-siz[1], siz[1]+1), np.arange(-siz[0], siz[0]+1))
arg = -(x*x + y*y)/(2*std*std)
h = np.exp(arg)
h[h < scipy.finfo(float).eps * h.max()] = 0
sumh = h.sum()
if sumh != 0:
h = h/sumh
return h
def fspecial_laplacian(alpha):
alpha = max([0, min([alpha,1])])
h1 = alpha/(alpha+1)
h2 = (1-alpha)/(alpha+1)
h = [[h1, h2, h1], [h2, -4/(alpha+1), h2], [h1, h2, h1]]
h = np.array(h)
return h
def fspecial(filter_type, *args, **kwargs):
'''
python code from:
https://github.com/ronaldosena/imagens-medicas-2/blob/40171a6c259edec7827a6693a93955de2bd39e76/Aulas/aula_2_-_uniform_filter/matlab_fspecial.py
'''
if filter_type == 'gaussian':
return fspecial_gaussian(*args, **kwargs)
if filter_type == 'laplacian':
return fspecial_laplacian(*args, **kwargs)
"""
# --------------------------------------------
# degradation models
# --------------------------------------------
"""
def bicubic_degradation(x, sf=3):
'''
Args:
x: HxWxC image, [0, 1]
sf: down-scale factor
Return:
bicubicly downsampled LR image
'''
x = util.imresize_np(x, scale=1/sf)
return x
def srmd_degradation(x, k, sf=3):
''' blur + bicubic downsampling
Args:
x: HxWxC image, [0, 1]
k: hxw, double
sf: down-scale factor
Return:
downsampled LR image
Reference:
@inproceedings{zhang2018learning,
title={Learning a single convolutional super-resolution network for multiple degradations},
author={Zhang, Kai and Zuo, Wangmeng and Zhang, Lei},
booktitle={IEEE Conference on Computer Vision and Pattern Recognition},
pages={3262--3271},
year={2018}
}
'''
x = ndimage.filters.convolve(x, np.expand_dims(k, axis=2), mode='wrap') # 'nearest' | 'mirror'
x = bicubic_degradation(x, sf=sf)
return x
def dpsr_degradation(x, k, sf=3):
''' bicubic downsampling + blur
Args:
x: HxWxC image, [0, 1]
k: hxw, double
sf: down-scale factor
Return:
downsampled LR image
Reference:
@inproceedings{zhang2019deep,
title={Deep Plug-and-Play Super-Resolution for Arbitrary Blur Kernels},
author={Zhang, Kai and Zuo, Wangmeng and Zhang, Lei},
booktitle={IEEE Conference on Computer Vision and Pattern Recognition},
pages={1671--1681},
year={2019}
}
'''
x = bicubic_degradation(x, sf=sf)
x = ndimage.filters.convolve(x, np.expand_dims(k, axis=2), mode='wrap')
return x
def classical_degradation(x, k, sf=3):
''' blur + downsampling
Args:
x: HxWxC image, [0, 1]/[0, 255]
k: hxw, double
sf: down-scale factor
Return:
downsampled LR image
'''
x = ndimage.filters.convolve(x, np.expand_dims(k, axis=2), mode='wrap')
#x = filters.correlate(x, np.expand_dims(np.flip(k), axis=2))
st = 0
return x[st::sf, st::sf, ...]
def add_sharpening(img, weight=0.5, radius=50, threshold=10):
"""USM sharpening. borrowed from real-ESRGAN
Input image: I; Blurry image: B.
1. K = I + weight * (I - B)
2. Mask = 1 if abs(I - B) > threshold, else: 0
3. Blur mask:
4. Out = Mask * K + (1 - Mask) * I
Args:
img (Numpy array): Input image, HWC, BGR; float32, [0, 1].
weight (float): Sharp weight. Default: 1.
radius (float): Kernel size of Gaussian blur. Default: 50.
threshold (int):
"""
if radius % 2 == 0:
radius += 1
blur = cv2.GaussianBlur(img, (radius, radius), 0)
residual = img - blur
mask = np.abs(residual) * 255 > threshold
mask = mask.astype('float32')
soft_mask = cv2.GaussianBlur(mask, (radius, radius), 0)
K = img + weight * residual
K = np.clip(K, 0, 1)
return soft_mask * K + (1 - soft_mask) * img
def add_blur(img, sf=4):
wd2 = 4.0 + sf
wd = 2.0 + 0.2*sf
if random.random() < 0.5:
l1 = wd2*random.random()
l2 = wd2*random.random()
k = anisotropic_Gaussian(ksize=2*random.randint(2,11)+3, theta=random.random()*np.pi, l1=l1, l2=l2)
else:
k = fspecial('gaussian', 2*random.randint(2,11)+3, wd*random.random())
img = ndimage.filters.convolve(img, np.expand_dims(k, axis=2), mode='mirror')
return img
def add_resize(img, sf=4):
rnum = np.random.rand()
if rnum > 0.8: # up
sf1 = random.uniform(1, 2)
elif rnum < 0.7: # down
sf1 = random.uniform(0.5/sf, 1)
else:
sf1 = 1.0
img = cv2.resize(img, (int(sf1*img.shape[1]), int(sf1*img.shape[0])), interpolation=random.choice([1, 2, 3]))
img = np.clip(img, 0.0, 1.0)
return img
def add_Gaussian_noise(img, noise_level1=2, noise_level2=25):
noise_level = random.randint(noise_level1, noise_level2)
rnum = np.random.rand()
if rnum > 0.6: # add color Gaussian noise
img += np.random.normal(0, noise_level/255.0, img.shape).astype(np.float32)
elif rnum < 0.4: # add grayscale Gaussian noise
img += np.random.normal(0, noise_level/255.0, (*img.shape[:2], 1)).astype(np.float32)
else: # add noise
L = noise_level2/255.
D = np.diag(np.random.rand(3))
U = orth(np.random.rand(3,3))
conv = np.dot(np.dot(np.transpose(U), D), U)
img += np.random.multivariate_normal([0,0,0], np.abs(L**2*conv), img.shape[:2]).astype(np.float32)
img = np.clip(img, 0.0, 1.0)
return img
def add_speckle_noise(img, noise_level1=2, noise_level2=25):
noise_level = random.randint(noise_level1, noise_level2)
img = np.clip(img, 0.0, 1.0)
rnum = random.random()
if rnum > 0.6:
img += img*np.random.normal(0, noise_level/255.0, img.shape).astype(np.float32)
elif rnum < 0.4:
img += img*np.random.normal(0, noise_level/255.0, (*img.shape[:2], 1)).astype(np.float32)
else:
L = noise_level2/255.
D = np.diag(np.random.rand(3))
U = orth(np.random.rand(3,3))
conv = np.dot(np.dot(np.transpose(U), D), U)
img += img*np.random.multivariate_normal([0,0,0], np.abs(L**2*conv), img.shape[:2]).astype(np.float32)
img = np.clip(img, 0.0, 1.0)
return img
def add_Poisson_noise(img):
img = np.clip((img * 255.0).round(), 0, 255) / 255.
vals = 10**(2*random.random()+2.0) # [2, 4]
if random.random() < 0.5:
img = np.random.poisson(img * vals).astype(np.float32) / vals
else:
img_gray = np.dot(img[...,:3], [0.299, 0.587, 0.114])
img_gray = np.clip((img_gray * 255.0).round(), 0, 255) / 255.
noise_gray = np.random.poisson(img_gray * vals).astype(np.float32) / vals - img_gray
img += noise_gray[:, :, np.newaxis]
img = np.clip(img, 0.0, 1.0)
return img
def add_JPEG_noise(img):
quality_factor = random.randint(30, 95)
img = cv2.cvtColor(util.single2uint(img), cv2.COLOR_RGB2BGR)
result, encimg = cv2.imencode('.jpg', img, [int(cv2.IMWRITE_JPEG_QUALITY), quality_factor])
img = cv2.imdecode(encimg, 1)
img = cv2.cvtColor(util.uint2single(img), cv2.COLOR_BGR2RGB)
return img
def random_crop(lq, hq, sf=4, lq_patchsize=64):
h, w = lq.shape[:2]
rnd_h = random.randint(0, h-lq_patchsize)
rnd_w = random.randint(0, w-lq_patchsize)
lq = lq[rnd_h:rnd_h + lq_patchsize, rnd_w:rnd_w + lq_patchsize, :]
rnd_h_H, rnd_w_H = int(rnd_h * sf), int(rnd_w * sf)
hq = hq[rnd_h_H:rnd_h_H + lq_patchsize*sf, rnd_w_H:rnd_w_H + lq_patchsize*sf, :]
return lq, hq
def degradation_bsrgan(img, sf=4, lq_patchsize=72, isp_model=None):
"""
This is the degradation model of BSRGAN from the paper
"Designing a Practical Degradation Model for Deep Blind Image Super-Resolution"
----------
img: HXWXC, [0, 1], its size should be large than (lq_patchsizexsf)x(lq_patchsizexsf)
sf: scale factor
isp_model: camera ISP model
Returns
-------
img: low-quality patch, size: lq_patchsizeXlq_patchsizeXC, range: [0, 1]
hq: corresponding high-quality patch, size: (lq_patchsizexsf)X(lq_patchsizexsf)XC, range: [0, 1]
"""
isp_prob, jpeg_prob, scale2_prob = 0.25, 0.9, 0.25
sf_ori = sf
h1, w1 = img.shape[:2]
img = img.copy()[:w1 - w1 % sf, :h1 - h1 % sf, ...] # mod crop
h, w = img.shape[:2]
if h < lq_patchsize*sf or w < lq_patchsize*sf:
raise ValueError(f'img size ({h1}X{w1}) is too small!')
hq = img.copy()
if sf == 4 and random.random() < scale2_prob: # downsample1
if np.random.rand() < 0.5:
img = cv2.resize(img, (int(1/2*img.shape[1]), int(1/2*img.shape[0])), interpolation=random.choice([1,2,3]))
else:
img = util.imresize_np(img, 1/2, True)
img = np.clip(img, 0.0, 1.0)
sf = 2
shuffle_order = random.sample(range(7), 7)
idx1, idx2 = shuffle_order.index(2), shuffle_order.index(3)
if idx1 > idx2: # keep downsample3 last
shuffle_order[idx1], shuffle_order[idx2] = shuffle_order[idx2], shuffle_order[idx1]
for i in shuffle_order:
if i == 0:
img = add_blur(img, sf=sf)
elif i == 1:
img = add_blur(img, sf=sf)
elif i == 2:
a, b = img.shape[1], img.shape[0]
# downsample2
if random.random() < 0.75:
sf1 = random.uniform(1,2*sf)
img = cv2.resize(img, (int(1/sf1*img.shape[1]), int(1/sf1*img.shape[0])), interpolation=random.choice([1,2,3]))
else:
k = fspecial('gaussian', 25, random.uniform(0.1, 0.6*sf))
k_shifted = shift_pixel(k, sf)
k_shifted = k_shifted/k_shifted.sum() # blur with shifted kernel
img = ndimage.filters.convolve(img, np.expand_dims(k_shifted, axis=2), mode='mirror')
img = img[0::sf, 0::sf, ...] # nearest downsampling
img = np.clip(img, 0.0, 1.0)
elif i == 3:
# downsample3
img = cv2.resize(img, (int(1/sf*a), int(1/sf*b)), interpolation=random.choice([1,2,3]))
img = np.clip(img, 0.0, 1.0)
elif i == 4:
# add Gaussian noise
img = add_Gaussian_noise(img, noise_level1=2, noise_level2=25)
elif i == 5:
# add JPEG noise
if random.random() < jpeg_prob:
img = add_JPEG_noise(img)
elif i == 6:
# add processed camera sensor noise
if random.random() < isp_prob and isp_model is not None:
with torch.no_grad():
img, hq = isp_model.forward(img.copy(), hq)
# add final JPEG compression noise
img = add_JPEG_noise(img)
# random crop
img, hq = random_crop(img, hq, sf_ori, lq_patchsize)
return img, hq
# def degradation_bsrgan_plus(img, sf=4, shuffle_prob=0.5, use_sharp=False, lq_patchsize=64, isp_model=None):
# """
# This is an extended degradation model by combining
# the degradation models of BSRGAN and Real-ESRGAN
# ----------
# img: HXWXC, [0, 1], its size should be large than (lq_patchsizexsf)x(lq_patchsizexsf)
# sf: scale factor
# use_shuffle: the degradation shuffle
# use_sharp: sharpening the img
# Returns
# -------
# img: low-quality patch, size: lq_patchsizeXlq_patchsizeXC, range: [0, 1]
# hq: corresponding high-quality patch, size: (lq_patchsizexsf)X(lq_patchsizexsf)XC, range: [0, 1]
# """
# h1, w1 = img.shape[:2]
# img = img.copy()[:w1 - w1 % sf, :h1 - h1 % sf, ...] # mod crop
# h, w = img.shape[:2]
# if h < lq_patchsize*sf or w < lq_patchsize*sf:
# raise ValueError(f'img size ({h1}X{w1}) is too small!')
# if use_sharp:
# img = add_sharpening(img)
# hq = img.copy()
# if random.random() < shuffle_prob:
# shuffle_order = random.sample(range(13), 13)
# else:
# shuffle_order = list(range(13))
# # local shuffle for noise, JPEG is always the last one
# shuffle_order[2:6] = random.sample(shuffle_order[2:6], len(range(2, 6)))
# shuffle_order[9:13] = random.sample(shuffle_order[9:13], len(range(9, 13)))
# poisson_prob, speckle_prob, isp_prob = 0.1, 0.1, 0.1
# for i in shuffle_order:
# if i == 0:
# img = add_blur(img, sf=sf)
# elif i == 1:
# img = add_resize(img, sf=sf)
# elif i == 2:
# img = add_Gaussian_noise(img, noise_level1=2, noise_level2=25)
# elif i == 3:
# if random.random() < poisson_prob:
# img = add_Poisson_noise(img)
# elif i == 4:
# if random.random() < speckle_prob:
# img = add_speckle_noise(img)
# elif i == 5:
# if random.random() < isp_prob and isp_model is not None:
# with torch.no_grad():
# img, hq = isp_model.forward(img.copy(), hq)
# elif i == 6:
# img = add_JPEG_noise(img)
# elif i == 7:
# img = add_blur(img, sf=sf)
# elif i == 8:
# img = add_resize(img, sf=sf)
# elif i == 9:
# img = add_Gaussian_noise(img, noise_level1=2, noise_level2=25)
# elif i == 10:
# if random.random() < poisson_prob:
# img = add_Poisson_noise(img)
# elif i == 11:
# if random.random() < speckle_prob:
# img = add_speckle_noise(img)
# elif i == 12:
# if random.random() < isp_prob and isp_model is not None:
# with torch.no_grad():
# img, hq = isp_model.forward(img.copy(), hq)
# else:
# print('check the shuffle!')
# # resize to desired size
# img = cv2.resize(img, (int(1/sf*hq.shape[1]), int(1/sf*hq.shape[0])), interpolation=random.choice([1, 2, 3]))
# # add final JPEG compression noise
# img = add_JPEG_noise(img)
# # random crop
# img, hq = random_crop(img, hq, sf, lq_patchsize)
# return img, hq
def add_Gaussian_noise_color(img, noise_level1=2, noise_level2=25, color_ratio=1):
noise_level = random.randint(noise_level1, noise_level2)
img += np.random.normal(0, noise_level/255.0, img.shape).astype(np.float32)
img = np.clip(img, 0.0, 1.0)
return img
if __name__ == '__main__':
img = util.imread_uint('utils/test.png', 3)
img = util.uint2single(img)
sf = 4
for i in range(20):
img_lq, img_hq = degradation_bsrgan(img, sf=sf, lq_patchsize=72)
print(i)
lq_nearest = cv2.resize(util.single2uint(img_lq), (int(sf*img_lq.shape[1]), int(sf*img_lq.shape[0])), interpolation=0)
img_concat = np.concatenate([lq_nearest, util.single2uint(img_hq)], axis=1)
util.imsave(img_concat, str(i)+'.png')
# for i in range(10):
# img_lq, img_hq = degradation_bsrgan_plus(img, sf=sf, shuffle_prob=0.1, use_sharp=True, lq_patchsize=64)
# print(i)
# lq_nearest = cv2.resize(util.single2uint(img_lq), (int(sf*img_lq.shape[1]), int(sf*img_lq.shape[0])), interpolation=0)
# img_concat = np.concatenate([lq_nearest, util.single2uint(img_hq)], axis=1)
# util.imsave(img_concat, str(i)+'.png')
# run utils/utils_blindsr.py
import torch
import torch.nn as nn
"""
# --------------------------------------------
# Batch Normalization
# --------------------------------------------
# Kai Zhang (cskaizhang@gmail.com)
# https://github.com/cszn
# 01/Jan/2019
# --------------------------------------------
"""
# --------------------------------------------
# remove/delete specified layer
# --------------------------------------------
def deleteLayer(model, layer_type=nn.BatchNorm2d):
''' Kai Zhang, 11/Jan/2019.
'''
for k, m in list(model.named_children()):
if isinstance(m, layer_type):
del model._modules[k]
deleteLayer(m, layer_type)
# --------------------------------------------
# merge bn, "conv+bn" --> "conv"
# --------------------------------------------
def merge_bn(model):
''' Kai Zhang, 11/Jan/2019.
merge all 'Conv+BN' (or 'TConv+BN') into 'Conv' (or 'TConv')
based on https://github.com/pytorch/pytorch/pull/901
'''
prev_m = None
for k, m in list(model.named_children()):
if (isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d)) and (isinstance(prev_m, nn.Conv2d) or isinstance(prev_m, nn.Linear) or isinstance(prev_m, nn.ConvTranspose2d)):
w = prev_m.weight.data
if prev_m.bias is None:
zeros = torch.Tensor(prev_m.out_channels).zero_().type(w.type())
prev_m.bias = nn.Parameter(zeros)
b = prev_m.bias.data
invstd = m.running_var.clone().add_(m.eps).pow_(-0.5)
if isinstance(prev_m, nn.ConvTranspose2d):
w.mul_(invstd.view(1, w.size(1), 1, 1).expand_as(w))
else:
w.mul_(invstd.view(w.size(0), 1, 1, 1).expand_as(w))
b.add_(-m.running_mean).mul_(invstd)
if m.affine:
if isinstance(prev_m, nn.ConvTranspose2d):
w.mul_(m.weight.data.view(1, w.size(1), 1, 1).expand_as(w))
else:
w.mul_(m.weight.data.view(w.size(0), 1, 1, 1).expand_as(w))
b.mul_(m.weight.data).add_(m.bias.data)
del model._modules[k]
prev_m = m
merge_bn(m)
# --------------------------------------------
# add bn, "conv" --> "conv+bn"
# --------------------------------------------
def add_bn(model):
''' Kai Zhang, 11/Jan/2019.
'''
for k, m in list(model.named_children()):
if (isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear) or isinstance(m, nn.ConvTranspose2d)):
b = nn.BatchNorm2d(m.out_channels, momentum=0.1, affine=True)
b.weight.data.fill_(1)
new_m = nn.Sequential(model._modules[k], b)
model._modules[k] = new_m
add_bn(m)
# --------------------------------------------
# tidy model after removing bn
# --------------------------------------------
def tidy_sequential(model):
''' Kai Zhang, 11/Jan/2019.
'''
for k, m in list(model.named_children()):
if isinstance(m, nn.Sequential):
if m.__len__() == 1:
model._modules[k] = m.__getitem__(0)
tidy_sequential(m)
# -*- coding: utf-8 -*-
import numpy as np
import scipy
from scipy import fftpack
import torch
from math import cos, sin
from numpy import zeros, ones, prod, array, pi, log, min, mod, arange, sum, mgrid, exp, pad, round
from numpy.random import randn, rand
from scipy.signal import convolve2d
import cv2
import random
# import utils_image as util
'''
modified by Kai Zhang (github: https://github.com/cszn)
03/03/2019
'''
def get_uperleft_denominator(img, kernel):
'''
img: HxWxC
kernel: hxw
denominator: HxWx1
upperleft: HxWxC
'''
V = psf2otf(kernel, img.shape[:2])
denominator = np.expand_dims(np.abs(V)**2, axis=2)
upperleft = np.expand_dims(np.conj(V), axis=2) * np.fft.fft2(img, axes=[0, 1])
return upperleft, denominator
def get_uperleft_denominator_pytorch(img, kernel):
'''
img: NxCxHxW
kernel: Nx1xhxw
denominator: Nx1xHxW
upperleft: NxCxHxWx2
'''
V = p2o(kernel, img.shape[-2:]) # Nx1xHxWx2
denominator = V[..., 0]**2+V[..., 1]**2 # Nx1xHxW
upperleft = cmul(cconj(V), rfft(img)) # Nx1xHxWx2 * NxCxHxWx2
return upperleft, denominator
def c2c(x):
return torch.from_numpy(np.stack([np.float32(x.real), np.float32(x.imag)], axis=-1))
def r2c(x):
return torch.stack([x, torch.zeros_like(x)], -1)
def cdiv(x, y):
a, b = x[..., 0], x[..., 1]
c, d = y[..., 0], y[..., 1]
cd2 = c**2 + d**2
return torch.stack([(a*c+b*d)/cd2, (b*c-a*d)/cd2], -1)
def cabs(x):
return torch.pow(x[..., 0]**2+x[..., 1]**2, 0.5)
def cmul(t1, t2):
'''
complex multiplication
t1: NxCxHxWx2
output: NxCxHxWx2
'''
real1, imag1 = t1[..., 0], t1[..., 1]
real2, imag2 = t2[..., 0], t2[..., 1]
return torch.stack([real1 * real2 - imag1 * imag2, real1 * imag2 + imag1 * real2], dim=-1)
def cconj(t, inplace=False):
'''
# complex's conjugation
t: NxCxHxWx2
output: NxCxHxWx2
'''
c = t.clone() if not inplace else t
c[..., 1] *= -1
return c
def rfft(t):
return torch.rfft(t, 2, onesided=False)
def irfft(t):
return torch.irfft(t, 2, onesided=False)
def fft(t):
return torch.fft(t, 2)
def ifft(t):
return torch.ifft(t, 2)
def p2o(psf, shape):
'''
# psf: NxCxhxw
# shape: [H,W]
# otf: NxCxHxWx2
'''
otf = torch.zeros(psf.shape[:-2] + shape).type_as(psf)
otf[...,:psf.shape[2],:psf.shape[3]].copy_(psf)
for axis, axis_size in enumerate(psf.shape[2:]):
otf = torch.roll(otf, -int(axis_size / 2), dims=axis+2)
otf = torch.rfft(otf, 2, onesided=False)
n_ops = torch.sum(torch.tensor(psf.shape).type_as(psf) * torch.log2(torch.tensor(psf.shape).type_as(psf)))
otf[...,1][torch.abs(otf[...,1])<n_ops*2.22e-16] = torch.tensor(0).type_as(psf)
return otf
# otf2psf: not sure where I got this one from. Maybe translated from Octave source code or whatever. It's just math.
def otf2psf(otf, outsize=None):
insize = np.array(otf.shape)
psf = np.fft.ifftn(otf, axes=(0, 1))
for axis, axis_size in enumerate(insize):
psf = np.roll(psf, np.floor(axis_size / 2).astype(int), axis=axis)
if type(outsize) != type(None):
insize = np.array(otf.shape)
outsize = np.array(outsize)
n = max(np.size(outsize), np.size(insize))
# outsize = postpad(outsize(:), n, 1);
# insize = postpad(insize(:) , n, 1);
colvec_out = outsize.flatten().reshape((np.size(outsize), 1))
colvec_in = insize.flatten().reshape((np.size(insize), 1))
outsize = np.pad(colvec_out, ((0, max(0, n - np.size(colvec_out))), (0, 0)), mode="constant")
insize = np.pad(colvec_in, ((0, max(0, n - np.size(colvec_in))), (0, 0)), mode="constant")
pad = (insize - outsize) / 2
if np.any(pad < 0):
print("otf2psf error: OUTSIZE must be smaller than or equal than OTF size")
prepad = np.floor(pad)
postpad = np.ceil(pad)
dims_start = prepad.astype(int)
dims_end = (insize - postpad).astype(int)
for i in range(len(dims_start.shape)):
psf = np.take(psf, range(dims_start[i][0], dims_end[i][0]), axis=i)
n_ops = np.sum(otf.size * np.log2(otf.shape))
psf = np.real_if_close(psf, tol=n_ops)
return psf
# psf2otf copied/modified from https://github.com/aboucaud/pypher/blob/master/pypher/pypher.py
def psf2otf(psf, shape=None):
"""
Convert point-spread function to optical transfer function.
Compute the Fast Fourier Transform (FFT) of the point-spread
function (PSF) array and creates the optical transfer function (OTF)
array that is not influenced by the PSF off-centering.
By default, the OTF array is the same size as the PSF array.
To ensure that the OTF is not altered due to PSF off-centering, PSF2OTF
post-pads the PSF array (down or to the right) with zeros to match
dimensions specified in OUTSIZE, then circularly shifts the values of
the PSF array up (or to the left) until the central pixel reaches (1,1)
position.
Parameters
----------
psf : `numpy.ndarray`
PSF array
shape : int
Output shape of the OTF array
Returns
-------
otf : `numpy.ndarray`
OTF array
Notes
-----
Adapted from MATLAB psf2otf function
"""
if type(shape) == type(None):
shape = psf.shape
shape = np.array(shape)
if np.all(psf == 0):
# return np.zeros_like(psf)
return np.zeros(shape)
if len(psf.shape) == 1:
psf = psf.reshape((1, psf.shape[0]))
inshape = psf.shape
psf = zero_pad(psf, shape, position='corner')
for axis, axis_size in enumerate(inshape):
psf = np.roll(psf, -int(axis_size / 2), axis=axis)
# Compute the OTF
otf = np.fft.fft2(psf, axes=(0, 1))
# Estimate the rough number of operations involved in the FFT
# and discard the PSF imaginary part if within roundoff error
# roundoff error = machine epsilon = sys.float_info.epsilon
# or np.finfo().eps
n_ops = np.sum(psf.size * np.log2(psf.shape))
otf = np.real_if_close(otf, tol=n_ops)
return otf
def zero_pad(image, shape, position='corner'):
"""
Extends image to a certain size with zeros
Parameters
----------
image: real 2d `numpy.ndarray`
Input image
shape: tuple of int
Desired output shape of the image
position : str, optional
The position of the input image in the output one:
* 'corner'
top-left corner (default)
* 'center'
centered
Returns
-------
padded_img: real `numpy.ndarray`
The zero-padded image
"""
shape = np.asarray(shape, dtype=int)
imshape = np.asarray(image.shape, dtype=int)
if np.alltrue(imshape == shape):
return image
if np.any(shape <= 0):
raise ValueError("ZERO_PAD: null or negative shape given")
dshape = shape - imshape
if np.any(dshape < 0):
raise ValueError("ZERO_PAD: target size smaller than source one")
pad_img = np.zeros(shape, dtype=image.dtype)
idx, idy = np.indices(imshape)
if position == 'center':
if np.any(dshape % 2 != 0):
raise ValueError("ZERO_PAD: source and target shapes "
"have different parity.")
offx, offy = dshape // 2
else:
offx, offy = (0, 0)
pad_img[idx + offx, idy + offy] = image
return pad_img
'''
Reducing boundary artifacts
'''
def opt_fft_size(n):
'''
Kai Zhang (github: https://github.com/cszn)
03/03/2019
# opt_fft_size.m
# compute an optimal data length for Fourier transforms
# written by Sunghyun Cho (sodomau@postech.ac.kr)
# persistent opt_fft_size_LUT;
'''
LUT_size = 2048
# print("generate opt_fft_size_LUT")
opt_fft_size_LUT = np.zeros(LUT_size)
e2 = 1
while e2 <= LUT_size:
e3 = e2
while e3 <= LUT_size:
e5 = e3
while e5 <= LUT_size:
e7 = e5
while e7 <= LUT_size:
if e7 <= LUT_size:
opt_fft_size_LUT[e7-1] = e7
if e7*11 <= LUT_size:
opt_fft_size_LUT[e7*11-1] = e7*11
if e7*13 <= LUT_size:
opt_fft_size_LUT[e7*13-1] = e7*13
e7 = e7 * 7
e5 = e5 * 5
e3 = e3 * 3
e2 = e2 * 2
nn = 0
for i in range(LUT_size, 0, -1):
if opt_fft_size_LUT[i-1] != 0:
nn = i-1
else:
opt_fft_size_LUT[i-1] = nn+1
m = np.zeros(len(n))
for c in range(len(n)):
nn = n[c]
if nn <= LUT_size:
m[c] = opt_fft_size_LUT[nn-1]
else:
m[c] = -1
return m
def wrap_boundary_liu(img, img_size):
"""
Reducing boundary artifacts in image deconvolution
Renting Liu, Jiaya Jia
ICIP 2008
"""
if img.ndim == 2:
ret = wrap_boundary(img, img_size)
elif img.ndim == 3:
ret = [wrap_boundary(img[:, :, i], img_size) for i in range(3)]
ret = np.stack(ret, 2)
return ret
def wrap_boundary(img, img_size):
"""
python code from:
https://github.com/ys-koshelev/nla_deblur/blob/90fe0ab98c26c791dcbdf231fe6f938fca80e2a0/boundaries.py
Reducing boundary artifacts in image deconvolution
Renting Liu, Jiaya Jia
ICIP 2008
"""
(H, W) = np.shape(img)
H_w = int(img_size[0]) - H
W_w = int(img_size[1]) - W
# ret = np.zeros((img_size[0], img_size[1]));
alpha = 1
HG = img[:, :]
r_A = np.zeros((alpha*2+H_w, W))
r_A[:alpha, :] = HG[-alpha:, :]
r_A[-alpha:, :] = HG[:alpha, :]
a = np.arange(H_w)/(H_w-1)
# r_A(alpha+1:end-alpha, 1) = (1-a)*r_A(alpha,1) + a*r_A(end-alpha+1,1)
r_A[alpha:-alpha, 0] = (1-a)*r_A[alpha-1, 0] + a*r_A[-alpha, 0]
# r_A(alpha+1:end-alpha, end) = (1-a)*r_A(alpha,end) + a*r_A(end-alpha+1,end)
r_A[alpha:-alpha, -1] = (1-a)*r_A[alpha-1, -1] + a*r_A[-alpha, -1]
r_B = np.zeros((H, alpha*2+W_w))
r_B[:, :alpha] = HG[:, -alpha:]
r_B[:, -alpha:] = HG[:, :alpha]
a = np.arange(W_w)/(W_w-1)
r_B[0, alpha:-alpha] = (1-a)*r_B[0, alpha-1] + a*r_B[0, -alpha]
r_B[-1, alpha:-alpha] = (1-a)*r_B[-1, alpha-1] + a*r_B[-1, -alpha]
if alpha == 1:
A2 = solve_min_laplacian(r_A[alpha-1:, :])
B2 = solve_min_laplacian(r_B[:, alpha-1:])
r_A[alpha-1:, :] = A2
r_B[:, alpha-1:] = B2
else:
A2 = solve_min_laplacian(r_A[alpha-1:-alpha+1, :])
r_A[alpha-1:-alpha+1, :] = A2
B2 = solve_min_laplacian(r_B[:, alpha-1:-alpha+1])
r_B[:, alpha-1:-alpha+1] = B2
A = r_A
B = r_B
r_C = np.zeros((alpha*2+H_w, alpha*2+W_w))
r_C[:alpha, :] = B[-alpha:, :]
r_C[-alpha:, :] = B[:alpha, :]
r_C[:, :alpha] = A[:, -alpha:]
r_C[:, -alpha:] = A[:, :alpha]
if alpha == 1:
C2 = C2 = solve_min_laplacian(r_C[alpha-1:, alpha-1:])
r_C[alpha-1:, alpha-1:] = C2
else:
C2 = solve_min_laplacian(r_C[alpha-1:-alpha+1, alpha-1:-alpha+1])
r_C[alpha-1:-alpha+1, alpha-1:-alpha+1] = C2
C = r_C
# return C
A = A[alpha-1:-alpha-1, :]
B = B[:, alpha:-alpha]
C = C[alpha:-alpha, alpha:-alpha]
ret = np.vstack((np.hstack((img, B)), np.hstack((A, C))))
return ret
def solve_min_laplacian(boundary_image):
(H, W) = np.shape(boundary_image)
# Laplacian
f = np.zeros((H, W))
# boundary image contains image intensities at boundaries
boundary_image[1:-1, 1:-1] = 0
j = np.arange(2, H)-1
k = np.arange(2, W)-1
f_bp = np.zeros((H, W))
f_bp[np.ix_(j, k)] = -4*boundary_image[np.ix_(j, k)] + boundary_image[np.ix_(j, k+1)] + boundary_image[np.ix_(j, k-1)] + boundary_image[np.ix_(j-1, k)] + boundary_image[np.ix_(j+1, k)]
del(j, k)
f1 = f - f_bp # subtract boundary points contribution
del(f_bp, f)
# DST Sine Transform algo starts here
f2 = f1[1:-1,1:-1]
del(f1)
# compute sine tranform
if f2.shape[1] == 1:
tt = fftpack.dst(f2, type=1, axis=0)/2
else:
tt = fftpack.dst(f2, type=1)/2
if tt.shape[0] == 1:
f2sin = np.transpose(fftpack.dst(np.transpose(tt), type=1, axis=0)/2)
else:
f2sin = np.transpose(fftpack.dst(np.transpose(tt), type=1)/2)
del(f2)
# compute Eigen Values
[x, y] = np.meshgrid(np.arange(1, W-1), np.arange(1, H-1))
denom = (2*np.cos(np.pi*x/(W-1))-2) + (2*np.cos(np.pi*y/(H-1)) - 2)
# divide
f3 = f2sin/denom
del(f2sin, x, y)
# compute Inverse Sine Transform
if f3.shape[0] == 1:
tt = fftpack.idst(f3*2, type=1, axis=1)/(2*(f3.shape[1]+1))
else:
tt = fftpack.idst(f3*2, type=1, axis=0)/(2*(f3.shape[0]+1))
del(f3)
if tt.shape[1] == 1:
img_tt = np.transpose(fftpack.idst(np.transpose(tt)*2, type=1)/(2*(tt.shape[0]+1)))
else:
img_tt = np.transpose(fftpack.idst(np.transpose(tt)*2, type=1, axis=0)/(2*(tt.shape[1]+1)))
del(tt)
# put solution in inner points; outer points obtained from boundary image
img_direct = boundary_image
img_direct[1:-1, 1:-1] = 0
img_direct[1:-1, 1:-1] = img_tt
return img_direct
"""
Created on Thu Jan 18 15:36:32 2018
@author: italo
https://github.com/ronaldosena/imagens-medicas-2/blob/40171a6c259edec7827a6693a93955de2bd39e76/Aulas/aula_2_-_uniform_filter/matlab_fspecial.py
"""
"""
Syntax
h = fspecial(type)
h = fspecial('average',hsize)
h = fspecial('disk',radius)
h = fspecial('gaussian',hsize,sigma)
h = fspecial('laplacian',alpha)
h = fspecial('log',hsize,sigma)
h = fspecial('motion',len,theta)
h = fspecial('prewitt')
h = fspecial('sobel')
"""
def fspecial_average(hsize=3):
"""Smoothing filter"""
return np.ones((hsize, hsize))/hsize**2
def fspecial_disk(radius):
"""Disk filter"""
raise(NotImplemented)
rad = 0.6
crad = np.ceil(rad-0.5)
[x, y] = np.meshgrid(np.arange(-crad, crad+1), np.arange(-crad, crad+1))
maxxy = np.zeros(x.shape)
maxxy[abs(x) >= abs(y)] = abs(x)[abs(x) >= abs(y)]
maxxy[abs(y) >= abs(x)] = abs(y)[abs(y) >= abs(x)]
minxy = np.zeros(x.shape)
minxy[abs(x) <= abs(y)] = abs(x)[abs(x) <= abs(y)]
minxy[abs(y) <= abs(x)] = abs(y)[abs(y) <= abs(x)]
m1 = (rad**2 < (maxxy+0.5)**2 + (minxy-0.5)**2)*(minxy-0.5) +\
(rad**2 >= (maxxy+0.5)**2 + (minxy-0.5)**2)*\
np.sqrt((rad**2 + 0j) - (maxxy + 0.5)**2)
m2 = (rad**2 > (maxxy-0.5)**2 + (minxy+0.5)**2)*(minxy+0.5) +\
(rad**2 <= (maxxy-0.5)**2 + (minxy+0.5)**2)*\
np.sqrt((rad**2 + 0j) - (maxxy - 0.5)**2)
h = None
return h
def fspecial_gaussian(hsize, sigma):
hsize = [hsize, hsize]
siz = [(hsize[0]-1.0)/2.0, (hsize[1]-1.0)/2.0]
std = sigma
[x, y] = np.meshgrid(np.arange(-siz[1], siz[1]+1), np.arange(-siz[0], siz[0]+1))
arg = -(x*x + y*y)/(2*std*std)
h = np.exp(arg)
h[h < scipy.finfo(float).eps * h.max()] = 0
sumh = h.sum()
if sumh != 0:
h = h/sumh
return h
def fspecial_laplacian(alpha):
alpha = max([0, min([alpha,1])])
h1 = alpha/(alpha+1)
h2 = (1-alpha)/(alpha+1)
h = [[h1, h2, h1], [h2, -4/(alpha+1), h2], [h1, h2, h1]]
h = np.array(h)
return h
def fspecial_log(hsize, sigma):
raise(NotImplemented)
def fspecial_motion(motion_len, theta):
raise(NotImplemented)
def fspecial_prewitt():
return np.array([[1, 1, 1], [0, 0, 0], [-1, -1, -1]])
def fspecial_sobel():
return np.array([[1, 2, 1], [0, 0, 0], [-1, -2, -1]])
def fspecial(filter_type, *args, **kwargs):
'''
python code from:
https://github.com/ronaldosena/imagens-medicas-2/blob/40171a6c259edec7827a6693a93955de2bd39e76/Aulas/aula_2_-_uniform_filter/matlab_fspecial.py
'''
if filter_type == 'average':
return fspecial_average(*args, **kwargs)
if filter_type == 'disk':
return fspecial_disk(*args, **kwargs)
if filter_type == 'gaussian':
return fspecial_gaussian(*args, **kwargs)
if filter_type == 'laplacian':
return fspecial_laplacian(*args, **kwargs)
if filter_type == 'log':
return fspecial_log(*args, **kwargs)
if filter_type == 'motion':
return fspecial_motion(*args, **kwargs)
if filter_type == 'prewitt':
return fspecial_prewitt(*args, **kwargs)
if filter_type == 'sobel':
return fspecial_sobel(*args, **kwargs)
def fspecial_gauss(size, sigma):
x, y = mgrid[-size // 2 + 1 : size // 2 + 1, -size // 2 + 1 : size // 2 + 1]
g = exp(-((x ** 2 + y ** 2) / (2.0 * sigma ** 2)))
return g / g.sum()
def blurkernel_synthesis(h=37, w=None):
# https://github.com/tkkcc/prior/blob/879a0b6c117c810776d8cc6b63720bf29f7d0cc4/util/gen_kernel.py
w = h if w is None else w
kdims = [h, w]
x = randomTrajectory(250)
k = None
while k is None:
k = kernelFromTrajectory(x)
# center pad to kdims
pad_width = ((kdims[0] - k.shape[0]) // 2, (kdims[1] - k.shape[1]) // 2)
pad_width = [(pad_width[0],), (pad_width[1],)]
if pad_width[0][0]<0 or pad_width[1][0]<0:
k = k[0:h, 0:h]
else:
k = pad(k, pad_width, "constant")
x1,x2 = k.shape
if np.random.randint(0, 4) == 1:
k = cv2.resize(k, (random.randint(x1, 5*x1), random.randint(x2, 5*x2)), interpolation=cv2.INTER_LINEAR)
y1, y2 = k.shape
k = k[(y1-x1)//2: (y1-x1)//2+x1, (y2-x2)//2: (y2-x2)//2+x2]
if sum(k)<0.1:
k = fspecial_gaussian(h, 0.1+6*np.random.rand(1))
k = k / sum(k)
# import matplotlib.pyplot as plt
# plt.imshow(k, interpolation="nearest", cmap="gray")
# plt.show()
return k
def kernelFromTrajectory(x):
h = 5 - log(rand()) / 0.15
h = round(min([h, 27])).astype(int)
h = h + 1 - h % 2
w = h
k = zeros((h, w))
xmin = min(x[0])
xmax = max(x[0])
ymin = min(x[1])
ymax = max(x[1])
xthr = arange(xmin, xmax, (xmax - xmin) / w)
ythr = arange(ymin, ymax, (ymax - ymin) / h)
for i in range(1, xthr.size):
for j in range(1, ythr.size):
idx = (
(x[0, :] >= xthr[i - 1])
& (x[0, :] < xthr[i])
& (x[1, :] >= ythr[j - 1])
& (x[1, :] < ythr[j])
)
k[i - 1, j - 1] = sum(idx)
if sum(k) == 0:
return
k = k / sum(k)
k = convolve2d(k, fspecial_gauss(3, 1), "same")
k = k / sum(k)
return k
def randomTrajectory(T):
x = zeros((3, T))
v = randn(3, T)
r = zeros((3, T))
trv = 1 / 1
trr = 2 * pi / T
for t in range(1, T):
F_rot = randn(3) / (t + 1) + r[:, t - 1]
F_trans = randn(3) / (t + 1)
r[:, t] = r[:, t - 1] + trr * F_rot
v[:, t] = v[:, t - 1] + trv * F_trans
st = v[:, t]
st = rot3D(st, r[:, t])
x[:, t] = x[:, t - 1] + st
return x
def rot3D(x, r):
Rx = array([[1, 0, 0], [0, cos(r[0]), -sin(r[0])], [0, sin(r[0]), cos(r[0])]])
Ry = array([[cos(r[1]), 0, sin(r[1])], [0, 1, 0], [-sin(r[1]), 0, cos(r[1])]])
Rz = array([[cos(r[2]), -sin(r[2]), 0], [sin(r[2]), cos(r[2]), 0], [0, 0, 1]])
R = Rz @ Ry @ Rx
x = R @ x
return x
if __name__ == '__main__':
a = opt_fft_size([111])
print(a)
print(fspecial('gaussian', 5, 1))
print(p2o(torch.zeros(1,1,4,4).float(),(14,14)).shape)
k = blurkernel_synthesis(11)
import matplotlib.pyplot as plt
plt.imshow(k, interpolation="nearest", cmap="gray")
plt.show()
# Modified from https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/dist_utils.py # noqa: E501
import functools
import os
import subprocess
import torch
import torch.distributed as dist
import torch.multiprocessing as mp
# ----------------------------------
# init
# ----------------------------------
def init_dist(launcher, backend='nccl', **kwargs):
if mp.get_start_method(allow_none=True) is None:
mp.set_start_method('spawn')
if launcher == 'pytorch':
_init_dist_pytorch(backend, **kwargs)
elif launcher == 'slurm':
_init_dist_slurm(backend, **kwargs)
else:
raise ValueError(f'Invalid launcher type: {launcher}')
def _init_dist_pytorch(backend, **kwargs):
rank = int(os.environ['RANK'])
num_gpus = torch.cuda.device_count()
torch.cuda.set_device(rank % num_gpus)
dist.init_process_group(backend=backend, **kwargs)
def _init_dist_slurm(backend, port=None):
"""Initialize slurm distributed training environment.
If argument ``port`` is not specified, then the master port will be system
environment variable ``MASTER_PORT``. If ``MASTER_PORT`` is not in system
environment variable, then a default port ``29500`` will be used.
Args:
backend (str): Backend of torch.distributed.
port (int, optional): Master port. Defaults to None.
"""
proc_id = int(os.environ['SLURM_PROCID'])
ntasks = int(os.environ['SLURM_NTASKS'])
node_list = os.environ['SLURM_NODELIST']
num_gpus = torch.cuda.device_count()
torch.cuda.set_device(proc_id % num_gpus)
addr = subprocess.getoutput(
f'scontrol show hostname {node_list} | head -n1')
# specify master port
if port is not None:
os.environ['MASTER_PORT'] = str(port)
elif 'MASTER_PORT' in os.environ:
pass # use MASTER_PORT in the environment variable
else:
# 29500 is torch.distributed default port
os.environ['MASTER_PORT'] = '29500'
os.environ['MASTER_ADDR'] = addr
os.environ['WORLD_SIZE'] = str(ntasks)
os.environ['LOCAL_RANK'] = str(proc_id % num_gpus)
os.environ['RANK'] = str(proc_id)
dist.init_process_group(backend=backend)
# ----------------------------------
# get rank and world_size
# ----------------------------------
def get_dist_info():
if dist.is_available():
initialized = dist.is_initialized()
else:
initialized = False
if initialized:
rank = dist.get_rank()
world_size = dist.get_world_size()
else:
rank = 0
world_size = 1
return rank, world_size
def get_rank():
if not dist.is_available():
return 0
if not dist.is_initialized():
return 0
return dist.get_rank()
def get_world_size():
if not dist.is_available():
return 1
if not dist.is_initialized():
return 1
return dist.get_world_size()
def master_only(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
rank, _ = get_dist_info()
if rank == 0:
return func(*args, **kwargs)
return wrapper
# ----------------------------------
# operation across ranks
# ----------------------------------
def reduce_sum(tensor):
if not dist.is_available():
return tensor
if not dist.is_initialized():
return tensor
tensor = tensor.clone()
dist.all_reduce(tensor, op=dist.ReduceOp.SUM)
return tensor
def gather_grad(params):
world_size = get_world_size()
if world_size == 1:
return
for param in params:
if param.grad is not None:
dist.all_reduce(param.grad.data, op=dist.ReduceOp.SUM)
param.grad.data.div_(world_size)
def all_gather(data):
world_size = get_world_size()
if world_size == 1:
return [data]
buffer = pickle.dumps(data)
storage = torch.ByteStorage.from_buffer(buffer)
tensor = torch.ByteTensor(storage).to('cuda')
local_size = torch.IntTensor([tensor.numel()]).to('cuda')
size_list = [torch.IntTensor([0]).to('cuda') for _ in range(world_size)]
dist.all_gather(size_list, local_size)
size_list = [int(size.item()) for size in size_list]
max_size = max(size_list)
tensor_list = []
for _ in size_list:
tensor_list.append(torch.ByteTensor(size=(max_size,)).to('cuda'))
if local_size != max_size:
padding = torch.ByteTensor(size=(max_size - local_size,)).to('cuda')
tensor = torch.cat((tensor, padding), 0)
dist.all_gather(tensor_list, tensor)
data_list = []
for size, tensor in zip(size_list, tensor_list):
buffer = tensor.cpu().numpy().tobytes()[:size]
data_list.append(pickle.loads(buffer))
return data_list
def reduce_loss_dict(loss_dict):
world_size = get_world_size()
if world_size < 2:
return loss_dict
with torch.no_grad():
keys = []
losses = []
for k in sorted(loss_dict.keys()):
keys.append(k)
losses.append(loss_dict[k])
losses = torch.stack(losses, 0)
dist.reduce(losses, dst=0)
if dist.get_rank() == 0:
losses /= world_size
reduced_losses = {k: v for k, v in zip(keys, losses)}
return reduced_losses
import math
import requests
from tqdm import tqdm
'''
borrowed from
https://github.com/xinntao/BasicSR/blob/28883e15eedc3381d23235ff3cf7c454c4be87e6/basicsr/utils/download_util.py
'''
def sizeof_fmt(size, suffix='B'):
"""Get human readable file size.
Args:
size (int): File size.
suffix (str): Suffix. Default: 'B'.
Return:
str: Formated file siz.
"""
for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
if abs(size) < 1024.0:
return f'{size:3.1f} {unit}{suffix}'
size /= 1024.0
return f'{size:3.1f} Y{suffix}'
def download_file_from_google_drive(file_id, save_path):
"""Download files from google drive.
Ref:
https://stackoverflow.com/questions/25010369/wget-curl-large-file-from-google-drive # noqa E501
Args:
file_id (str): File id.
save_path (str): Save path.
"""
session = requests.Session()
URL = 'https://docs.google.com/uc?export=download'
params = {'id': file_id}
response = session.get(URL, params=params, stream=True)
token = get_confirm_token(response)
if token:
params['confirm'] = token
response = session.get(URL, params=params, stream=True)
# get file size
response_file_size = session.get(
URL, params=params, stream=True, headers={'Range': 'bytes=0-2'})
if 'Content-Range' in response_file_size.headers:
file_size = int(
response_file_size.headers['Content-Range'].split('/')[1])
else:
file_size = None
save_response_content(response, save_path, file_size)
def get_confirm_token(response):
for key, value in response.cookies.items():
if key.startswith('download_warning'):
return value
return None
def save_response_content(response,
destination,
file_size=None,
chunk_size=32768):
if file_size is not None:
pbar = tqdm(total=math.ceil(file_size / chunk_size), unit='chunk')
readable_file_size = sizeof_fmt(file_size)
else:
pbar = None
with open(destination, 'wb') as f:
downloaded_size = 0
for chunk in response.iter_content(chunk_size):
downloaded_size += chunk_size
if pbar is not None:
pbar.update(1)
pbar.set_description(f'Download {sizeof_fmt(downloaded_size)} '
f'/ {readable_file_size}')
if chunk: # filter out keep-alive new chunks
f.write(chunk)
if pbar is not None:
pbar.close()
if __name__ == "__main__":
file_id = '1WNULM1e8gRNvsngVscsQ8tpaOqJ4mYtv'
save_path = 'BSRGAN.pth'
download_file_from_google_drive(file_id, save_path)
import os
import math
import random
import numpy as np
import torch
import cv2
from torchvision.utils import make_grid
from datetime import datetime
# import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
'''
# --------------------------------------------
# Kai Zhang (github: https://github.com/cszn)
# 03/Mar/2019
# --------------------------------------------
# https://github.com/twhui/SRGAN-pyTorch
# https://github.com/xinntao/BasicSR
# --------------------------------------------
'''
IMG_EXTENSIONS = ['.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP', '.tif']
def is_image_file(filename):
return any(filename.endswith(extension) for extension in IMG_EXTENSIONS)
def get_timestamp():
return datetime.now().strftime('%y%m%d-%H%M%S')
def imshow(x, title=None, cbar=False, figsize=None):
plt.figure(figsize=figsize)
plt.imshow(np.squeeze(x), interpolation='nearest', cmap='gray')
if title:
plt.title(title)
if cbar:
plt.colorbar()
plt.show()
def surf(Z, cmap='rainbow', figsize=None):
plt.figure(figsize=figsize)
ax3 = plt.axes(projection='3d')
w, h = Z.shape[:2]
xx = np.arange(0,w,1)
yy = np.arange(0,h,1)
X, Y = np.meshgrid(xx, yy)
ax3.plot_surface(X,Y,Z,cmap=cmap)
#ax3.contour(X,Y,Z, zdim='z',offset=-2,cmap=cmap)
plt.show()
'''
# --------------------------------------------
# get image pathes
# --------------------------------------------
'''
def get_image_paths(dataroot):
paths = None # return None if dataroot is None
if isinstance(dataroot, str):
paths = sorted(_get_paths_from_images(dataroot))
elif isinstance(dataroot, list):
paths = []
for i in dataroot:
paths += sorted(_get_paths_from_images(i))
return paths
def _get_paths_from_images(path):
assert os.path.isdir(path), '{:s} is not a valid directory'.format(path)
images = []
for dirpath, _, fnames in sorted(os.walk(path)):
for fname in sorted(fnames):
if is_image_file(fname):
img_path = os.path.join(dirpath, fname)
images.append(img_path)
assert images, '{:s} has no valid image file'.format(path)
return images
'''
# --------------------------------------------
# split large images into small images
# --------------------------------------------
'''
def patches_from_image(img, p_size=512, p_overlap=64, p_max=800):
w, h = img.shape[:2]
patches = []
if w > p_max and h > p_max:
w1 = list(np.arange(0, w-p_size, p_size-p_overlap, dtype=int))
h1 = list(np.arange(0, h-p_size, p_size-p_overlap, dtype=int))
w1.append(w-p_size)
h1.append(h-p_size)
# print(w1)
# print(h1)
for i in w1:
for j in h1:
patches.append(img[i:i+p_size, j:j+p_size,:])
else:
patches.append(img)
return patches
def imssave(imgs, img_path):
"""
imgs: list, N images of size WxHxC
"""
img_name, ext = os.path.splitext(os.path.basename(img_path))
for i, img in enumerate(imgs):
if img.ndim == 3:
img = img[:, :, [2, 1, 0]]
new_path = os.path.join(os.path.dirname(img_path), img_name+str('_{:04d}'.format(i))+'.png')
cv2.imwrite(new_path, img)
def split_imageset(original_dataroot, taget_dataroot, n_channels=3, p_size=512, p_overlap=96, p_max=800):
"""
split the large images from original_dataroot into small overlapped images with size (p_size)x(p_size),
and save them into taget_dataroot; only the images with larger size than (p_max)x(p_max)
will be splitted.
Args:
original_dataroot:
taget_dataroot:
p_size: size of small images
p_overlap: patch size in training is a good choice
p_max: images with smaller size than (p_max)x(p_max) keep unchanged.
"""
paths = get_image_paths(original_dataroot)
for img_path in paths:
# img_name, ext = os.path.splitext(os.path.basename(img_path))
img = imread_uint(img_path, n_channels=n_channels)
patches = patches_from_image(img, p_size, p_overlap, p_max)
imssave(patches, os.path.join(taget_dataroot, os.path.basename(img_path)))
#if original_dataroot == taget_dataroot:
#del img_path
'''
# --------------------------------------------
# makedir
# --------------------------------------------
'''
def mkdir(path):
if not os.path.exists(path):
os.makedirs(path)
def mkdirs(paths):
if isinstance(paths, str):
mkdir(paths)
else:
for path in paths:
mkdir(path)
def mkdir_and_rename(path):
if os.path.exists(path):
new_name = path + '_archived_' + get_timestamp()
print('Path already exists. Rename it to [{:s}]'.format(new_name))
os.rename(path, new_name)
os.makedirs(path)
'''
# --------------------------------------------
# read image from path
# opencv is fast, but read BGR numpy image
# --------------------------------------------
'''
# --------------------------------------------
# get uint8 image of size HxWxn_channles (RGB)
# --------------------------------------------
def imread_uint(path, n_channels=3):
# input: path
# output: HxWx3(RGB or GGG), or HxWx1 (G)
if n_channels == 1:
img = cv2.imread(path, 0) # cv2.IMREAD_GRAYSCALE
img = np.expand_dims(img, axis=2) # HxWx1
elif n_channels == 3:
img = cv2.imread(path, cv2.IMREAD_UNCHANGED) # BGR or G
if img.ndim == 2:
img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) # GGG
else:
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # RGB
return img
# --------------------------------------------
# matlab's imwrite
# --------------------------------------------
def imsave(img, img_path):
img = np.squeeze(img)
if img.ndim == 3:
img = img[:, :, [2, 1, 0]]
cv2.imwrite(img_path, img)
def imwrite(img, img_path):
img = np.squeeze(img)
if img.ndim == 3:
img = img[:, :, [2, 1, 0]]
cv2.imwrite(img_path, img)
# --------------------------------------------
# get single image of size HxWxn_channles (BGR)
# --------------------------------------------
def read_img(path):
# read image by cv2
# return: Numpy float32, HWC, BGR, [0,1]
img = cv2.imread(path, cv2.IMREAD_UNCHANGED) # cv2.IMREAD_GRAYSCALE
img = img.astype(np.float32) / 255.
if img.ndim == 2:
img = np.expand_dims(img, axis=2)
# some images have 4 channels
if img.shape[2] > 3:
img = img[:, :, :3]
return img
'''
# --------------------------------------------
# image format conversion
# --------------------------------------------
# numpy(single) <---> numpy(uint)
# numpy(single) <---> tensor
# numpy(uint) <---> tensor
# --------------------------------------------
'''
# --------------------------------------------
# numpy(single) [0, 1] <---> numpy(uint)
# --------------------------------------------
def uint2single(img):
return np.float32(img/255.)
def single2uint(img):
return np.uint8((img.clip(0, 1)*255.).round())
def uint162single(img):
return np.float32(img/65535.)
def single2uint16(img):
return np.uint16((img.clip(0, 1)*65535.).round())
# --------------------------------------------
# numpy(uint) (HxWxC or HxW) <---> tensor
# --------------------------------------------
# convert uint to 4-dimensional torch tensor
def uint2tensor4(img):
if img.ndim == 2:
img = np.expand_dims(img, axis=2)
return torch.from_numpy(np.ascontiguousarray(img)).permute(2, 0, 1).float().div(255.).unsqueeze(0)
# convert uint to 3-dimensional torch tensor
def uint2tensor3(img):
if img.ndim == 2:
img = np.expand_dims(img, axis=2)
return torch.from_numpy(np.ascontiguousarray(img)).permute(2, 0, 1).float().div(255.)
# convert 2/3/4-dimensional torch tensor to uint
def tensor2uint(img):
img = img.data.squeeze().float().clamp_(0, 1).cpu().numpy()
if img.ndim == 3:
img = np.transpose(img, (1, 2, 0))
return np.uint8((img*255.0).round())
# --------------------------------------------
# numpy(single) (HxWxC) <---> tensor
# --------------------------------------------
# convert single (HxWxC) to 3-dimensional torch tensor
def single2tensor3(img):
return torch.from_numpy(np.ascontiguousarray(img)).permute(2, 0, 1).float()
# convert single (HxWxC) to 4-dimensional torch tensor
def single2tensor4(img):
return torch.from_numpy(np.ascontiguousarray(img)).permute(2, 0, 1).float().unsqueeze(0)
# convert torch tensor to single
def tensor2single(img):
img = img.data.squeeze().float().cpu().numpy()
if img.ndim == 3:
img = np.transpose(img, (1, 2, 0))
return img
# convert torch tensor to single
def tensor2single3(img):
img = img.data.squeeze().float().cpu().numpy()
if img.ndim == 3:
img = np.transpose(img, (1, 2, 0))
elif img.ndim == 2:
img = np.expand_dims(img, axis=2)
return img
def single2tensor5(img):
return torch.from_numpy(np.ascontiguousarray(img)).permute(2, 0, 1, 3).float().unsqueeze(0)
def single32tensor5(img):
return torch.from_numpy(np.ascontiguousarray(img)).float().unsqueeze(0).unsqueeze(0)
def single42tensor4(img):
return torch.from_numpy(np.ascontiguousarray(img)).permute(2, 0, 1, 3).float()
# from skimage.io import imread, imsave
def tensor2img(tensor, out_type=np.uint8, min_max=(0, 1)):
'''
Converts a torch Tensor into an image Numpy array of BGR channel order
Input: 4D(B,(3/1),H,W), 3D(C,H,W), or 2D(H,W), any range, RGB channel order
Output: 3D(H,W,C) or 2D(H,W), [0,255], np.uint8 (default)
'''
tensor = tensor.squeeze().float().cpu().clamp_(*min_max) # squeeze first, then clamp
tensor = (tensor - min_max[0]) / (min_max[1] - min_max[0]) # to range [0,1]
n_dim = tensor.dim()
if n_dim == 4:
n_img = len(tensor)
img_np = make_grid(tensor, nrow=int(math.sqrt(n_img)), normalize=False).numpy()
img_np = np.transpose(img_np[[2, 1, 0], :, :], (1, 2, 0)) # HWC, BGR
elif n_dim == 3:
img_np = tensor.numpy()
img_np = np.transpose(img_np[[2, 1, 0], :, :], (1, 2, 0)) # HWC, BGR
elif n_dim == 2:
img_np = tensor.numpy()
else:
raise TypeError(
'Only support 4D, 3D and 2D tensor. But received with dimension: {:d}'.format(n_dim))
if out_type == np.uint8:
img_np = (img_np * 255.0).round()
# Important. Unlike matlab, numpy.uint8() WILL NOT round by default.
return img_np.astype(out_type)
'''
# --------------------------------------------
# Augmentation, flipe and/or rotate
# --------------------------------------------
# The following two are enough.
# (1) augmet_img: numpy image of WxHxC or WxH
# (2) augment_img_tensor4: tensor image 1xCxWxH
# --------------------------------------------
'''
def augment_img(img, mode=0):
'''Kai Zhang (github: https://github.com/cszn)
'''
if mode == 0:
return img
elif mode == 1:
return np.flipud(np.rot90(img))
elif mode == 2:
return np.flipud(img)
elif mode == 3:
return np.rot90(img, k=3)
elif mode == 4:
return np.flipud(np.rot90(img, k=2))
elif mode == 5:
return np.rot90(img)
elif mode == 6:
return np.rot90(img, k=2)
elif mode == 7:
return np.flipud(np.rot90(img, k=3))
def augment_img_tensor4(img, mode=0):
'''Kai Zhang (github: https://github.com/cszn)
'''
if mode == 0:
return img
elif mode == 1:
return img.rot90(1, [2, 3]).flip([2])
elif mode == 2:
return img.flip([2])
elif mode == 3:
return img.rot90(3, [2, 3])
elif mode == 4:
return img.rot90(2, [2, 3]).flip([2])
elif mode == 5:
return img.rot90(1, [2, 3])
elif mode == 6:
return img.rot90(2, [2, 3])
elif mode == 7:
return img.rot90(3, [2, 3]).flip([2])
def augment_img_tensor(img, mode=0):
'''Kai Zhang (github: https://github.com/cszn)
'''
img_size = img.size()
img_np = img.data.cpu().numpy()
if len(img_size) == 3:
img_np = np.transpose(img_np, (1, 2, 0))
elif len(img_size) == 4:
img_np = np.transpose(img_np, (2, 3, 1, 0))
img_np = augment_img(img_np, mode=mode)
img_tensor = torch.from_numpy(np.ascontiguousarray(img_np))
if len(img_size) == 3:
img_tensor = img_tensor.permute(2, 0, 1)
elif len(img_size) == 4:
img_tensor = img_tensor.permute(3, 2, 0, 1)
return img_tensor.type_as(img)
def augment_img_np3(img, mode=0):
if mode == 0:
return img
elif mode == 1:
return img.transpose(1, 0, 2)
elif mode == 2:
return img[::-1, :, :]
elif mode == 3:
img = img[::-1, :, :]
img = img.transpose(1, 0, 2)
return img
elif mode == 4:
return img[:, ::-1, :]
elif mode == 5:
img = img[:, ::-1, :]
img = img.transpose(1, 0, 2)
return img
elif mode == 6:
img = img[:, ::-1, :]
img = img[::-1, :, :]
return img
elif mode == 7:
img = img[:, ::-1, :]
img = img[::-1, :, :]
img = img.transpose(1, 0, 2)
return img
def augment_imgs(img_list, hflip=True, rot=True):
# horizontal flip OR rotate
hflip = hflip and random.random() < 0.5
vflip = rot and random.random() < 0.5
rot90 = rot and random.random() < 0.5
def _augment(img):
if hflip:
img = img[:, ::-1, :]
if vflip:
img = img[::-1, :, :]
if rot90:
img = img.transpose(1, 0, 2)
return img
return [_augment(img) for img in img_list]
'''
# --------------------------------------------
# modcrop and shave
# --------------------------------------------
'''
def modcrop(img_in, scale):
# img_in: Numpy, HWC or HW
img = np.copy(img_in)
if img.ndim == 2:
H, W = img.shape
H_r, W_r = H % scale, W % scale
img = img[:H - H_r, :W - W_r]
elif img.ndim == 3:
H, W, C = img.shape
H_r, W_r = H % scale, W % scale
img = img[:H - H_r, :W - W_r, :]
else:
raise ValueError('Wrong img ndim: [{:d}].'.format(img.ndim))
return img
def shave(img_in, border=0):
# img_in: Numpy, HWC or HW
img = np.copy(img_in)
h, w = img.shape[:2]
img = img[border:h-border, border:w-border]
return img
'''
# --------------------------------------------
# image processing process on numpy image
# channel_convert(in_c, tar_type, img_list):
# rgb2ycbcr(img, only_y=True):
# bgr2ycbcr(img, only_y=True):
# ycbcr2rgb(img):
# --------------------------------------------
'''
def rgb2ycbcr(img, only_y=True):
'''same as matlab rgb2ycbcr
only_y: only return Y channel
Input:
uint8, [0, 255]
float, [0, 1]
'''
in_img_type = img.dtype
img.astype(np.float32)
if in_img_type != np.uint8:
img *= 255.
# convert
if only_y:
rlt = np.dot(img, [65.481, 128.553, 24.966]) / 255.0 + 16.0
else:
rlt = np.matmul(img, [[65.481, -37.797, 112.0], [128.553, -74.203, -93.786],
[24.966, 112.0, -18.214]]) / 255.0 + [16, 128, 128]
if in_img_type == np.uint8:
rlt = rlt.round()
else:
rlt /= 255.
return rlt.astype(in_img_type)
def ycbcr2rgb(img):
'''same as matlab ycbcr2rgb
Input:
uint8, [0, 255]
float, [0, 1]
'''
in_img_type = img.dtype
img.astype(np.float32)
if in_img_type != np.uint8:
img *= 255.
# convert
rlt = np.matmul(img, [[0.00456621, 0.00456621, 0.00456621], [0, -0.00153632, 0.00791071],
[0.00625893, -0.00318811, 0]]) * 255.0 + [-222.921, 135.576, -276.836]
rlt = np.clip(rlt, 0, 255)
if in_img_type == np.uint8:
rlt = rlt.round()
else:
rlt /= 255.
return rlt.astype(in_img_type)
def bgr2ycbcr(img, only_y=True):
'''bgr version of rgb2ycbcr
only_y: only return Y channel
Input:
uint8, [0, 255]
float, [0, 1]
'''
in_img_type = img.dtype
img.astype(np.float32)
if in_img_type != np.uint8:
img *= 255.
# convert
if only_y:
rlt = np.dot(img, [24.966, 128.553, 65.481]) / 255.0 + 16.0
else:
rlt = np.matmul(img, [[24.966, 112.0, -18.214], [128.553, -74.203, -93.786],
[65.481, -37.797, 112.0]]) / 255.0 + [16, 128, 128]
if in_img_type == np.uint8:
rlt = rlt.round()
else:
rlt /= 255.
return rlt.astype(in_img_type)
def channel_convert(in_c, tar_type, img_list):
# conversion among BGR, gray and y
if in_c == 3 and tar_type == 'gray': # BGR to gray
gray_list = [cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) for img in img_list]
return [np.expand_dims(img, axis=2) for img in gray_list]
elif in_c == 3 and tar_type == 'y': # BGR to y
y_list = [bgr2ycbcr(img, only_y=True) for img in img_list]
return [np.expand_dims(img, axis=2) for img in y_list]
elif in_c == 1 and tar_type == 'RGB': # gray/y to BGR
return [cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) for img in img_list]
else:
return img_list
'''
# --------------------------------------------
# metric, PSNR, SSIM and PSNRB
# --------------------------------------------
'''
# --------------------------------------------
# PSNR
# --------------------------------------------
def calculate_psnr(img1, img2, border=0):
# img1 and img2 have range [0, 255]
#img1 = img1.squeeze()
#img2 = img2.squeeze()
if not img1.shape == img2.shape:
raise ValueError('Input images must have the same dimensions.')
h, w = img1.shape[:2]
img1 = img1[border:h-border, border:w-border]
img2 = img2[border:h-border, border:w-border]
img1 = img1.astype(np.float64)
img2 = img2.astype(np.float64)
mse = np.mean((img1 - img2)**2)
if mse == 0:
return float('inf')
return 20 * math.log10(255.0 / math.sqrt(mse))
# --------------------------------------------
# SSIM
# --------------------------------------------
def calculate_ssim(img1, img2, border=0):
'''calculate SSIM
the same outputs as MATLAB's
img1, img2: [0, 255]
'''
#img1 = img1.squeeze()
#img2 = img2.squeeze()
if not img1.shape == img2.shape:
raise ValueError('Input images must have the same dimensions.')
h, w = img1.shape[:2]
img1 = img1[border:h-border, border:w-border]
img2 = img2[border:h-border, border:w-border]
if img1.ndim == 2:
return ssim(img1, img2)
elif img1.ndim == 3:
if img1.shape[2] == 3:
ssims = []
for i in range(3):
ssims.append(ssim(img1[:,:,i], img2[:,:,i]))
return np.array(ssims).mean()
elif img1.shape[2] == 1:
return ssim(np.squeeze(img1), np.squeeze(img2))
else:
raise ValueError('Wrong input image dimensions.')
def ssim(img1, img2):
C1 = (0.01 * 255)**2
C2 = (0.03 * 255)**2
img1 = img1.astype(np.float64)
img2 = img2.astype(np.float64)
kernel = cv2.getGaussianKernel(11, 1.5)
window = np.outer(kernel, kernel.transpose())
mu1 = cv2.filter2D(img1, -1, window)[5:-5, 5:-5] # valid
mu2 = cv2.filter2D(img2, -1, window)[5:-5, 5:-5]
mu1_sq = mu1**2
mu2_sq = mu2**2
mu1_mu2 = mu1 * mu2
sigma1_sq = cv2.filter2D(img1**2, -1, window)[5:-5, 5:-5] - mu1_sq
sigma2_sq = cv2.filter2D(img2**2, -1, window)[5:-5, 5:-5] - mu2_sq
sigma12 = cv2.filter2D(img1 * img2, -1, window)[5:-5, 5:-5] - mu1_mu2
ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) *
(sigma1_sq + sigma2_sq + C2))
return ssim_map.mean()
def _blocking_effect_factor(im):
block_size = 8
block_horizontal_positions = torch.arange(7, im.shape[3] - 1, 8)
block_vertical_positions = torch.arange(7, im.shape[2] - 1, 8)
horizontal_block_difference = (
(im[:, :, :, block_horizontal_positions] - im[:, :, :, block_horizontal_positions + 1]) ** 2).sum(
3).sum(2).sum(1)
vertical_block_difference = (
(im[:, :, block_vertical_positions, :] - im[:, :, block_vertical_positions + 1, :]) ** 2).sum(3).sum(
2).sum(1)
nonblock_horizontal_positions = np.setdiff1d(torch.arange(0, im.shape[3] - 1), block_horizontal_positions)
nonblock_vertical_positions = np.setdiff1d(torch.arange(0, im.shape[2] - 1), block_vertical_positions)
horizontal_nonblock_difference = (
(im[:, :, :, nonblock_horizontal_positions] - im[:, :, :, nonblock_horizontal_positions + 1]) ** 2).sum(
3).sum(2).sum(1)
vertical_nonblock_difference = (
(im[:, :, nonblock_vertical_positions, :] - im[:, :, nonblock_vertical_positions + 1, :]) ** 2).sum(
3).sum(2).sum(1)
n_boundary_horiz = im.shape[2] * (im.shape[3] // block_size - 1)
n_boundary_vert = im.shape[3] * (im.shape[2] // block_size - 1)
boundary_difference = (horizontal_block_difference + vertical_block_difference) / (
n_boundary_horiz + n_boundary_vert)
n_nonboundary_horiz = im.shape[2] * (im.shape[3] - 1) - n_boundary_horiz
n_nonboundary_vert = im.shape[3] * (im.shape[2] - 1) - n_boundary_vert
nonboundary_difference = (horizontal_nonblock_difference + vertical_nonblock_difference) / (
n_nonboundary_horiz + n_nonboundary_vert)
scaler = np.log2(block_size) / np.log2(min([im.shape[2], im.shape[3]]))
bef = scaler * (boundary_difference - nonboundary_difference)
bef[boundary_difference <= nonboundary_difference] = 0
return bef
def calculate_psnrb(img1, img2, border=0):
"""Calculate PSNR-B (Peak Signal-to-Noise Ratio).
Ref: Quality assessment of deblocked images, for JPEG image deblocking evaluation
# https://gitlab.com/Queuecumber/quantization-guided-ac/-/blob/master/metrics/psnrb.py
Args:
img1 (ndarray): Images with range [0, 255].
img2 (ndarray): Images with range [0, 255].
border (int): Cropped pixels in each edge of an image. These
pixels are not involved in the PSNR calculation.
test_y_channel (bool): Test on Y channel of YCbCr. Default: False.
Returns:
float: psnr result.
"""
if not img1.shape == img2.shape:
raise ValueError('Input images must have the same dimensions.')
if img1.ndim == 2:
img1, img2 = np.expand_dims(img1, 2), np.expand_dims(img2, 2)
h, w = img1.shape[:2]
img1 = img1[border:h-border, border:w-border]
img2 = img2[border:h-border, border:w-border]
img1 = img1.astype(np.float64)
img2 = img2.astype(np.float64)
# follow https://gitlab.com/Queuecumber/quantization-guided-ac/-/blob/master/metrics/psnrb.py
img1 = torch.from_numpy(img1).permute(2, 0, 1).unsqueeze(0) / 255.
img2 = torch.from_numpy(img2).permute(2, 0, 1).unsqueeze(0) / 255.
total = 0
for c in range(img1.shape[1]):
mse = torch.nn.functional.mse_loss(img1[:, c:c + 1, :, :], img2[:, c:c + 1, :, :], reduction='none')
bef = _blocking_effect_factor(img1[:, c:c + 1, :, :])
mse = mse.view(mse.shape[0], -1).mean(1)
total += 10 * torch.log10(1 / (mse + bef))
return float(total) / img1.shape[1]
'''
# --------------------------------------------
# matlab's bicubic imresize (numpy and torch) [0, 1]
# --------------------------------------------
'''
# matlab 'imresize' function, now only support 'bicubic'
def cubic(x):
absx = torch.abs(x)
absx2 = absx**2
absx3 = absx**3
return (1.5*absx3 - 2.5*absx2 + 1) * ((absx <= 1).type_as(absx)) + \
(-0.5*absx3 + 2.5*absx2 - 4*absx + 2) * (((absx > 1)*(absx <= 2)).type_as(absx))
def calculate_weights_indices(in_length, out_length, scale, kernel, kernel_width, antialiasing):
if (scale < 1) and (antialiasing):
# Use a modified kernel to simultaneously interpolate and antialias- larger kernel width
kernel_width = kernel_width / scale
# Output-space coordinates
x = torch.linspace(1, out_length, out_length)
# Input-space coordinates. Calculate the inverse mapping such that 0.5
# in output space maps to 0.5 in input space, and 0.5+scale in output
# space maps to 1.5 in input space.
u = x / scale + 0.5 * (1 - 1 / scale)
# What is the left-most pixel that can be involved in the computation?
left = torch.floor(u - kernel_width / 2)
# What is the maximum number of pixels that can be involved in the
# computation? Note: it's OK to use an extra pixel here; if the
# corresponding weights are all zero, it will be eliminated at the end
# of this function.
P = math.ceil(kernel_width) + 2
# The indices of the input pixels involved in computing the k-th output
# pixel are in row k of the indices matrix.
indices = left.view(out_length, 1).expand(out_length, P) + torch.linspace(0, P - 1, P).view(
1, P).expand(out_length, P)
# The weights used to compute the k-th output pixel are in row k of the
# weights matrix.
distance_to_center = u.view(out_length, 1).expand(out_length, P) - indices
# apply cubic kernel
if (scale < 1) and (antialiasing):
weights = scale * cubic(distance_to_center * scale)
else:
weights = cubic(distance_to_center)
# Normalize the weights matrix so that each row sums to 1.
weights_sum = torch.sum(weights, 1).view(out_length, 1)
weights = weights / weights_sum.expand(out_length, P)
# If a column in weights is all zero, get rid of it. only consider the first and last column.
weights_zero_tmp = torch.sum((weights == 0), 0)
if not math.isclose(weights_zero_tmp[0], 0, rel_tol=1e-6):
indices = indices.narrow(1, 1, P - 2)
weights = weights.narrow(1, 1, P - 2)
if not math.isclose(weights_zero_tmp[-1], 0, rel_tol=1e-6):
indices = indices.narrow(1, 0, P - 2)
weights = weights.narrow(1, 0, P - 2)
weights = weights.contiguous()
indices = indices.contiguous()
sym_len_s = -indices.min() + 1
sym_len_e = indices.max() - in_length
indices = indices + sym_len_s - 1
return weights, indices, int(sym_len_s), int(sym_len_e)
# --------------------------------------------
# imresize for tensor image [0, 1]
# --------------------------------------------
def imresize(img, scale, antialiasing=True):
# Now the scale should be the same for H and W
# input: img: pytorch tensor, CHW or HW [0,1]
# output: CHW or HW [0,1] w/o round
need_squeeze = True if img.dim() == 2 else False
if need_squeeze:
img.unsqueeze_(0)
in_C, in_H, in_W = img.size()
out_C, out_H, out_W = in_C, math.ceil(in_H * scale), math.ceil(in_W * scale)
kernel_width = 4
kernel = 'cubic'
# Return the desired dimension order for performing the resize. The
# strategy is to perform the resize first along the dimension with the
# smallest scale factor.
# Now we do not support this.
# get weights and indices
weights_H, indices_H, sym_len_Hs, sym_len_He = calculate_weights_indices(
in_H, out_H, scale, kernel, kernel_width, antialiasing)
weights_W, indices_W, sym_len_Ws, sym_len_We = calculate_weights_indices(
in_W, out_W, scale, kernel, kernel_width, antialiasing)
# process H dimension
# symmetric copying
img_aug = torch.FloatTensor(in_C, in_H + sym_len_Hs + sym_len_He, in_W)
img_aug.narrow(1, sym_len_Hs, in_H).copy_(img)
sym_patch = img[:, :sym_len_Hs, :]
inv_idx = torch.arange(sym_patch.size(1) - 1, -1, -1).long()
sym_patch_inv = sym_patch.index_select(1, inv_idx)
img_aug.narrow(1, 0, sym_len_Hs).copy_(sym_patch_inv)
sym_patch = img[:, -sym_len_He:, :]
inv_idx = torch.arange(sym_patch.size(1) - 1, -1, -1).long()
sym_patch_inv = sym_patch.index_select(1, inv_idx)
img_aug.narrow(1, sym_len_Hs + in_H, sym_len_He).copy_(sym_patch_inv)
out_1 = torch.FloatTensor(in_C, out_H, in_W)
kernel_width = weights_H.size(1)
for i in range(out_H):
idx = int(indices_H[i][0])
for j in range(out_C):
out_1[j, i, :] = img_aug[j, idx:idx + kernel_width, :].transpose(0, 1).mv(weights_H[i])
# process W dimension
# symmetric copying
out_1_aug = torch.FloatTensor(in_C, out_H, in_W + sym_len_Ws + sym_len_We)
out_1_aug.narrow(2, sym_len_Ws, in_W).copy_(out_1)
sym_patch = out_1[:, :, :sym_len_Ws]
inv_idx = torch.arange(sym_patch.size(2) - 1, -1, -1).long()
sym_patch_inv = sym_patch.index_select(2, inv_idx)
out_1_aug.narrow(2, 0, sym_len_Ws).copy_(sym_patch_inv)
sym_patch = out_1[:, :, -sym_len_We:]
inv_idx = torch.arange(sym_patch.size(2) - 1, -1, -1).long()
sym_patch_inv = sym_patch.index_select(2, inv_idx)
out_1_aug.narrow(2, sym_len_Ws + in_W, sym_len_We).copy_(sym_patch_inv)
out_2 = torch.FloatTensor(in_C, out_H, out_W)
kernel_width = weights_W.size(1)
for i in range(out_W):
idx = int(indices_W[i][0])
for j in range(out_C):
out_2[j, :, i] = out_1_aug[j, :, idx:idx + kernel_width].mv(weights_W[i])
if need_squeeze:
out_2.squeeze_()
return out_2
# --------------------------------------------
# imresize for numpy image [0, 1]
# --------------------------------------------
def imresize_np(img, scale, antialiasing=True):
# Now the scale should be the same for H and W
# input: img: Numpy, HWC or HW [0,1]
# output: HWC or HW [0,1] w/o round
img = torch.from_numpy(img)
need_squeeze = True if img.dim() == 2 else False
if need_squeeze:
img.unsqueeze_(2)
in_H, in_W, in_C = img.size()
out_C, out_H, out_W = in_C, math.ceil(in_H * scale), math.ceil(in_W * scale)
kernel_width = 4
kernel = 'cubic'
# Return the desired dimension order for performing the resize. The
# strategy is to perform the resize first along the dimension with the
# smallest scale factor.
# Now we do not support this.
# get weights and indices
weights_H, indices_H, sym_len_Hs, sym_len_He = calculate_weights_indices(
in_H, out_H, scale, kernel, kernel_width, antialiasing)
weights_W, indices_W, sym_len_Ws, sym_len_We = calculate_weights_indices(
in_W, out_W, scale, kernel, kernel_width, antialiasing)
# process H dimension
# symmetric copying
img_aug = torch.FloatTensor(in_H + sym_len_Hs + sym_len_He, in_W, in_C)
img_aug.narrow(0, sym_len_Hs, in_H).copy_(img)
sym_patch = img[:sym_len_Hs, :, :]
inv_idx = torch.arange(sym_patch.size(0) - 1, -1, -1).long()
sym_patch_inv = sym_patch.index_select(0, inv_idx)
img_aug.narrow(0, 0, sym_len_Hs).copy_(sym_patch_inv)
sym_patch = img[-sym_len_He:, :, :]
inv_idx = torch.arange(sym_patch.size(0) - 1, -1, -1).long()
sym_patch_inv = sym_patch.index_select(0, inv_idx)
img_aug.narrow(0, sym_len_Hs + in_H, sym_len_He).copy_(sym_patch_inv)
out_1 = torch.FloatTensor(out_H, in_W, in_C)
kernel_width = weights_H.size(1)
for i in range(out_H):
idx = int(indices_H[i][0])
for j in range(out_C):
out_1[i, :, j] = img_aug[idx:idx + kernel_width, :, j].transpose(0, 1).mv(weights_H[i])
# process W dimension
# symmetric copying
out_1_aug = torch.FloatTensor(out_H, in_W + sym_len_Ws + sym_len_We, in_C)
out_1_aug.narrow(1, sym_len_Ws, in_W).copy_(out_1)
sym_patch = out_1[:, :sym_len_Ws, :]
inv_idx = torch.arange(sym_patch.size(1) - 1, -1, -1).long()
sym_patch_inv = sym_patch.index_select(1, inv_idx)
out_1_aug.narrow(1, 0, sym_len_Ws).copy_(sym_patch_inv)
sym_patch = out_1[:, -sym_len_We:, :]
inv_idx = torch.arange(sym_patch.size(1) - 1, -1, -1).long()
sym_patch_inv = sym_patch.index_select(1, inv_idx)
out_1_aug.narrow(1, sym_len_Ws + in_W, sym_len_We).copy_(sym_patch_inv)
out_2 = torch.FloatTensor(out_H, out_W, in_C)
kernel_width = weights_W.size(1)
for i in range(out_W):
idx = int(indices_W[i][0])
for j in range(out_C):
out_2[:, i, j] = out_1_aug[:, idx:idx + kernel_width, j].mv(weights_W[i])
if need_squeeze:
out_2.squeeze_()
return out_2.numpy()
if __name__ == '__main__':
img = imread_uint('test.bmp', 3)
# img = uint2single(img)
# img_bicubic = imresize_np(img, 1/4)
# imshow(single2uint(img_bicubic))
#
# img_tensor = single2tensor4(img)
# for i in range(8):
# imshow(np.concatenate((augment_img(img, i), tensor2single(augment_img_tensor4(img_tensor, i))), 1))
# patches = patches_from_image(img, p_size=128, p_overlap=0, p_max=200)
# imssave(patches,'a.png')
import cv2
import lmdb
import sys
from multiprocessing import Pool
from os import path as osp
from tqdm import tqdm
def make_lmdb_from_imgs(data_path,
lmdb_path,
img_path_list,
keys,
batch=5000,
compress_level=1,
multiprocessing_read=False,
n_thread=40,
map_size=None):
"""Make lmdb from images.
Contents of lmdb. The file structure is:
example.lmdb
├── data.mdb
├── lock.mdb
├── meta_info.txt
The data.mdb and lock.mdb are standard lmdb files and you can refer to
https://lmdb.readthedocs.io/en/release/ for more details.
The meta_info.txt is a specified txt file to record the meta information
of our datasets. It will be automatically created when preparing
datasets by our provided dataset tools.
Each line in the txt file records 1)image name (with extension),
2)image shape, and 3)compression level, separated by a white space.
For example, the meta information could be:
`000_00000000.png (720,1280,3) 1`, which means:
1) image name (with extension): 000_00000000.png;
2) image shape: (720,1280,3);
3) compression level: 1
We use the image name without extension as the lmdb key.
If `multiprocessing_read` is True, it will read all the images to memory
using multiprocessing. Thus, your server needs to have enough memory.
Args:
data_path (str): Data path for reading images.
lmdb_path (str): Lmdb save path.
img_path_list (str): Image path list.
keys (str): Used for lmdb keys.
batch (int): After processing batch images, lmdb commits.
Default: 5000.
compress_level (int): Compress level when encoding images. Default: 1.
multiprocessing_read (bool): Whether use multiprocessing to read all
the images to memory. Default: False.
n_thread (int): For multiprocessing.
map_size (int | None): Map size for lmdb env. If None, use the
estimated size from images. Default: None
"""
assert len(img_path_list) == len(keys), ('img_path_list and keys should have the same length, '
f'but got {len(img_path_list)} and {len(keys)}')
print(f'Create lmdb for {data_path}, save to {lmdb_path}...')
print(f'Totoal images: {len(img_path_list)}')
if not lmdb_path.endswith('.lmdb'):
raise ValueError("lmdb_path must end with '.lmdb'.")
if osp.exists(lmdb_path):
print(f'Folder {lmdb_path} already exists. Exit.')
sys.exit(1)
if multiprocessing_read:
# read all the images to memory (multiprocessing)
dataset = {} # use dict to keep the order for multiprocessing
shapes = {}
print(f'Read images with multiprocessing, #thread: {n_thread} ...')
pbar = tqdm(total=len(img_path_list), unit='image')
def callback(arg):
"""get the image data and update pbar."""
key, dataset[key], shapes[key] = arg
pbar.update(1)
pbar.set_description(f'Read {key}')
pool = Pool(n_thread)
for path, key in zip(img_path_list, keys):
pool.apply_async(read_img_worker, args=(osp.join(data_path, path), key, compress_level), callback=callback)
pool.close()
pool.join()
pbar.close()
print(f'Finish reading {len(img_path_list)} images.')
# create lmdb environment
if map_size is None:
# obtain data size for one image
img = cv2.imread(osp.join(data_path, img_path_list[0]), cv2.IMREAD_UNCHANGED)
_, img_byte = cv2.imencode('.png', img, [cv2.IMWRITE_PNG_COMPRESSION, compress_level])
data_size_per_img = img_byte.nbytes
print('Data size per image is: ', data_size_per_img)
data_size = data_size_per_img * len(img_path_list)
map_size = data_size * 10
env = lmdb.open(lmdb_path, map_size=map_size)
# write data to lmdb
pbar = tqdm(total=len(img_path_list), unit='chunk')
txn = env.begin(write=True)
txt_file = open(osp.join(lmdb_path, 'meta_info.txt'), 'w')
for idx, (path, key) in enumerate(zip(img_path_list, keys)):
pbar.update(1)
pbar.set_description(f'Write {key}')
key_byte = key.encode('ascii')
if multiprocessing_read:
img_byte = dataset[key]
h, w, c = shapes[key]
else:
_, img_byte, img_shape = read_img_worker(osp.join(data_path, path), key, compress_level)
h, w, c = img_shape
txn.put(key_byte, img_byte)
# write meta information
txt_file.write(f'{key}.png ({h},{w},{c}) {compress_level}\n')
if idx % batch == 0:
txn.commit()
txn = env.begin(write=True)
pbar.close()
txn.commit()
env.close()
txt_file.close()
print('\nFinish writing lmdb.')
def read_img_worker(path, key, compress_level):
"""Read image worker.
Args:
path (str): Image path.
key (str): Image key.
compress_level (int): Compress level when encoding images.
Returns:
str: Image key.
byte: Image byte.
tuple[int]: Image shape.
"""
img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
# deal with `libpng error: Read Error`
if img is None:
print(f'To deal with `libpng error: Read Error`, use PIL to load {path}')
from PIL import Image
import numpy as np
img = Image.open(path)
img = np.asanyarray(img)
img = img[:, :, [2, 1, 0]]
if img.ndim == 2:
h, w = img.shape
c = 1
else:
h, w, c = img.shape
_, img_byte = cv2.imencode('.png', img, [cv2.IMWRITE_PNG_COMPRESSION, compress_level])
return (key, img_byte, (h, w, c))
class LmdbMaker():
"""LMDB Maker.
Args:
lmdb_path (str): Lmdb save path.
map_size (int): Map size for lmdb env. Default: 1024 ** 4, 1TB.
batch (int): After processing batch images, lmdb commits.
Default: 5000.
compress_level (int): Compress level when encoding images. Default: 1.
"""
def __init__(self, lmdb_path, map_size=1024**4, batch=5000, compress_level=1):
if not lmdb_path.endswith('.lmdb'):
raise ValueError("lmdb_path must end with '.lmdb'.")
if osp.exists(lmdb_path):
print(f'Folder {lmdb_path} already exists. Exit.')
sys.exit(1)
self.lmdb_path = lmdb_path
self.batch = batch
self.compress_level = compress_level
self.env = lmdb.open(lmdb_path, map_size=map_size)
self.txn = self.env.begin(write=True)
self.txt_file = open(osp.join(lmdb_path, 'meta_info.txt'), 'w')
self.counter = 0
def put(self, img_byte, key, img_shape):
self.counter += 1
key_byte = key.encode('ascii')
self.txn.put(key_byte, img_byte)
# write meta information
h, w, c = img_shape
self.txt_file.write(f'{key}.png ({h},{w},{c}) {self.compress_level}\n')
if self.counter % self.batch == 0:
self.txn.commit()
self.txn = self.env.begin(write=True)
def close(self):
self.txn.commit()
self.env.close()
self.txt_file.close()
import sys
import datetime
import logging
'''
# --------------------------------------------
# Kai Zhang (github: https://github.com/cszn)
# 03/Mar/2019
# --------------------------------------------
# https://github.com/xinntao/BasicSR
# --------------------------------------------
'''
def log(*args, **kwargs):
print(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S:"), *args, **kwargs)
'''
# --------------------------------------------
# logger
# --------------------------------------------
'''
def logger_info(logger_name, log_path='default_logger.log'):
''' set up logger
modified by Kai Zhang (github: https://github.com/cszn)
'''
log = logging.getLogger(logger_name)
if log.hasHandlers():
print('LogHandlers exist!')
else:
print('LogHandlers setup!')
level = logging.INFO
formatter = logging.Formatter('%(asctime)s.%(msecs)03d : %(message)s', datefmt='%y-%m-%d %H:%M:%S')
fh = logging.FileHandler(log_path, mode='a')
fh.setFormatter(formatter)
log.setLevel(level)
log.addHandler(fh)
# print(len(log.handlers))
sh = logging.StreamHandler()
sh.setFormatter(formatter)
log.addHandler(sh)
'''
# --------------------------------------------
# print to file and std_out simultaneously
# --------------------------------------------
'''
class logger_print(object):
def __init__(self, log_path="default.log"):
self.terminal = sys.stdout
self.log = open(log_path, 'a')
def write(self, message):
self.terminal.write(message)
self.log.write(message) # write the message
def flush(self):
pass
# -*- coding: utf-8 -*-
import numpy as np
import cv2
import torch
from utils import utils_image as util
import random
from scipy import ndimage
import scipy
import scipy.stats as ss
from scipy.interpolate import interp2d
from scipy.linalg import orth
"""
# --------------------------------------------
# Super-Resolution
# --------------------------------------------
#
# Kai Zhang (cskaizhang@gmail.com)
# https://github.com/cszn
# From 2019/03--2021/08
# --------------------------------------------
"""
def modcrop_np(img, sf):
'''
Args:
img: numpy image, WxH or WxHxC
sf: scale factor
Return:
cropped image
'''
w, h = img.shape[:2]
im = np.copy(img)
return im[:w - w % sf, :h - h % sf, ...]
"""
# --------------------------------------------
# anisotropic Gaussian kernels
# --------------------------------------------
"""
def analytic_kernel(k):
"""Calculate the X4 kernel from the X2 kernel (for proof see appendix in paper)"""
k_size = k.shape[0]
# Calculate the big kernels size
big_k = np.zeros((3 * k_size - 2, 3 * k_size - 2))
# Loop over the small kernel to fill the big one
for r in range(k_size):
for c in range(k_size):
big_k[2 * r:2 * r + k_size, 2 * c:2 * c + k_size] += k[r, c] * k
# Crop the edges of the big kernel to ignore very small values and increase run time of SR
crop = k_size // 2
cropped_big_k = big_k[crop:-crop, crop:-crop]
# Normalize to 1
return cropped_big_k / cropped_big_k.sum()
def anisotropic_Gaussian(ksize=15, theta=np.pi, l1=6, l2=6):
""" generate an anisotropic Gaussian kernel
Args:
ksize : e.g., 15, kernel size
theta : [0, pi], rotation angle range
l1 : [0.1,50], scaling of eigenvalues
l2 : [0.1,l1], scaling of eigenvalues
If l1 = l2, will get an isotropic Gaussian kernel.
Returns:
k : kernel
"""
v = np.dot(np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]]), np.array([1., 0.]))
V = np.array([[v[0], v[1]], [v[1], -v[0]]])
D = np.array([[l1, 0], [0, l2]])
Sigma = np.dot(np.dot(V, D), np.linalg.inv(V))
k = gm_blur_kernel(mean=[0, 0], cov=Sigma, size=ksize)
return k
def gm_blur_kernel(mean, cov, size=15):
center = size / 2.0 + 0.5
k = np.zeros([size, size])
for y in range(size):
for x in range(size):
cy = y - center + 1
cx = x - center + 1
k[y, x] = ss.multivariate_normal.pdf([cx, cy], mean=mean, cov=cov)
k = k / np.sum(k)
return k
def shift_pixel(x, sf, upper_left=True):
"""shift pixel for super-resolution with different scale factors
Args:
x: WxHxC or WxH
sf: scale factor
upper_left: shift direction
"""
h, w = x.shape[:2]
shift = (sf-1)*0.5
xv, yv = np.arange(0, w, 1.0), np.arange(0, h, 1.0)
if upper_left:
x1 = xv + shift
y1 = yv + shift
else:
x1 = xv - shift
y1 = yv - shift
x1 = np.clip(x1, 0, w-1)
y1 = np.clip(y1, 0, h-1)
if x.ndim == 2:
x = interp2d(xv, yv, x)(x1, y1)
if x.ndim == 3:
for i in range(x.shape[-1]):
x[:, :, i] = interp2d(xv, yv, x[:, :, i])(x1, y1)
return x
def blur(x, k):
'''
x: image, NxcxHxW
k: kernel, Nx1xhxw
'''
n, c = x.shape[:2]
p1, p2 = (k.shape[-2]-1)//2, (k.shape[-1]-1)//2
x = torch.nn.functional.pad(x, pad=(p1, p2, p1, p2), mode='replicate')
k = k.repeat(1,c,1,1)
k = k.view(-1, 1, k.shape[2], k.shape[3])
x = x.view(1, -1, x.shape[2], x.shape[3])
x = torch.nn.functional.conv2d(x, k, bias=None, stride=1, padding=0, groups=n*c)
x = x.view(n, c, x.shape[2], x.shape[3])
return x
def gen_kernel(k_size=np.array([15, 15]), scale_factor=np.array([4, 4]), min_var=0.6, max_var=10., noise_level=0):
""""
# modified version of https://github.com/assafshocher/BlindSR_dataset_generator
# Kai Zhang
# min_var = 0.175 * sf # variance of the gaussian kernel will be sampled between min_var and max_var
# max_var = 2.5 * sf
"""
# Set random eigen-vals (lambdas) and angle (theta) for COV matrix
lambda_1 = min_var + np.random.rand() * (max_var - min_var)
lambda_2 = min_var + np.random.rand() * (max_var - min_var)
theta = np.random.rand() * np.pi # random theta
noise = -noise_level + np.random.rand(*k_size) * noise_level * 2
# Set COV matrix using Lambdas and Theta
LAMBDA = np.diag([lambda_1, lambda_2])
Q = np.array([[np.cos(theta), -np.sin(theta)],
[np.sin(theta), np.cos(theta)]])
SIGMA = Q @ LAMBDA @ Q.T
INV_SIGMA = np.linalg.inv(SIGMA)[None, None, :, :]
# Set expectation position (shifting kernel for aligned image)
MU = k_size // 2 - 0.5*(scale_factor - 1) # - 0.5 * (scale_factor - k_size % 2)
MU = MU[None, None, :, None]
# Create meshgrid for Gaussian
[X,Y] = np.meshgrid(range(k_size[0]), range(k_size[1]))
Z = np.stack([X, Y], 2)[:, :, :, None]
# Calcualte Gaussian for every pixel of the kernel
ZZ = Z-MU
ZZ_t = ZZ.transpose(0,1,3,2)
raw_kernel = np.exp(-0.5 * np.squeeze(ZZ_t @ INV_SIGMA @ ZZ)) * (1 + noise)
# shift the kernel so it will be centered
#raw_kernel_centered = kernel_shift(raw_kernel, scale_factor)
# Normalize the kernel and return
#kernel = raw_kernel_centered / np.sum(raw_kernel_centered)
kernel = raw_kernel / np.sum(raw_kernel)
return kernel
def fspecial_gaussian(hsize, sigma):
hsize = [hsize, hsize]
siz = [(hsize[0]-1.0)/2.0, (hsize[1]-1.0)/2.0]
std = sigma
[x, y] = np.meshgrid(np.arange(-siz[1], siz[1]+1), np.arange(-siz[0], siz[0]+1))
arg = -(x*x + y*y)/(2*std*std)
h = np.exp(arg)
h[h < scipy.finfo(float).eps * h.max()] = 0
sumh = h.sum()
if sumh != 0:
h = h/sumh
return h
def fspecial_laplacian(alpha):
alpha = max([0, min([alpha,1])])
h1 = alpha/(alpha+1)
h2 = (1-alpha)/(alpha+1)
h = [[h1, h2, h1], [h2, -4/(alpha+1), h2], [h1, h2, h1]]
h = np.array(h)
return h
def fspecial(filter_type, *args, **kwargs):
'''
python code from:
https://github.com/ronaldosena/imagens-medicas-2/blob/40171a6c259edec7827a6693a93955de2bd39e76/Aulas/aula_2_-_uniform_filter/matlab_fspecial.py
'''
if filter_type == 'gaussian':
return fspecial_gaussian(*args, **kwargs)
if filter_type == 'laplacian':
return fspecial_laplacian(*args, **kwargs)
"""
# --------------------------------------------
# degradation models
# --------------------------------------------
"""
def bicubic_degradation(x, sf=3):
'''
Args:
x: HxWxC image, [0, 1]
sf: down-scale factor
Return:
bicubicly downsampled LR image
'''
x = util.imresize_np(x, scale=1/sf)
return x
def srmd_degradation(x, k, sf=3):
''' blur + bicubic downsampling
Args:
x: HxWxC image, [0, 1]
k: hxw, double
sf: down-scale factor
Return:
downsampled LR image
Reference:
@inproceedings{zhang2018learning,
title={Learning a single convolutional super-resolution network for multiple degradations},
author={Zhang, Kai and Zuo, Wangmeng and Zhang, Lei},
booktitle={IEEE Conference on Computer Vision and Pattern Recognition},
pages={3262--3271},
year={2018}
}
'''
x = ndimage.filters.convolve(x, np.expand_dims(k, axis=2), mode='wrap') # 'nearest' | 'mirror'
x = bicubic_degradation(x, sf=sf)
return x
def dpsr_degradation(x, k, sf=3):
''' bicubic downsampling + blur
Args:
x: HxWxC image, [0, 1]
k: hxw, double
sf: down-scale factor
Return:
downsampled LR image
Reference:
@inproceedings{zhang2019deep,
title={Deep Plug-and-Play Super-Resolution for Arbitrary Blur Kernels},
author={Zhang, Kai and Zuo, Wangmeng and Zhang, Lei},
booktitle={IEEE Conference on Computer Vision and Pattern Recognition},
pages={1671--1681},
year={2019}
}
'''
x = bicubic_degradation(x, sf=sf)
x = ndimage.filters.convolve(x, np.expand_dims(k, axis=2), mode='wrap')
return x
def classical_degradation(x, k, sf=3):
''' blur + downsampling
Args:
x: HxWxC image, [0, 1]/[0, 255]
k: hxw, double
sf: down-scale factor
Return:
downsampled LR image
'''
x = ndimage.filters.convolve(x, np.expand_dims(k, axis=2), mode='wrap')
#x = filters.correlate(x, np.expand_dims(np.flip(k), axis=2))
st = 0
return x[st::sf, st::sf, ...]
def add_sharpening(img, weight=0.5, radius=50, threshold=10):
"""USM sharpening. borrowed from real-ESRGAN
Input image: I; Blurry image: B.
1. K = I + weight * (I - B)
2. Mask = 1 if abs(I - B) > threshold, else: 0
3. Blur mask:
4. Out = Mask * K + (1 - Mask) * I
Args:
img (Numpy array): Input image, HWC, BGR; float32, [0, 1].
weight (float): Sharp weight. Default: 1.
radius (float): Kernel size of Gaussian blur. Default: 50.
threshold (int):
"""
if radius % 2 == 0:
radius += 1
blur = cv2.GaussianBlur(img, (radius, radius), 0)
residual = img - blur
mask = np.abs(residual) * 255 > threshold
mask = mask.astype('float32')
soft_mask = cv2.GaussianBlur(mask, (radius, radius), 0)
K = img + weight * residual
K = np.clip(K, 0, 1)
return soft_mask * K + (1 - soft_mask) * img
def add_blur(img, sf=4):
wd2 = 4.0 + sf
wd = 2.0 + 0.2*sf
if random.random() < 0.5:
l1 = wd2*random.random()
l2 = wd2*random.random()
k = anisotropic_Gaussian(ksize=2*random.randint(2,11)+3, theta=random.random()*np.pi, l1=l1, l2=l2)
else:
k = fspecial('gaussian', 2*random.randint(2,11)+3, wd*random.random())
img = ndimage.filters.convolve(img, np.expand_dims(k, axis=2), mode='mirror')
return img
def add_resize(img, sf=4):
rnum = np.random.rand()
if rnum > 0.8: # up
sf1 = random.uniform(1, 2)
elif rnum < 0.7: # down
sf1 = random.uniform(0.5/sf, 1)
else:
sf1 = 1.0
img = cv2.resize(img, (int(sf1*img.shape[1]), int(sf1*img.shape[0])), interpolation=random.choice([1, 2, 3]))
img = np.clip(img, 0.0, 1.0)
return img
def add_Gaussian_noise(img, noise_level1=2, noise_level2=25):
noise_level = random.randint(noise_level1, noise_level2)
rnum = np.random.rand()
if rnum > 0.6: # add color Gaussian noise
img += np.random.normal(0, noise_level/255.0, img.shape).astype(np.float32)
elif rnum < 0.4: # add grayscale Gaussian noise
img += np.random.normal(0, noise_level/255.0, (*img.shape[:2], 1)).astype(np.float32)
else: # add noise
L = noise_level2/255.
D = np.diag(np.random.rand(3))
U = orth(np.random.rand(3,3))
conv = np.dot(np.dot(np.transpose(U), D), U)
img += np.random.multivariate_normal([0,0,0], np.abs(L**2*conv), img.shape[:2]).astype(np.float32)
img = np.clip(img, 0.0, 1.0)
return img
def add_speckle_noise(img, noise_level1=2, noise_level2=25):
noise_level = random.randint(noise_level1, noise_level2)
img = np.clip(img, 0.0, 1.0)
rnum = random.random()
if rnum > 0.6:
img += img*np.random.normal(0, noise_level/255.0, img.shape).astype(np.float32)
elif rnum < 0.4:
img += img*np.random.normal(0, noise_level/255.0, (*img.shape[:2], 1)).astype(np.float32)
else:
L = noise_level2/255.
D = np.diag(np.random.rand(3))
U = orth(np.random.rand(3,3))
conv = np.dot(np.dot(np.transpose(U), D), U)
img += img*np.random.multivariate_normal([0,0,0], np.abs(L**2*conv), img.shape[:2]).astype(np.float32)
img = np.clip(img, 0.0, 1.0)
return img
def add_Poisson_noise(img):
img = np.clip((img * 255.0).round(), 0, 255) / 255.
vals = 10**(2*random.random()+2.0) # [2, 4]
if random.random() < 0.5:
img = np.random.poisson(img * vals).astype(np.float32) / vals
else:
img_gray = np.dot(img[...,:3], [0.299, 0.587, 0.114])
img_gray = np.clip((img_gray * 255.0).round(), 0, 255) / 255.
noise_gray = np.random.poisson(img_gray * vals).astype(np.float32) / vals - img_gray
img += noise_gray[:, :, np.newaxis]
img = np.clip(img, 0.0, 1.0)
return img
def add_JPEG_noise(img):
quality_factor = random.randint(30, 95)
img = cv2.cvtColor(util.single2uint(img), cv2.COLOR_RGB2BGR)
result, encimg = cv2.imencode('.jpg', img, [int(cv2.IMWRITE_JPEG_QUALITY), quality_factor])
img = cv2.imdecode(encimg, 1)
img = cv2.cvtColor(util.uint2single(img), cv2.COLOR_BGR2RGB)
return img
def random_crop(lq, hq, sf=4, lq_patchsize=64):
h, w = lq.shape[:2]
rnd_h = random.randint(0, h-lq_patchsize)
rnd_w = random.randint(0, w-lq_patchsize)
lq = lq[rnd_h:rnd_h + lq_patchsize, rnd_w:rnd_w + lq_patchsize, :]
rnd_h_H, rnd_w_H = int(rnd_h * sf), int(rnd_w * sf)
hq = hq[rnd_h_H:rnd_h_H + lq_patchsize*sf, rnd_w_H:rnd_w_H + lq_patchsize*sf, :]
return lq, hq
def degradation_bsrgan(img, sf=4, lq_patchsize=72, isp_model=None):
"""
This is the degradation model of BSRGAN from the paper
"Designing a Practical Degradation Model for Deep Blind Image Super-Resolution"
----------
img: HXWXC, [0, 1], its size should be large than (lq_patchsizexsf)x(lq_patchsizexsf)
sf: scale factor
isp_model: camera ISP model
Returns
-------
img: low-quality patch, size: lq_patchsizeXlq_patchsizeXC, range: [0, 1]
hq: corresponding high-quality patch, size: (lq_patchsizexsf)X(lq_patchsizexsf)XC, range: [0, 1]
"""
isp_prob, jpeg_prob, scale2_prob = 0.25, 0.9, 0.25
sf_ori = sf
h1, w1 = img.shape[:2]
img = img.copy()[:w1 - w1 % sf, :h1 - h1 % sf, ...] # mod crop
h, w = img.shape[:2]
if h < lq_patchsize*sf or w < lq_patchsize*sf:
raise ValueError(f'img size ({h1}X{w1}) is too small!')
hq = img.copy()
if sf == 4 and random.random() < scale2_prob: # downsample1
if np.random.rand() < 0.5:
img = cv2.resize(img, (int(1/2*img.shape[1]), int(1/2*img.shape[0])), interpolation=random.choice([1,2,3]))
else:
img = util.imresize_np(img, 1/2, True)
img = np.clip(img, 0.0, 1.0)
sf = 2
shuffle_order = random.sample(range(7), 7)
idx1, idx2 = shuffle_order.index(2), shuffle_order.index(3)
if idx1 > idx2: # keep downsample3 last
shuffle_order[idx1], shuffle_order[idx2] = shuffle_order[idx2], shuffle_order[idx1]
for i in shuffle_order:
if i == 0:
img = add_blur(img, sf=sf)
elif i == 1:
img = add_blur(img, sf=sf)
elif i == 2:
a, b = img.shape[1], img.shape[0]
# downsample2
if random.random() < 0.75:
sf1 = random.uniform(1,2*sf)
img = cv2.resize(img, (int(1/sf1*img.shape[1]), int(1/sf1*img.shape[0])), interpolation=random.choice([1,2,3]))
else:
k = fspecial('gaussian', 25, random.uniform(0.1, 0.6*sf))
k_shifted = shift_pixel(k, sf)
k_shifted = k_shifted/k_shifted.sum() # blur with shifted kernel
img = ndimage.filters.convolve(img, np.expand_dims(k_shifted, axis=2), mode='mirror')
img = img[0::sf, 0::sf, ...] # nearest downsampling
img = np.clip(img, 0.0, 1.0)
elif i == 3:
# downsample3
img = cv2.resize(img, (int(1/sf*a), int(1/sf*b)), interpolation=random.choice([1,2,3]))
img = np.clip(img, 0.0, 1.0)
elif i == 4:
# add Gaussian noise
img = add_Gaussian_noise(img, noise_level1=2, noise_level2=25)
elif i == 5:
# add JPEG noise
if random.random() < jpeg_prob:
img = add_JPEG_noise(img)
elif i == 6:
# add processed camera sensor noise
if random.random() < isp_prob and isp_model is not None:
with torch.no_grad():
img, hq = isp_model.forward(img.copy(), hq)
# add final JPEG compression noise
img = add_JPEG_noise(img)
# random crop
img, hq = random_crop(img, hq, sf_ori, lq_patchsize)
return img, hq
def add_Gaussian_noise_color(img, noise_level1=2, noise_level2=25, color_ratio=1):
noise_level = random.randint(noise_level1, noise_level2)
img += np.random.normal(0, noise_level/255.0, img.shape).astype(np.float32)
img = np.clip(img, 0.0, 1.0)
return img
def input_mask(image, prob_=0.75, value=0.1):
"""
Multiplicative bernoulli
"""
x = image.shape[0]
y = image.shape[1]
mask = np.random.choice([0, 1], size=(x, y), p=[prob_, 1 - prob_])
mask = np.repeat(mask[:, :, np.newaxis], 3, axis=2)
noise_image = np.multiply(image, mask)
noise_image = noise_image - value + value * mask
return noise_image
def input_mask_with_noise(img, sf=1, lq_patchsize=64, noise_level=15, if_mask=True, mask1=75, mask2=75):
h1, w1 = img.shape[:2]
img = img.copy()[:w1 - w1 % sf, :h1 - h1 % sf, ...] # mod crop
h, w = img.shape[:2]
if h < lq_patchsize*sf or w < lq_patchsize*sf:
raise ValueError(f'img size ({h1}X{w1}) is too small!')
hq = img.copy()
if noise_level > 0:
img = add_Gaussian_noise_color(img, noise_level1=noise_level, noise_level2=noise_level)
img, hq = random_crop(img, hq, sf, lq_patchsize)
if if_mask:
prob = random.randint(mask1, mask2) / 100
# prob = 0.75
img = input_mask(img, prob_=prob)
return img, hq
# def add_m_bernoulli_noise_with_mask(image, mask, prob_=0.75, value=0.1):
# noise_image = np.multiply(image, mask)
# noise_image = noise_image - value + value * mask
# return noise_image
# def input_mask_with_noise_mask(img, sf=4, shuffle_prob=0.5, use_sharp=False, lq_patchsize=64,
# isp_model=None, noise_level=15, if_mask=True, mask1=75, mask2=75):
# # print(img.shape)
# h1, w1 = img.shape[:2]
# img = img.copy()[:w1 - w1 % sf, :h1 - h1 % sf, ...] # mod crop
# h, w = img.shape[:2]
# if h < lq_patchsize*sf or w < lq_patchsize*sf:
# raise ValueError(f'img size ({h1}X{w1}) is too small!')
# if use_sharp:
# img = add_sharpening(img)
# hq = img.copy()
# if noise_level > 0:
# img = add_Gaussian_noise_color(img, noise_level1=noise_level, noise_level2=noise_level)
# img, hq = random_crop(img, hq, sf, lq_patchsize)
# if if_mask:
# prob = random.randint(mask1, mask2) / 100
# x = img.shape[0]
# y = img.shape[1]
# mask = np.random.choice([0, 1], size=(x, y), p=[prob, 1 - prob])
# mask = np.repeat(mask[:, :, np.newaxis], 3, axis=2)
# img = add_m_bernoulli_noise_with_mask(img, mask, prob_=prob)
# mask_inv = mask * -1 + 1
# hq = add_m_bernoulli_noise_with_mask(hq, mask_inv, prob_=prob, value=0)
# # print(type(mask))
# return img, hq, mask_inv
# if __name__ == '__main__':
# img = util.imread_uint('utils/test.png', 3)
# img = util.uint2single(img)
# sf = 4
# for i in range(20):
# img_lq, img_hq = degradation_bsrgan(img, sf=sf, lq_patchsize=72)
# print(i)
# lq_nearest = cv2.resize(util.single2uint(img_lq), (int(sf*img_lq.shape[1]), int(sf*img_lq.shape[0])), interpolation=0)
# img_concat = np.concatenate([lq_nearest, util.single2uint(img_hq)], axis=1)
# util.imsave(img_concat, str(i)+'.png')
import os
import json
import scipy.io as spio
import pandas as pd
def loadmat(filename):
'''
this function should be called instead of direct spio.loadmat
as it cures the problem of not properly recovering python dictionaries
from mat files. It calls the function check keys to cure all entries
which are still mat-objects
'''
data = spio.loadmat(filename, struct_as_record=False, squeeze_me=True)
return dict_to_nonedict(_check_keys(data))
def _check_keys(dict):
'''
checks if entries in dictionary are mat-objects. If yes
todict is called to change them to nested dictionaries
'''
for key in dict:
if isinstance(dict[key], spio.matlab.mio5_params.mat_struct):
dict[key] = _todict(dict[key])
return dict
def _todict(matobj):
'''
A recursive function which constructs from matobjects nested dictionaries
'''
dict = {}
for strg in matobj._fieldnames:
elem = matobj.__dict__[strg]
if isinstance(elem, spio.matlab.mio5_params.mat_struct):
dict[strg] = _todict(elem)
else:
dict[strg] = elem
return dict
def dict_to_nonedict(opt):
if isinstance(opt, dict):
new_opt = dict()
for key, sub_opt in opt.items():
new_opt[key] = dict_to_nonedict(sub_opt)
return NoneDict(**new_opt)
elif isinstance(opt, list):
return [dict_to_nonedict(sub_opt) for sub_opt in opt]
else:
return opt
class NoneDict(dict):
def __missing__(self, key):
return None
def mat2json(mat_path=None, filepath = None):
"""
Converts .mat file to .json and writes new file
Parameters
----------
mat_path: Str
path/filename .mat存放路径
filepath: Str
如果需要保存成json, 添加这一路径. 否则不保存
Returns
返回转化的字典
-------
None
Examples
--------
>>> mat2json(blah blah)
"""
matlabFile = loadmat(mat_path)
#pop all those dumb fields that don't let you jsonize file
matlabFile.pop('__header__')
matlabFile.pop('__version__')
matlabFile.pop('__globals__')
#jsonize the file - orientation is 'index'
matlabFile = pd.Series(matlabFile).to_json()
if filepath:
json_path = os.path.splitext(os.path.split(mat_path)[1])[0] + '.json'
with open(json_path, 'w') as f:
f.write(matlabFile)
return matlabFile
\ No newline at end of file
# -*- coding: utf-8 -*-
import numpy as np
import torch
from collections import OrderedDict
# import scipy.io as io
import hdf5storage
"""
# --------------------------------------------
# Convert matconvnet SimpleNN model into pytorch model
# --------------------------------------------
# Kai Zhang (cskaizhang@gmail.com)
# https://github.com/cszn
# 28/Nov/2019
# --------------------------------------------
"""
def weights2tensor(x, squeeze=False, in_features=None, out_features=None):
"""Modified version of https://github.com/albanie/pytorch-mcn
Adjust memory layout and load weights as torch tensor
Args:
x (ndaray): a numpy array, corresponding to a set of network weights
stored in column major order
squeeze (bool) [False]: whether to squeeze the tensor (i.e. remove
singletons from the trailing dimensions. So after converting to
pytorch layout (C_out, C_in, H, W), if the shape is (A, B, 1, 1)
it will be reshaped to a matrix with shape (A,B).
in_features (int :: None): used to reshape weights for a linear block.
out_features (int :: None): used to reshape weights for a linear block.
Returns:
torch.tensor: a permuted sets of weights, matching the pytorch layout
convention
"""
if x.ndim == 4:
x = x.transpose((3, 2, 0, 1))
# for FFDNet, pixel-shuffle layer
# if x.shape[1]==13:
# x=x[:,[0,2,1,3, 4,6,5,7, 8,10,9,11, 12],:,:]
# if x.shape[0]==12:
# x=x[[0,2,1,3, 4,6,5,7, 8,10,9,11],:,:,:]
# if x.shape[1]==5:
# x=x[:,[0,2,1,3, 4],:,:]
# if x.shape[0]==4:
# x=x[[0,2,1,3],:,:,:]
## for SRMD, pixel-shuffle layer
# if x.shape[0]==12:
# x=x[[0,2,1,3, 4,6,5,7, 8,10,9,11],:,:,:]
# if x.shape[0]==27:
# x=x[[0,3,6,1,4,7,2,5,8, 0+9,3+9,6+9,1+9,4+9,7+9,2+9,5+9,8+9, 0+18,3+18,6+18,1+18,4+18,7+18,2+18,5+18,8+18],:,:,:]
# if x.shape[0]==48:
# x=x[[0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15, 0+16,4+16,8+16,12+16,1+16,5+16,9+16,13+16,2+16,6+16,10+16,14+16,3+16,7+16,11+16,15+16, 0+32,4+32,8+32,12+32,1+32,5+32,9+32,13+32,2+32,6+32,10+32,14+32,3+32,7+32,11+32,15+32],:,:,:]
elif x.ndim == 3: # add by Kai
x = x[:,:,:,None]
x = x.transpose((3, 2, 0, 1))
elif x.ndim == 2:
if x.shape[1] == 1:
x = x.flatten()
if squeeze:
if in_features and out_features:
x = x.reshape((out_features, in_features))
x = np.squeeze(x)
return torch.from_numpy(np.ascontiguousarray(x))
def save_model(network, save_path):
state_dict = network.state_dict()
for key, param in state_dict.items():
state_dict[key] = param.cpu()
torch.save(state_dict, save_path)
if __name__ == '__main__':
# from utils import utils_logger
# import logging
# utils_logger.logger_info('a', 'a.log')
# logger = logging.getLogger('a')
#
# mcn = hdf5storage.loadmat('/model_zoo/matfile/FFDNet_Clip_gray.mat')
mcn = hdf5storage.loadmat('models/modelcolor.mat')
#logger.info(mcn['CNNdenoiser'][0][0][0][1][0][0][0][0])
mat_net = OrderedDict()
for idx in range(25):
mat_net[str(idx)] = OrderedDict()
count = -1
print(idx)
for i in range(13):
if mcn['CNNdenoiser'][0][idx][0][i][0][0][0][0] == 'conv':
count += 1
w = mcn['CNNdenoiser'][0][idx][0][i][0][1][0][0]
# print(w.shape)
w = weights2tensor(w)
# print(w.shape)
b = mcn['CNNdenoiser'][0][idx][0][i][0][1][0][1]
b = weights2tensor(b)
print(b.shape)
mat_net[str(idx)]['model.{:d}.weight'.format(count*2)] = w
mat_net[str(idx)]['model.{:d}.bias'.format(count*2)] = b
torch.save(mat_net, 'model_zoo/modelcolor.pth')
# from models.network_dncnn import IRCNN as net
# network = net(in_nc=3, out_nc=3, nc=64)
# state_dict = network.state_dict()
#
# #show_kv(state_dict)
#
# for i in range(len(mcn['net'][0][0][0])):
# print(mcn['net'][0][0][0][i][0][0][0][0])
#
# count = -1
# mat_net = OrderedDict()
# for i in range(len(mcn['net'][0][0][0])):
# if mcn['net'][0][0][0][i][0][0][0][0] == 'conv':
#
# count += 1
# w = mcn['net'][0][0][0][i][0][1][0][0]
# print(w.shape)
# w = weights2tensor(w)
# print(w.shape)
#
# b = mcn['net'][0][0][0][i][0][1][0][1]
# b = weights2tensor(b)
# print(b.shape)
#
# mat_net['model.{:d}.weight'.format(count*2)] = w
# mat_net['model.{:d}.bias'.format(count*2)] = b
#
# torch.save(mat_net, 'E:/pytorch/KAIR_ongoing/model_zoo/ffdnet_gray_clip.pth')
#
#
#
# crt_net = torch.load('E:/pytorch/KAIR_ongoing/model_zoo/imdn_x4.pth')
# def show_kv(net):
# for k, v in net.items():
# print(k)
#
# show_kv(crt_net)
# from models.network_dncnn import DnCNN as net
# network = net(in_nc=2, out_nc=1, nc=64, nb=20, act_mode='R')
# from models.network_srmd import SRMD as net
# #network = net(in_nc=1, out_nc=1, nc=64, nb=15, act_mode='R')
# network = net(in_nc=19, out_nc=3, nc=128, nb=12, upscale=4, act_mode='R', upsample_mode='pixelshuffle')
#
# from models.network_rrdb import RRDB as net
# network = net(in_nc=3, out_nc=3, nc=64, nb=23, gc=32, upscale=4, act_mode='L', upsample_mode='upconv')
#
# state_dict = network.state_dict()
# for key, param in state_dict.items():
# print(key)
# from models.network_imdn import IMDN as net
# network = net(in_nc=3, out_nc=3, nc=64, nb=8, upscale=4, act_mode='L', upsample_mode='pixelshuffle')
# state_dict = network.state_dict()
# mat_net = OrderedDict()
# for ((key, param),(key2, param2)) in zip(state_dict.items(), crt_net.items()):
# mat_net[key] = param2
# torch.save(mat_net, 'model_zoo/imdn_x4_1.pth')
#
# net_old = torch.load('net_old.pth')
# def show_kv(net):
# for k, v in net.items():
# print(k)
#
# show_kv(net_old)
# from models.network_dpsr import MSRResNet_prior as net
# model = net(in_nc=4, out_nc=3, nc=96, nb=16, upscale=4, act_mode='R', upsample_mode='pixelshuffle')
# state_dict = network.state_dict()
# net_new = OrderedDict()
# for ((key, param),(key_old, param_old)) in zip(state_dict.items(), net_old.items()):
# net_new[key] = param_old
# torch.save(net_new, 'net_new.pth')
# print(key)
# print(param.size())
# run utils/utils_matconvnet.py
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment