Commit e2696ece authored by mashun1

controlnet
# all requirements, including those needed to auto-generate the docs
addict
future
lmdb
numpy
opencv-python
Pillow
pyyaml
recommonmark
requests
scikit-image
scipy
sphinx
sphinx_intl
sphinx_markdown_tables
sphinx_rtd_theme
tb-nightly
torch>=1.7
torchvision
tqdm
yapf
import argparse
import cv2
import glob
import os
import shutil
import torch

from basicsr.archs.basicvsr_arch import BasicVSR
from basicsr.data.data_util import read_img_seq
from basicsr.utils.img_util import tensor2img


def inference(imgs, imgnames, model, save_path):
    with torch.no_grad():
        outputs = model(imgs)
    # save imgs
    outputs = outputs.squeeze()
    outputs = list(outputs)
    for output, imgname in zip(outputs, imgnames):
        output = tensor2img(output)
        cv2.imwrite(os.path.join(save_path, f'{imgname}_BasicVSR.png'), output)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_path', type=str, default='experiments/pretrained_models/BasicVSR_REDS4.pth')
    parser.add_argument(
        '--input_path', type=str, default='datasets/REDS4/sharp_bicubic/000', help='input test image folder')
    parser.add_argument('--save_path', type=str, default='results/BasicVSR', help='save image path')
    parser.add_argument('--interval', type=int, default=15, help='interval size')
    args = parser.parse_args()

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # set up model
    model = BasicVSR(num_feat=64, num_block=30)
    model.load_state_dict(torch.load(args.model_path)['params'], strict=True)
    model.eval()
    model = model.to(device)

    os.makedirs(args.save_path, exist_ok=True)

    # extract images from video format files
    input_path = args.input_path
    use_ffmpeg = False
    if not os.path.isdir(input_path):
        use_ffmpeg = True
        video_name = os.path.splitext(os.path.split(args.input_path)[-1])[0]
        input_path = os.path.join('./BasicVSR_tmp', video_name)
        os.makedirs(input_path, exist_ok=True)
        os.system(f'ffmpeg -i {args.input_path} -qscale:v 1 -qmin 1 -qmax 1 -vsync 0 {input_path}/frame%08d.png')

    # load data and inference
    imgs_list = sorted(glob.glob(os.path.join(input_path, '*')))
    num_imgs = len(imgs_list)
    if num_imgs <= args.interval:  # too many images may cause CUDA out of memory
        imgs, imgnames = read_img_seq(imgs_list, return_imgname=True)
        imgs = imgs.unsqueeze(0).to(device)
        inference(imgs, imgnames, model, args.save_path)
    else:
        for idx in range(0, num_imgs, args.interval):
            interval = min(args.interval, num_imgs - idx)
            imgs, imgnames = read_img_seq(imgs_list[idx:idx + interval], return_imgname=True)
            imgs = imgs.unsqueeze(0).to(device)
            inference(imgs, imgnames, model, args.save_path)

    # delete ffmpeg output images
    if use_ffmpeg:
        shutil.rmtree(input_path)


if __name__ == '__main__':
    main()
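A minimal, dependency-free sketch of the interval-based chunking that main() performs above, on a dummy frame list (frame names and counts here are made up for illustration):

# chunk a 35-frame sequence into windows of at most 15 frames
frames = [f'frame{i:08d}' for i in range(35)]
interval = 15
for idx in range(0, len(frames), interval):
    chunk = frames[idx:idx + min(interval, len(frames) - idx)]
    print(idx, len(chunk))  # prints: 0 15, then 15 15, then 30 5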
import argparse
import cv2
import glob
import os
import shutil
import torch

from basicsr.archs.basicvsrpp_arch import BasicVSRPlusPlus
from basicsr.data.data_util import read_img_seq
from basicsr.utils.img_util import tensor2img


def inference(imgs, imgnames, model, save_path):
    with torch.no_grad():
        outputs = model(imgs)
    # save imgs
    outputs = outputs.squeeze()
    outputs = list(outputs)
    for output, imgname in zip(outputs, imgnames):
        output = tensor2img(output)
        cv2.imwrite(os.path.join(save_path, f'{imgname}_BasicVSRPP.png'), output)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_path', type=str, default='experiments/pretrained_models/BasicVSRPP_REDS4.pth')
    parser.add_argument(
        '--input_path', type=str, default='datasets/REDS4/sharp_bicubic/000', help='input test image folder')
    parser.add_argument('--save_path', type=str, default='results/BasicVSRPP/000', help='save image path')
    parser.add_argument('--interval', type=int, default=100, help='interval size')
    args = parser.parse_args()

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # set up model
    model = BasicVSRPlusPlus(mid_channels=64, num_blocks=7)
    model.load_state_dict(torch.load(args.model_path)['params'], strict=True)
    model.eval()
    model = model.to(device)

    os.makedirs(args.save_path, exist_ok=True)

    # extract images from video format files
    input_path = args.input_path
    use_ffmpeg = False
    if not os.path.isdir(input_path):
        use_ffmpeg = True
        video_name = os.path.splitext(os.path.split(args.input_path)[-1])[0]
        input_path = os.path.join('./BasicVSRPP_tmp', video_name)
        os.makedirs(input_path, exist_ok=True)
        os.system(f'ffmpeg -i {args.input_path} -qscale:v 1 -qmin 1 -qmax 1 -vsync 0 {input_path}/frame%08d.png')

    # load data and inference
    imgs_list = sorted(glob.glob(os.path.join(input_path, '*')))
    num_imgs = len(imgs_list)
    if num_imgs <= args.interval:  # too many images may cause CUDA out of memory
        imgs, imgnames = read_img_seq(imgs_list, return_imgname=True)
        imgs = imgs.unsqueeze(0).to(device)
        inference(imgs, imgnames, model, args.save_path)
    else:
        for idx in range(0, num_imgs, args.interval):
            interval = min(args.interval, num_imgs - idx)
            imgs, imgnames = read_img_seq(imgs_list[idx:idx + interval], return_imgname=True)
            imgs = imgs.unsqueeze(0).to(device)
            inference(imgs, imgnames, model, args.save_path)

    # delete ffmpeg output images
    if use_ffmpeg:
        shutil.rmtree(input_path)


if __name__ == '__main__':
    main()
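A hedged sketch of the checkpoint layout these scripts assume: BasicSR saves network weights nested under a 'params' (or 'params_ema') key, which is why load_state_dict reads torch.load(...)['params'] above. The dict below is a stand-in, not a real checkpoint file:

import torch

fake_ckpt = {'params': {'weight': torch.ones(1)}}  # hypothetical stand-in for a saved checkpoint
state = fake_ckpt.get('params_ema', fake_ckpt.get('params', fake_ckpt))
print(list(state.keys()))  # ['weight']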
import argparse
import glob
import numpy as np
import os
import torch
import torchvision.transforms as transforms
from skimage import io

from basicsr.archs.dfdnet_arch import DFDNet
from basicsr.utils import imwrite, tensor2img

try:
    from facexlib.utils.face_restoration_helper import FaceRestoreHelper
except ImportError:
    print('Please install facexlib: pip install facexlib')


# TODO: need to modify, as we have updated the FaceRestorationHelper
def get_part_location(landmarks):
    """Get part locations (left eye, right eye, nose, mouth) from 68-point landmarks."""
    map_left_eye = list(np.hstack((range(17, 22), range(36, 42))))
    map_right_eye = list(np.hstack((range(22, 27), range(42, 48))))
    map_nose = list(range(29, 36))
    map_mouth = list(range(48, 68))

    # left eye
    mean_left_eye = np.mean(landmarks[map_left_eye], 0)  # (x, y)
    half_len_left_eye = np.max(
        (np.max(np.max(landmarks[map_left_eye], 0) - np.min(landmarks[map_left_eye], 0)) / 2, 16))  # a number
    loc_left_eye = np.hstack((mean_left_eye - half_len_left_eye + 1, mean_left_eye + half_len_left_eye)).astype(int)
    # (1, 4): the four numbers form the two diagonal corner coordinates of the box
    loc_left_eye = torch.from_numpy(loc_left_eye).unsqueeze(0)

    # right eye
    mean_right_eye = np.mean(landmarks[map_right_eye], 0)
    half_len_right_eye = np.max(
        (np.max(np.max(landmarks[map_right_eye], 0) - np.min(landmarks[map_right_eye], 0)) / 2, 16))
    loc_right_eye = np.hstack(
        (mean_right_eye - half_len_right_eye + 1, mean_right_eye + half_len_right_eye)).astype(int)
    loc_right_eye = torch.from_numpy(loc_right_eye).unsqueeze(0)

    # nose
    mean_nose = np.mean(landmarks[map_nose], 0)
    half_len_nose = np.max((np.max(np.max(landmarks[map_nose], 0) - np.min(landmarks[map_nose], 0)) / 2, 16))
    loc_nose = np.hstack((mean_nose - half_len_nose + 1, mean_nose + half_len_nose)).astype(int)
    loc_nose = torch.from_numpy(loc_nose).unsqueeze(0)

    # mouth
    mean_mouth = np.mean(landmarks[map_mouth], 0)
    half_len_mouth = np.max((np.max(np.max(landmarks[map_mouth], 0) - np.min(landmarks[map_mouth], 0)) / 2, 16))
    loc_mouth = np.hstack((mean_mouth - half_len_mouth + 1, mean_mouth + half_len_mouth)).astype(int)
    loc_mouth = torch.from_numpy(loc_mouth).unsqueeze(0)

    return loc_left_eye, loc_right_eye, loc_nose, loc_mouth
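An illustrative call to get_part_location() on synthetic landmarks; random points stand in for a real 68-point dlib detection on a 512x512 crop:

import numpy as np
landmarks = np.random.rand(68, 2) * 512  # hypothetical 68-point landmarks, (x, y) pairs
boxes = get_part_location(landmarks)
for name, box in zip(('left_eye', 'right_eye', 'nose', 'mouth'), boxes):
    print(name, tuple(box.shape))  # each box is a (1, 4) tensor: x1, y1, x2, y2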
if __name__ == '__main__':
    """We try to align with the official code, but there are still slight
    differences: 1) we use dlib for 68-point landmark detection; 2) the image
    packages used are different (especially for reading and writing).
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    parser = argparse.ArgumentParser()

    parser.add_argument('--upscale_factor', type=int, default=2)
    parser.add_argument(
        '--model_path', type=str, default='experiments/pretrained_models/DFDNet/DFDNet_official-d1fa5650.pth')
    parser.add_argument(
        '--dict_path', type=str, default='experiments/pretrained_models/DFDNet/DFDNet_dict_512-f79685f0.pth')
    parser.add_argument('--test_path', type=str, default='datasets/TestWhole')
    parser.add_argument('--upsample_num_times', type=int, default=1)
    parser.add_argument('--save_inverse_affine', action='store_true')
    parser.add_argument('--only_keep_largest', action='store_true')
    # The official code uses skimage.io to read the cropped images back from
    # disk instead of directly using the intermediate results in memory (as
    # we do). This different operation brings slight differences due to
    # skimage.io. To align with the official results, set official_adaption
    # to True.
    parser.add_argument('--official_adaption', type=bool, default=True)

    # the following are the paths for the dlib models
    parser.add_argument(
        '--detection_path',
        type=str,
        default='experiments/pretrained_models/dlib/mmod_human_face_detector-4cb19393.dat')
    parser.add_argument(
        '--landmark5_path',
        type=str,
        default='experiments/pretrained_models/dlib/shape_predictor_5_face_landmarks-c4b1e980.dat')
    parser.add_argument(
        '--landmark68_path',
        type=str,
        default='experiments/pretrained_models/dlib/shape_predictor_68_face_landmarks-fbdc2cb8.dat')

    args = parser.parse_args()
    if args.test_path.endswith('/'):  # strip a trailing slash
        args.test_path = args.test_path[:-1]
    result_root = f'results/DFDNet/{os.path.basename(args.test_path)}'

    # set up the DFDNet
    net = DFDNet(64, dict_path=args.dict_path).to(device)
    checkpoint = torch.load(args.model_path, map_location=lambda storage, loc: storage)
    net.load_state_dict(checkpoint['params'])
    net.eval()

    save_crop_root = os.path.join(result_root, 'cropped_faces')
    save_inverse_affine_root = os.path.join(result_root, 'inverse_affine')
    os.makedirs(save_inverse_affine_root, exist_ok=True)
    save_restore_root = os.path.join(result_root, 'restored_faces')
    save_final_root = os.path.join(result_root, 'final_results')

    face_helper = FaceRestoreHelper(args.upscale_factor, face_size=512)

    # scan all the jpg and png images
    for img_path in sorted(glob.glob(os.path.join(args.test_path, '*.[jp][pn]g'))):
        img_name = os.path.basename(img_path)
        print(f'Processing {img_name} image ...')
        save_crop_path = os.path.join(save_crop_root, img_name)
        if args.save_inverse_affine:
            save_inverse_affine_path = os.path.join(save_inverse_affine_root, img_name)
        else:
            save_inverse_affine_path = None

        face_helper.init_dlib(args.detection_path, args.landmark5_path, args.landmark68_path)
        # detect faces
        num_det_faces = face_helper.detect_faces(
            img_path, upsample_num_times=args.upsample_num_times, only_keep_largest=args.only_keep_largest)
        # get 5 face landmarks for each face
        num_landmarks = face_helper.get_face_landmarks_5()
        print(f'\tDetected {num_det_faces} faces, {num_landmarks} landmarks.')
        # warp and crop each face
        face_helper.warp_crop_faces(save_crop_path, save_inverse_affine_path)

        if args.official_adaption:
            path, ext = os.path.splitext(save_crop_path)
            paths = sorted(glob.glob(f'{path}_[0-9]*.png'))
            cropped_faces = [io.imread(path) for path in paths]
        else:
            cropped_faces = face_helper.cropped_faces

        # get 68 landmarks for each cropped face
        num_landmarks = face_helper.get_face_landmarks_68()
        print(f'\tDetected {num_landmarks} faces for 68 landmarks.')

        face_helper.free_dlib_gpu_memory()

        print('\tFace restoration ...')
        # face restoration for each cropped face
        assert len(cropped_faces) == len(face_helper.all_landmarks_68)
        for idx, (cropped_face, landmarks) in enumerate(zip(cropped_faces, face_helper.all_landmarks_68)):
            if landmarks is None:
                print(f'Landmarks are None, skipping cropped face with idx {idx}.')
                # just copy the cropped face to the restored faces
                restored_face = cropped_face
            else:
                # prepare data
                part_locations = get_part_location(landmarks)
                cropped_face = transforms.ToTensor()(cropped_face)
                cropped_face = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))(cropped_face)
                cropped_face = cropped_face.unsqueeze(0).to(device)

                try:
                    with torch.no_grad():
                        output = net(cropped_face, part_locations)
                        restored_face = tensor2img(output, min_max=(-1, 1))
                    del output
                    torch.cuda.empty_cache()
                except Exception as e:
                    print(f'DFDNet inference failed: {e}')
                    restored_face = tensor2img(cropped_face, min_max=(-1, 1))

            path = os.path.splitext(os.path.join(save_restore_root, img_name))[0]
            save_path = f'{path}_{idx:02d}.png'
            imwrite(restored_face, save_path)
            face_helper.add_restored_face(restored_face)

        print('\tGenerating the final result ...')
        # paste each restored face back onto the input image
        face_helper.paste_faces_to_input_image(os.path.join(save_final_root, img_name))

        # clean all the intermediate results to process the next image
        face_helper.clean_all()

    print(f'\nAll results are saved in {result_root}')
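A quick sanity check (illustrative only) that the Normalize((0.5, ...), (0.5, ...)) transform used above maps [0, 1] inputs to the [-1, 1] range expected by tensor2img(..., min_max=(-1, 1)):

import torch
import torchvision.transforms as transforms

x = torch.rand(3, 4, 4)  # dummy image tensor in [0, 1]
y = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))(x)
assert torch.allclose(y, x * 2 - 1)  # (x - 0.5) / 0.5 == 2x - 1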
import argparse
import cv2
import glob
import numpy as np
import os
import torch

from basicsr.archs.rrdbnet_arch import RRDBNet


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model_path',
        type=str,
        default='experiments/pretrained_models/ESRGAN/ESRGAN_SRx4_DF2KOST_official-ff704c30.pth')
    parser.add_argument('--input', type=str, default='datasets/Set14/LRbicx4', help='input test image folder')
    parser.add_argument('--output', type=str, default='results/ESRGAN', help='output folder')
    args = parser.parse_args()

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # set up model
    model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32)
    model.load_state_dict(torch.load(args.model_path)['params'], strict=True)
    model.eval()
    model = model.to(device)

    os.makedirs(args.output, exist_ok=True)
    for idx, path in enumerate(sorted(glob.glob(os.path.join(args.input, '*')))):
        imgname = os.path.splitext(os.path.basename(path))[0]
        print('Testing', idx, imgname)
        # read image (HWC, BGR, [0, 1]) and convert to a CHW RGB tensor
        img = cv2.imread(path, cv2.IMREAD_COLOR).astype(np.float32) / 255.
        img = torch.from_numpy(np.transpose(img[:, :, [2, 1, 0]], (2, 0, 1))).float()
        img = img.unsqueeze(0).to(device)
        # inference
        try:
            with torch.no_grad():
                output = model(img)
        except Exception as error:
            print('Error', error, imgname)
        else:
            # save image
            output = output.data.squeeze().float().cpu().clamp_(0, 1).numpy()
            output = np.transpose(output[[2, 1, 0], :, :], (1, 2, 0))
            output = (output * 255.0).round().astype(np.uint8)
            cv2.imwrite(os.path.join(args.output, f'{imgname}_ESRGAN.png'), output)


if __name__ == '__main__':
    main()
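The BGR-to-RGB transpose dance above is easy to get wrong; this self-contained round-trip check (dummy data, no model) mirrors the exact index and transpose pattern used in the script:

import numpy as np
import torch

bgr = np.random.rand(8, 8, 3).astype(np.float32)  # dummy HWC BGR image
t = torch.from_numpy(np.transpose(bgr[:, :, [2, 1, 0]], (2, 0, 1))).float()  # -> CHW RGB
back = np.transpose(t.numpy()[[2, 1, 0], :, :], (1, 2, 0))  # -> HWC BGR
assert np.allclose(bgr, back)  # the two flips cancel out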
import argparse
import cv2
import glob
import numpy as np
import os
import torch
from tqdm import tqdm

from basicsr.archs.ridnet_arch import RIDNet
from basicsr.utils.img_util import img2tensor, tensor2img

if __name__ == '__main__':
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    parser = argparse.ArgumentParser()
    parser.add_argument('--test_path', type=str, default='datasets/denoise/RNI15')
    parser.add_argument('--noise_g', type=int, default=25)
    parser.add_argument('--model_path', type=str, default='experiments/pretrained_models/RIDNet/RIDNet.pth')
    args = parser.parse_args()

    if args.test_path.endswith('/'):  # strip a trailing slash
        args.test_path = args.test_path[:-1]
    test_root = os.path.join(args.test_path, f'X{args.noise_g}')
    result_root = f'results/RIDNet/{os.path.basename(args.test_path)}'
    os.makedirs(result_root, exist_ok=True)

    # set up the RIDNet
    net = RIDNet(3, 64, 3).to(device)
    checkpoint = torch.load(args.model_path, map_location=lambda storage, loc: storage)
    net.load_state_dict(checkpoint)
    net.eval()

    # scan all the jpg and png images
    img_list = sorted(glob.glob(os.path.join(test_root, '*.[jp][pn]g')))
    pbar = tqdm(total=len(img_list), desc='')
    for idx, img_path in enumerate(img_list):
        img_name = os.path.basename(img_path).split('.')[0]
        pbar.update(1)
        pbar.set_description(f'{idx}: {img_name}')
        # read image
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        img = img2tensor(img, bgr2rgb=True, float32=True).unsqueeze(0).to(device)
        # inference
        with torch.no_grad():
            output = net(img)
        # save image
        output = tensor2img(output, rgb2bgr=True, out_type=np.uint8, min_max=(0, 255))
        save_img_path = os.path.join(result_root, f'{img_name}_x{args.noise_g}_RIDNet.png')
        cv2.imwrite(save_img_path, output)
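The '*.[jp][pn]g' glob used above (and in the DFDNet script) is a compact way to match both .jpg and .png; a small illustration with fnmatch, which uses the same pattern syntax as glob:

import fnmatch
names = ['a.jpg', 'b.png', 'c.jpeg', 'd.bmp']
print([n for n in names if fnmatch.fnmatch(n, '*.[jp][pn]g')])  # ['a.jpg', 'b.png']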
import argparse
import math
import os
import torch
from torchvision import utils

from basicsr.archs.stylegan2_arch import StyleGAN2Generator
from basicsr.utils import set_random_seed


def generate(args, g_ema, device, mean_latent, randomize_noise):
    with torch.no_grad():
        g_ema.eval()
        for i in range(args.pics):
            sample_z = torch.randn(args.sample, args.latent, device=device)
            sample, _ = g_ema([sample_z],
                              truncation=args.truncation,
                              randomize_noise=randomize_noise,
                              truncation_latent=mean_latent)
            # note: newer torchvision versions rename the `range` kwarg of
            # save_image to `value_range`
            utils.save_image(
                sample,
                f'samples/{str(i).zfill(6)}.png',
                nrow=int(math.sqrt(args.sample)),
                normalize=True,
                range=(-1, 1),
            )


if __name__ == '__main__':
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    parser = argparse.ArgumentParser()
    parser.add_argument('--size', type=int, default=1024)
    parser.add_argument('--sample', type=int, default=1)
    parser.add_argument('--pics', type=int, default=1)
    parser.add_argument('--truncation', type=float, default=1)
    parser.add_argument('--truncation_mean', type=int, default=4096)
    parser.add_argument(
        '--ckpt',
        type=str,
        default='experiments/pretrained_models/StyleGAN/stylegan2_ffhq_config_f_1024_official-3ab41b38.pth')
    parser.add_argument('--channel_multiplier', type=int, default=2)
    parser.add_argument('--randomize_noise', type=bool, default=True)

    args = parser.parse_args()
    args.latent = 512
    args.n_mlp = 8

    os.makedirs('samples', exist_ok=True)
    set_random_seed(2020)

    g_ema = StyleGAN2Generator(
        args.size, args.latent, args.n_mlp, channel_multiplier=args.channel_multiplier).to(device)
    checkpoint = torch.load(args.ckpt)['params_ema']
    g_ema.load_state_dict(checkpoint)

    if args.truncation < 1:
        with torch.no_grad():
            mean_latent = g_ema.mean_latent(args.truncation_mean)
    else:
        mean_latent = None

    generate(args, g_ema, device, mean_latent, args.randomize_noise)
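An illustrative sketch of the truncation trick that generate() applies when --truncation < 1: sampled latents are pulled toward a mean latent, trading sample diversity for fidelity. The arithmetic below is the standard formulation, not code lifted from StyleGAN2Generator:

import torch

mean_latent = torch.zeros(512)  # stand-in for g_ema.mean_latent(...)
z = torch.randn(512)
truncation = 0.7
w = mean_latent + truncation * (z - mean_latent)  # interpolate toward the mean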
# Modified from https://github.com/JingyunLiang/SwinIR
import argparse
import cv2
import glob
import numpy as np
import os
import torch
from torch.nn import functional as F

from basicsr.archs.swinir_arch import SwinIR


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', type=str, default='datasets/Set5/LRbicx4', help='input test image folder')
    parser.add_argument('--output', type=str, default='results/SwinIR/Set5', help='output folder')
    parser.add_argument(
        '--task',
        type=str,
        default='classical_sr',
        help='classical_sr, lightweight_sr, real_sr, gray_dn, color_dn, jpeg_car')
    # dn: denoising; car: compression artifact removal
    # TODO: it now only supports sr, need to adapt to dn and jpeg_car
    parser.add_argument('--patch_size', type=int, default=64, help='training patch size')
    parser.add_argument('--scale', type=int, default=4, help='scale factor: 1, 2, 3, 4, 8')  # 1 for dn and jpeg_car
    parser.add_argument('--noise', type=int, default=15, help='noise level: 15, 25, 50')
    parser.add_argument('--jpeg', type=int, default=40, help='JPEG quality factor: 10, 20, 30, 40')
    parser.add_argument('--large_model', action='store_true', help='use large model, only provided for real image sr')
    parser.add_argument(
        '--model_path',
        type=str,
        default='experiments/pretrained_models/SwinIR/001_classicalSR_DF2K_s64w8_SwinIR-M_x4.pth')
    args = parser.parse_args()

    os.makedirs(args.output, exist_ok=True)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # set up model
    model = define_model(args)
    model.eval()
    model = model.to(device)

    if args.task == 'jpeg_car':
        window_size = 7
    else:
        window_size = 8

    for idx, path in enumerate(sorted(glob.glob(os.path.join(args.input, '*')))):
        imgname = os.path.splitext(os.path.basename(path))[0]
        print('Testing', idx, imgname)
        # read image
        img = cv2.imread(path, cv2.IMREAD_COLOR).astype(np.float32) / 255.
        img = torch.from_numpy(np.transpose(img[:, :, [2, 1, 0]], (2, 0, 1))).float()
        img = img.unsqueeze(0).to(device)

        # inference
        with torch.no_grad():
            # pad input image to be a multiple of window_size
            mod_pad_h, mod_pad_w = 0, 0
            _, _, h, w = img.size()
            if h % window_size != 0:
                mod_pad_h = window_size - h % window_size
            if w % window_size != 0:
                mod_pad_w = window_size - w % window_size
            img = F.pad(img, (0, mod_pad_w, 0, mod_pad_h), 'reflect')
            output = model(img)
            _, _, h, w = output.size()
            output = output[:, :, 0:h - mod_pad_h * args.scale, 0:w - mod_pad_w * args.scale]

        # save image
        output = output.data.squeeze().float().cpu().clamp_(0, 1).numpy()
        if output.ndim == 3:
            output = np.transpose(output[[2, 1, 0], :, :], (1, 2, 0))
        output = (output * 255.0).round().astype(np.uint8)
        cv2.imwrite(os.path.join(args.output, f'{imgname}_SwinIR.png'), output)


def define_model(args):
    # 001 classical image sr
    if args.task == 'classical_sr':
        model = SwinIR(
            upscale=args.scale,
            in_chans=3,
            img_size=args.patch_size,
            window_size=8,
            img_range=1.,
            depths=[6, 6, 6, 6, 6, 6],
            embed_dim=180,
            num_heads=[6, 6, 6, 6, 6, 6],
            mlp_ratio=2,
            upsampler='pixelshuffle',
            resi_connection='1conv')

    # 002 lightweight image sr
    # use 'pixelshuffledirect' to save parameters
    elif args.task == 'lightweight_sr':
        model = SwinIR(
            upscale=args.scale,
            in_chans=3,
            img_size=64,
            window_size=8,
            img_range=1.,
            depths=[6, 6, 6, 6],
            embed_dim=60,
            num_heads=[6, 6, 6, 6],
            mlp_ratio=2,
            upsampler='pixelshuffledirect',
            resi_connection='1conv')

    # 003 real-world image sr
    elif args.task == 'real_sr':
        if not args.large_model:
            # use 'nearest+conv' to avoid block artifacts
            model = SwinIR(
                upscale=4,
                in_chans=3,
                img_size=64,
                window_size=8,
                img_range=1.,
                depths=[6, 6, 6, 6, 6, 6],
                embed_dim=180,
                num_heads=[6, 6, 6, 6, 6, 6],
                mlp_ratio=2,
                upsampler='nearest+conv',
                resi_connection='1conv')
        else:
            # larger model size; use '3conv' to save parameters and memory; use ema for GAN training
            model = SwinIR(
                upscale=4,
                in_chans=3,
                img_size=64,
                window_size=8,
                img_range=1.,
                depths=[6, 6, 6, 6, 6, 6, 6, 6, 6],
                embed_dim=248,
                num_heads=[8, 8, 8, 8, 8, 8, 8, 8, 8],
                mlp_ratio=2,
                upsampler='nearest+conv',
                resi_connection='3conv')

    # 004 grayscale image denoising
    elif args.task == 'gray_dn':
        model = SwinIR(
            upscale=1,
            in_chans=1,
            img_size=128,
            window_size=8,
            img_range=1.,
            depths=[6, 6, 6, 6, 6, 6],
            embed_dim=180,
            num_heads=[6, 6, 6, 6, 6, 6],
            mlp_ratio=2,
            upsampler='',
            resi_connection='1conv')

    # 005 color image denoising
    elif args.task == 'color_dn':
        model = SwinIR(
            upscale=1,
            in_chans=3,
            img_size=128,
            window_size=8,
            img_range=1.,
            depths=[6, 6, 6, 6, 6, 6],
            embed_dim=180,
            num_heads=[6, 6, 6, 6, 6, 6],
            mlp_ratio=2,
            upsampler='',
            resi_connection='1conv')

    # 006 JPEG compression artifact reduction
    # use window_size=7 because JPEG encoding uses 8x8 blocks; use img_range=255 because it's slightly better than 1
    elif args.task == 'jpeg_car':
        model = SwinIR(
            upscale=1,
            in_chans=1,
            img_size=126,
            window_size=7,
            img_range=255.,
            depths=[6, 6, 6, 6, 6, 6],
            embed_dim=180,
            num_heads=[6, 6, 6, 6, 6, 6],
            mlp_ratio=2,
            upsampler='',
            resi_connection='1conv')

    loadnet = torch.load(args.model_path)
    if 'params_ema' in loadnet:
        keyname = 'params_ema'
    else:
        keyname = 'params'
    model.load_state_dict(loadnet[keyname], strict=True)

    return model


if __name__ == '__main__':
    main()
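A standalone sketch of the window-size padding logic in main() above, checking that reflect-padding to a multiple of window_size and cropping back restores the original shape (dummy tensor, model call omitted, scale fixed to 1 for the check):

import torch
from torch.nn import functional as F

window_size, scale = 8, 1
x = torch.rand(1, 3, 30, 45)  # dummy NCHW input
pad_h = (window_size - x.size(2) % window_size) % window_size
pad_w = (window_size - x.size(3) % window_size) % window_size
y = F.pad(x, (0, pad_w, 0, pad_h), 'reflect')  # 30x45 -> 32x48
assert y.size(2) % window_size == 0 and y.size(3) % window_size == 0
y = y[:, :, :x.size(2) * scale, :x.size(3) * scale]  # crop back after the model
assert y.shape == x.shape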
name: BasicVSR_REDS
model_type: VideoRecurrentModel
scale: 4
num_gpu: 1  # set num_gpu: 0 for cpu mode
manual_seed: 0

datasets:
  test_1:  # the 1st test dataset
    name: REDS4
    type: VideoRecurrentTestDataset
    dataroot_gt: datasets/REDS4/GT
    dataroot_lq: datasets/REDS4/sharp_bicubic
    cache_data: True
    io_backend:
      type: disk
    num_frame: -1  # not needed

# network structures
network_g:
  type: BasicVSR
  num_feat: 64
  num_block: 30
  spynet_path: experiments/pretrained_models/flownet/spynet_sintel_final-3d2a1287.pth

# path
path:
  pretrain_network_g: experiments/pretrained_models/BasicVSR/BasicVSR_REDS4-543c8261.pth
  strict_load_g: true

# validation settings
val:
  save_img: true
  suffix: ~  # add suffix to saved images; if None, use exp name

  metrics:
    psnr:  # metric name, can be arbitrary
      type: calculate_psnr
      crop_border: 0
      test_y_channel: false
    ssim:
      type: calculate_ssim
      crop_border: 0
      test_y_channel: false
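These test option files are plain YAML consumed by BasicSR's option parser; a minimal sketch of inspecting one with pyyaml (already in the requirements list above). The path is hypothetical, adjust it to where the file lives in your checkout:

import yaml

with open('options/test/BasicVSR/test_BasicVSR_REDS4.yml') as f:  # hypothetical path
    opt = yaml.safe_load(f)
print(opt['network_g']['type'], opt['scale'])  # e.g. BasicVSR 4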
name: BasicVSR_Vimeo90K_BDx4
model_type: VideoRecurrentModel
scale: 4
num_gpu: 1  # set num_gpu: 0 for cpu mode
manual_seed: 0

datasets:
  test_1:  # the 1st test dataset
    name: Vimeo90K
    type: VideoTestVimeo90KDataset
    dataroot_gt: datasets/vimeo90k/vimeo_septuplet/sequences
    dataroot_lq: datasets/vimeo90k/BDx4
    meta_info_file: basicsr/data/meta_info/meta_info_Vimeo90K_test_GT.txt
    cache_data: false
    io_backend:
      type: disk
    num_frame: 7

# network structures
network_g:
  type: BasicVSR
  num_feat: 64
  num_block: 30
  spynet_path: experiments/pretrained_models/flownet/spynet_sintel_final-3d2a1287.pth

# path
path:
  pretrain_network_g: experiments/pretrained_models/BasicVSR/BasicVSR_Vimeo90K_BDx4-e9bf46eb.pth
  strict_load_g: true

# validation settings
val:
  save_img: true
  flip_seq: true
  center_frame_only: true
  suffix: ~  # add suffix to saved images; if None, use exp name

  metrics:
    psnr:  # metric name, can be arbitrary
      type: calculate_psnr
      crop_border: 0
      test_y_channel: true
    ssim:
      type: calculate_ssim
      crop_border: 0
      test_y_channel: true
name: BasicVSR_Vimeo90K_BIx4
model_type: VideoRecurrentModel
scale: 4
num_gpu: 1  # set num_gpu: 0 for cpu mode
manual_seed: 0

datasets:
  test_1:  # the 1st test dataset
    name: Vimeo90K
    type: VideoTestVimeo90KDataset
    dataroot_gt: datasets/vimeo90k/vimeo_septuplet/sequences
    dataroot_lq: datasets/vimeo90k/BIx4
    meta_info_file: basicsr/data/meta_info/meta_info_Vimeo90K_test_GT.txt
    cache_data: false
    io_backend:
      type: disk
    num_frame: 7

# network structures
network_g:
  type: BasicVSR
  num_feat: 64
  num_block: 30
  spynet_path: experiments/pretrained_models/flownet/spynet_sintel_final-3d2a1287.pth

# path
path:
  pretrain_network_g: experiments/pretrained_models/BasicVSR/BasicVSR_Vimeo90K_BIx4-2a29695a.pth
  strict_load_g: true

# validation settings
val:
  save_img: true
  flip_seq: true
  center_frame_only: true
  suffix: ~  # add suffix to saved images; if None, use exp name

  metrics:
    psnr:  # metric name, can be arbitrary
      type: calculate_psnr
      crop_border: 0
      test_y_channel: true
    ssim:
      type: calculate_ssim
      crop_border: 0
      test_y_channel: true
name: IconVSR_REDS
model_type: VideoRecurrentModel
scale: 4
num_gpu: 1  # set num_gpu: 0 for cpu mode
manual_seed: 0

datasets:
  test_1:  # the 1st test dataset
    name: REDS4
    type: VideoRecurrentTestDataset
    dataroot_gt: datasets/REDS4/GT
    dataroot_lq: datasets/REDS4/sharp_bicubic
    cache_data: True
    io_backend:
      type: disk
    num_frame: -1  # not needed

# network structures
network_g:
  type: IconVSR
  num_feat: 64
  num_block: 30
  keyframe_stride: 5
  temporal_padding: 2
  spynet_path: experiments/pretrained_models/flownet/spynet_sintel_final-3d2a1287.pth
  edvr_path: experiments/pretrained_models/BasicVSR/EDVR_REDS_pretrained_for_IconVSR-f62a2f1e.pth

# path
path:
  pretrain_network_g: experiments/pretrained_models/BasicVSR/IconVSR_REDS-aaa5367f.pth
  strict_load_g: true

# validation settings
val:
  save_img: true
  suffix: ~  # add suffix to saved images; if None, use exp name

  metrics:
    psnr:  # metric name, can be arbitrary
      type: calculate_psnr
      crop_border: 0
      test_y_channel: false
    ssim:
      type: calculate_ssim
      crop_border: 0
      test_y_channel: false
name: IconVSR_Vimeo90K_BDx4
model_type: VideoRecurrentModel
scale: 4
num_gpu: 1  # set num_gpu: 0 for cpu mode
manual_seed: 0

datasets:
  test_1:  # the 1st test dataset
    name: Vimeo90K
    type: VideoTestVimeo90KDataset
    dataroot_gt: datasets/vimeo90k/vimeo_septuplet/sequences
    dataroot_lq: datasets/vimeo90k/BDx4
    meta_info_file: basicsr/data/meta_info/meta_info_Vimeo90K_test_GT.txt
    cache_data: false
    io_backend:
      type: disk
    num_frame: 7

# network structures
network_g:
  type: IconVSR
  num_feat: 64
  num_block: 30
  keyframe_stride: 5
  temporal_padding: 3
  spynet_path: experiments/pretrained_models/flownet/spynet_sintel_final-3d2a1287.pth
  edvr_path: experiments/pretrained_models/BasicVSR/EDVR_Vimeo90K_pretrained_for_IconVSR-ee48ee92.pth

# path
path:
  pretrain_network_g: experiments/pretrained_models/BasicVSR/IconVSR_Vimeo90K_BDx4-cfcb7e00.pth
  strict_load_g: true

# validation settings
val:
  save_img: true
  flip_seq: true
  center_frame_only: true
  suffix: ~  # add suffix to saved images; if None, use exp name

  metrics:
    psnr:  # metric name, can be arbitrary
      type: calculate_psnr
      crop_border: 0
      test_y_channel: true
    ssim:
      type: calculate_ssim
      crop_border: 0
      test_y_channel: true
name: IconVSR_Vimeo90K_BIx4
model_type: VideoRecurrentModel
scale: 4
num_gpu: 1  # set num_gpu: 0 for cpu mode
manual_seed: 0

datasets:
  test_1:  # the 1st test dataset
    name: Vimeo90K
    type: VideoTestVimeo90KDataset
    dataroot_gt: datasets/vimeo90k/vimeo_septuplet/sequences
    dataroot_lq: datasets/vimeo90k/BIx4
    meta_info_file: basicsr/data/meta_info/meta_info_Vimeo90K_test_GT.txt
    cache_data: false
    io_backend:
      type: disk
    num_frame: 7

# network structures
network_g:
  type: IconVSR
  num_feat: 64
  num_block: 30
  keyframe_stride: 5
  temporal_padding: 3
  spynet_path: experiments/pretrained_models/flownet/spynet_sintel_final-3d2a1287.pth
  edvr_path: experiments/pretrained_models/BasicVSR/EDVR_Vimeo90K_pretrained_for_IconVSR-ee48ee92.pth

# path
path:
  pretrain_network_g: experiments/pretrained_models/BasicVSR/IconVSR_Vimeo90K_BIx4-35fec07c.pth
  strict_load_g: true

# validation settings
val:
  save_img: true
  flip_seq: true
  center_frame_only: true
  suffix: ~  # add suffix to saved images; if None, use exp name

  metrics:
    psnr:  # metric name, can be arbitrary
      type: calculate_psnr
      crop_border: 0
      test_y_channel: true
    ssim:
      type: calculate_ssim
      crop_border: 0
      test_y_channel: true
name: DUF_x4_52L_official
model_type: VideoBaseModel
scale: 4
num_gpu: 8  # set num_gpu: 0 for cpu mode
manual_seed: 0

datasets:
  test:
    name: Vid4
    type: VideoTestDUFDataset
    dataroot_gt: datasets/Vid4/GT
    dataroot_lq: datasets/Vid4/BIx4
    io_backend:
      type: disk
    cache_data: false
    num_frame: 7
    padding: reflection_circle
    use_duf_downsampling: true

# network structures
network_g:
  type: DUF
  scale: 4
  num_layer: 52
  adapt_official_weights: true

# path
path:
  pretrain_network_g: experiments/pretrained_models/DUF/DUF_x4_52L_official-483d2c78.pth
  strict_load_g: true

# validation settings
val:
  save_img: true
  suffix: ~  # add suffix to saved images; if None, use exp name

  metrics:
    psnr_y:  # metric name, can be arbitrary
      type: calculate_psnr
      crop_border: 8
      test_y_channel: true
    ssim_y:
      type: calculate_ssim
      crop_border: 8
      test_y_channel: true
name: EDSR_Lx2_f256b32_DIV2K_official
model_type: SRModel
scale: 2
num_gpu: 1  # set num_gpu: 0 for cpu mode
manual_seed: 0

datasets:
  test_1:  # the 1st test dataset
    name: Set5
    type: PairedImageDataset
    dataroot_gt: datasets/Set5/GTmod12
    dataroot_lq: datasets/Set5/LRbicx2
    io_backend:
      type: disk

  test_2:  # the 2nd test dataset
    name: Set14
    type: PairedImageDataset
    dataroot_gt: datasets/Set14/GTmod12
    dataroot_lq: datasets/Set14/LRbicx2
    io_backend:
      type: disk

  test_3:
    name: DIV2K100
    type: PairedImageDataset
    dataroot_gt: datasets/DIV2K/DIV2K_valid_HR
    dataroot_lq: datasets/DIV2K/DIV2K_valid_LR_bicubic/X2
    filename_tmpl: '{}x2'
    io_backend:
      type: disk

# network structures
network_g:
  type: EDSR
  num_in_ch: 3
  num_out_ch: 3
  num_feat: 256
  num_block: 32
  upscale: 2
  res_scale: 0.1
  img_range: 255.
  rgb_mean: [0.4488, 0.4371, 0.4040]

# path
path:
  pretrain_network_g: experiments/pretrained_models/EDSR/EDSR_Lx2_f256b32_DIV2K_official-be38e77d.pth
  strict_load_g: true
  param_key_g: params_ema

# validation settings
val:
  save_img: true
  suffix: ~  # add suffix to saved images; if None, use exp name

  metrics:
    psnr:  # metric name, can be arbitrary
      type: calculate_psnr
      crop_border: 2
      test_y_channel: false
    ssim:
      type: calculate_ssim
      crop_border: 2
      test_y_channel: false
name: EDSR_Lx3_f256b32_DIV2K_official
model_type: SRModel
scale: 3
num_gpu: 1  # set num_gpu: 0 for cpu mode
manual_seed: 0

datasets:
  test_1:  # the 1st test dataset
    name: Set5
    type: PairedImageDataset
    dataroot_gt: datasets/Set5/GTmod12
    dataroot_lq: datasets/Set5/LRbicx3
    io_backend:
      type: disk

  test_2:  # the 2nd test dataset
    name: Set14
    type: PairedImageDataset
    dataroot_gt: datasets/Set14/GTmod12
    dataroot_lq: datasets/Set14/LRbicx3
    io_backend:
      type: disk

  test_3:
    name: DIV2K100
    type: PairedImageDataset
    dataroot_gt: datasets/DIV2K/DIV2K_valid_HR
    dataroot_lq: datasets/DIV2K/DIV2K_valid_LR_bicubic/X3
    filename_tmpl: '{}x3'
    io_backend:
      type: disk

# network structures
network_g:
  type: EDSR
  num_in_ch: 3
  num_out_ch: 3
  num_feat: 256
  num_block: 32
  upscale: 3
  res_scale: 0.1
  img_range: 255.
  rgb_mean: [0.4488, 0.4371, 0.4040]

# path
path:
  pretrain_network_g: experiments/pretrained_models/EDSR/EDSR_Lx3_f256b32_DIV2K_official-3660f70d.pth
  strict_load_g: true

# validation settings
val:
  save_img: true
  suffix: ~  # add suffix to saved images; if None, use exp name

  metrics:
    psnr:  # metric name, can be arbitrary
      type: calculate_psnr
      crop_border: 3
      test_y_channel: false
    ssim:
      type: calculate_ssim
      crop_border: 3
      test_y_channel: false
name: EDSR_Lx4_f256b32_DIV2K_official
model_type: SRModel
scale: 4
num_gpu: 1  # set num_gpu: 0 for cpu mode
manual_seed: 0

datasets:
  test_1:  # the 1st test dataset
    name: Set5
    type: PairedImageDataset
    dataroot_gt: datasets/Set5/GTmod12
    dataroot_lq: datasets/Set5/LRbicx4
    io_backend:
      type: disk

  test_2:  # the 2nd test dataset
    name: Set14
    type: PairedImageDataset
    dataroot_gt: datasets/Set14/GTmod12
    dataroot_lq: datasets/Set14/LRbicx4
    io_backend:
      type: disk

  test_3:
    name: DIV2K100
    type: PairedImageDataset
    dataroot_gt: datasets/DIV2K/DIV2K_valid_HR
    dataroot_lq: datasets/DIV2K/DIV2K_valid_LR_bicubic/X4
    filename_tmpl: '{}x4'
    io_backend:
      type: disk

# network structures
network_g:
  type: EDSR
  num_in_ch: 3
  num_out_ch: 3
  num_feat: 256
  num_block: 32
  upscale: 4
  res_scale: 0.1
  img_range: 255.
  rgb_mean: [0.4488, 0.4371, 0.4040]

# path
path:
  pretrain_network_g: experiments/pretrained_models/EDSR/EDSR_Lx4_f256b32_DIV2K_official-76ee1c8f.pth
  strict_load_g: true

# validation settings
val:
  save_img: true
  suffix: ~  # add suffix to saved images; if None, use exp name

  metrics:
    psnr:  # metric name, can be arbitrary
      type: calculate_psnr
      crop_border: 4
      test_y_channel: false
    ssim:
      type: calculate_ssim
      crop_border: 4
      test_y_channel: false
name: EDSR_Mx2_f64b16_DIV2K_official
model_type: SRModel
scale: 2
num_gpu: 1  # set num_gpu: 0 for cpu mode
manual_seed: 0

datasets:
  test_1:  # the 1st test dataset
    name: Set5
    type: PairedImageDataset
    dataroot_gt: datasets/Set5/GTmod12
    dataroot_lq: datasets/Set5/LRbicx2
    io_backend:
      type: disk

  test_2:  # the 2nd test dataset
    name: Set14
    type: PairedImageDataset
    dataroot_gt: datasets/Set14/GTmod12
    dataroot_lq: datasets/Set14/LRbicx2
    io_backend:
      type: disk

  test_3:
    name: DIV2K100
    type: PairedImageDataset
    dataroot_gt: datasets/DIV2K/DIV2K_valid_HR
    dataroot_lq: datasets/DIV2K/DIV2K_valid_LR_bicubic/X2
    filename_tmpl: '{}x2'
    io_backend:
      type: disk

# network structures
network_g:
  type: EDSR
  num_in_ch: 3
  num_out_ch: 3
  num_feat: 64
  num_block: 16
  upscale: 2
  res_scale: 1
  img_range: 255.
  rgb_mean: [0.4488, 0.4371, 0.4040]

# path
path:
  pretrain_network_g: experiments/pretrained_models/EDSR/EDSR_Mx2_f64b16_DIV2K_official-3ba7b086.pth
  strict_load_g: true

# validation settings
val:
  save_img: true
  suffix: ~  # add suffix to saved images; if None, use exp name

  metrics:
    psnr:  # metric name, can be arbitrary
      type: calculate_psnr
      crop_border: 2
      test_y_channel: false
    ssim:
      type: calculate_ssim
      crop_border: 2
      test_y_channel: false
name: EDSR_Mx3_f64b16_DIV2K_official
model_type: SRModel
scale: 3
num_gpu: 1  # set num_gpu: 0 for cpu mode
manual_seed: 0

datasets:
  test_1:  # the 1st test dataset
    name: Set5
    type: PairedImageDataset
    dataroot_gt: datasets/Set5/GTmod12
    dataroot_lq: datasets/Set5/LRbicx3
    io_backend:
      type: disk

  test_2:  # the 2nd test dataset
    name: Set14
    type: PairedImageDataset
    dataroot_gt: datasets/Set14/GTmod12
    dataroot_lq: datasets/Set14/LRbicx3
    io_backend:
      type: disk

  test_3:
    name: DIV2K100
    type: PairedImageDataset
    dataroot_gt: datasets/DIV2K/DIV2K_valid_HR
    dataroot_lq: datasets/DIV2K/DIV2K_valid_LR_bicubic/X3
    filename_tmpl: '{}x3'
    io_backend:
      type: disk

# network structures
network_g:
  type: EDSR
  num_in_ch: 3
  num_out_ch: 3
  num_feat: 64
  num_block: 16
  upscale: 3
  res_scale: 1
  img_range: 255.
  rgb_mean: [0.4488, 0.4371, 0.4040]

# path
path:
  pretrain_network_g: experiments/pretrained_models/EDSR/EDSR_Mx3_f64b16_DIV2K_official-6908f88a.pth
  strict_load_g: true

# validation settings
val:
  save_img: true
  suffix: ~  # add suffix to saved images; if None, use exp name

  metrics:
    psnr:  # metric name, can be arbitrary
      type: calculate_psnr
      crop_border: 3
      test_y_channel: false
    ssim:
      type: calculate_ssim
      crop_border: 3
      test_y_channel: false