Commit 8f9687f5 authored by mashun1's avatar mashun1
Browse files

ridcp

parents
Pipeline #617 canceled with stages
import argparse
import math
import numpy as np
import torch
from torch.utils.data import DataLoader
from basicsr.data import build_dataset
from basicsr.metrics.fid import extract_inception_features, load_patched_inception_v3
def calculate_stats_from_dataset():
    """Extract Inception-v3 features from the FFHQ dataset and save their
    mean/covariance statistics (used later as the "real" side of FID).

    Command-line args:
        --num_sample: number of images to use for the statistics.
        --batch_size: dataloader batch size.
        --size: image resolution; selects the ``ffhq_{size}.lmdb`` file.
        --dataroot: root directory containing the FFHQ lmdb files.

    Side effects: writes ``inception_FFHQ_{size}.pth`` to the CWD.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_sample', type=int, default=50000)
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--size', type=int, default=512)
    parser.add_argument('--dataroot', type=str, default='datasets/ffhq')
    args = parser.parse_args()

    # inception model
    inception = load_patched_inception_v3(device)

    # create dataset
    opt = {}
    opt['name'] = 'FFHQ'
    opt['type'] = 'FFHQDataset'
    # Bug fix: honor --dataroot; the path was previously hard-coded to
    # 'datasets/ffhq', silently ignoring the CLI argument. The default value
    # reproduces the old path exactly.
    opt['dataroot_gt'] = f'{args.dataroot}/ffhq_{args.size}.lmdb'
    opt['io_backend'] = dict(type='lmdb')
    opt['use_hflip'] = False
    opt['mean'] = [0.5, 0.5, 0.5]
    opt['std'] = [0.5, 0.5, 0.5]
    dataset = build_dataset(opt)

    # create dataloader
    data_loader = DataLoader(
        dataset=dataset, batch_size=args.batch_size, shuffle=False, num_workers=4, sampler=None, drop_last=False)
    total_batch = math.ceil(args.num_sample / args.batch_size)

    def data_generator(data_loader, total_batch):
        # Yield only the 'gt' tensors; stop after total_batch batches so we
        # never feed more data than needed to reach num_sample features.
        for idx, data in enumerate(data_loader):
            if idx >= total_batch:
                break
            else:
                yield data['gt']

    features = extract_inception_features(data_generator(data_loader, total_batch), inception, total_batch, device)
    features = features.numpy()
    total_len = features.shape[0]
    # The last batch may overshoot num_sample; trim to exactly num_sample.
    features = features[:args.num_sample]
    print(f'Extracted {total_len} features, use the first {features.shape[0]} features to calculate stats.')
    mean = np.mean(features, 0)
    cov = np.cov(features, rowvar=False)
    save_path = f'inception_{opt["name"]}_{args.size}.pth'
    torch.save(
        dict(name=opt['name'], size=args.size, mean=mean, cov=cov), save_path, _use_new_zipfile_serialization=False)


if __name__ == '__main__':
    calculate_stats_from_dataset()
import cv2
import glob
import numpy as np
import os.path as osp
from torchvision.transforms.functional import normalize
from basicsr.utils import img2tensor
try:
import lpips
except ImportError:
print('Please install lpips: pip install lpips')
def main():
    """Compute the average LPIPS distance between restored images and their
    ground-truth counterparts.

    Paths are configured below; images are matched by basename, with an
    optional filename ``suffix`` on the restored side.
    """
    # Configurations
    # -------------------------------------------------------------------------
    folder_gt = 'datasets/celeba/celeba_512_validation'
    folder_restored = 'datasets/celeba/celeba_512_validation_lq'
    # crop_border = 4
    suffix = ''
    # -------------------------------------------------------------------------
    loss_fn_vgg = lpips.LPIPS(net='vgg').cuda()  # RGB, normalized to [-1,1]
    lpips_all = []
    img_list = sorted(glob.glob(osp.join(folder_gt, '*')))

    mean = [0.5, 0.5, 0.5]
    std = [0.5, 0.5, 0.5]
    for i, img_path in enumerate(img_list):
        basename, ext = osp.splitext(osp.basename(img_path))
        img_gt = cv2.imread(img_path, cv2.IMREAD_UNCHANGED).astype(np.float32) / 255.
        img_restored = cv2.imread(osp.join(folder_restored, basename + suffix + ext), cv2.IMREAD_UNCHANGED).astype(
            np.float32) / 255.
        img_gt, img_restored = img2tensor([img_gt, img_restored], bgr2rgb=True, float32=True)
        # norm to [-1, 1]
        normalize(img_gt, mean, std, inplace=True)
        normalize(img_restored, mean, std, inplace=True)
        # calculate lpips
        # Bug fix: LPIPS returns a non-scalar tensor (shape [1,1,1,1]);
        # formatting it with ':.6f' raises TypeError because torch only
        # supports format specs on 0-dim tensors. Convert to a Python float.
        lpips_val = loss_fn_vgg(img_restored.unsqueeze(0).cuda(), img_gt.unsqueeze(0).cuda()).item()

        print(f'{i+1:3d}: {basename:25}. \tLPIPS: {lpips_val:.6f}.')
        lpips_all.append(lpips_val)

    print(f'Average: LPIPS: {sum(lpips_all) / len(lpips_all):.6f}')


if __name__ == '__main__':
    main()
import argparse
import cv2
import os
import warnings
from basicsr.metrics import calculate_niqe
from basicsr.utils import scandir
def main(args):
    """Compute and report the average NIQE score of all images found
    (recursively) under ``args.input``.

    Args:
        args: parsed namespace with ``input`` (str) and ``crop_border`` (int).
    """
    scores = []
    paths = sorted(scandir(args.input, recursive=True, full_path=True))

    for idx, path in enumerate(paths):
        stem, _ = os.path.splitext(os.path.basename(path))
        image = cv2.imread(path, cv2.IMREAD_UNCHANGED)
        # The NIQE implementation can emit RuntimeWarnings on flat patches;
        # silence them locally so the per-image report stays readable.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', category=RuntimeWarning)
            score = calculate_niqe(image, args.crop_border, input_order='HWC', convert_to='y')
        print(f'{idx+1:3d}: {stem:25}. \tNIQE: {score:.6f}')
        scores.append(score)

    print(args.input)
    print(f'Average: NIQE: {sum(scores) / len(scores):.6f}')


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', type=str, default='datasets/val_set14/Set14', help='Input path')
    parser.add_argument('--crop_border', type=int, default=0, help='Crop border for each side')
    main(parser.parse_args())
import argparse
import cv2
import numpy as np
from os import path as osp
from basicsr.metrics import calculate_psnr, calculate_ssim
from basicsr.utils import scandir
from basicsr.utils.matlab_functions import bgr2ycbcr
def _match_mean_var(img_gt, img_restored):
    """Match the per-channel mean/std of ``img_restored`` to ``img_gt``.

    The correction is applied twice per channel (shift to the target mean,
    then rescale to the target std, repeated) — rescaling shifts the mean
    again, so the second pass tightens the match. Mutates and returns
    ``img_restored``.
    """
    target_means = [np.mean(img_gt[:, :, c]) for c in range(3)]
    target_stds = [np.std(img_gt[:, :, c]) for c in range(3)]
    for c in range(3):
        for _ in range(2):  # correct twice
            channel = img_restored[:, :, c]
            channel = channel - np.mean(channel) + target_means[c]
            channel = channel / np.std(channel) * target_stds[c]
            img_restored[:, :, c] = channel
    return img_restored


def main(args):
    """Calculate PSNR and SSIM for images.
    """
    psnr_values = []
    ssim_values = []
    gt_paths = sorted(list(scandir(args.gt, recursive=True, full_path=True)))
    restored_paths = sorted(list(scandir(args.restored, recursive=True, full_path=True)))

    print('Testing Y channel.' if args.test_y_channel else 'Testing RGB channels.')

    for idx, gt_path in enumerate(gt_paths):
        stem, ext = osp.splitext(osp.basename(gt_path))
        img_gt = cv2.imread(gt_path, cv2.IMREAD_UNCHANGED).astype(np.float32) / 255.
        # With no suffix the two sorted listings are paired positionally;
        # otherwise the restored file is located by name + suffix.
        if args.suffix == '':
            restored_path = restored_paths[idx]
        else:
            restored_path = osp.join(args.restored, stem + args.suffix + ext)
        img_restored = cv2.imread(restored_path, cv2.IMREAD_UNCHANGED).astype(np.float32) / 255.

        if args.correct_mean_var:
            img_restored = _match_mean_var(img_gt, img_restored)

        if args.test_y_channel and img_gt.ndim == 3 and img_gt.shape[2] == 3:
            img_gt = bgr2ycbcr(img_gt, y_only=True)
            img_restored = bgr2ycbcr(img_restored, y_only=True)

        # calculate PSNR and SSIM (metrics expect [0, 255] range)
        psnr = calculate_psnr(img_gt * 255, img_restored * 255, crop_border=args.crop_border, input_order='HWC')
        ssim = calculate_ssim(img_gt * 255, img_restored * 255, crop_border=args.crop_border, input_order='HWC')
        print(f'{idx+1:3d}: {stem:25}. \tPSNR: {psnr:.6f} dB, \tSSIM: {ssim:.6f}')
        psnr_values.append(psnr)
        ssim_values.append(ssim)

    print(args.gt)
    print(args.restored)
    print(f'Average: PSNR: {sum(psnr_values) / len(psnr_values):.6f} dB, SSIM: {sum(ssim_values) / len(ssim_values):.6f}')


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--gt', type=str, default='datasets/val_set14/Set14', help='Path to gt (Ground-Truth)')
    parser.add_argument('--restored', type=str, default='results/Set14', help='Path to restored images')
    parser.add_argument('--crop_border', type=int, default=0, help='Crop border for each side')
    parser.add_argument('--suffix', type=str, default='', help='Suffix for restored images')
    parser.add_argument(
        '--test_y_channel',
        action='store_true',
        help='If True, test Y channel (In MatLab YCbCr format). If False, test RGB channels.')
    parser.add_argument('--correct_mean_var', action='store_true', help='Correct the mean and var of restored images.')
    main(parser.parse_args())
import argparse
import math
import numpy as np
import torch
from torch import nn
from basicsr.archs.stylegan2_arch import StyleGAN2Generator
from basicsr.metrics.fid import calculate_fid, extract_inception_features, load_patched_inception_v3
def calculate_stylegan2_fid():
    """Sample a trained StyleGAN2 generator and compute its FID against
    precomputed dataset Inception statistics.

    Positional args: checkpoint path and the ``.pth`` stats file produced by
    the dataset-statistics script; optional args control sampling.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    parser = argparse.ArgumentParser()
    parser.add_argument('ckpt', type=str, help='Path to the stylegan2 checkpoint.')
    parser.add_argument('fid_stats', type=str, help='Path to the dataset fid statistics.')
    parser.add_argument('--size', type=int, default=256)
    parser.add_argument('--channel_multiplier', type=int, default=2)
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--num_sample', type=int, default=50000)
    parser.add_argument('--truncation', type=float, default=1)
    parser.add_argument('--truncation_mean', type=int, default=4096)
    args = parser.parse_args()

    # build the generator and load the EMA weights
    generator = StyleGAN2Generator(
        out_size=args.size,
        num_style_feat=512,
        num_mlp=8,
        channel_multiplier=args.channel_multiplier,
        resample_kernel=(1, 3, 3, 1))
    generator.load_state_dict(torch.load(args.ckpt)['params_ema'])
    generator = nn.DataParallel(generator).eval().to(device)

    # optional truncation trick: estimate the mean latent once
    if args.truncation < 1:
        with torch.no_grad():
            truncation_latent = generator.mean_latent(args.truncation_mean)
    else:
        truncation_latent = None

    # inception model
    inception = load_patched_inception_v3(device)
    num_batches = math.ceil(args.num_sample / args.batch_size)

    def sample_batches(count):
        # Lazily draw `count` batches of fake samples. The yield stays inside
        # the no_grad block so the consumer also runs with grads disabled,
        # exactly as in the original control flow.
        for _ in range(count):
            with torch.no_grad():
                latent = torch.randn(args.batch_size, 512, device=device)
                samples, _ = generator([latent], truncation=args.truncation, truncation_latent=truncation_latent)
                yield samples

    features = extract_inception_features(sample_batches(num_batches), inception, num_batches, device)
    features = features.numpy()
    total_len = features.shape[0]
    features = features[:args.num_sample]
    print(f'Extracted {total_len} features, use the first {features.shape[0]} features to calculate stats.')

    sample_mean = np.mean(features, 0)
    sample_cov = np.cov(features, rowvar=False)

    # load the dataset stats and compare
    stats = torch.load(args.fid_stats)
    fid = calculate_fid(sample_mean, sample_cov, stats['mean'], stats['cov'])
    print('fid:', fid)


if __name__ == '__main__':
    calculate_stylegan2_fid()
import importlib
from copy import deepcopy
from os import path as osp
from basicsr.utils import get_root_logger, scandir
from basicsr.utils.registry import MODEL_REGISTRY
# Only build_model is the public API of this package.
__all__ = ['build_model']

# automatically scan and import model modules for registry
# scan all the files under the 'models' folder and collect files ending with
# '_model.py'
model_folder = osp.dirname(osp.abspath(__file__))
model_filenames = [osp.splitext(osp.basename(v))[0] for v in scandir(model_folder) if v.endswith('_model.py')]
# import all the model modules
# NOTE: importing each module is what triggers its @MODEL_REGISTRY.register()
# decorators, populating the registry as a side effect at package import time.
_model_modules = [importlib.import_module(f'basicsr.models.{file_name}') for file_name in model_filenames]
def build_model(opt):
    """Instantiate a registered model from an options dict.

    Args:
        opt (dict): Configuration. It must contain:
            model_type (str): Model type (key in MODEL_REGISTRY).

    Returns:
        The constructed model instance.
    """
    # Deep-copy so the model cannot mutate the caller's options.
    opt_copy = deepcopy(opt)
    model_cls = MODEL_REGISTRY.get(opt_copy['model_type'])
    model = model_cls(opt_copy)
    get_root_logger().info(f'Model [{model.__class__.__name__}] is created.')
    return model
import os
import time
import torch
from collections import OrderedDict
from copy import deepcopy
from torch.nn.parallel import DataParallel, DistributedDataParallel
from basicsr.models import lr_scheduler as lr_scheduler
from basicsr.utils import get_root_logger
from basicsr.utils.dist_util import master_only
class BaseModel():
    """Base model.

    Provides the training plumbing shared by all models: device placement,
    optimizer/scheduler setup, EMA, checkpoint save/load with retries,
    best-metric bookkeeping, and distributed loss reduction. Subclasses
    override feed_data / optimize_parameters / get_current_visuals / save
    and the (non)dist_validation hooks.
    """

    def __init__(self, opt):
        # opt (dict): full run configuration; must provide 'num_gpu' and
        # 'is_train'. 'dist'/'rank'/'world_size' are read later by other
        # methods when distributed training is enabled.
        self.opt = opt
        self.device = torch.device('cuda' if opt['num_gpu'] != 0 else 'cpu')
        self.is_train = opt['is_train']
        self.schedulers = []
        self.optimizers = []

    def feed_data(self, data):
        """Feed input data to the model. Overridden by subclasses."""
        pass

    def optimize_parameters(self):
        """Run one optimization step. Overridden by subclasses."""
        pass

    def get_current_visuals(self):
        """Return current visual results. Overridden by subclasses."""
        pass

    def save(self, epoch, current_iter):
        """Save networks and training state."""
        pass

    def validation(self, dataloader, current_iter, tb_logger, save_img=False, save_as_dir=None):
        """Validation function.

        Dispatches to dist_validation or nondist_validation based on the
        'dist' flag in the options.

        Args:
            dataloader (torch.utils.data.DataLoader): Validation dataloader.
            current_iter (int): Current iteration.
            tb_logger (tensorboard logger): Tensorboard logger.
            save_img (bool): Whether to save images. Default: False.
            save_as_dir (str | None): Extra directory to also save images to.
                Default: None.
        """
        if self.opt['dist']:
            self.dist_validation(dataloader, current_iter, tb_logger, save_img, save_as_dir)
        else:
            self.nondist_validation(dataloader, current_iter, tb_logger, save_img, save_as_dir)

    def _initialize_best_metric_results(self, dataset_name):
        """Initialize the best metric results dict for recording the best metric value and iteration."""
        # Already initialized for this dataset: nothing to do.
        if hasattr(self, 'best_metric_results') and dataset_name in self.best_metric_results:
            return
        elif not hasattr(self, 'best_metric_results'):
            self.best_metric_results = dict()

        # add a dataset record
        record = dict()
        for metric, content in self.opt['val']['metrics'].items():
            # 'better' declares the metric direction; default 'higher'.
            better = content.get('better', 'higher')
            init_val = float('-inf') if better == 'higher' else float('inf')
            record[metric] = dict(better=better, val=init_val, iter=-1)
        self.best_metric_results[dataset_name] = record

    def _update_metric_result(self, dataset_name, metric, val, current_iter):
        # Unconditional overwrite (no direction check) — contrast with
        # _update_best_metric_result below.
        self.best_metric_results[dataset_name][metric]['val'] = val
        self.best_metric_results[dataset_name][metric]['iter'] = current_iter

    def _update_best_metric_result(self, dataset_name, metric, val, current_iter):
        """Record val/iter if it beats the stored best; return True if updated."""
        if self.best_metric_results[dataset_name][metric]['better'] == 'higher':
            if val >= self.best_metric_results[dataset_name][metric]['val']:
                self.best_metric_results[dataset_name][metric]['val'] = val
                self.best_metric_results[dataset_name][metric]['iter'] = current_iter
                return True
            else:
                return False
        else:
            if val <= self.best_metric_results[dataset_name][metric]['val']:
                self.best_metric_results[dataset_name][metric]['val'] = val
                self.best_metric_results[dataset_name][metric]['iter'] = current_iter
                return True
            else:
                return False

    def model_ema(self, decay=0.999):
        """Exponential moving average update: ema = decay*ema + (1-decay)*net_g.

        Requires self.net_g and self.net_g_ema to exist with matching
        parameter names. Only parameters are averaged (not buffers).
        """
        net_g = self.get_bare_model(self.net_g)

        net_g_params = dict(net_g.named_parameters())
        net_g_ema_params = dict(self.net_g_ema.named_parameters())

        for k in net_g_ema_params.keys():
            net_g_ema_params[k].data.mul_(decay).add_(net_g_params[k].data, alpha=1 - decay)

    def copy_model(self, net_a, net_b):
        """copy model from net_a to net_b"""
        # Unwrap any DataParallel/DistributedDataParallel before copying so
        # state-dict keys line up.
        tmp_net_a = self.get_bare_model(net_a)
        tmp_net_b = self.get_bare_model(net_b)
        tmp_net_b.load_state_dict(tmp_net_a.state_dict())

    def get_current_log(self):
        """Return the last reduced loss log dict (set by reduce_loss_dict)."""
        return self.log_dict

    def model_to_device(self, net):
        """Model to device. It also warps models with DistributedDataParallel
        or DataParallel.

        Args:
            net (nn.Module)
        """
        net = net.to(self.device)
        if self.opt['dist']:
            find_unused_parameters = self.opt.get('find_unused_parameters', False)
            net = DistributedDataParallel(
                net, device_ids=[torch.cuda.current_device()], find_unused_parameters=find_unused_parameters)
        elif self.opt['num_gpu'] > 1:
            net = DataParallel(net)
        return net

    def get_optimizer(self, optim_type, params, lr, **kwargs):
        """Build an optimizer; only 'Adam' is supported here."""
        if optim_type == 'Adam':
            optimizer = torch.optim.Adam(params, lr, **kwargs)
        else:
            # NOTE(review): 'supperted' typo is in the runtime message;
            # left untouched here (behavior-bearing string).
            raise NotImplementedError(f'optimizer {optim_type} is not supperted yet.')
        return optimizer

    def setup_schedulers(self):
        """Set up schedulers."""
        train_opt = self.opt['train']
        # pop('type') mutates train_opt['scheduler'] so the remaining keys can
        # be splatted directly into the scheduler constructor.
        scheduler_type = train_opt['scheduler'].pop('type')
        if scheduler_type in ['MultiStepLR', 'MultiStepRestartLR']:
            for optimizer in self.optimizers:
                self.schedulers.append(lr_scheduler.MultiStepRestartLR(optimizer, **train_opt['scheduler']))
        elif scheduler_type == 'CosineAnnealingRestartLR':
            for optimizer in self.optimizers:
                self.schedulers.append(lr_scheduler.CosineAnnealingRestartLR(optimizer, **train_opt['scheduler']))
        else:
            raise NotImplementedError(f'Scheduler {scheduler_type} is not implemented yet.')

    def get_bare_model(self, net):
        """Get bare model, especially under wrapping with
        DistributedDataParallel or DataParallel.
        """
        if isinstance(net, (DataParallel, DistributedDataParallel)):
            net = net.module
        return net

    @master_only
    def print_network(self, net):
        """Print the str and parameter number of a network.

        Args:
            net (nn.Module)
        """
        if isinstance(net, (DataParallel, DistributedDataParallel)):
            net_cls_str = f'{net.__class__.__name__} - {net.module.__class__.__name__}'
        else:
            net_cls_str = f'{net.__class__.__name__}'

        net = self.get_bare_model(net)
        net_str = str(net)
        net_params = sum(map(lambda x: x.numel(), net.parameters()))

        logger = get_root_logger()
        logger.info(f'Network: {net_cls_str}, with parameters: {net_params:,d}')
        logger.info(net_str)

    def _set_lr(self, lr_groups_l):
        """Set learning rate for warmup.

        Args:
            lr_groups_l (list): List for lr_groups, each for an optimizer.
        """
        for optimizer, lr_groups in zip(self.optimizers, lr_groups_l):
            for param_group, lr in zip(optimizer.param_groups, lr_groups):
                param_group['lr'] = lr

    def _get_init_lr(self):
        """Get the initial lr, which is set by the scheduler.
        """
        init_lr_groups_l = []
        for optimizer in self.optimizers:
            init_lr_groups_l.append([v['initial_lr'] for v in optimizer.param_groups])
        return init_lr_groups_l

    def update_learning_rate(self, current_iter, warmup_iter=-1):
        """Update learning rate.

        Args:
            current_iter (int): Current iteration.
            warmup_iter (int): Warmup iter numbers. -1 for no warmup.
                Default: -1.
        """
        if current_iter > 1:
            for scheduler in self.schedulers:
                scheduler.step()
        # set up warm-up learning rate
        if current_iter < warmup_iter:
            # get initial lr for each group
            init_lr_g_l = self._get_init_lr()
            # modify warming-up learning rates
            # currently only support linearly warm up
            warm_up_lr_l = []
            for init_lr_g in init_lr_g_l:
                warm_up_lr_l.append([v / warmup_iter * current_iter for v in init_lr_g])
            # set learning rate
            self._set_lr(warm_up_lr_l)

    def get_current_learning_rate(self):
        """Return the current lr of the first param group of each optimizer."""
        return [optim.param_groups[0]['lr'] for optim in self.optimizers]

    @master_only
    def save_network(self, net, net_label, current_iter, param_key='params'):
        """Save networks.

        Args:
            net (nn.Module | list[nn.Module]): Network(s) to be saved.
            net_label (str): Network label.
            current_iter (int): Current iter number.
            param_key (str | list[str]): The parameter key(s) to save network.
                Default: 'params'.
        """
        if current_iter == -1:
            current_iter = 'latest'
        save_filename = f'{net_label}_{current_iter}.pth'
        save_path = os.path.join(self.opt['path']['models'], save_filename)

        net = net if isinstance(net, list) else [net]
        param_key = param_key if isinstance(param_key, list) else [param_key]
        assert len(net) == len(param_key), 'The lengths of net and param_key should be the same.'

        save_dict = {}
        for net_, param_key_ in zip(net, param_key):
            net_ = self.get_bare_model(net_)
            state_dict = net_.state_dict()
            for key, param in state_dict.items():
                if key.startswith('module.'):  # remove unnecessary 'module.'
                    key = key[7:]
                # NOTE(review): inserting a renamed key while iterating the
                # same dict can raise RuntimeError if any 'module.' key
                # exists — presumably never hit after get_bare_model; verify.
                state_dict[key] = param.cpu()
            save_dict[param_key_] = state_dict

        # avoid occasional writing errors
        retry = 3
        while retry > 0:
            try:
                torch.save(save_dict, save_path)
            except Exception as e:
                logger = get_root_logger()
                logger.warning(f'Save model error: {e}, remaining retry times: {retry - 1}')
                time.sleep(1)
            else:
                break
            finally:
                # decrement runs on both success and failure paths
                retry -= 1
        if retry == 0:
            # logger is defined here because retry only reaches 0 after at
            # least one exception branch ran.
            logger.warning(f'Still cannot save {save_path}. Just ignore it.')
            # raise IOError(f'Cannot save {save_path}.')

    def _print_different_keys_loading(self, crt_net, load_net, strict=True):
        """Print keys with different name or different size when loading models.

        1. Print keys with different names.
        2. If strict=False, print the same key but with different tensor size.
            It also ignore these keys with different sizes (not load).

        Args:
            crt_net (torch model): Current network.
            load_net (dict): Loaded network.
            strict (bool): Whether strictly loaded. Default: True.
        """
        crt_net = self.get_bare_model(crt_net)
        crt_net = crt_net.state_dict()
        crt_net_keys = set(crt_net.keys())
        load_net_keys = set(load_net.keys())

        logger = get_root_logger()
        if crt_net_keys != load_net_keys:
            logger.warning('Current net - loaded net:')
            for v in sorted(list(crt_net_keys - load_net_keys)):
                logger.warning(f'  {v}')
            logger.warning('Loaded net - current net:')
            for v in sorted(list(load_net_keys - crt_net_keys)):
                logger.warning(f'  {v}')

        # check the size for the same keys
        if not strict:
            common_keys = crt_net_keys & load_net_keys
            for k in common_keys:
                if crt_net[k].size() != load_net[k].size():
                    logger.warning(f'Size different, ignore [{k}]: crt_net: '
                                   f'{crt_net[k].shape}; load_net: {load_net[k].shape}')
                    # rename so load_state_dict(strict=False) skips it
                    load_net[k + '.ignore'] = load_net.pop(k)

    def load_network(self, net, load_path, strict=True, param_key='params'):
        """Load network.

        Args:
            net (nn.Module): Network.
            load_path (str): The path of networks to be loaded.
            strict (bool): Whether strictly loaded.
            param_key (str): The parameter key of loaded network. If set to
                None, use the root 'path'.
                Default: 'params'.
        """
        logger = get_root_logger()
        net = self.get_bare_model(net)
        load_net = torch.load(load_path, map_location=lambda storage, loc: storage)
        if param_key is not None:
            # fall back to 'params' when the requested key is absent
            if param_key not in load_net and 'params' in load_net:
                param_key = 'params'
                logger.info('Loading: params_ema does not exist, use params.')
            load_net = load_net[param_key]
        logger.info(f'Loading {net.__class__.__name__} model from {load_path}, with param key: [{param_key}].')
        # remove unnecessary 'module.'
        # (iterate over a deepcopy so we can mutate load_net safely)
        for k, v in deepcopy(load_net).items():
            if k.startswith('module.'):
                load_net[k[7:]] = v
                load_net.pop(k)
        self._print_different_keys_loading(net, load_net, strict)
        net.load_state_dict(load_net, strict=strict)

    @master_only
    def save_training_state(self, epoch, current_iter):
        """Save training states during training, which will be used for
        resuming.

        Args:
            epoch (int): Current epoch.
            current_iter (int): Current iteration.
        """
        if current_iter != -1:
            state = {'epoch': epoch, 'iter': current_iter, 'optimizers': [], 'schedulers': []}
            for o in self.optimizers:
                state['optimizers'].append(o.state_dict())
            for s in self.schedulers:
                state['schedulers'].append(s.state_dict())
            save_filename = f'{current_iter}.state'
            save_path = os.path.join(self.opt['path']['training_states'], save_filename)

            # avoid occasional writing errors
            retry = 3
            while retry > 0:
                try:
                    torch.save(state, save_path)
                except Exception as e:
                    logger = get_root_logger()
                    logger.warning(f'Save training state error: {e}, remaining retry times: {retry - 1}')
                    time.sleep(1)
                else:
                    break
                finally:
                    retry -= 1
            if retry == 0:
                logger.warning(f'Still cannot save {save_path}. Just ignore it.')
                # raise IOError(f'Cannot save {save_path}.')

    def resume_training(self, resume_state):
        """Reload the optimizers and schedulers for resumed training.

        Args:
            resume_state (dict): Resume state.
        """
        resume_optimizers = resume_state['optimizers']
        resume_schedulers = resume_state['schedulers']
        assert len(resume_optimizers) == len(self.optimizers), 'Wrong lengths of optimizers'
        assert len(resume_schedulers) == len(self.schedulers), 'Wrong lengths of schedulers'
        for i, o in enumerate(resume_optimizers):
            self.optimizers[i].load_state_dict(o)
        for i, s in enumerate(resume_schedulers):
            self.schedulers[i].load_state_dict(s)

    def reduce_loss_dict(self, loss_dict):
        """Reduce loss dict.

        In distributed training, it averages the losses among different GPUs.

        Args:
            loss_dict (OrderedDict): Loss dict.

        Returns:
            OrderedDict: losses as plain Python floats (rank-0 holds the
            averaged values after the reduce).
        """
        with torch.no_grad():
            if self.opt['dist']:
                keys = []
                losses = []
                for name, value in loss_dict.items():
                    keys.append(name)
                    losses.append(value)
                losses = torch.stack(losses, 0)
                # sum across ranks onto rank 0, then divide by world size
                torch.distributed.reduce(losses, dst=0)
                if self.opt['rank'] == 0:
                    losses /= self.opt['world_size']
                loss_dict = {key: loss for key, loss in zip(keys, losses)}

            log_dict = OrderedDict()
            for name, value in loss_dict.items():
                log_dict[name] = value.mean().item()

            return log_dict
from collections import OrderedDict
from os import path as osp
from tqdm import tqdm
import torch
import torchvision.utils as tvu
from basicsr.archs import build_network
from basicsr.losses import build_loss
from basicsr.utils import get_root_logger, imwrite, tensor2img, img2tensor
from basicsr.utils.registry import MODEL_REGISTRY
from .base_model import BaseModel
import copy
import pyiqa
@MODEL_REGISTRY.register()
class VQDehazeModel(BaseModel):
def __init__(self, opt):
    """Build the dehazing generator, metric functions, and (in LQ stage)
    the frozen HQ prior network; load any configured pretrained weights.

    Args:
        opt (dict): full configuration; reads 'network_g', 'val', 'path',
            'train' and the LQ_stage flag inside 'network_g'.
    """
    super().__init__(opt)

    # define network
    self.net_g = build_network(opt['network_g'])
    self.net_g = self.model_to_device(self.net_g)

    # define metric functions
    if self.opt['val'].get('metrics') is not None:
        self.metric_funcs = {}
        # NOTE: the loop variable shadows the `opt` parameter from here on.
        for _, opt in self.opt['val']['metrics'].items():
            mopt = opt.copy()
            name = mopt.pop('type', None)
            # 'better' is bookkeeping for best-metric tracking, not a
            # pyiqa constructor argument.
            mopt.pop('better', None)
            self.metric_funcs[name] = pyiqa.create_metric(name, device=self.device, **mopt)

    # load pre-trained HQ ckpt, frozen decoder and codebook
    self.LQ_stage = self.opt['network_g'].get('LQ_stage', False)
    if self.LQ_stage:
        load_path = self.opt['path'].get('pretrain_network_hq', None)
        assert load_path is not None, 'Need to specify hq prior model path in LQ stage'

        # the HQ prior net shares the generator config but with LQ_stage off
        hq_opt = self.opt['network_g'].copy()
        hq_opt['LQ_stage'] = False
        # if hq_opt['only_residual']:
        #     hq_opt['only_residual'] = False
        self.net_hq = build_network(hq_opt)
        self.net_hq = self.model_to_device(self.net_hq)
        self.load_network(self.net_hq, load_path, self.opt['path']['strict_load'])

        # warm-start net_g from the same checkpoint (non-strict)
        self.load_network(self.net_g, load_path, False)
        frozen_module_keywords = self.opt['network_g'].get('frozen_module_keywords', None)
        if frozen_module_keywords is not None:
            # freeze every submodule whose name contains any keyword
            for name, module in self.net_g.named_modules():
                for fkw in frozen_module_keywords:
                    if fkw in name:
                        for p in module.parameters():
                            p.requires_grad = False
                        break

    # load pretrained models
    load_path = self.opt['path'].get('pretrain_network_g', None)
    logger = get_root_logger()
    if load_path is not None:
        logger.info(f'Loading net_g from {load_path}')
        self.load_network(self.net_g, load_path, self.opt['path']['strict_load'])

    if self.is_train:
        self.init_training_settings()
        # discriminator is used only when the GAN loss weight is nonzero
        self.use_dis = (self.opt['train']['gan_opt']['loss_weight'] != 0)
        self.net_d_best = copy.deepcopy(self.net_d)
        self.net_g_best = copy.deepcopy(self.net_g)
def init_training_settings(self):
    """Create the discriminator, loss criteria, optimizers and schedulers.

    Called from __init__ only when is_train is True; assumes self.net_g
    already exists.
    """
    logger = get_root_logger()
    train_opt = self.opt['train']
    self.net_g.train()

    # define network net_d
    self.net_d = build_network(self.opt['network_d'])
    self.net_d = self.model_to_device(self.net_d)

    # load pretrained d models
    load_path = self.opt['path'].get('pretrain_network_d', None)
    # print(load_path)
    if load_path is not None:
        logger.info(f'Loading net_d from {load_path}')
        self.load_network(self.net_d, load_path, self.opt['path'].get('strict_load_d', True))
    self.net_d.train()

    # define losses (each criterion is None/absent when unconfigured)
    if train_opt.get('pixel_opt'):
        self.cri_pix = build_loss(train_opt['pixel_opt']).to(self.device)
    else:
        self.cri_pix = None

    if train_opt.get('perceptual_opt'):
        self.cri_perceptual = build_loss(train_opt['perceptual_opt']).to(self.device)
        self.model_to_device(self.cri_perceptual)
    else:
        self.cri_perceptual = None

    # NOTE: self.cri_gan stays undefined if 'gan_opt' is missing.
    if train_opt.get('gan_opt'):
        self.cri_gan = build_loss(train_opt['gan_opt']).to(self.device)

    self.net_d_iters = train_opt.get('net_d_iters', 1)
    self.net_d_init_iters = train_opt.get('net_d_init_iters', 0)

    # set up optimizers and schedulers
    self.setup_optimizers()
    self.setup_schedulers()
def setup_optimizers(self):
    """Build optimizer_g (over all net_g parameters) and optimizer_d.

    The optimizer class is resolved dynamically from torch.optim by the
    'type' entry of optim_g / optim_d (which is popped, so the remaining
    keys become constructor kwargs).
    """
    train_opt = self.opt['train']
    optim_params = []
    for k, v in self.net_g.named_parameters():
        # NOTE(review): frozen params (requires_grad=False) are still passed
        # to the optimizer; they receive no grads so they are not updated,
        # but the warning below only logs — it does not filter them out.
        optim_params.append(v)
        if not v.requires_grad:
            logger = get_root_logger()
            logger.warning(f'Params {k} will not be optimized.')

    # optimizer g
    optim_type = train_opt['optim_g'].pop('type')
    optim_class = getattr(torch.optim, optim_type)
    self.optimizer_g = optim_class(optim_params, **train_opt['optim_g'])
    self.optimizers.append(self.optimizer_g)

    # optimizer d
    optim_type = train_opt['optim_d'].pop('type')
    optim_class = getattr(torch.optim, optim_type)
    self.optimizer_d = optim_class(self.net_d.parameters(), **train_opt['optim_d'])
    self.optimizers.append(self.optimizer_d)
def feed_data(self, data):
    """Move the input batch onto the model device.

    Args:
        data (dict): must contain 'lq' (low-quality input tensor); 'gt'
            is optional and only set when present (e.g. absent at pure
            inference time).
    """
    self.lq = data['lq'].to(self.device)
    if 'gt' in data:
        self.gt = data['gt'].to(self.device)
def optimize_parameters(self, current_iter):
    """Run one generator step (codebook/semantic/pixel/perceptual/GAN
    losses) and, when enabled, one discriminator step; store the reduced
    losses in self.log_dict.

    Args:
        current_iter (int): global iteration, used to gate GAN training.
    """
    train_opt = self.opt['train']

    # freeze D while updating G
    for p in self.net_d.parameters():
        p.requires_grad = False
    self.optimizer_g.zero_grad()

    if self.LQ_stage:
        # HQ prior net provides target quantized features / indices
        # (no grads needed for the frozen prior).
        with torch.no_grad():
            self.gt_rec, _, _, _, _, quant_gt, gt_indices = self.net_hq(self.gt)

        self.lq.requires_grad = True
        self.output, self.output_residual, l_codebook, l_semantic, quant_g, _, _ = self.net_g(self.lq, gt_indices)
    else:
        # HQ (pretrain) stage: autoencode the ground truth
        self.output, self.output_residual, l_codebook, l_semantic, _ = self.net_g(self.gt)
    # print(l_codebook.mean())

    l_g_total = 0
    loss_dict = OrderedDict()
    # ===================================================
    # codebook loss
    if train_opt.get('codebook_opt', None):
        l_codebook *= train_opt['codebook_opt']['loss_weight']
        l_g_total += l_codebook.mean()
        loss_dict['l_codebook'] = l_codebook.mean()

    # semantic cluster loss, only for LQ stage!
    if train_opt.get('semantic_opt', None) and isinstance(l_semantic, torch.Tensor):
        l_semantic *= train_opt['semantic_opt']['loss_weight']
        l_semantic = l_semantic.mean()
        l_g_total += l_semantic
        loss_dict['l_semantic'] = l_semantic

    # pixel loss
    if self.cri_pix:
        l_pix = self.cri_pix(self.output_residual, self.gt)
        l_g_total += l_pix
        loss_dict['l_pix'] = l_pix

    # perceptual loss
    if self.cri_perceptual:
        l_percep, l_style = self.cri_perceptual(self.output_residual, self.gt)
        if l_percep is not None:
            l_g_total += l_percep.mean()
            loss_dict['l_percep'] = l_percep.mean()
        if l_style is not None:
            l_g_total += l_style
            loss_dict['l_style'] = l_style

    # gan loss (adversarial loss on the quantized features, not the image)
    if self.use_dis and current_iter > train_opt['net_d_init_iters']:
        fake_g_pred = self.net_d(quant_g)
        l_g_gan = self.cri_gan(fake_g_pred, True, is_disc=False)
        l_g_total += l_g_gan
        loss_dict['l_g_gan'] = l_g_gan

    # print(l_g_total.requires_grad)
    # if l_g_total.requires_grad:
    l_g_total.mean().backward()
    self.optimizer_g.step()

    # optimize net_d
    self.fixed_disc = self.opt['train'].get('fixed_disc', False)
    if not self.fixed_disc and self.use_dis and current_iter > train_opt['net_d_init_iters']:
        for p in self.net_d.parameters():
            p.requires_grad = True
        self.optimizer_d.zero_grad()
        # real (quant_gt / quant_g only exist in the LQ branch above —
        # presumably GAN training is only enabled in LQ stage; verify)
        real_d_pred = self.net_d(quant_gt)
        l_d_real = self.cri_gan(real_d_pred, True, is_disc=True)
        loss_dict['l_d_real'] = l_d_real
        loss_dict['out_d_real'] = torch.mean(real_d_pred.detach())
        l_d_real.backward()
        # fake
        fake_d_pred = self.net_d(quant_g.detach())
        l_d_fake = self.cri_gan(fake_d_pred, False, is_disc=True)
        loss_dict['l_d_fake'] = l_d_fake
        loss_dict['out_d_fake'] = torch.mean(fake_d_pred.detach())
        l_d_fake.backward()
        self.optimizer_d.step()

    self.log_dict = self.reduce_loss_dict(loss_dict)
def test(self):
    """Run inference on self.lq and store the result in self.output.

    Small inputs go through net_g.test(); larger-than-threshold inputs use
    tiled inference (net_g.test_tile) to bound GPU memory. Restores train
    mode afterwards.
    """
    self.net_g.eval()
    net_g = self.get_bare_model(self.net_g)
    min_size = 8000 * 8000  # use smaller min_size with limited GPU memory
    lq_input = self.lq
    # assumes lq is a 4-D (N, C, H, W) tensor
    _, _, h, w = lq_input.shape
    if h*w < min_size:
        self.output = net_g.test(lq_input)
    else:
        self.output = net_g.test_tile(lq_input)
    self.net_g.train()
def dist_validation(self, dataloader, current_iter, tb_logger, save_img, save_as_dir=None):
    """Distributed validation entry point.

    Multi-GPU validation is not implemented; logs a notice and falls back
    to the single-GPU nondist_validation path.
    """
    logger = get_root_logger()
    logger.info('Only support single GPU validation.')
    self.nondist_validation(dataloader, current_iter, tb_logger, save_img, save_as_dir)
def nondist_validation(self, dataloader, current_iter, tb_logger,
save_img, save_as_dir):
dataset_name = dataloader.dataset.opt['name']
with_metrics = self.opt['val'].get('metrics') is not None
if with_metrics:
self.metric_results = {
metric: 0
for metric in self.opt['val']['metrics'].keys()
}
pbar = tqdm(total=len(dataloader), unit='image')
if with_metrics:
if not hasattr(self, 'metric_results'): # only execute in the first run
self.metric_results = {metric: 0 for metric in self.opt['val']['metrics'].keys()}
# initialize the best metric results for each dataset_name (supporting multiple validation datasets)
self._initialize_best_metric_results(dataset_name)
# zero self.metric_results
self.metric_results = {metric: 0 for metric in self.metric_results}
self.key_metric = self.opt['val'].get('key_metric')
for idx, val_data in enumerate(dataloader):
img_name = osp.splitext(osp.basename(val_data['lq_path'][0]))[0]
self.feed_data(val_data)
self.test()
sr_img = tensor2img(self.output[0])
metric_data = [img2tensor(sr_img).unsqueeze(0) / 255, self.gt]
# tentative for out of GPU memory
del self.lq
del self.output
torch.cuda.empty_cache()
if save_img:
if self.opt['is_train']:
save_img_path = osp.join(self.opt['path']['visualization'], 'image_results',
f'{current_iter}',
f'{img_name}.png')
else:
if self.opt['val']['suffix']:
save_img_path = osp.join(
self.opt['path']['visualization'], dataset_name,
f'{img_name}_{self.opt["val"]["suffix"]}.png')
else:
save_img_path = osp.join(
self.opt['path']['visualization'], dataset_name,
f'{img_name}_{self.opt["name"]}.png')
if save_as_dir:
save_as_img_path = osp.join(save_as_dir, f'{img_name}.png')
imwrite(sr_img, save_as_img_path)
imwrite(sr_img, save_img_path)
if with_metrics:
# calculate metrics
for name, opt_ in self.opt['val']['metrics'].items():
tmp_result = self.metric_funcs[name](*metric_data)
self.metric_results[name] += tmp_result.item()
pbar.update(1)
pbar.set_description(f'Test {img_name}')
pbar.close()
if with_metrics:
# calculate average metric
for metric in self.metric_results.keys():
self.metric_results[metric] /= (idx + 1)
if self.key_metric is not None:
# If the best metric is updated, update and save best model
to_update = self._update_best_metric_result(dataset_name, self.key_metric, self.metric_results[self.key_metric], current_iter)
if to_update:
for name, opt_ in self.opt['val']['metrics'].items():
self._update_metric_result(dataset_name, name, self.metric_results[name], current_iter)
self.copy_model(self.net_g, self.net_g_best)
self.copy_model(self.net_d, self.net_d_best)
self.save_network(self.net_g, 'net_g_best', '')
self.save_network(self.net_d, 'net_d_best', '')
else:
# update each metric separately
updated = []
for name, opt_ in self.opt['val']['metrics'].items():
tmp_updated = self._update_best_metric_result(dataset_name, name, self.metric_results[name], current_iter)
updated.append(tmp_updated)
# save best model if any metric is updated
if sum(updated):
self.copy_model(self.net_g, self.net_g_best)
self.copy_model(self.net_d, self.net_d_best)
self.save_network(self.net_g, 'net_g_best', '')
self.save_network(self.net_d, 'net_d_best', '')
self._log_validation_metric_values(current_iter, dataset_name, tb_logger)
def _log_validation_metric_values(self, current_iter, dataset_name, tb_logger):
log_str = f'Validation {dataset_name}\n'
for metric, value in self.metric_results.items():
log_str += f'\t # {metric}: {value:.4f}'
if hasattr(self, 'best_metric_results'):
log_str += (f'\tBest: {self.best_metric_results[dataset_name][metric]["val"]:.4f} @ '
f'{self.best_metric_results[dataset_name][metric]["iter"]} iter')
log_str += '\n'
logger = get_root_logger()
logger.info(log_str)
if tb_logger:
for metric, value in self.metric_results.items():
tb_logger.add_scalar(f'metrics/{dataset_name}/{metric}', value, current_iter)
def vis_single_code(self, up_factor=2):
net_g = self.get_bare_model(self.net_g)
codenum = self.opt['network_g']['codebook_params'][0][1]
with torch.no_grad():
code_idx = torch.arange(codenum).reshape(codenum, 1, 1, 1)
code_idx = code_idx.repeat(1, 1, up_factor, up_factor)
output_img = net_g.decode_indices(code_idx)
output_img = tvu.make_grid(output_img, nrow=32)
return output_img.unsqueeze(0)
def get_current_visuals(self):
vis_samples = 16
out_dict = OrderedDict()
out_dict['lq'] = self.lq.detach().cpu()[:vis_samples]
if self.output != None:
out_dict['result_codebook'] = self.output.detach().cpu()[:vis_samples]
if self.output_residual != None:
out_dict['result_residual'] = self.output_residual.detach().cpu()[:vis_samples]
if not self.LQ_stage:
out_dict['codebook'] = self.vis_single_code()
if hasattr(self, 'gt_rec'):
out_dict['gt_rec'] = self.gt_rec.detach().cpu()[:vis_samples]
if hasattr(self, 'gt'):
out_dict['gt'] = self.gt.detach().cpu()[:vis_samples]
return out_dict
def save(self, epoch, current_iter):
self.save_network(self.net_g, 'net_g', current_iter)
self.save_network(self.net_d, 'net_d', current_iter)
self.save_training_state(epoch, current_iter)
import math
from collections import Counter
from torch.optim.lr_scheduler import _LRScheduler
class MultiStepRestartLR(_LRScheduler):
    """Multi-step learning rate schedule with warm restarts.

    At each iteration listed in ``restarts`` the learning rate is reset to
    ``initial_lr * restart_weight``; at each iteration listed in
    ``milestones`` it is multiplied by ``gamma`` (once per occurrence of the
    milestone).

    Args:
        optimizer (torch.nn.optimizer): Torch optimizer.
        milestones (list): Iterations that will decrease learning rate.
        gamma (float): Decrease ratio. Default: 0.1.
        restarts (list): Restart iterations. Default: [0].
        restart_weights (list): Restart weights at each restart iteration.
            Default: [1].
        last_epoch (int): Used in _LRScheduler. Default: -1.
    """

    def __init__(self, optimizer, milestones, gamma=0.1, restarts=(0, ), restart_weights=(1, ), last_epoch=-1):
        # Counter keeps multiplicities, so a milestone listed twice decays twice.
        self.milestones = Counter(milestones)
        self.gamma = gamma
        self.restarts = restarts
        self.restart_weights = restart_weights
        assert len(self.restarts) == len(self.restart_weights), 'restarts and their weights do not match.'
        super(MultiStepRestartLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        epoch = self.last_epoch
        # Restart takes precedence: reset each group from its initial lr.
        if epoch in self.restarts:
            weight = self.restart_weights[self.restarts.index(epoch)]
            return [group['initial_lr'] * weight for group in self.optimizer.param_groups]
        decay_count = self.milestones.get(epoch, 0)
        if decay_count == 0:
            return [group['lr'] for group in self.optimizer.param_groups]
        factor = self.gamma ** decay_count
        return [group['lr'] * factor for group in self.optimizer.param_groups]
def get_position_from_periods(iteration, cumulative_period):
    """Get the position from a period list.

    It will return the index of the right-closest number in the period list.
    For example, the cumulative_period = [100, 200, 300, 400],
    if iteration == 50, return 0;
    if iteration == 210, return 2;
    if iteration == 300, return 2.

    Args:
        iteration (int): Current iteration.
        cumulative_period (list[int]): Cumulative period list.

    Returns:
        int: The position of the right-closest number in the period list.

    Raises:
        ValueError: If ``iteration`` exceeds the last cumulative period.
            (Previously the function silently returned ``None`` here, which
            surfaced later as an opaque ``TypeError`` when used as an index.)
    """
    for i, period in enumerate(cumulative_period):
        if iteration <= period:
            return i
    raise ValueError(f'iteration {iteration} exceeds the last cumulative period {cumulative_period[-1]}.')
class CosineAnnealingRestartLR(_LRScheduler):
    """Cosine annealing learning rate schedule with warm restarts.

    An example of config:
    periods = [10, 10, 10, 10]
    restart_weights = [1, 0.5, 0.5, 0.5]
    eta_min=1e-7
    It has four cycles, each has 10 iterations. At 10th, 20th, 30th, the
    scheduler will restart with the weights in restart_weights.

    Args:
        optimizer (torch.nn.optimizer): Torch optimizer.
        periods (list): Period for each cosine anneling cycle.
        restart_weights (list): Restart weights at each restart iteration.
            Default: [1].
        eta_min (float): The minimum lr. Default: 0.
        last_epoch (int): Used in _LRScheduler. Default: -1.
    """

    def __init__(self, optimizer, periods, restart_weights=(1, ), eta_min=0, last_epoch=-1):
        self.periods = periods
        self.restart_weights = restart_weights
        self.eta_min = eta_min
        assert (len(self.periods) == len(
            self.restart_weights)), 'periods and restart_weights should have the same length.'
        # Running prefix sums of the periods mark the restart boundaries.
        running, cumulative = 0, []
        for period in self.periods:
            running += period
            cumulative.append(running)
        self.cumulative_period = cumulative
        super(CosineAnnealingRestartLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        cycle = get_position_from_periods(self.last_epoch, self.cumulative_period)
        weight = self.restart_weights[cycle]
        nearest_restart = self.cumulative_period[cycle - 1] if cycle > 0 else 0
        phase = math.pi * ((self.last_epoch - nearest_restart) / self.periods[cycle])
        return [
            self.eta_min + weight * 0.5 * (base_lr - self.eta_min) * (1 + math.cos(phase))
            for base_lr in self.base_lrs
        ]
from .deform_conv import (DeformConv, DeformConvPack, ModulatedDeformConv, ModulatedDeformConvPack, deform_conv,
modulated_deform_conv)
__all__ = [
'DeformConv', 'DeformConvPack', 'ModulatedDeformConv', 'ModulatedDeformConvPack', 'deform_conv',
'modulated_deform_conv'
]
import math
import os
import torch
from torch import nn as nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn import functional as F
from torch.nn.modules.utils import _pair, _single
# Select how the deformable-conv CUDA extension is obtained:
# when the BASICSR_JIT environment variable is 'True', JIT-compile it from
# the bundled sources at import time; otherwise try the prebuilt extension.
BASICSR_JIT = os.getenv('BASICSR_JIT')
if BASICSR_JIT == 'True':
    from torch.utils.cpp_extension import load
    module_path = os.path.dirname(__file__)
    deform_conv_ext = load(
        'deform_conv',
        sources=[
            os.path.join(module_path, 'src', 'deform_conv_ext.cpp'),
            os.path.join(module_path, 'src', 'deform_conv_cuda.cpp'),
            os.path.join(module_path, 'src', 'deform_conv_cuda_kernel.cu'),
        ],
    )
else:
    try:
        from . import deform_conv_ext
    except ImportError:
        # A missing prebuilt extension is ignored here; any later use of
        # deform_conv_ext will then fail with a NameError instead.
        pass
        # avoid annoying print output
        # print(f'Cannot import deform_conv_ext. Error: {error}. You may need to: \n '
        # '1. compile with BASICSR_EXT=True. or\n '
        # '2. set BASICSR_JIT=True during running')
class DeformConvFunction(Function):
    """Autograd Function for deformable convolution, backed by the
    ``deform_conv_ext`` CUDA extension (CPU tensors are rejected)."""

    @staticmethod
    def forward(ctx,
                input,
                offset,
                weight,
                stride=1,
                padding=0,
                dilation=1,
                groups=1,
                deformable_groups=1,
                im2col_step=64):
        # Validate rank first: the extension expects NCHW input.
        if input is not None and input.dim() != 4:
            raise ValueError(f'Expected 4D tensor as input, got {input.dim()}' 'D tensor instead.')
        # Stash hyper-parameters on ctx for backward.
        ctx.stride = _pair(stride)
        ctx.padding = _pair(padding)
        ctx.dilation = _pair(dilation)
        ctx.groups = groups
        ctx.deformable_groups = deformable_groups
        ctx.im2col_step = im2col_step
        ctx.save_for_backward(input, offset, weight)
        output = input.new_empty(DeformConvFunction._output_size(input, weight, ctx.padding, ctx.dilation, ctx.stride))
        ctx.bufs_ = [input.new_empty(0), input.new_empty(0)]  # columns, ones
        if not input.is_cuda:
            raise NotImplementedError
        else:
            # The extension processes the batch in chunks of im2col_step
            # samples; the chunk size must divide the batch size evenly.
            cur_im2col_step = min(ctx.im2col_step, input.shape[0])
            assert (input.shape[0] % cur_im2col_step) == 0, 'im2col step must divide batchsize'
            deform_conv_ext.deform_conv_forward(input, weight,
                                                offset, output, ctx.bufs_[0], ctx.bufs_[1], weight.size(3),
                                                weight.size(2), ctx.stride[1], ctx.stride[0], ctx.padding[1],
                                                ctx.padding[0], ctx.dilation[1], ctx.dilation[0], ctx.groups,
                                                ctx.deformable_groups, cur_im2col_step)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        # CUDA-only backward. Input/offset gradients and the weight gradient
        # are produced by two separate extension calls, each gated on
        # ctx.needs_input_grad so unneeded work is skipped.
        input, offset, weight = ctx.saved_tensors
        grad_input = grad_offset = grad_weight = None
        if not grad_output.is_cuda:
            raise NotImplementedError
        else:
            cur_im2col_step = min(ctx.im2col_step, input.shape[0])
            assert (input.shape[0] % cur_im2col_step) == 0, 'im2col step must divide batchsize'
            if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]:
                grad_input = torch.zeros_like(input)
                grad_offset = torch.zeros_like(offset)
                deform_conv_ext.deform_conv_backward_input(input, offset, grad_output, grad_input,
                                                           grad_offset, weight, ctx.bufs_[0], weight.size(3),
                                                           weight.size(2), ctx.stride[1], ctx.stride[0], ctx.padding[1],
                                                           ctx.padding[0], ctx.dilation[1], ctx.dilation[0], ctx.groups,
                                                           ctx.deformable_groups, cur_im2col_step)
            if ctx.needs_input_grad[2]:
                grad_weight = torch.zeros_like(weight)
                deform_conv_ext.deform_conv_backward_parameters(input, offset, grad_output, grad_weight,
                                                                ctx.bufs_[0], ctx.bufs_[1], weight.size(3),
                                                                weight.size(2), ctx.stride[1], ctx.stride[0],
                                                                ctx.padding[1], ctx.padding[0], ctx.dilation[1],
                                                                ctx.dilation[0], ctx.groups, ctx.deformable_groups, 1,
                                                                cur_im2col_step)
        # One gradient slot per forward argument; the hyper-parameters get None.
        return (grad_input, grad_offset, grad_weight, None, None, None, None, None)

    @staticmethod
    def _output_size(input, weight, padding, dilation, stride):
        # Standard convolution output-size formula, applied per spatial dim.
        channels = weight.size(0)
        output_size = (input.size(0), channels)
        for d in range(input.dim() - 2):
            in_size = input.size(d + 2)
            pad = padding[d]
            kernel = dilation[d] * (weight.size(d + 2) - 1) + 1
            stride_ = stride[d]
            output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, )
        if not all(map(lambda s: s > 0, output_size)):
            raise ValueError(f'convolution input is too small (output would be {"x".join(map(str, output_size))})')
        return output_size
class ModulatedDeformConvFunction(Function):
    """Autograd Function for modulated deformable convolution, backed by the
    ``deform_conv_ext`` CUDA extension (CPU tensors are rejected)."""

    @staticmethod
    def forward(ctx,
                input,
                offset,
                mask,
                weight,
                bias=None,
                stride=1,
                padding=0,
                dilation=1,
                groups=1,
                deformable_groups=1):
        # Unlike DeformConvFunction, stride/padding/dilation are kept as plain
        # ints here: the extension receives the same value for both spatial dims.
        ctx.stride = stride
        ctx.padding = padding
        ctx.dilation = dilation
        ctx.groups = groups
        ctx.deformable_groups = deformable_groups
        ctx.with_bias = bias is not None
        if not ctx.with_bias:
            bias = input.new_empty(1)  # fake tensor
        if not input.is_cuda:
            raise NotImplementedError
        # Save tensors only when a gradient will actually be needed.
        if weight.requires_grad or mask.requires_grad or offset.requires_grad or input.requires_grad:
            ctx.save_for_backward(input, offset, mask, weight, bias)
        output = input.new_empty(ModulatedDeformConvFunction._infer_shape(ctx, input, weight))
        ctx._bufs = [input.new_empty(0), input.new_empty(0)]
        deform_conv_ext.modulated_deform_conv_forward(input, weight, bias, ctx._bufs[0], offset, mask, output,
                                                      ctx._bufs[1], weight.shape[2], weight.shape[3], ctx.stride,
                                                      ctx.stride, ctx.padding, ctx.padding, ctx.dilation, ctx.dilation,
                                                      ctx.groups, ctx.deformable_groups, ctx.with_bias)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        # CUDA-only backward: all gradients are filled in-place by a single
        # extension call into preallocated zero tensors.
        if not grad_output.is_cuda:
            raise NotImplementedError
        input, offset, mask, weight, bias = ctx.saved_tensors
        grad_input = torch.zeros_like(input)
        grad_offset = torch.zeros_like(offset)
        grad_mask = torch.zeros_like(mask)
        grad_weight = torch.zeros_like(weight)
        grad_bias = torch.zeros_like(bias)
        deform_conv_ext.modulated_deform_conv_backward(input, weight, bias, ctx._bufs[0], offset, mask, ctx._bufs[1],
                                                       grad_input, grad_weight, grad_bias, grad_offset, grad_mask,
                                                       grad_output, weight.shape[2], weight.shape[3], ctx.stride,
                                                       ctx.stride, ctx.padding, ctx.padding, ctx.dilation, ctx.dilation,
                                                       ctx.groups, ctx.deformable_groups, ctx.with_bias)
        if not ctx.with_bias:
            grad_bias = None
        # One gradient slot per forward argument; hyper-parameters get None.
        return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias, None, None, None, None, None)

    @staticmethod
    def _infer_shape(ctx, input, weight):
        # Standard convolution output-size formula (square stride/pad/dilation).
        n = input.size(0)
        channels_out = weight.size(0)
        height, width = input.shape[2:4]
        kernel_h, kernel_w = weight.shape[2:4]
        height_out = (height + 2 * ctx.padding - (ctx.dilation * (kernel_h - 1) + 1)) // ctx.stride + 1
        width_out = (width + 2 * ctx.padding - (ctx.dilation * (kernel_w - 1) + 1)) // ctx.stride + 1
        return n, channels_out, height_out, width_out
# Functional aliases: invoke the autograd Functions through their ``apply``
# entry points, as required by torch.autograd.Function.
deform_conv = DeformConvFunction.apply
modulated_deform_conv = ModulatedDeformConvFunction.apply
class DeformConv(nn.Module):
    """Deformable convolution layer; sampling offsets are supplied by the
    caller in ``forward``. Bias is not supported."""

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 deformable_groups=1,
                 bias=False):
        super(DeformConv, self).__init__()
        assert not bias
        assert in_channels % groups == 0, f'in_channels {in_channels} is not divisible by groups {groups}'
        assert out_channels % groups == 0, f'out_channels {out_channels} is not divisible by groups {groups}'
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = _pair(kernel_size)
        self.stride = _pair(stride)
        self.padding = _pair(padding)
        self.dilation = _pair(dilation)
        self.groups = groups
        self.deformable_groups = deformable_groups
        # enable compatibility with nn.Conv2d
        self.transposed = False
        self.output_padding = _single(0)
        self.weight = nn.Parameter(torch.Tensor(out_channels, in_channels // self.groups, *self.kernel_size))
        self.reset_parameters()

    def reset_parameters(self):
        # Uniform init scaled by 1/sqrt(fan_in), fan_in = C_in * kH * kW.
        fan_in = self.in_channels * math.prod(self.kernel_size)
        bound = 1. / math.sqrt(fan_in)
        self.weight.data.uniform_(-bound, bound)

    def forward(self, x, offset):
        # The CUDA kernel asserts input >= kernel size
        # (deform_conv_cuda.cpp:128), so pad undersized inputs and crop the
        # padded region off the result afterwards.
        pad_h = max(self.kernel_size[0] - x.size(2), 0)
        pad_w = max(self.kernel_size[1] - x.size(3), 0)
        needs_pad = pad_h > 0 or pad_w > 0
        if needs_pad:
            x = F.pad(x, (0, pad_w, 0, pad_h), 'constant', 0).contiguous()
            offset = F.pad(offset, (0, pad_w, 0, pad_h), 'constant', 0).contiguous()
        out = deform_conv(x, offset, self.weight, self.stride, self.padding, self.dilation, self.groups,
                          self.deformable_groups)
        if needs_pad:
            out = out[:, :, :out.size(2) - pad_h, :out.size(3) - pad_w].contiguous()
        return out
class DeformConvPack(DeformConv):
    """A Deformable Conv Encapsulation that acts as normal Conv layers.

    The sampling offsets are predicted internally by an extra zero-initialized
    ``nn.Conv2d`` instead of being supplied by the caller.

    Args:
        in_channels (int): Same as nn.Conv2d.
        out_channels (int): Same as nn.Conv2d.
        kernel_size (int or tuple[int]): Same as nn.Conv2d.
        stride (int or tuple[int]): Same as nn.Conv2d.
        padding (int or tuple[int]): Same as nn.Conv2d.
        dilation (int or tuple[int]): Same as nn.Conv2d.
        groups (int): Same as nn.Conv2d.
        bias (bool or str): If specified as `auto`, it will be decided by the
            norm_cfg. Bias will be set as True if norm_cfg is None, otherwise
            False.
    """

    _version = 2

    def __init__(self, *args, **kwargs):
        super(DeformConvPack, self).__init__(*args, **kwargs)
        # Two offset maps (per spatial axis) per deformable group per kernel tap.
        offset_channels = self.deformable_groups * 2 * self.kernel_size[0] * self.kernel_size[1]
        self.conv_offset = nn.Conv2d(
            self.in_channels,
            offset_channels,
            kernel_size=self.kernel_size,
            stride=_pair(self.stride),
            padding=_pair(self.padding),
            dilation=_pair(self.dilation),
            bias=True)
        self.init_offset()

    def init_offset(self):
        # Zero offsets at start, so the layer initially samples on the
        # regular convolution grid.
        self.conv_offset.weight.data.zero_()
        self.conv_offset.bias.data.zero_()

    def forward(self, x):
        predicted_offset = self.conv_offset(x)
        return deform_conv(x, predicted_offset, self.weight, self.stride, self.padding, self.dilation, self.groups,
                           self.deformable_groups)
class ModulatedDeformConv(nn.Module):
    """Modulated deformable convolution layer; sampling offsets and the
    modulation mask are supplied by the caller in ``forward``."""

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 deformable_groups=1,
                 bias=True):
        super(ModulatedDeformConv, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = _pair(kernel_size)
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.groups = groups
        self.deformable_groups = deformable_groups
        self.with_bias = bias
        # enable compatibility with nn.Conv2d
        self.transposed = False
        self.output_padding = _single(0)
        self.weight = nn.Parameter(torch.Tensor(out_channels, in_channels // groups, *self.kernel_size))
        if bias:
            self.bias = nn.Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)
        self.init_weights()

    def init_weights(self):
        # Uniform init scaled by 1/sqrt(fan_in), fan_in = C_in * kH * kW;
        # bias (when present) starts at zero.
        fan_in = self.in_channels * math.prod(self.kernel_size)
        bound = 1. / math.sqrt(fan_in)
        self.weight.data.uniform_(-bound, bound)
        if self.bias is not None:
            self.bias.data.zero_()

    def forward(self, x, offset, mask):
        return modulated_deform_conv(x, offset, mask, self.weight, self.bias, self.stride, self.padding, self.dilation,
                                     self.groups, self.deformable_groups)
class ModulatedDeformConvPack(ModulatedDeformConv):
    """A ModulatedDeformable Conv Encapsulation that acts as normal Conv layers.

    Offsets and the modulation mask are predicted internally by an extra
    zero-initialized ``nn.Conv2d`` instead of being supplied by the caller.

    Args:
        in_channels (int): Same as nn.Conv2d.
        out_channels (int): Same as nn.Conv2d.
        kernel_size (int or tuple[int]): Same as nn.Conv2d.
        stride (int or tuple[int]): Same as nn.Conv2d.
        padding (int or tuple[int]): Same as nn.Conv2d.
        dilation (int or tuple[int]): Same as nn.Conv2d.
        groups (int): Same as nn.Conv2d.
        bias (bool or str): If specified as `auto`, it will be decided by the
            norm_cfg. Bias will be set as True if norm_cfg is None, otherwise
            False.
    """

    _version = 2

    def __init__(self, *args, **kwargs):
        super(ModulatedDeformConvPack, self).__init__(*args, **kwargs)
        # Three maps per deformable group per kernel tap: two offset
        # components plus one modulation mask channel.
        self.conv_offset = nn.Conv2d(
            self.in_channels,
            self.deformable_groups * 3 * self.kernel_size[0] * self.kernel_size[1],
            kernel_size=self.kernel_size,
            stride=_pair(self.stride),
            padding=_pair(self.padding),
            dilation=_pair(self.dilation),
            bias=True)
        self.init_weights()

    def init_weights(self):
        super(ModulatedDeformConvPack, self).init_weights()
        # Guard with hasattr: the base __init__ calls init_weights before
        # conv_offset exists.
        if hasattr(self, 'conv_offset'):
            self.conv_offset.weight.data.zero_()
            self.conv_offset.bias.data.zero_()

    def forward(self, x):
        prediction = self.conv_offset(x)
        part_a, part_b, mask = torch.chunk(prediction, 3, dim=1)
        offset = torch.cat((part_a, part_b), dim=1)
        mask = torch.sigmoid(mask)
        return modulated_deform_conv(x, offset, mask, self.weight, self.bias, self.stride, self.padding, self.dilation,
                                     self.groups, self.deformable_groups)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment