Commit a8ada82f authored by chenych's avatar chenych
Browse files

First commit

parent 537691da
# 模型唯一标识
modelCode=xxx
# 模型名称
modelName=maskeddenoising_pytorch
# 模型描述
modelDescription=maskeddenoising_pytorch在训练过程中对输入图像的随机像素进行掩蔽,并在训练过程中重建缺失的信息。同时,还在自注意力层中掩蔽特征,以避免训练和测试不一致性的影响。
# 应用场景
appScenario=推理,训练,图像降噪,教育,交通,公安
# 框架类型
frameType=PyTorch
{
"task": "baseline" // real-world image sr. root/task/images-models-options
, "model": "plain" // "plain" | "plain2" if two inputs
, "gpu_ids": [0]
, "dist": false
, "scale": 1 // broadcast to "datasets"
, "n_channels": 3 // broadcast to "datasets", 1 for grayscale, 3 for color
, "path": {
"root": "masked_denoising" // "denoising" | "superresolution" | "dejpeg"
, "pretrained_netG": null // path of pretrained model
, "pretrained_netE": null // path of pretrained model
}
, "datasets": {
"train": {
"name": "train_dataset" // just name
, "dataset_type": "masked_denoising" // "dncnn" | "dnpatch" | "fdncnn" | "ffdnet" | "sr" | "srmd" | "dpsr" | "plain" | "plainpatch" | "jpeg" | "masked_denoising"
, "dataroot_H": "trainsets/trainH" // path of H training dataset. DIV2K (800 training images) + Flickr2K (2650 images) + OST (10324 images)
, "dataroot_L": null // path of L training dataset
, "H_size": 64 // patch_size 256 | 288 | 320 (256)
, "lq_patchsize": 64 // (64)
, "dataloader_shuffle": true
, "dataloader_num_workers": 2
, "dataloader_batch_size": 16 // batch size 1 | 16 | 32 | 48 | 64 | 128. Total batch size =4x8=32 in SwinIR (32)
, "noise_level": 15
, "if_mask": false
, "mask1": 75
, "mask2": 75
}
, "test": {
"name": "test_dataset" // just name
, "dataset_type": "plain" // "dncnn" | "dnpatch" | "fdncnn" | "ffdnet" | "sr" | "srmd" | "dpsr" | "plain" | "plainpatch" | "jpeg"
, "dataroot_H": "testset/McM/HR" // path of H testing dataset
, "dataroot_L": "testset/McM/McM_poisson_20" // path of L testing dataset
}
}
, "netG": {
"net_type": "swinir"
, "upscale": 1
, "in_chans": 3
, "img_size": 64
, "window_size": 8 // 8 !!!!!!!!!!!!!!!
, "img_range": 1.0
, "depths": [6, 6, 6, 6]
, "embed_dim": 60
, "num_heads": [6, 6, 6, 6]
, "mlp_ratio": 2
, "upsampler": null // "pixelshuffle" | "pixelshuffledirect" | "nearest+conv" | null
, "resi_connection": "3conv" // "1conv" | "3conv"
, "init_type": "default"
, "talking_heads": false
, "attn_fn": "softmax" // null | "softmax" | "entmax15" |
, "head_scale": false
, "on_attn": false
, "use_mask": false // if use attention mask
, "mask_ratio1": 75 // attention mask ratio,
, "mask_ratio2": 75 // randomly sampling from [mask_ratio1, mask_ratio2]
, "mask_is_diff": false
, "type": "stand"
}
, "train": {
"manual_seed": 1
, "G_lossfn_type": "l1" // "l1" preferred | "l2sum" | "l2" | "ssim" | "charbonnier"
, "G_lossfn_weight": 1.0 // default
, "E_decay": 0.999 // Exponential Moving Average for netG: set 0 to disable; default setting 0.999
, "G_optimizer_type": "adam" // fixed, adam is enough
, "G_optimizer_lr": 1e-4 // 2e-4 // learning rate
, "G_optimizer_wd": 0 // weight decay, default 0
, "G_optimizer_clipgrad": null // unused
, "G_optimizer_reuse": true //
, "G_scheduler_type": "MultiStepLR" // "MultiStepLR" is enough
, "G_scheduler_milestones": [] // [250000, 400000, 450000, 475000, 500000]
, "G_scheduler_gamma": 0.5
, "G_regularizer_orthstep": null // unused
, "G_regularizer_clipstep": null // unused
, "G_param_strict": true
, "E_param_strict": true
, "checkpoint_test": 5000 // for testing (5000)
, "checkpoint_save": 5000 // for saving model
, "checkpoint_print": 100 // for print
, "save_image": ["img_043_x1", "img_021_x1", "img_024_x1", "img_031_x1", "img_041_x1", "img_032_x1"] // [250000, 400000, 450000, 475000, 500000]
}
}
{
"task": "80_90" // real-world image sr. root/task/images-models-options
, "model": "plain" // "plain" | "plain2" if two inputs
// , "gpu_ids": [0,1,2,3]
, "gpu_ids": [0]
, "dist": false
, "scale": 1 // broadcast to "datasets"
, "n_channels": 3 // broadcast to "datasets", 1 for grayscale, 3 for color
, "path": {
"root": "masked_denoising" // "denoising" | "superresolution" | "dejpeg"
, "pretrained_netG": null // path of pretrained model
, "pretrained_netE": null // path of pretrained model
},
"datasets": {
"train": {
"name": "train_dataset" // just name
, "dataset_type": "masked_denoising" // "dncnn" | "dnpatch" | "fdncnn" | "ffdnet" | "sr" | "srmd" | "dpsr" | "plain" | "plainpatch" | "jpeg" | "masked_denoising"
, "dataroot_H": "trainsets/trainH" // path of H training dataset. DIV2K (800 training images) + Flickr2K (2650 images) + OST (10324 images)
, "dataroot_L": null // path of L training dataset
, "H_size": 64 // patch_size 256 | 288 | 320 (256)
, "lq_patchsize": 64 // (64)
, "dataloader_shuffle": true
, "dataloader_num_workers": 16
, "dataloader_batch_size": 64 // batch size 1 | 16 | 32 | 48 | 64 | 128. Total batch size =4x8=32 in SwinIR (32)
, "noise_level": 15
, "if_mask": true
, "mask1": 80
, "mask2": 90
}
, "test": {
"name": "test_dataset" // just name
, "dataset_type": "plain" // "dncnn" | "dnpatch" | "fdncnn" | "ffdnet" | "sr" | "srmd" | "dpsr" | "plain" | "plainpatch" | "jpeg"
, "dataroot_H": "testset/McM/HR" // path of H testing dataset
, "dataroot_L": "testset/McM/McM_poisson_20" // path of L testing dataset
}
},
"netG": {
"net_type": "swinir"
, "upscale": 1
, "in_chans": 3
, "img_size": 64
, "window_size": 8 // 8 !!!!!!!!!!!!!!!
, "img_range": 1.0
, "depths": [6, 6, 6, 6]
, "embed_dim": 60
, "num_heads": [6, 6, 6, 6]
, "mlp_ratio": 2
, "upsampler": null // "pixelshuffle" | "pixelshuffledirect" | "nearest+conv" | null
, "resi_connection": "3conv" // "1conv" | "3conv"
, "init_type": "default"
, "talking_heads": false
, "attn_fn": "softmax" // null | "softmax" | "entmax15" |
, "head_scale": false
, "on_attn": false
, "use_mask": true // if use attention mask
, "mask_ratio1": 75 // attention mask ratio,
, "mask_ratio2": 75 // randomly sampling from [mask_ratio1, mask_ratio2]
, "mask_is_diff": false
, "type": "stand"
},
"train": {
"manual_seed": 1
, "G_lossfn_type": "l1" // "l1" preferred | "l2sum" | "l2" | "ssim" | "charbonnier"
, "G_lossfn_weight": 1.0 // default
, "E_decay": 0.999 // Exponential Moving Average for netG: set 0 to disable; default setting 0.999
, "G_optimizer_type": "adam" // fixed, adam is enough
, "G_optimizer_lr": 1e-4 // 2e-4 // learning rate
, "G_optimizer_wd": 0 // weight decay, default 0
, "G_optimizer_clipgrad": null // unused
, "G_optimizer_reuse": true //
, "G_scheduler_type": "MultiStepLR" // "MultiStepLR" is enough
, "G_scheduler_milestones": [150000, 200000, 300000, 350000, 400000] // [250000, 400000, 450000, 475000, 500000]
, "G_scheduler_gamma": 0.5
, "G_regularizer_orthstep": null // unused
, "G_regularizer_clipstep": null // unused
, "G_param_strict": true
, "E_param_strict": true
, "checkpoint_test": 5000 // for testing (5000)
, "checkpoint_save": 5000 // for saving model
, "checkpoint_print": 100 // for print
, "save_image": ["img_043_x1", "img_021_x1", "img_024_x1", "img_031_x1", "img_041_x1", "img_032_x1"] // [250000, 400000, 450000, 475000, 500000]
}
}
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
import torch
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
from math import exp
"""
# ============================================
# SSIM loss
# https://github.com/Po-Hsun-Su/pytorch-ssim
# ============================================
"""
def gaussian(window_size, sigma):
    """Return a 1-D Gaussian kernel of length *window_size* that sums to 1.

    The kernel is centred on ``window_size // 2`` with standard deviation
    *sigma*; it is later outer-multiplied into the 2-D SSIM window.
    """
    center = window_size // 2
    weights = torch.Tensor(
        [exp(-((i - center) ** 2) / (2.0 * sigma ** 2)) for i in range(window_size)]
    )
    # Normalize so the window acts as a weighted average.
    return weights / weights.sum()
def create_window(window_size, channel):
    """Build the 2-D Gaussian window used by SSIM.

    Returns a contiguous tensor of shape ``(channel, 1, window_size,
    window_size)`` suitable for a depthwise ``F.conv2d`` (``groups=channel``).
    The 2-D window is the outer product of the 1-D Gaussian with itself.
    """
    _1D_window = gaussian(window_size, 1.5).unsqueeze(1)
    _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0)
    # NOTE: torch.autograd.Variable is a deprecated no-op since PyTorch 0.4;
    # plain tensors are autograd-aware, so the wrapper was dropped.
    return _2D_window.expand(channel, 1, window_size, window_size).contiguous()
def _ssim(img1, img2, window, window_size, channel, size_average=True):
mu1 = F.conv2d(img1, window, padding=window_size//2, groups=channel)
mu2 = F.conv2d(img2, window, padding=window_size//2, groups=channel)
mu1_sq = mu1.pow(2)
mu2_sq = mu2.pow(2)
mu1_mu2 = mu1*mu2
sigma1_sq = F.conv2d(img1*img1, window, padding=window_size//2, groups=channel) - mu1_sq
sigma2_sq = F.conv2d(img2*img2, window, padding=window_size//2, groups=channel) - mu2_sq
sigma12 = F.conv2d(img1*img2, window, padding=window_size//2, groups=channel) - mu1_mu2
C1 = 0.01**2
C2 = 0.03**2
ssim_map = ((2*mu1_mu2 + C1)*(2*sigma12 + C2))/((mu1_sq + mu2_sq + C1)*(sigma1_sq + sigma2_sq + C2))
if size_average:
return ssim_map.mean()
else:
return ssim_map.mean(1).mean(1).mean(1)
class SSIMLoss(torch.nn.Module):
    """SSIM similarity as an nn.Module (higher output = more similar).

    Caches the Gaussian window and rebuilds it only when the input's channel
    count, dtype, or device changes.  Note: callers typically negate the
    output to use it as a minimization loss.
    """

    def __init__(self, window_size=11, size_average=True):
        super(SSIMLoss, self).__init__()
        self.window_size = window_size
        self.size_average = size_average
        self.channel = 1
        self.window = create_window(window_size, self.channel)

    def forward(self, img1, img2):
        (_, channel, _, _) = img1.size()
        # Reuse the cached window when it already matches the input.
        # (The deprecated `.data.type()` string comparison and explicit
        # `.cuda()`/Variable handling were replaced with dtype/device checks.)
        if (channel == self.channel
                and self.window.dtype == img1.dtype
                and self.window.device == img1.device):
            window = self.window
        else:
            window = create_window(self.window_size, channel).to(
                device=img1.device, dtype=img1.dtype)
            self.window = window
            self.channel = channel
        return _ssim(img1, img2, window, self.window_size, channel,
                     self.size_average)
def ssim(img1, img2, window_size=11, size_average=True):
    """One-shot functional SSIM between two image batches.

    Builds a fresh Gaussian window matched to *img1*'s channels, device and
    dtype, then delegates to :func:`_ssim`.
    """
    channel = img1.size(1)
    window = create_window(window_size, channel)
    if img1.is_cuda:
        window = window.cuda(img1.get_device())
    window = window.type_as(img1)
    return _ssim(img1, img2, window, window_size, channel, size_average)
if __name__ == '__main__':
    # Demo: starting from random noise, optimize an image to maximize its
    # SSIM against a reference photo, then display the result.
    import cv2
    from torch import optim
    from skimage import io
    # Reference image must exist in the working directory.
    npImg1 = cv2.imread("einstein.png")
    # HWC uint8 -> 1xCxHxW float in [0, 1].
    img1 = torch.from_numpy(np.rollaxis(npImg1, 2)).float().unsqueeze(0)/255.0
    # Start from random noise of the same shape.
    img2 = torch.rand(img1.size())
    if torch.cuda.is_available():
        img1 = img1.cuda()
        img2 = img2.cuda()
    # Only the noise image is optimized; the reference stays fixed.
    img1 = Variable(img1, requires_grad=False)
    img2 = Variable(img2, requires_grad=True)
    ssim_value = ssim(img1, img2).item()
    print("Initial ssim:", ssim_value)
    ssim_loss = SSIMLoss()
    # Optimize the pixels of img2 directly.
    optimizer = optim.Adam([img2], lr=0.01)
    while ssim_value < 0.99:
        optimizer.zero_grad()
        # Negate: SSIM is a similarity, Adam minimizes.
        ssim_out = -ssim_loss(img1, img2)
        ssim_value = -ssim_out.item()
        print('{:<4.4f}'.format(ssim_value))
        ssim_out.backward()
        optimizer.step()
    # CHW float -> HWC uint8 for display.
    img = np.transpose(img2.detach().cpu().squeeze().float().numpy(), (1,2,0))
    io.imshow(np.uint8(np.clip(img*255, 0, 255)))
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
from models.model_plain import ModelPlain
class ModelPlain2(ModelPlain):
    """Plain model variant whose generator takes two inputs (L, C); trained
    with the pixel loss inherited from ModelPlain."""

    # ----------------------------------------
    # feed L/H data
    # ----------------------------------------
    def feed_data(self, data, need_H=True):
        """Move the batch tensors L, C (and optionally H) onto the model device."""
        device = self.device
        self.L = data['L'].to(device)
        self.C = data['C'].to(device)
        if need_H:
            self.H = data['H'].to(device)

    # ----------------------------------------
    # feed (L, C) to netG and get E
    # ----------------------------------------
    def netG_forward(self):
        """Run the generator on the (L, C) pair and store the estimate E."""
        self.E = self.netG(self.L, self.C)
from models.model_plain import ModelPlain
import numpy as np
class ModelPlain4(ModelPlain):
    """Train with four inputs (L, k, sf, sigma) and with pixel loss for USRNet"""

    # ----------------------------------------
    # feed L/H data
    # ----------------------------------------
    def feed_data(self, data, need_H=True):
        """Move the USRNet batch onto the model device.

        Expects ``data`` to carry 'L' (low-quality image), 'k' (blur kernel),
        'sf' (scale factor) and 'sigma' (noise level); 'H' only when *need_H*.
        """
        self.L = data['L'].to(self.device)          # low-quality image
        self.k = data['k'].to(self.device)          # blur kernel
        # FIX: np.int was removed in NumPy 1.24 (deprecated since 1.20);
        # the builtin int() is the documented equivalent.
        self.sf = int(data['sf'][0, ...].squeeze().cpu().numpy())  # scale factor
        self.sigma = data['sigma'].to(self.device)  # noise level
        if need_H:
            self.H = data['H'].to(self.device)      # H

    # ----------------------------------------
    # feed (L, k, sf, sigma) to netG and get E
    # ----------------------------------------
    def netG_forward(self):
        self.E = self.netG(self.L, self.k, self.sf, self.sigma)
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment