# Commit d5096d86 authored by mashun1 ("idmvton")
# Pipeline #1220 canceled with stages
# NOTE(review): the lines above are web-page scrape residue from a Git hosting UI,
# not configuration data; kept as comments so the file parses as YAML.
---
# general settings
name: HiFaceGAN_SR4x_train
model_type: HiFaceGANModel
scale: 1  # HiFaceGAN does not resize lq input
num_gpu: 1  # set num_gpu: 0 for cpu mode
manual_seed: 0

# dataset and data loader settings
datasets:
  train:
    name: FFHQ_sr4x
    type: PairedImageDataset
    dataroot_gt: datasets/FFHQ_512_gt
    dataroot_lq: datasets/FFHQ_512_lq_sr4x
    # (for lmdb)
    # dataroot_gt: datasets/FFHQ_512_gt.lmdb
    # dataroot_lq: datasets/FFHQ_512_lq_sr4x.lmdb
    io_backend:
      type: disk
      # (for lmdb)
      # type: lmdb

    gt_size: 512
    use_hflip: true
    use_rot: false

    # data loader
    num_worker_per_gpu: 1
    batch_size_per_gpu: 1
    dataset_enlarge_ratio: 1
    prefetch_mode: ~

  val:
    name: FFHQ_sr4x_val  # For now, use training dataset for validation
    type: PairedImageDataset
    dataroot_gt: datasets/FFHQ_512_gt
    dataroot_lq: datasets/FFHQ_512_lq_sr4x
    io_backend:
      type: disk

# network structures
network_g:
  type: HiFaceGAN
  num_in_ch: 3
  num_feat: 48
  use_vae: false
  z_dim: 256  # dummy var
  crop_size: 512
  # norm_g: 'spectralspadesyncbatch3x3'
  # norm_g: 'spectralspadeinstance3x3'
  norm_g: 'spectralspadebatch3x3'  # 20210519: Use batchnorm for now.
  # HifaceGAN supports progressive training, so network architecture depends on it
  is_train: false

network_d:
  type: HiFaceGANDiscriminator
  num_in_ch: 3
  num_out_ch: 3
  conditional_d: true
  num_feat: 64
  norm_d: 'spectralinstance'

# training settings
train:
  optim_g:
    type: Adam
    lr: !!float 2e-4
    weight_decay: 0
    betas: [0.9, 0.999]
  optim_d:
    type: Adam
    lr: !!float 2e-4
    weight_decay: 0
    betas: [0.9, 0.999]

  scheduler:
    # TODO: Integrate the exact scheduling system of HiFaceGAN
    # which involves a fixed lr followed by a linear decay
    # It is not supported in current BasicSR project.
    type: MultiStepLR
    milestones: [500, 1000, 2000, 3000]
    gamma: 0.5

  # By default HiFaceGAN trains for 50 epochs, need auto conversion with
  # total_iter = #(epochs) * #(dataset_size) * enlarge_ratio / batch_size
  total_iter: 5000
  warmup_iter: -1  # no warm up

  # losses:
  # Note: HiFaceGAN does not need pixel loss, use it at your own discretion
  # pixel_opt:
  #   type: L1Loss
  #   loss_weight: !!float 1e-2
  #   reduction: mean

  perceptual_opt:
    type: PerceptualLoss
    # vgg_layer_indices: 2,7,12,21,30
    # weights: 1/32, 1/16, 1/8, 1/4, 1
    layer_weights:
      'relu1_1': !!float 3.125e-2
      'relu2_1': !!float 6.25e-2
      'relu3_1': !!float 0.125
      'relu4_1': !!float 0.25
      'relu5_1': !!float 1.0
    vgg_type: vgg19
    use_input_norm: false  # keep in [0,1] range
    range_norm: false
    perceptual_weight: !!float 10.0
    style_weight: 0
    criterion: l1

  gan_opt:
    type: MultiScaleGANLoss
    gan_type: lsgan
    real_label_val: 1.0
    fake_label_val: 0.0
    loss_weight: !!float 1.0

  feature_matching_opt:
    type: GANFeatLoss
    loss_weight: !!float 10.0
    criterion: l1

  net_d_iters: 1
  net_d_init_iters: 0

# path
path:
  pretrain_network_g: ~  # experiments/pretrained_models/4xsr/latest_net_G.pth
  strict_load_g: true

# validation settings
val:
  val_freq: !!float 5e3
  save_img: true

  metrics:
    psnr:  # metric name, can be arbitrary
      type: calculate_psnr
      crop_border: 4
      test_y_channel: false

# logging settings
logger:
  print_freq: 100
  save_checkpoint_freq: !!float 5e3
  use_tb_logger: true
  wandb:
    project: ~
    resume_id: ~
---
# general settings
name: train_LDL_realworld_RRDB
model_type: RealESRGANModel
scale: 4
num_gpu: 4
manual_seed: 0

# ----------------- options for synthesizing training data in RealESRGANModel ----------------- #
# USM the ground-truth
l1_gt_usm: true
percep_gt_usm: true
gan_gt_usm: false

# the first degradation process
resize_prob: [0.2, 0.7, 0.1]  # up, down, keep
resize_range: [0.15, 1.5]
gaussian_noise_prob: 0.5
noise_range: [1, 30]
poisson_scale_range: [0.05, 3]
gray_noise_prob: 0.4
jpeg_range: [30, 95]

# the second degradation process
second_blur_prob: 0.8
resize_prob2: [0.3, 0.4, 0.3]  # up, down, keep
resize_range2: [0.3, 1.2]
gaussian_noise_prob2: 0.5
noise_range2: [1, 25]
poisson_scale_range2: [0.05, 2.5]
gray_noise_prob2: 0.4
jpeg_range2: [30, 95]

gt_size: 256
queue_size: 180

# dataset and data loader settings
datasets:
  train:
    name: DF2K+OST
    type: RealESRGANDataset
    dataroot_gt: datasets/DF2K
    meta_info: datasets/DF2K/meta_info/meta_info_DF2Kmultiscale+OST_sub.txt
    io_backend:
      type: disk

    blur_kernel_size: 21
    kernel_list: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
    kernel_prob: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
    sinc_prob: 0.1
    blur_sigma: [0.2, 3]
    betag_range: [0.5, 4]
    betap_range: [1, 2]

    blur_kernel_size2: 21
    kernel_list2: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
    kernel_prob2: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
    sinc_prob2: 0.1
    blur_sigma2: [0.2, 1.5]
    betag_range2: [0.5, 4]
    betap_range2: [1, 2]

    final_sinc_prob: 0.8

    gt_size: 256
    use_hflip: true
    use_rot: false

    # data loader
    num_worker_per_gpu: 4
    batch_size_per_gpu: 4
    dataset_enlarge_ratio: 1
    prefetch_mode: ~

  val:
    name: RealWorld38
    type: SingleImageDataset
    dataroot_lq: datasets/RealWorld38/LR
    io_backend:
      type: disk

# network structures
network_g:
  type: RRDBNet
  num_in_ch: 3
  num_out_ch: 3
  num_feat: 64
  num_block: 23
  num_grow_ch: 32

network_d:
  type: UNetDiscriminatorSN
  num_in_ch: 3
  num_feat: 64
  skip_connection: true

# path
path:
  # use the pre-trained Real-ESRNet model
  pretrain_network_g: experiments/pretrained_models/RealESRGAN/RealESRNet_x4plus.pth
  param_key_g: params_ema
  strict_load_g: true
  resume_state: ~

# training settings
train:
  ema_decay: 0.999
  optim_g:
    type: Adam
    lr: !!float 1e-4
    weight_decay: 0
    betas: [0.9, 0.99]
  optim_d:
    type: Adam
    lr: !!float 1e-4
    weight_decay: 0
    betas: [0.9, 0.99]

  scheduler:
    type: MultiStepLR
    milestones: [400000]
    gamma: 0.5

  total_iter: 400000
  warmup_iter: -1  # no warm up

  # losses
  pixel_opt:
    type: L1Loss
    loss_weight: !!float 1e-2
    reduction: mean
  ldl_opt:
    type: L1Loss
    loss_weight: !!float 1.0
    reduction: mean
  # perceptual loss (content and style losses)
  perceptual_opt:
    type: PerceptualLoss
    layer_weights:
      # before relu
      'conv1_2': 0.1
      'conv2_2': 0.1
      'conv3_4': 1
      'conv4_4': 1
      'conv5_4': 1
    vgg_type: vgg19
    use_input_norm: true
    perceptual_weight: !!float 1.0
    style_weight: 0
    range_norm: false
    criterion: l1
  # gan loss
  gan_opt:
    type: GANLoss
    gan_type: vanilla
    real_label_val: 1.0
    fake_label_val: 0.0
    loss_weight: !!float 1e-1

  net_d_iters: 1
  net_d_init_iters: 0

# validation settings
val:
  val_freq: !!float 5e3
  save_img: true

# logging settings
logger:
  print_freq: 100
  save_checkpoint_freq: !!float 5e3
  use_tb_logger: true
  wandb:
    project: ~
    resume_id: ~

# dist training settings
dist_params:
  backend: nccl
  port: 29500

# TODO
---
# general settings
name: 201_RCANx2_scratch_DIV2K_rand0
model_type: SRModel
scale: 2
num_gpu: 1  # set num_gpu: 0 for cpu mode
manual_seed: 10

# dataset and data loader settings
datasets:
  train:
    name: DIV2K
    type: PairedImageDataset
    dataroot_gt: datasets/DIV2K/DIV2K_train_HR_sub
    dataroot_lq: datasets/DIV2K/DIV2K_train_LR_bicubic/X2_sub
    # (for lmdb)
    # dataroot_gt: datasets/DIV2K/DIV2K_train_HR_sub.lmdb
    # dataroot_lq: datasets/DIV2K/DIV2K_train_LR_bicubic_X2_sub.lmdb
    filename_tmpl: '{}'
    io_backend:
      type: disk
      # (for lmdb)
      # type: lmdb

    gt_size: 96
    use_hflip: true
    use_rot: true

    # data loader
    num_worker_per_gpu: 6
    batch_size_per_gpu: 16
    dataset_enlarge_ratio: 100
    prefetch_mode: ~

  val:
    name: Set5
    type: PairedImageDataset
    dataroot_gt: datasets/Set5/GTmod12
    dataroot_lq: datasets/Set5/LRbicx2
    io_backend:
      type: disk

# network structures
network_g:
  type: RCAN
  num_in_ch: 3
  num_out_ch: 3
  num_feat: 64
  num_group: 10
  num_block: 20
  squeeze_factor: 16
  upscale: 2
  res_scale: 1
  img_range: 255.
  rgb_mean: [0.4488, 0.4371, 0.4040]

# path
path:
  pretrain_network_g: ~
  strict_load_g: true
  resume_state: ~

# training settings
train:
  ema_decay: 0.999
  optim_g:
    type: Adam
    lr: !!float 1e-4
    weight_decay: 0
    betas: [0.9, 0.99]

  scheduler:
    type: MultiStepLR
    milestones: [200000]
    gamma: 0.5

  total_iter: 300000
  warmup_iter: -1  # no warm up

  # losses
  pixel_opt:
    type: L1Loss
    loss_weight: 1.0
    reduction: mean

# validation settings
val:
  val_freq: !!float 5e3
  save_img: false

  metrics:
    psnr:  # metric name, can be arbitrary
      type: calculate_psnr
      crop_border: 2
      test_y_channel: false

# logging settings
logger:
  print_freq: 100
  save_checkpoint_freq: !!float 5e3
  use_tb_logger: true
  wandb:
    project: ~
    resume_id: ~

# dist training settings
dist_params:
  backend: nccl
  port: 29500
---
# general settings
name: train_RealESRGANx2plus_400k_B12G4
model_type: RealESRGANModel
scale: 2
num_gpu: auto  # auto: can infer from your visible devices automatically. official: 4 GPUs
manual_seed: 0

# ----------------- options for synthesizing training data in RealESRGANModel ----------------- #
# USM the ground-truth
l1_gt_usm: true
percep_gt_usm: true
gan_gt_usm: false

# the first degradation process
resize_prob: [0.2, 0.7, 0.1]  # up, down, keep
resize_range: [0.15, 1.5]
gaussian_noise_prob: 0.5
noise_range: [1, 30]
poisson_scale_range: [0.05, 3]
gray_noise_prob: 0.4
jpeg_range: [30, 95]

# the second degradation process
second_blur_prob: 0.8
resize_prob2: [0.3, 0.4, 0.3]  # up, down, keep
resize_range2: [0.3, 1.2]
gaussian_noise_prob2: 0.5
noise_range2: [1, 25]
poisson_scale_range2: [0.05, 2.5]
gray_noise_prob2: 0.4
jpeg_range2: [30, 95]

gt_size: 256
queue_size: 180

# dataset and data loader settings
datasets:
  train:
    name: DF2K+OST
    type: RealESRGANDataset
    dataroot_gt: datasets/DF2K
    meta_info: datasets/DF2K/meta_info/meta_info_DF2Kmultiscale+OST_sub.txt
    io_backend:
      type: disk

    blur_kernel_size: 21
    kernel_list: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
    kernel_prob: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
    sinc_prob: 0.1
    blur_sigma: [0.2, 3]
    betag_range: [0.5, 4]
    betap_range: [1, 2]

    blur_kernel_size2: 21
    kernel_list2: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
    kernel_prob2: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
    sinc_prob2: 0.1
    blur_sigma2: [0.2, 1.5]
    betag_range2: [0.5, 4]
    betap_range2: [1, 2]

    final_sinc_prob: 0.8

    gt_size: 256
    use_hflip: true
    use_rot: false

    # data loader
    num_worker_per_gpu: 5
    batch_size_per_gpu: 12
    dataset_enlarge_ratio: 1
    prefetch_mode: ~

  # Uncomment these for validation
  # val:
  #   name: validation
  #   type: PairedImageDataset
  #   dataroot_gt: path_to_gt
  #   dataroot_lq: path_to_lq
  #   io_backend:
  #     type: disk

# network structures
network_g:
  type: RRDBNet
  num_in_ch: 3
  num_out_ch: 3
  num_feat: 64
  num_block: 23
  num_grow_ch: 32
  scale: 2

network_d:
  type: UNetDiscriminatorSN
  num_in_ch: 3
  num_feat: 64
  skip_connection: true

# path
path:
  # use the pre-trained Real-ESRNet model
  pretrain_network_g: experiments/pretrained_models/RealESRNet_x2plus.pth
  param_key_g: params_ema
  strict_load_g: true
  resume_state: ~

# training settings
train:
  ema_decay: 0.999
  optim_g:
    type: Adam
    lr: !!float 1e-4
    weight_decay: 0
    betas: [0.9, 0.99]
  optim_d:
    type: Adam
    lr: !!float 1e-4
    weight_decay: 0
    betas: [0.9, 0.99]

  scheduler:
    type: MultiStepLR
    milestones: [400000]
    gamma: 0.5

  total_iter: 400000
  warmup_iter: -1  # no warm up

  # losses
  pixel_opt:
    type: L1Loss
    loss_weight: 1.0
    reduction: mean
  # perceptual loss (content and style losses)
  perceptual_opt:
    type: PerceptualLoss
    layer_weights:
      # before relu
      'conv1_2': 0.1
      'conv2_2': 0.1
      'conv3_4': 1
      'conv4_4': 1
      'conv5_4': 1
    vgg_type: vgg19
    use_input_norm: true
    perceptual_weight: !!float 1.0
    style_weight: 0
    range_norm: false
    criterion: l1
  # gan loss
  gan_opt:
    type: GANLoss
    gan_type: vanilla
    real_label_val: 1.0
    fake_label_val: 0.0
    loss_weight: !!float 1e-1

  net_d_iters: 1
  net_d_init_iters: 0

# Uncomment these for validation
# validation settings
# val:
#   val_freq: !!float 5e3
#   save_img: True
#   metrics:
#     psnr: # metric name
#       type: calculate_psnr
#       crop_border: 4
#       test_y_channel: false

# logging settings
logger:
  print_freq: 100
  save_checkpoint_freq: !!float 5e3
  use_tb_logger: true
  wandb:
    project: ~
    resume_id: ~

# dist training settings
dist_params:
  backend: nccl
  port: 29500
---
# general settings
name: train_RealESRGANx4plus_400k_B12G4
model_type: RealESRGANModel
scale: 4
num_gpu: auto  # auto: can infer from your visible devices automatically. official: 4 GPUs
manual_seed: 0

# ----------------- options for synthesizing training data in RealESRGANModel ----------------- #
# USM the ground-truth
l1_gt_usm: true
percep_gt_usm: true
gan_gt_usm: false

# the first degradation process
resize_prob: [0.2, 0.7, 0.1]  # up, down, keep
resize_range: [0.15, 1.5]
gaussian_noise_prob: 0.5
noise_range: [1, 30]
poisson_scale_range: [0.05, 3]
gray_noise_prob: 0.4
jpeg_range: [30, 95]

# the second degradation process
second_blur_prob: 0.8
resize_prob2: [0.3, 0.4, 0.3]  # up, down, keep
resize_range2: [0.3, 1.2]
gaussian_noise_prob2: 0.5
noise_range2: [1, 25]
poisson_scale_range2: [0.05, 2.5]
gray_noise_prob2: 0.4
jpeg_range2: [30, 95]

gt_size: 256
queue_size: 180

# dataset and data loader settings
datasets:
  train:
    name: DF2K+OST
    type: RealESRGANDataset
    dataroot_gt: datasets/DF2K
    meta_info: datasets/DF2K/meta_info/meta_info_DF2Kmultiscale+OST_sub.txt
    io_backend:
      type: disk

    blur_kernel_size: 21
    kernel_list: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
    kernel_prob: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
    sinc_prob: 0.1
    blur_sigma: [0.2, 3]
    betag_range: [0.5, 4]
    betap_range: [1, 2]

    blur_kernel_size2: 21
    kernel_list2: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
    kernel_prob2: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
    sinc_prob2: 0.1
    blur_sigma2: [0.2, 1.5]
    betag_range2: [0.5, 4]
    betap_range2: [1, 2]

    final_sinc_prob: 0.8

    gt_size: 256
    use_hflip: true
    use_rot: false

    # data loader
    num_worker_per_gpu: 5
    batch_size_per_gpu: 12
    dataset_enlarge_ratio: 1
    prefetch_mode: ~

  # Uncomment these for validation
  # val:
  #   name: validation
  #   type: PairedImageDataset
  #   dataroot_gt: path_to_gt
  #   dataroot_lq: path_to_lq
  #   io_backend:
  #     type: disk

# network structures
network_g:
  type: RRDBNet
  num_in_ch: 3
  num_out_ch: 3
  num_feat: 64
  num_block: 23
  num_grow_ch: 32

network_d:
  type: UNetDiscriminatorSN
  num_in_ch: 3
  num_feat: 64
  skip_connection: true

# path
path:
  # use the pre-trained Real-ESRNet model
  pretrain_network_g: experiments/pretrained_models/RealESRNet_x4plus.pth
  param_key_g: params_ema
  strict_load_g: true
  resume_state: ~

# training settings
train:
  ema_decay: 0.999
  optim_g:
    type: Adam
    lr: !!float 1e-4
    weight_decay: 0
    betas: [0.9, 0.99]
  optim_d:
    type: Adam
    lr: !!float 1e-4
    weight_decay: 0
    betas: [0.9, 0.99]

  scheduler:
    type: MultiStepLR
    milestones: [400000]
    gamma: 0.5

  total_iter: 400000
  warmup_iter: -1  # no warm up

  # losses
  pixel_opt:
    type: L1Loss
    loss_weight: 1.0
    reduction: mean
  # perceptual loss (content and style losses)
  perceptual_opt:
    type: PerceptualLoss
    layer_weights:
      # before relu
      'conv1_2': 0.1
      'conv2_2': 0.1
      'conv3_4': 1
      'conv4_4': 1
      'conv5_4': 1
    vgg_type: vgg19
    use_input_norm: true
    perceptual_weight: !!float 1.0
    style_weight: 0
    range_norm: false
    criterion: l1
  # gan loss
  gan_opt:
    type: GANLoss
    gan_type: vanilla
    real_label_val: 1.0
    fake_label_val: 0.0
    loss_weight: !!float 1e-1

  net_d_iters: 1
  net_d_init_iters: 0

# Uncomment these for validation
# validation settings
# val:
#   val_freq: !!float 5e3
#   save_img: True
#   metrics:
#     psnr: # metric name
#       type: calculate_psnr
#       crop_border: 4
#       test_y_channel: false

# logging settings
logger:
  print_freq: 100
  save_checkpoint_freq: !!float 5e3
  use_tb_logger: true
  wandb:
    project: ~
    resume_id: ~

# dist training settings
dist_params:
  backend: nccl
  port: 29500
---
# general settings
name: train_RealESRNetx2plus_1000k_B12G4
model_type: RealESRNetModel
scale: 2
num_gpu: auto  # auto: can infer from your visible devices automatically. official: 4 GPUs
manual_seed: 0

# ----------------- options for synthesizing training data in RealESRNetModel ----------------- #
gt_usm: true  # USM the ground-truth

# the first degradation process
resize_prob: [0.2, 0.7, 0.1]  # up, down, keep
resize_range: [0.15, 1.5]
gaussian_noise_prob: 0.5
noise_range: [1, 30]
poisson_scale_range: [0.05, 3]
gray_noise_prob: 0.4
jpeg_range: [30, 95]

# the second degradation process
second_blur_prob: 0.8
resize_prob2: [0.3, 0.4, 0.3]  # up, down, keep
resize_range2: [0.3, 1.2]
gaussian_noise_prob2: 0.5
noise_range2: [1, 25]
poisson_scale_range2: [0.05, 2.5]
gray_noise_prob2: 0.4
jpeg_range2: [30, 95]

gt_size: 256
queue_size: 180

# dataset and data loader settings
datasets:
  train:
    name: DF2K+OST
    type: RealESRGANDataset
    dataroot_gt: datasets/DF2K
    meta_info: datasets/DF2K/meta_info/meta_info_DF2Kmultiscale+OST_sub.txt
    io_backend:
      type: disk

    blur_kernel_size: 21
    kernel_list: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
    kernel_prob: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
    sinc_prob: 0.1
    blur_sigma: [0.2, 3]
    betag_range: [0.5, 4]
    betap_range: [1, 2]

    blur_kernel_size2: 21
    kernel_list2: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
    kernel_prob2: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
    sinc_prob2: 0.1
    blur_sigma2: [0.2, 1.5]
    betag_range2: [0.5, 4]
    betap_range2: [1, 2]

    final_sinc_prob: 0.8

    gt_size: 256
    use_hflip: true
    use_rot: false

    # data loader
    num_worker_per_gpu: 5
    batch_size_per_gpu: 12
    dataset_enlarge_ratio: 1
    prefetch_mode: ~

  # Uncomment these for validation
  # val:
  #   name: validation
  #   type: PairedImageDataset
  #   dataroot_gt: path_to_gt
  #   dataroot_lq: path_to_lq
  #   io_backend:
  #     type: disk

# network structures
network_g:
  type: RRDBNet
  num_in_ch: 3
  num_out_ch: 3
  num_feat: 64
  num_block: 23
  num_grow_ch: 32
  scale: 2

# path
path:
  # NOTE(review): an x4 checkpoint is loaded into an x2 network; strict_load_g is
  # false so mismatched upsampling weights are skipped — confirm this is intended.
  pretrain_network_g: experiments/pretrained_models/ESRGAN/RealESRGAN_x4plus.pth
  param_key_g: params_ema
  strict_load_g: false
  resume_state: ~

# training settings
train:
  ema_decay: 0.999
  optim_g:
    type: Adam
    lr: !!float 2e-4
    weight_decay: 0
    betas: [0.9, 0.99]

  scheduler:
    type: MultiStepLR
    milestones: [1000000]
    gamma: 0.5

  total_iter: 1000000
  warmup_iter: -1  # no warm up

  # losses
  pixel_opt:
    type: L1Loss
    loss_weight: 1.0
    reduction: mean

# Uncomment these for validation
# validation settings
# val:
#   val_freq: !!float 5e3
#   save_img: True
#   metrics:
#     psnr: # metric name
#       type: calculate_psnr
#       crop_border: 4
#       test_y_channel: false

# logging settings
logger:
  print_freq: 100
  save_checkpoint_freq: !!float 5e3
  use_tb_logger: true
  wandb:
    project: ~
    resume_id: ~

# dist training settings
dist_params:
  backend: nccl
  port: 29500
---
# general settings
name: train_RealESRNetx4plus_1000k_B12G4
model_type: RealESRNetModel
scale: 4
num_gpu: auto  # auto: can infer from your visible devices automatically. official: 4 GPUs
manual_seed: 0

# ----------------- options for synthesizing training data in RealESRNetModel ----------------- #
gt_usm: true  # USM the ground-truth

# the first degradation process
resize_prob: [0.2, 0.7, 0.1]  # up, down, keep
resize_range: [0.15, 1.5]
gaussian_noise_prob: 0.5
noise_range: [1, 30]
poisson_scale_range: [0.05, 3]
gray_noise_prob: 0.4
jpeg_range: [30, 95]

# the second degradation process
second_blur_prob: 0.8
resize_prob2: [0.3, 0.4, 0.3]  # up, down, keep
resize_range2: [0.3, 1.2]
gaussian_noise_prob2: 0.5
noise_range2: [1, 25]
poisson_scale_range2: [0.05, 2.5]
gray_noise_prob2: 0.4
jpeg_range2: [30, 95]

gt_size: 256
queue_size: 180

# dataset and data loader settings
datasets:
  train:
    name: DF2K+OST
    type: RealESRGANDataset
    dataroot_gt: datasets/DF2K
    meta_info: datasets/DF2K/meta_info/meta_info_DF2Kmultiscale+OST_sub.txt
    io_backend:
      type: disk

    blur_kernel_size: 21
    kernel_list: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
    kernel_prob: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
    sinc_prob: 0.1
    blur_sigma: [0.2, 3]
    betag_range: [0.5, 4]
    betap_range: [1, 2]

    blur_kernel_size2: 21
    kernel_list2: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
    kernel_prob2: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
    sinc_prob2: 0.1
    blur_sigma2: [0.2, 1.5]
    betag_range2: [0.5, 4]
    betap_range2: [1, 2]

    final_sinc_prob: 0.8

    gt_size: 256
    use_hflip: true
    use_rot: false

    # data loader
    num_worker_per_gpu: 5
    batch_size_per_gpu: 12
    dataset_enlarge_ratio: 1
    prefetch_mode: ~

  # Uncomment these for validation
  # val:
  #   name: validation
  #   type: PairedImageDataset
  #   dataroot_gt: path_to_gt
  #   dataroot_lq: path_to_lq
  #   io_backend:
  #     type: disk

# network structures
network_g:
  type: RRDBNet
  num_in_ch: 3
  num_out_ch: 3
  num_feat: 64
  num_block: 23
  num_grow_ch: 32

# path
path:
  pretrain_network_g: experiments/pretrained_models/ESRGAN/ESRGAN_SRx4_DF2KOST_official-ff704c30.pth
  param_key_g: params_ema
  strict_load_g: true
  resume_state: ~

# training settings
train:
  ema_decay: 0.999
  optim_g:
    type: Adam
    lr: !!float 2e-4
    weight_decay: 0
    betas: [0.9, 0.99]

  scheduler:
    type: MultiStepLR
    milestones: [1000000]
    gamma: 0.5

  total_iter: 1000000
  warmup_iter: -1  # no warm up

  # losses
  pixel_opt:
    type: L1Loss
    loss_weight: 1.0
    reduction: mean

# Uncomment these for validation
# validation settings
# val:
#   val_freq: !!float 5e3
#   save_img: True
#   metrics:
#     psnr: # metric name
#       type: calculate_psnr
#       crop_border: 4
#       test_y_channel: false

# logging settings
logger:
  print_freq: 100
  save_checkpoint_freq: !!float 5e3
  use_tb_logger: true
  wandb:
    project: ~
    resume_id: ~

# dist training settings
dist_params:
  backend: nccl
  port: 29500
# SRResNet and SRGAN
# Experiment results and pre-trained model descriptions are put here.
---
# Modified SRResNet w/o BN from:
# Photo-Realistic Single Image Super-Resolution Using a Generative Adversarial Network
# Use perceptual loss before ReLU as ESRGAN does

# general settings
name: 004_MSRGAN_x4_f64b16_DIV2K_400k_B16G1_wandb
model_type: SRGANModel
scale: 4
num_gpu: 1  # set num_gpu: 0 for cpu mode
manual_seed: 0

# dataset and data loader settings
datasets:
  train:
    name: DIV2K
    type: PairedImageDataset
    dataroot_gt: datasets/DIV2K/DIV2K_train_HR_sub
    dataroot_lq: datasets/DIV2K/DIV2K_train_LR_bicubic/X4_sub
    # (for lmdb)
    # dataroot_gt: datasets/DIV2K/DIV2K_train_HR_sub.lmdb
    # dataroot_lq: datasets/DIV2K/DIV2K_train_LR_bicubic_X4_sub.lmdb
    filename_tmpl: '{}'
    io_backend:
      type: disk
      # (for lmdb)
      # type: lmdb

    gt_size: 128
    use_hflip: true
    use_rot: true

    # data loader
    num_worker_per_gpu: 6
    batch_size_per_gpu: 16
    dataset_enlarge_ratio: 100
    prefetch_mode: ~

  val:
    name: Set14
    type: PairedImageDataset
    dataroot_gt: datasets/Set14/GTmod12
    dataroot_lq: datasets/Set14/LRbicx4
    io_backend:
      type: disk

# network structures
network_g:
  type: MSRResNet
  num_in_ch: 3
  num_out_ch: 3
  num_feat: 64
  num_block: 16
  upscale: 4

network_d:
  type: VGGStyleDiscriminator
  num_in_ch: 3
  num_feat: 64

# path
path:
  pretrain_network_g: experiments/001_MSRResNet_x4_f64b16_DIV2K_1000k_B16G1_wandb/models/net_g_1000000.pth
  strict_load_g: true
  resume_state: ~

# training settings
train:
  ema_decay: 0.999
  optim_g:
    type: Adam
    lr: !!float 1e-4
    weight_decay: 0
    betas: [0.9, 0.99]
  optim_d:
    type: Adam
    lr: !!float 1e-4
    weight_decay: 0
    betas: [0.9, 0.99]

  scheduler:
    type: MultiStepLR
    milestones: [50000, 100000, 200000, 300000]
    gamma: 0.5

  total_iter: 400000
  warmup_iter: -1  # no warm up

  # losses
  pixel_opt:
    type: L1Loss
    loss_weight: !!float 1e-2
    reduction: mean
  perceptual_opt:
    type: PerceptualLoss
    layer_weights:
      'conv5_4': 1  # before relu
    vgg_type: vgg19
    use_input_norm: true
    range_norm: false
    perceptual_weight: 1.0
    style_weight: 0
    criterion: l1
  gan_opt:
    type: GANLoss
    gan_type: vanilla
    real_label_val: 1.0
    fake_label_val: 0.0
    loss_weight: !!float 5e-3

  net_d_iters: 1
  net_d_init_iters: 0

# validation settings
val:
  val_freq: !!float 5e3
  save_img: true

  metrics:
    psnr:  # metric name, can be arbitrary
      type: calculate_psnr
      crop_border: 4
      test_y_channel: false

# logging settings
logger:
  print_freq: 100
  save_checkpoint_freq: !!float 5e3
  use_tb_logger: true
  wandb:
    project: ~
    resume_id: ~

# dist training settings
dist_params:
  backend: nccl
  port: 29500
---
# Modified SRResNet w/o BN from:
# Photo-Realistic Single Image Super-Resolution Using a Generative Adversarial Network

# general settings
name: 002_MSRResNet_x2_f64b16_DIV2K_1000k_B16G1_001pretrain_wandb
model_type: SRModel
scale: 2
num_gpu: 1  # set num_gpu: 0 for cpu mode
manual_seed: 0

# dataset and data loader settings
datasets:
  train:
    name: DIV2K
    type: PairedImageDataset
    dataroot_gt: datasets/DIV2K/DIV2K_train_HR_sub
    dataroot_lq: datasets/DIV2K/DIV2K_train_LR_bicubic/X2_sub
    # (for lmdb)
    # dataroot_gt: datasets/DIV2K/DIV2K_train_HR_sub.lmdb
    # dataroot_lq: datasets/DIV2K/DIV2K_train_LR_bicubic_X2_sub.lmdb
    filename_tmpl: '{}'
    io_backend:
      type: disk
      # (for lmdb)
      # type: lmdb

    gt_size: 64
    use_hflip: true
    use_rot: true

    # data loader
    num_worker_per_gpu: 6
    batch_size_per_gpu: 16
    dataset_enlarge_ratio: 100
    prefetch_mode: ~

  val:
    name: Set5
    type: PairedImageDataset
    dataroot_gt: datasets/Set5/GTmod12
    dataroot_lq: datasets/Set5/LRbicx2
    io_backend:
      type: disk

# network structures
network_g:
  type: MSRResNet
  num_in_ch: 3
  num_out_ch: 3
  num_feat: 64
  num_block: 16
  upscale: 2

# path
path:
  # initialize from the x4 model; strict_load_g is false so the mismatched
  # upsampling layers are skipped
  pretrain_network_g: experiments/001_MSRResNet_x4_f64b16_DIV2K_1000k_B16G1_wandb/models/net_g_1000000.pth
  strict_load_g: false
  resume_state: ~

# training settings
train:
  ema_decay: 0.999
  optim_g:
    type: Adam
    lr: !!float 2e-4
    weight_decay: 0
    betas: [0.9, 0.99]

  scheduler:
    type: CosineAnnealingRestartLR
    periods: [250000, 250000, 250000, 250000]
    restart_weights: [1, 1, 1, 1]
    eta_min: !!float 1e-7

  total_iter: 1000000
  warmup_iter: -1  # no warm up

  # losses
  pixel_opt:
    type: L1Loss
    loss_weight: 1.0
    reduction: mean

# validation settings
val:
  val_freq: !!float 5e3
  save_img: false

  metrics:
    psnr:  # metric name, can be arbitrary
      type: calculate_psnr
      crop_border: 2
      test_y_channel: false

# logging settings
logger:
  print_freq: 100
  save_checkpoint_freq: !!float 5e3
  use_tb_logger: true
  wandb:
    project: ~
    resume_id: ~

# dist training settings
dist_params:
  backend: nccl
  port: 29500
---
# Modified SRResNet w/o BN from:
# Photo-Realistic Single Image Super-Resolution Using a Generative Adversarial Network

# general settings
name: 003_MSRResNet_x3_f64b16_DIV2K_1000k_B16G1_001pretrain_wandb
model_type: SRModel
scale: 3
num_gpu: 1  # set num_gpu: 0 for cpu mode
manual_seed: 0

# dataset and data loader settings
datasets:
  train:
    name: DIV2K
    type: PairedImageDataset
    dataroot_gt: datasets/DIV2K/DIV2K_train_HR_sub
    dataroot_lq: datasets/DIV2K/DIV2K_train_LR_bicubic/X3_sub
    # (for lmdb)
    # dataroot_gt: datasets/DIV2K/DIV2K_train_HR_sub.lmdb
    # dataroot_lq: datasets/DIV2K/DIV2K_train_LR_bicubic_X3_sub.lmdb
    filename_tmpl: '{}'
    io_backend:
      type: disk
      # (for lmdb)
      # type: lmdb

    gt_size: 96
    use_hflip: true
    use_rot: true

    # data loader
    num_worker_per_gpu: 6
    batch_size_per_gpu: 16
    dataset_enlarge_ratio: 100
    prefetch_mode: ~

  val:
    name: Set5
    type: PairedImageDataset
    dataroot_gt: datasets/Set5/GTmod12
    dataroot_lq: datasets/Set5/LRbicx3
    io_backend:
      type: disk

# network structures
network_g:
  type: MSRResNet
  num_in_ch: 3
  num_out_ch: 3
  num_feat: 64
  num_block: 16
  upscale: 3

# path
path:
  # initialize from the x4 model; strict_load_g is false so the mismatched
  # upsampling layers are skipped
  pretrain_network_g: experiments/001_MSRResNet_x4_f64b16_DIV2K_1000k_B16G1_wandb/models/net_g_1000000.pth
  strict_load_g: false
  resume_state: ~

# training settings
train:
  ema_decay: 0.999
  optim_g:
    type: Adam
    lr: !!float 2e-4
    weight_decay: 0
    betas: [0.9, 0.99]

  scheduler:
    type: CosineAnnealingRestartLR
    periods: [250000, 250000, 250000, 250000]
    restart_weights: [1, 1, 1, 1]
    eta_min: !!float 1e-7

  total_iter: 1000000
  warmup_iter: -1  # no warm up

  # losses
  pixel_opt:
    type: L1Loss
    loss_weight: 1.0
    reduction: mean

# validation settings
val:
  val_freq: !!float 5e3
  save_img: false

  metrics:
    psnr:  # metric name, can be arbitrary
      type: calculate_psnr
      crop_border: 3
      test_y_channel: false

# logging settings
logger:
  print_freq: 100
  save_checkpoint_freq: !!float 5e3
  use_tb_logger: true
  wandb:
    project: ~
    resume_id: ~

# dist training settings
dist_params:
  backend: nccl
  port: 29500
---
# Modified SRResNet w/o BN from:
# Photo-Realistic Single Image Super-Resolution Using a Generative Adversarial Network

# ----------- Commands for running
# ----------- Single GPU with auto_resume
# PYTHONPATH="./:${PYTHONPATH}" CUDA_VISIBLE_DEVICES=0 python basicsr/train.py -opt options/train/SRResNet_SRGAN/train_MSRResNet_x4.yml --auto_resume

# general settings
name: 001_MSRResNet_x4_f64b16_DIV2K_1000k_B16G1_wandb
model_type: SRModel
scale: 4
num_gpu: 1  # set num_gpu: 0 for cpu mode
manual_seed: 0

# dataset and data loader settings
datasets:
  train:
    name: DIV2K
    type: PairedImageDataset
    dataroot_gt: datasets/DF2K/DIV2K_train_HR_sub
    dataroot_lq: datasets/DF2K/DIV2K_train_LR_bicubic_X4_sub
    meta_info_file: basicsr/data/meta_info/meta_info_DIV2K800sub_GT.txt
    # (for lmdb)
    # dataroot_gt: datasets/DIV2K/DIV2K_train_HR_sub.lmdb
    # dataroot_lq: datasets/DIV2K/DIV2K_train_LR_bicubic_X4_sub.lmdb
    filename_tmpl: '{}'
    io_backend:
      type: disk
      # (for lmdb)
      # type: lmdb

    gt_size: 128
    use_hflip: true
    use_rot: true

    # data loader
    num_worker_per_gpu: 6
    batch_size_per_gpu: 16
    dataset_enlarge_ratio: 100
    prefetch_mode: ~

  val:
    name: Set5
    type: PairedImageDataset
    dataroot_gt: datasets/Set5/GTmod12
    dataroot_lq: datasets/Set5/LRbicx4
    io_backend:
      type: disk

  val_2:
    name: Set14
    type: PairedImageDataset
    dataroot_gt: datasets/Set14/GTmod12
    dataroot_lq: datasets/Set14/LRbicx4
    io_backend:
      type: disk

# network structures
network_g:
  type: MSRResNet
  num_in_ch: 3
  num_out_ch: 3
  num_feat: 64
  num_block: 16
  upscale: 4

# path
path:
  pretrain_network_g: ~
  param_key_g: params
  strict_load_g: true
  resume_state: ~

# training settings
train:
  ema_decay: 0.999
  optim_g:
    type: Adam
    lr: !!float 2e-4
    weight_decay: 0
    betas: [0.9, 0.99]

  scheduler:
    type: CosineAnnealingRestartLR
    periods: [250000, 250000, 250000, 250000]
    restart_weights: [1, 1, 1, 1]
    eta_min: !!float 1e-7

  total_iter: 1000000
  warmup_iter: -1  # no warm up

  # losses
  pixel_opt:
    type: L1Loss
    loss_weight: 1.0
    reduction: mean

# validation settings
val:
  val_freq: !!float 5e3
  save_img: false

  metrics:
    psnr:  # metric name, can be arbitrary
      type: calculate_psnr
      crop_border: 4
      test_y_channel: false
      better: higher  # the higher, the better. Default: higher
    niqe:
      type: calculate_niqe
      crop_border: 4
      better: lower  # the lower, the better

# logging settings
logger:
  print_freq: 100
  save_checkpoint_freq: !!float 5e3
  use_tb_logger: true
  wandb:
    project: ~
    resume_id: ~

# dist training settings
dist_params:
  backend: nccl
  port: 29500
---
# general settings
name: 501_StyleGAN2_256_Cmul2_FFHQ_800k_B24G8_scratch
model_type: StyleGAN2Model
num_gpu: 8  # set num_gpu: 0 for cpu mode
manual_seed: 0

# dataset and data loader settings
datasets:
  train:
    name: FFHQ
    type: FFHQDataset
    dataroot_gt: datasets/ffhq/ffhq_256.lmdb
    io_backend:
      type: lmdb

    use_hflip: true
    mean: [0.5, 0.5, 0.5]
    std: [0.5, 0.5, 0.5]

    # data loader
    num_worker_per_gpu: 6
    batch_size_per_gpu: 3
    dataset_enlarge_ratio: 100
    prefetch_mode: ~

# network structures
network_g:
  type: StyleGAN2Generator
  out_size: 256
  num_style_feat: 512
  num_mlp: 8
  channel_multiplier: 2
  resample_kernel: [1, 3, 3, 1]
  lr_mlp: 0.01

network_d:
  type: StyleGAN2Discriminator
  out_size: 256
  channel_multiplier: 2
  resample_kernel: [1, 3, 3, 1]

# path
path:
  pretrain_network_g: ~
  strict_load_g: true
  resume_state: ~

# training settings
train:
  optim_g:
    type: Adam
    lr: !!float 2e-3
  optim_d:
    type: Adam
    lr: !!float 2e-3

  scheduler:
    type: MultiStepLR
    milestones: [600000]
    gamma: 0.5

  total_iter: 800000
  warmup_iter: -1  # no warm up

  # losses
  gan_opt:
    type: GANLoss
    gan_type: wgan_softplus
    loss_weight: !!float 1
  # r1 regularization for discriminator
  r1_reg_weight: 10
  # path length regularization for generator
  path_batch_shrink: 2
  path_reg_weight: 2

  net_g_reg_every: 4
  net_d_reg_every: 16
  mixing_prob: 0.9

  net_d_iters: 1
  net_d_init_iters: 0

# validation settings
val:
  val_freq: !!float 5e3
  save_img: true

# logging settings
logger:
  print_freq: 100
  save_checkpoint_freq: !!float 5e3
  use_tb_logger: true
  wandb:
    project: ~
    resume_id: ~

# dist training settings
dist_params:
  backend: nccl
  port: 29500
# general settings
name: train_SwinIR_SRx2_scratch_P48W8_DIV2K_500k_B4G8
model_type: SwinIRModel
scale: 2
num_gpu: auto
manual_seed: 0
# dataset and data loader settings
datasets:
train:
name: DIV2K
type: PairedImageDataset
dataroot_gt: datasets/DF2K/DIV2K_train_HR_sub
dataroot_lq: datasets/DF2K/DIV2K_train_LR_bicubic_X2_sub
meta_info_file: basicsr/data/meta_info/meta_info_DIV2K800sub_GT.txt
filename_tmpl: '{}'
io_backend:
type: disk
gt_size: 96
use_hflip: true
use_rot: true
# data loader
num_worker_per_gpu: 6
batch_size_per_gpu: 4
dataset_enlarge_ratio: 1
prefetch_mode: ~
val:
name: Set5
type: PairedImageDataset
dataroot_gt: datasets/Set5/GTmod12
dataroot_lq: datasets/Set5/LRbicx2
io_backend:
type: disk
# network structures
network_g:
type: SwinIR
upscale: 2
in_chans: 3
img_size: 48
window_size: 8
  img_range: 1.0
depths: [6, 6, 6, 6, 6, 6]
embed_dim: 180
num_heads: [6, 6, 6, 6, 6, 6]
mlp_ratio: 2
upsampler: 'pixelshuffle'
resi_connection: '1conv'
# path
path:
pretrain_network_g: ~
strict_load_g: true
resume_state: ~
# training settings
train:
ema_decay: 0.999
optim_g:
type: Adam
lr: !!float 2e-4
weight_decay: 0
betas: [0.9, 0.99]
scheduler:
type: MultiStepLR
milestones: [250000, 400000, 450000, 475000]
gamma: 0.5
total_iter: 500000
warmup_iter: -1 # no warm up
# losses
pixel_opt:
type: L1Loss
loss_weight: 1.0
reduction: mean
# validation settings
val:
val_freq: !!float 5e3
save_img: false
metrics:
psnr: # metric name, can be arbitrary
type: calculate_psnr
crop_border: 2
test_y_channel: false
# logging settings
logger:
print_freq: 100
save_checkpoint_freq: !!float 5e3
use_tb_logger: true
wandb:
project: ~
resume_id: ~
# dist training settings
dist_params:
backend: nccl
port: 29500
# general settings
name: train_SwinIR_SRx4_scratch_P48W8_DIV2K_500k_B4G8
model_type: SwinIRModel
scale: 4
num_gpu: auto
manual_seed: 0
# dataset and data loader settings
datasets:
train:
name: DIV2K
type: PairedImageDataset
dataroot_gt: datasets/DF2K/DIV2K_train_HR_sub
dataroot_lq: datasets/DF2K/DIV2K_train_LR_bicubic_X4_sub
meta_info_file: basicsr/data/meta_info/meta_info_DIV2K800sub_GT.txt
filename_tmpl: '{}'
io_backend:
type: disk
gt_size: 192
use_hflip: true
use_rot: true
# data loader
num_worker_per_gpu: 6
batch_size_per_gpu: 4
dataset_enlarge_ratio: 1
prefetch_mode: ~
val:
name: Set5
type: PairedImageDataset
dataroot_gt: datasets/Set5/GTmod12
dataroot_lq: datasets/Set5/LRbicx4
io_backend:
type: disk
# network structures
network_g:
type: SwinIR
upscale: 4
in_chans: 3
img_size: 48
window_size: 8
  img_range: 1.0
depths: [6, 6, 6, 6, 6, 6]
embed_dim: 180
num_heads: [6, 6, 6, 6, 6, 6]
mlp_ratio: 2
upsampler: 'pixelshuffle'
resi_connection: '1conv'
# path
path:
pretrain_network_g: ~
strict_load_g: true
resume_state: ~
# training settings
train:
ema_decay: 0.999
optim_g:
type: Adam
lr: !!float 2e-4
weight_decay: 0
betas: [0.9, 0.99]
scheduler:
type: MultiStepLR
milestones: [250000, 400000, 450000, 475000]
gamma: 0.5
total_iter: 500000
warmup_iter: -1 # no warm up
# losses
pixel_opt:
type: L1Loss
loss_weight: 1.0
reduction: mean
# validation settings
val:
val_freq: !!float 5e3
save_img: false
metrics:
psnr: # metric name, can be arbitrary
type: calculate_psnr
crop_border: 4
test_y_channel: false
# logging settings
logger:
print_freq: 100
save_checkpoint_freq: !!float 5e3
use_tb_logger: true
wandb:
project: ~
resume_id: ~
# dist training settings
dist_params:
backend: nccl
port: 29500
# general settings
name: VideoRecurrentGANModel_REDS
model_type: VideoRecurrentGANModel
scale: 4
num_gpu: 1 # set num_gpu: 0 for cpu mode
manual_seed: 0
# dataset and data loader settings
datasets:
train:
name: REDS
type: REDSRecurrentDataset
dataroot_gt: datasets/REDS/train_sharp
dataroot_lq: datasets/REDS/train_sharp_bicubic
dataroot_flow: ~
meta_info_file: basicsr/data/meta_info/meta_info_REDS_GT.txt
val_partition: REDS4 # set to 'official' when use the official validation partition
    test_mode: false
io_backend:
type: disk
num_frame: 15
gt_size: 256
interval_list: [1]
random_reverse: false
use_hflip: true
use_rot: true
# data loader
num_worker_per_gpu: 6
batch_size_per_gpu: 4
dataset_enlarge_ratio: 200
prefetch_mode: ~
val:
name: REDS4
type: VideoRecurrentTestDataset
dataroot_gt: datasets/REDS4/GT
dataroot_lq: datasets/REDS4/sharp_bicubic
cache_data: true
io_backend:
type: disk
num_frame: -1 # For recurrent, num_frame has no effect. Set to -1
# network structures
network_g:
type: BasicVSR
num_feat: 64
num_block: 30
spynet_path: experiments/pretrained_models/flownet/spynet_sintel_final-3d2a1287.pth
network_d:
type: VGGStyleDiscriminator
num_in_ch: 3
num_feat: 32
input_size: 256
# path
path:
pretrain_network_g: experiments/pretrained_models/BasicVSR_REDS4.pth
strict_load_g: true
resume_state: ~
# training settings
train:
ema_decay: 0.999
optim_g:
type: Adam
lr: !!float 1e-4
weight_decay: 0
betas: [0.9, 0.99]
lr_flow: !!float 0
optim_d:
type: Adam
lr: !!float 1e-4
weight_decay: 0
betas: [0.9, 0.99]
scheduler:
type: MultiStepLR
milestones: [50000, 100000, 200000, 300000]
gamma: 0.5
total_iter: 400000
warmup_iter: -1 # no warm up
fix_flow: ~
# losses
pixel_opt:
type: L1Loss
loss_weight: !!float 1e-2
reduction: mean
perceptual_opt:
type: PerceptualLoss
layer_weights:
'conv5_4': 1 # before relu
vgg_type: vgg19
use_input_norm: true
range_norm: false
perceptual_weight: 1.0
style_weight: 0
criterion: l1
gan_opt:
type: GANLoss
gan_type: vanilla
real_label_val: 1.0
fake_label_val: 0.0
loss_weight: !!float 5e-3
net_d_iters: 1
net_d_init_iters: 0
# validation settings
val:
val_freq: !!float 1e4
save_img: false
metrics:
psnr: # metric name, can be arbitrary
type: calculate_psnr
crop_border: 4
test_y_channel: false
# logging settings
logger:
print_freq: 100
save_checkpoint_freq: !!float 5e3
use_tb_logger: true
wandb:
project: ~
resume_id: ~
# dist training settings
dist_params:
backend: nccl
port: 29500
find_unused_parameters: true
addict
future
lmdb
numpy>=1.17
opencv-python
Pillow
pyyaml
requests
scikit-image
scipy
tb-nightly
torch>=1.7
torchvision
tqdm
yapf
import argparse
from os import path as osp
from basicsr.utils import scandir
from basicsr.utils.lmdb_util import make_lmdb_from_imgs
def create_lmdb_for_div2k():
    """Create lmdb files for DIV2K dataset.

    Usage:
        Before run this script, please run `extract_subimages.py`.
        Typically, there are four folders to be processed for DIV2K dataset.

            * DIV2K_train_HR_sub
            * DIV2K_train_LR_bicubic/X2_sub
            * DIV2K_train_LR_bicubic/X3_sub
            * DIV2K_train_LR_bicubic/X4_sub

        Remember to modify opt configurations according to your settings.
    """
    # (input image folder, output lmdb path) for HR and each LR scale.
    folders = [
        ('datasets/DIV2K/DIV2K_train_HR_sub', 'datasets/DIV2K/DIV2K_train_HR_sub.lmdb'),
        ('datasets/DIV2K/DIV2K_train_LR_bicubic/X2_sub', 'datasets/DIV2K/DIV2K_train_LR_bicubic_X2_sub.lmdb'),
        ('datasets/DIV2K/DIV2K_train_LR_bicubic/X3_sub', 'datasets/DIV2K/DIV2K_train_LR_bicubic_X3_sub.lmdb'),
        ('datasets/DIV2K/DIV2K_train_LR_bicubic/X4_sub', 'datasets/DIV2K/DIV2K_train_LR_bicubic_X4_sub.lmdb'),
    ]
    for folder_path, lmdb_path in folders:
        img_path_list, keys = prepare_keys_div2k(folder_path)
        make_lmdb_from_imgs(folder_path, lmdb_path, img_path_list, keys)
def prepare_keys_div2k(folder_path):
    """Prepare image path list and keys for DIV2K dataset.

    Args:
        folder_path (str): Folder path.

    Returns:
        list[str]: Image path list (relative paths, sorted).
        list[str]: Key list (file names without the .png extension).
    """
    print('Reading image path list ...')
    # sorted() already materializes a list, so no extra list() is needed;
    # sorting once here also makes re-sorting for the keys unnecessary.
    img_path_list = sorted(scandir(folder_path, suffix='png', recursive=False))
    keys = [img_path.split('.png')[0] for img_path in img_path_list]
    return img_path_list, keys
def create_lmdb_for_reds():
    """Create lmdb files for REDS dataset.

    Usage:
        Before run this script, please run :file:`merge_reds_train_val.py`.
        We take two folders for example:

            * train_sharp
            * train_sharp_bicubic

        Remember to modify opt configurations according to your settings.
    """
    # (input image folder, output lmdb path) for GT and bicubic LR frames.
    folders = [
        ('datasets/REDS/train_sharp', 'datasets/REDS/train_sharp_with_val.lmdb'),
        ('datasets/REDS/train_sharp_bicubic', 'datasets/REDS/train_sharp_bicubic_with_val.lmdb'),
    ]
    for folder_path, lmdb_path in folders:
        img_path_list, keys = prepare_keys_reds(folder_path)
        make_lmdb_from_imgs(folder_path, lmdb_path, img_path_list, keys, multiprocessing_read=True)
def prepare_keys_reds(folder_path):
    """Prepare image path list and keys for REDS dataset.

    Args:
        folder_path (str): Folder path.

    Returns:
        list[str]: Image path list (relative paths, sorted, recursive).
        list[str]: Key list. Example key: 000/00000000
    """
    print('Reading image path list ...')
    # sorted() already returns a list; wrapping in list() is redundant.
    img_path_list = sorted(scandir(folder_path, suffix='png', recursive=True))
    keys = [v.split('.png')[0] for v in img_path_list]  # example: 000/00000000
    return img_path_list, keys
def create_lmdb_for_vimeo90k():
    """Create lmdb files for Vimeo90K dataset.

    Usage:
        Remember to modify opt configurations according to your settings.
    """
    train_list_path = 'datasets/vimeo90k/vimeo_septuplet/sep_trainlist.txt'
    # (input sequence folder, output lmdb path, mode) — 'gt' keeps only the
    # 4th frame, 'lq' keeps all 7 frames (see prepare_keys_vimeo90k).
    configs = [
        ('datasets/vimeo90k/vimeo_septuplet/sequences',
         'datasets/vimeo90k/vimeo90k_train_GT_only4th.lmdb', 'gt'),
        ('datasets/vimeo90k/vimeo_septuplet_matlabLRx4/sequences',
         'datasets/vimeo90k/vimeo90k_train_LR7frames.lmdb', 'lq'),
    ]
    for folder_path, lmdb_path, mode in configs:
        img_path_list, keys = prepare_keys_vimeo90k(folder_path, train_list_path, mode)
        make_lmdb_from_imgs(folder_path, lmdb_path, img_path_list, keys, multiprocessing_read=True)
def prepare_keys_vimeo90k(folder_path, train_list_path, mode):
    """Prepare image path list and keys for Vimeo90K dataset.

    Args:
        folder_path (str): Folder path.
        train_list_path (str): Path to the official train list.
        mode (str): One of 'gt' or 'lq'.

    Returns:
        list[str]: Image path list.
        list[str]: Key list.
    """
    print('Reading image path list ...')
    with open(train_list_path, 'r') as fin:
        clips = [line.strip() for line in fin]

    img_path_list = []
    keys = []
    for clip in clips:
        folder, sub_folder = clip.split('/')
        # Each clip is a septuplet: frames im1.png .. im7.png.
        for frame_idx in range(1, 8):
            img_path_list.append(osp.join(folder, sub_folder, f'im{frame_idx}.png'))
            keys.append(f'{folder}/{sub_folder}/im{frame_idx}')

    if mode == 'gt':
        print('Only keep the 4th frame for the gt mode.')
        img_path_list = [path for path in img_path_list if path.endswith('im4.png')]
        keys = [key for key in keys if key.endswith('/im4')]
    return img_path_list, keys
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--dataset',
        type=str,
        help=("Options: 'DIV2K', 'REDS', 'Vimeo90K' You may need to modify the corresponding configurations in codes."))
    args = parser.parse_args()
    dataset = args.dataset.lower()
    # Dispatch on the lower-cased dataset name.
    handlers = {
        'div2k': create_lmdb_for_div2k,
        'reds': create_lmdb_for_reds,
        'vimeo90k': create_lmdb_for_vimeo90k,
    }
    if dataset not in handlers:
        raise ValueError('Wrong dataset.')
    handlers[dataset]()
import argparse
import glob
import os
from os import path as osp
from basicsr.utils.download_util import download_file_from_google_drive
def download_dataset(dataset, file_ids):
    """Download dataset archives from Google Drive and extract any zip files.

    Args:
        dataset (str): Dataset name (used by the caller for grouping; not
            read inside this function).
        file_ids (dict[str, str]): Mapping from archive file name
            (e.g. 'Set5.zip') to its Google Drive file id.

    Side effects:
        Creates ./datasets/, downloads each archive there (prompting the
        user interactively when the file already exists), and extracts
        any .zip archive next to itself.
    """
    save_path_root = './datasets/'
    os.makedirs(save_path_root, exist_ok=True)
    for file_name, file_id in file_ids.items():
        save_path = osp.abspath(osp.join(save_path_root, file_name))
        if osp.exists(save_path):
            # Ask before overwriting an existing download.
            user_response = input(f'{file_name} already exist. Do you want to cover it? Y/N\n')
            if user_response.lower() == 'y':
                print(f'Covering {file_name} to {save_path}')
                download_file_from_google_drive(file_id, save_path)
            elif user_response.lower() == 'n':
                print(f'Skipping {file_name}')
            else:
                raise ValueError('Wrong input. Only accepts Y/N.')
        else:
            print(f'Downloading {file_name} to {save_path}')
            download_file_from_google_drive(file_id, save_path)
        # unzip
        # NOTE(review): this extraction runs even when the user answered 'n'
        # above (an existing zip gets re-extracted) — confirm this is intended.
        if save_path.endswith('.zip'):
            extracted_path = save_path.replace('.zip', '')
            print(f'Extract {save_path} to {extracted_path}')
            import zipfile
            with zipfile.ZipFile(save_path, 'r') as zip_ref:
                zip_ref.extractall(extracted_path)
            # If the archive wraps its content in a same-named subfolder,
            # flatten that subfolder into extracted_path.
            file_name = file_name.replace('.zip', '')
            subfolder = osp.join(extracted_path, file_name)
            if osp.isdir(subfolder):
                print(f'Move {subfolder} to {extracted_path}')
                import shutil
                for path in glob.glob(osp.join(subfolder, '*')):
                    shutil.move(path, extracted_path)
                shutil.rmtree(subfolder)
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'dataset',
        type=str,
        help=("Options: 'Set5', 'Set14'. "
              "Set to 'all' if you want to download all the dataset."))
    args = parser.parse_args()

    # Google Drive file ids, keyed by dataset name then archive name.
    file_ids = {
        'Set5': {
            'Set5.zip': '1RtyIeUFTyW8u7oa4z7a0lSzT3T1FwZE9',
        },
        'Set14': {
            'Set14.zip': '1vsw07sV8wGrRQ8UARe2fO5jjgy9QJy_E',
        },
    }

    if args.dataset == 'all':
        for dataset_name, ids in file_ids.items():
            download_dataset(dataset_name, ids)
    else:
        download_dataset(args.dataset, file_ids[args.dataset])
import argparse
import cv2
import glob
import numpy as np
import os
from basicsr.utils.lmdb_util import LmdbMaker
def convert_celeba_tfrecords(tf_file, log_resolution, save_root, save_type='img', compress_level=1):
    """Convert CelebA tfrecords to images or lmdb files.

    Note:
        Relies on the module-level name ``tf`` (TensorFlow 1.x), which is
        imported inside the ``__main__`` section of this script.

    Args:
        tf_file (str): Input tfrecords file in glob pattern.
            Example: 'datasets/celeba/celeba_tfrecords/validation/validation-r08-s-*-of-*.tfrecords' # noqa:E501
        log_resolution (int): Log scale of resolution.
        save_root (str): Path root to save.
        save_type (str): Save type. Options: img | lmdb. Default: img.
        compress_level (int): Compress level when encoding images. Default: 1.

    Raises:
        ValueError: If ``save_type`` is neither 'img' nor 'lmdb'.
    """
    # The tfrecords file path encodes the split it belongs to.
    if 'validation' in tf_file:
        phase = 'validation'
    else:
        phase = 'train'
    if save_type == 'lmdb':
        save_path = os.path.join(save_root, f'celeba_{2**log_resolution}_{phase}.lmdb')
        lmdb_maker = LmdbMaker(save_path)
    elif save_type == 'img':
        save_path = os.path.join(save_root, f'celeba_{2**log_resolution}_{phase}')
    else:
        raise ValueError('Wrong save type.')
    os.makedirs(save_path, exist_ok=True)

    idx = 0
    for record in sorted(glob.glob(tf_file)):
        print('Processing record: ', record)
        record_iterator = tf.python_io.tf_record_iterator(record)
        for string_record in record_iterator:
            example = tf.train.Example()
            example.ParseFromString(string_record)
            # label = example.features.feature['label'].int64_list.value[0]
            # attr = example.features.feature['attr'].int64_list.value
            # male = attr[20]
            # young = attr[39]
            shape = example.features.feature['shape'].int64_list.value
            h, w, c = shape
            img_str = example.features.feature['data'].bytes_list.value[0]
            # np.fromstring is deprecated for binary data (removed in newer
            # numpy); np.frombuffer is the supported equivalent.
            img = np.frombuffer(img_str, dtype=np.uint8).reshape((h, w, c))
            # Reverse channel order — presumably RGB -> BGR for cv2; confirm.
            img = img[:, :, [2, 1, 0]]
            if save_type == 'img':
                cv2.imwrite(os.path.join(save_path, f'{idx:08d}.png'), img)
            elif save_type == 'lmdb':
                _, img_byte = cv2.imencode('.png', img, [cv2.IMWRITE_PNG_COMPRESSION, compress_level])
                key = f'{idx:08d}/r{log_resolution:02d}'
                lmdb_maker.put(img_byte, key, (h, w, c))
            idx += 1
            print(idx)
    if save_type == 'lmdb':
        lmdb_maker.close()
def convert_ffhq_tfrecords(tf_file, log_resolution, save_root, save_type='img', compress_level=1):
    """Convert FFHQ tfrecords to images or lmdb files.

    Note:
        Relies on the module-level name ``tf`` (TensorFlow 1.x), which is
        imported inside the ``__main__`` section of this script.

    Args:
        tf_file (str): Input tfrecords file.
        log_resolution (int): Log scale of resolution.
        save_root (str): Path root to save.
        save_type (str): Save type. Options: img | lmdb. Default: img.
        compress_level (int): Compress level when encoding images. Default: 1.

    Raises:
        ValueError: If ``save_type`` is neither 'img' nor 'lmdb'.
    """
    if save_type == 'lmdb':
        save_path = os.path.join(save_root, f'ffhq_{2**log_resolution}.lmdb')
        lmdb_maker = LmdbMaker(save_path)
    elif save_type == 'img':
        save_path = os.path.join(save_root, f'ffhq_{2**log_resolution}')
    else:
        raise ValueError('Wrong save type.')
    os.makedirs(save_path, exist_ok=True)

    idx = 0
    for record in sorted(glob.glob(tf_file)):
        print('Processing record: ', record)
        record_iterator = tf.python_io.tf_record_iterator(record)
        for string_record in record_iterator:
            example = tf.train.Example()
            example.ParseFromString(string_record)
            # FFHQ records store the shape channel-first (c, h, w).
            shape = example.features.feature['shape'].int64_list.value
            c, h, w = shape
            img_str = example.features.feature['data'].bytes_list.value[0]
            # np.fromstring is deprecated for binary data (removed in newer
            # numpy); np.frombuffer is the supported equivalent.
            img = np.frombuffer(img_str, dtype=np.uint8).reshape((c, h, w))
            # CHW -> HWC, then reverse channel order — presumably RGB -> BGR
            # for cv2; confirm.
            img = img.transpose(1, 2, 0)
            img = img[:, :, [2, 1, 0]]
            if save_type == 'img':
                cv2.imwrite(os.path.join(save_path, f'{idx:08d}.png'), img)
            elif save_type == 'lmdb':
                _, img_byte = cv2.imencode('.png', img, [cv2.IMWRITE_PNG_COMPRESSION, compress_level])
                key = f'{idx:08d}/r{log_resolution:02d}'
                lmdb_maker.put(img_byte, key, (h, w, c))
            idx += 1
            print(idx)
    if save_type == 'lmdb':
        lmdb_maker.close()
def make_ffhq_lmdb_from_imgs(folder_path, log_resolution, save_root, save_type='lmdb', compress_level=1):
    """Make FFHQ lmdb from images.

    Args:
        folder_path (str): Folder path.
        log_resolution (int): Log scale of resolution.
        save_root (str): Path root to save.
        save_type (str): Save type. Only 'lmdb' is supported here.
        compress_level (int): Compress level when encoding images. Default: 1.

    Raises:
        ValueError: If ``save_type`` is not 'lmdb'.
    """
    # Guard clause: this helper only knows how to write lmdb output.
    if save_type != 'lmdb':
        raise ValueError('Wrong save type.')
    save_path = os.path.join(save_root, f'ffhq_{2**log_resolution}_crop1.2.lmdb')
    lmdb_maker = LmdbMaker(save_path)
    os.makedirs(save_path, exist_ok=True)

    for idx, img_path in enumerate(sorted(glob.glob(os.path.join(folder_path, '*')))):
        print(f'Processing {idx}: ', img_path)
        img = cv2.imread(img_path)
        h, w, c = img.shape
        _, img_byte = cv2.imencode('.png', img, [cv2.IMWRITE_PNG_COMPRESSION, compress_level])
        lmdb_maker.put(img_byte, f'{idx:08d}/r{log_resolution:02d}', (h, w, c))
    lmdb_maker.close()
if __name__ == '__main__':
    """Read tfrecords w/o define a graph.

    We have tested it on TensorFlow 1.15
    References: http://warmspringwinds.github.io/tensorflow/tf-slim/2016/12/21/tfrecords-guide/
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--dataset', type=str, default='ffhq', help="Dataset name. Options: 'ffhq' | 'celeba'. Default: 'ffhq'.")
    parser.add_argument(
        '--tf_file',
        type=str,
        default='datasets/ffhq/ffhq-r10.tfrecords',
        help=(
            'Input tfrecords file. For celeba, it should be glob pattern. '
            'Put quotes around the wildcard argument to prevent the shell '
            'from expanding it.'
            "Example: 'datasets/celeba/celeba_tfrecords/validation/validation-r08-s-*-of-*.tfrecords'"  # noqa:E501
        ))
    parser.add_argument('--log_resolution', type=int, default=10, help='Log scale of resolution.')
    parser.add_argument('--save_root', type=str, default='datasets/ffhq/', help='Save root path.')
    parser.add_argument(
        '--save_type', type=str, default='img', help="Save type. Options: 'img' | 'lmdb'. Default: 'img'.")
    parser.add_argument(
        '--compress_level', type=int, default=1, help='Compress level when encoding images. Default: 1.')
    args = parser.parse_args()

    try:
        import tensorflow as tf
    except Exception:
        raise ImportError('You need to install tensorflow to read tfrecords.')

    # Both converters share the same call signature; pick one by dataset name.
    converter = convert_ffhq_tfrecords if args.dataset == 'ffhq' else convert_celeba_tfrecords
    converter(
        args.tf_file,
        args.log_resolution,
        args.save_root,
        save_type=args.save_type,
        compress_level=args.compress_level)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment