Commit 57e0e891 authored by limm's avatar limm
Browse files

add part mmgeneration code

parent 04e07f48
# MSPIEStyleGAN2 on FFHQ: SPE head encoding with a (4, 4) head that is
# interpolated (`interp_head=True`) and upsampling after the conv.
# `_base_` lists the parent configs this file builds on.
_base_ = [
    '../_base_/datasets/ffhq_flip.py',
    '../_base_/models/stylegan/stylegan2_base.py',
    '../_base_/default_runtime.py',
]

# Generator / discriminator settings.
model = {
    'type': 'MSPIEStyleGAN2',
    'generator': {
        'type': 'MSStyleGANv2Generator',
        'head_pos_encoding': {
            'type': 'SPE',
            'embedding_dim': 256,
            'padding_idx': 0,
            'init_size': 256,
            'center_shift': 100,
        },
        'deconv2conv': True,
        'up_after_conv': True,
        'head_pos_size': (4, 4),
        'interp_head': True,
        'up_config': {
            'scale_factor': 2,
            'mode': 'bilinear',
            'align_corners': True,
        },
        'out_size': 256,
    },
    'discriminator': {
        'type': 'MSStyleGAN2Discriminator',
        'in_size': 256,
        'with_adaptive_pool': True,
    },
}

# One probability is listed per entry of `multi_input_scales`.
train_cfg = {
    'num_upblocks': 6,
    'multi_input_scales': [0, 2, 4],
    'multi_scale_probability': [0.5, 0.25, 0.25],
}

data = {
    'samples_per_gpu': 3,
    'train': {
        'dataset': {'imgs_root': './data/ffhq/ffhq_imgs/ffhq_512'},
    },
}

# Feeds the EMA hook's momentum expression below.
ema_half_life = 10.

custom_hooks = [
    {
        'type': 'VisualizeUnconditionalSamples',
        'output_dir': 'training_samples',
        'interval': 5000,
    },
    {
        'type': 'ExponentialMovingAverageHook',
        'module_keys': ('generator_ema', ),
        'interval': 1,
        'interp_cfg': {
            'momentum': 0.5**(32. / (ema_half_life * 1000.)),
        },
        'priority': 'VERY_HIGH',
    },
]

# Iteration-based checkpointing (`by_epoch=False`).
checkpoint_config = {
    'interval': 10000,
    'by_epoch': False,
    'max_keep_ckpts': 40,
}

# Explicitly set to None in this config.
lr_config = None

log_config = {
    'interval': 100,
    'hooks': [
        {'type': 'TextLoggerHook'},
        # dict(type='TensorboardLoggerHook'),
    ],
}

cudnn_benchmark = False
total_iters = 1100002

# Evaluation metrics: FID on 50k images and PR on 10k images (k=3).
metrics = {
    'fid50k': {
        'type': 'FID',
        'num_images': 50000,
        'inception_pkl': 'work_dirs/inception_pkl/ffhq-256-50k-rgb.pkl',
        'bgr2rgb': True,
    },
    'pr10k3': {'type': 'PR', 'num_images': 10000, 'k': 3},
}
# MSPIEStyleGAN2 on FFHQ: SPE head encoding, upsampling after the conv
# (`up_after_conv=True`); no explicit head size is set here.
# `_base_` lists the parent configs this file builds on.
_base_ = [
    '../_base_/datasets/ffhq_flip.py',
    '../_base_/models/stylegan/stylegan2_base.py',
    '../_base_/default_runtime.py',
]

# Generator / discriminator settings.
model = {
    'type': 'MSPIEStyleGAN2',
    'generator': {
        'type': 'MSStyleGANv2Generator',
        'head_pos_encoding': {
            'type': 'SPE',
            'embedding_dim': 256,
            'padding_idx': 0,
            'init_size': 256,
            'center_shift': 100,
        },
        'deconv2conv': True,
        'up_after_conv': True,
        'up_config': {
            'scale_factor': 2,
            'mode': 'bilinear',
            'align_corners': True,
        },
        'out_size': 256,
    },
    'discriminator': {
        'type': 'MSStyleGAN2Discriminator',
        'in_size': 256,
        'with_adaptive_pool': True,
    },
}

# One probability is listed per entry of `multi_input_scales`.
train_cfg = {
    'num_upblocks': 6,
    'multi_input_scales': [0, 2, 4],
    'multi_scale_probability': [0.5, 0.25, 0.25],
}

data = {
    'samples_per_gpu': 3,
    'train': {
        'dataset': {'imgs_root': './data/ffhq/ffhq_imgs/ffhq_512'},
    },
}

# Feeds the EMA hook's momentum expression below.
ema_half_life = 10.

custom_hooks = [
    {
        'type': 'VisualizeUnconditionalSamples',
        'output_dir': 'training_samples',
        'interval': 5000,
    },
    {
        'type': 'ExponentialMovingAverageHook',
        'module_keys': ('generator_ema', ),
        'interval': 1,
        'interp_cfg': {
            'momentum': 0.5**(32. / (ema_half_life * 1000.)),
        },
        'priority': 'VERY_HIGH',
    },
]

# Iteration-based checkpointing (`by_epoch=False`).
checkpoint_config = {
    'interval': 10000,
    'by_epoch': False,
    'max_keep_ckpts': 40,
}

# Explicitly set to None in this config.
lr_config = None

log_config = {
    'interval': 100,
    'hooks': [
        {'type': 'TextLoggerHook'},
        # dict(type='TensorboardLoggerHook'),
    ],
}

cudnn_benchmark = False
total_iters = 1100002

# Evaluation metrics: FID on 50k images and PR on 10k images (k=3).
metrics = {
    'fid50k': {
        'type': 'FID',
        'num_images': 50000,
        'inception_pkl': 'work_dirs/inception_pkl/ffhq-256-50k-rgb.pkl',
        'bgr2rgb': True,
    },
    'pr10k3': {'type': 'PR', 'num_images': 10000, 'k': 3},
}
# MSPIEStyleGAN2 on FFHQ: SPE head encoding with `multi_input_scales`
# set to [0, 4, 10]; the data root is annotated as the path for 1024 scales.
# `_base_` lists the parent configs this file builds on.
_base_ = [
    '../_base_/datasets/ffhq_flip.py',
    '../_base_/models/stylegan/stylegan2_base.py',
    '../_base_/default_runtime.py',
]

# Generator / discriminator settings.
model = {
    'type': 'MSPIEStyleGAN2',
    'generator': {
        'type': 'MSStyleGANv2Generator',
        'head_pos_encoding': {
            'type': 'SPE',
            'embedding_dim': 256,
            'padding_idx': 0,
            'init_size': 256,
            'center_shift': 100,
        },
        'deconv2conv': True,
        'up_after_conv': True,
        'up_config': {
            'scale_factor': 2,
            'mode': 'bilinear',
            'align_corners': True,
        },
        'out_size': 256,
    },
    'discriminator': {
        'type': 'MSStyleGAN2Discriminator',
        'in_size': 256,
        'with_adaptive_pool': True,
    },
}

# One probability is listed per entry of `multi_input_scales`.
train_cfg = {
    'num_upblocks': 6,
    'multi_input_scales': [0, 4, 10],
    'multi_scale_probability': [0.5, 0.25, 0.25],
}

data = {
    'samples_per_gpu': 3,
    'train': {
        # path for 1024 scales
        'dataset': {'imgs_root': './data/ffhq/images'},
    },
}

# Feeds the EMA hook's momentum expression below.
ema_half_life = 10.

custom_hooks = [
    {
        'type': 'VisualizeUnconditionalSamples',
        'output_dir': 'training_samples',
        'interval': 5000,
    },
    {
        'type': 'ExponentialMovingAverageHook',
        'module_keys': ('generator_ema', ),
        'interval': 1,
        'interp_cfg': {
            'momentum': 0.5**(32. / (ema_half_life * 1000.)),
        },
        'priority': 'VERY_HIGH',
    },
]

# Iteration-based checkpointing (`by_epoch=False`).
checkpoint_config = {
    'interval': 10000,
    'by_epoch': False,
    'max_keep_ckpts': 40,
}

# Explicitly set to None in this config.
lr_config = None

log_config = {
    'interval': 100,
    'hooks': [
        {'type': 'TextLoggerHook'},
        # dict(type='TensorboardLoggerHook'),
    ],
}

cudnn_benchmark = False
total_iters = 1100002

# Evaluation metrics: FID on 50k images and PR on 10k images (k=3).
metrics = {
    'fid50k': {
        'type': 'FID',
        'num_images': 50000,
        'inception_pkl': 'work_dirs/inception_pkl/ffhq-256-50k-rgb.pkl',
        'bgr2rgb': True,
    },
    'pr10k3': {'type': 'PR', 'num_images': 10000, 'k': 3},
}
_base_ = [
'../_base_/datasets/ffhq_flip.py',
'../_base_/models/stylegan/stylegan2_base.py',
'../_base_/default_runtime.py'
]
model = dict(
type='MSPIEStyleGAN2',
generator=dict(
type='MSStyleGANv2Generator',
head_pos_encoding=None,
deconv2conv=True,
up_after_conv=False,
interp_pad=4,
no_pad=True,
head_pos_size=(6, 6),
interp_head=True,
up_config=dict(scale_factor=2, mode='bilinear', align_corners=True),
out_size=256),
discriminator=dict(
type='MSStyleGAN2Discriminator', in_size=256, with_adaptive_pool=True))
train_cfg = dict(
num_upblocks=6,
multi_input_scales=[0, 2, 4],
multi_scale_probability=[0.5, 0.25, 0.25])
data = dict(
samples_per_gpu=3,
train=dict(dataset=dict(imgs_root='./data/ffhq/ffhq_imgs/ffhq_512')))
ema_half_life = 10.
custom_hooks = [
dict(
type='VisualizeUnconditionalSamples',
output_dir='training_samples',
interval=5000),
dict(
type='ExponentialMovingAverageHook',
module_keys=('generator_ema', ),
interval=1,
interp_cfg=dict(momentum=0.5**(32. / (ema_half_life * 1000.))),
priority='VERY_HIGH')
]
checkpoint_config = dict(interval=10000, by_epoch=False, max_keep_ckpts=40)
lr_config = None
log_config = dict(
interval=100,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook'),
])
cudnn_benchmark = False
total_iters = 1100002
metrics = dict(
fid50k=dict(
type='FID',
num_images=50000,
inception_pkl='work_dirs/inception_pkl/ffhq-256-50k-rgb.pkl',
bgr2rgb=True),
pr10k3=dict(type='PR', num_images=10000, k=3))
# MSPIEStyleGAN2 on FFHQ: CSG head encoding, `interp_pad=4` with
# `no_pad=True`, and a (6, 6) head.
# `_base_` lists the parent configs this file builds on.
_base_ = [
    '../_base_/datasets/ffhq_flip.py',
    '../_base_/models/stylegan/stylegan2_base.py',
    '../_base_/default_runtime.py',
]

# Generator / discriminator settings.
model = {
    'type': 'MSPIEStyleGAN2',
    'generator': {
        'type': 'MSStyleGANv2Generator',
        'head_pos_encoding': {'type': 'CSG'},
        'deconv2conv': True,
        'up_after_conv': False,
        'interp_pad': 4,
        'no_pad': True,
        'head_pos_size': (6, 6),
        'up_config': {
            'scale_factor': 2,
            'mode': 'bilinear',
            'align_corners': True,
        },
        'out_size': 256,
    },
    'discriminator': {
        'type': 'MSStyleGAN2Discriminator',
        'in_size': 256,
        'with_adaptive_pool': True,
    },
}

# One probability is listed per entry of `multi_input_scales`.
train_cfg = {
    'num_upblocks': 6,
    'multi_input_scales': [0, 2, 4],
    'multi_scale_probability': [0.5, 0.25, 0.25],
}

data = {
    'samples_per_gpu': 3,
    'train': {
        'dataset': {'imgs_root': './data/ffhq/ffhq_imgs/ffhq_512'},
    },
}

# Feeds the EMA hook's momentum expression below.
ema_half_life = 10.

custom_hooks = [
    {
        'type': 'VisualizeUnconditionalSamples',
        'output_dir': 'training_samples',
        'interval': 5000,
    },
    {
        'type': 'ExponentialMovingAverageHook',
        'module_keys': ('generator_ema', ),
        'interval': 1,
        'interp_cfg': {
            'momentum': 0.5**(32. / (ema_half_life * 1000.)),
        },
        'priority': 'VERY_HIGH',
    },
]

# Iteration-based checkpointing (`by_epoch=False`).
checkpoint_config = {
    'interval': 10000,
    'by_epoch': False,
    'max_keep_ckpts': 40,
}

# Explicitly set to None in this config.
lr_config = None

log_config = {
    'interval': 100,
    'hooks': [
        {'type': 'TextLoggerHook'},
        # dict(type='TensorboardLoggerHook'),
    ],
}

cudnn_benchmark = False
total_iters = 1100002

# Evaluation metrics: FID on 50k images and PR on 10k images (k=3).
metrics = {
    'fid50k': {
        'type': 'FID',
        'num_images': 50000,
        'inception_pkl': 'work_dirs/inception_pkl/ffhq-256-50k-rgb.pkl',
        'bgr2rgb': True,
    },
    'pr10k3': {'type': 'PR', 'num_images': 10000, 'k': 3},
}
# MSPIEStyleGAN2 on FFHQ: SPE head encoding, `interp_pad=4` with
# `no_pad=True`, and an interpolated (6, 6) head (`interp_head=True`).
# `_base_` lists the parent configs this file builds on.
_base_ = [
    '../_base_/datasets/ffhq_flip.py',
    '../_base_/models/stylegan/stylegan2_base.py',
    '../_base_/default_runtime.py',
]

# Generator / discriminator settings.
model = {
    'type': 'MSPIEStyleGAN2',
    'generator': {
        'type': 'MSStyleGANv2Generator',
        'head_pos_encoding': {
            'type': 'SPE',
            'embedding_dim': 256,
            'padding_idx': 0,
            'init_size': 256,
            'center_shift': 100,
        },
        'deconv2conv': True,
        'up_after_conv': False,
        'interp_pad': 4,
        'no_pad': True,
        'head_pos_size': (6, 6),
        'interp_head': True,
        'up_config': {
            'scale_factor': 2,
            'mode': 'bilinear',
            'align_corners': True,
        },
        'out_size': 256,
    },
    'discriminator': {
        'type': 'MSStyleGAN2Discriminator',
        'in_size': 256,
        'with_adaptive_pool': True,
    },
}

# One probability is listed per entry of `multi_input_scales`.
train_cfg = {
    'num_upblocks': 6,
    'multi_input_scales': [0, 2, 4],
    'multi_scale_probability': [0.5, 0.25, 0.25],
}

data = {
    'samples_per_gpu': 3,
    'train': {
        'dataset': {'imgs_root': './data/ffhq/ffhq_imgs/ffhq_512'},
    },
}

# Feeds the EMA hook's momentum expression below.
ema_half_life = 10.

custom_hooks = [
    {
        'type': 'VisualizeUnconditionalSamples',
        'output_dir': 'training_samples',
        'interval': 5000,
    },
    {
        'type': 'ExponentialMovingAverageHook',
        'module_keys': ('generator_ema', ),
        'interval': 1,
        'interp_cfg': {
            'momentum': 0.5**(32. / (ema_half_life * 1000.)),
        },
        'priority': 'VERY_HIGH',
    },
]

# Iteration-based checkpointing (`by_epoch=False`).
checkpoint_config = {
    'interval': 10000,
    'by_epoch': False,
    'max_keep_ckpts': 40,
}

# Explicitly set to None in this config.
lr_config = None

log_config = {
    'interval': 100,
    'hooks': [
        {'type': 'TextLoggerHook'},
        # dict(type='TensorboardLoggerHook'),
    ],
}

cudnn_benchmark = False
total_iters = 1100002

# Evaluation metrics: FID on 50k images and PR on 10k images (k=3).
metrics = {
    'fid50k': {
        'type': 'FID',
        'num_images': 50000,
        'inception_pkl': 'work_dirs/inception_pkl/ffhq-256-50k-rgb.pkl',
        'bgr2rgb': True,
    },
    'pr10k3': {'type': 'PR', 'num_images': 10000, 'k': 3},
}
# MSPIEStyleGAN2 on FFHQ: SPE head encoding, `interp_pad=4` with
# `no_pad=True`, and a (6, 6) head; `interp_head` is not set here.
# `_base_` lists the parent configs this file builds on.
_base_ = [
    '../_base_/datasets/ffhq_flip.py',
    '../_base_/models/stylegan/stylegan2_base.py',
    '../_base_/default_runtime.py',
]

# Generator / discriminator settings.
model = {
    'type': 'MSPIEStyleGAN2',
    'generator': {
        'type': 'MSStyleGANv2Generator',
        'head_pos_encoding': {
            'type': 'SPE',
            'embedding_dim': 256,
            'padding_idx': 0,
            'init_size': 256,
            'center_shift': 100,
        },
        'deconv2conv': True,
        'up_after_conv': False,
        'interp_pad': 4,
        'no_pad': True,
        'head_pos_size': (6, 6),
        'up_config': {
            'scale_factor': 2,
            'mode': 'bilinear',
            'align_corners': True,
        },
        'out_size': 256,
    },
    'discriminator': {
        'type': 'MSStyleGAN2Discriminator',
        'in_size': 256,
        'with_adaptive_pool': True,
    },
}

# One probability is listed per entry of `multi_input_scales`.
train_cfg = {
    'num_upblocks': 6,
    'multi_input_scales': [0, 2, 4],
    'multi_scale_probability': [0.5, 0.25, 0.25],
}

data = {
    'samples_per_gpu': 3,
    'train': {
        'dataset': {'imgs_root': './data/ffhq/ffhq_imgs/ffhq_512'},
    },
}

# Feeds the EMA hook's momentum expression below.
ema_half_life = 10.

custom_hooks = [
    {
        'type': 'VisualizeUnconditionalSamples',
        'output_dir': 'training_samples',
        'interval': 5000,
    },
    {
        'type': 'ExponentialMovingAverageHook',
        'module_keys': ('generator_ema', ),
        'interval': 1,
        'interp_cfg': {
            'momentum': 0.5**(32. / (ema_half_life * 1000.)),
        },
        'priority': 'VERY_HIGH',
    },
]

# Iteration-based checkpointing (`by_epoch=False`).
checkpoint_config = {
    'interval': 10000,
    'by_epoch': False,
    'max_keep_ckpts': 40,
}

# Explicitly set to None in this config.
lr_config = None

log_config = {
    'interval': 100,
    'hooks': [
        {'type': 'TextLoggerHook'},
        # dict(type='TensorboardLoggerHook'),
    ],
}

cudnn_benchmark = False
total_iters = 1100002

# Evaluation metrics: FID on 50k images and PR on 10k images (k=3).
metrics = {
    'fid50k': {
        'type': 'FID',
        'num_images': 50000,
        'inception_pkl': 'work_dirs/inception_pkl/ffhq-256-50k-rgb.pkl',
        'bgr2rgb': True,
    },
    'pr10k3': {'type': 'PR', 'num_images': 10000, 'k': 3},
}
# PESinGAN with CSG positional encoding, trained on the bohemian image.
# Builds on the plain SinGAN bohemian config listed in `_base_`.
_base_ = ['../singan/singan_bohemian.py']

num_scales = 10  # start from zero

model = {
    'type': 'PESinGAN',
    'generator': {
        'type': 'SinGANMSGeneratorPE',
        'num_scales': num_scales,
        'padding': 1,
        'pad_at_head': False,
        # Same value as `first_fixed_noises_ch` in `train_cfg` below.
        'first_stage_in_channels': 2,
        'positional_encoding': {'type': 'CSG'},
    },
    # Generator and discriminator share the same `num_scales`.
    'discriminator': {'num_scales': num_scales},
}

train_cfg = {'first_fixed_noises_ch': 2}

data = {
    'train': {
        'img_path': './data/singan/bohemian.png',
        'min_size': 25,
        'max_size': 500,
    },
}

dist_params = {'backend': 'nccl'}
total_iters = 22000
# PESinGAN with CSG positional encoding, trained on the fish image.
# Builds on the plain SinGAN fish config listed in `_base_`.
_base_ = ['../singan/singan_fish.py']

num_scales = 10  # start from zero

model = {
    'type': 'PESinGAN',
    'generator': {
        'type': 'SinGANMSGeneratorPE',
        'num_scales': num_scales,
        'padding': 1,
        'pad_at_head': False,
        # Same value as `first_fixed_noises_ch` in `train_cfg` below.
        'first_stage_in_channels': 2,
        'positional_encoding': {'type': 'CSG'},
    },
    # Generator and discriminator share the same `num_scales`.
    'discriminator': {'num_scales': num_scales},
}

train_cfg = {'first_fixed_noises_ch': 2}

data = {
    'train': {
        'img_path': './data/singan/fish-crop.jpg',
        'min_size': 25,
        'max_size': 300,
    },
}

dist_params = {'backend': 'nccl'}
total_iters = 22000
# PESinGAN with interpolation-based padding, built on the SinGAN balloons
# config listed in `_base_`.
_base_ = ['../singan/singan_balloons.py']

model = {
    'type': 'PESinGAN',
    'generator': {
        'type': 'SinGANMSGeneratorPE',
        'interp_pad': True,
        'noise_with_pad': True,
    },
}

# Enabled here together with the generator's `noise_with_pad`.
train_cfg = {'fixed_noise_with_pad': True}

dist_params = {'backend': 'nccl'}
# PESinGAN with interpolation-based padding and the discriminator's
# `norm_cfg` set to None, built on the SinGAN balloons config in `_base_`.
_base_ = ['../singan/singan_balloons.py']

model = {
    'type': 'PESinGAN',
    'generator': {
        'type': 'SinGANMSGeneratorPE',
        'interp_pad': True,
        'noise_with_pad': True,
    },
    'discriminator': {'norm_cfg': None},
}

# Enabled here together with the generator's `noise_with_pad`.
train_cfg = {'fixed_noise_with_pad': True}

dist_params = {'backend': 'nccl'}
# PESinGAN with interpolation-based padding and the discriminator's
# `norm_cfg` set to None, trained on the fish image.
# Builds on the SinGAN fish config listed in `_base_`.
_base_ = ['../singan/singan_fish.py']

model = {
    'type': 'PESinGAN',
    'generator': {
        'type': 'SinGANMSGeneratorPE',
        'interp_pad': True,
        'noise_with_pad': True,
    },
    'discriminator': {'norm_cfg': None},
}

# Enabled here together with the generator's `noise_with_pad`.
train_cfg = {'fixed_noise_with_pad': True}

data = {
    'train': {
        'img_path': './data/singan/fish-crop.jpg',
        'min_size': 25,
        'max_size': 300,
    },
}

dist_params = {'backend': 'nccl'}
# PESinGAN with SPE positional encoding, trained on the bohemian image.
# Builds on the plain SinGAN bohemian config listed in `_base_`.
_base_ = ['../singan/singan_bohemian.py']

# The first-stage input channels and the first fixed-noise channels are
# both set to `embedding_dim * 2` below.
embedding_dim = 4
num_scales = 10  # start from zero

model = {
    'type': 'PESinGAN',
    'generator': {
        'type': 'SinGANMSGeneratorPE',
        'num_scales': num_scales,
        'padding': 1,
        'pad_at_head': False,
        'first_stage_in_channels': embedding_dim * 2,
        'positional_encoding': {
            'type': 'SPE',
            'embedding_dim': embedding_dim,
            'padding_idx': 0,
            'init_size': 512,
            'div_half_dim': False,
            'center_shift': 200,
        },
    },
    # Generator and discriminator share the same `num_scales`.
    'discriminator': {'num_scales': num_scales},
}

train_cfg = {'first_fixed_noises_ch': embedding_dim * 2}

data = {
    'train': {
        'img_path': './data/singan/bohemian.png',
        'min_size': 25,
        'max_size': 500,
    },
}

dist_params = {'backend': 'nccl'}
total_iters = 22000
# PESinGAN with SPE positional encoding, trained on the fish image.
# Builds on the plain SinGAN fish config listed in `_base_`.
#
# NOTE(review): this config defines no `train_cfg`, whereas the bohemian SPE
# configs set `train_cfg = dict(first_fixed_noises_ch=embedding_dim * 2)` to
# match `first_stage_in_channels`. Confirm the omission is intentional.
_base_ = ['../singan/singan_fish.py']

# The generator's first-stage input channels are `embedding_dim * 2`.
embedding_dim = 4
num_scales = 10  # start from zero

model = {
    'type': 'PESinGAN',
    'generator': {
        'type': 'SinGANMSGeneratorPE',
        'num_scales': num_scales,
        'padding': 1,
        'pad_at_head': False,
        'first_stage_in_channels': embedding_dim * 2,
        'positional_encoding': {
            'type': 'SPE',
            'embedding_dim': embedding_dim,
            'padding_idx': 0,
            'init_size': 512,
            'div_half_dim': False,
            'center_shift': 200,
        },
    },
    # Generator and discriminator share the same `num_scales`.
    'discriminator': {'num_scales': num_scales},
}

data = {
    'train': {
        'img_path': './data/singan/fish-crop.jpg',
        'min_size': 25,
        'max_size': 300,
    },
}

dist_params = {'backend': 'nccl'}
total_iters = 22000
# PESinGAN with SPE positional encoding, trained on the bohemian image.
# Builds on the plain SinGAN bohemian config listed in `_base_`.
_base_ = ['../singan/singan_bohemian.py']

# `embedding_dim * 2` is used for both the generator's first-stage input
# channels and the first fixed-noise channels.
embedding_dim = 4
num_scales = 10  # start from zero

model = {
    'type': 'PESinGAN',
    'generator': {
        'type': 'SinGANMSGeneratorPE',
        'num_scales': num_scales,
        'padding': 1,
        'pad_at_head': False,
        'first_stage_in_channels': embedding_dim * 2,
        'positional_encoding': {
            'type': 'SPE',
            'embedding_dim': embedding_dim,
            'padding_idx': 0,
            'init_size': 512,
            'div_half_dim': False,
            'center_shift': 200,
        },
    },
    # Generator and discriminator share the same `num_scales`.
    'discriminator': {'num_scales': num_scales},
}

train_cfg = {'first_fixed_noises_ch': embedding_dim * 2}

data = {
    'train': {
        'img_path': './data/singan/bohemian.png',
        'min_size': 25,
        'max_size': 500,
    },
}

dist_params = {'backend': 'nccl'}
total_iters = 22000
"""Config for the `config-f` setting in StyleGAN2.

This variant sets generator output and discriminator input to 256 and
reads training images from the `ffhq_256` folder.
"""
_base_ = [
    '../_base_/datasets/ffhq_flip.py',
    '../_base_/models/stylegan/stylegan2_base.py',
    '../_base_/default_runtime.py',
]

# Only the resolution is set here.
model = {
    'generator': {'out_size': 256},
    'discriminator': {'in_size': 256},
}

data = {
    'samples_per_gpu': 3,
    'train': {
        'dataset': {'imgs_root': './data/ffhq/ffhq_imgs/ffhq_256'},
    },
}

ema_half_life = 10.  # G_smoothing_kimg

custom_hooks = [
    {
        'type': 'VisualizeUnconditionalSamples',
        'output_dir': 'training_samples',
        'interval': 5000,
    },
    {
        'type': 'ExponentialMovingAverageHook',
        'module_keys': ('generator_ema', ),
        'interval': 1,
        # Momentum is derived from `ema_half_life` above.
        'interp_cfg': {
            'momentum': 0.5**(32. / (ema_half_life * 1000.)),
        },
        'priority': 'VERY_HIGH',
    },
]

# Evaluation metrics: FID on 50k images and PR on 10k images (k=3).
metrics = {
    'fid50k': {
        'type': 'FID',
        'num_images': 50000,
        'inception_pkl': 'work_dirs/inception_pkl/ffhq-256-50k-rgb.pkl',
        'bgr2rgb': True,
    },
    'pr10k3': {'type': 'PR', 'num_images': 10000, 'k': 3},
}

# Iteration-based checkpointing (`by_epoch=False`).
checkpoint_config = {
    'interval': 10000,
    'by_epoch': False,
    'max_keep_ckpts': 30,
}

# Explicitly set to None in this config.
lr_config = None

log_config = {
    'interval': 100,
    'hooks': [
        {'type': 'TextLoggerHook'},
        # dict(type='TensorboardLoggerHook'),
    ],
}

total_iters = 1100002
"""Config for the `config-f` setting in StyleGAN2.

This variant sets generator output and discriminator input to 512 and
reads training images from the `ffhq_512` folder.
"""
_base_ = [
    '../_base_/datasets/ffhq_flip.py',
    '../_base_/models/stylegan/stylegan2_base.py',
    '../_base_/default_runtime.py',
]

# Only the resolution is set here.
model = {
    'generator': {'out_size': 512},
    'discriminator': {'in_size': 512},
}

data = {
    'samples_per_gpu': 3,
    'train': {
        'dataset': {'imgs_root': './data/ffhq/ffhq_imgs/ffhq_512'},
    },
}

ema_half_life = 10.  # G_smoothing_kimg

custom_hooks = [
    {
        'type': 'VisualizeUnconditionalSamples',
        'output_dir': 'training_samples',
        'interval': 5000,
    },
    {
        'type': 'ExponentialMovingAverageHook',
        'module_keys': ('generator_ema', ),
        'interval': 1,
        # Momentum is derived from `ema_half_life` above.
        'interp_cfg': {
            'momentum': 0.5**(32. / (ema_half_life * 1000.)),
        },
        'priority': 'VERY_HIGH',
    },
]

# Evaluation metrics: FID on 50k images and PR on 10k images (k=3).
metrics = {
    'fid50k': {
        'type': 'FID',
        'num_images': 50000,
        'inception_pkl': 'work_dirs/inception_pkl/ffhq-512-50k-rgb.pkl',
        'bgr2rgb': True,
    },
    'pr10k3': {'type': 'PR', 'num_images': 10000, 'k': 3},
}

# Iteration-based checkpointing (`by_epoch=False`).
checkpoint_config = {
    'interval': 10000,
    'by_epoch': False,
    'max_keep_ckpts': 30,
}

# Explicitly set to None in this config.
lr_config = None

log_config = {
    'interval': 100,
    'hooks': [
        {'type': 'TextLoggerHook'},
        # dict(type='TensorboardLoggerHook'),
    ],
}

total_iters = 1100002
# SAGAN
> [Self-attention generative adversarial networks](https://proceedings.mlr.press/v97/zhang19d.html)
<!-- [ALGORITHM] -->
## Abstract
<!-- [ABSTRACT] -->
In this paper, we propose the Self-Attention Generative Adversarial Network (SAGAN) which allows attention-driven, long-range dependency modeling for image generation tasks. Traditional convolutional GANs generate high-resolution details as a function of only spatially local points in lower-resolution feature maps. In SAGAN, details can be generated using cues from all feature locations. Moreover, the discriminator can check that highly detailed features in distant portions of the image are consistent with each other. Furthermore, recent work has shown that generator conditioning affects GAN performance. Leveraging this insight, we apply spectral normalization to the GAN generator and find that this improves training dynamics. The proposed SAGAN performs better than prior work, boosting the best published Inception score from 36.8 to 52.52 and reducing Fréchet Inception distance from 27.62 to 18.65 on the challenging ImageNet dataset. Visualization of the attention layers shows that the generator leverages neighborhoods that correspond to object shapes rather than local regions of fixed shape.
<!-- [IMAGE] -->
<div align=center>
<img src="https://user-images.githubusercontent.com/28132635/143054130-8cc1d9b7-df13-4fdb-8dbf-af4b2c15ed28.JPG"/>
</div>
## Results and models
<div align="center">
<b> Results from our SAGAN trained on CIFAR10</b>
<br/>
<img src="https://user-images.githubusercontent.com/28132635/127619657-67f2e62d-52e4-43d2-931f-6d0e6e019813.png" width="400"/>
</div>
| Models | Dataset | Inplace ReLU | dist_step | Total Batchsize (BZ_PER_GPU * NGPU) | Total Iters\* | Iter | IS | FID | Config | Download | Log |
| :------------------------: | :------: | :----------: | :-------: | :---------------------------------: | :-----------: | :----: | :-----: | :-----: | :------------------------: | :--------------------------: | :---------------------: |
| SAGAN-32x32-woInplaceReLU Best IS | CIFAR10 | w/o | 5 | 64x1 | 500000 | 400000 | 9.3217 | 10.5030 | [config](https://github.com/open-mmlab/mmgeneration/tree/master/configs/sagan/sagan_32_woReLUinplace_lr-2e-4_ndisc5_cifar10_b64x1.py) | [model](https://download.openmmlab.com/mmgen/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_woReUinplace_is-iter400000_20210730_125743-4008a9ca.pth) | [Log](https://download.openmmlab.com/mmgen/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_woReUinplace_20210730_125449_fid-d50568a4_is-04008a9ca.json) |
| SAGAN-32x32-woInplaceReLU Best FID | CIFAR10 | w/o | 5 | 64x1 | 500000 | 480000 | 9.3174 | 9.4252 | [config](https://github.com/open-mmlab/mmgeneration/tree/master/configs/sagan/sagan_32_woReLUinplace_lr-2e-4_ndisc5_cifar10_b64x1.py) | [model](https://download.openmmlab.com/mmgen/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_woReUinplace_fid-iter480000_20210730_125449-d50568a4.pth) | [Log](https://download.openmmlab.com/mmgen/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_woReUinplace_20210730_125449_fid-d50568a4_is-04008a9ca.json) |
| SAGAN-32x32-wInplaceReLU Best IS | CIFAR10 | w | 5 | 64x1 | 500000 | 380000 | 9.2286 | 11.7760 | [config](https://github.com/open-mmlab/mmgeneration/tree/master/configs/sagan/sagan_32_wReLUinplace_lr-2e-4_ndisc5_cifar10_b64x1.py) | [model](https://download.openmmlab.com/mmgen/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_wReLUinplace_is-iter380000_20210730_124937-c77b4d25.pth) | [Log](https://download.openmmlab.com/mmgen/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_wReLUinplace_20210730_125155_fid-cbefb354_is-c77b4d25.json) |
| SAGAN-32x32-wInplaceReLU Best FID | CIFAR10 | w | 5 | 64x1 | 500000 | 460000 | 9.2061 | 10.7781 | [config](https://github.com/open-mmlab/mmgeneration/tree/master/configs/sagan/sagan_32_wReLUinplace_lr-2e-4_ndisc5_cifar10_b64x1.py) | [model](https://download.openmmlab.com/mmgen/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_wReLUinplace_fid-iter460000_20210730_125155-cbefb354.pth) | [Log](https://download.openmmlab.com/mmgen/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_wReLUinplace_20210730_125155_fid-cbefb354_is-c77b4d25.json) |
| SAGAN-128x128-woInplaceReLU Best IS | ImageNet | w/o | 1 | 64x4 | 1000000 | 980000 | 31.5938 | 36.7712 | [config](https://github.com/open-mmlab/mmgeneration/tree/master/configs/sagan/sagan_128_woReLUinplace_Glr-1e-4_Dlr-4e-4_ndisc1_imagenet1k_b64x4.py) | [model](https://download.openmmlab.com/mmgen/sagan/sagan_imagenet1k_128_Glr1e-4_Dlr4e-4_ndisc1_b32x4_woReLUinplace_is-iter980000_20210730_163140-cfbebfc6.pth) | [Log](https://download.openmmlab.com/mmgen/sagan/sagan_imagenet1k_128_Glr1e-4_Dlr4e-4_ndisc1_b32x4_woReLUinplace_20210730_163431_fid-d7916963_is-cfbebfc6.json) |
| SAGAN-128x128-woInplaceReLU Best FID | ImageNet | w/o | 1 | 64x4 | 1000000 | 950000 | 28.4936 | 34.7838 | [config](https://github.com/open-mmlab/mmgeneration/tree/master/configs/sagan/sagan_128_woReLUinplace_Glr-1e-4_Dlr-4e-4_ndisc1_imagenet1k_b64x4.py) | [model](https://download.openmmlab.com/mmgen/sagan/sagan_imagenet1k_128_Glr1e-4_Dlr4e-4_ndisc1_b32x4_woReLUinplace_fid-iter950000_20210730_163431-d7916963.pth) | [Log](https://download.openmmlab.com/mmgen/sagan/sagan_imagenet1k_128_Glr1e-4_Dlr4e-4_ndisc1_b32x4_woReLUinplace_20210730_163431_fid-d7916963_is-cfbebfc6.json) |
| SAGAN-128x128-BigGAN Schedule Best IS | ImageNet | w/o | 1 | 32x8 | 1000000 | 826000 | 69.5350 | 12.8295 | [config](https://github.com/open-mmlab/mmgeneration/tree/master/configs/sagan/sagan_128_woReLUinplace_noaug_bigGAN_Glr-1e-4_Dlr-4e-4_ndisc1_imagenet1k_b32x8.py) | [model](https://download.openmmlab.com/mmgen/sagan/sagan_128_woReLUinplace_noaug_bigGAN_imagenet1k_b32x8_Glr1e-4_Dlr-4e-4_ndisc1_20210818_210232-3f5686af.pth) | [Log](https://download.openmmlab.com/mmgen/sagan/sagan_128_woReLUinplace_noaug_bigGAN_imagenet1k_b32x8_Glr1e-4_Dlr-4e-4_ndisc1_20210818_210232-3f5686af.json) |
| SAGAN-128x128-BigGAN Schedule Best FID | ImageNet | w/o | 1 | 32x8 | 1000000 | 826000 | 69.5350 | 12.8295 | [config](https://github.com/open-mmlab/mmgeneration/tree/master/configs/sagan/sagan_128_woReLUinplace_noaug_bigGAN_Glr-1e-4_Dlr-4e-4_ndisc1_imagenet1k_b32x8.py) | [model](https://download.openmmlab.com/mmgen/sagan/sagan_128_woReLUinplace_noaug_bigGAN_imagenet1k_b32x8_Glr1e-4_Dlr-4e-4_ndisc1_20210818_210232-3f5686af.pth) | [Log](https://download.openmmlab.com/mmgen/sagan/sagan_128_woReLUinplace_noaug_bigGAN_imagenet1k_b32x8_Glr1e-4_Dlr-4e-4_ndisc1_20210818_210232-3f5686af.json) |
'\*' Iteration counting rule in our implementation is different from others. If you want to align with other codebases, you can use the following conversion formula:
```
total_iters (biggan/pytorch studio gan) = our_total_iters / dist_step
```
We also provide converted pre-train models from [Pytorch-StudioGAN](https://github.com/POSTECH-CVLab/PyTorch-StudioGAN).
Note that in PyTorch-StudioGAN, **inplace ReLU** is used in both the generator and the discriminator.
| Models | Dataset | Inplace ReLU | n_disc | Total Iters | IS (Our Pipeline) | FID (Our Pipeline) | IS (StudioGAN) | FID (StudioGAN) | Config | Download | Original Download link |
| :------------------: | :------: | :----------: | :----: | :---------: | :---------------: | :----------------: | :------------: | :-------------: | :------------------: | :--------------------: | :-----------------------------------: |
| SAGAN-32x32 StudioGAN | CIFAR10 | w | 5 | 100000 | 9.116 | 10.2011 | 8.680 | 14.009 | [Config](https://github.com/open-mmlab/mmgeneration/tree/master/configs/sagan/sagan_32_cvt_studioGAN.py) | [model](https://download.openmmlab.com/mmgen/sagan/sagan_32_cifar10_convert-studio-rgb_20210730_153321-080da7e2.pth) | [model](https://drive.google.com/drive/folders/1FA8hcz4MB8-hgTwLuDA0ZUfr8slud5P_) |
| SAGAN-128x128 StudioGAN | ImageNet | w | 1 | 1000000 | 27.367 | 40.1162 | 29.848 | 34.726 | [Config](https://github.com/open-mmlab/mmgeneration/tree/master/configs/sagan/sagan_128_cvt_studioGAN.py) | [model](https://download.openmmlab.com/mmgen/sagan/sagan_128_imagenet1k_convert-studio-rgb_20210730_153357-eddb0d1d.pth) | [model](https://drive.google.com/drive/folders/1ZYaqeeumDgxOPDhRR5QLeLFIpgBJ9S6B) |
- `Our Pipeline` denotes results evaluated with our pipeline.
- `StudioGAN` denotes results released by Pytorch-StudioGAN.
For IS metric, our implementation is different from PyTorch-Studio GAN in the following aspects:
1. We use [Tero's Inception](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/inception-2015-12-05.pt) for feature extraction.
2. We use bicubic interpolation with the PIL backend to resize images before feeding them to Inception.
For FID evaluation, we follow the pipeline of [BigGAN](https://github.com/ajbrock/BigGAN-PyTorch/blob/98459431a5d618d644d54cd1e9fceb1e5045648d/calculate_inception_moments.py#L52), where the whole training set is adopted to extract inception statistics, and Pytorch Studio GAN uses 50000 randomly selected samples. Besides, we also use [Tero's Inception](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/inception-2015-12-05.pt) for feature extraction.
You can download the preprocessed inception state by the following url: [CIFAR10](https://download.openmmlab.com/mmgen/evaluation/fid_inception_pkl/cifar10.pkl) and [ImageNet1k](https://download.openmmlab.com/mmgen/evaluation/fid_inception_pkl/imagenet.pkl).
You can use the following commands to extract those inception states by yourself.
```
# For CIFAR10
python tools/utils/inception_stat.py --data-cfg configs/_base_/datasets/cifar10_inception_stat.py --pklname cifar10.pkl --no-shuffle --inception-style stylegan --num-samples -1 --subset train
# For ImageNet1k
python tools/utils/inception_stat.py --data-cfg configs/_base_/datasets/imagenet_128x128_inception_stat.py --pklname imagenet.pkl --no-shuffle --inception-style stylegan --num-samples -1 --subset train
```
## Citation
```latex
@inproceedings{zhang2019self,
title={Self-attention generative adversarial networks},
author={Zhang, Han and Goodfellow, Ian and Metaxas, Dimitris and Odena, Augustus},
booktitle={International conference on machine learning},
pages={7354--7363},
year={2019},
organization={PMLR},
url={https://proceedings.mlr.press/v97/zhang19d.html},
}
```
Collections:
- Metadata:
Architecture:
- SAGAN
Name: SAGAN
Paper:
- https://proceedings.mlr.press/v97/zhang19d.html
README: configs/sagan/README.md
Models:
- Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/sagan/sagan_32_woReLUinplace_lr-2e-4_ndisc5_cifar10_b64x1.py
In Collection: SAGAN
Metadata:
Training Data: CIFAR
Name: sagan_32_woReLUinplace_lr-2e-4_ndisc5_cifar10_b64x1
Results:
- Dataset: CIFAR
Metrics:
FID: 10.503
IS: 9.3217
Inplace ReLU: w/o
Iter: 400000.0
Log: '[Log]'
Total Batchsize (BZ_PER_GPU * NGPU): 64x1
Total Iters\*: 500000.0
dist_step: 5.0
Task: Conditional GANs
Weights: https://download.openmmlab.com/mmgen/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_woReUinplace_is-iter400000_20210730_125743-4008a9ca.pth
- Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/sagan/sagan_32_woReLUinplace_lr-2e-4_ndisc5_cifar10_b64x1.py
In Collection: SAGAN
Metadata:
Training Data: CIFAR
Name: sagan_32_woReLUinplace_lr-2e-4_ndisc5_cifar10_b64x1
Results:
- Dataset: CIFAR
Metrics:
FID: 9.4252
IS: 9.3174
Inplace ReLU: w/o
Iter: 480000.0
Log: '[Log]'
Total Batchsize (BZ_PER_GPU * NGPU): 64x1
Total Iters\*: 500000.0
dist_step: 5.0
Task: Conditional GANs
Weights: https://download.openmmlab.com/mmgen/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_woReUinplace_fid-iter480000_20210730_125449-d50568a4.pth
- Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/sagan/sagan_32_wReLUinplace_lr-2e-4_ndisc5_cifar10_b64x1.py
In Collection: SAGAN
Metadata:
Training Data: CIFAR
Name: sagan_32_wReLUinplace_lr-2e-4_ndisc5_cifar10_b64x1
Results:
- Dataset: CIFAR
Metrics:
FID: 11.776
IS: 9.2286
Inplace ReLU: w
Iter: 380000.0
Log: '[Log]'
Total Batchsize (BZ_PER_GPU * NGPU): 64x1
Total Iters\*: 500000.0
dist_step: 5.0
Task: Conditional GANs
Weights: https://download.openmmlab.com/mmgen/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_wReLUinplace_is-iter380000_20210730_124937-c77b4d25.pth
- Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/sagan/sagan_32_wReLUinplace_lr-2e-4_ndisc5_cifar10_b64x1.py
In Collection: SAGAN
Metadata:
Training Data: CIFAR
Name: sagan_32_wReLUinplace_lr-2e-4_ndisc5_cifar10_b64x1
Results:
- Dataset: CIFAR
Metrics:
FID: 10.7781
IS: 9.2061
Inplace ReLU: w
Iter: 460000.0
Log: '[Log]'
Total Batchsize (BZ_PER_GPU * NGPU): 64x1
Total Iters\*: 500000.0
dist_step: 5.0
Task: Conditional GANs
Weights: https://download.openmmlab.com/mmgen/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_wReLUinplace_fid-iter460000_20210730_125155-cbefb354.pth
- Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/sagan/sagan_128_woReLUinplace_Glr-1e-4_Dlr-4e-4_ndisc1_imagenet1k_b64x4.py
In Collection: SAGAN
Metadata:
Training Data: IMAGENET
Name: sagan_128_woReLUinplace_Glr-1e-4_Dlr-4e-4_ndisc1_imagenet1k_b64x4
Results:
- Dataset: IMAGENET
Metrics:
FID: 36.7712
IS: 31.5938
Inplace ReLU: w/o
Iter: 980000.0
Log: '[Log]'
Total Batchsize (BZ_PER_GPU * NGPU): 64x4
Total Iters\*: 1000000.0
dist_step: 1.0
Task: Conditional GANs
Weights: https://download.openmmlab.com/mmgen/sagan/sagan_imagenet1k_128_Glr1e-4_Dlr4e-4_ndisc1_b32x4_woReLUinplace_is-iter980000_20210730_163140-cfbebfc6.pth
- Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/sagan/sagan_128_woReLUinplace_Glr-1e-4_Dlr-4e-4_ndisc1_imagenet1k_b64x4.py
In Collection: SAGAN
Metadata:
Training Data: IMAGENET
Name: sagan_128_woReLUinplace_Glr-1e-4_Dlr-4e-4_ndisc1_imagenet1k_b64x4
Results:
- Dataset: IMAGENET
Metrics:
FID: 34.7838
IS: 28.4936
Inplace ReLU: w/o
Iter: 950000.0
Log: '[Log]'
Total Batchsize (BZ_PER_GPU * NGPU): 64x4
Total Iters\*: 1000000.0
dist_step: 1.0
Task: Conditional GANs
Weights: https://download.openmmlab.com/mmgen/sagan/sagan_imagenet1k_128_Glr1e-4_Dlr4e-4_ndisc1_b32x4_woReLUinplace_fid-iter950000_20210730_163431-d7916963.pth
- Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/sagan/sagan_128_woReLUinplace_noaug_bigGAN_Glr-1e-4_Dlr-4e-4_ndisc1_imagenet1k_b32x8.py
In Collection: SAGAN
Metadata:
Training Data: IMAGENET
Name: sagan_128_woReLUinplace_noaug_bigGAN_Glr-1e-4_Dlr-4e-4_ndisc1_imagenet1k_b32x8
Results:
- Dataset: IMAGENET
Metrics:
FID: 12.8295
IS: 69.535
Inplace ReLU: w/o
Iter: 826000.0
Log: '[Log]'
Total Batchsize (BZ_PER_GPU * NGPU): 32x8
Total Iters\*: 1000000.0
dist_step: 1.0
Task: Conditional GANs
Weights: https://download.openmmlab.com/mmgen/sagan/sagan_128_woReLUinplace_noaug_bigGAN_imagenet1k_b32x8_Glr1e-4_Dlr-4e-4_ndisc1_20210818_210232-3f5686af.pth
- Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/sagan/sagan_128_woReLUinplace_noaug_bigGAN_Glr-1e-4_Dlr-4e-4_ndisc1_imagenet1k_b32x8.py
In Collection: SAGAN
Metadata:
Training Data: IMAGENET
Name: sagan_128_woReLUinplace_noaug_bigGAN_Glr-1e-4_Dlr-4e-4_ndisc1_imagenet1k_b32x8
Results:
- Dataset: IMAGENET
Metrics:
FID: 12.8295
IS: 69.535
Inplace ReLU: w/o
Iter: 826000.0
Log: '[Log]'
Total Batchsize (BZ_PER_GPU * NGPU): 32x8
Total Iters\*: 1000000.0
dist_step: 1.0
Task: Conditional GANs
Weights: https://download.openmmlab.com/mmgen/sagan/sagan_128_woReLUinplace_noaug_bigGAN_imagenet1k_b32x8_Glr1e-4_Dlr-4e-4_ndisc1_20210818_210232-3f5686af.pth
- Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/sagan/sagan_32_cvt_studioGAN.py
In Collection: SAGAN
Metadata:
Training Data: Others
Name: sagan_32_cvt_studioGAN
Results:
- Dataset: Others
Metrics:
FID (Our Pipeline): 10.2011
FID (StudioGAN): 14.009
IS (Our Pipeline): 9.116
IS (StudioGAN): 8.68
Inplace ReLU: w
Total Iters: 100000.0
n_disc: 5.0
Task: Conditional GANs
Weights: https://download.openmmlab.com/mmgen/sagan/sagan_32_cifar10_convert-studio-rgb_20210730_153321-080da7e2.pth
- Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/sagan/sagan_128_cvt_studioGAN.py
In Collection: SAGAN
Metadata:
Training Data: Others
Name: sagan_128_cvt_studioGAN
Results:
- Dataset: Others
Metrics:
FID (Our Pipeline): 40.1162
FID (StudioGAN): 34.726
IS (Our Pipeline): 27.367
IS (StudioGAN): 29.848
Inplace ReLU: w
Total Iters: 1000000.0
n_disc: 1.0
Task: Conditional GANs
Weights: https://download.openmmlab.com/mmgen/sagan/sagan_128_imagenet1k_convert-studio-rgb_20210730_153357-eddb0d1d.pth
# Builds on the shared SAGAN 128x128 model config listed in `_base_`.
# NOTE(review): only this line is visible here; the rest of the config may
# have been cut off — confirm against the full file.
_base_ = ['../_base_/models/sagan/sagan_128x128.py']
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment