Commit 1401de15 authored by dongchy920
Browse files

stylegan2_mmcv

parents
Pipeline #1274 canceled with stages
# ---------------------------------------------------------------------------
# MMGeneration config: CycleGAN (LSGAN objective, ResNet generator) on the
# unpaired facades dataset, "id0" variant (no identity loss), 80k iterations.
# NOTE(review): this chunk concatenates several standalone config files; each
# later `_base_` assignment below starts a new file.
# ---------------------------------------------------------------------------
_base_ = [
    '../_base_/models/cyclegan/cyclegan_lsgan_resnet.py',
    '../_base_/datasets/unpaired_imgs_256x256.py',
    '../_base_/default_runtime.py'
]
# Size of the generated-image buffer used during training (presumably the
# image-history buffer from the CycleGAN paper — TODO confirm in train loop).
train_cfg = dict(buffer_size=50)
test_cfg = None
# Names of the two unpaired image domains; every pipeline key and loss
# data_info key below is derived from these strings.
domain_a = 'mask'
domain_b = 'photo'
model = dict(
    default_domain=domain_b,
    reachable_domains=[domain_a, domain_b],
    related_domains=[domain_a, domain_b],
    # Cycle-consistency L1 losses for both directions (weight 10.0).
    # No identity loss is configured — this is the "id0" variant.
    gen_auxiliary_loss=[
        dict(
            type='L1Loss',
            loss_weight=10.0,
            loss_name='cycle_loss',
            data_info=dict(
                pred=f'cycle_{domain_a}', target=f'real_{domain_a}'),
            reduction='mean'),
        dict(
            type='L1Loss',
            loss_weight=10.0,
            loss_name='cycle_loss',
            data_info=dict(
                pred=f'cycle_{domain_b}',
                target=f'real_{domain_b}',
            ),
            reduction='mean')
    ])
# Train pipeline: load both domain images, resize to 286x286, random-crop to
# 256x256, random horizontal flips, rescale to [0, 1], then normalize with
# mean/std 0.5 (mapping to [-1, 1]) and convert to tensors.
train_pipeline = [
    dict(
        type='LoadImageFromFile',
        io_backend='disk',
        key=f'img_{domain_a}',
        flag='color'),
    dict(
        type='LoadImageFromFile',
        io_backend='disk',
        key=f'img_{domain_b}',
        flag='color'),
    dict(
        type='Resize',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        scale=(286, 286),
        interpolation='bicubic'),
    dict(
        type='Crop',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        crop_size=(256, 256),
        random_crop=True),
    dict(type='Flip', keys=[f'img_{domain_a}'], direction='horizontal'),
    dict(type='Flip', keys=[f'img_{domain_b}'], direction='horizontal'),
    dict(type='RescaleToZeroOne', keys=[f'img_{domain_a}', f'img_{domain_b}']),
    dict(
        type='Normalize',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        to_rgb=False,
        mean=[0.5, 0.5, 0.5],
        std=[0.5, 0.5, 0.5]),
    dict(type='ImageToTensor', keys=[f'img_{domain_a}', f'img_{domain_b}']),
    dict(
        type='Collect',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        meta_keys=[f'img_{domain_a}_path', f'img_{domain_b}_path'])
]
dataroot = './data/unpaired_facades'
# Test pipeline: deterministic 256x256 resize only (no crop or flip).
test_pipeline = [
    dict(
        type='LoadImageFromFile',
        io_backend='disk',
        key=f'img_{domain_a}',
        flag='color'),
    dict(
        type='LoadImageFromFile',
        io_backend='disk',
        key=f'img_{domain_b}',
        flag='color'),
    dict(
        type='Resize',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        scale=(256, 256),
        interpolation='bicubic'),
    dict(type='RescaleToZeroOne', keys=[f'img_{domain_a}', f'img_{domain_b}']),
    dict(
        type='Normalize',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        to_rgb=False,
        mean=[0.5, 0.5, 0.5],
        std=[0.5, 0.5, 0.5]),
    dict(type='ImageToTensor', keys=[f'img_{domain_a}', f'img_{domain_b}']),
    dict(
        type='Collect',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        meta_keys=[f'img_{domain_a}_path', f'img_{domain_b}_path'])
]
data = dict(
    train=dict(
        dataroot=dataroot,
        pipeline=train_pipeline,
        domain_a=domain_a,
        domain_b=domain_b),
    val=dict(
        dataroot=dataroot,
        domain_a=domain_a,
        domain_b=domain_b,
        pipeline=test_pipeline),
    test=dict(
        dataroot=dataroot,
        domain_a=domain_a,
        domain_b=domain_b,
        pipeline=test_pipeline))
# Separate Adam optimizers for generators and discriminators.
optimizer = dict(
    generators=dict(type='Adam', lr=0.0002, betas=(0.5, 0.999)),
    discriminators=dict(type='Adam', lr=0.0002, betas=(0.5, 0.999)))
# learning policy: linearly decay lr to 0 over the second half of training
# (from iter 40000 of 80000, updating every 400 iters).
lr_config = dict(
    policy='Linear', by_epoch=False, target_lr=0, start=40000, interval=400)
checkpoint_config = dict(interval=10000, save_optimizer=True, by_epoch=False)
custom_hooks = [
    dict(
        type='MMGenVisualizationHook',
        output_dir='training_samples',
        res_name_list=[f'fake_{domain_a}', f'fake_{domain_b}'],
        interval=5000)
]
runner = None
use_ddp_wrapper = True
total_iters = 80000
workflow = [('train', 1)]
exp_name = 'cyclegan_facades_id0'
work_dir = f'./work_dirs/experiments/{exp_name}'
# Number of test images used by the offline metrics below.
num_images = 106
metrics = dict(
    FID=dict(type='FID', num_images=num_images, image_shape=(3, 256, 256)),
    IS=dict(
        type='IS',
        num_images=num_images,
        image_shape=(3, 256, 256),
        inception_args=dict(type='pytorch')))
# Online evaluation: FID/IS of translations into domain_b every 10k iters.
evaluation = dict(
    type='TranslationEvalHook',
    target_domain=domain_b,
    interval=10000,
    metrics=[
        dict(type='FID', num_images=num_images, bgr2rgb=True),
        dict(
            type='IS',
            num_images=num_images,
            inception_args=dict(type='pytorch'))
    ],
    best_metric=['fid', 'is'])
# ---------------------------------------------------------------------------
# MMGeneration config: CycleGAN (LSGAN, ResNet generator) on horse2zebra,
# "id0" variant (no identity loss), 270k training iterations.
# ---------------------------------------------------------------------------
_base_ = [
    '../_base_/models/cyclegan/cyclegan_lsgan_resnet.py',
    '../_base_/datasets/unpaired_imgs_256x256.py',
    '../_base_/default_runtime.py'
]
# The two unpaired image domains; all pipeline/loss keys derive from them.
domain_a = 'horse'
domain_b = 'zebra'
model = dict(
    default_domain=domain_b,
    reachable_domains=[domain_a, domain_b],
    related_domains=[domain_a, domain_b],
    # Cycle-consistency L1 losses for both directions (weight 10.0);
    # no identity loss — the "id0" variant.
    gen_auxiliary_loss=[
        dict(
            type='L1Loss',
            loss_weight=10.0,
            loss_name='cycle_loss',
            data_info=dict(
                pred=f'cycle_{domain_a}', target=f'real_{domain_a}'),
            reduction='mean'),
        dict(
            type='L1Loss',
            loss_weight=10.0,
            loss_name='cycle_loss',
            data_info=dict(
                pred=f'cycle_{domain_b}',
                target=f'real_{domain_b}',
            ),
            reduction='mean')
    ])
dataroot = './data/horse2zebra'
# Train pipeline: load, resize to 286, random-crop to 256, random flips,
# rescale to [0, 1], normalize to [-1, 1], convert to tensors.
train_pipeline = [
    dict(
        type='LoadImageFromFile',
        io_backend='disk',
        key=f'img_{domain_a}',
        flag='color'),
    dict(
        type='LoadImageFromFile',
        io_backend='disk',
        key=f'img_{domain_b}',
        flag='color'),
    dict(
        type='Resize',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        scale=(286, 286),
        interpolation='bicubic'),
    dict(
        type='Crop',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        crop_size=(256, 256),
        random_crop=True),
    dict(type='Flip', keys=[f'img_{domain_a}'], direction='horizontal'),
    dict(type='Flip', keys=[f'img_{domain_b}'], direction='horizontal'),
    dict(type='RescaleToZeroOne', keys=[f'img_{domain_a}', f'img_{domain_b}']),
    dict(
        type='Normalize',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        to_rgb=False,
        mean=[0.5, 0.5, 0.5],
        std=[0.5, 0.5, 0.5]),
    dict(type='ImageToTensor', keys=[f'img_{domain_a}', f'img_{domain_b}']),
    dict(
        type='Collect',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        meta_keys=[f'img_{domain_a}_path', f'img_{domain_b}_path'])
]
# Test pipeline: deterministic 256x256 resize only.
test_pipeline = [
    dict(
        type='LoadImageFromFile',
        io_backend='disk',
        key=f'img_{domain_a}',
        flag='color'),
    dict(
        type='LoadImageFromFile',
        io_backend='disk',
        key=f'img_{domain_b}',
        flag='color'),
    dict(
        type='Resize',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        scale=(256, 256),
        interpolation='bicubic'),
    dict(type='RescaleToZeroOne', keys=[f'img_{domain_a}', f'img_{domain_b}']),
    dict(
        type='Normalize',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        to_rgb=False,
        mean=[0.5, 0.5, 0.5],
        std=[0.5, 0.5, 0.5]),
    dict(type='ImageToTensor', keys=[f'img_{domain_a}', f'img_{domain_b}']),
    dict(
        type='Collect',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        meta_keys=[f'img_{domain_a}_path', f'img_{domain_b}_path'])
]
data = dict(
    train=dict(
        dataroot=dataroot,
        pipeline=train_pipeline,
        domain_a=domain_a,
        domain_b=domain_b),
    val=dict(
        dataroot=dataroot,
        domain_a=domain_a,
        domain_b=domain_b,
        pipeline=test_pipeline),
    test=dict(
        dataroot=dataroot,
        domain_a=domain_a,
        domain_b=domain_b,
        pipeline=test_pipeline))
# Separate Adam optimizers for generators and discriminators.
optimizer = dict(
    generators=dict(type='Adam', lr=0.0002, betas=(0.5, 0.999)),
    discriminators=dict(type='Adam', lr=0.0002, betas=(0.5, 0.999)))
# learning policy: linear decay to 0 over the second half of training
# (from iter 135000 of 270000, updating every 1350 iters).
lr_config = dict(
    policy='Linear', by_epoch=False, target_lr=0, start=135000, interval=1350)
checkpoint_config = dict(interval=10000, save_optimizer=True, by_epoch=False)
custom_hooks = [
    dict(
        type='MMGenVisualizationHook',
        output_dir='training_samples',
        res_name_list=[f'fake_{domain_a}', f'fake_{domain_b}'],
        interval=5000)
]
runner = None
use_ddp_wrapper = True
total_iters = 270000
workflow = [('train', 1)]
exp_name = 'cyclegan_horse2zebra_id0'
work_dir = f'./work_dirs/experiments/{exp_name}'
# Number of test images used by the metrics below.
num_images = 140
metrics = dict(
    FID=dict(type='FID', num_images=num_images, image_shape=(3, 256, 256)),
    IS=dict(
        type='IS',
        num_images=num_images,
        image_shape=(3, 256, 256),
        inception_args=dict(type='pytorch')))
# Online evaluation: FID/IS of translations into domain_b every 10k iters.
evaluation = dict(
    type='TranslationEvalHook',
    target_domain=domain_b,
    interval=10000,
    metrics=[
        dict(type='FID', num_images=num_images, bgr2rgb=True),
        dict(
            type='IS',
            num_images=num_images,
            inception_args=dict(type='pytorch'))
    ],
    best_metric=['fid', 'is'])
# ---------------------------------------------------------------------------
# MMGeneration config: CycleGAN (LSGAN, ResNet generator) on
# summer2winter_yosemite, "id0" variant (no identity loss), 250k iterations.
# ---------------------------------------------------------------------------
_base_ = [
    '../_base_/models/cyclegan/cyclegan_lsgan_resnet.py',
    '../_base_/datasets/unpaired_imgs_256x256.py',
    '../_base_/default_runtime.py'
]
# The two unpaired image domains; all pipeline/loss keys derive from them.
domain_a = 'summer'
domain_b = 'winter'
model = dict(
    default_domain=domain_b,
    reachable_domains=[domain_a, domain_b],
    related_domains=[domain_a, domain_b],
    # Cycle-consistency L1 losses for both directions (weight 10.0);
    # no identity loss — the "id0" variant.
    gen_auxiliary_loss=[
        dict(
            type='L1Loss',
            loss_weight=10.0,
            loss_name='cycle_loss',
            data_info=dict(
                pred=f'cycle_{domain_a}', target=f'real_{domain_a}'),
            reduction='mean'),
        dict(
            type='L1Loss',
            loss_weight=10.0,
            loss_name='cycle_loss',
            data_info=dict(
                pred=f'cycle_{domain_b}',
                target=f'real_{domain_b}',
            ),
            reduction='mean')
    ])
dataroot = './data/summer2winter_yosemite'
# Train pipeline: load, resize to 286, random-crop to 256, random flips,
# rescale to [0, 1], normalize to [-1, 1], convert to tensors.
train_pipeline = [
    dict(
        type='LoadImageFromFile',
        io_backend='disk',
        key=f'img_{domain_a}',
        flag='color'),
    dict(
        type='LoadImageFromFile',
        io_backend='disk',
        key=f'img_{domain_b}',
        flag='color'),
    dict(
        type='Resize',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        scale=(286, 286),
        interpolation='bicubic'),
    dict(
        type='Crop',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        crop_size=(256, 256),
        random_crop=True),
    dict(type='Flip', keys=[f'img_{domain_a}'], direction='horizontal'),
    dict(type='Flip', keys=[f'img_{domain_b}'], direction='horizontal'),
    dict(type='RescaleToZeroOne', keys=[f'img_{domain_a}', f'img_{domain_b}']),
    dict(
        type='Normalize',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        to_rgb=False,
        mean=[0.5, 0.5, 0.5],
        std=[0.5, 0.5, 0.5]),
    dict(type='ImageToTensor', keys=[f'img_{domain_a}', f'img_{domain_b}']),
    dict(
        type='Collect',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        meta_keys=[f'img_{domain_a}_path', f'img_{domain_b}_path'])
]
# Test pipeline: deterministic 256x256 resize only.
test_pipeline = [
    dict(
        type='LoadImageFromFile',
        io_backend='disk',
        key=f'img_{domain_a}',
        flag='color'),
    dict(
        type='LoadImageFromFile',
        io_backend='disk',
        key=f'img_{domain_b}',
        flag='color'),
    dict(
        type='Resize',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        scale=(256, 256),
        interpolation='bicubic'),
    dict(type='RescaleToZeroOne', keys=[f'img_{domain_a}', f'img_{domain_b}']),
    dict(
        type='Normalize',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        to_rgb=False,
        mean=[0.5, 0.5, 0.5],
        std=[0.5, 0.5, 0.5]),
    dict(type='ImageToTensor', keys=[f'img_{domain_a}', f'img_{domain_b}']),
    dict(
        type='Collect',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        meta_keys=[f'img_{domain_a}_path', f'img_{domain_b}_path'])
]
data = dict(
    train=dict(
        dataroot=dataroot,
        pipeline=train_pipeline,
        domain_a=domain_a,
        domain_b=domain_b),
    val=dict(
        dataroot=dataroot,
        domain_a=domain_a,
        domain_b=domain_b,
        pipeline=test_pipeline),
    test=dict(
        dataroot=dataroot,
        domain_a=domain_a,
        domain_b=domain_b,
        pipeline=test_pipeline))
# Separate Adam optimizers for generators and discriminators.
optimizer = dict(
    generators=dict(type='Adam', lr=0.0002, betas=(0.5, 0.999)),
    discriminators=dict(type='Adam', lr=0.0002, betas=(0.5, 0.999)))
# learning policy: linear decay to 0 over the second half of training
# (from iter 125000 of 250000, updating every 1250 iters).
lr_config = dict(
    policy='Linear', by_epoch=False, target_lr=0, start=125000, interval=1250)
checkpoint_config = dict(interval=10000, save_optimizer=True, by_epoch=False)
custom_hooks = [
    dict(
        type='MMGenVisualizationHook',
        output_dir='training_samples',
        res_name_list=[f'fake_{domain_a}', f'fake_{domain_b}'],
        interval=5000)
]
runner = None
use_ddp_wrapper = True
total_iters = 250000
workflow = [('train', 1)]
exp_name = 'cyclegan_summer2winter_id0'
work_dir = f'./work_dirs/experiments/{exp_name}'
# testA: 309, testB:238
num_images = 238
metrics = dict(
    FID=dict(type='FID', num_images=num_images, image_shape=(3, 256, 256)),
    IS=dict(
        type='IS',
        num_images=num_images,
        image_shape=(3, 256, 256),
        inception_args=dict(type='pytorch')))
# Online evaluation: FID/IS of translations into domain_b every 10k iters.
evaluation = dict(
    type='TranslationEvalHook',
    target_domain=domain_b,
    interval=10000,
    metrics=[
        dict(type='FID', num_images=num_images, bgr2rgb=True),
        dict(
            type='IS',
            num_images=num_images,
            inception_args=dict(type='pytorch'))
    ],
    best_metric=['fid', 'is'])
# ---------------------------------------------------------------------------
# MMGeneration config: CycleGAN (LSGAN, ResNet generator) on the unpaired
# facades dataset, WITH identity loss (weight 0.5), 80k training iterations.
# ---------------------------------------------------------------------------
_base_ = [
    '../_base_/models/cyclegan/cyclegan_lsgan_resnet.py',
    '../_base_/datasets/unpaired_imgs_256x256.py',
    '../_base_/default_runtime.py'
]
# Size of the generated-image buffer used during training (presumably the
# image-history buffer from the CycleGAN paper — TODO confirm in train loop).
train_cfg = dict(buffer_size=50)
test_cfg = None
# The two unpaired image domains; all pipeline/loss keys derive from them.
domain_a = 'mask'
domain_b = 'photo'
model = dict(
    default_domain=domain_b,
    reachable_domains=[domain_a, domain_b],
    related_domains=[domain_a, domain_b],
    # Cycle-consistency L1 losses (weight 10.0) for both directions, plus
    # identity-mapping L1 losses (weight 0.5) for both domains.
    gen_auxiliary_loss=[
        dict(
            type='L1Loss',
            loss_weight=10.0,
            loss_name='cycle_loss',
            data_info=dict(
                pred=f'cycle_{domain_a}', target=f'real_{domain_a}'),
            reduction='mean'),
        dict(
            type='L1Loss',
            loss_weight=10.0,
            loss_name='cycle_loss',
            data_info=dict(
                pred=f'cycle_{domain_b}',
                target=f'real_{domain_b}',
            ),
            reduction='mean'),
        dict(
            type='L1Loss',
            loss_weight=0.5,
            loss_name='id_loss',
            data_info=dict(
                pred=f'identity_{domain_a}', target=f'real_{domain_a}'),
            reduction='mean'),
        dict(
            type='L1Loss',
            loss_weight=0.5,
            loss_name='id_loss',
            data_info=dict(
                pred=f'identity_{domain_b}', target=f'real_{domain_b}'),
            reduction='mean')
    ])
dataroot = './data/unpaired_facades'
# Train pipeline: load, resize to 286, random-crop to 256, random flips,
# rescale to [0, 1], normalize to [-1, 1], convert to tensors.
train_pipeline = [
    dict(
        type='LoadImageFromFile',
        io_backend='disk',
        key=f'img_{domain_a}',
        flag='color'),
    dict(
        type='LoadImageFromFile',
        io_backend='disk',
        key=f'img_{domain_b}',
        flag='color'),
    dict(
        type='Resize',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        scale=(286, 286),
        interpolation='bicubic'),
    dict(
        type='Crop',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        crop_size=(256, 256),
        random_crop=True),
    dict(type='Flip', keys=[f'img_{domain_a}'], direction='horizontal'),
    dict(type='Flip', keys=[f'img_{domain_b}'], direction='horizontal'),
    dict(type='RescaleToZeroOne', keys=[f'img_{domain_a}', f'img_{domain_b}']),
    dict(
        type='Normalize',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        to_rgb=False,
        mean=[0.5, 0.5, 0.5],
        std=[0.5, 0.5, 0.5]),
    dict(type='ImageToTensor', keys=[f'img_{domain_a}', f'img_{domain_b}']),
    dict(
        type='Collect',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        meta_keys=[f'img_{domain_a}_path', f'img_{domain_b}_path'])
]
# Test pipeline: deterministic 256x256 resize only.
test_pipeline = [
    dict(
        type='LoadImageFromFile',
        io_backend='disk',
        key=f'img_{domain_a}',
        flag='color'),
    dict(
        type='LoadImageFromFile',
        io_backend='disk',
        key=f'img_{domain_b}',
        flag='color'),
    dict(
        type='Resize',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        scale=(256, 256),
        interpolation='bicubic'),
    dict(type='RescaleToZeroOne', keys=[f'img_{domain_a}', f'img_{domain_b}']),
    dict(
        type='Normalize',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        to_rgb=False,
        mean=[0.5, 0.5, 0.5],
        std=[0.5, 0.5, 0.5]),
    dict(type='ImageToTensor', keys=[f'img_{domain_a}', f'img_{domain_b}']),
    dict(
        type='Collect',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        meta_keys=[f'img_{domain_a}_path', f'img_{domain_b}_path'])
]
data = dict(
    train=dict(
        dataroot=dataroot,
        pipeline=train_pipeline,
        domain_a=domain_a,
        domain_b=domain_b),
    val=dict(
        dataroot=dataroot,
        domain_a=domain_a,
        domain_b=domain_b,
        pipeline=test_pipeline),
    test=dict(
        dataroot=dataroot,
        domain_a=domain_a,
        domain_b=domain_b,
        pipeline=test_pipeline))
# Separate Adam optimizers for generators and discriminators.
optimizer = dict(
    generators=dict(type='Adam', lr=0.0002, betas=(0.5, 0.999)),
    discriminators=dict(type='Adam', lr=0.0002, betas=(0.5, 0.999)))
# learning policy: linear decay to 0 over the second half of training
# (from iter 40000 of 80000, updating every 400 iters).
lr_config = dict(
    policy='Linear', by_epoch=False, target_lr=0, start=40000, interval=400)
checkpoint_config = dict(interval=10000, save_optimizer=True, by_epoch=False)
custom_hooks = [
    dict(
        type='MMGenVisualizationHook',
        output_dir='training_samples',
        res_name_list=[f'fake_{domain_a}', f'fake_{domain_b}'],
        interval=5000)
]
runner = None
use_ddp_wrapper = True
total_iters = 80000
workflow = [('train', 1)]
exp_name = 'cyclegan_facades'
work_dir = f'./work_dirs/experiments/{exp_name}'
# Number of test images used by the metrics below.
num_images = 106
metrics = dict(
    FID=dict(type='FID', num_images=num_images, image_shape=(3, 256, 256)),
    IS=dict(
        type='IS',
        num_images=num_images,
        image_shape=(3, 256, 256),
        inception_args=dict(type='pytorch')))
# Online evaluation: FID/IS of translations into domain_b every 10k iters.
evaluation = dict(
    type='TranslationEvalHook',
    target_domain=domain_b,
    interval=10000,
    metrics=[
        dict(type='FID', num_images=num_images, bgr2rgb=True),
        dict(
            type='IS',
            num_images=num_images,
            inception_args=dict(type='pytorch'))
    ],
    best_metric=['fid', 'is'])
# ---------------------------------------------------------------------------
# MMGeneration config: CycleGAN (LSGAN, ResNet generator) on horse2zebra,
# WITH identity loss (weight 0.5), 270k training iterations.
# ---------------------------------------------------------------------------
_base_ = [
    '../_base_/models/cyclegan/cyclegan_lsgan_resnet.py',
    '../_base_/datasets/unpaired_imgs_256x256.py',
    '../_base_/default_runtime.py'
]
# The two unpaired image domains; all pipeline/loss keys derive from them.
domain_a = 'horse'
domain_b = 'zebra'
model = dict(
    default_domain=domain_b,
    reachable_domains=[domain_a, domain_b],
    related_domains=[domain_a, domain_b],
    # Cycle-consistency L1 losses (weight 10.0) for both directions, plus
    # identity-mapping L1 losses (weight 0.5) for both domains.
    gen_auxiliary_loss=[
        dict(
            type='L1Loss',
            loss_weight=10.0,
            loss_name='cycle_loss',
            data_info=dict(
                pred=f'cycle_{domain_a}', target=f'real_{domain_a}'),
            reduction='mean'),
        dict(
            type='L1Loss',
            loss_weight=10.0,
            loss_name='cycle_loss',
            data_info=dict(
                pred=f'cycle_{domain_b}',
                target=f'real_{domain_b}',
            ),
            reduction='mean'),
        dict(
            type='L1Loss',
            loss_weight=0.5,
            loss_name='id_loss',
            data_info=dict(
                pred=f'identity_{domain_a}', target=f'real_{domain_a}'),
            reduction='mean'),
        dict(
            type='L1Loss',
            loss_weight=0.5,
            loss_name='id_loss',
            data_info=dict(
                pred=f'identity_{domain_b}', target=f'real_{domain_b}'),
            reduction='mean')
    ])
dataroot = './data/horse2zebra'
# Train pipeline: load, resize to 286, random-crop to 256, random flips,
# rescale to [0, 1], normalize to [-1, 1], convert to tensors.
train_pipeline = [
    dict(
        type='LoadImageFromFile',
        io_backend='disk',
        key=f'img_{domain_a}',
        flag='color'),
    dict(
        type='LoadImageFromFile',
        io_backend='disk',
        key=f'img_{domain_b}',
        flag='color'),
    dict(
        type='Resize',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        scale=(286, 286),
        interpolation='bicubic'),
    dict(
        type='Crop',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        crop_size=(256, 256),
        random_crop=True),
    dict(type='Flip', keys=[f'img_{domain_a}'], direction='horizontal'),
    dict(type='Flip', keys=[f'img_{domain_b}'], direction='horizontal'),
    dict(type='RescaleToZeroOne', keys=[f'img_{domain_a}', f'img_{domain_b}']),
    dict(
        type='Normalize',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        to_rgb=False,
        mean=[0.5, 0.5, 0.5],
        std=[0.5, 0.5, 0.5]),
    dict(type='ImageToTensor', keys=[f'img_{domain_a}', f'img_{domain_b}']),
    dict(
        type='Collect',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        meta_keys=[f'img_{domain_a}_path', f'img_{domain_b}_path'])
]
# Test pipeline: deterministic 256x256 resize only.
test_pipeline = [
    dict(
        type='LoadImageFromFile',
        io_backend='disk',
        key=f'img_{domain_a}',
        flag='color'),
    dict(
        type='LoadImageFromFile',
        io_backend='disk',
        key=f'img_{domain_b}',
        flag='color'),
    dict(
        type='Resize',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        scale=(256, 256),
        interpolation='bicubic'),
    dict(type='RescaleToZeroOne', keys=[f'img_{domain_a}', f'img_{domain_b}']),
    dict(
        type='Normalize',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        to_rgb=False,
        mean=[0.5, 0.5, 0.5],
        std=[0.5, 0.5, 0.5]),
    dict(type='ImageToTensor', keys=[f'img_{domain_a}', f'img_{domain_b}']),
    dict(
        type='Collect',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        meta_keys=[f'img_{domain_a}_path', f'img_{domain_b}_path'])
]
data = dict(
    train=dict(
        dataroot=dataroot,
        pipeline=train_pipeline,
        domain_a=domain_a,
        domain_b=domain_b),
    val=dict(
        dataroot=dataroot,
        domain_a=domain_a,
        domain_b=domain_b,
        pipeline=test_pipeline),
    test=dict(
        dataroot=dataroot,
        domain_a=domain_a,
        domain_b=domain_b,
        pipeline=test_pipeline))
# Separate Adam optimizers for generators and discriminators.
optimizer = dict(
    generators=dict(type='Adam', lr=0.0002, betas=(0.5, 0.999)),
    discriminators=dict(type='Adam', lr=0.0002, betas=(0.5, 0.999)))
# learning policy: linear decay to 0 over the second half of training
# (from iter 135000 of 270000, updating every 1350 iters).
lr_config = dict(
    policy='Linear', by_epoch=False, target_lr=0, start=135000, interval=1350)
checkpoint_config = dict(interval=10000, save_optimizer=True, by_epoch=False)
custom_hooks = [
    dict(
        type='MMGenVisualizationHook',
        output_dir='training_samples',
        res_name_list=[f'fake_{domain_a}', f'fake_{domain_b}'],
        interval=5000)
]
runner = None
use_ddp_wrapper = True
total_iters = 270000
workflow = [('train', 1)]
exp_name = 'cyclegan_horse2zebra'
work_dir = f'./work_dirs/experiments/{exp_name}'
# testA 120, testB 140
num_images = 140
metrics = dict(
    FID=dict(type='FID', num_images=num_images, image_shape=(3, 256, 256)),
    IS=dict(
        type='IS',
        num_images=num_images,
        image_shape=(3, 256, 256),
        inception_args=dict(type='pytorch')))
# Online evaluation: FID/IS of translations into domain_b every 10k iters.
evaluation = dict(
    type='TranslationEvalHook',
    target_domain=domain_b,
    interval=10000,
    metrics=[
        dict(type='FID', num_images=num_images, bgr2rgb=True),
        dict(
            type='IS',
            num_images=num_images,
            inception_args=dict(type='pytorch'))
    ],
    best_metric=['fid', 'is'])
# ---------------------------------------------------------------------------
# MMGeneration config: CycleGAN (LSGAN, ResNet generator) on
# summer2winter_yosemite, WITH identity loss (weight 0.5), 250k iterations.
# ---------------------------------------------------------------------------
_base_ = [
    '../_base_/models/cyclegan/cyclegan_lsgan_resnet.py',
    '../_base_/datasets/unpaired_imgs_256x256.py',
    '../_base_/default_runtime.py'
]
# The two unpaired image domains; all pipeline/loss keys derive from them.
domain_a = 'summer'
domain_b = 'winter'
model = dict(
    default_domain=domain_b,
    reachable_domains=[domain_a, domain_b],
    related_domains=[domain_a, domain_b],
    # Cycle-consistency L1 losses (weight 10.0) for both directions, plus
    # identity-mapping L1 losses (weight 0.5) for both domains.
    gen_auxiliary_loss=[
        dict(
            type='L1Loss',
            loss_weight=10.0,
            loss_name='cycle_loss',
            data_info=dict(
                pred=f'cycle_{domain_a}', target=f'real_{domain_a}'),
            reduction='mean'),
        dict(
            type='L1Loss',
            loss_weight=10.0,
            loss_name='cycle_loss',
            data_info=dict(
                pred=f'cycle_{domain_b}',
                target=f'real_{domain_b}',
            ),
            reduction='mean'),
        dict(
            type='L1Loss',
            loss_weight=0.5,
            loss_name='id_loss',
            data_info=dict(
                pred=f'identity_{domain_a}', target=f'real_{domain_a}'),
            reduction='mean'),
        dict(
            type='L1Loss',
            loss_weight=0.5,
            loss_name='id_loss',
            data_info=dict(
                pred=f'identity_{domain_b}', target=f'real_{domain_b}'),
            reduction='mean')
    ])
dataroot = './data/summer2winter_yosemite'
# Train pipeline: load, resize to 286, random-crop to 256, random flips,
# rescale to [0, 1], normalize to [-1, 1], convert to tensors.
train_pipeline = [
    dict(
        type='LoadImageFromFile',
        io_backend='disk',
        key=f'img_{domain_a}',
        flag='color'),
    dict(
        type='LoadImageFromFile',
        io_backend='disk',
        key=f'img_{domain_b}',
        flag='color'),
    dict(
        type='Resize',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        scale=(286, 286),
        interpolation='bicubic'),
    dict(
        type='Crop',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        crop_size=(256, 256),
        random_crop=True),
    dict(type='Flip', keys=[f'img_{domain_a}'], direction='horizontal'),
    dict(type='Flip', keys=[f'img_{domain_b}'], direction='horizontal'),
    dict(type='RescaleToZeroOne', keys=[f'img_{domain_a}', f'img_{domain_b}']),
    dict(
        type='Normalize',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        to_rgb=False,
        mean=[0.5, 0.5, 0.5],
        std=[0.5, 0.5, 0.5]),
    dict(type='ImageToTensor', keys=[f'img_{domain_a}', f'img_{domain_b}']),
    dict(
        type='Collect',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        meta_keys=[f'img_{domain_a}_path', f'img_{domain_b}_path'])
]
# Test pipeline: deterministic 256x256 resize only.
test_pipeline = [
    dict(
        type='LoadImageFromFile',
        io_backend='disk',
        key=f'img_{domain_a}',
        flag='color'),
    dict(
        type='LoadImageFromFile',
        io_backend='disk',
        key=f'img_{domain_b}',
        flag='color'),
    dict(
        type='Resize',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        scale=(256, 256),
        interpolation='bicubic'),
    dict(type='RescaleToZeroOne', keys=[f'img_{domain_a}', f'img_{domain_b}']),
    dict(
        type='Normalize',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        to_rgb=False,
        mean=[0.5, 0.5, 0.5],
        std=[0.5, 0.5, 0.5]),
    dict(type='ImageToTensor', keys=[f'img_{domain_a}', f'img_{domain_b}']),
    dict(
        type='Collect',
        keys=[f'img_{domain_a}', f'img_{domain_b}'],
        meta_keys=[f'img_{domain_a}_path', f'img_{domain_b}_path'])
]
data = dict(
    train=dict(
        dataroot=dataroot,
        pipeline=train_pipeline,
        domain_a=domain_a,
        domain_b=domain_b),
    val=dict(
        dataroot=dataroot,
        domain_a=domain_a,
        domain_b=domain_b,
        pipeline=test_pipeline),
    test=dict(
        dataroot=dataroot,
        domain_a=domain_a,
        domain_b=domain_b,
        pipeline=test_pipeline))
# Separate Adam optimizers for generators and discriminators.
optimizer = dict(
    generators=dict(type='Adam', lr=0.0002, betas=(0.5, 0.999)),
    discriminators=dict(type='Adam', lr=0.0002, betas=(0.5, 0.999)))
# learning policy: linear decay to 0 over the second half of training
# (from iter 125000 of 250000, updating every 1250 iters).
lr_config = dict(
    policy='Linear', by_epoch=False, target_lr=0, start=125000, interval=1250)
checkpoint_config = dict(interval=10000, save_optimizer=True, by_epoch=False)
custom_hooks = [
    dict(
        type='MMGenVisualizationHook',
        output_dir='training_samples',
        res_name_list=[f'fake_{domain_a}', f'fake_{domain_b}'],
        interval=5000)
]
runner = None
use_ddp_wrapper = True
total_iters = 250000
workflow = [('train', 1)]
exp_name = 'cyclegan_summer2winter'
work_dir = f'./work_dirs/experiments/{exp_name}'
# testA: 309, testB:238
num_images = 238
metrics = dict(
    FID=dict(type='FID', num_images=num_images, image_shape=(3, 256, 256)),
    IS=dict(
        type='IS',
        num_images=num_images,
        image_shape=(3, 256, 256),
        inception_args=dict(type='pytorch')))
# Online evaluation: FID/IS of translations into domain_b every 10k iters.
evaluation = dict(
    type='TranslationEvalHook',
    target_domain=domain_b,
    interval=10000,
    metrics=[
        dict(type='FID', num_images=num_images, bgr2rgb=True),
        dict(
            type='IS',
            num_images=num_images,
            inception_args=dict(type='pytorch'))
    ],
    best_metric=['fid', 'is'])
# Model-index metafile for the CycleGAN configs (OpenMMLab metafile schema).
# The original indentation was lost in extraction, which makes flat YAML
# invalid; the nesting below is reconstructed per the standard metafile
# layout with all content values unchanged.
# NOTE(review): the summer2winter and horse2zebra entries each appear twice
# with identical Name/Config/Weights but different FID/IS — presumably the
# two translation directions; verify against the upstream metafile.
Collections:
- Metadata:
    Architecture:
    - 'CycleGAN: Unpaired Image-to-Image Translation Using Cycle-Consistent Adversarial Networks'
  Name: 'CycleGAN: Unpaired Image-to-Image Translation Using Cycle-Consistent Adversarial Networks'
  Paper:
  - https://openaccess.thecvf.com/content_iccv_2017/html/Zhu_Unpaired_Image-To-Image_Translation_ICCV_2017_paper.html
  README: configs/cyclegan/README.md
Models:
- Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/cyclegan/cyclegan_lsgan_resnet_in_facades_b1x1_80k.py
  In Collection: 'CycleGAN: Unpaired Image-to-Image Translation Using Cycle-Consistent Adversarial Networks'
  Metadata:
    Training Data: FACADES
  Name: cyclegan_lsgan_resnet_in_facades_b1x1_80k
  Results:
  - Dataset: FACADES
    Metrics:
      FID: 124.8033
      IS: 1.792
    Task: Image2Image Translation
  Weights: https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_resnet_in_1x1_80k_facades_20210902_165905-5e2c0876.pth
- Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/cyclegan/cyclegan_lsgan_id0_resnet_in_facades_b1x1_80k.py
  In Collection: 'CycleGAN: Unpaired Image-to-Image Translation Using Cycle-Consistent Adversarial Networks'
  Metadata:
    Training Data: FACADES
  Name: cyclegan_lsgan_id0_resnet_in_facades_b1x1_80k
  Results:
  - Dataset: FACADES
    Metrics:
      FID: 125.1694
      IS: 1.905
    Task: Image2Image Translation
  Weights: https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_id0_resnet_in_1x1_80k_facades_convert-bgr_20210902_164411-d8e72b45.pth
- Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/cyclegan/cyclegan_lsgan_resnet_in_summer2winter_b1x1_250k.py
  In Collection: 'CycleGAN: Unpaired Image-to-Image Translation Using Cycle-Consistent Adversarial Networks'
  Metadata:
    Training Data: SUMMER2WINTER
  Name: cyclegan_lsgan_resnet_in_summer2winter_b1x1_250k
  Results:
  - Dataset: SUMMER2WINTER
    Metrics:
      FID: 83.7177
      IS: 2.771
    Task: Image2Image Translation
  Weights: https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_resnet_in_1x1_246200_summer2winter_convert-bgr_20210902_165932-fcf08dc1.pth
- Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/cyclegan/cyclegan_lsgan_id0_resnet_in_summer2winter_b1x1_250k.py
  In Collection: 'CycleGAN: Unpaired Image-to-Image Translation Using Cycle-Consistent Adversarial Networks'
  Metadata:
    Training Data: SUMMER2WINTER
  Name: cyclegan_lsgan_id0_resnet_in_summer2winter_b1x1_250k
  Results:
  - Dataset: SUMMER2WINTER
    Metrics:
      FID: 83.1418
      IS: 2.72
    Task: Image2Image Translation
  Weights: https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_id0_resnet_in_1x1_246200_summer2winter_convert-bgr_20210902_165640-8b825581.pth
- Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/cyclegan/cyclegan_lsgan_resnet_in_summer2winter_b1x1_250k.py
  In Collection: 'CycleGAN: Unpaired Image-to-Image Translation Using Cycle-Consistent Adversarial Networks'
  Metadata:
    Training Data: SUMMER2WINTER
  Name: cyclegan_lsgan_resnet_in_summer2winter_b1x1_250k
  Results:
  - Dataset: SUMMER2WINTER
    Metrics:
      FID: 72.8025
      IS: 3.129
    Task: Image2Image Translation
  Weights: https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_resnet_in_1x1_246200_summer2winter_convert-bgr_20210902_165932-fcf08dc1.pth
- Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/cyclegan/cyclegan_lsgan_id0_resnet_in_summer2winter_b1x1_250k.py
  In Collection: 'CycleGAN: Unpaired Image-to-Image Translation Using Cycle-Consistent Adversarial Networks'
  Metadata:
    Training Data: SUMMER2WINTER
  Name: cyclegan_lsgan_id0_resnet_in_summer2winter_b1x1_250k
  Results:
  - Dataset: SUMMER2WINTER
    Metrics:
      FID: 73.5001
      IS: 3.107
    Task: Image2Image Translation
  Weights: https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_id0_resnet_in_1x1_246200_summer2winter_convert-bgr_20210902_165640-8b825581.pth
- Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/cyclegan/cyclegan_lsgan_resnet_in_horse2zebra_b1x1_270k.py
  In Collection: 'CycleGAN: Unpaired Image-to-Image Translation Using Cycle-Consistent Adversarial Networks'
  Metadata:
    Training Data: HORSE2ZEBRA
  Name: cyclegan_lsgan_resnet_in_horse2zebra_b1x1_270k
  Results:
  - Dataset: HORSE2ZEBRA
    Metrics:
      FID: 64.5225
      IS: 1.418
    Task: Image2Image Translation
  Weights: https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_resnet_in_1x1_266800_horse2zebra_convert-bgr_20210902_170004-a32c733a.pth
- Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/cyclegan/cyclegan_lsgan_id0_resnet_in_horse2zebra_b1x1_270k.py
  In Collection: 'CycleGAN: Unpaired Image-to-Image Translation Using Cycle-Consistent Adversarial Networks'
  Metadata:
    Training Data: HORSE2ZEBRA
  Name: cyclegan_lsgan_id0_resnet_in_horse2zebra_b1x1_270k
  Results:
  - Dataset: HORSE2ZEBRA
    Metrics:
      FID: 74.777
      IS: 1.542
    Task: Image2Image Translation
  Weights: https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_id0_resnet_in_1x1_266800_horse2zebra_convert-bgr_20210902_165724-77c9c806.pth
- Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/cyclegan/cyclegan_lsgan_resnet_in_horse2zebra_b1x1_270k.py
  In Collection: 'CycleGAN: Unpaired Image-to-Image Translation Using Cycle-Consistent Adversarial Networks'
  Metadata:
    Training Data: HORSE2ZEBRA
  Name: cyclegan_lsgan_resnet_in_horse2zebra_b1x1_270k
  Results:
  - Dataset: HORSE2ZEBRA
    Metrics:
      FID: 141.1517
      IS: 3.154
    Task: Image2Image Translation
  Weights: https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_resnet_in_1x1_266800_horse2zebra_convert-bgr_20210902_170004-a32c733a.pth
- Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/cyclegan/cyclegan_lsgan_id0_resnet_in_horse2zebra_b1x1_270k.py
  In Collection: 'CycleGAN: Unpaired Image-to-Image Translation Using Cycle-Consistent Adversarial Networks'
  Metadata:
    Training Data: HORSE2ZEBRA
  Name: cyclegan_lsgan_id0_resnet_in_horse2zebra_b1x1_270k
  Results:
  - Dataset: HORSE2ZEBRA
    Metrics:
      FID: 134.3728
      IS: 3.091
    Task: Image2Image Translation
  Weights: https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_id0_resnet_in_1x1_266800_horse2zebra_convert-bgr_20210902_165724-77c9c806.pth
# Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks
> [Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks](https://arxiv.org/abs/1511.06434)
<!-- [ALGORITHM] -->
## Abstract
<!-- [ABSTRACT] -->
In recent years, supervised learning with convolutional networks (CNNs) has seen huge adoption in computer vision applications. Comparatively, unsupervised learning with CNNs has received less attention. In this work we hope to help bridge the gap between the success of CNNs for supervised learning and unsupervised learning. We introduce a class of CNNs called deep convolutional generative adversarial networks (DCGANs), that have certain architectural constraints, and demonstrate that they are a strong candidate for unsupervised learning. Training on various image datasets, we show convincing evidence that our deep convolutional adversarial pair learns a hierarchy of representations from object parts to scenes in both the generator and discriminator. Additionally, we use the learned features for novel tasks - demonstrating their applicability as general image representations.
<!-- [IMAGE] -->
<div align=center>
<img src="https://user-images.githubusercontent.com/28132635/143050281-60808c3f-81d0-4fae-9071-f4c297116b2f.JPG"/>
</div>
## Results and models
<div align="center">
<b> DCGAN 64x64, CelebA-Cropped</b>
<br/>
<img src="https://user-images.githubusercontent.com/12726765/113991928-871f9b80-9885-11eb-920e-d389c603fed8.png" width="800"/>
</div>
| Models | Dataset | SWD | MS-SSIM | Config | Download |
| :---------: | :------------: | :----------------------: | :-----: | :---------------------------------------------------------------------: | :-----------------------------------------------------------------------: |
| DCGAN 64x64 | MNIST (64x64) | 21.16, 4.4, 8.41/11.32 | 0.1395 | [config](https://github.com/open-mmlab/mmgeneration/tree/master/configs/dcgan/dcgan_mnist-64_b128x1_Glr4e-4_Dlr1e-4_5k.py) | [model](https://download.openmmlab.com//mmgen/dcgan/dcgan_mnist-64_b128x1_Glr4e-4_Dlr1e-4_5k_20210512_163926-207a1eaf.pth) \| [log](https://download.openmmlab.com//mmgen/dcgan/dcgan_mnist-64_b128x1_Glr4e-4_Dlr1e-4_5k_20210512_163926-207a1eaf.json) |
| DCGAN 64x64 | CelebA-Cropped | 8.93,10.53,50.32/23.26 | 0.2899 | [config](https://github.com/open-mmlab/mmgeneration/tree/master/configs/dcgan/dcgan_celeba-cropped_64_b128x1_300k.py) | [model](https://download.openmmlab.com/mmgen/dcgan/dcgan_celeba-cropped_64_b128x1_300kiter_20210408_161607-1f8a2277.pth) \| [log](https://download.openmmlab.com/mmgen/dcgan/dcgan_celeba-cropped_64_b128x1_300kiter_20210408_161607-1f8a2277.json) |
| DCGAN 64x64 | LSUN-Bedroom | 42.79, 34.55, 98.46/58.6 | 0.2095 | [config](https://github.com/open-mmlab/mmgeneration/tree/master/configs/dcgan/dcgan_lsun-bedroom_64x64_b128x1_5e.py) | [model](https://download.openmmlab.com/mmgen/dcgan/dcgan_lsun-bedroom_64_b128x1_5e_20210408_161713-117c498b.pth) \| [log](https://download.openmmlab.com/mmgen/dcgan/dcgan_lsun-bedroom_64_b128x1_5e_20210408_161713-117c498b.json) |
## Citation
```latex
@article{radford2015unsupervised,
title={Unsupervised representation learning with deep convolutional generative adversarial networks},
author={Radford, Alec and Metz, Luke and Chintala, Soumith},
journal={arXiv preprint arXiv:1511.06434},
year={2015},
url={https://arxiv.org/abs/1511.06434},
}
```
# DCGAN on CelebA-Cropped (64x64): inherit model, dataset and runtime defaults.
_base_ = [
    '../_base_/models/dcgan/dcgan_64x64.py',
    '../_base_/datasets/unconditional_imgs_64x64.py',
    '../_base_/default_runtime.py'
]

# Dataset: `samples_per_gpu` and `imgs_root` have to be given explicitly.
data = {
    'samples_per_gpu': 128,
    'train': {'imgs_root': 'data/celeba-cropped/cropped_images_aligned_png'},
}

# Running config: constant learning rate, iteration-based checkpointing.
lr_config = None
checkpoint_config = {'interval': 10000, 'by_epoch': False,
                     'max_keep_ckpts': 20}

# Periodically dump generated samples for visual inspection.
custom_hooks = [{
    'type': 'VisualizeUnconditionalSamples',
    'output_dir': 'training_samples',
    'interval': 10000,
}]

total_iters = 300002

# DDP wrapper gives faster training; the runner then must not use dynamic DDP.
use_ddp_wrapper = True
find_unused_parameters = False
runner = {
    'type': 'DynamicIterBasedRunner',
    'is_dynamic_ddp': False,  # must stay False when use_ddp_wrapper is on
    'pass_training_status': True,
}

# Offline evaluation metrics: MS-SSIM over 10k samples and SWD over 16k.
metrics = {
    'ms_ssim10k': {'type': 'MS_SSIM', 'num_images': 10000},
    'swd16k': {'type': 'SWD', 'num_images': 16384,
               'image_shape': (3, 64, 64)},
}
# DCGAN on LSUN-Bedroom (64x64): inherit model, dataset and runtime defaults.
_base_ = [
    '../_base_/models/dcgan/dcgan_64x64.py',
    '../_base_/datasets/unconditional_imgs_64x64.py',
    '../_base_/default_runtime.py'
]

# Dataset: `samples_per_gpu` and `imgs_root` have to be given explicitly.
data = {
    'samples_per_gpu': 128,
    'train': {'imgs_root': 'data/lsun/bedroom_train'},
}

# Running config: constant learning rate, iteration-based checkpointing.
lr_config = None
checkpoint_config = {'interval': 100000, 'by_epoch': False}

# Periodically dump generated samples for visual inspection.
custom_hooks = [{
    'type': 'VisualizeUnconditionalSamples',
    'output_dir': 'training_samples',
    'interval': 10000,
}]

total_iters = 1500002

# Offline evaluation metrics: MS-SSIM over 10k samples and SWD over 16k.
metrics = {
    'ms_ssim10k': {'type': 'MS_SSIM', 'num_images': 10000},
    'swd16k': {'type': 'SWD', 'num_images': 16384,
               'image_shape': (3, 64, 64)},
}
# DCGAN on MNIST upsampled to 64x64, trained for 5k iterations.
_base_ = [
    '../_base_/models/dcgan/dcgan_64x64.py',
    '../_base_/datasets/unconditional_imgs_64x64.py',
    '../_base_/default_runtime.py'
]
# output single channel: MNIST is grayscale, so the generator emits and the
# discriminator consumes 1-channel images.
model = dict(generator=dict(out_channels=1), discriminator=dict(in_channels=1))
# define dataset
# modify train_pipeline to load gray scale images
train_pipeline = [
    dict(
        type='LoadImageFromFile',
        key='real_img',
        flag='grayscale',
        io_backend='disk'),
    dict(type='Resize', keys=['real_img'], scale=(64, 64)),
    # Map raw uint8 values [0, 255] to [-1, 1] (single-channel mean/std).
    dict(
        type='Normalize',
        keys=['real_img'],
        mean=[127.5],
        std=[127.5],
        to_rgb=False),
    dict(type='ImageToTensor', keys=['real_img']),
    dict(type='Collect', keys=['real_img'], meta_keys=['real_img_path'])
]
# you must set `samples_per_gpu` and `imgs_root`
data = dict(
    samples_per_gpu=128,
    train=dict(imgs_root='data/mnist_64/train', pipeline=train_pipeline),
    val=None)
# adjust running config
lr_config = None
checkpoint_config = dict(interval=500, by_epoch=False)
custom_hooks = [
    dict(
        type='VisualizeUnconditionalSamples',
        output_dir='training_samples',
        interval=100)
]
log_config = dict(
    interval=100, hooks=[
        dict(type='TextLoggerHook'),
    ])
total_iters = 5000
metrics = dict(
    ms_ssim10k=dict(type='MS_SSIM', num_images=10000),
    # Fix: images here are single-channel (grayscale pipeline, out_channels=1),
    # so the SWD metric must declare a 1-channel image shape, not 3.
    swd16k=dict(type='SWD', num_images=16384, image_shape=(1, 64, 64)))
# Asymmetric learning rates (G faster than D) for stable training on MNIST.
optimizer = dict(
    generator=dict(type='Adam', lr=0.0004, betas=(0.5, 0.999)),
    discriminator=dict(type='Adam', lr=0.0001, betas=(0.5, 0.999)))
Collections:
- Metadata:
Architecture:
- Unsupervised Representation Learning with Deep Convolutional Generative Adversarial
Networks
Name: Unsupervised Representation Learning with Deep Convolutional Generative Adversarial
Networks
Paper:
- https://arxiv.org/abs/1511.06434
README: configs/dcgan/README.md
Models:
- Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/dcgan/dcgan_mnist-64_b128x1_Glr4e-4_Dlr1e-4_5k.py
In Collection: Unsupervised Representation Learning with Deep Convolutional Generative
Adversarial Networks
Metadata:
Training Data: Others
Name: dcgan_mnist-64_b128x1_Glr4e-4_Dlr1e-4_5k
Results:
- Dataset: Others
Metrics:
MS-SSIM: 0.1395
SWD: 21.16, 4.4, 8.41/11.32
Task: Unconditional GANs
Weights: https://download.openmmlab.com//mmgen/dcgan/dcgan_mnist-64_b128x1_Glr4e-4_Dlr1e-4_5k_20210512_163926-207a1eaf.pth
- Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/dcgan/dcgan_celeba-cropped_64_b128x1_300k.py
In Collection: Unsupervised Representation Learning with Deep Convolutional Generative
Adversarial Networks
Metadata:
Training Data: CELEBA
Name: dcgan_celeba-cropped_64_b128x1_300k
Results:
- Dataset: CELEBA
Metrics:
MS-SSIM: 0.2899
SWD: 8.93,10.53,50.32/23.26
Task: Unconditional GANs
Weights: https://download.openmmlab.com/mmgen/dcgan/dcgan_celeba-cropped_64_b128x1_300kiter_20210408_161607-1f8a2277.pth
- Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/dcgan/dcgan_lsun-bedroom_64x64_b128x1_5e.py
In Collection: Unsupervised Representation Learning with Deep Convolutional Generative
Adversarial Networks
Metadata:
Training Data: LSUN
Name: dcgan_lsun-bedroom_64x64_b128x1_5e
Results:
- Dataset: LSUN
Metrics:
MS-SSIM: 0.2095
SWD: 42.79, 34.55, 98.46/58.6
Task: Unconditional GANs
Weights: https://download.openmmlab.com/mmgen/dcgan/dcgan_lsun-bedroom_64_b128x1_5e_20210408_161713-117c498b.pth
# GGAN
> [Geometric GAN](https://arxiv.org/abs/1705.02894)
<!-- [ALGORITHM] -->
## Abstract
<!-- [ABSTRACT] -->
Generative Adversarial Nets (GANs) represent an important milestone for effective generative models, which has inspired numerous variants seemingly different from each other. One of the main contributions of this paper is to reveal a unified geometric structure in GAN and its variants. Specifically, we show that the adversarial generative model training can be decomposed into three geometric steps: separating hyperplane search, discriminator parameter update away from the separating hyperplane, and the generator update along the normal vector direction of the separating hyperplane. This geometric intuition reveals the limitations of the existing approaches and leads us to propose a new formulation called geometric GAN using SVM separating hyperplane that maximizes the margin. Our theoretical analysis shows that the geometric GAN converges to a Nash equilibrium between the discriminator and generator. In addition, extensive numerical results show the superior performance of geometric GAN.
<!-- [IMAGE] -->
<div align=center>
<img src="https://user-images.githubusercontent.com/28132635/143051600-6a3e5c37-259e-4b77-a847-c6ad1eafa65f.JPG"/>
</div>
## Results and models
<div align="center">
<b> GGAN 64x64, CelebA-Cropped</b>
<br/>
<img src="https://user-images.githubusercontent.com/22982797/116691577-9067d800-a9ed-11eb-8ea4-be79884d8502.PNG" width="800"/>
</div>
| Models | Dataset | SWD | MS-SSIM | FID | Config | Download |
| :----------: | :------------: | :-----------------------------: | :-----: | :-----: | :-------------------------------------------------------------: | :----------------------------------------------------------------: |
| GGAN 64x64 | CelebA-Cropped | 11.18, 12.21, 39.16/20.85 | 0.3318 | 20.1797 | [config](https://github.com/open-mmlab/mmgeneration/tree/master/configs/ggan/ggan_celeba-cropped_dcgan-archi_lr-1e-3_64_b128x1_12m.py) | [model](https://download.openmmlab.com/mmgen/ggan/ggan_celeba-cropped_dcgan-archi_lr-1e-3_64_b128x1_12m.pth) \| [log](https://download.openmmlab.com/mmgen/ggan/ggan_celeba-cropped_dcgan-archi_lr-1e-3_64_b128x1_12m_20210430_113839.log.json) |
| GGAN 128x128 | CelebA-Cropped | 9.81, 11.29, 19.22, 47.79/22.03 | 0.3149 | 18.7647 | [config](https://github.com/open-mmlab/mmgeneration/tree/master/configs/ggan/ggan_celeba-cropped_dcgan-archi_lr-1e-4_128_b64x1_10m.py) | [model](https://download.openmmlab.com/mmgen/ggan/ggan_celeba-cropped_dcgan-archi_lr-1e-4_128_b64x1_10m_20210430_143027-516423dc.pth) \| [log](https://download.openmmlab.com/mmgen/ggan/ggan_celeba-cropped_dcgan-archi_lr-1e-4_128_b64x1_10m_20210423_154258.log.json) |
| GGAN 64x64 | LSUN-Bedroom | 9.1, 6.2, 12.27/9.19 | 0.0649 | 85.6629 | [config](https://github.com/open-mmlab/mmgeneration/tree/master/configs/ggan/ggan_lsun-bedroom_lsgan_archi_lr-1e-4_64_b128x1_20m.py) | [model](https://download.openmmlab.com/mmgen/ggan/ggan_lsun-bedroom_lsgan_archi_lr-1e-4_64_b128x1_20m_20210430_143114-5d99b76c.pth) \| [log](https://download.openmmlab.com/mmgen/ggan/ggan_lsun-bedroom_lsgan_archi_lr-1e-4_64_b128x1_20m_20210428_202027.log.json) |
Note: In the original implementation of [GGAN](https://github.com/lim0606/pytorch-geometric-gan), they set `G_iters` to 10. However, our framework does not support `G_iters` currently, so we dropped that setting from the original implementation and conducted several experiments with our own settings. We show above the experiment results with the lowest `fid` score. \
Original settings and our settings:
| Models | Dataset | Architecture | optimizer | lr_G | lr_D | G_iters | D_iters |
| :----------------: | :------------: | :----------: | :-------: | :----: | :----: | :-----: | :-----: |
| GGAN(origin) 64x64 | CelebA-Cropped | dcgan-archi | RMSprop | 0.0002 | 0.0002 | 10 | 1 |
| GGAN(ours) 64x64 | CelebA-Cropped | dcgan-archi | Adam | 0.001 | 0.001 | 1 | 1 |
| GGAN(origin) 64x64 | LSUN-Bedroom | dcgan-archi | RMSprop | 0.0002 | 0.0002 | 10 | 1 |
| GGAN(ours) 64x64 | LSUN-Bedroom | lsgan-archi | Adam | 0.0001 | 0.0001 | 1 | 1 |
## Citation
```latex
@article{lim2017geometric,
title={Geometric gan},
author={Lim, Jae Hyun and Ye, Jong Chul},
journal={arXiv preprint arXiv:1705.02894},
year={2017},
url={https://arxiv.org/abs/1705.02894},
}
```
# GGAN on CelebA-Cropped (64x64): DCGAN backbone trained with a hinge loss.
_base_ = [
    '../_base_/models/dcgan/dcgan_64x64.py',
    '../_base_/datasets/unconditional_imgs_64x64.py',
    '../_base_/default_runtime.py'
]

# Geometric GAN: discriminator outputs a raw score map, loss is hinge-based.
model = {
    'discriminator': {'output_scale': 4, 'out_channels': 1},
    'gan_loss': {'type': 'GANLoss', 'gan_type': 'hinge'},
}

# Dataset: `samples_per_gpu` and `imgs_root` have to be given explicitly.
data = {
    'samples_per_gpu': 128,
    'train': {'imgs_root': './data/celeba/cropped_images_aligned_png/'},
    'val': {'imgs_root': './data/celeba/cropped_images_aligned_png/'},
}

# Both networks share identical Adam settings.
optimizer = {
    'generator': {'type': 'Adam', 'lr': 0.001, 'betas': (0.5, 0.99)},
    'discriminator': {'type': 'Adam', 'lr': 0.001, 'betas': (0.5, 0.99)},
}

# Running config: constant learning rate, iteration-based checkpointing.
lr_config = None
checkpoint_config = {'interval': 10000, 'by_epoch': False,
                     'max_keep_ckpts': 20}
custom_hooks = [{
    'type': 'VisualizeUnconditionalSamples',
    'output_dir': 'training_samples',
    'interval': 5000,
}]

# Online FID evaluation every 10k iterations, sampling the non-EMA generator.
evaluation = {
    'type': 'GenerativeEvalHook',
    'interval': 10000,
    'metrics': {'type': 'FID', 'num_images': 50000, 'inception_pkl': None,
                'bgr2rgb': True},
    'sample_kwargs': {'sample_model': 'orig'},
}

total_iters = 100000

# DDP wrapper gives faster training; the runner then must not use dynamic DDP.
use_ddp_wrapper = True
find_unused_parameters = False
runner = {
    'type': 'DynamicIterBasedRunner',
    'is_dynamic_ddp': False,  # must stay False when use_ddp_wrapper is on
    'pass_training_status': True,
}

# Offline evaluation metrics.
metrics = {
    'ms_ssim10k': {'type': 'MS_SSIM', 'num_images': 10000},
    'swd16k': {'type': 'SWD', 'num_images': 16384,
               'image_shape': (3, 64, 64)},
    'fid50k': {'type': 'FID', 'num_images': 50000, 'inception_pkl': None},
}
# GGAN on CelebA-Cropped (128x128): DCGAN backbone trained with a hinge loss.
_base_ = [
    '../_base_/models/dcgan/dcgan_128x128.py',
    '../_base_/datasets/unconditional_imgs_128x128.py',
    '../_base_/default_runtime.py'
]

# Geometric GAN: discriminator outputs a raw score map, loss is hinge-based.
model = {
    'discriminator': {'output_scale': 4, 'out_channels': 1},
    'gan_loss': {'type': 'GANLoss', 'gan_type': 'hinge'},
}

# Dataset: `samples_per_gpu` and `imgs_root` have to be given explicitly.
data = {
    'samples_per_gpu': 64,
    'train': {'imgs_root': './data/celeba/cropped_images_aligned_png/'},
    'val': {'imgs_root': './data/celeba/cropped_images_aligned_png/'},
}

# Both networks share identical Adam settings (lower lr than the 64x64 run).
optimizer = {
    'generator': {'type': 'Adam', 'lr': 0.0001, 'betas': (0.5, 0.99)},
    'discriminator': {'type': 'Adam', 'lr': 0.0001, 'betas': (0.5, 0.99)},
}

# Running config: constant learning rate, iteration-based checkpointing.
lr_config = None
checkpoint_config = {'interval': 10000, 'by_epoch': False,
                     'max_keep_ckpts': 20}
custom_hooks = [{
    'type': 'VisualizeUnconditionalSamples',
    'output_dir': 'training_samples',
    'interval': 5000,
}]

# Online FID evaluation every 10k iterations, sampling the non-EMA generator.
evaluation = {
    'type': 'GenerativeEvalHook',
    'interval': 10000,
    'metrics': {'type': 'FID', 'num_images': 50000, 'inception_pkl': None,
                'bgr2rgb': True},
    'sample_kwargs': {'sample_model': 'orig'},
}

total_iters = 160000

# DDP wrapper gives faster training; the runner then must not use dynamic DDP.
use_ddp_wrapper = True
find_unused_parameters = False
runner = {
    'type': 'DynamicIterBasedRunner',
    'is_dynamic_ddp': False,  # must stay False when use_ddp_wrapper is on
    'pass_training_status': True,
}

# Offline evaluation metrics (SWD at the 128x128 resolution).
metrics = {
    'ms_ssim10k': {'type': 'MS_SSIM', 'num_images': 10000},
    'swd16k': {'type': 'SWD', 'num_images': 16384,
               'image_shape': (3, 128, 128)},
    'fid50k': {'type': 'FID', 'num_images': 50000, 'inception_pkl': None},
}
# GGAN on LSUN-Bedroom (64x64): LSGAN architecture trained with a hinge loss.
_base_ = [
    '../_base_/datasets/unconditional_imgs_64x64.py',
    '../_base_/default_runtime.py'
]

# No model base config here: the model is assembled inline from the LSGAN
# generator/discriminator pair and the hinge (geometric GAN) objective.
model = {
    'type': 'StaticUnconditionalGAN',
    'generator': {'type': 'LSGANGenerator', 'output_scale': 64},
    'discriminator': {'type': 'LSGANDiscriminator', 'input_scale': 64},
    'gan_loss': {'type': 'GANLoss', 'gan_type': 'hinge'},
}
train_cfg = {'disc_steps': 1}  # one discriminator step per generator step
test_cfg = None

# Dataset: `samples_per_gpu` and `imgs_root` have to be given explicitly.
data = {
    'samples_per_gpu': 128,
    'train': {'imgs_root': 'data/lsun/bedroom_train'},
    'val': {'imgs_root': 'data/lsun/bedroom_train'},
}

# Both networks share identical Adam settings.
optimizer = {
    'generator': {'type': 'Adam', 'lr': 0.0001, 'betas': (0.5, 0.99)},
    'discriminator': {'type': 'Adam', 'lr': 0.0001, 'betas': (0.5, 0.99)},
}

# Running config: constant learning rate, iteration-based checkpointing.
lr_config = None
checkpoint_config = {'interval': 10000, 'by_epoch': False,
                     'max_keep_ckpts': 20}
custom_hooks = [{
    'type': 'VisualizeUnconditionalSamples',
    'output_dir': 'training_samples',
    'interval': 5000,
}]

# Online FID evaluation every 10k iterations, sampling the non-EMA generator.
evaluation = {
    'type': 'GenerativeEvalHook',
    'interval': 10000,
    'metrics': {'type': 'FID', 'num_images': 50000, 'inception_pkl': None,
                'bgr2rgb': True},
    'sample_kwargs': {'sample_model': 'orig'},
}

total_iters = 160000

# DDP wrapper gives faster training; the runner then must not use dynamic DDP.
use_ddp_wrapper = True
find_unused_parameters = False
runner = {
    'type': 'DynamicIterBasedRunner',
    'is_dynamic_ddp': False,  # must stay False when use_ddp_wrapper is on
    'pass_training_status': True,
}

# Offline evaluation metrics.
metrics = {
    'ms_ssim10k': {'type': 'MS_SSIM', 'num_images': 10000},
    'swd16k': {'type': 'SWD', 'num_images': 16384,
               'image_shape': (3, 64, 64)},
    'fid50k': {'type': 'FID', 'num_images': 50000, 'inception_pkl': None},
}
Collections:
- Metadata:
Architecture:
- GGAN
Name: GGAN
Paper:
- https://arxiv.org/abs/1705.02894
README: configs/ggan/README.md
Models:
- Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/ggan/ggan_celeba-cropped_dcgan-archi_lr-1e-3_64_b128x1_12m.py
In Collection: GGAN
Metadata:
Training Data: CELEBA
Name: ggan_celeba-cropped_dcgan-archi_lr-1e-3_64_b128x1_12m
Results:
- Dataset: CELEBA
Metrics:
FID: 20.1797
MS-SSIM: 0.3318
SWD: 11.18, 12.21, 39.16/20.85
Task: Unconditional GANs
Weights: https://download.openmmlab.com/mmgen/ggan/ggan_celeba-cropped_dcgan-archi_lr-1e-3_64_b128x1_12m.pth
- Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/ggan/ggan_celeba-cropped_dcgan-archi_lr-1e-4_128_b64x1_10m.py
In Collection: GGAN
Metadata:
Training Data: CELEBA
Name: ggan_celeba-cropped_dcgan-archi_lr-1e-4_128_b64x1_10m
Results:
- Dataset: CELEBA
Metrics:
FID: 18.7647
MS-SSIM: 0.3149
SWD: 9.81, 11.29, 19.22, 47.79/22.03
Task: Unconditional GANs
Weights: https://download.openmmlab.com/mmgen/ggan/ggan_celeba-cropped_dcgan-archi_lr-1e-4_128_b64x1_10m_20210430_143027-516423dc.pth
- Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/ggan/ggan_lsun-bedroom_lsgan_archi_lr-1e-4_64_b128x1_20m.py
In Collection: GGAN
Metadata:
Training Data: LSUN
Name: ggan_lsun-bedroom_lsgan_archi_lr-1e-4_64_b128x1_20m
Results:
- Dataset: LSUN
Metrics:
FID: 85.6629
MS-SSIM: 0.0649
SWD: 9.1, 6.2, 12.27/9.19
Task: Unconditional GANs
Weights: https://download.openmmlab.com/mmgen/ggan/ggan_lsun-bedroom_lsgan_archi_lr-1e-4_64_b128x1_20m_20210430_143114-5d99b76c.pth
# Improved-DDPM
> [Improved Denoising Diffusion Probabilistic Models](https://arxiv.org/abs/2102.09672)
<!-- [ALGORITHM] -->
## Abstract
<!-- [ABSTRACT] -->
Denoising diffusion probabilistic models (DDPM) are a class of generative models which have recently been shown to produce excellent samples. We show that with a few simple modifications, DDPMs can also achieve competitive log-likelihoods while maintaining high sample quality. Additionally, we find that learning variances of the reverse diffusion process allows sampling with an order of magnitude fewer forward passes with a negligible difference in sample quality, which is important for the practical deployment of these models. We additionally use precision and recall to compare how well DDPMs and GANs cover the target distribution. Finally, we show that the sample quality and likelihood of these models scale smoothly with model capacity and training compute, making them easily scalable. We release our code at https://github.com/openai/improved-diffusion.
<!-- [IMAGE] -->
<div align=center>
<img src="https://user-images.githubusercontent.com/28132635/147938745-a5ae5b6f-b0e1-4db6-9768-44c1c6c43755.png"/>
</div>
## Results and Models
<div align="center">
<b> Denoising process of Improve-DDPM trained on CIFAR10 and ImageNet-64x64</b>
<br/>
<img src="https://user-images.githubusercontent.com/28132635/148009529-46d3fc28-eaeb-4ae9-8831-fa9edea334cc.gif" width="300"/> &nbsp;&nbsp;
<img src="https://user-images.githubusercontent.com/28132635/147954424-1c9e4623-5bed-4cdc-b49c-ab17d619f748.gif" width="300"/>
</div>
| Models | Dataset | FID | Config | Download |
| :----------------------------: | :--------: | :-----: | :-------------------------------------------------------------------------: | :----------------------------------------------------------------------------: |
| Improve-DDPM 32x32 Dropout=0.3 | CIFAR10 | 3.8848 | [config](https://github.com/open-mmlab/mmgeneration/blob/master/configs/improved_ddpm/ddpm_cosine_hybird_timestep-4k_drop0.3_cifar10_32x32_b8x16_500k.py) | [model](https://download.openmmlab.com/mmgen/improved_ddpm/ddpm_cosine_hybird_timestep-4k_drop0.3_cifar10_32x32_b8x16_500k_20220103_222621-2f42f476.pth)\| [log](https://download.openmmlab.com/mmgen/improved_ddpm/ddpm_cosine_hybird_timestep-4k_drop0.3_cifar10_32x32_b8x16_500k_20220103_222621-2f42f476.json) |
| Improve-DDPM 64x64 | ImageNet1k | 13.5181 | [config](https://github.com/open-mmlab/mmgeneration/tree/master/configs/improve_ddpm/ddpm_cosine_hybird_timestep-4k_imagenet1k_64x64_b8x16_1500k.py) | [model](https://download.openmmlab.com/mmgen/improved_ddpm/ddpm_cosine_hybird_timestep-4k_imagenet1k_64x64_b8x16_1500k_20220103_223919-b8f1a310.pth)\| [log](https://download.openmmlab.com/mmgen/improved_ddpm/ddpm_cosine_hybird_timestep-4k_imagenet1k_64x64_b8x16_1500k_20220103_223919-b8f1a310.json) |
| Improve-DDPM 64x64 Dropout=0.3 | ImageNet1k | 13.4094 | [config](https://github.com/open-mmlab/mmgeneration/blob/master/configs/improved_ddpm/ddpm_cosine_hybird_timestep-4k_drop0.3_imagenet1k_64x64_b8x16_1500k.py) | [model](https://download.openmmlab.com/mmgen/improved_ddpm/ddpm_cosine_hybird_timestep-4k_drop0.3_imagenet1k_64x64_b8x16_1500k_20220103_224427-7bb55975.pth)\| [log](https://download.openmmlab.com/mmgen/improved_ddpm/ddpm_cosine_hybird_timestep-4k_drop0.3_imagenet1k_64x64_b8x16_1500k_20220103_224427-7bb55975.json) |
`FID` comparison with official:
| Dataset | CIFAR10 | ImageNet1k-64x64 |
| :------: | :------: | :--------------: |
| Ours | 3.8848 | **13.5181** |
| Official | **3.19** | 19.2 |
For FID evaluation, we follow the pipeline of [BigGAN](https://github.com/ajbrock/BigGAN-PyTorch/blob/98459431a5d618d644d54cd1e9fceb1e5045648d/calculate_inception_moments.py#L52), where the whole training set is adopted to extract inception statistics, and Pytorch Studio GAN uses 50000 randomly selected samples. Besides, we also use [Tero's Inception](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/inception-2015-12-05.pt) for feature extraction.
You can download the preprocessed inception state by the following url: [CIFAR10](https://download.openmmlab.com/mmgen/evaluation/fid_inception_pkl/cifar10.pkl) and [ImageNet1k-64x64](https://download.openmmlab.com/mmgen/evaluation/fid_inception_pkl/imagenet_64x64.pkl).
You can use following commands to extract those inception states by yourself.
```
# For CIFAR10
python tools/utils/inception_stat.py --data-cfg configs/_base_/datasets/cifar10_inception_stat.py --pklname cifar10.pkl --no-shuffle --inception-style stylegan --num-samples -1 --subset train
# For ImageNet1k-64x64
python tools/utils/inception_stat.py --data-cfg configs/_base_/datasets/imagenet_64x64_inception_stat.py --pklname imagenet_64x64.pkl --no-shuffle --inception-style stylegan --num-samples -1 --subset train
```
## Citation
<summary align="right"><a href="https://arxiv.org/abs/2102.09672">Improve-DDPM (arXiv'2021)</a></summary>
```latex
@article{nichol2021improved,
title={Improved denoising diffusion probabilistic models},
author={Nichol, Alex and Dhariwal, Prafulla},
journal={arXiv preprint arXiv:2102.09672},
year={2021}
}
```
# Improved-DDPM on CIFAR10 (32x32), cosine schedule, hybrid objective, 500k iters.
_base_ = [
    '../_base_/models/improved_ddpm/ddpm_32x32.py',
    '../_base_/datasets/cifar10_noaug.py', '../_base_/default_runtime.py'
]
lr_config = None
checkpoint_config = dict(interval=10000, by_epoch=False, max_keep_ckpts=20)
custom_hooks = [
    # Dump intermediate denoising results for visual inspection.
    dict(
        type='MMGenVisualizationHook',
        output_dir='training_samples',
        res_name_list=['real_imgs', 'x_0_pred', 'x_t', 'x_t_1'],
        padding=1,
        interval=1000),
    dict(
        type='ExponentialMovingAverageHook',
        # Fix: `('denoising_ema')` is a plain str, not a tuple — the
        # parentheses need a trailing comma to build a one-element tuple.
        module_keys=('denoising_ema', ),
        interval=1,
        start_iter=0,
        interp_cfg=dict(momentum=0.9999),
        priority='VERY_HIGH')
]
# Skip evaluation during training because it takes too much time.
evaluation = None
total_iters = 500000  # 500k
data = dict(samples_per_gpu=16)  # 8x16=128
# use ddp wrapper for faster training
use_ddp_wrapper = True
find_unused_parameters = False
runner = dict(
    type='DynamicIterBasedRunner',
    is_dynamic_ddp=False,  # Note that this flag should be False.
    pass_training_status=True)
# Pre-extracted inception statistics for FID (see the README for download).
inception_pkl = './work_dirs/inception_pkl/cifar10.pkl'
metrics = dict(
    fid50k=dict(
        type='FID',
        num_images=50000,
        bgr2rgb=True,
        inception_pkl=inception_pkl,
        inception_args=dict(type='StyleGAN')))
# Improved-DDPM on ImageNet1k (64x64) with dropout 0.3, 1500k iters.
_base_ = [
    '../_base_/models/improved_ddpm/ddpm_64x64.py',
    '../_base_/datasets/imagenet_noaug_64.py', '../_base_/default_runtime.py'
]
# set dropout prob as 0.3
model = dict(denoising=dict(dropout=0.3))
lr_config = None
checkpoint_config = dict(interval=10000, by_epoch=False, max_keep_ckpts=20)
custom_hooks = [
    # Dump intermediate denoising results for visual inspection.
    dict(
        type='MMGenVisualizationHook',
        output_dir='training_samples',
        res_name_list=['real_imgs', 'x_0_pred', 'x_t', 'x_t_1'],
        padding=1,
        interval=1000),
    dict(
        type='ExponentialMovingAverageHook',
        # Fix: `('denoising_ema')` is a plain str, not a tuple — the
        # parentheses need a trailing comma to build a one-element tuple.
        module_keys=('denoising_ema', ),
        interval=1,
        start_iter=0,
        interp_cfg=dict(momentum=0.9999),
        priority='VERY_HIGH')
]
# Skip evaluation during training because it takes too much time.
evaluation = None
total_iters = 1500000  # 1500k
data = dict(samples_per_gpu=16)  # 8x16=128
# use ddp wrapper for faster training
use_ddp_wrapper = True
find_unused_parameters = False
runner = dict(
    type='DynamicIterBasedRunner',
    is_dynamic_ddp=False,  # Note that this flag should be False.
    pass_training_status=True)
# Pre-extracted inception statistics for FID (see the README for download).
inception_pkl = './work_dirs/inception_pkl/imagenet_64x64.pkl'
metrics = dict(
    fid50k=dict(
        type='FID',
        num_images=50000,
        bgr2rgb=True,
        inception_pkl=inception_pkl,
        inception_args=dict(type='StyleGAN')))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment