Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
mmgeneration
Commits
b7536f78
Commit
b7536f78
authored
Jun 16, 2025
by
limm
Browse files
add a to another part of mmgeneration code
parent
57e0e891
Pipeline
#2777
canceled with stages
Changes
185
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
3681 additions
and
0 deletions
+3681
-0
mmgen/models/architectures/cyclegan/__init__.py
mmgen/models/architectures/cyclegan/__init__.py
+5
-0
mmgen/models/architectures/cyclegan/generator_discriminator.py
.../models/architectures/cyclegan/generator_discriminator.py
+152
-0
mmgen/models/architectures/cyclegan/modules.py
mmgen/models/architectures/cyclegan/modules.py
+75
-0
mmgen/models/architectures/dcgan/__init__.py
mmgen/models/architectures/dcgan/__init__.py
+4
-0
mmgen/models/architectures/dcgan/generator_discriminator.py
mmgen/models/architectures/dcgan/generator_discriminator.py
+315
-0
mmgen/models/architectures/ddpm/__init__.py
mmgen/models/architectures/ddpm/__init__.py
+9
-0
mmgen/models/architectures/ddpm/denoising.py
mmgen/models/architectures/ddpm/denoising.py
+422
-0
mmgen/models/architectures/ddpm/modules.py
mmgen/models/architectures/ddpm/modules.py
+422
-0
mmgen/models/architectures/fid_inception.py
mmgen/models/architectures/fid_inception.py
+339
-0
mmgen/models/architectures/lpips/__init__.py
mmgen/models/architectures/lpips/__init__.py
+8
-0
mmgen/models/architectures/lpips/networks_basic.py
mmgen/models/architectures/lpips/networks_basic.py
+213
-0
mmgen/models/architectures/lpips/perceptual_loss.py
mmgen/models/architectures/lpips/perceptual_loss.py
+62
-0
mmgen/models/architectures/lpips/pretrained_networks.py
mmgen/models/architectures/lpips/pretrained_networks.py
+54
-0
mmgen/models/architectures/lsgan/__init__.py
mmgen/models/architectures/lsgan/__init__.py
+4
-0
mmgen/models/architectures/lsgan/generator_discriminator.py
mmgen/models/architectures/lsgan/generator_discriminator.py
+301
-0
mmgen/models/architectures/pggan/__init__.py
mmgen/models/architectures/pggan/__init__.py
+13
-0
mmgen/models/architectures/pggan/generator_discriminator.py
mmgen/models/architectures/pggan/generator_discriminator.py
+456
-0
mmgen/models/architectures/pggan/modules.py
mmgen/models/architectures/pggan/modules.py
+567
-0
mmgen/models/architectures/pix2pix/__init__.py
mmgen/models/architectures/pix2pix/__init__.py
+8
-0
mmgen/models/architectures/pix2pix/generator_discriminator.py
...n/models/architectures/pix2pix/generator_discriminator.py
+252
-0
No files found.
mmgen/models/architectures/cyclegan/__init__.py
0 → 100644
View file @
b7536f78
# Copyright (c) OpenMMLab. All rights reserved.
from
.generator_discriminator
import
ResnetGenerator
from
.modules
import
ResidualBlockWithDropout
__all__
=
[
'ResnetGenerator'
,
'ResidualBlockWithDropout'
]
mmgen/models/architectures/cyclegan/generator_discriminator.py
0 → 100644
View file @
b7536f78
# Copyright (c) OpenMMLab. All rights reserved.
import
torch.nn
as
nn
from
mmcv.cnn
import
ConvModule
from
mmcv.runner
import
load_checkpoint
from
mmgen.models.architectures.pix2pix
import
generation_init_weights
from
mmgen.models.builder
import
MODULES
from
mmgen.utils
import
get_root_logger
from
.modules
import
ResidualBlockWithDropout
@MODULES.register_module()
class ResnetGenerator(nn.Module):
    """Resnet-based generator: a 7x7 stem, two stride-2 downsampling convs,
    a stack of residual blocks at the bottleneck, and a mirrored pair of
    stride-2 transposed convs followed by a Tanh output head.

    Args:
        in_channels (int): Number of channels in input images.
        out_channels (int): Number of channels in output images.
        base_channels (int): Number of filters at the last conv layer.
            Default: 64.
        norm_cfg (dict): Config dict to build norm layer. Default:
            `dict(type='IN')`.
        use_dropout (bool): Whether to use dropout layers. Default: False.
        num_blocks (int): Number of residual blocks. Default: 9.
        padding_mode (str): The name of padding layer in conv layers:
            'reflect' | 'replicate' | 'zeros'. Default: 'reflect'.
        init_cfg (dict): Config dict for initialization.
            `type`: The name of our initialization method. Default: 'normal'.
            `gain`: Scaling factor for normal, xavier and orthogonal.
            Default: 0.02.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 base_channels=64,
                 norm_cfg=dict(type='IN'),
                 use_dropout=False,
                 num_blocks=9,
                 padding_mode='reflect',
                 init_cfg=dict(type='normal', gain=0.02)):
        super().__init__()
        assert num_blocks >= 0, ('Number of residual blocks must be '
                                 f'non-negative, but got {num_blocks}.')
        assert isinstance(norm_cfg, dict), ("'norm_cfg' should be dict, but"
                                            f'got {type(norm_cfg)}')
        assert 'type' in norm_cfg, "'norm_cfg' must have key 'type'"

        # Only enable conv bias for IN, following CycleGAN's original
        # implementation.
        use_bias = norm_cfg['type'] == 'IN'

        layers = []

        # Stem: a wide 7x7 conv that keeps the spatial resolution.
        layers.append(
            ConvModule(
                in_channels=in_channels,
                out_channels=base_channels,
                kernel_size=7,
                padding=3,
                bias=use_bias,
                norm_cfg=norm_cfg,
                padding_mode=padding_mode))

        # Encoder: each step halves the resolution and doubles the width.
        num_down = 2
        for level in range(num_down):
            width = base_channels * 2**level
            layers.append(
                ConvModule(
                    in_channels=width,
                    out_channels=width * 2,
                    kernel_size=3,
                    stride=2,
                    padding=1,
                    bias=use_bias,
                    norm_cfg=norm_cfg))

        # Bottleneck: residual blocks at the smallest resolution.
        bottleneck_width = base_channels * 2**num_down
        for _ in range(num_blocks):
            layers.append(
                ResidualBlockWithDropout(
                    bottleneck_width,
                    padding_mode=padding_mode,
                    norm_cfg=norm_cfg,
                    use_dropout=use_dropout))

        # Decoder: transposed convs mirror the encoder.
        for level in range(num_down):
            width = base_channels * 2**(num_down - level)
            layers.append(
                ConvModule(
                    in_channels=width,
                    out_channels=width // 2,
                    kernel_size=3,
                    stride=2,
                    padding=1,
                    bias=use_bias,
                    conv_cfg=dict(type='deconv', output_padding=1),
                    norm_cfg=norm_cfg))

        # Head: project back to image channels and squash with Tanh.
        layers.append(
            ConvModule(
                in_channels=base_channels,
                out_channels=out_channels,
                kernel_size=7,
                padding=3,
                bias=True,
                norm_cfg=None,
                act_cfg=dict(type='Tanh'),
                padding_mode=padding_mode))

        self.model = nn.Sequential(*layers)

        # Stash the init method/gain for ``init_weights``; fall back to the
        # paper defaults when ``init_cfg`` is None or keys are missing.
        if init_cfg is None:
            self.init_type, self.init_gain = 'normal', 0.02
        else:
            self.init_type = init_cfg.get('type', 'normal')
            self.init_gain = init_cfg.get('gain', 0.02)

    def forward(self, x):
        """Forward function.

        Args:
            x (Tensor): Input tensor with shape (n, c, h, w).

        Returns:
            Tensor: Forward results.
        """
        return self.model(x)

    def init_weights(self, pretrained=None, strict=True):
        """Initialize weights for the model.

        Args:
            pretrained (str, optional): Path for pretrained weights. If given
                None, pretrained weights will not be loaded. Default: None.
            strict (bool, optional): Whether to allow different params for the
                model and checkpoint. Default: True.
        """
        if pretrained is None:
            # No checkpoint: apply the configured random initialization.
            generation_init_weights(
                self, init_type=self.init_type, init_gain=self.init_gain)
        elif isinstance(pretrained, str):
            logger = get_root_logger()
            load_checkpoint(self, pretrained, strict=strict, logger=logger)
        else:
            raise TypeError("'pretrained' must be a str or None. "
                            f'But received {type(pretrained)}.')
mmgen/models/architectures/cyclegan/modules.py
0 → 100644
View file @
b7536f78
# Copyright (c) OpenMMLab. All rights reserved.
import
torch.nn
as
nn
from
mmcv.cnn
import
ConvModule
class ResidualBlockWithDropout(nn.Module):
    """Residual block with an optional dropout layer between its two convs.

    Ref:
    Deep Residual Learning for Image Recognition

    A residual block is a conv block with skip connections. A dropout layer
    may be inserted between the two conv modules.

    Args:
        channels (int): Number of channels in the conv layer.
        padding_mode (str): The name of padding layer:
            'reflect' | 'replicate' | 'zeros'.
        norm_cfg (dict): Config dict to build norm layer. Default:
            `dict(type='BN')`.
        use_dropout (bool): Whether to use dropout layers. Default: True.
    """

    def __init__(self,
                 channels,
                 padding_mode,
                 norm_cfg=dict(type='BN'),
                 use_dropout=True):
        super().__init__()
        assert isinstance(norm_cfg, dict), ("'norm_cfg' should be dict, but"
                                            f'got {type(norm_cfg)}')
        assert 'type' in norm_cfg, "'norm_cfg' must have key 'type'"

        # Only enable conv bias for IN, following CycleGAN's original
        # implementation.
        use_bias = norm_cfg['type'] == 'IN'

        # Both convs share everything except the activation on the second.
        conv_kwargs = dict(
            in_channels=channels,
            out_channels=channels,
            kernel_size=3,
            padding=1,
            bias=use_bias,
            norm_cfg=norm_cfg,
            padding_mode=padding_mode)

        layers = [ConvModule(**conv_kwargs)]
        if use_dropout:
            layers.append(nn.Dropout(0.5))
        # No activation on the second conv so the skip-sum stays unsquashed.
        layers.append(ConvModule(act_cfg=None, **conv_kwargs))

        self.block = nn.Sequential(*layers)

    def forward(self, x):
        """Forward function. Add skip connections without final ReLU.

        Args:
            x (Tensor): Input tensor with shape (n, c, h, w).

        Returns:
            Tensor: Forward results.
        """
        return x + self.block(x)
mmgen/models/architectures/dcgan/__init__.py
0 → 100644
View file @
b7536f78
# Copyright (c) OpenMMLab. All rights reserved.
from
.generator_discriminator
import
DCGANDiscriminator
,
DCGANGenerator
__all__
=
[
'DCGANGenerator'
,
'DCGANDiscriminator'
]
mmgen/models/architectures/dcgan/generator_discriminator.py
0 → 100644
View file @
b7536f78
# Copyright (c) OpenMMLab. All rights reserved.
import
numpy
as
np
import
torch
import
torch.nn
as
nn
from
mmcv.cnn
import
ConvModule
,
normal_init
from
mmcv.runner
import
load_checkpoint
from
mmcv.utils.parrots_wrapper
import
_BatchNorm
from
mmgen.models.builder
import
MODULES
from
mmgen.utils
import
get_root_logger
from
..common
import
get_module_device
@MODULES.register_module()
class DCGANGenerator(nn.Module):
    """Generator for DCGAN.

    Implementation Details for DCGAN architecture:

    #. Adopt transposed convolution in the generator;
    #. Use batchnorm in the generator except for the final output layer;
    #. Use ReLU in the generator in addition to the final output layer.

    More details can be found in the original paper:
    Unsupervised Representation Learning with Deep Convolutional
    Generative Adversarial Networks
    http://arxiv.org/abs/1511.06434

    Args:
        output_scale (int | tuple[int]): Output scale for the generated
            image. If only a integer is provided, the output image will
            be a square shape. The tuple of two integers will set the
            height and width for the output image, respectively.
        out_channels (int, optional): The channel number of the output
            feature. Default to 3.
        base_channels (int, optional): The basic channel number of the
            generator. The other layers contains channels based on this
            number. Default to 1024.
        input_scale (int | tuple[int], optional): Output scale for the
            generated image. If only a integer is provided, the input feature
            ahead of the convolutional generator will be a square shape. The
            tuple of two integers will set the height and width for the input
            convolutional feature, respectively. Defaults to 4.
        noise_size (int, optional): Size of the input noise
            vector. Defaults to 100.
        default_norm_cfg (dict, optional): Norm config for all of layers
            except for the final output layer. Defaults to ``dict(type='BN')``.
        default_act_cfg (dict, optional): Activation config for all of layers
            except for the final output layer. Defaults to
            ``dict(type='ReLU')``.
        out_act_cfg (dict, optional): Activation config for the final output
            layer. Defaults to ``dict(type='Tanh')``.
        pretrained (str, optional): Path for the pretrained model. Default to
            ``None``.
    """

    def __init__(self,
                 output_scale,
                 out_channels=3,
                 base_channels=1024,
                 input_scale=4,
                 noise_size=100,
                 default_norm_cfg=dict(type='BN'),
                 default_act_cfg=dict(type='ReLU'),
                 out_act_cfg=dict(type='Tanh'),
                 pretrained=None):
        super().__init__()
        self.output_scale = output_scale
        self.base_channels = base_channels
        self.input_scale = input_scale
        self.noise_size = noise_size

        # Each upsampling step doubles the resolution, so the number of
        # steps is log2 of the total magnification.
        self.num_upsamples = int(np.log2(output_scale // input_scale))

        # Project the (n, noise_size, 1, 1) input to a 4x4 feature map.
        self.noise2feat = ConvModule(
            noise_size,
            base_channels,
            kernel_size=4,
            stride=1,
            padding=0,
            conv_cfg=dict(type='ConvTranspose2d'),
            norm_cfg=default_norm_cfg,
            act_cfg=default_act_cfg)

        # Upsampling backbone (the output layer is built separately below);
        # every stage doubles the resolution and halves the channels.
        channels = base_channels
        stages = []
        for _ in range(self.num_upsamples - 1):
            stages.append(
                ConvModule(
                    channels,
                    channels // 2,
                    kernel_size=4,
                    stride=2,
                    padding=1,
                    conv_cfg=dict(type='ConvTranspose2d'),
                    norm_cfg=default_norm_cfg,
                    act_cfg=default_act_cfg))
            channels //= 2
        self.upsampling = nn.Sequential(*stages)

        # Output layer: no norm, custom (usually Tanh) activation.
        self.output_layer = ConvModule(
            channels,
            out_channels,
            kernel_size=4,
            stride=2,
            padding=1,
            conv_cfg=dict(type='ConvTranspose2d'),
            norm_cfg=None,
            act_cfg=out_act_cfg)

        self.init_weights(pretrained=pretrained)

    def forward(self, noise, num_batches=0, return_noise=False):
        """Forward function.

        Args:
            noise (torch.Tensor | callable | None): You can directly give a
                batch of noise through a ``torch.Tensor`` or offer a callable
                function to sample a batch of noise data. Otherwise, the
                ``None`` indicates to use the default noise sampler.
            num_batches (int, optional): The number of batch size.
                Defaults to 0.
            return_noise (bool, optional): If True, ``noise_batch`` will be
                returned in a dict with ``fake_img``. Defaults to False.

        Returns:
            torch.Tensor | dict: If not ``return_noise``, only the output
                image will be returned. Otherwise, a dict contains
                ``fake_img`` and ``noise_batch`` will be returned.
        """
        # Case 1: an explicit noise tensor; normalize shape to (n, c, 1, 1).
        if isinstance(noise, torch.Tensor):
            assert noise.shape[1] == self.noise_size
            if noise.ndim == 2:
                noise_batch = noise[:, :, None, None]
            elif noise.ndim == 4:
                noise_batch = noise
            else:
                raise ValueError('The noise should be in shape of (n, c) or '
                                 f'(n, c, 1, 1), but got {noise.shape}')
        # Case 2: a user-supplied sampler.
        elif callable(noise):
            assert num_batches > 0
            noise_batch = noise((num_batches, self.noise_size, 1, 1))
        # Case 3: fall back to the default Gaussian sampler.
        else:
            assert num_batches > 0
            noise_batch = torch.randn((num_batches, self.noise_size, 1, 1))

        # dirty code for putting data on the right device
        noise_batch = noise_batch.to(get_module_device(self))

        x = self.noise2feat(noise_batch)
        x = self.upsampling(x)
        x = self.output_layer(x)

        if return_noise:
            return dict(fake_img=x, noise_batch=noise_batch)
        return x

    def init_weights(self, pretrained=None):
        """Init weights for models.

        We just use the initialization method proposed in the original paper.

        Args:
            pretrained (str, optional): Path for pretrained weights. If given
                None, pretrained weights will not be loaded. Defaults to None.
        """
        if isinstance(pretrained, str):
            logger = get_root_logger()
            load_checkpoint(self, pretrained, strict=False, logger=logger)
        elif pretrained is None:
            # Paper init: N(0, 0.02) for convs, N(0, 1) weight / zero bias
            # for batch norms.
            for module in self.modules():
                if isinstance(module, (nn.Conv2d, nn.ConvTranspose2d)):
                    normal_init(module, 0, 0.02)
                elif isinstance(module, _BatchNorm):
                    nn.init.normal_(module.weight.data)
                    nn.init.constant_(module.bias.data, 0)
        else:
            raise TypeError('pretrained must be a str or None but'
                            f' got {type(pretrained)} instead.')
@MODULES.register_module()
class DCGANDiscriminator(nn.Module):
    """Discriminator for DCGAN.

    Implementation Details for DCGAN architecture:

    #. Adopt convolution in the discriminator;
    #. Use batchnorm in the discriminator except for the input and final \
       output layer;
    #. Use LeakyReLU in the discriminator in addition to the output layer.

    Args:
        input_scale (int): The scale of the input image.
        output_scale (int): The final scale of the convolutional feature.
        out_channels (int): The channel number of the final output layer.
        in_channels (int, optional): The channel number of the input image.
            Defaults to 3.
        base_channels (int, optional): The basic channel number of the
            discriminator. The other layers contain channels based on this
            number. Defaults to 128.
        default_norm_cfg (dict, optional): Norm config for all of layers
            except for the final output layer. Defaults to ``dict(type='BN')``.
        default_act_cfg (dict, optional): Activation config for all of layers
            except for the final output layer. Defaults to
            ``dict(type='LeakyReLU')``.
        out_act_cfg (dict, optional): Activation config for the final output
            layer. Defaults to ``None``.
        pretrained (str, optional): Path for the pretrained model. Default to
            ``None``.
    """

    def __init__(self,
                 input_scale,
                 output_scale,
                 out_channels,
                 in_channels=3,
                 base_channels=128,
                 default_norm_cfg=dict(type='BN'),
                 default_act_cfg=dict(type='LeakyReLU'),
                 out_act_cfg=None,
                 pretrained=None):
        super().__init__()
        self.input_scale = input_scale
        self.output_scale = output_scale
        self.out_channels = out_channels
        self.base_channels = base_channels

        # Each downsampling step halves the resolution, so the number of
        # steps is log2 of the total reduction.
        self.num_downsamples = int(np.log2(input_scale // output_scale))

        # Build up downsampling backbone (excluding the output layer).
        # Initialize curr_channels up front so the output layer is still
        # well-defined when input_scale == output_scale (empty backbone);
        # previously curr_channels was unbound in that case (NameError).
        curr_channels = in_channels
        downsamples = []
        for i in range(self.num_downsamples):
            # Remove norm for the first conv, per the DCGAN paper.
            norm_cfg_ = None if i == 0 else default_norm_cfg
            in_ch = in_channels if i == 0 else base_channels * 2**(i - 1)

            downsamples.append(
                ConvModule(
                    in_ch,
                    base_channels * 2**i,
                    kernel_size=4,
                    stride=2,
                    padding=1,
                    conv_cfg=dict(type='Conv2d'),
                    norm_cfg=norm_cfg_,
                    act_cfg=default_act_cfg))
            curr_channels = base_channels * 2**i

        self.downsamples = nn.Sequential(*downsamples)

        # Output layer: collapses the remaining output_scale x output_scale
        # feature to 1x1; no norm, optional activation.
        self.output_layer = ConvModule(
            curr_channels,
            out_channels,
            kernel_size=4,
            stride=1,
            padding=0,
            conv_cfg=dict(type='Conv2d'),
            norm_cfg=None,
            act_cfg=out_act_cfg)

        self.init_weights(pretrained=pretrained)

    def forward(self, x):
        """Forward function.

        Args:
            x (torch.Tensor): Fake or real image tensor.

        Returns:
            torch.Tensor: Prediction for the reality of the input image.
        """
        n = x.shape[0]
        x = self.downsamples(x)
        x = self.output_layer(x)

        # Reshape to a flattened (n, -1) prediction.
        return x.view(n, -1)

    def init_weights(self, pretrained=None):
        """Init weights for models.

        We just use the initialization method proposed in the original paper.

        Args:
            pretrained (str, optional): Path for pretrained weights. If given
                None, pretrained weights will not be loaded. Defaults to None.
        """
        if isinstance(pretrained, str):
            logger = get_root_logger()
            load_checkpoint(self, pretrained, strict=False, logger=logger)
        elif pretrained is None:
            # Paper init: N(0, 0.02) for convs, N(0, 1) weight / zero bias
            # for batch norms.
            for m in self.modules():
                if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
                    normal_init(m, 0, 0.02)
                elif isinstance(m, _BatchNorm):
                    nn.init.normal_(m.weight.data)
                    nn.init.constant_(m.bias.data, 0)
        else:
            raise TypeError('pretrained must be a str or None but'
                            f' got {type(pretrained)} instead.')
mmgen/models/architectures/ddpm/__init__.py
0 → 100644
View file @
b7536f78
# Copyright (c) OpenMMLab. All rights reserved.
from
.denoising
import
DenoisingUnet
from
.modules
import
(
DenoisingDownsample
,
DenoisingResBlock
,
DenoisingUpsample
,
TimeEmbedding
)
__all__
=
[
'DenoisingUnet'
,
'TimeEmbedding'
,
'DenoisingDownsample'
,
'DenoisingUpsample'
,
'DenoisingResBlock'
]
mmgen/models/architectures/ddpm/denoising.py
0 → 100644
View file @
b7536f78
# Copyright (c) OpenMMLab. All rights reserved.
from
copy
import
deepcopy
import
torch
import
torch.nn
as
nn
from
mmcv.cnn
import
constant_init
from
mmcv.cnn.bricks.conv_module
import
ConvModule
from
mmcv.runner
import
load_checkpoint
from
mmgen.models.builder
import
MODULES
,
build_module
from
mmgen.utils
import
get_root_logger
from
.modules
import
EmbedSequential
,
TimeEmbedding
@
MODULES
.
register_module
()
class
DenoisingUnet
(
nn
.
Module
):
"""Denoising Unet. This network receives a diffused image ``x_t`` and
current timestep ``t``, and returns a ``output_dict`` corresponding to the
passed ``output_cfg``.
``output_cfg`` defines the number of channels and the meaning of the
output. ``output_cfg`` mainly contains keys of ``mean`` and ``var``,
denoting how the network outputs mean and variance required for the
denoising process.
For ``mean``:
1. ``dict(mean='EPS')``: Model will predict noise added in the
diffusion process, and the ``output_dict`` will contain a key named
``eps_t_pred``.
2. ``dict(mean='START_X')``: Model will direct predict the mean of the
original image `x_0`, and the ``output_dict`` will contain a key named
``x_0_pred``.
3. ``dict(mean='X_TM1_PRED')``: Model will predict the mean of diffused
image at `t-1` timestep, and the ``output_dict`` will contain a key
named ``x_tm1_pred``.
For ``var``:
1. ``dict(var='FIXED_SMALL')`` or ``dict(var='FIXED_LARGE')``: Variance in
the denoising process is regarded as a fixed value. Therefore only
'mean' will be predicted, and the output channels will equal to the
input image (e.g., three channels for RGB image.)
2. ``dict(var='LEARNED')``: Model will predict `log_variance` in the
denoising process, and the ``output_dict`` will contain a key named
``log_var``.
3. ``dict(var='LEARNED_RANGE')``: Model will predict an interpolation
factor and the `log_variance` will be calculated as
`factor * upper_bound + (1-factor) * lower_bound`. The ``output_dict``
will contain a key named ``factor``.
If ``var`` is not ``FIXED_SMALL`` or ``FIXED_LARGE``, the number of output
channels will be the double of input channels, where the first half part
contains predicted mean values and the other part is the predicted
variance values. Otherwise, the number of output channels equals to the
input channels, only containing the predicted mean values.
Args:
image_size (int | list[int]): The size of image to denoise.
in_channels (int, optional): The input channels of the input image.
Defaults as ``3``.
base_channels (int, optional): The basic channel number of the
generator. The other layers contain channels based on this number.
Defaults to ``128``.
resblocks_per_downsample (int, optional): Number of ResBlock used
between two downsample operations. The number of ResBlock between
upsample operations will be the same value to keep symmetry.
Defaults to 3.
num_timesteps (int, optional): The total timestep of the denoising
process and the diffusion process. Defaults to ``1000``.
use_rescale_timesteps (bool, optional): Whether rescale the input
timesteps in range of [0, 1000]. Defaults to ``True``.
dropout (float, optional): The probability of dropout operation of
each ResBlock. Pass ``0`` to do not use dropout. Defaults as 0.
embedding_channels (int, optional): The output channels of time
embedding layer and label embedding layer. If not passed (or
passed ``-1``), output channels of the embedding layers will set
as four times of ``base_channels``. Defaults to ``-1``.
num_classes (int, optional): The number of conditional classes. If set
to 0, this model will be degraded to an unconditional model.
Defaults to 0.
channels_cfg (list | dict[list], optional): Config for input channels
of the intermedia blocks. If list is passed, each element of the
list indicates the scale factor for the input channels of the
current block with regard to the ``base_channels``. For block
``i``, the input and output channels should be
``channels_cfg[i] * base_channels`` and
``channels_cfg[i+1] * base_channels`` If dict is provided, the key
of the dict should be the output scale and corresponding value
should be a list to define channels. Default: Please refer to
``_defualt_channels_cfg``.
output_cfg (dict, optional): Config for output variables. Defaults to
``dict(mean='eps', var='learned_range')``.
norm_cfg (dict, optional): The config for normalization layers.
Defaults to ``dict(type='GN', num_groups=32)``.
act_cfg (dict, optional): The config for activation layers. Defaults
to ``dict(type='SiLU', inplace=False)``.
shortcut_kernel_size (int, optional): The kernel size for shortcut
conv in ResBlocks. The value of this argument will overwrite the
default value of `resblock_cfg`. Defaults to `3`.
use_scale_shift_norm (bool, optional): Whether perform scale and shift
after normalization operation. Defaults to True.
num_heads (int, optional): The number of attention heads. Defaults to
4.
time_embedding_mode (str, optional): Embedding method of
``time_embedding``. Defaults to 'sin'.
time_embedding_cfg (dict, optional): Config for ``time_embedding``.
Defaults to None.
resblock_cfg (dict, optional): Config for ResBlock. Defaults to
``dict(type='DenoisingResBlock')``.
attention_cfg (dict, optional): Config for attention operation.
Defaults to ``dict(type='MultiHeadAttention')``.
upsample_conv (bool, optional): Whether use conv in upsample block.
Defaults to ``True``.
downsample_conv (bool, optional): Whether use conv operation in
downsample block. Defaults to ``True``.
upsample_cfg (dict, optional): Config for upsample blocks.
Defaults to ``dict(type='DenoisingUpsample')``.
downsample_cfg (dict, optional): Config for downsample blocks.
Defaults to ``dict(type='DenoisingDownsample')``.
attention_res (int | list[int], optional): Resolution of feature maps
to apply attention operation. Defaults to ``[16, 8]``.
pretrained (str | dict, optional): Path for the pretrained model or
dict containing information for pretained models whose necessary
key is 'ckpt_path'. Besides, you can also provide 'prefix' to load
the generator part from the whole state dict. Defaults to None.
"""
_default_channels_cfg
=
{
256
:
[
1
,
1
,
2
,
2
,
4
,
4
],
64
:
[
1
,
2
,
3
,
4
],
32
:
[
1
,
2
,
2
,
2
]
}
def
__init__
(
self
,
image_size
,
in_channels
=
3
,
base_channels
=
128
,
resblocks_per_downsample
=
3
,
num_timesteps
=
1000
,
use_rescale_timesteps
=
True
,
dropout
=
0
,
embedding_channels
=-
1
,
num_classes
=
0
,
channels_cfg
=
None
,
output_cfg
=
dict
(
mean
=
'eps'
,
var
=
'learned_range'
),
norm_cfg
=
dict
(
type
=
'GN'
,
num_groups
=
32
),
act_cfg
=
dict
(
type
=
'SiLU'
,
inplace
=
False
),
shortcut_kernel_size
=
1
,
use_scale_shift_norm
=
False
,
num_heads
=
4
,
time_embedding_mode
=
'sin'
,
time_embedding_cfg
=
None
,
resblock_cfg
=
dict
(
type
=
'DenoisingResBlock'
),
attention_cfg
=
dict
(
type
=
'MultiHeadAttention'
),
downsample_conv
=
True
,
upsample_conv
=
True
,
downsample_cfg
=
dict
(
type
=
'DenoisingDownsample'
),
upsample_cfg
=
dict
(
type
=
'DenoisingUpsample'
),
attention_res
=
[
16
,
8
],
pretrained
=
None
):
super
().
__init__
()
self
.
num_classes
=
num_classes
self
.
num_timesteps
=
num_timesteps
self
.
use_rescale_timesteps
=
use_rescale_timesteps
self
.
output_cfg
=
deepcopy
(
output_cfg
)
self
.
mean_mode
=
self
.
output_cfg
.
get
(
'mean'
,
'eps'
)
self
.
var_mode
=
self
.
output_cfg
.
get
(
'var'
,
'learned_range'
)
# double output_channels to output mean and var at same time
out_channels
=
in_channels
if
'FIXED'
in
self
.
var_mode
.
upper
()
\
else
2
*
in_channels
self
.
out_channels
=
out_channels
# check type of image_size
if
not
isinstance
(
image_size
,
int
)
and
not
isinstance
(
image_size
,
list
):
raise
TypeError
(
'Only support `int` and `list[int]` for `image_size`.'
)
if
isinstance
(
image_size
,
list
):
assert
len
(
image_size
)
==
2
,
'The length of `image_size` should be 2.'
assert
image_size
[
0
]
==
image_size
[
1
],
'Width and height of the image should be same.'
image_size
=
image_size
[
0
]
self
.
image_size
=
image_size
channels_cfg
=
deepcopy
(
self
.
_default_channels_cfg
)
\
if
channels_cfg
is
None
else
deepcopy
(
channels_cfg
)
if
isinstance
(
channels_cfg
,
dict
):
if
image_size
not
in
channels_cfg
:
raise
KeyError
(
f
'`image_size=
{
image_size
}
is not found in '
'`channels_cfg`, only support configs for '
f
'
{
[
chn
for
chn
in
channels_cfg
.
keys
()]
}
'
)
self
.
channel_factor_list
=
channels_cfg
[
image_size
]
elif
isinstance
(
channels_cfg
,
list
):
self
.
channel_factor_list
=
channels_cfg
else
:
raise
ValueError
(
'Only support list or dict for `channels_cfg`, '
f
'receive
{
type
(
channels_cfg
)
}
'
)
embedding_channels
=
base_channels
*
4
\
if
embedding_channels
==
-
1
else
embedding_channels
self
.
time_embedding
=
TimeEmbedding
(
base_channels
,
embedding_channels
=
embedding_channels
,
embedding_mode
=
time_embedding_mode
,
embedding_cfg
=
time_embedding_cfg
,
act_cfg
=
act_cfg
)
if
self
.
num_classes
!=
0
:
self
.
label_embedding
=
nn
.
Embedding
(
self
.
num_classes
,
embedding_channels
)
self
.
resblock_cfg
=
deepcopy
(
resblock_cfg
)
self
.
resblock_cfg
.
setdefault
(
'dropout'
,
dropout
)
self
.
resblock_cfg
.
setdefault
(
'norm_cfg'
,
norm_cfg
)
self
.
resblock_cfg
.
setdefault
(
'act_cfg'
,
act_cfg
)
self
.
resblock_cfg
.
setdefault
(
'embedding_channels'
,
embedding_channels
)
self
.
resblock_cfg
.
setdefault
(
'use_scale_shift_norm'
,
use_scale_shift_norm
)
self
.
resblock_cfg
.
setdefault
(
'shortcut_kernel_size'
,
shortcut_kernel_size
)
# get scales of ResBlock to apply attention
attention_scale
=
[
image_size
//
int
(
res
)
for
res
in
attention_res
]
self
.
attention_cfg
=
deepcopy
(
attention_cfg
)
self
.
attention_cfg
.
setdefault
(
'num_heads'
,
num_heads
)
self
.
attention_cfg
.
setdefault
(
'norm_cfg'
,
norm_cfg
)
self
.
downsample_cfg
=
deepcopy
(
downsample_cfg
)
self
.
downsample_cfg
.
setdefault
(
'with_conv'
,
downsample_conv
)
self
.
upsample_cfg
=
deepcopy
(
upsample_cfg
)
self
.
upsample_cfg
.
setdefault
(
'with_conv'
,
upsample_conv
)
# init the channel scale factor
scale
=
1
self
.
in_blocks
=
nn
.
ModuleList
([
EmbedSequential
(
nn
.
Conv2d
(
in_channels
,
base_channels
,
3
,
1
,
padding
=
1
))
])
self
.
in_channels_list
=
[
base_channels
]
# construct the encoder part of Unet
for
level
,
factor
in
enumerate
(
self
.
channel_factor_list
):
in_channels_
=
base_channels
if
level
==
0
\
else
base_channels
*
self
.
channel_factor_list
[
level
-
1
]
out_channels_
=
base_channels
*
factor
for
_
in
range
(
resblocks_per_downsample
):
layers
=
[
build_module
(
self
.
resblock_cfg
,
{
'in_channels'
:
in_channels_
,
'out_channels'
:
out_channels_
})
]
in_channels_
=
out_channels_
if
scale
in
attention_scale
:
layers
.
append
(
build_module
(
self
.
attention_cfg
,
{
'in_channels'
:
in_channels_
}))
self
.
in_channels_list
.
append
(
in_channels_
)
self
.
in_blocks
.
append
(
EmbedSequential
(
*
layers
))
if
level
!=
len
(
self
.
channel_factor_list
)
-
1
:
self
.
in_blocks
.
append
(
EmbedSequential
(
build_module
(
self
.
downsample_cfg
,
{
'in_channels'
:
in_channels_
})))
self
.
in_channels_list
.
append
(
in_channels_
)
scale
*=
2
# construct the bottom part of Unet
self
.
mid_blocks
=
EmbedSequential
(
build_module
(
self
.
resblock_cfg
,
{
'in_channels'
:
in_channels_
}),
build_module
(
self
.
attention_cfg
,
{
'in_channels'
:
in_channels_
}),
build_module
(
self
.
resblock_cfg
,
{
'in_channels'
:
in_channels_
}),
)
# construct the decoder part of Unet
in_channels_list
=
deepcopy
(
self
.
in_channels_list
)
self
.
out_blocks
=
nn
.
ModuleList
()
for
level
,
factor
in
enumerate
(
self
.
channel_factor_list
[::
-
1
]):
for
idx
in
range
(
resblocks_per_downsample
+
1
):
layers
=
[
build_module
(
self
.
resblock_cfg
,
{
'in_channels'
:
in_channels_
+
in_channels_list
.
pop
(),
'out_channels'
:
base_channels
*
factor
})
]
in_channels_
=
base_channels
*
factor
if
scale
in
attention_scale
:
layers
.
append
(
build_module
(
self
.
attention_cfg
,
{
'in_channels'
:
in_channels_
}))
if
(
level
!=
len
(
self
.
channel_factor_list
)
-
1
and
idx
==
resblocks_per_downsample
):
layers
.
append
(
build_module
(
self
.
upsample_cfg
,
{
'in_channels'
:
in_channels_
}))
scale
//=
2
self
.
out_blocks
.
append
(
EmbedSequential
(
*
layers
))
self
.
out
=
ConvModule
(
in_channels
=
in_channels_
,
out_channels
=
out_channels
,
kernel_size
=
3
,
padding
=
1
,
act_cfg
=
act_cfg
,
norm_cfg
=
norm_cfg
,
bias
=
True
,
order
=
(
'norm'
,
'act'
,
'conv'
))
self
.
init_weights
(
pretrained
)
    def forward(self, x_t, t, label=None, return_noise=False):
        """Forward function.

        Denoise ``x_t`` at timestep ``t``: embed the timestep (and optional
        label), run the U-Net encoder / bottleneck / decoder with skip
        connections, and pack the network output into a dict keyed by the
        configured mean/variance parameterization.

        Args:
            x_t (torch.Tensor): Diffused image at timestep `t` to denoise.
            t (torch.Tensor): Current timestep.
            label (torch.Tensor | callable | None): You can directly give a
                batch of label through a ``torch.Tensor`` or offer a callable
                function to sample a batch of label data. Otherwise, the
                ``None`` indicates to use the default label sampler.
            return_noise (bool, optional): If True, inputted ``x_t`` and ``t``
                will be returned in a dict with output desired by
                ``output_cfg``. Defaults to False.

        Returns:
            torch.Tensor | dict: If not ``return_noise``
        """
        if self.use_rescale_timesteps:
            # map timesteps onto the canonical [0, 1000) range used by the
            # pretrained schedules
            t = t.float() * (1000.0 / self.num_timesteps)
        embedding = self.time_embedding(t)

        if label is not None:
            # class-conditional models add a label embedding to the time
            # embedding before it is broadcast into the ResBlocks
            assert hasattr(self, 'label_embedding')
            embedding = self.label_embedding(label) + embedding

        h, hs = x_t, []
        # forward downsample blocks
        for block in self.in_blocks:
            h = block(h, embedding)
            # every encoder activation is stacked for the decoder skips
            hs.append(h)

        # forward middle blocks
        h = self.mid_blocks(h, embedding)

        # forward upsample blocks; hs.pop() pairs each decoder block with the
        # matching encoder activation (LIFO order mirrors the encoder)
        for block in self.out_blocks:
            h = block(torch.cat([h, hs.pop()], dim=1), embedding)
        outputs = self.out(h)

        output_dict = dict()
        if 'FIXED' not in self.var_mode.upper():
            # split mean and learned from output
            mean, var = outputs.split(self.out_channels // 2, dim=1)
            if self.var_mode.upper() == 'LEARNED_RANGE':
                # rescale [-1, 1] to [0, 1]
                output_dict['factor'] = (var + 1) / 2
            elif self.var_mode.upper() == 'LEARNED':
                output_dict['logvar'] = var
            else:
                raise AttributeError(
                    'Only support \'FIXED\', \'LEARNED_RANGE\' '
                    'and \'LEARNED\' for variance output format. But receive '
                    f'\'{self.var_mode}\'.')
        else:
            # fixed variance: the whole network output is the mean prediction
            mean = outputs

        if self.mean_mode.upper() == 'EPS':
            output_dict['eps_t_pred'] = mean
        elif self.mean_mode.upper() == 'START_X':
            output_dict['x_0_pred'] = mean
        elif self.mean_mode.upper() == 'PREVIOUS_X':
            output_dict['x_tm1_pred'] = mean
        else:
            raise AttributeError(
                'Only support \'EPS\', \'START_X\' and \'PREVIOUS_X\' for '
                f'mean output format. But receive \'{self.mean_mode}\'.')

        if return_noise:
            # echo inputs back so callers can compute losses without
            # re-deriving the (possibly rescaled) timestep
            output_dict['x_t'] = x_t
            output_dict['t_rescaled'] = t
            if self.num_classes > 0:
                output_dict['label'] = label

        return output_dict
def
init_weights
(
self
,
pretrained
=
None
):
"""Init weights for models.
We just use the initialization method proposed in the original paper.
Args:
pretrained (str, optional): Path for pretrained weights. If given
None, pretrained weights will not be loaded. Defaults to None.
"""
if
isinstance
(
pretrained
,
str
):
logger
=
get_root_logger
()
load_checkpoint
(
self
,
pretrained
,
strict
=
False
,
logger
=
logger
)
elif
pretrained
is
None
:
# As Improved-DDPM, we apply zero-initialization to
# second conv block in ResBlock (keywords: conv_2)
# the output layer of the Unet (keywords: 'out' but
# not 'out_blocks')
# projection layer in Attention layer (keywords: proj)
for
n
,
m
in
self
.
named_modules
():
if
isinstance
(
m
,
nn
.
Conv2d
)
and
(
'conv_2'
in
n
or
(
'out'
in
n
and
'out_blocks'
not
in
n
)):
constant_init
(
m
,
0
)
if
isinstance
(
m
,
nn
.
Conv1d
)
and
'proj'
in
n
:
constant_init
(
m
,
0
)
else
:
raise
TypeError
(
'pretrained must be a str or None but'
f
' got
{
type
(
pretrained
)
}
instead.'
)
mmgen/models/architectures/ddpm/modules.py
0 → 100644
View file @
b7536f78
# Copyright (c) OpenMMLab. All rights reserved.
from
copy
import
deepcopy
from
functools
import
partial
import
mmcv
import
numpy
as
np
import
torch
import
torch.nn
as
nn
import
torch.nn.functional
as
F
from
mmcv.cnn
import
ACTIVATION_LAYERS
from
mmcv.cnn.bricks
import
build_activation_layer
,
build_norm_layer
from
mmcv.cnn.utils
import
constant_init
from
mmcv.utils
import
digit_version
from
mmgen.models.builder
import
MODULES
,
build_module
class EmbedSequential(nn.Sequential):
    """A sequential module that passes timestep embeddings to the children that
    support it as an extra input.

    Modified from
    https://github.com/openai/improved-diffusion/blob/main/improved_diffusion/unet.py#L35
    """

    def forward(self, x, y):
        # Feed the embedding `y` only to ResBlocks; every other child is a
        # plain single-input module.
        out = x
        for module in self:
            if isinstance(module, DenoisingResBlock):
                out = module(out, y)
            else:
                out = module(out)
        return out
if 'SiLU' not in ACTIVATION_LAYERS:

    @ACTIVATION_LAYERS.register_module()
    class SiLU(nn.Module):
        r"""Applies the Sigmoid Linear Unit (SiLU) function, element-wise.
        The SiLU function is also known as the swish function.

        Args:
            inplace (bool, optional): Use inplace operation or not.
                Defaults to `False`.
        """

        def __init__(self, inplace=False):
            super().__init__()
            if digit_version(torch.__version__) < digit_version(
                    '1.7.0') and inplace:
                # Fix: the original message interpolated ``torch.version``
                # (the module object) instead of the version string.
                mmcv.print_log('Inplace version of \'SiLU\' is not supported '
                               'for torch < 1.7.0, found '
                               f'\'{torch.__version__}\'.')
            self.inplace = inplace

        def forward(self, x):
            """Forward function for SiLU.

            Args:
                x (torch.Tensor): Input tensor.

            Returns:
                torch.Tensor: Tensor after activation.
            """
            # F.silu only exists from torch 1.7.0; fall back to the manual
            # formulation on older versions.
            if digit_version(torch.__version__) < digit_version('1.7.0'):
                return x * torch.sigmoid(x)

            return F.silu(x, inplace=self.inplace)
@MODULES.register_module()
class MultiHeadAttention(nn.Module):
    """An attention block allows spatial position to attend to each other.

    Originally ported from here, but adapted to the N-d case.
    https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/models/unet.py#L66. # noqa

    Args:
        in_channels (int): Channels of the input feature map.
        num_heads (int, optional): Number of heads in the attention.
        norm_cfg (dict, optional): Config for normalization layer. Default
            to ``dict(type='GN', num_groups=32)``
    """

    def __init__(self,
                 in_channels,
                 num_heads=1,
                 norm_cfg=dict(type='GN', num_groups=32)):
        super().__init__()
        self.num_heads = num_heads
        # build_norm_layer returns (name, layer); only the layer is kept
        _, self.norm = build_norm_layer(norm_cfg, in_channels)
        # a single 1x1 conv emits q, k and v stacked along the channel axis
        self.qkv = nn.Conv1d(in_channels, in_channels * 3, 1)
        self.proj = nn.Conv1d(in_channels, in_channels, 1)
        self.init_weights()

    @staticmethod
    def QKVAttention(qkv):
        # qkv: [batch * num_heads, 3 * head_channels, tokens]
        channel = qkv.shape[1] // 3
        q, k, v = torch.chunk(qkv, 3, dim=1)
        # scale q and k by 1/c**0.25 each, so the q·k product carries the
        # standard 1/sqrt(c) attention scaling
        scale = 1 / np.sqrt(np.sqrt(channel))
        weight = torch.einsum('bct,bcs->bts', q * scale, k * scale)
        # softmax in float32 for numerical stability, then cast back
        weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype)
        weight = torch.einsum('bts,bcs->bct', weight, v)
        return weight

    def forward(self, x):
        """Forward function for multi head attention.

        Args:
            x (torch.Tensor): Input feature map.

        Returns:
            torch.Tensor: Feature map after attention.
        """
        b, c, *spatial = x.shape
        # flatten all spatial dimensions into a single token axis
        x = x.reshape(b, c, -1)
        qkv = self.qkv(self.norm(x))
        # fold heads into the batch dimension before attention
        qkv = qkv.reshape(b * self.num_heads, -1, qkv.shape[2])
        h = self.QKVAttention(qkv)
        h = h.reshape(b, -1, h.shape[-1])
        h = self.proj(h)
        # residual connection, restored to the original spatial shape
        return (h + x).reshape(b, c, *spatial)

    def init_weights(self):
        # zero-init the output projection so the block starts as an identity
        # residual branch (Improved-DDPM initialization)
        constant_init(self.proj, 0)
@MODULES.register_module()
class TimeEmbedding(nn.Module):
    """Time embedding layer, reference to Two level embedding. First embedding
    time by an embedding function, then feed to neural networks.

    Args:
        in_channels (int): The channel number of the input feature map.
        embedding_channels (int): The channel number of the output embedding.
        embedding_mode (str, optional): Embedding mode for the time embedding.
            Defaults to 'sin'.
        embedding_cfg (dict, optional): Config for time embedding.
            Defaults to None.
        act_cfg (dict, optional): Config for activation layer. Defaults to
            ``dict(type='SiLU', inplace=False)``.
    """

    def __init__(self,
                 in_channels,
                 embedding_channels,
                 embedding_mode='sin',
                 embedding_cfg=None,
                 act_cfg=dict(type='SiLU', inplace=False)):
        super().__init__()
        # two-layer MLP applied on top of the (fixed) sinusoidal embedding
        self.blocks = nn.Sequential(
            nn.Linear(in_channels, embedding_channels),
            build_activation_layer(act_cfg),
            nn.Linear(embedding_channels, embedding_channels))

        # add `dim` to embedding config
        embedding_cfg_ = dict(dim=in_channels)
        if embedding_cfg is not None:
            embedding_cfg_.update(embedding_cfg)
        if embedding_mode.upper() == 'SIN':
            self.embedding_fn = partial(self.sinusodial_embedding,
                                        **embedding_cfg_)
        else:
            raise ValueError('Only support `SIN` for time embedding, '
                             f'but receive {embedding_mode}.')

    @staticmethod
    def sinusodial_embedding(timesteps, dim, max_period=10000):
        """Create sinusoidal timestep embeddings.

        Args:
            timesteps (torch.Tensor): Timestep to embedding. 1-D tensor shape
                as ``[bz, ]``,  one per batch element.
            dim (int): The dimension of the embedding.
            max_period (int, optional): Controls the minimum frequency of the
                embeddings. Defaults to ``10000``.

        Returns:
            torch.Tensor: Embedding results shape as `[bz, dim]`.
        """
        half = dim // 2
        # geometric frequency ladder from 1 down to ~1/max_period
        freqs = torch.exp(
            -np.log(max_period) *
            torch.arange(start=0, end=half, dtype=torch.float32) / half).to(
                device=timesteps.device)
        args = timesteps[:, None].float() * freqs[None]
        # NOTE: cos comes before sin here; keep this order — checkpoints
        # trained with this layout depend on it
        embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1)
        if dim % 2:
            # pad one zero column so the output width is exactly `dim`
            embedding = torch.cat(
                [embedding, torch.zeros_like(embedding[:, :1])], dim=-1)
        return embedding

    def forward(self, t):
        """Forward function for time embedding layer.

        Args:
            t (torch.Tensor): Input timesteps.

        Returns:
            torch.Tensor: Timesteps embedding.
        """
        return self.blocks(self.embedding_fn(t))
@MODULES.register_module()
class DenoisingResBlock(nn.Module):
    """Resblock for the denoising network. If `in_channels` not equals to
    `out_channels`, a learnable shortcut with conv layers will be added.

    Args:
        in_channels (int): Number of channels of the input feature map.
        embedding_channels (int): Number of channels of the input embedding.
        use_scale_shift_norm (bool): Whether use scale-shift-norm in
            `NormWithEmbedding` layer.
        dropout (float): Probability of the dropout layers.
        out_channels (int, optional): Number of output channels of the
            ResBlock. If not defined, the output channels will equal to the
            `in_channels`. Defaults to `None`.
        norm_cfg (dict, optional): The config for the normalization layers.
            Defaults too ``dict(type='GN', num_groups=32)``.
        act_cfg (dict, optional): The config for the activation layers.
            Defaults to ``dict(type='SiLU', inplace=False)``.
        shortcut_kernel_size (int, optional): The kernel size for the shortcut
            conv. Defaults to ``1``.
    """

    def __init__(self,
                 in_channels,
                 embedding_channels,
                 use_scale_shift_norm,
                 dropout,
                 out_channels=None,
                 norm_cfg=dict(type='GN', num_groups=32),
                 act_cfg=dict(type='SiLU', inplace=False),
                 shortcut_kernel_size=1):
        super().__init__()
        out_channels = in_channels if out_channels is None else out_channels

        # copy the norm config: it is reused below for NormWithEmbedding
        _norm_cfg = deepcopy(norm_cfg)

        # first conv path: norm -> act -> 3x3 conv (changes channel count)
        _, norm_1 = build_norm_layer(_norm_cfg, in_channels)
        conv_1 = [
            norm_1,
            build_activation_layer(act_cfg),
            nn.Conv2d(in_channels, out_channels, 3, padding=1)
        ]
        self.conv_1 = nn.Sequential(*conv_1)

        # normalization that injects the time/label embedding between the
        # two conv paths
        norm_with_embedding_cfg = dict(
            in_channels=out_channels,
            embedding_channels=embedding_channels,
            use_scale_shift=use_scale_shift_norm,
            norm_cfg=_norm_cfg)
        self.norm_with_embedding = build_module(
            dict(type='NormWithEmbedding'),
            default_args=norm_with_embedding_cfg)

        # second conv path: act -> dropout -> 3x3 conv (zero-initialized)
        conv_2 = [
            build_activation_layer(act_cfg),
            nn.Dropout(dropout),
            nn.Conv2d(out_channels, out_channels, 3, padding=1)
        ]
        self.conv_2 = nn.Sequential(*conv_2)

        assert shortcut_kernel_size in [1, 3], (
            'Only support `1` and `3` for `shortcut_kernel_size`, but '
            f'receive {shortcut_kernel_size}.')

        # a conv shortcut is only needed when the channel count changes
        self.learnable_shortcut = out_channels != in_channels

        if self.learnable_shortcut:
            shortcut_padding = 1 if shortcut_kernel_size == 3 else 0
            self.shortcut = nn.Conv2d(
                in_channels,
                out_channels,
                shortcut_kernel_size,
                padding=shortcut_padding)
        self.init_weights()

    def forward_shortcut(self, x):
        # identity when channel counts match, learned 1x1/3x3 conv otherwise
        if self.learnable_shortcut:
            return self.shortcut(x)
        return x

    def forward(self, x, y):
        """Forward function.

        Args:
            x (torch.Tensor): Input feature map tensor.
            y (torch.Tensor): Shared time embedding or shared label embedding.

        Returns:
            torch.Tensor : Output feature map tensor.
        """
        shortcut = self.forward_shortcut(x)
        x = self.conv_1(x)
        x = self.norm_with_embedding(x, y)
        x = self.conv_2(x)
        return x + shortcut

    def init_weights(self):
        # apply zero init to last conv layer
        constant_init(self.conv_2[-1], 0)
@MODULES.register_module()
class NormWithEmbedding(nn.Module):
    """Nornalization with embedding layer. If `use_scale_shift == True`,
    embedding results will be chunked and used to re-shift and re-scale
    normalization results. Otherwise, embedding results will directly add to
    input of normalization layer.

    Args:
        in_channels (int): Number of channels of the input feature map.
        embedding_channels (int) Number of channels of the input embedding.
        norm_cfg (dict, optional): Config for the normalization operation.
            Defaults to `dict(type='GN', num_groups=32)`.
        act_cfg (dict, optional): Config for the activation layer. Defaults
            to `dict(type='SiLU', inplace=False)`.
        use_scale_shift (bool): If True, the output of Embedding layer will be
            split to 'scale' and 'shift' and map the output of normalization
            layer to ``out * (1 + scale) + shift``. Otherwise, the output of
            Embedding layer will be added with the input before normalization
            operation. Defaults to True.
    """

    def __init__(self,
                 in_channels,
                 embedding_channels,
                 norm_cfg=dict(type='GN', num_groups=32),
                 act_cfg=dict(type='SiLU', inplace=False),
                 use_scale_shift=True):
        super().__init__()
        self.use_scale_shift = use_scale_shift
        # build_norm_layer returns (name, layer); keep only the layer
        self.norm = build_norm_layer(norm_cfg, in_channels)[1]

        # scale-shift mode needs twice the channels (scale + shift halves)
        embedding_output = in_channels * 2 if use_scale_shift else in_channels
        self.embedding_layer = nn.Sequential(
            build_activation_layer(act_cfg),
            nn.Linear(embedding_channels, embedding_output))

    def forward(self, x, y):
        """Forward function.

        Args:
            x (torch.Tensor): Input feature map tensor.
            y (torch.Tensor): Shared time embedding or shared label embedding.

        Returns:
            torch.Tensor : Output feature map tensor.
        """
        # project the embedding and broadcast it over the spatial dims
        emb = self.embedding_layer(y)[:, :, None, None]
        if not self.use_scale_shift:
            return self.norm(x + emb)
        scale, shift = emb.chunk(2, dim=1)
        return self.norm(x) * (1 + scale) + shift
@
MODULES
.
register_module
()
class DenoisingDownsample(nn.Module):
    """Downsampling operation used in the denoising network. Support average
    pooling and convolution for downsample operation.

    Args:
        in_channels (int): Number of channels of the input feature map to be
            downsampled.
        with_conv (bool, optional): Whether use convolution operation for
            downsampling. Defaults to `True`.
    """

    def __init__(self, in_channels, with_conv=True):
        super().__init__()
        if with_conv:
            # strided 3x3 conv halves the spatial resolution
            self.downsample = nn.Conv2d(in_channels, in_channels, 3, 2, 1)
        else:
            # Fix: ``nn.AvgPool2d`` requires ``kernel_size``; the original
            # ``nn.AvgPool2d(stride=2)`` raised a TypeError at construction.
            self.downsample = nn.AvgPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        """Forward function for downsampling operation.

        Args:
            x (torch.Tensor): Feature map to downsample.

        Returns:
            torch.Tensor: Feature map after downsampling.
        """
        return self.downsample(x)
@
MODULES
.
register_module
()
class DenoisingUpsample(nn.Module):
    """Upsampling operation used in the denoising network. Allows users to
    apply an additional convolution layer after the nearest interpolation
    operation.

    Args:
        in_channels (int): Number of channels of the input feature map to be
            downsampled.
        with_conv (bool, optional): Whether apply an additional convolution
            layer after upsampling. Defaults to `True`.
    """

    def __init__(self, in_channels, with_conv=True):
        super().__init__()
        # Fix: record the flag unconditionally. The original only assigned
        # ``self.with_conv`` inside the ``if`` branch, so ``forward`` raised
        # AttributeError whenever ``with_conv=False``.
        self.with_conv = with_conv
        if with_conv:
            self.conv = nn.Conv2d(in_channels, in_channels, 3, 1, 1)

    def forward(self, x):
        """Forward function for upsampling operation.

        Args:
            x (torch.Tensor): Feature map to upsample.

        Returns:
            torch.Tensor: Feature map after upsampling.
        """
        # nearest-neighbor 2x upsample, optionally smoothed by a 3x3 conv
        x = F.interpolate(x, scale_factor=2, mode='nearest')
        if self.with_conv:
            x = self.conv(x)
        return x
mmgen/models/architectures/fid_inception.py
0 → 100644
View file @
b7536f78
# Copyright (c) OpenMMLab. All rights reserved.
"""Inception networks used in calculating FID and Inception metrics.
This code is modified from:
https://github.com/rosinality/stylegan2-pytorch/blob/master/inception.py
"""
import
torch
import
torch.nn
as
nn
import
torch.nn.functional
as
F
from
torch.utils.model_zoo
import
load_url
from
torchvision
import
models
# Inception weights ported to PyTorch from
# https://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz
FID_WEIGHTS_URL
=
'https://github.com/mseitzer/pytorch-fid/releases/download/fid_weights/pt_inception-2015-12-05-6726825d.pth'
# noqa: E501
class InceptionV3(nn.Module):
    """Pretrained InceptionV3 network returning feature maps."""

    # Index of default block of inception to return,
    # corresponds to output of final average pooling
    DEFAULT_BLOCK_INDEX = 3

    # Maps feature dimensionality to their output blocks indices
    BLOCK_INDEX_BY_DIM = {
        64: 0,  # First max pooling features
        192: 1,  # Second max pooling features
        768: 2,  # Pre-aux classifier features
        2048: 3  # Final average pooling features
    }

    # NOTE(review): `output_blocks` uses a mutable (list) default argument;
    # it is never mutated here, but replacing it with a tuple would be safer.
    def __init__(self,
                 output_blocks=[DEFAULT_BLOCK_INDEX],
                 resize_input=True,
                 normalize_input=True,
                 requires_grad=False,
                 use_fid_inception=True,
                 load_fid_inception=True):
        """Build pretrained InceptionV3.

        Args:
            output_blocks (list[int]): Indices of blocks to return features of.
                Possible values are:

                - 0: corresponds to output of first max pooling
                - 1: corresponds to output of second max pooling
                - 2: corresponds to output which is fed to aux classifier
                - 3: corresponds to output of final average pooling

            resize_input (bool): If true, bilinearly resizes input to width and
                height 299 before feeding input to model. As the network
                without fully connected layers is fully convolutional, it
                should be able to handle inputs of arbitrary size, so resizing
                might not be strictly needed.
            normalize_input (bool): If true, scales the input from range (0, 1)
                to the range the pretrained Inception network expects, namely
                (-1, 1).
            requires_grad (bool): If true, parameters of the model require
                gradients. Possibly useful for finetuning the network.
            use_fid_inception (bool): If true, uses the pretrained Inception
                model used in Tensorflow's FID implementation. If false, uses
                the pretrained Inception model available in torchvision. The
                FID Inception model has different weights and a slightly
                different structure from torchvision's Inception model. If you
                want to compute FID scores, you are strongly advised to set
                this parameter to true to get comparable results.
        """
        super().__init__()

        self.resize_input = resize_input
        self.normalize_input = normalize_input
        self.output_blocks = sorted(output_blocks)
        # only blocks up to the deepest requested one are constructed
        self.last_needed_block = max(output_blocks)

        assert self.last_needed_block <= 3, \
            'Last possible output block index is 3'

        self.blocks = nn.ModuleList()

        if use_fid_inception:
            inception = fid_inception_v3(load_fid_inception)
        else:
            inception = models.inception_v3(pretrained=True)

        # Block 0: input to maxpool1
        block0 = [
            inception.Conv2d_1a_3x3,
            inception.Conv2d_2a_3x3,
            inception.Conv2d_2b_3x3,
            nn.MaxPool2d(kernel_size=3, stride=2)
        ]
        self.blocks.append(nn.Sequential(*block0))

        # Block 1: maxpool1 to maxpool2
        if self.last_needed_block >= 1:
            block1 = [
                inception.Conv2d_3b_1x1,
                inception.Conv2d_4a_3x3,
                nn.MaxPool2d(kernel_size=3, stride=2)
            ]
            self.blocks.append(nn.Sequential(*block1))

        # Block 2: maxpool2 to aux classifier
        if self.last_needed_block >= 2:
            block2 = [
                inception.Mixed_5b,
                inception.Mixed_5c,
                inception.Mixed_5d,
                inception.Mixed_6a,
                inception.Mixed_6b,
                inception.Mixed_6c,
                inception.Mixed_6d,
                inception.Mixed_6e,
            ]
            self.blocks.append(nn.Sequential(*block2))

        # Block 3: aux classifier to final avgpool
        if self.last_needed_block >= 3:
            block3 = [
                inception.Mixed_7a,
                inception.Mixed_7b,
                inception.Mixed_7c,
                nn.AdaptiveAvgPool2d(output_size=(1, 1))
            ]
            self.blocks.append(nn.Sequential(*block3))

        # freeze (or unfreeze) everything in one pass
        for param in self.parameters():
            param.requires_grad = requires_grad

    def forward(self, inp):
        """Get Inception feature maps.

        Args:
            inp (torch.Tensor): Input tensor of shape Bx3xHxW.
                Values are expected to be in range (0, 1)

        Returns:
            list(torch.Tensor): Corresponding to the selected output \
                block, sorted ascending by index.
        """
        outp = []
        x = inp

        if self.resize_input:
            x = F.interpolate(
                x, size=(299, 299), mode='bilinear', align_corners=False)

        if self.normalize_input:
            x = 2 * x - 1  # Scale from range (0, 1) to range (-1, 1)

        for idx, block in enumerate(self.blocks):
            x = block(x)
            if idx in self.output_blocks:
                outp.append(x)

            # skip blocks deeper than the last requested feature
            if idx == self.last_needed_block:
                break

        return outp
def fid_inception_v3(load_ckpt=True):
    """Build pretrained Inception model for FID computation.

    The Inception model for FID computation uses a different set of weights
    and has a slightly different structure than torchvision's Inception.

    This method first constructs torchvision's Inception and then patches the
    necessary parts that are different in the FID Inception model.
    """
    model = models.inception_v3(
        num_classes=1008, aux_logits=False, pretrained=False)

    # swap in the FID-specific variants of the mixed blocks
    patched_blocks = {
        'Mixed_5b': FIDInceptionA(192, pool_features=32),
        'Mixed_5c': FIDInceptionA(256, pool_features=64),
        'Mixed_5d': FIDInceptionA(288, pool_features=64),
        'Mixed_6b': FIDInceptionC(768, channels_7x7=128),
        'Mixed_6c': FIDInceptionC(768, channels_7x7=160),
        'Mixed_6d': FIDInceptionC(768, channels_7x7=160),
        'Mixed_6e': FIDInceptionC(768, channels_7x7=192),
        'Mixed_7b': FIDInceptionE_1(1280),
        'Mixed_7c': FIDInceptionE_2(2048),
    }
    for attr_name, block in patched_blocks.items():
        setattr(model, attr_name, block)

    if load_ckpt:
        # TensorFlow-ported weights, downloaded on first use
        model.load_state_dict(load_url(FID_WEIGHTS_URL, progress=True))
    return model
class FIDInceptionA(models.inception.InceptionA):
    """InceptionA block patched for FID computation."""

    def __init__(self, in_channels, pool_features):
        super().__init__(in_channels, pool_features)

    def forward(self, x):
        """Get InceptionA feature maps.

        Args:
            x (torch.Tensor): Input tensor of shape BxCxHxW.

        Returns:
            torch.Tensor: Feature Maps of x outputted by this block.
        """
        out_1x1 = self.branch1x1(x)
        out_5x5 = self.branch5x5_2(self.branch5x5_1(x))
        out_3x3dbl = self.branch3x3dbl_3(
            self.branch3x3dbl_2(self.branch3x3dbl_1(x)))

        # Patch: Tensorflow's average pool does not use the padded zero's in
        # its average calculation
        out_pool = F.avg_pool2d(
            x, kernel_size=3, stride=1, padding=1, count_include_pad=False)
        out_pool = self.branch_pool(out_pool)

        return torch.cat([out_1x1, out_5x5, out_3x3dbl, out_pool], 1)
class FIDInceptionC(models.inception.InceptionC):
    """InceptionC block patched for FID computation."""

    def __init__(self, in_channels, channels_7x7):
        super().__init__(in_channels, channels_7x7)

    def forward(self, x):
        """Get InceptionC feature maps.

        Args:
            x (torch.Tensor): Input tensor of shape BxCxHxW.

        Returns:
            torch.Tensor: Feature Maps of x outputted by this block.
        """
        out_1x1 = self.branch1x1(x)

        out_7x7 = self.branch7x7_3(self.branch7x7_2(self.branch7x7_1(x)))

        out_7x7dbl = self.branch7x7dbl_1(x)
        for stage in (self.branch7x7dbl_2, self.branch7x7dbl_3,
                      self.branch7x7dbl_4, self.branch7x7dbl_5):
            out_7x7dbl = stage(out_7x7dbl)

        # Patch: Tensorflow's average pool does not use the padded zero's in
        # its average calculation
        out_pool = F.avg_pool2d(
            x, kernel_size=3, stride=1, padding=1, count_include_pad=False)
        out_pool = self.branch_pool(out_pool)

        return torch.cat([out_1x1, out_7x7, out_7x7dbl, out_pool], 1)
class FIDInceptionE_1(models.inception.InceptionE):
    """First InceptionE block patched for FID computation."""

    def __init__(self, in_channels):
        super().__init__(in_channels)

    def forward(self, x):
        """Get first InceptionE feature maps.

        Args:
            x (torch.Tensor): Input tensor of shape BxCxHxW.

        Returns:
            torch.Tensor: Feature Maps of x outputted by this block.
        """
        out_1x1 = self.branch1x1(x)

        stem_3x3 = self.branch3x3_1(x)
        out_3x3 = torch.cat(
            [self.branch3x3_2a(stem_3x3),
             self.branch3x3_2b(stem_3x3)], 1)

        stem_3x3dbl = self.branch3x3dbl_2(self.branch3x3dbl_1(x))
        out_3x3dbl = torch.cat([
            self.branch3x3dbl_3a(stem_3x3dbl),
            self.branch3x3dbl_3b(stem_3x3dbl)
        ], 1)

        # Patch: Tensorflow's average pool does not use the padded zero's in
        # its average calculation
        out_pool = F.avg_pool2d(
            x, kernel_size=3, stride=1, padding=1, count_include_pad=False)
        out_pool = self.branch_pool(out_pool)

        return torch.cat([out_1x1, out_3x3, out_3x3dbl, out_pool], 1)
class FIDInceptionE_2(models.inception.InceptionE):
    """Second InceptionE block patched for FID computation."""

    def __init__(self, in_channels):
        super().__init__(in_channels)

    def forward(self, x):
        """Get second InceptionE feature maps.

        Args:
            x (torch.Tensor): Input tensor of shape BxCxHxW.

        Returns:
            torch.Tensor: Feature Maps of x outputted by this block.
        """
        out_1x1 = self.branch1x1(x)

        stem_3x3 = self.branch3x3_1(x)
        out_3x3 = torch.cat(
            [self.branch3x3_2a(stem_3x3),
             self.branch3x3_2b(stem_3x3)], 1)

        stem_3x3dbl = self.branch3x3dbl_2(self.branch3x3dbl_1(x))
        out_3x3dbl = torch.cat([
            self.branch3x3dbl_3a(stem_3x3dbl),
            self.branch3x3dbl_3b(stem_3x3dbl)
        ], 1)

        # Patch: The FID Inception model uses max pooling instead of average
        # pooling. This is likely an error in this specific Inception
        # implementation, as other Inception models use average pooling here
        # (which matches the description in the paper).
        out_pool = F.max_pool2d(x, kernel_size=3, stride=1, padding=1)
        out_pool = self.branch_pool(out_pool)

        return torch.cat([out_1x1, out_3x3, out_3x3dbl, out_pool], 1)
mmgen/models/architectures/lpips/__init__.py
0 → 100755
View file @
b7536f78
# Copyright (c) OpenMMLab. All rights reserved.
r
"""
The lpips module was adapted from https://github.com/rosinality/stylegan2-pytorch/tree/master/lpips , # noqa
and you can see the origin implementation in https://github.com/richzhang/PerceptualSimilarity/tree/master/lpips # noqa
"""
from
.perceptual_loss
import
PerceptualLoss
__all__
=
[
'PerceptualLoss'
]
mmgen/models/architectures/lpips/networks_basic.py
0 → 100755
View file @
b7536f78
# Copyright (c) OpenMMLab. All rights reserved.
import
torch
import
torch.nn
as
nn
from
.pretrained_networks
import
vgg16
def normalize_tensor(in_feat, eps=1e-10):
    """L2 normalization.

    Args:
        in_feat (Tensor): Tensor with shape [N, C, H, W].
        eps (float, optional): Epsilon value to avoid computation error.
            Defaults to 1e-10.

    Returns:
        Tensor: Tensor after L2 normalization per-instance.
    """
    # per-pixel L2 norm over the channel axis
    l2_norm = in_feat.pow(2).sum(dim=1, keepdim=True).sqrt()
    return in_feat / (l2_norm + eps)
def spatial_average(in_tens, keepdim=True):
    """Returns the mean value of each row of the input tensor in the spatial
    dimension.

    Args:
        in_tens (Tensor): Tensor with shape [N, C, H, W].
        keepdim (bool, optional): If keepdim is True, the output tensor is of
            the shape [N, C, 1, 1]. Otherwise, the output will have shape
            [N, C]. Defaults to True.

    Returns:
        Tensor: Tensor after average pooling to 1x1 with shape [N, C, 1, 1] or
            [N, C].
    """
    # average over both spatial axes in one call
    return in_tens.mean(dim=[2, 3], keepdim=keepdim)
def upsample(in_tens, out_H=64):  # assumes scale factor is same for H and W
    """Upsamples the input to the given size.

    Args:
        in_tens (Tensor): Tensor with shape [N, C, H, W].
        out_H (int, optional): Output spatial size. Defaults to 64.

    Returns:
        Tensor: Output Tensor.
    """
    # derive the (possibly fractional) scale from the current height
    ratio = float(out_H) / in_tens.shape[2]
    resizer = nn.Upsample(
        scale_factor=ratio, mode='bilinear', align_corners=False)
    return resizer(in_tens)
# Learned perceptual metric
class PNetLin(nn.Module):
    r"""
    Ref: https://github.com/richzhang/PerceptualSimilarity/blob/master/lpips/lpips.py # noqa
    """

    def __init__(self,
                 pnet_rand=False,
                 pnet_tune=False,
                 use_dropout=True,
                 spatial=False,
                 version='0.1',
                 lpips=True):
        super().__init__()

        self.pnet_tune = pnet_tune
        self.pnet_rand = pnet_rand
        self.spatial = spatial
        self.lpips = lpips
        self.version = version
        self.scaling_layer = ScalingLayer()

        # channel widths of the five tapped VGG16 stages
        self.channels = [64, 128, 256, 512, 512]
        self.L = len(self.channels)

        self.net = vgg16(
            pretrained=not self.pnet_rand, requires_grad=self.pnet_tune)

        # one learned 1x1 conv per tapped stage (LPIPS linear heads)
        self.lin0 = NetLinLayer(self.channels[0], use_dropout=use_dropout)
        self.lin1 = NetLinLayer(self.channels[1], use_dropout=use_dropout)
        self.lin2 = NetLinLayer(self.channels[2], use_dropout=use_dropout)
        self.lin3 = NetLinLayer(self.channels[3], use_dropout=use_dropout)
        self.lin4 = NetLinLayer(self.channels[4], use_dropout=use_dropout)
        # convenience list only; the layers are already registered through
        # the self.linX attributes above
        self.lins = [self.lin0, self.lin1, self.lin2, self.lin3, self.lin4]

    def forward(self, in0, in1, retPerLayer=False):
        """Compute the LPIPS distance between two image batches.

        Args:
            in0 (torch.Tensor): First input batch, shape [N, 3, H, W].
            in1 (torch.Tensor): Second input batch, same shape as ``in0``.
            retPerLayer (bool, optional): If True, also return the per-layer
                distance terms. Defaults to False.

        Returns:
            torch.Tensor | tuple: Summed distance, or ``(distance,
                per_layer_list)`` when ``retPerLayer`` is True.
        """
        # v0.0 - original release had a bug, where input was not scaled
        in0_input, in1_input = (self.scaling_layer(in0),
                                self.scaling_layer(in1)) if self.version == (
                                    '0.1') else (in0, in1)
        outs0, outs1 = self.net.forward(in0_input), self.net.forward(
            in1_input)
        feats0, feats1, diffs = {}, {}, {}

        # per-layer: unit-normalize features, then squared difference
        for kk in range(self.L):
            feats0[kk], feats1[kk] = normalize_tensor(
                outs0[kk]), normalize_tensor(outs1[kk])
            diffs[kk] = (feats0[kk] - feats1[kk])**2

        if self.lpips:
            # learned weighting via the linear heads
            if self.spatial:
                # keep a spatial map, resized to the input resolution
                res = [
                    upsample(self.lins[kk].model(diffs[kk]),
                             out_H=in0.shape[2]) for kk in range(self.L)
                ]
            else:
                res = [
                    spatial_average(
                        self.lins[kk].model(diffs[kk]), keepdim=True)
                    for kk in range(self.L)
                ]
        else:
            # unweighted variant: plain channel sum of squared diffs
            if self.spatial:
                res = [
                    upsample(diffs[kk].sum(dim=1, keepdim=True),
                             out_H=in0.shape[2]) for kk in range(self.L)
                ]
            else:
                res = [
                    spatial_average(
                        diffs[kk].sum(dim=1, keepdim=True), keepdim=True)
                    for kk in range(self.L)
                ]

        # total distance is the sum over the five stages
        val = sum(res)

        if retPerLayer:
            return (val, res)

        return val
class ScalingLayer(nn.Module):
    """Channel-wise shift/scale normalization used by LPIPS.

    The shift and scale constants are the ones shipped with the original
    LPIPS implementation; they are stored as (non-trainable) buffers so they
    follow the module across devices and dtypes.
    """

    def __init__(self):
        super().__init__()
        shift = torch.Tensor([-.030, -.088, -.188])[None, :, None, None]
        scale = torch.Tensor([.458, .448, .450])[None, :, None, None]
        self.register_buffer('shift', shift)
        self.register_buffer('scale', scale)

    def forward(self, inp):
        """Shift then rescale ``inp`` per channel."""
        return inp.sub(self.shift).div(self.scale)
class NetLinLayer(nn.Module):
    """A single linear layer which does a 1x1 conv."""

    def __init__(self, chn_in, chn_out=1, use_dropout=False):
        super().__init__()
        modules = []
        # dropout (when enabled) precedes the 1x1 projection
        if use_dropout:
            modules.append(nn.Dropout())
        modules.append(
            nn.Conv2d(chn_in, chn_out, 1, stride=1, padding=0, bias=False))
        self.model = nn.Sequential(*modules)
class Dist2LogitLayer(nn.Module):
    """takes 2 distances, puts through fc layers, spits out value between [0,
    1] (if use_sigmoid is True)"""

    def __init__(self, chn_mid=32, use_sigmoid=True):
        super().__init__()
        # three 1x1 convs with LeakyReLU in between: 5 -> mid -> mid -> 1
        body = [
            nn.Conv2d(5, chn_mid, 1, stride=1, padding=0, bias=True),
            nn.LeakyReLU(0.2, True),
            nn.Conv2d(chn_mid, chn_mid, 1, stride=1, padding=0, bias=True),
            nn.LeakyReLU(0.2, True),
            nn.Conv2d(chn_mid, 1, 1, stride=1, padding=0, bias=True),
        ]
        if use_sigmoid:
            body.append(nn.Sigmoid())
        self.model = nn.Sequential(*body)

    def forward(self, d0, d1, eps=0.1):
        """Map two distance maps to a logit via their values, difference
        and (eps-stabilized) ratios stacked along the channel dim."""
        feats = torch.cat(
            (d0, d1, d0 - d1, d0 / (d1 + eps), d1 / (d0 + eps)), dim=1)
        return self.model(feats)
class BCERankingLoss(nn.Module):
    """Binary cross-entropy ranking loss on top of a learned
    two-distance logit network."""

    def __init__(self, chn_mid=32):
        super().__init__()
        self.net = Dist2LogitLayer(chn_mid=chn_mid)
        self.loss = torch.nn.BCELoss()

    def forward(self, d0, d1, judge):
        # map human judgements from [-1, 1] to probabilities in [0, 1]
        target = (judge + 1.) / 2.
        # logit is kept on the instance, matching the original behavior
        self.logit = self.net.forward(d0, d1)
        return self.loss(self.logit, target)
mmgen/models/architectures/lpips/perceptual_loss.py
0 → 100644
View file @
b7536f78
# Copyright (c) OpenMMLab. All rights reserved.
import
torch
from
torch.utils.model_zoo
import
load_url
from
.networks_basic
import
PNetLin
LPIPS_WEIGHTS_URL
=
'https://download.openmmlab.com/mmgen/evaluation/lpips/weights/v0.1/vgg.pth'
# noqa
class PerceptualLoss(torch.nn.Module):
    r"""LPIPS metric with VGG using our perceptually-learned weights.

    Ref: https://github.com/rosinality/stylegan2-pytorch/blob/master/lpips/__init__.py # noqa

    Args:
        spatial (bool, optional): Whether to return a spatial distance map
            instead of a scalar per sample. Defaults to False.
        use_gpu (bool, optional): Whether to move the network to GPU and
            wrap it with ``DataParallel``. Defaults to True.
        gpu_ids (list[int] | None, optional): Device ids used when
            ``use_gpu`` is True. ``None`` is equivalent to ``[0]``.
            Defaults to None.
        pretrained (bool, optional): Whether to load perceptually-learned
            weights from ``LPIPS_WEIGHTS_URL``. Defaults to True.
    """

    def __init__(self, spatial=False, use_gpu=True, gpu_ids=None,
                 pretrained=True):
        super().__init__()
        print('Setting up Perceptual loss...')
        self.use_gpu = use_gpu
        self.spatial = spatial
        # fix: the original used a mutable default argument (gpu_ids=[0]),
        # which is shared across all instances; None keeps the old behavior.
        self.gpu_ids = [0] if gpu_ids is None else gpu_ids
        print('...[pnet-lin, vgg16] initializing')
        self.init_net(pretrained=pretrained)
        print('...Done')

    def forward(self, pred, target, normalize=False):
        """Compute the LPIPS distance between ``pred`` and ``target``.

        Args:
            pred (torch.Tensor): Predicted image batch.
            target (torch.Tensor): Reference image batch.
            normalize (bool, optional): If True, rescale inputs from [0, 1]
                to [-1, 1] before measuring. Defaults to False.

        Returns:
            torch.Tensor: The LPIPS distance.
        """
        if normalize:
            target = 2 * target - 1
            pred = 2 * pred - 1

        return self.net(target, pred)

    def init_net(self, pnet_rand=False, pnet_tune=False, pretrained=True,
                 version='0.1'):
        """Build the LPIPS network and optionally load pretrained weights.

        Args:
            pnet_rand (bool, optional): Use randomly initialized backbone
                weights. Defaults to False.
            pnet_tune (bool, optional): Fine-tune the backbone.
                Defaults to False.
            pretrained (bool, optional): Load the learned linear weights.
                Defaults to True.
            version (str, optional): LPIPS version string. Defaults to '0.1'.
        """
        self.net = PNetLin(
            pnet_rand=pnet_rand,
            pnet_tune=pnet_tune,
            use_dropout=True,
            spatial=self.spatial,
            version=version,
            lpips=True)
        if pretrained:
            print('Loading model from: %s' % LPIPS_WEIGHTS_URL)
            self.net.load_state_dict(
                load_url(LPIPS_WEIGHTS_URL, map_location='cpu', progress=True),
                strict=False)

        # NOTE(review): this shadows ``nn.Module.parameters`` on the
        # instance; kept unchanged for backward compatibility with callers.
        self.parameters = list(self.net.parameters())
        self.net.eval()

        if self.use_gpu:
            self.net.to(self.gpu_ids[0])
            # wrap with DataParallel so inputs are scattered over gpu_ids
            self.net = torch.nn.DataParallel(self.net, device_ids=self.gpu_ids)
mmgen/models/architectures/lpips/pretrained_networks.py
0 → 100755
View file @
b7536f78
# Copyright (c) OpenMMLab. All rights reserved.
from
collections
import
namedtuple
import
torch
from
torchvision
import
models
as
tv
class vgg16(torch.nn.Module):
    r"""VGG16 feature extractor for LPIPS metric.

    Ref : https://github.com/richzhang/PerceptualSimilarity/blob/master/lpips/pretrained_networks.py # noqa
    """

    def __init__(self, requires_grad=False, pretrained=True):
        super().__init__()
        features = tv.vgg16(pretrained=pretrained).features
        self.N_slices = 5
        # layer-index boundaries of the five relu stages in
        # torchvision's vgg16 ``features`` sequential
        bounds = (0, 4, 9, 16, 23, 30)
        for idx in range(self.N_slices):
            stage = torch.nn.Sequential()
            for layer_id in range(bounds[idx], bounds[idx + 1]):
                stage.add_module(str(layer_id), features[layer_id])
            setattr(self, f'slice{idx + 1}', stage)
        if not requires_grad:
            # freeze the backbone when used purely as a feature extractor
            for param in self.parameters():
                param.requires_grad = False

    def forward(self, X):
        """Run the input through the five stages and return all
        intermediate relu activations as a named tuple."""
        stage_outputs = []
        h = X
        for idx in range(1, self.N_slices + 1):
            h = getattr(self, f'slice{idx}')(h)
            stage_outputs.append(h)
        vgg_outputs = namedtuple(
            'VggOutputs',
            ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3', 'relu5_3'])
        return vgg_outputs(*stage_outputs)
mmgen/models/architectures/lsgan/__init__.py
0 → 100644
View file @
b7536f78
# Copyright (c) OpenMMLab. All rights reserved.
from
.generator_discriminator
import
LSGANDiscriminator
,
LSGANGenerator
__all__
=
[
'LSGANDiscriminator'
,
'LSGANGenerator'
]
mmgen/models/architectures/lsgan/generator_discriminator.py
0 → 100644
View file @
b7536f78
# Copyright (c) OpenMMLab. All rights reserved.
import
numpy
as
np
import
torch
import
torch.nn
as
nn
from
mmcv.cnn
import
ConvModule
from
mmcv.cnn.bricks
import
build_activation_layer
from
mmgen.models.builder
import
MODULES
from
..common
import
get_module_device
@MODULES.register_module()
class LSGANGenerator(nn.Module):
    """Generator for LSGAN.

    Implementation Details for LSGAN architecture:

    #. Adopt transposed convolution in the generator;
    #. Use batchnorm in the generator except for the final output layer;
    #. Use ReLU in the generator in addition to the final output layer;
    #. Keep channels of feature maps unchanged in the convolution backbone;
    #. Use one more 3x3 conv every upsampling in the convolution backbone.

    We follow the implementation details of the origin paper:
    Least Squares Generative Adversarial Networks
    https://arxiv.org/pdf/1611.04076.pdf

    Args:
        output_scale (int, optional): Output scale for the generated image.
            Defaults to 128.
        out_channels (int, optional): The channel number of the output feature.
            Defaults to 3.
        base_channels (int, optional): The basic channel number of the
            generator. The other layers contains channels based on this number.
            Defaults to 256.
        input_scale (int, optional): The scale of the input 2D feature map.
            Defaults to 8.
        noise_size (int, optional): Size of the input noise
            vector. Defaults to 1024.
        conv_cfg (dict, optional): Config for the convolution module used in
            this generator. Defaults to dict(type='ConvTranspose2d').
        default_norm_cfg (dict, optional): Norm config for all of layers
            except for the final output layer. Defaults to dict(type='BN').
        default_act_cfg (dict, optional): Activation config for all of layers
            except for the final output layer. Defaults to dict(type='ReLU').
        out_act_cfg (dict, optional): Activation config for the final output
            layer. Defaults to dict(type='Tanh').
    """

    def __init__(self,
                 output_scale=128,
                 out_channels=3,
                 base_channels=256,
                 input_scale=8,
                 noise_size=1024,
                 conv_cfg=dict(type='ConvTranspose2d'),
                 default_norm_cfg=dict(type='BN'),
                 default_act_cfg=dict(type='ReLU'),
                 out_act_cfg=dict(type='Tanh')):
        super().__init__()
        # the backbone performs log2(output/input) - 2 plain upsamples plus
        # two fixed upsampling output blocks, so a ratio of >= 4 is required
        assert output_scale % input_scale == 0
        assert output_scale // input_scale >= 4
        self.output_scale = output_scale
        self.base_channels = base_channels
        self.input_scale = input_scale
        self.noise_size = noise_size

        # project the noise vector to a 2D feature map
        self.noise2feat_head = nn.Sequential(
            nn.Linear(noise_size, input_scale * input_scale * base_channels))
        self.noise2feat_tail = nn.Sequential(nn.BatchNorm2d(base_channels))
        if default_act_cfg is not None:
            self.noise2feat_tail.add_module(
                'act', build_activation_layer(default_act_cfg))

        # the number of times for upsampling
        self.num_upsamples = int(np.log2(output_scale // input_scale)) - 2

        # build up convolution backbone (excluding the output layer)
        self.conv_blocks = nn.ModuleList()
        for _ in range(self.num_upsamples):
            # upsampling transposed conv followed by a refining 3x3 conv
            self.conv_blocks.append(
                ConvModule(
                    base_channels,
                    base_channels,
                    kernel_size=3,
                    stride=2,
                    padding=1,
                    conv_cfg=dict(conv_cfg, output_padding=1),
                    norm_cfg=default_norm_cfg,
                    act_cfg=default_act_cfg))
            self.conv_blocks.append(
                ConvModule(
                    base_channels,
                    base_channels,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    conv_cfg=conv_cfg,
                    norm_cfg=default_norm_cfg,
                    act_cfg=default_act_cfg))

        # output blocks
        self.conv_blocks.append(
            ConvModule(
                base_channels,
                int(base_channels // 2),
                kernel_size=3,
                stride=2,
                padding=1,
                conv_cfg=dict(conv_cfg, output_padding=1),
                norm_cfg=default_norm_cfg,
                act_cfg=default_act_cfg))
        self.conv_blocks.append(
            ConvModule(
                int(base_channels // 2),
                int(base_channels // 4),
                kernel_size=3,
                stride=2,
                padding=1,
                conv_cfg=dict(conv_cfg, output_padding=1),
                norm_cfg=default_norm_cfg,
                act_cfg=default_act_cfg))
        # final layer: no norm, output activation (e.g. Tanh)
        self.conv_blocks.append(
            ConvModule(
                int(base_channels // 4),
                out_channels,
                kernel_size=3,
                stride=1,
                padding=1,
                conv_cfg=conv_cfg,
                norm_cfg=None,
                act_cfg=out_act_cfg))

    def forward(self, noise, num_batches=0, return_noise=False):
        """Forward function.

        Args:
            noise (torch.Tensor | callable | None): You can directly give a
                batch of noise through a ``torch.Tensor`` or offer a callable
                function to sample a batch of noise data. Otherwise, the
                ``None`` indicates to use the default noise sampler.
            num_batches (int, optional): The number of batch size.
                Defaults to 0.
            return_noise (bool, optional): If True, ``noise_batch`` will be
                returned in a dict with ``fake_img``. Defaults to False.

        Returns:
            torch.Tensor | dict: If not ``return_noise``, only the output image
                will be returned. Otherwise, a dict contains ``fake_img`` and
                ``noise_batch`` will be returned.
        """
        # receive noise and conduct sanity check.
        if isinstance(noise, torch.Tensor):
            assert noise.shape[1] == self.noise_size
            if noise.ndim == 2:
                noise_batch = noise
            else:
                # fix: the two string fragments previously concatenated
                # without a separator ("(n, c)but got ...")
                raise ValueError('The noise should be in shape of (n, c), '
                                 f'but got {noise.shape}')
        # receive a noise generator and sample noise.
        elif callable(noise):
            noise_generator = noise
            assert num_batches > 0
            noise_batch = noise_generator((num_batches, self.noise_size))
        # otherwise, we will adopt default noise sampler.
        else:
            assert num_batches > 0
            noise_batch = torch.randn((num_batches, self.noise_size))

        # dirty code for putting data on the right device
        noise_batch = noise_batch.to(get_module_device(self))

        # noise2feat
        x = self.noise2feat_head(noise_batch)
        x = x.reshape((-1, self.base_channels, self.input_scale,
                       self.input_scale))
        x = self.noise2feat_tail(x)

        # conv module
        for conv in self.conv_blocks:
            x = conv(x)

        if return_noise:
            return dict(fake_img=x, noise_batch=noise_batch)

        return x
@MODULES.register_module()
class LSGANDiscriminator(nn.Module):
    """Discriminator for LSGAN.

    Implementation Details for LSGAN architecture:

    #. Adopt convolution in the discriminator;
    #. Use batchnorm in the discriminator except for the input and final \
       output layer;
    #. Use LeakyReLU in the discriminator in addition to the output layer;
    #. Use fully connected layer in the output layer;
    #. Use 5x5 conv rather than 4x4 conv in DCGAN.

    Args:
        input_scale (int, optional): The scale of the input image. Defaults to
            128.
        output_scale (int, optional): The final scale of the convolutional
            feature. Defaults to 8.
        out_channels (int, optional): The channel number of the final output
            layer. Defaults to 1.
        in_channels (int, optional): The channel number of the input image.
            Defaults to 3.
        base_channels (int, optional): The basic channel number of the
            discriminator. The other layers contains channels based on this
            number. Defaults to 64.
        conv_cfg (dict, optional): Config for the convolution module used in
            this discriminator. Defaults to dict(type='Conv2d').
        default_norm_cfg (dict, optional): Norm config for all of layers
            except for the final output layer. Defaults to ``dict(type='BN')``.
        default_act_cfg (dict, optional): Activation config for all of layers
            except for the final output layer. Defaults to
            ``dict(type='LeakyReLU', negative_slope=0.2)``.
        out_act_cfg (dict, optional): Activation config for the final output
            layer. Defaults to ``None``.
    """

    def __init__(self,
                 input_scale=128,
                 output_scale=8,
                 out_channels=1,
                 in_channels=3,
                 base_channels=64,
                 conv_cfg=dict(type='Conv2d'),
                 default_norm_cfg=dict(type='BN'),
                 default_act_cfg=dict(type='LeakyReLU', negative_slope=0.2),
                 out_act_cfg=None):
        super().__init__()
        assert input_scale % output_scale == 0
        assert input_scale // output_scale >= 2
        self.input_scale = input_scale
        self.output_scale = output_scale
        self.out_channels = out_channels
        self.base_channels = base_channels
        self.with_out_activation = out_act_cfg is not None

        self.conv_blocks = nn.ModuleList()
        # input layer: no norm so the raw image statistics stay untouched
        self.conv_blocks.append(
            ConvModule(
                in_channels,
                base_channels,
                kernel_size=5,
                stride=2,
                padding=2,
                conv_cfg=conv_cfg,
                norm_cfg=None,
                act_cfg=default_act_cfg))

        # the number of times for downsampling
        self.num_downsamples = int(np.log2(input_scale // output_scale)) - 1

        # build up downsampling backbone (excluding the output layer);
        # each stage halves the resolution and doubles the channels
        curr_channels = base_channels
        for _ in range(self.num_downsamples):
            self.conv_blocks.append(
                ConvModule(
                    curr_channels,
                    curr_channels * 2,
                    kernel_size=5,
                    stride=2,
                    padding=2,
                    conv_cfg=conv_cfg,
                    norm_cfg=default_norm_cfg,
                    act_cfg=default_act_cfg))
            curr_channels = curr_channels * 2

        # output layer
        self.decision = nn.Sequential(
            nn.Linear(output_scale * output_scale * curr_channels,
                      out_channels))
        if self.with_out_activation:
            self.out_activation = build_activation_layer(out_act_cfg)

    def forward(self, x):
        """Forward function.

        Args:
            x (torch.Tensor): Fake or real image tensor.

        Returns:
            torch.Tensor: Prediction for the reality of the input image.
        """
        n = x.shape[0]
        for conv in self.conv_blocks:
            x = conv(x)

        # flatten the feature map before the fully connected decision layer
        x = x.reshape(n, -1)
        x = self.decision(x)
        if self.with_out_activation:
            x = self.out_activation(x)

        return x
mmgen/models/architectures/pggan/__init__.py
0 → 100644
View file @
b7536f78
# Copyright (c) OpenMMLab. All rights reserved.
from
.generator_discriminator
import
PGGANDiscriminator
,
PGGANGenerator
from
.modules
import
(
EqualizedLR
,
EqualizedLRConvDownModule
,
EqualizedLRConvModule
,
EqualizedLRConvUpModule
,
EqualizedLRLinearModule
,
MiniBatchStddevLayer
,
PGGANNoiseTo2DFeat
,
PixelNorm
,
equalized_lr
)
__all__
=
[
'EqualizedLR'
,
'equalized_lr'
,
'EqualizedLRConvModule'
,
'EqualizedLRLinearModule'
,
'EqualizedLRConvUpModule'
,
'EqualizedLRConvDownModule'
,
'PixelNorm'
,
'MiniBatchStddevLayer'
,
'PGGANNoiseTo2DFeat'
,
'PGGANGenerator'
,
'PGGANDiscriminator'
]
mmgen/models/architectures/pggan/generator_discriminator.py
0 → 100644
View file @
b7536f78
# Copyright (c) OpenMMLab. All rights reserved.
from
copy
import
deepcopy
from
functools
import
partial
import
numpy
as
np
import
torch
import
torch.nn
as
nn
import
torch.nn.functional
as
F
from
mmcv.cnn.bricks.upsample
import
build_upsample_layer
from
mmgen.models.builder
import
MODULES
from
..common
import
get_module_device
from
.modules
import
(
EqualizedLRConvDownModule
,
EqualizedLRConvModule
,
EqualizedLRConvUpModule
,
MiniBatchStddevLayer
,
PGGANDecisionHead
,
PGGANNoiseTo2DFeat
)
@MODULES.register_module()
class PGGANGenerator(nn.Module):
    """Generator for PGGAN.

    Args:
        noise_size (int): Size of the input noise vector.
        out_scale (int): Output scale for the generated image.
        label_size (int, optional): Size of the label vector.
            Defaults to 0.
        base_channels (int, optional): The basic channel number of the
            generator. The other layers contains channels based on this
            number. Defaults to 8192.
        channel_decay (float, optional): Decay for channels of feature maps.
            Defaults to 1.0.
        max_channels (int, optional): Maximum channels for the feature
            maps in the generator block. Defaults to 512.
        fused_upconv (bool, optional): Whether use fused upconv.
            Defaults to True.
        conv_module_cfg (dict, optional): Config for the convolution
            module used in this generator. Defaults to None.
        fused_upconv_cfg (dict, optional): Config for the fused upconv
            module used in this generator. Defaults to None.
        upsample_cfg (dict, optional): Config for the upsampling operation.
            Defaults to None.
    """
    # default config for the fused upsample+conv blocks (deconv based)
    _default_fused_upconv_cfg = dict(
        conv_cfg=dict(type='deconv'),
        kernel_size=3,
        stride=2,
        padding=1,
        bias=True,
        act_cfg=dict(type='LeakyReLU', negative_slope=0.2),
        norm_cfg=dict(type='PixelNorm'),
        order=('conv', 'act', 'norm'))

    # default config for plain (non-upsampling) conv blocks
    _default_conv_module_cfg = dict(
        conv_cfg=None,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=True,
        act_cfg=dict(type='LeakyReLU', negative_slope=0.2),
        norm_cfg=dict(type='PixelNorm'),
        order=('conv', 'act', 'norm'))

    # default config for the residual-path upsampling layer
    _default_upsample_cfg = dict(type='nearest', scale_factor=2)

    def __init__(self,
                 noise_size,
                 out_scale,
                 label_size=0,
                 base_channels=8192,
                 channel_decay=1.,
                 max_channels=512,
                 fused_upconv=True,
                 conv_module_cfg=None,
                 fused_upconv_cfg=None,
                 upsample_cfg=None):
        super().__init__()
        # falsy noise_size falls back to the first block's channel count
        self.noise_size = noise_size if noise_size else min(
            base_channels, max_channels)
        self.out_scale = out_scale
        self.out_log2_scale = int(np.log2(out_scale))
        # sanity check for the output scale
        assert out_scale == 2**self.out_log2_scale and out_scale >= 4
        self.label_size = label_size
        self.base_channels = base_channels
        self.channel_decay = channel_decay
        self.max_channels = max_channels
        self.fused_upconv = fused_upconv

        # set conv cfg
        self.conv_module_cfg = deepcopy(self._default_conv_module_cfg)
        # update with customized config
        if conv_module_cfg:
            self.conv_module_cfg.update(conv_module_cfg)

        if self.fused_upconv:
            self.fused_upconv_cfg = deepcopy(self._default_fused_upconv_cfg)
            # update with customized config
            if fused_upconv_cfg:
                self.fused_upconv_cfg.update(fused_upconv_cfg)

        self.upsample_cfg = deepcopy(self._default_upsample_cfg)
        if upsample_cfg is not None:
            self.upsample_cfg.update(upsample_cfg)

        # label vector (if any) is concatenated to the noise before this map
        self.noise2feat = PGGANNoiseTo2DFeat(noise_size + label_size,
                                             self._num_out_channels(1))

        self.torgb_layers = nn.ModuleList()
        self.conv_blocks = nn.ModuleList()
        # one torgb layer and one (up)conv block group per log2 scale
        for s in range(2, self.out_log2_scale + 1):
            in_ch = self._num_out_channels(
                s - 1) if s == 2 else self._num_out_channels(s - 2)

            # setup torgb layers
            self.torgb_layers.append(
                self._get_torgb_layer(self._num_out_channels(s - 1)))

            # setup upconv or conv blocks
            self.conv_blocks.extend(self._get_upconv_block(in_ch, s))

        # build upsample layer for residual path
        self.upsample_layer = build_upsample_layer(self.upsample_cfg)

    def _get_torgb_layer(self, in_channels):
        # 1x1 projection from feature channels to RGB; gain=1 and no
        # act/norm, matching the PGGAN reference torgb layer
        return EqualizedLRConvModule(
            in_channels,
            3,
            kernel_size=1,
            stride=1,
            equalized_lr_cfg=dict(gain=1),
            bias=True,
            norm_cfg=None,
            act_cfg=None)

    def _num_out_channels(self, log_scale):
        # halve channels per (decayed) log scale, capped at max_channels
        return min(
            int(self.base_channels / (2.0**(log_scale * self.channel_decay))),
            self.max_channels)

    def _get_upconv_block(self, in_channels, log_scale):
        modules = []
        # start 4x4 scale
        if log_scale == 2:
            modules.append(
                EqualizedLRConvModule(in_channels,
                                      self._num_out_channels(log_scale - 1),
                                      **self.conv_module_cfg))
        # 8x8 --> 1024x1024 scales
        else:
            if self.fused_upconv:
                cfg_ = dict(upsample=dict(type='fused_nn'))
                cfg_.update(self.fused_upconv_cfg)
            else:
                cfg_ = dict(upsample=self.upsample_cfg)
                cfg_.update(self.conv_module_cfg)
            # up + conv
            modules.append(
                EqualizedLRConvUpModule(in_channels,
                                        self._num_out_channels(log_scale - 1),
                                        **cfg_))
            # refine conv
            modules.append(
                EqualizedLRConvModule(self._num_out_channels(log_scale - 1),
                                      self._num_out_channels(log_scale - 1),
                                      **self.conv_module_cfg))
        return modules

    def forward(self,
                noise,
                label=None,
                num_batches=0,
                return_noise=False,
                transition_weight=1.,
                curr_scale=-1):
        """Forward function.

        Args:
            noise (torch.Tensor | callable | None): You can directly give a
                batch of noise through a ``torch.Tensor`` or offer a callable
                function to sample a batch of noise data. Otherwise, the
                ``None`` indicates to use the default noise sampler.
            label (Tensor, optional): Label vector with shape [N, C]. Defaults
                to None.
            num_batches (int, optional): The number of batch size. Defaults to
                0.
            return_noise (bool, optional): If True, ``noise_batch`` will be
                returned in a dict with ``fake_img``. Defaults to False.
            transition_weight (float, optional): The weight used in resolution
                transition. Defaults to 1.0.
            curr_scale (int, optional): The scale for the current inference or
                training. Defaults to -1.

        Returns:
            torch.Tensor | dict: If not ``return_noise``, only the output image
                will be returned. Otherwise, a dict contains ``fake_img`` and
                ``noise_batch`` will be returned.
        """
        # receive noise and conduct sanity check.
        if isinstance(noise, torch.Tensor):
            assert noise.shape[1] == self.noise_size
            assert noise.ndim == 2, ('The noise should be in shape of (n, c), '
                                     f'but got {noise.shape}')
            noise_batch = noise
        # receive a noise generator and sample noise.
        elif callable(noise):
            noise_generator = noise
            assert num_batches > 0
            noise_batch = noise_generator((num_batches, self.noise_size))
        # otherwise, we will adopt default noise sampler.
        else:
            assert num_batches > 0
            # TODO: check pggan default noise type
            noise_batch = torch.randn((num_batches, self.noise_size))

        # dirty code for putting data on the right device
        noise_batch = noise_batch.to(get_module_device(self))

        if label is not None:
            noise_batch = torch.cat([noise_batch, label.to(noise_batch)],
                                    dim=1)

        # noise vector to 2D feature
        x = self.noise2feat(noise_batch)

        # build current computational graph
        curr_log2_scale = self.out_log2_scale if curr_scale < 0 else int(
            np.log2(curr_scale))

        # 4x4 scale
        x = self.conv_blocks[0](x)
        if curr_log2_scale <= 3:
            out_img = last_img = self.torgb_layers[0](x)

        # 8x8 and larger scales
        for s in range(3, curr_log2_scale + 1):
            x = self.conv_blocks[2 * s - 5](x)
            x = self.conv_blocks[2 * s - 4](x)
            if s + 1 == curr_log2_scale:
                # keep the second-to-last RGB image for the residual path
                last_img = self.torgb_layers[s - 2](x)
            elif s == curr_log2_scale:
                out_img = self.torgb_layers[s - 2](x)
                residual_img = self.upsample_layer(last_img)
                # linear interpolation between the upsampled lower-resolution
                # image and the current one during resolution transition
                out_img = residual_img + transition_weight * (
                    out_img - residual_img)

        if return_noise:
            output = dict(
                fake_img=out_img, noise_batch=noise_batch, label=label)
            return output

        return out_img
@MODULES.register_module()
class PGGANDiscriminator(nn.Module):
    """Discriminator for PGGAN.

    Args:
        in_scale (int): The scale of the input image.
        label_size (int, optional): Size of the label vector. Defaults to
            0.
        base_channels (int, optional): The basic channel number of the
            generator. The other layers contains channels based on this
            number. Defaults to 8192.
        max_channels (int, optional): Maximum channels for the feature
            maps in the discriminator block. Defaults to 512.
        in_channels (int, optional): Number of channels in input images.
            Defaults to 3.
        channel_decay (float, optional): Decay for channels of feature
            maps. Defaults to 1.0.
        mbstd_cfg (dict, optional): Configs for minibatch-stddev layer.
            Defaults to dict(group_size=4).
        fused_convdown (bool, optional): Whether use fused downconv.
            Defaults to True.
        conv_module_cfg (dict, optional): Config for the convolution
            module used in this generator. Defaults to None.
        fused_convdown_cfg (dict, optional): Config for the fused downconv
            module used in this discriminator. Defaults to None.
        fromrgb_layer_cfg (dict, optional): Config for the fromrgb layer.
            Defaults to None.
        downsample_cfg (dict, optional): Config for the downsampling
            operation. Defaults to None.
    """
    # default 1x1 fromrgb projection config
    _default_fromrgb_cfg = dict(
        conv_cfg=None,
        kernel_size=1,
        stride=1,
        padding=0,
        bias=True,
        act_cfg=dict(type='LeakyReLU', negative_slope=0.2),
        norm_cfg=None,
        order=('conv', 'act', 'norm'))

    # default config for plain (stride 1) conv blocks
    _default_conv_module_cfg = dict(
        kernel_size=3,
        padding=1,
        stride=1,
        norm_cfg=None,
        act_cfg=dict(type='LeakyReLU', negative_slope=0.2))

    # default config for fused conv + downsample blocks (stride 2)
    _default_convdown_cfg = dict(
        kernel_size=3,
        padding=1,
        stride=2,
        norm_cfg=None,
        act_cfg=dict(type='LeakyReLU', negative_slope=0.2))

    def __init__(self,
                 in_scale,
                 label_size=0,
                 base_channels=8192,
                 max_channels=512,
                 in_channels=3,
                 channel_decay=1.0,
                 mbstd_cfg=dict(group_size=4),
                 fused_convdown=True,
                 conv_module_cfg=None,
                 fused_convdown_cfg=None,
                 fromrgb_layer_cfg=None,
                 downsample_cfg=None):
        super().__init__()
        self.in_scale = in_scale
        self.in_log2_scale = int(np.log2(self.in_scale))
        self.label_size = label_size
        self.base_channels = base_channels
        self.max_channels = max_channels
        self.in_channels = in_channels
        self.channel_decay = channel_decay
        self.with_mbstd = mbstd_cfg is not None

        self.fused_convdown = fused_convdown

        self.conv_module_cfg = deepcopy(self._default_conv_module_cfg)
        if conv_module_cfg is not None:
            self.conv_module_cfg.update(conv_module_cfg)

        if self.fused_convdown:
            self.fused_convdown_cfg = deepcopy(self._default_convdown_cfg)
            if fused_convdown_cfg is not None:
                self.fused_convdown_cfg.update(fused_convdown_cfg)

        self.fromrgb_layer_cfg = deepcopy(self._default_fromrgb_cfg)
        if fromrgb_layer_cfg:
            self.fromrgb_layer_cfg.update(fromrgb_layer_cfg)

        # setup conv blocks
        self.conv_blocks = nn.ModuleList()
        self.fromrgb_layers = nn.ModuleList()

        # one fromrgb layer and one conv/downconv block group per log2 scale
        for s in range(2, self.in_log2_scale + 1):
            self.fromrgb_layers.append(
                self._get_fromrgb_layer(self.in_channels, s))

            self.conv_blocks.extend(
                self._get_convdown_block(self._num_out_channels(s - 1), s))

        # setup downsample layer
        self.downsample_cfg = deepcopy(downsample_cfg)
        if self.downsample_cfg is None or self.downsample_cfg.get(
                'type', None) == 'avgpool':
            self.downsample = nn.AvgPool2d(kernel_size=2, stride=2)
        elif self.downsample_cfg.get('type', None) in ['nearest', 'bilinear']:
            self.downsample = partial(
                F.interpolate,
                mode=self.downsample_cfg.pop('type'),
                **self.downsample_cfg)
        else:
            raise NotImplementedError(
                'We have not supported the downsampling with type'
                f' {downsample_cfg}.')

        # setup minibatch stddev layer
        if self.with_mbstd:
            self.mbstd_layer = MiniBatchStddevLayer(**mbstd_cfg)
            # minibatch stddev layer will concatenate an additional feature map
            # in channel dimension.
            decision_in_channels = self._num_out_channels(1) * 16 + 16
        else:
            decision_in_channels = self._num_out_channels(1) * 16

        # setup decision layer
        self.decision = PGGANDecisionHead(decision_in_channels,
                                          self._num_out_channels(0),
                                          1 + self.label_size)

    def _num_out_channels(self, log_scale):
        # halve channels per (decayed) log scale, capped at max_channels
        return min(
            int(self.base_channels / (2.0**(log_scale * self.channel_decay))),
            self.max_channels)

    def _get_fromrgb_layer(self, in_channels, log2_scale):
        # 1x1 projection from RGB to the feature channels at this scale
        return EqualizedLRConvModule(in_channels,
                                     self._num_out_channels(log2_scale - 1),
                                     **self.fromrgb_layer_cfg)

    def _get_convdown_block(self, in_channels, log2_scale):
        modules = []
        if log2_scale == 2:
            modules.append(
                EqualizedLRConvModule(in_channels,
                                      self._num_out_channels(log2_scale - 1),
                                      **self.conv_module_cfg))
        else:
            modules.append(
                EqualizedLRConvModule(in_channels,
                                      self._num_out_channels(log2_scale - 1),
                                      **self.conv_module_cfg))
            if self.fused_convdown:
                cfg_ = dict(downsample=dict(type='fused_pool'))
                cfg_.update(self.fused_convdown_cfg)
            else:
                # NOTE(review): this reads ``self.downsample``, which is only
                # assigned later in ``__init__`` (after the conv-block loop);
                # the non-fused path looks like it would raise AttributeError
                # during construction — verify before relying on it.
                cfg_ = dict(downsample=self.downsample)
                cfg_.update(self.conv_module_cfg)
            modules.append(
                EqualizedLRConvDownModule(
                    self._num_out_channels(log2_scale - 1),
                    self._num_out_channels(log2_scale - 2), **cfg_))

        return modules

    def forward(self, x, transition_weight=1., curr_scale=-1):
        """Forward function.

        Args:
            x (torch.Tensor): Input image tensor.
            transition_weight (float, optional): The weight used in resolution
                transition. Defaults to 1.0.
            curr_scale (int, optional): The scale for the current inference or
                training. Defaults to -1.

        Returns:
            Tensor: Predict score for the input image.
        """
        curr_log2_scale = self.in_log2_scale if curr_scale < 4 else int(
            np.log2(curr_scale))

        original_img = x

        x = self.fromrgb_layers[curr_log2_scale - 2](x)

        # run blocks from the current scale down to 8x8
        for s in range(curr_log2_scale, 2, -1):
            x = self.conv_blocks[2 * s - 5](x)
            x = self.conv_blocks[2 * s - 4](x)
            if s == curr_log2_scale:
                # residual branch: fromrgb on the downsampled input image,
                # blended with the conv path during resolution transition
                img_down = self.downsample(original_img)
                y = self.fromrgb_layers[curr_log2_scale - 3](img_down)
                x = y + transition_weight * (x - y)

        if self.with_mbstd:
            x = self.mbstd_layer(x)

        x = self.decision(x)

        if self.label_size > 0:
            # split real/fake score and label prediction
            return x[:, :1], x[:, 1:]

        return x
mmgen/models/architectures/pggan/modules.py
0 → 100644
View file @
b7536f78
# Copyright (c) OpenMMLab. All rights reserved.
from
copy
import
deepcopy
import
numpy
as
np
import
torch
import
torch.nn
as
nn
import
torch.nn.functional
as
F
from
mmcv.cnn.bricks
import
(
NORM_LAYERS
,
PLUGIN_LAYERS
,
ConvModule
,
build_activation_layer
,
build_norm_layer
,
build_upsample_layer
)
from
mmcv.cnn.utils
import
normal_init
from
torch.nn.init
import
_calculate_correct_fan
from
mmgen.models.builder
import
MODULES
from
mmgen.models.common
import
AllGatherLayer
class EqualizedLR:
    r"""Equalized Learning Rate.

    This trick is proposed in:
    Progressive Growing of GANs for Improved Quality, Stability, and Variation

    The general idea is to dynamically rescale the weight in training instead
    of in initializing so that the variance of the responses in each layer is
    guaranteed with some statistical properties.

    Note that this function is always combined with a convolution module which
    is initialized with :math:`\mathcal{N}(0, 1)`.

    Args:
        name (str | optional): The name of weights. Defaults to 'weight'.
        mode (str, optional): The mode of computing ``fan`` which is the
            same as ``kaiming_init`` in pytorch. You can choose one from
            ['fan_in', 'fan_out']. Defaults to 'fan_in'.
    """

    def __init__(self, name='weight', gain=2**0.5, mode='fan_in', lr_mul=1.0):
        # attribute name of the wrapped weight on the module
        self.name = name
        self.mode = mode
        # scaling gain applied on top of the 1/sqrt(fan) factor
        self.gain = gain
        # learning-rate multiplier applied to the rescaled weight
        self.lr_mul = lr_mul

    def compute_weight(self, module):
        """Compute weight with equalized learning rate.

        Args:
            module (nn.Module): A module that is wrapped with equalized lr.

        Returns:
            torch.Tensor: Updated weight.
        """
        weight = getattr(module, self.name + '_orig')
        if weight.ndim == 5:
            # weight in shape of [b, out, in, k, k]
            fan = _calculate_correct_fan(weight[0], self.mode)
        else:
            assert weight.ndim <= 4
            fan = _calculate_correct_fan(weight, self.mode)

        # rescale: weight * gain * sqrt(1/fan) * lr_mul
        weight = weight * torch.tensor(
            self.gain, device=weight.device) * torch.sqrt(
                torch.tensor(1. / fan, device=weight.device)) * self.lr_mul

        return weight

    def __call__(self, module, inputs):
        """Standard interface for forward pre hooks."""
        # refresh the rescaled weight right before every forward pass
        setattr(module, self.name, self.compute_weight(module))

    @staticmethod
    def apply(module, name, gain=2**0.5, mode='fan_in', lr_mul=1.):
        """Apply function.

        This function is to register an equalized learning rate hook in an
        ``nn.Module``.

        Args:
            module (nn.Module): Module to be wrapped.
            name (str | optional): The name of weights. Defaults to 'weight'.
            mode (str, optional): The mode of computing ``fan`` which is the
                same as ``kaiming_init`` in pytorch. You can choose one from
                ['fan_in', 'fan_out']. Defaults to 'fan_in'.

        Returns:
            nn.Module: Module that is registered with equalized lr hook.
        """
        # sanity check for duplicated hooks.
        for _, hook in module._forward_pre_hooks.items():
            if isinstance(hook, EqualizedLR):
                raise RuntimeError(
                    'Cannot register two equalized_lr hooks on the same '
                    f'parameter {name} in {module} module.')

        fn = EqualizedLR(name, gain=gain, mode=mode, lr_mul=lr_mul)

        weight = module._parameters[name]

        # move the real parameter to ``<name>_orig`` so the hook can write
        # the rescaled tensor back under ``<name>``
        delattr(module, name)
        module.register_parameter(name + '_orig', weight)

        # We still need to assign weight back as fn.name because all sorts of
        # things may assume that it exists, e.g., when initializing weights.
        # However, we can't directly assign as it could be an nn.Parameter and
        # gets added as a parameter. Instead, we register weight.data as a
        # plain attribute.
        setattr(module, name, weight.data)

        module.register_forward_pre_hook(fn)

        # TODO: register load state dict hook

        return fn
def equalized_lr(module, name='weight', gain=2**0.5, mode='fan_in', lr_mul=1.):
    r"""Equalized Learning Rate.

    This trick is proposed in:
    Progressive Growing of GANs for Improved Quality, Stability, and Variation

    The general idea is to dynamically rescale the weight in training instead
    of in initializing so that the variance of the responses in each layer is
    guaranteed with some statistical properties.

    Note that this function is always combined with a convolution module which
    is initialized with :math:`\mathcal{N}(0, 1)`.

    Args:
        module (nn.Module): Module to be wrapped.
        name (str | optional): The name of weights. Defaults to 'weight'.
        gain (float, optional): Gain factor. Defaults to ``2**0.5``.
        mode (str, optional): The mode of computing ``fan`` which is the
            same as ``kaiming_init`` in pytorch. You can choose one from
            ['fan_in', 'fan_out']. Defaults to 'fan_in'.
        lr_mul (float, optional): Learning rate multiplier. Defaults to 1.

    Returns:
        nn.Module: Module that is registered with equalized lr hook.
    """
    # Delegate to the hook's classmethod-style registrar; the module itself
    # is returned so this can be used as a wrapper expression.
    EqualizedLR.apply(module, name, gain=gain, mode=mode, lr_mul=lr_mul)
    return module
def pixel_norm(x, eps=1e-6):
    """Pixel Normalization.

    This normalization is proposed in:
    Progressive Growing of GANs for Improved Quality, Stability, and Variation

    Each spatial position is divided by the RMS of its channel vector.

    Args:
        x (torch.Tensor): Tensor to be normalized, with the channel on dim 1.
        eps (float, optional): Epsilon to avoid dividing zero.
            Defaults to 1e-6.

    Returns:
        torch.Tensor: Normalized tensor of the same shape as ``x``.
    """
    # Feature-detect ``torch.linalg.norm`` (added in torch 1.7) instead of
    # comparing version strings: ``torch.__version__ >= '1.7.0'`` is a
    # lexicographic comparison on plain-string versions and wrongly treats
    # '1.10.0' as smaller than '1.7.0'.
    if hasattr(torch, 'linalg') and hasattr(torch.linalg, 'norm'):
        norm = torch.linalg.norm(x, ord=2, dim=1, keepdim=True)
    else:
        # support older pytorch version
        norm = torch.norm(x, p=2, dim=1, keepdim=True)
    # Convert the L2 norm into an RMS by dividing by sqrt(num_channels).
    norm = norm / torch.sqrt(torch.tensor(x.shape[1]).to(x))
    return x / (norm + eps)
@MODULES.register_module()
@NORM_LAYERS.register_module()
class PixelNorm(nn.Module):
    """Pixel Normalization.

    This module is proposed in:
    Progressive Growing of GANs for Improved Quality, Stability, and Variation

    Args:
        in_channels (int | None, optional): Number of input channels. Not
            used by this layer; presumably accepted only to match the
            norm-layer builder interface — TODO confirm. Defaults to None.
        eps (float, optional): Epsilon value. Defaults to 1e-6.
    """

    # Abbreviation used by MMCV when naming norm layers.
    _abbr_ = 'pn'

    def __init__(self, in_channels=None, eps=1e-6):
        super().__init__()
        self.eps = eps

    def forward(self, x):
        """Forward function.

        Args:
            x (torch.Tensor): Tensor to be normalized.

        Returns:
            torch.Tensor: Normalized tensor.
        """
        return pixel_norm(x, eps=self.eps)
@PLUGIN_LAYERS.register_module()
class EqualizedLRConvModule(ConvModule):
    r"""Equalized LR ConvModule.

    Inherits the default ``mmcv.cnn.ConvModule`` and applies equalized
    learning rate to its convolution. The equalized learning rate is
    proposed in:

    Progressive Growing of GANs for Improved Quality, Stability, and Variation

    Note that, the initialization of ``self.conv`` will be overwritten as
    :math:`\mathcal{N}(0, 1)`.

    Args:
        equalized_lr_cfg (dict | None, optional): Config for ``EqualizedLR``.
            If ``None``, equalized learning rate is ignored. Defaults to
            dict(mode='fan_in').
    """

    def __init__(self, *args, equalized_lr_cfg=dict(mode='fan_in'), **kwargs):
        super().__init__(*args, **kwargs)
        self.with_equalized_lr = equalized_lr_cfg is not None
        if not self.with_equalized_lr:
            return
        self.conv = equalized_lr(self.conv, **equalized_lr_cfg)
        # The equalized-lr trick expects standard Gaussian weights, so the
        # conv initialization done by ConvModule is overwritten here.
        self._init_conv_weights()

    def _init_conv_weights(self):
        """Initialize conv weights as described in PGGAN."""
        normal_init(self.conv)
@PLUGIN_LAYERS.register_module()
class EqualizedLRConvUpModule(EqualizedLRConvModule):
    r"""Equalized LR (Upsample + Conv) Module.

    Inherits ``EqualizedLRConvModule`` and performs upsampling before the
    convolution. As for upsampling, in addition to the sampling layer in
    MMCV, we also offer the "fused_nn" type. "fused_nn" denotes fusing
    upsampling and convolution. The fusion is modified from the official
    Tensorflow implementation in:
    https://github.com/tkarras/progressive_growing_of_gans/blob/master/networks.py#L86

    Args:
        upsample (dict | None, optional): Config for upsampling operation. If
            ``None``, upsampling is ignored. If you need a faster fused version as
            the official PGGAN in Tensorflow, you should set it as
            ``dict(type='fused_nn')``. Defaults to
            ``dict(type='nearest', scale_factor=2)``.
    """

    def __init__(self,
                 *args,
                 upsample=dict(type='nearest', scale_factor=2),
                 **kwargs):
        super().__init__(*args, **kwargs)
        self.with_upsample = upsample is not None
        if not self.with_upsample:
            return
        if upsample.get('type') == 'fused_nn':
            # The fused path rewrites the kernel of a transposed conv, so it
            # is only valid for nn.ConvTranspose2d.
            assert isinstance(self.conv, nn.ConvTranspose2d)
            self.conv.register_forward_pre_hook(
                EqualizedLRConvUpModule.fused_nn_hook)
        else:
            self.upsample_layer = build_upsample_layer(upsample)

    def forward(self, x, **kwargs):
        """Forward function.

        Args:
            x (Tensor): Input tensor with shape (n, c, h, w).

        Returns:
            Tensor: Forward results.
        """
        # ``upsample_layer`` only exists for the non-fused configuration.
        if hasattr(self, 'upsample_layer'):
            x = self.upsample_layer(x)
        return super().forward(x, **kwargs)

    @staticmethod
    def fused_nn_hook(module, inputs):
        """Standard interface for forward pre hooks."""
        # Zero-pad the kernel by one on each spatial side, then sum the four
        # diagonally shifted copies — the fused equivalent of nearest
        # upsampling followed by the transposed convolution.
        padded = F.pad(module.weight, (1, 1, 1, 1))
        fused = (padded[..., 1:, 1:] + padded[..., 1:, :-1] +
                 padded[..., :-1, 1:] + padded[..., :-1, :-1])
        module.weight = fused
@PLUGIN_LAYERS.register_module()
class EqualizedLRConvDownModule(EqualizedLRConvModule):
    r"""Equalized LR (Conv + Downsample) Module.

    In this module, we inherit ``EqualizedLRConvModule`` and adopt
    downsampling after convolution. As for downsampling, we provide two modes
    of "avgpool" and "fused_pool". "avgpool" denotes the commonly used average
    pooling operation, while "fused_pool" represents fusing downsampling and
    convolution. The fusion is modified from the official Tensorflow
    implementation in:
    https://github.com/tkarras/progressive_growing_of_gans/blob/master/networks.py#L109

    Args:
        downsample (dict | callable | None, optional): Config for the
            downsampling operation, or a callable used directly as the
            downsampling layer. If ``None``, downsampling is ignored.
            Currently, we support the config types of
            ["avgpool", "fused_pool"]. Defaults to dict(type='fused_pool').
    """

    def __init__(self, *args, downsample=dict(type='fused_pool'), **kwargs):
        super().__init__(*args, **kwargs)
        self.with_downsample = downsample is not None
        if self.with_downsample:
            # Fix: handle callables before treating ``downsample`` as a
            # config dict. Previously ``downsample_cfg.pop('type')`` ran
            # unconditionally, raising AttributeError for callables and
            # making the callable branch unreachable.
            if callable(downsample):
                self.downsample = downsample
            else:
                # Copy before popping so the caller's config dict is not
                # mutated.
                downsample_cfg = deepcopy(downsample)
                type_ = downsample_cfg.pop('type')
                if type_ == 'avgpool':
                    self.downsample = nn.AvgPool2d(2, 2)
                elif type_ == 'fused_pool':
                    # Fuse the 2x2 average pooling into the conv kernel via
                    # a forward pre hook.
                    self.conv.register_forward_pre_hook(
                        EqualizedLRConvDownModule.fused_avgpool_hook)
                else:
                    raise NotImplementedError(
                        'Currently, we only support ["avgpool", "fused_pool"] as '
                        f'the type of downsample, but got {type_} instead.')

    def forward(self, x, **kwargs):
        """Forward function.

        Args:
            x (Tensor): Input tensor with shape (n, c, h, w).

        Returns:
            Tensor: Forward results.
        """
        x = super().forward(x, **kwargs)
        # ``downsample`` only exists for the "avgpool" and callable modes;
        # the fused mode downsamples inside the convolution itself.
        if hasattr(self, 'downsample'):
            x = self.downsample(x)
        return x

    @staticmethod
    def fused_avgpool_hook(module, inputs):
        """Standard interface for forward pre hooks."""
        weight = module.weight
        # pad the last two dimensions
        weight = F.pad(weight, (1, 1, 1, 1))
        # Average the four shifted copies of the kernel — equivalent to
        # fusing a 2x2 average pool into the convolution.
        weight = (weight[..., 1:, 1:] + weight[..., 1:, :-1] +
                  weight[..., :-1, 1:] + weight[..., :-1, :-1]) * 0.25
        module.weight = weight
@PLUGIN_LAYERS.register_module()
class EqualizedLRLinearModule(nn.Linear):
    r"""Equalized LR LinearModule.

    Applies equalized learning rate to ``nn.Linear``. The equalized
    learning rate is proposed in:

    Progressive Growing of GANs for Improved Quality, Stability, and Variation

    Note that, the initialization of ``self.weight`` will be overwritten as
    :math:`\mathcal{N}(0, 1)`.

    Args:
        equalized_lr_cfg (dict | None, optional): Config for ``EqualizedLR``.
            If ``None``, equalized learning rate is ignored. Defaults to
            dict(mode='fan_in').
    """

    def __init__(self, *args, equalized_lr_cfg=dict(mode='fan_in'), **kwargs):
        super().__init__(*args, **kwargs)
        self.with_equalized_lr = equalized_lr_cfg is not None
        # lr_mul is only consumed by the weight initialization below; it
        # stays at the neutral value 1 when equalized lr is disabled.
        if self.with_equalized_lr:
            self.lr_mul = equalized_lr_cfg.get('lr_mul', 1.)
        else:
            self.lr_mul = 1.
        if self.with_equalized_lr:
            equalized_lr(self, **equalized_lr_cfg)
            self._init_linear_weights()

    def _init_linear_weights(self):
        """Initialize linear weights as described in PGGAN."""
        nn.init.normal_(self.weight, 0, 1. / self.lr_mul)
        if self.bias is not None:
            nn.init.constant_(self.bias, 0.)
@MODULES.register_module()
class PGGANNoiseTo2DFeat(nn.Module):
    """Transform a noise vector into a 4x4 2D feature map.

    The noise is (optionally) pixel-normalized, projected with an
    equalized-lr linear layer, reshaped to ``(n, out_channels, 4, 4)`` and
    shifted by a learnable per-channel bias. Activation and normalization
    are applied according to ``order``.

    Args:
        noise_size (int): Size of the input noise vector.
        out_channels (int): Number of channels of the output feature map.
        act_cfg (dict | None, optional): Config for the activation layer.
            Defaults to ``dict(type='LeakyReLU', negative_slope=0.2)``.
        norm_cfg (dict | None, optional): Config for the normalization
            layer. Defaults to ``dict(type='PixelNorm')``.
        normalize_latent (bool, optional): Whether to apply pixel norm to
            the input noise. Defaults to True.
        order (tuple[str], optional): Order of the 'linear', 'act' and
            'norm' steps. Defaults to ``('linear', 'act', 'norm')``.
    """

    def __init__(self,
                 noise_size,
                 out_channels,
                 act_cfg=dict(type='LeakyReLU', negative_slope=0.2),
                 norm_cfg=dict(type='PixelNorm'),
                 normalize_latent=True,
                 order=('linear', 'act', 'norm')):
        super().__init__()
        self.noise_size = noise_size
        self.out_channels = out_channels
        self.normalize_latent = normalize_latent
        self.with_activation = act_cfg is not None
        self.with_norm = norm_cfg is not None
        self.order = order
        assert len(order) == 3 and set(order) == set(
            ['linear', 'act', 'norm'])

        # w/o bias, because the bias is added after reshaping the tensor to
        # 2D feature
        self.linear = EqualizedLRLinearModule(
            noise_size,
            out_channels * 16,
            equalized_lr_cfg=dict(gain=np.sqrt(2) / 4),
            bias=False)

        if self.with_activation:
            self.activation = build_activation_layer(act_cfg)

        # add bias for reshaped 2D feature.
        self.register_parameter(
            'bias', nn.Parameter(torch.zeros(1, out_channels, 1, 1)))

        if self.with_norm:
            _, self.norm = build_norm_layer(norm_cfg, out_channels)

    def forward(self, x):
        """Forward function.

        Args:
            x (Tensor): Input noise tensor with shape (n, c).

        Returns:
            Tensor: Forward results with shape (n, c, 4, 4).
        """
        assert x.ndim == 2
        if self.normalize_latent:
            x = pixel_norm(x)
        for step in self.order:
            if step == 'linear':
                x = self.linear(x)
                # [n, c, 4, 4]
                x = torch.reshape(x, (-1, self.out_channels, 4, 4))
                x = x + self.bias
            elif step == 'act' and self.with_activation:
                x = self.activation(x)
            elif step == 'norm' and self.with_norm:
                x = self.norm(x)
        return x
class PGGANDecisionHead(nn.Module):
    """Decision head used in PGGAN.

    Flattens the input feature map and applies two equalized-lr linear
    layers, with an optional activation in between and an optional output
    activation.

    Args:
        in_channels (int): Number of input features after flattening.
        mid_channels (int): Number of hidden features.
        out_channels (int): Number of output features.
        bias (bool, optional): Whether to use bias in the linear layers.
            Defaults to True.
        equalized_lr_cfg (dict | None, optional): Config of ``EqualizedLR``
            for the second linear layer. When truthy, the first linear layer
            always uses ``gain=2**0.5``; otherwise equalized lr is disabled
            for the first layer as well. Defaults to ``dict(gain=1)``.
        act_cfg (dict | None, optional): Config for the intermediate
            activation. Defaults to
            ``dict(type='LeakyReLU', negative_slope=0.2)``.
        out_act (dict | None, optional): Config for the output activation.
            Defaults to None.
    """

    def __init__(self,
                 in_channels,
                 mid_channels,
                 out_channels,
                 bias=True,
                 equalized_lr_cfg=dict(gain=1),
                 act_cfg=dict(type='LeakyReLU', negative_slope=0.2),
                 out_act=None):
        super().__init__()
        self.in_channels = in_channels
        self.mid_channels = mid_channels
        self.out_channels = out_channels
        self.with_activation = act_cfg is not None
        self.with_out_activation = out_act is not None

        # setup linear layers
        # dirty code for supporting default mode in PGGAN
        first_lr_cfg = dict(gain=2**0.5) if equalized_lr_cfg else None

        self.linear0 = EqualizedLRLinearModule(
            self.in_channels,
            self.mid_channels,
            bias=bias,
            equalized_lr_cfg=first_lr_cfg)
        self.linear1 = EqualizedLRLinearModule(
            self.mid_channels,
            self.out_channels,
            bias=bias,
            equalized_lr_cfg=equalized_lr_cfg)

        # setup activation layers
        if self.with_activation:
            self.activation = build_activation_layer(act_cfg)
        if self.with_out_activation:
            self.out_activation = build_activation_layer(out_act)

    def forward(self, x):
        """Forward function.

        Args:
            x (Tensor): Input tensor with shape (n, c, h, w).

        Returns:
            Tensor: Forward results.
        """
        # Flatten any spatial dimensions before the linear layers.
        if x.ndim > 2:
            x = torch.reshape(x, (x.shape[0], -1))

        x = self.linear0(x)
        if self.with_activation:
            x = self.activation(x)

        x = self.linear1(x)
        if self.with_out_activation:
            x = self.out_activation(x)
        return x
@MODULES.register_module()
@PLUGIN_LAYERS.register_module()
class MiniBatchStddevLayer(nn.Module):
    """Minibatch standard deviation.

    Appends one channel holding the group-wise standard deviation of the
    features across the batch.

    Args:
        group_size (int, optional): The size of groups in batch dimension.
            Defaults to 4.
        eps (float, optional): Epsilon value to avoid computation error.
            Defaults to 1e-8.
        gather_all_batch (bool, optional): Whether gather batch from all GPUs.
            Defaults to False.
    """

    def __init__(self, group_size=4, eps=1e-8, gather_all_batch=False):
        super().__init__()
        self.group_size = group_size
        self.eps = eps
        self.gather_all_batch = gather_all_batch
        if self.gather_all_batch:
            assert torch.distributed.is_initialized(), (
                'Only in distributed training can the tensors be all gathered.')

    def forward(self, x):
        """Forward function.

        Args:
            x (Tensor): Input tensor with shape (n, c, h, w).

        Returns:
            Tensor: Forward results.
        """
        if self.gather_all_batch:
            x = torch.cat(AllGatherLayer.apply(x), dim=0)

        # batch size should be smaller than or equal to group size. Otherwise,
        # batch size should be divisible by the group size.
        assert x.shape[0] <= self.group_size or x.shape[
            0] % self.group_size == 0, (
                'Batch size be smaller than or equal '
                'to group size. Otherwise,'
                ' batch size should be divisible by the group size.'
                f'But got batch size {x.shape[0]},'
                f' group size {self.group_size}')

        n, c, h, w = x.shape
        group_size = min(n, self.group_size)

        # split the batch into groups: [G, M, C, H, W]
        feat = torch.reshape(x, (group_size, -1, c, h, w))
        # center each group: [G, M, C, H, W]
        feat = feat - feat.mean(dim=0, keepdim=True)
        # variance over the group dimension; in pt>=1.7 `.square()` could
        # replace `.pow(2)`. [M, C, H, W]
        feat = feat.pow(2).mean(dim=0, keepdim=False)
        feat = torch.sqrt(feat + self.eps)
        # average over channel and spatial dims: [M, 1, 1, 1]
        feat = feat.mean(dim=(1, 2, 3), keepdim=True)
        # tile back to the full batch and spatial size
        feat = feat.repeat(group_size, 1, h, w)
        return torch.cat([x, feat], dim=1)
mmgen/models/architectures/pix2pix/__init__.py
0 → 100644
View file @
b7536f78
# Copyright (c) OpenMMLab. All rights reserved.
from .generator_discriminator import PatchDiscriminator, UnetGenerator
from .modules import UnetSkipConnectionBlock, generation_init_weights

# Public API of the pix2pix architecture package.
__all__ = [
    'PatchDiscriminator', 'UnetGenerator', 'UnetSkipConnectionBlock',
    'generation_init_weights'
]
mmgen/models/architectures/pix2pix/generator_discriminator.py
0 → 100644
View file @
b7536f78
# Copyright (c) OpenMMLab. All rights reserved.
import
torch.nn
as
nn
from
mmcv.cnn
import
ConvModule
,
build_conv_layer
from
mmcv.runner
import
load_checkpoint
from
mmgen.models.builder
import
MODULES
from
mmgen.utils
import
get_root_logger
from
.modules
import
UnetSkipConnectionBlock
,
generation_init_weights
@MODULES.register_module()
class UnetGenerator(nn.Module):
    """Construct the Unet-based generator from the innermost layer to the
    outermost layer, which is a recursive process.

    Args:
        in_channels (int): Number of channels in input images.
        out_channels (int): Number of channels in output images.
        num_down (int): Number of downsamplings in Unet. If `num_down` is 8,
            the image with size 256x256 will become 1x1 at the bottleneck.
            Default: 8.
        base_channels (int): Number of channels at the last conv layer.
            Default: 64.
        norm_cfg (dict): Config dict to build norm layer. Default:
            `dict(type='BN')`.
        use_dropout (bool): Whether to use dropout layers. Default: False.
        init_cfg (dict): Config dict for initialization.
            `type`: The name of our initialization method. Default: 'normal'.
            `gain`: Scaling factor for normal, xavier and orthogonal.
            Default: 0.02.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 num_down=8,
                 base_channels=64,
                 norm_cfg=dict(type='BN'),
                 use_dropout=False,
                 init_cfg=dict(type='normal', gain=0.02)):
        super().__init__()
        # We use norm layers in the unet generator.
        assert isinstance(norm_cfg, dict), ("'norm_cfg' should be dict, but"
                                            f'got {type(norm_cfg)}')
        assert 'type' in norm_cfg, "'norm_cfg' must have key 'type'"

        # add the innermost layer
        block = UnetSkipConnectionBlock(
            base_channels * 8,
            base_channels * 8,
            in_channels=None,
            submodule=None,
            norm_cfg=norm_cfg,
            is_innermost=True)
        # add intermediate layers with base_channels * 8 filters
        for _ in range(num_down - 5):
            block = UnetSkipConnectionBlock(
                base_channels * 8,
                base_channels * 8,
                in_channels=None,
                submodule=block,
                norm_cfg=norm_cfg,
                use_dropout=use_dropout)
        # gradually reduce the number of filters
        # from base_channels * 8 to base_channels
        for mult in (4, 2, 1):
            block = UnetSkipConnectionBlock(
                base_channels * mult,
                base_channels * mult * 2,
                in_channels=None,
                submodule=block,
                norm_cfg=norm_cfg)
        # add the outermost layer
        self.model = UnetSkipConnectionBlock(
            out_channels,
            base_channels,
            in_channels=in_channels,
            submodule=block,
            is_outermost=True,
            norm_cfg=norm_cfg)

        if init_cfg is None:
            self.init_type = 'normal'
            self.init_gain = 0.02
        else:
            self.init_type = init_cfg.get('type', 'normal')
            self.init_gain = init_cfg.get('gain', 0.02)

    def forward(self, x):
        """Forward function.

        Args:
            x (Tensor): Input tensor with shape (n, c, h, w).

        Returns:
            Tensor: Forward results.
        """
        return self.model(x)

    def init_weights(self, pretrained=None, strict=True):
        """Initialize weights for the model.

        Args:
            pretrained (str, optional): Path for pretrained weights. If given
                None, pretrained weights will not be loaded. Default: None.
            strict (bool, optional): Whether to allow different params for the
                model and checkpoint. Default: True.
        """
        if pretrained is None:
            generation_init_weights(
                self, init_type=self.init_type, init_gain=self.init_gain)
        elif isinstance(pretrained, str):
            logger = get_root_logger()
            load_checkpoint(self, pretrained, strict=strict, logger=logger)
        else:
            raise TypeError("'pretrained' must be a str or None. "
                            f'But received {type(pretrained)}.')
@MODULES.register_module()
class PatchDiscriminator(nn.Module):
    """A PatchGAN discriminator.

    Args:
        in_channels (int): Number of channels in input images.
        base_channels (int): Number of channels at the first conv layer.
            Default: 64.
        num_conv (int): Number of stacked intermediate convs (excluding input
            and output conv). Default: 3.
        norm_cfg (dict): Config dict to build norm layer. Default:
            `dict(type='BN')`.
        init_cfg (dict): Config dict for initialization.
            `type`: The name of our initialization method. Default: 'normal'.
            `gain`: Scaling factor for normal, xavier and orthogonal.
            Default: 0.02.
    """

    def __init__(self,
                 in_channels,
                 base_channels=64,
                 num_conv=3,
                 norm_cfg=dict(type='BN'),
                 init_cfg=dict(type='normal', gain=0.02)):
        super().__init__()
        assert isinstance(norm_cfg, dict), ("'norm_cfg' should be dict, but"
                                            f'got {type(norm_cfg)}')
        assert 'type' in norm_cfg, "'norm_cfg' must have key 'type'"

        # We use norm layers in the patch discriminator.
        # Only for IN, use bias since it does not have affine parameters.
        use_bias = norm_cfg['type'] == 'IN'

        kernel_size = 4
        padding = 1
        lrelu_cfg = dict(type='LeakyReLU', negative_slope=0.2)

        # input layer
        layers = [
            ConvModule(
                in_channels=in_channels,
                out_channels=base_channels,
                kernel_size=kernel_size,
                stride=2,
                padding=padding,
                bias=True,
                norm_cfg=None,
                act_cfg=lrelu_cfg)
        ]

        # stacked intermediate layers,
        # gradually increasing the number of filters
        multiple_now = 1
        for n in range(1, num_conv):
            multiple_prev, multiple_now = multiple_now, min(2**n, 8)
            layers.append(
                ConvModule(
                    in_channels=base_channels * multiple_prev,
                    out_channels=base_channels * multiple_now,
                    kernel_size=kernel_size,
                    stride=2,
                    padding=padding,
                    bias=use_bias,
                    norm_cfg=norm_cfg,
                    act_cfg=lrelu_cfg))
        multiple_prev, multiple_now = multiple_now, min(2**num_conv, 8)
        layers.append(
            ConvModule(
                in_channels=base_channels * multiple_prev,
                out_channels=base_channels * multiple_now,
                kernel_size=kernel_size,
                stride=1,
                padding=padding,
                bias=use_bias,
                norm_cfg=norm_cfg,
                act_cfg=lrelu_cfg))

        # output one-channel prediction map
        layers.append(
            build_conv_layer(
                dict(type='Conv2d'),
                base_channels * multiple_now,
                1,
                kernel_size=kernel_size,
                stride=1,
                padding=padding))

        self.model = nn.Sequential(*layers)

        if init_cfg is None:
            self.init_type = 'normal'
            self.init_gain = 0.02
        else:
            self.init_type = init_cfg.get('type', 'normal')
            self.init_gain = init_cfg.get('gain', 0.02)

    def forward(self, x):
        """Forward function.

        Args:
            x (Tensor): Input tensor with shape (n, c, h, w).

        Returns:
            Tensor: Forward results.
        """
        return self.model(x)

    def init_weights(self, pretrained=None):
        """Initialize weights for the model.

        Args:
            pretrained (str, optional): Path for pretrained weights. If given
                None, pretrained weights will not be loaded. Default: None.
        """
        if pretrained is None:
            generation_init_weights(
                self, init_type=self.init_type, init_gain=self.init_gain)
        elif isinstance(pretrained, str):
            logger = get_root_logger()
            # NOTE(review): strict=False here, unlike UnetGenerator which
            # exposes a ``strict`` parameter.
            load_checkpoint(self, pretrained, strict=False, logger=logger)
        else:
            raise TypeError("'pretrained' must be a str or None. "
                            f'But received {type(pretrained)}.')
Prev
1
2
3
4
5
6
7
8
…
10
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment