stylegan2_mmcv

1401de15 · dongchy920 · 1401de15 · 1401de15 · 1401de15 · 1401de15
Commit 1401de15 authored Jun 28, 2024 by dongchy920
20 changed files
--- a/build/lib/mmgen/models/architectures/arcface/__init__.py
+++ b/build/lib/mmgen/models/architectures/arcface/__init__.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from .id_loss import IDLossModel
+
+__all__ = ['IDLossModel']
--- a/build/lib/mmgen/models/architectures/arcface/helpers.py
+++ b/build/lib/mmgen/models/architectures/arcface/helpers.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from collections import namedtuple
+
+import torch
+from torch.nn import (AdaptiveAvgPool2d, BatchNorm2d, Conv2d, MaxPool2d,
+                      Module, PReLU, ReLU, Sequential, Sigmoid)
+
+# yapf: disable
+"""
+ArcFace implementation from [TreB1eN](https://github.com/TreB1eN/InsightFace_Pytorch) # isort:skip  # noqa
+"""
+# yapf: enable
+
+
+class Flatten(Module):
+    """Collapse every non-batch dimension of a tensor into a single one."""
+
+    def forward(self, input):
+        """Return ``input`` viewed as ``(input.size(0), -1)``."""
+        batch_size = input.size(0)
+        return input.view(batch_size, -1)
+
+
+def l2_norm(input, axis=1, eps=1e-12):
+    """L2-normalize a tensor along one axis.
+
+    Args:
+        input (torch.Tensor): The input tensor.
+        axis (int, optional): Specifies which axis of input to calculate the
+            norm across. Defaults to 1.
+        eps (float, optional): Lower bound applied to the norm before the
+            division. Defaults to 1e-12.
+
+    Returns:
+        Tensor: ``input`` divided by its L2 norm along ``axis``. Slices whose
+            norm is below ``eps`` are divided by ``eps`` instead.
+    """
+    norm = torch.norm(input, 2, axis, True)
+    # Clamp the denominator so an all-zero slice maps to zeros rather than
+    # NaN; slices with a norm above ``eps`` are unaffected.
+    return torch.div(input, torch.clamp(norm, min=eps))
+
+
+class Bottleneck(namedtuple('Block', ['in_channel', 'depth', 'stride'])):
+    """A named tuple describing a ResNet block.
+
+    It carries the fields ``in_channel``, ``depth`` and ``stride``; the
+    underlying tuple type is named ``Block``.
+    """
+
+
+def get_block(in_channel, depth, num_units, stride=2):
+    """Build the unit configs that make up one block.
+
+    The first unit maps ``in_channel`` to ``depth`` with the given stride;
+    the remaining ``num_units - 1`` units keep ``depth`` channels and use
+    stride 1.
+
+    Args:
+        in_channel (int): Input channels.
+        depth (int): Output channels.
+        num_units (int): Number of unit modules.
+        stride (int, optional): Conv2d stride. Defaults to 2.
+
+    Returns:
+        list: A list of unit modules' config.
+    """
+    units = [Bottleneck(in_channel, depth, stride)]
+    units.extend(Bottleneck(depth, depth, 1) for _ in range(num_units - 1))
+    return units
+
+
+def get_blocks(num_layers):
+    """Build the four block configs of the backbone.
+
+    Every supported depth uses the same channel progression
+    (64->64, 64->128, 128->256, 256->512); only the number of units per
+    block differs.
+
+    Args:
+        num_layers (int): Number of ConvBlock layers in backbone.
+
+    Raises:
+        ValueError: `num_layers` must be one of [50, 100, 152].
+
+    Returns:
+        list: A list of block configs.
+    """
+    if num_layers == 50:
+        unit_counts = (3, 4, 14, 3)
+    elif num_layers == 100:
+        unit_counts = (3, 13, 30, 3)
+    elif num_layers == 152:
+        unit_counts = (3, 8, 36, 3)
+    else:
+        raise ValueError(
+            'Invalid number of layers: {}. Must be one of [50, 100, 152]'.
+            format(num_layers))
+
+    stage_channels = ((64, 64), (64, 128), (128, 256), (256, 512))
+    return [
+        get_block(in_channel=c_in, depth=c_out, num_units=count)
+        for (c_in, c_out), count in zip(stage_channels, unit_counts)
+    ]
+
+
+class SEModule(Module):
+    """Squeeze-and-Excitation gate.
+
+    The input is globally average-pooled, passed through two bias-free 1x1
+    convolutions (ReLU in between, sigmoid at the end) and the result is used
+    to rescale the input.
+
+    Args:
+        channels (int): Input channels.
+        reduction (int): Intermediate channels reduction ratio.
+    """
+
+    def __init__(self, channels, reduction):
+        super().__init__()
+        squeezed = channels // reduction
+        self.avg_pool = AdaptiveAvgPool2d(1)
+        self.fc1 = Conv2d(
+            channels, squeezed, kernel_size=1, padding=0, bias=False)
+        self.relu = ReLU(inplace=True)
+        self.fc2 = Conv2d(
+            squeezed, channels, kernel_size=1, padding=0, bias=False)
+        self.sigmoid = Sigmoid()
+
+    def forward(self, x):
+        """Forward Function."""
+        gate = self.avg_pool(x)
+        gate = self.relu(self.fc1(gate))
+        gate = self.sigmoid(self.fc2(gate))
+        return x * gate
+
+
+class bottleneck_IR(Module):
+    """Intermediate Resblock of bottleneck.
+
+    The residual branch is BN -> 3x3 conv -> PReLU -> strided 3x3 conv -> BN.
+    The shortcut is a 1x1 max-pool with the given stride when the channel
+    count is unchanged, otherwise a strided 1x1 conv followed by BN.
+
+    Args:
+        in_channel (int): Input channels.
+        depth (int): Output channels.
+        stride (int): Conv2d stride.
+    """
+
+    def __init__(self, in_channel, depth, stride):
+        """Build the shortcut and residual branches.
+
+        Args:
+            in_channel (int): Input channels.
+            depth (int): Output channels.
+            stride (int): Conv2d stride.
+        """
+        super().__init__()
+        if in_channel != depth:
+            shortcut = Sequential(
+                Conv2d(in_channel, depth, (1, 1), stride, bias=False),
+                BatchNorm2d(depth))
+        else:
+            shortcut = MaxPool2d(1, stride)
+        self.shortcut_layer = shortcut
+
+        residual_ops = [
+            BatchNorm2d(in_channel),
+            Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False),
+            PReLU(depth),
+            Conv2d(depth, depth, (3, 3), stride, 1, bias=False),
+            BatchNorm2d(depth),
+        ]
+        self.res_layer = Sequential(*residual_ops)
+
+    def forward(self, x):
+        """Forward function."""
+        identity = self.shortcut_layer(x)
+        return self.res_layer(x) + identity
+
+
+class bottleneck_IR_SE(Module):
+    """Intermediate Resblock of bottleneck with SEModule.
+
+    Same layout as the plain IR block, with an ``SEModule(depth, 16)``
+    appended to the end of the residual branch.
+
+    Args:
+        in_channel (int): Input channels.
+        depth (int): Output channels.
+        stride (int): Conv2d stride.
+    """
+
+    def __init__(self, in_channel, depth, stride):
+        super().__init__()
+        if in_channel != depth:
+            shortcut = Sequential(
+                Conv2d(in_channel, depth, (1, 1), stride, bias=False),
+                BatchNorm2d(depth))
+        else:
+            shortcut = MaxPool2d(1, stride)
+        self.shortcut_layer = shortcut
+
+        residual_ops = [
+            BatchNorm2d(in_channel),
+            Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False),
+            PReLU(depth),
+            Conv2d(depth, depth, (3, 3), stride, 1, bias=False),
+            BatchNorm2d(depth),
+            SEModule(depth, 16),
+        ]
+        self.res_layer = Sequential(*residual_ops)
+
+    def forward(self, x):
+        """Forward function."""
+        identity = self.shortcut_layer(x)
+        return self.res_layer(x) + identity
--- a/build/lib/mmgen/models/architectures/arcface/id_loss.py
+++ b/build/lib/mmgen/models/architectures/arcface/id_loss.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import mmcv
+import torch
+from torch import nn
+
+from mmgen.models.builder import MODULES
+from .model_irse import Backbone
+
+
+@MODULES.register_module('ArcFace')
+class IDLossModel(nn.Module):
+    """Identity loss based on a pretrained ArcFace IR-SE50 backbone.
+
+    Args:
+        ir_se50_weights (str, optional): URL of the IR-SE50 checkpoint,
+            handed to ``torch.hub.load_state_dict_from_url``. If None, the
+            class-level default link is used. Defaults to None.
+        device (str, optional): Device the backbone is moved to.
+            Defaults to 'cuda'.
+    """
+    # ir se50 weight download link
+    _ir_se50_url = 'https://gg0ltg.by.files.1drv.com/y4m3fNNszG03z9n8JQ7EhdtQKW8tQVQMFBisPVRgoXi_UfP8pKSSqv8RJNmHy2JampcPmEazo_Mx6NTFSqBpZmhPniROm9uNoghnzaavvYpxkCfiNmDH9YyIF3g-0nwt6bsjk2X80JDdL5z88OAblSDmB-kuQkWSWvA9BM3Xt8DHMCY8lO4HOQCZ5YWUtFyPAVwEyzTGDM-JRA5EJoN2bF1cg'  # noqa
+
+    def __init__(self, ir_se50_weights=None, device='cuda'):
+        super(IDLossModel, self).__init__()
+        mmcv.print_log('Loading ResNet ArcFace', 'mmgen')
+        self.facenet = Backbone(
+            input_size=112, num_layers=50, drop_ratio=0.6, mode='ir_se')
+        if ir_se50_weights is None:
+            ir_se50_weights = self._ir_se50_url
+        # Deserialize on CPU so a checkpoint saved from a GPU still loads on
+        # hosts without CUDA; the backbone is moved to ``device`` below.
+        self.facenet.load_state_dict(
+            torch.hub.load_state_dict_from_url(
+                ir_se50_weights, map_location='cpu'))
+        self.pool = torch.nn.AdaptiveAvgPool2d((256, 256))
+        self.face_pool = torch.nn.AdaptiveAvgPool2d((112, 112))
+        self.facenet = self.facenet.eval().to(device)
+
+    def extract_feats(self, x):
+        """Extract backbone features from a batch of images.
+
+        Inputs whose third dimension is not 256 are first pooled to 256x256.
+        The region ``[35:223, 32:220]`` is then cropped, pooled to 112x112
+        and fed to the backbone.
+
+        Args:
+            x (torch.Tensor): Image batch, indexed as (N, C, H, W).
+
+        Returns:
+            torch.Tensor: Output of the backbone for the cropped images.
+        """
+        if x.shape[2] != 256:
+            x = self.pool(x)
+        x = x[:, :, 35:223, 32:220]  # Crop interesting region
+        x = self.face_pool(x)
+        x_feats = self.facenet(x)
+        return x_feats
+
+    def forward(self, pred=None, gt=None):
+        """Compute the identity loss between ``pred`` and ``gt``.
+
+        Args:
+            pred (torch.Tensor): Generated images.
+            gt (torch.Tensor): Target images. Their features are detached.
+
+        Returns:
+            tuple: ``(loss, sim_improvement)``. ``loss`` is the batch mean of
+                ``1 - <feat(pred_i), feat(gt_i)>``. ``sim_improvement`` is a
+                placeholder that is always zero and is kept only so the
+                return signature stays unchanged.
+        """
+        n_samples = gt.shape[0]
+        y_feats = self.extract_feats(gt).detach()
+        y_hat_feats = self.extract_feats(pred)
+        # Row-wise dot products for the whole batch in a single op.
+        similarity = (y_hat_feats * y_feats).sum(dim=1)
+        loss = (1 - similarity).sum() / n_samples
+        sim_improvement = 0
+        # As before, an empty batch raises ZeroDivisionError here.
+        return loss, sim_improvement / n_samples
--- a/build/lib/mmgen/models/architectures/arcface/model_irse.py
+++ b/build/lib/mmgen/models/architectures/arcface/model_irse.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from torch.nn import (BatchNorm1d, BatchNorm2d, Conv2d, Dropout, Linear,
+                      Module, PReLU, Sequential)
+
+from .helpers import (Flatten, bottleneck_IR, bottleneck_IR_SE, get_blocks,
+                      l2_norm)
+
+# yapf: disable
+"""
+Modified Backbone implementation from [TreB1eN](https://github.com/TreB1eN/InsightFace_Pytorch) # isort:skip  # noqa
+"""
+# yapf: enable
+
+
+class Backbone(Module):
+    """Arcface backbone.
+
+    There are many repos follow this codes for facial recognition, and we also
+    follow this routine.
+    Ref: https://github.com/orpatashnik/StyleCLIP/blob/main/models/facial_recognition/helpers.py # noqa
+
+    The network is an input stem (3x3 conv, BN, PReLU), a body of residual
+    units and an output head (BN, dropout, flatten, linear to 512, BN1d).
+    The forward pass returns the L2-normalized head output.
+
+    Args:
+        input_size (int): Input size of image. Must be 112 or 224.
+        num_layers (int): Number of layer in backbone. Must be 50, 100 or
+            152.
+        mode (str, optional): Bottle neck mode. If set to 'ir_se', then
+            SEModule will be applied. Defaults to 'ir'.
+        drop_ratio (float, optional): Drop out ratio. Defaults to 0.4.
+        affine (bool, optional): Whether use affine in BatchNorm1d.
+            Defaults to True.
+    """
+
+    def __init__(self,
+                 input_size,
+                 num_layers,
+                 mode='ir',
+                 drop_ratio=0.4,
+                 affine=True):
+        super().__init__()
+        assert input_size in [112, 224], 'input_size should be 112 or 224'
+        assert num_layers in [50, 100, 152], \
+            'num_layers should be 50, 100 or 152'
+        assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se'
+        blocks = get_blocks(num_layers)
+        unit_module = bottleneck_IR if mode == 'ir' else bottleneck_IR_SE
+
+        self.input_layer = Sequential(
+            Conv2d(3, 64, (3, 3), 1, 1, bias=False), BatchNorm2d(64),
+            PReLU(64))
+
+        # Side length of the feature map that reaches the linear layer.
+        feat_size = 7 if input_size == 112 else 14
+        self.output_layer = Sequential(
+            BatchNorm2d(512), Dropout(drop_ratio), Flatten(),
+            Linear(512 * feat_size * feat_size, 512),
+            BatchNorm1d(512, affine=affine))
+
+        units = [
+            unit_module(cfg.in_channel, cfg.depth, cfg.stride)
+            for block in blocks for cfg in block
+        ]
+        self.body = Sequential(*units)
+
+    def forward(self, x):
+        """Forward function."""
+        feats = self.output_layer(self.body(self.input_layer(x)))
+        return l2_norm(feats)
+
+
+def IR_50(input_size):
+    """Build a 50-layer 'ir' Backbone (dropout 0.4, non-affine BN1d)."""
+    return Backbone(
+        input_size, num_layers=50, mode='ir', drop_ratio=0.4, affine=False)
+
+
+def IR_101(input_size):
+    """Build the ir-101 Backbone (``num_layers=100``, dropout 0.4)."""
+    return Backbone(
+        input_size, num_layers=100, mode='ir', drop_ratio=0.4, affine=False)
+
+
+def IR_152(input_size):
+    """Build a 152-layer 'ir' Backbone (dropout 0.4, non-affine BN1d)."""
+    return Backbone(
+        input_size, num_layers=152, mode='ir', drop_ratio=0.4, affine=False)
+
+
+def IR_SE_50(input_size):
+    """Build a 50-layer 'ir_se' Backbone (dropout 0.4, non-affine BN1d)."""
+    return Backbone(
+        input_size, num_layers=50, mode='ir_se', drop_ratio=0.4, affine=False)
+
+
+def IR_SE_101(input_size):
+    """Build the ir_se-101 Backbone (``num_layers=100``, dropout 0.4)."""
+    return Backbone(
+        input_size, num_layers=100, mode='ir_se', drop_ratio=0.4, affine=False)
+
+
+def IR_SE_152(input_size):
+    """Build a 152-layer 'ir_se' Backbone (dropout 0.4, non-affine BN1d)."""
+    return Backbone(
+        input_size, num_layers=152, mode='ir_se', drop_ratio=0.4, affine=False)
--- a/build/lib/mmgen/models/architectures/biggan/__init__.py
+++ b/build/lib/mmgen/models/architectures/biggan/__init__.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from .generator_discriminator import BigGANDiscriminator, BigGANGenerator
+from .generator_discriminator_deep import (BigGANDeepDiscriminator,
+                                           BigGANDeepGenerator)
+from .modules import (BigGANConditionBN, BigGANDeepDiscResBlock,
+                      BigGANDeepGenResBlock, BigGANDiscResBlock,
+                      BigGANGenResBlock, SelfAttentionBlock, SNConvModule)
+
+__all__ = [
+    'BigGANGenerator', 'BigGANGenResBlock', 'BigGANConditionBN',
+    'BigGANDiscriminator', 'SelfAttentionBlock', 'BigGANDiscResBlock',
+    'BigGANDeepDiscriminator', 'BigGANDeepGenerator', 'BigGANDeepDiscResBlock',
+    'BigGANDeepGenResBlock', 'SNConvModule'
+]
--- a/build/lib/mmgen/models/architectures/biggan/biggan_snmodule.py
+++ b/build/lib/mmgen/models/architectures/biggan/biggan_snmodule.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+# yapf:disable
+'''
+    Ref: Functions in this file are borrowed from https://github.com/ajbrock/BigGAN-PyTorch/blob/master/layers.py # noqa
+'''
+# yapf:enable
+
+
+def proj(x, y):
+    """Project ``x`` onto the direction ``y``.
+
+    Both arguments go through ``torch.mm``, so they must be 2-D tensors.
+
+    Args:
+        x (torch.Tensor): Projection vector x.
+        y (torch.Tensor): Direction vector y.
+
+    Returns:
+        torch.Tensor: Projection of x onto y.
+    """
+    scaled_direction = torch.mm(y, x.t()) * y
+    squared_length = torch.mm(y, y.t())
+    return scaled_direction / squared_length
+
+
+def gram_schmidt(x, ys):
+    """Remove from ``x`` its components along each vector in ``ys``.
+
+    The projections are subtracted one after another, each computed from the
+    already-reduced vector.
+
+    Args:
+        x (torch.Tensor): Vector to be added into the
+            orthogonal vectors.
+        ys (list[torch.Tensor]): A set of orthogonal vectors.
+
+    Returns:
+        torch.Tensor: Result of Gram–Schmidt orthogonalization.
+    """
+    residual = x
+    for basis in ys:
+        residual = residual - proj(residual, basis)
+    return residual
+
+
+def power_iteration(weight, u_list, update=True, eps=1e-12):
+    """Power iteration method for calculating spectral norm.
+
+    For each tracked vector in ``u_list`` one power-iteration step is run,
+    with every new vector orthogonalized against the ones already found in
+    this call. The vector updates are excluded from autograd, but the
+    singular values are computed with autograd enabled so that gradients can
+    flow from them back to ``weight``.
+
+    Args:
+        weight (torch.Tensor): Module weight.
+        u_list (list[torch.Tensor]): list of left singular vector.
+            The length of list equals to the simulation times.
+        update (bool, optional): Whether update left singular
+            vector. If True, the tensors in ``u_list`` are overwritten
+            in place. Defaults to True.
+        eps (float, optional): Vector Normalization epsilon.
+            Defaults to 1e-12.
+
+    Returns:
+        tuple[list[torch.Tensor]]: Tuple consist of three lists
+            which contain singular values, left singular
+            vector and right singular vector respectively.
+    """
+    us, vs, svs = [], [], []
+    for i, u in enumerate(u_list):
+        # The iterates are treated as constants: keep them (and the in-place
+        # buffer update) out of the autograd graph.
+        with torch.no_grad():
+            v = torch.matmul(u, weight)
+            v = F.normalize(gram_schmidt(v, vs), eps=eps)
+            vs.append(v)
+            u = torch.matmul(v, weight.t())
+            u = F.normalize(gram_schmidt(u, us), eps=eps)
+            us.append(u)
+            if update:
+                u_list[i][:] = u
+        # Computed outside ``no_grad`` so the singular value stays
+        # differentiable with respect to ``weight``.
+        svs.append(
+            torch.squeeze(torch.matmul(torch.matmul(v, weight.t()), u.t())))
+    return svs, us, vs
+
+
+class SpectralNorm(object):
+    """Spectral normalization base class.
+
+    Meant to be mixed into an ``nn.Module`` subclass that owns a ``weight``
+    attribute: it relies on ``register_buffer``, ``self.weight`` and
+    ``self.training`` being provided by that module.
+
+    Args:
+        num_svs (int): Number of singular values.
+        num_iters (int): Number of power iterations per step. Must be at
+            least 1, otherwise ``sn_weight`` has no estimate to divide by.
+        num_outputs (int): Number of output channels.
+        transpose (bool, optional): If set to `True`, weight
+            matrix will be transposed before power iteration.
+            Defaults to False.
+        eps (float, optional): Vector Normalization epsilon for
+            avoiding divide by zero. Defaults to 1e-12.
+    """
+
+    def __init__(self,
+                 num_svs,
+                 num_iters,
+                 num_outputs,
+                 transpose=False,
+                 eps=1e-12):
+        self.num_iters = num_iters
+        self.num_svs = num_svs
+        self.transpose = transpose
+        self.eps = eps
+        # Register a singular vector for each sv
+        for i in range(self.num_svs):
+            self.register_buffer('u%d' % i, torch.randn(1, num_outputs))
+            self.register_buffer('sv%d' % i, torch.ones(1))
+
+    @property
+    def u(self):
+        """Get left singular vectors."""
+        return [getattr(self, 'u%d' % i) for i in range(self.num_svs)]
+
+    @property
+    def sv(self):
+        """Get singular values."""
+        return [getattr(self, 'sv%d' % i) for i in range(self.num_svs)]
+
+    def sn_weight(self):
+        """Compute the spectrally-normalized weight.
+
+        Returns:
+            torch.Tensor: ``self.weight`` divided by the leading singular
+                value estimate of the flattened weight matrix.
+        """
+        W_mat = self.weight.view(self.weight.size(0), -1)
+        if self.transpose:
+            W_mat = W_mat.t()
+        # Apply num_iters power iterations
+        for _ in range(self.num_iters):
+            svs, us, vs = power_iteration(
+                W_mat, self.u, update=self.training, eps=self.eps)
+        # Update the svs
+        if self.training:
+            with torch.no_grad():
+                for i, sv in enumerate(svs):
+                    self.sv[i][:] = sv
+        # The spectral norm is the largest singular value, i.e. the first
+        # estimate; later entries are smaller ones found after deflation.
+        return self.weight / svs[0]
+
+
+class SNConv2d(nn.Conv2d, SpectralNorm):
+    """``nn.Conv2d`` whose weight is spectrally normalized on every forward.
+
+    Args:
+        in_channels (int): Number of channels in the input feature map.
+        out_channels (int): Number of channels produced by the convolution.
+        kernel_size (int): Size of the convolving kernel.
+        stride (int, optional): Stride of the convolution. Defaults to 1.
+        padding (int, optional): Zero-padding added to both sides of
+            the input. Defaults to 0.
+        dilation (int, optional): Spacing between kernel elements.
+            Defaults to 1.
+        groups (int, optional): Number of blocked connections from input
+            channels to output channels. Defaults to 1.
+        bias (bool, optional): Whether to use bias parameter.
+            Defaults to True.
+        num_svs (int): Number of singular values. Defaults to 1.
+        num_iters (int): Number of power iterations per step. Defaults to 1.
+        eps (float, optional): Vector Normalization epsilon for
+            avoiding divide by zero. Defaults to 1e-12.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 groups=1,
+                 bias=True,
+                 num_svs=1,
+                 num_iters=1,
+                 eps=1e-12):
+        nn.Conv2d.__init__(
+            self,
+            in_channels,
+            out_channels,
+            kernel_size,
+            stride=stride,
+            padding=padding,
+            dilation=dilation,
+            groups=groups,
+            bias=bias)
+        # One singular-vector buffer of width ``out_channels`` per tracked sv.
+        SpectralNorm.__init__(
+            self,
+            num_svs=num_svs,
+            num_iters=num_iters,
+            num_outputs=out_channels,
+            eps=eps)
+
+    def forward(self, x):
+        """Forward function."""
+        normalized_weight = self.sn_weight()
+        return F.conv2d(x, normalized_weight, self.bias, self.stride,
+                        self.padding, self.dilation, self.groups)
+
+
+class SNLinear(nn.Linear, SpectralNorm):
+    """``nn.Linear`` whose weight is spectrally normalized on every forward.
+
+    Args:
+        in_features (int): Number of channels in the input feature.
+        out_features (int): Number of channels in the out feature.
+        bias (bool, optional):  Whether to use bias parameter.
+            Defaults to True.
+        num_svs (int): Number of singular values. Defaults to 1.
+        num_iters (int): Number of power iterations per step. Defaults to 1.
+        eps (float, optional): Vector Normalization epsilon for
+            avoiding divide by zero. Defaults to 1e-12.
+    """
+
+    def __init__(self,
+                 in_features,
+                 out_features,
+                 bias=True,
+                 num_svs=1,
+                 num_iters=1,
+                 eps=1e-12):
+        nn.Linear.__init__(self, in_features, out_features, bias=bias)
+        # One singular-vector buffer of width ``out_features`` per tracked sv.
+        SpectralNorm.__init__(
+            self,
+            num_svs=num_svs,
+            num_iters=num_iters,
+            num_outputs=out_features,
+            eps=eps)
+
+    def forward(self, x):
+        """Forward function."""
+        normalized_weight = self.sn_weight()
+        return F.linear(x, normalized_weight, self.bias)
+
+
+# We use num_embeddings as the dim instead of embedding_dim here
+# for convenience sake
+class SNEmbedding(nn.Embedding, SpectralNorm):
+    """``nn.Embedding`` whose weight is spectrally normalized on lookup.
+
+    The singular-vector buffers are sized by ``num_embeddings``. The forward
+    pass calls ``F.embedding`` with only the indices and the normalized
+    weight.
+
+    Args:
+        num_embeddings (int): Size of the dictionary of embeddings.
+        embedding_dim (int): The size of each embedding vector.
+        padding_idx (int, optional):  If specified, the entries at
+            padding_idx do not contribute to the gradient; therefore,
+            the embedding vector at padding_idx is not updated during
+            training, i.e. it remains as a fixed "pad". For a newly
+            constructed Embedding, the embedding vector at padding_idx
+            will default to all zeros, but can be updated to another value
+            to be used as the padding vector. Defaults to None.
+        max_norm (float, optional): If given, each embedding vector with
+            norm larger than max_norm is renormalized to have norm
+            max_norm. Defaults to None.
+        norm_type (int, optional):  The p of the p-norm to compute for
+            the max_norm option. Default 2.
+        scale_grad_by_freq (bool, optional): If given, this will scale
+            gradients by the inverse of frequency of the words in the
+            mini-batch. Default False.
+        sparse (bool, optional):  If True, gradient w.r.t. weight matrix
+            will be a sparse tensor. See Notes for more details regarding
+            sparse gradients. Defaults to False.
+        _weight (torch.Tensor, optional): Initial Weight. Defaults to None.
+        num_svs (int): Number of singular values. Defaults to 1.
+        num_iters (int): Number of power iterations per step. Defaults to 1.
+        eps (float, optional): Vector Normalization epsilon for
+            avoiding divide by zero. Defaults to 1e-12.
+    """
+
+    def __init__(self,
+                 num_embeddings,
+                 embedding_dim,
+                 padding_idx=None,
+                 max_norm=None,
+                 norm_type=2,
+                 scale_grad_by_freq=False,
+                 sparse=False,
+                 _weight=None,
+                 num_svs=1,
+                 num_iters=1,
+                 eps=1e-12):
+        nn.Embedding.__init__(
+            self,
+            num_embeddings,
+            embedding_dim,
+            padding_idx=padding_idx,
+            max_norm=max_norm,
+            norm_type=norm_type,
+            scale_grad_by_freq=scale_grad_by_freq,
+            sparse=sparse,
+            _weight=_weight)
+        SpectralNorm.__init__(
+            self,
+            num_svs=num_svs,
+            num_iters=num_iters,
+            num_outputs=num_embeddings,
+            eps=eps)
+
+    def forward(self, x):
+        """Forward function."""
+        normalized_weight = self.sn_weight()
+        return F.embedding(x, normalized_weight)
--- a/build/lib/mmgen/models/architectures/biggan/generator_discriminator.py
+++ b/build/lib/mmgen/models/architectures/biggan/generator_discriminator.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from copy import deepcopy
+
+import mmcv
+import torch
+import torch.nn as nn
+from mmcv.cnn import normal_init, xavier_init
+from mmcv.cnn.bricks import build_activation_layer
+from mmcv.runner import load_checkpoint
+from mmcv.runner.checkpoint import _load_checkpoint_with_prefix
+from torch.nn.utils import spectral_norm
+
+from mmgen.models.builder import MODULES, build_module
+from mmgen.utils import get_root_logger
+from ..common import get_module_device
+from .biggan_snmodule import SNEmbedding, SNLinear
+from .modules import SelfAttentionBlock, SNConvModule
+
+
+@MODULES.register_module()
+class BigGANGenerator(nn.Module):
+    """BigGAN Generator. The implementation refers to
+    https://github.com/ajbrock/BigGAN-PyTorch/blob/master/BigGAN.py # noqa.
+
+    In BigGAN, we use a SAGAN-based architecture composing of an self-attention
+    block and number of convolutional residual blocks with spectral
+    normalization.
+
+    More details can be found in: Large Scale GAN Training for High Fidelity
+    Natural Image Synthesis (ICLR2019).
+
+    The design of the model structure is highly corresponding to the output
+    resolution. For the original BigGAN's generator, you can set ``output_scale``
+    as you need and use the default value of ``arch_cfg`` and ``blocks_cfg``.
+    If you want to customize the model, you can set the arguments in this way:
+
+    ``arch_cfg``: Config for the architecture of this generator. You can refer
+    the ``_default_arch_cfgs`` in the ``_get_default_arch_cfg`` function to see
+    the format of the ``arch_cfg``. Basically, you need to provide information
+    of each block such as the numbers of input and output channels, whether to
+    perform upsampling, etc.
+
+    ``blocks_cfg``: Config for the convolution block. You can replace the block
+    type to your registered customized block and adjust block params here.
+    However, you should notice that some params are shared among these blocks
+    like ``act_cfg``, ``with_spectral_norm``, ``sn_eps``, etc.
+
+    Args:
+        output_scale (int): Output scale for the generated image.
+        noise_size (int, optional): Size of the input noise vector. Defaults
+            to 120.
+        num_classes (int, optional): The number of conditional classes. If set
+            to 0, this model will be degraded to an unconditional model.
+            Defaults to 0.
+        out_channels (int, optional): Number of channels in output images.
+            Defaults to 3.
+        base_channels (int, optional): The basic channel number of the
+            generator. The other layers contains channels based on this number.
+            Defaults to 96.
+        input_scale (int, optional): The scale of the input 2D feature map.
+            Defaults to 4.
+        with_shared_embedding (bool, optional): Whether to use shared
+            embedding. Defaults to True.
+        shared_dim (int, optional): The output channels of shared embedding.
+            Defaults to 128.
+        sn_eps (float, optional): Epsilon value for spectral normalization.
+            Defaults to 1e-6.
+        sn_style (str, optional): The style of spectral normalization.
+            If set to `ajbrock`, implementation by
+            ajbrock(https://github.com/ajbrock/BigGAN-PyTorch/blob/master/layers.py)
+            will be adopted.
+            If set to `torch`, implementation by `PyTorch` will be adopted.
+            Defaults to `ajbrock`.
+        init_type (str, optional): The name of an initialization method:
+            ortho | N02 | xavier. Defaults to 'ortho'.
+        split_noise (bool, optional): Whether to split input noise vector.
+            Defaults to True.
+        act_cfg (dict, optional): Config for the activation layer. Defaults to
+            dict(type='ReLU').
+        upsample_cfg (dict, optional): Config for the upsampling operation.
+            Defaults to dict(type='nearest', scale_factor=2).
+        with_spectral_norm (bool, optional): Whether to use spectral
+            normalization. Defaults to True.
+        auto_sync_bn (bool, optional): Whether to use synchronized batch
+            normalization. Defaults to True.
+        blocks_cfg (dict, optional): Config for the convolution block. Defaults
+            to dict(type='BigGANGenResBlock').
+        arch_cfg (dict, optional): Config for the architecture of this
+            generator. Defaults to None.
+        out_norm_cfg (dict, optional): Config for the norm of output layer.
+            Defaults to dict(type='BN').
+        pretrained (str | dict, optional): Path for the pretrained model or
+            dict containing information for pretained models whose necessary
+            key is 'ckpt_path'. Besides, you can also provide 'prefix' to load
+            the generator part from the whole state dict. Defaults to None.
+        rgb2bgr (bool, optional): Whether to reformat the output channels
+                with order `bgr`. We provide several pre-trained BigGAN
+                weights whose output channels order is `rgb`. You can set
+                this argument to True to use the weights.
+    """
+
+    def __init__(self,
+                 output_scale,
+                 noise_size=120,
+                 num_classes=0,
+                 out_channels=3,
+                 base_channels=96,
+                 input_scale=4,
+                 with_shared_embedding=True,
+                 shared_dim=128,
+                 sn_eps=1e-6,
+                 sn_style='ajbrock',
+                 init_type='ortho',
+                 split_noise=True,
+                 act_cfg=dict(type='ReLU'),
+                 upsample_cfg=dict(type='nearest', scale_factor=2),
+                 with_spectral_norm=True,
+                 auto_sync_bn=True,
+                 blocks_cfg=dict(type='BigGANGenResBlock'),
+                 arch_cfg=None,
+                 out_norm_cfg=dict(type='BN'),
+                 pretrained=None,
+                 rgb2bgr=False):
+        super().__init__()
+        self.noise_size = noise_size
+        self.num_classes = num_classes
+        self.shared_dim = shared_dim
+        self.with_shared_embedding = with_shared_embedding
+        self.output_scale = output_scale
+        self.arch = arch_cfg if arch_cfg else self._get_default_arch_cfg(
+            self.output_scale, base_channels)
+        self.input_scale = input_scale
+        self.split_noise = split_noise
+        self.blocks_cfg = deepcopy(blocks_cfg)
+        self.upsample_cfg = deepcopy(upsample_cfg)
+        self.rgb2bgr = rgb2bgr
+        self.sn_style = sn_style
+
+        # Validity Check
+        # If 'num_classes' equals to zero, we shall set 'with_shared_embedding'
+        # to False.
+        if num_classes == 0:
+            assert not self.with_shared_embedding
+        else:
+            if not self.with_shared_embedding:
+                # If not `with_shared_embedding`, we will use `nn.Embedding` to
+                # replace the original `Linear` layer in conditional BN.
+                # Meanwhile, we do not adopt split noises.
+                assert not self.split_noise
+
+        # If using split latents, we may need to adjust noise_size
+        if self.split_noise:
+            # Number of places z slots into
+            self.num_slots = len(self.arch['in_channels']) + 1
+            self.noise_chunk_size = self.noise_size // self.num_slots
+            # Recalculate latent dimensionality for even splitting into chunks
+            self.noise_size = self.noise_chunk_size * self.num_slots
+        else:
+            self.num_slots = 1
+            self.noise_chunk_size = 0
+
+        # First linear layer
+        self.noise2feat = nn.Linear(
+            self.noise_size // self.num_slots,
+            self.arch['in_channels'][0] * (self.input_scale**2))
+        if with_spectral_norm:
+            if sn_style == 'torch':
+                self.noise2feat = spectral_norm(self.noise2feat, eps=sn_eps)
+            elif sn_style == 'ajbrock':
+                self.noise2feat = SNLinear(
+                    self.noise_size // self.num_slots,
+                    self.arch['in_channels'][0] * (self.input_scale**2),
+                    eps=sn_eps)
+            else:
+                raise NotImplementedError(f'Your {sn_style} is not supported')
+
+        # If using 'shared_embedding', we will get an unified embedding of
+        # label for all blocks. If not, we just pass the label to each
+        # block.
+        if with_shared_embedding:
+            self.shared_embedding = nn.Embedding(num_classes, shared_dim)
+        else:
+            self.shared_embedding = nn.Identity()
+
+        if num_classes > 0:
+            self.dim_after_concat = (
+                self.shared_dim + self.noise_chunk_size
+                if self.with_shared_embedding else self.num_classes)
+        else:
+            self.dim_after_concat = self.noise_chunk_size
+
+        self.blocks_cfg.update(
+            dict(
+                dim_after_concat=self.dim_after_concat,
+                act_cfg=act_cfg,
+                sn_eps=sn_eps,
+                sn_style=sn_style,
+                input_is_label=(num_classes > 0)
+                and (not with_shared_embedding),
+                with_spectral_norm=with_spectral_norm,
+                auto_sync_bn=auto_sync_bn))
+
+        self.conv_blocks = nn.ModuleList()
+        for index, out_ch in enumerate(self.arch['out_channels']):
+            # change args to adapt to current block
+            self.blocks_cfg.update(
+                dict(
+                    in_channels=self.arch['in_channels'][index],
+                    out_channels=out_ch,
+                    upsample_cfg=self.upsample_cfg
+                    if self.arch['upsample'][index] else None))
+            self.conv_blocks.append(build_module(self.blocks_cfg))
+            if self.arch['attention'][index]:
+                self.conv_blocks.append(
+                    SelfAttentionBlock(
+                        out_ch,
+                        with_spectral_norm=with_spectral_norm,
+                        sn_eps=sn_eps,
+                        sn_style=sn_style))
+
+        self.output_layer = SNConvModule(
+            self.arch['out_channels'][-1],
+            out_channels,
+            kernel_size=3,
+            padding=1,
+            with_spectral_norm=with_spectral_norm,
+            spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style),
+            act_cfg=act_cfg,
+            norm_cfg=out_norm_cfg,
+            bias=True,
+            order=('norm', 'act', 'conv'))
+
+        self.init_weights(pretrained=pretrained, init_type=init_type)
+
+    def _get_default_arch_cfg(self, output_scale, base_channels):
+        """Build the default architecture config for a given output scale.
+
+        Args:
+            output_scale (int): Target resolution, one of 32, 64, 128, 256
+                or 512.
+            base_channels (int): Channel unit that every per-block
+                multiplier is scaled by.
+
+        Returns:
+            dict: Lists (one entry per block) stored under the keys
+                ``in_channels``, ``out_channels``, ``upsample``,
+                ``resolution`` and ``attention``.
+        """
+        assert output_scale in [32, 64, 128, 256, 512]
+        # Per-scale channel multipliers: (input widths, output widths).
+        multipliers = {
+            '32': ([4, 4, 4], [4, 4, 4]),
+            '64': ([16, 16, 8, 4], [16, 8, 4, 2]),
+            '128': ([16, 16, 8, 4, 2], [16, 8, 4, 2, 1]),
+            '256': ([16, 16, 8, 8, 4, 2], [16, 8, 8, 4, 2, 1]),
+            '512': ([16, 16, 8, 8, 4, 2, 1], [16, 8, 8, 4, 2, 1, 1])
+        }
+        in_mults, out_mults = multipliers[str(output_scale)]
+        num_blocks = len(out_mults)
+
+        # Every block doubles the resolution, starting from 8x8.
+        resolutions = [8 * 2**level for level in range(num_blocks)]
+
+        arch = dict()
+        arch['in_channels'] = [base_channels * mult for mult in in_mults]
+        arch['out_channels'] = [base_channels * mult for mult in out_mults]
+        arch['upsample'] = [True] * num_blocks
+        arch['resolution'] = resolutions
+        # Self-attention follows the single block that outputs 64x64 maps;
+        # the 32-pixel model never reaches that size and has none.
+        arch['attention'] = [res == 64 for res in resolutions]
+        return arch
+
+    def forward(self,
+                noise,
+                label=None,
+                num_batches=0,
+                return_noise=False,
+                truncation=-1.0,
+                use_outside_embedding=False):
+        """Forward function.
+
+        Args:
+            noise (torch.Tensor | callable | None): You can directly give a
+                batch of noise through a ``torch.Tensor`` or offer a callable
+                function to sample a batch of noise data. Otherwise, the
+                ``None`` indicates to use the default noise sampler.
+            label (torch.Tensor | callable | None): You can directly give a
+                batch of label through a ``torch.Tensor`` or offer a callable
+                function to sample a batch of label data. Otherwise, the
+                ``None`` indicates to use the default label sampler.
+                Ignored when ``self.num_classes`` is 0. Defaults to None.
+            num_batches (int, optional): The batch size used when noise or
+                label has to be sampled here. Must be positive in that
+                case. Defaults to 0.
+            return_noise (bool, optional): If True, ``noise_batch`` and
+                ``label`` will be returned in a dict with ``fake_img``.
+                Defaults to False.
+            truncation (float, optional): Truncation factor. Given a value
+                not less than 0., the noise is clamped to
+                ``[-truncation, truncation]``. Otherwise, the truncation
+                trick will not be adopted. Defaults to -1..
+            use_outside_embedding (bool, optional): Whether to use outside
+                embedding or use `shared_embedding`. Set to `True` if
+                embedding has already been performed outside this function.
+                Default to False.
+
+        Returns:
+            torch.Tensor | dict: If not ``return_noise``, only the output image
+                will be returned. Otherwise, a dict contains ``fake_img``,
+                ``noise_batch`` and ``label`` will be returned.
+        """
+        if isinstance(noise, torch.Tensor):
+            # Validate the rank first: indexing ``shape[1]`` on a 1-D tensor
+            # would raise an IndexError before the descriptive message.
+            assert noise.ndim == 2, ('The noise should be in shape of (n, c), '
+                                     f'but got {noise.shape}')
+            assert noise.shape[1] == self.noise_size
+            noise_batch = noise
+        # receive a noise generator and sample noise.
+        elif callable(noise):
+            noise_generator = noise
+            assert num_batches > 0
+            noise_batch = noise_generator((num_batches, self.noise_size))
+        # otherwise, we will adopt default noise sampler.
+        else:
+            assert num_batches > 0
+            noise_batch = torch.randn((num_batches, self.noise_size))
+
+        # perform truncation
+        if truncation >= 0.0:
+            noise_batch = torch.clamp(noise_batch, -1. * truncation,
+                                      1. * truncation)
+
+        if self.num_classes == 0:
+            # Unconditional model: any given label is ignored.
+            label_batch = None
+        elif isinstance(label, torch.Tensor):
+            if not use_outside_embedding:
+                assert label.ndim == 1, (
+                    'The label should be in shape of (n, ), '
+                    f'but got {label.shape}.')
+            label_batch = label
+        elif callable(label):
+            label_generator = label
+            assert num_batches > 0
+            label_batch = label_generator((num_batches, ))
+        else:
+            assert num_batches > 0
+            label_batch = torch.randint(0, self.num_classes, (num_batches, ))
+
+        # dirty code for putting data on the right device
+        device = get_module_device(self)
+        noise_batch = noise_batch.to(device)
+        if label_batch is not None:
+            label_batch = label_batch.to(device)
+            if not use_outside_embedding:
+                class_vector = self.shared_embedding(label_batch)
+            else:
+                # The caller already embedded the label.
+                class_vector = label_batch
+        else:
+            class_vector = None
+
+        # If 'split noise', concat class vector and noise chunk
+        if self.split_noise:
+            zs = torch.split(noise_batch, self.noise_chunk_size, dim=1)
+            # The first chunk feeds the input linear layer, the remaining
+            # chunks condition the residual blocks one by one.
+            z = zs[0]
+            if class_vector is not None:
+                ys = [torch.cat([class_vector, item], 1) for item in zs[1:]]
+            else:
+                ys = zs[1:]
+        else:
+            ys = [class_vector] * len(self.conv_blocks)
+            z = noise_batch
+
+        # First linear layer
+        x = self.noise2feat(z)
+        # Reshape
+        x = x.view(x.size(0), -1, self.input_scale, self.input_scale)
+
+        # Loop over blocks. Attention blocks take no condition, so the
+        # counter only advances on the conditional blocks.
+        counter = 0
+        for conv_block in self.conv_blocks:
+            if isinstance(conv_block, SelfAttentionBlock):
+                x = conv_block(x)
+            else:
+                x = conv_block(x, ys[counter])
+                counter += 1
+
+        # Apply the output layer followed by tanh
+        out_img = torch.tanh(self.output_layer(x))
+
+        if self.rgb2bgr:
+            # Reverse the order of the first three channels.
+            out_img = out_img[:, [2, 1, 0], ...]
+
+        if return_noise:
+            output = dict(
+                fake_img=out_img, noise_batch=noise_batch, label=label_batch)
+            return output
+
+        return out_img
+
+    def init_weights(self, pretrained=None, init_type='ortho'):
+        """Init weights for models.
+
+        Args:
+            pretrained (str | dict, optional): Path for the pretrained model or
+                dict containing information for pretained models whose
+                necessary key is 'ckpt_path'. Besides, you can also provide
+                'prefix' to load the generator part from the whole state dict,
+                as well as 'map_location' (defaults to 'cpu') and 'strict'
+                (defaults to True). Defaults to None.
+            init_type (str, optional): The name of an initialization method:
+                ortho | N02 | xavier. Only used when ``pretrained`` is None.
+                Defaults to 'ortho'.
+
+        Raises:
+            NotImplementedError: If ``init_type`` is not supported.
+            TypeError: If ``pretrained`` is not a str, a dict or None.
+        """
+        if isinstance(pretrained, str):
+            logger = get_root_logger()
+            load_checkpoint(self, pretrained, strict=False, logger=logger)
+        elif isinstance(pretrained, dict):
+            ckpt_path = pretrained.get('ckpt_path', None)
+            assert ckpt_path is not None
+            prefix = pretrained.get('prefix', '')
+            map_location = pretrained.get('map_location', 'cpu')
+            strict = pretrained.get('strict', True)
+            state_dict = _load_checkpoint_with_prefix(prefix, ckpt_path,
+                                                      map_location)
+            self.load_state_dict(state_dict, strict=strict)
+            mmcv.print_log(f'Load pretrained model from {ckpt_path}', 'mmgen')
+        elif pretrained is None:
+            for m in self.modules():
+                if isinstance(m, (nn.Conv2d, nn.Linear, nn.Embedding)):
+                    if init_type == 'ortho':
+                        nn.init.orthogonal_(m.weight)
+                    elif init_type == 'N02':
+                        normal_init(m, 0.0, 0.02)
+                    elif init_type == 'xavier':
+                        xavier_init(m)
+                    else:
+                        # A single-line literal keeps the source indentation
+                        # out of the error message.
+                        raise NotImplementedError(
+                            f'{init_type} initialization not supported now.')
+        else:
+            raise TypeError('pretrained must be a str, a dict or None but'
+                            f' got {type(pretrained)} instead.')
+
+
+@MODULES.register_module()
+class BigGANDiscriminator(nn.Module):
+    """BigGAN Discriminator. The implementation refers to
+    https://github.com/ajbrock/BigGAN-PyTorch/blob/master/BigGAN.py # noqa.
+
+    In BigGAN, we use a SAGAN-based architecture composing of an self-attention
+    block and number of convolutional residual blocks with spectral
+    normalization.
+
+    More details can be found in: Large Scale GAN Training for High Fidelity
+    Natural Image Synthesis (ICLR2019).
+
+    The design of the model structure is highly corresponding to the input
+    resolution. For the original BigGAN's discriminator, you can set
+    ``input_scale`` as you need and use the default value of ``arch_cfg`` and
+    ``blocks_cfg``. If you want to customize the model, you can set the
+    arguments in this way:
+
+    ``arch_cfg``: Config for the architecture of this discriminator. You can
+    refer the ``_default_arch_cfgs`` in the ``_get_default_arch_cfg`` function
+    to see the format of the ``arch_cfg``. Basically, you need to provide
+    information of each block such as the numbers of input and output
+    channels, whether to perform downsampling, etc. The ``in_channels``,
+    ``out_channels``, ``downsample`` and ``attention`` lists are indexed once
+    per block and therefore must have the same length.
+
+    ``blocks_cfg``: Config for the convolution block. You can replace the block
+    type to your registered customized block and adjust block params here.
+    However, you should notice that some params are shared among these blocks
+    like ``act_cfg``, ``with_spectral_norm``, ``sn_eps``, etc.
+
+    Args:
+        input_scale (int): The scale of the input image.
+        num_classes (int, optional): The number of conditional classes.
+            Defaults to 0.
+        in_channels (int, optional): The channel number of the input image.
+            Defaults to 3.
+        out_channels (int, optional): The channel number of the final output.
+            Defaults to 1.
+        base_channels (int, optional): The basic channel number of the
+            discriminator. The other layers contains channels based on this
+            number. Defaults to 96.
+        sn_eps (float, optional): Epsilon value for spectral normalization.
+            Defaults to 1e-6.
+        sn_style (str, optional): The style of spectral normalization.
+            If set to `ajbrock`, implementation by
+            ajbrock(https://github.com/ajbrock/BigGAN-PyTorch/blob/master/layers.py)
+            will be adopted.
+            If set to `torch`, implementation by `PyTorch` will be adopted.
+            Defaults to `ajbrock`.
+        init_type (str, optional): The name of an initialization method:
+            ortho | N02 | xavier. Defaults to 'ortho'.
+        act_cfg (dict, optional): Config for the activation layer.
+            Defaults to dict(type='ReLU').
+        with_spectral_norm (bool, optional): Whether to use spectral
+            normalization. Defaults to True.
+        blocks_cfg (dict, optional): Config for the convolution block.
+            Defaults to dict(type='BigGANDiscResBlock').
+        arch_cfg (dict, optional): Config for the architecture of this
+            discriminator. Defaults to None.
+        pretrained (str | dict, optional): Path for the pretrained model or
+            dict containing information for pretained models whose necessary
+            key is 'ckpt_path'. Besides, you can also provide 'prefix' to load
+            the discriminator part from the whole state dict. Defaults to
+            None.
+    """
+
+    def __init__(self,
+                 input_scale,
+                 num_classes=0,
+                 in_channels=3,
+                 out_channels=1,
+                 base_channels=96,
+                 sn_eps=1e-6,
+                 sn_style='ajbrock',
+                 init_type='ortho',
+                 act_cfg=dict(type='ReLU'),
+                 with_spectral_norm=True,
+                 blocks_cfg=dict(type='BigGANDiscResBlock'),
+                 arch_cfg=None,
+                 pretrained=None):
+        super().__init__()
+        self.num_classes = num_classes
+        self.out_channels = out_channels
+        self.input_scale = input_scale
+        self.in_channels = in_channels
+        self.base_channels = base_channels
+        self.arch = arch_cfg if arch_cfg else self._get_default_arch_cfg(
+            self.input_scale, self.in_channels, self.base_channels)
+        # Work on a copy so the caller's (or the default) dict is untouched.
+        self.blocks_cfg = deepcopy(blocks_cfg)
+        self.blocks_cfg.update(
+            dict(
+                act_cfg=act_cfg,
+                sn_eps=sn_eps,
+                sn_style=sn_style,
+                with_spectral_norm=with_spectral_norm))
+        self.sn_style = sn_style
+
+        self.conv_blocks = nn.ModuleList()
+        for index, out_ch in enumerate(self.arch['out_channels']):
+            # change args to adapt to current block
+            self.blocks_cfg.update(
+                dict(
+                    in_channels=self.arch['in_channels'][index],
+                    out_channels=out_ch,
+                    with_downsample=self.arch['downsample'][index],
+                    is_head_block=(index == 0)))
+            self.conv_blocks.append(build_module(self.blocks_cfg))
+            # Optionally follow the block with self-attention.
+            if self.arch['attention'][index]:
+                self.conv_blocks.append(
+                    SelfAttentionBlock(
+                        out_ch,
+                        with_spectral_norm=with_spectral_norm,
+                        sn_eps=sn_eps,
+                        sn_style=sn_style))
+
+        self.activate = build_activation_layer(act_cfg)
+
+        # Final linear layer mapping pooled features to the decision score.
+        self.decision = nn.Linear(self.arch['out_channels'][-1], out_channels)
+        if with_spectral_norm:
+            if sn_style == 'torch':
+                self.decision = spectral_norm(self.decision, eps=sn_eps)
+            elif sn_style == 'ajbrock':
+                self.decision = SNLinear(
+                    self.arch['out_channels'][-1], out_channels, eps=sn_eps)
+            else:
+                raise NotImplementedError(
+                    f'{sn_style} style SN is not supported')
+
+        # Class embedding used for the projection term in ``forward``.
+        if self.num_classes > 0:
+            self.proj_y = nn.Embedding(self.num_classes,
+                                       self.arch['out_channels'][-1])
+            if with_spectral_norm:
+                if sn_style == 'torch':
+                    self.proj_y = spectral_norm(self.proj_y, eps=sn_eps)
+                elif sn_style == 'ajbrock':
+                    self.proj_y = SNEmbedding(
+                        self.num_classes,
+                        self.arch['out_channels'][-1],
+                        eps=sn_eps)
+                else:
+                    raise NotImplementedError(
+                        f'{sn_style} style SN is not supported')
+        self.init_weights(pretrained=pretrained, init_type=init_type)
+
+    def _get_default_arch_cfg(self, input_scale, in_channels, base_channels):
+        """Get the default architecture config for a given input scale.
+
+        Args:
+            input_scale (int): Input resolution, one of 32, 64, 128, 256 or
+                512.
+            in_channels (int): Channel number of the input image, used as the
+                input width of the first block.
+            base_channels (int): Channel unit that every per-block
+                multiplier is scaled by.
+
+        Returns:
+            dict: Lists (one entry per block) stored under the keys
+                ``in_channels``, ``out_channels``, ``downsample``,
+                ``resolution`` and ``attention``.
+        """
+        assert input_scale in [32, 64, 128, 256, 512]
+        _default_arch_cfgs = {
+            '32': {
+                'in_channels':
+                [in_channels] + [base_channels * item for item in [4, 4, 4]],
+                'out_channels':
+                [base_channels * item for item in [4, 4, 4, 4]],
+                'downsample': [True, True, False, False],
+                'resolution': [16, 8, 8, 8],
+                'attention': [False, False, False, False]
+            },
+            '64': {
+                'in_channels': [in_channels] +
+                [base_channels * item for item in [1, 2, 4, 8]],
+                'out_channels':
+                [base_channels * item for item in [1, 2, 4, 8, 16]],
+                'downsample': [True] * 4 + [False],
+                'resolution': [32, 16, 8, 4, 4],
+                'attention': [False, False, False, False, False]
+            },
+            '128': {
+                'in_channels': [in_channels] +
+                [base_channels * item for item in [1, 2, 4, 8, 16]],
+                'out_channels':
+                [base_channels * item for item in [1, 2, 4, 8, 16, 16]],
+                'downsample': [True] * 5 + [False],
+                'resolution': [64, 32, 16, 8, 4, 4],
+                'attention': [True, False, False, False, False, False]
+            },
+            '256': {
+                'in_channels': [in_channels] +
+                [base_channels * item for item in [1, 2, 4, 8, 8, 16]],
+                'out_channels':
+                [base_channels * item for item in [1, 2, 4, 8, 8, 16, 16]],
+                'downsample': [True] * 6 + [False],
+                'resolution': [128, 64, 32, 16, 8, 4, 4],
+                # One flag per block (7 blocks); a shorter list makes
+                # ``__init__`` fail with an IndexError on the last block.
+                'attention':
+                [False, True, False, False, False, False, False]
+            },
+            '512': {
+                'in_channels': [in_channels] +
+                [base_channels * item for item in [1, 1, 2, 4, 8, 8, 16]],
+                'out_channels':
+                [base_channels * item for item in [1, 1, 2, 4, 8, 8, 16, 16]],
+                'downsample': [True] * 7 + [False],
+                'resolution': [256, 128, 64, 32, 16, 8, 4, 4],
+                # One flag per block (8 blocks), see the note on '256'.
+                'attention':
+                [False, False, False, True, False, False, False, False]
+            }
+        }
+
+        return _default_arch_cfgs[str(input_scale)]
+
+    def forward(self, x, label=None):
+        """Forward function.
+
+        Args:
+            x (torch.Tensor): Fake or real image tensor.
+            label (torch.Tensor | None): Label Tensor. It is passed to the
+                class embedding when ``self.num_classes > 0`` and is unused
+                otherwise. Defaults to None.
+
+        Returns:
+            torch.Tensor: Prediction for the reality of the input image with
+                given label.
+        """
+        x0 = x
+        for conv_block in self.conv_blocks:
+            x0 = conv_block(x0)
+        x0 = self.activate(x0)
+        # Sum-pool over the two trailing (spatial) dimensions.
+        x0 = torch.sum(x0, dim=[2, 3])
+        out = self.decision(x0)
+
+        if self.num_classes > 0:
+            # Projection term: inner product between the class embedding and
+            # the pooled feature, added to the unconditional score.
+            w_y = self.proj_y(label)
+            out = out + torch.sum(w_y * x0, dim=1, keepdim=True)
+        return out
+
+    def init_weights(self, pretrained=None, init_type='ortho'):
+        """Init weights for models.
+
+        Args:
+            pretrained (str | dict, optional): Path for the pretrained model or
+                dict containing information for pretained models whose
+                necessary key is 'ckpt_path'. Besides, you can also provide
+                'prefix' to load the discriminator part from the whole state
+                dict, as well as 'map_location' (defaults to 'cpu') and
+                'strict' (defaults to True). Defaults to None.
+            init_type (str, optional): The name of an initialization method:
+                ortho | N02 | xavier. Only used when ``pretrained`` is None.
+                Defaults to 'ortho'.
+
+        Raises:
+            NotImplementedError: If ``init_type`` is not supported.
+            TypeError: If ``pretrained`` is not a str, a dict or None.
+        """
+
+        if isinstance(pretrained, str):
+            logger = get_root_logger()
+            load_checkpoint(self, pretrained, strict=False, logger=logger)
+        elif isinstance(pretrained, dict):
+            ckpt_path = pretrained.get('ckpt_path', None)
+            assert ckpt_path is not None
+            prefix = pretrained.get('prefix', '')
+            map_location = pretrained.get('map_location', 'cpu')
+            strict = pretrained.get('strict', True)
+            state_dict = _load_checkpoint_with_prefix(prefix, ckpt_path,
+                                                      map_location)
+            self.load_state_dict(state_dict, strict=strict)
+            mmcv.print_log(f'Load pretrained model from {ckpt_path}', 'mmgen')
+        elif pretrained is None:
+            for m in self.modules():
+                if isinstance(m, (nn.Conv2d, nn.Linear, nn.Embedding)):
+                    if init_type == 'ortho':
+                        nn.init.orthogonal_(m.weight)
+                    elif init_type == 'N02':
+                        normal_init(m, 0.0, 0.02)
+                    elif init_type == 'xavier':
+                        xavier_init(m)
+                    else:
+                        # A single-line literal keeps the source indentation
+                        # out of the error message.
+                        raise NotImplementedError(
+                            f'{init_type} initialization not supported now.')
+        else:
+            raise TypeError('pretrained must be a str, a dict or None but'
+                            f' got {type(pretrained)} instead.')
--- a/build/lib/mmgen/models/architectures/biggan/generator_discriminator_deep.py
+++ b/build/lib/mmgen/models/architectures/biggan/generator_discriminator_deep.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from copy import deepcopy
+
+import mmcv
+import torch
+import torch.nn as nn
+from mmcv.cnn import normal_init, xavier_init
+from mmcv.cnn.bricks import build_activation_layer
+from mmcv.runner import load_checkpoint
+from mmcv.runner.checkpoint import _load_checkpoint_with_prefix
+from torch.nn.utils import spectral_norm
+
+from mmgen.models.builder import MODULES, build_module
+from mmgen.utils import get_root_logger
+from ..common import get_module_device
+from .biggan_snmodule import SNEmbedding, SNLinear
+from .modules import SelfAttentionBlock, SNConvModule
+
+
+@MODULES.register_module()
+class BigGANDeepGenerator(nn.Module):
+    """BigGAN-Deep Generator. The implementation refers to
+    https://github.com/ajbrock/BigGAN-PyTorch/blob/master/BigGANdeep.py # noqa.
+
+    In BigGAN, we use a SAGAN-based architecture composing of an
+    self-attention block and number of convolutional residual blocks
+    with spectral normalization. BigGAN-deep follow the same architecture.
+
+    The main difference between BigGAN and BigGAN-deep is that
+    BigGAN-deep uses deeper residual blocks to construct the whole
+    model.
+
+    More details can be found in: Large Scale GAN Training for High Fidelity
+    Natural Image Synthesis (ICLR2019).
+
+    The design of the model structure is highly corresponding to the output
+    resolution. For the original BigGAN-Deep's generator, you can set ``output_scale``
+    as you need and use the default value of ``arch_cfg`` and ``blocks_cfg``.
+    If you want to customize the model, you can set the arguments in this way:
+
+    ``arch_cfg``: Config for the architecture of this generator. You can refer
+    the ``_default_arch_cfgs`` in the ``_get_default_arch_cfg`` function to see
+    the format of the ``arch_cfg``. Basically, you need to provide information
+    of each block such as the numbers of input and output channels, whether to
+    perform upsampling, etc.
+
+    ``blocks_cfg``: Config for the convolution block. You can adjust block params
+    like ``channel_ratio`` here. You can also replace the block type
+    to your registered customized block. However, you should notice that some
+    params are shared among these blocks like ``act_cfg``, ``with_spectral_norm``,
+    ``sn_eps``, etc.
+
+    Args:
+        output_scale (int): Output scale for the generated image.
+        noise_size (int, optional): Size of the input noise vector. Defaults
+            to 120.
+        num_classes (int, optional): The number of conditional classes. If set
+            to 0, this model will be degraded to an unconditional model.
+            Defaults to 0.
+        out_channels (int, optional): Number of channels in output images.
+            Defaults to 3.
+        base_channels (int, optional): The basic channel number of the
+            generator. The other layers contains channels based on this number.
+            Defaults to 96.
+        block_depth (int, optional): The repeat times of Residual Blocks in
+            each level of architecture. Defaults to 2.
+        input_scale (int, optional): The scale of the input 2D feature map.
+            Defaults to 4.
+        with_shared_embedding (bool, optional): Whether to use shared
+            embedding. Defaults to True.
+        shared_dim (int, optional): The output channels of shared embedding.
+            Defaults to 128.
+        sn_eps (float, optional): Epsilon value for spectral normalization.
+            Defaults to 1e-6.
+        sn_style (str, optional): The style of spectral normalization.
+            If set to `ajbrock`, implementation by
+            ajbrock(https://github.com/ajbrock/BigGAN-PyTorch/blob/master/layers.py)
+            will be adopted.
+            If set to `torch`, implementation by `PyTorch` will be adopted.
+            Defaults to `ajbrock`.
+        init_type (str, optional): The name of an initialization method:
+            ortho | N02 | xavier. Defaults to 'ortho'.
+        concat_noise (bool, optional): Whether to concat input noise vector
+            with class vector. Defaults to True.
+        act_cfg (dict, optional): Config for the activation layer. Defaults to
+            dict(type='ReLU').
+        upsample_cfg (dict, optional): Config for the upsampling operation.
+            Defaults to dict(type='nearest', scale_factor=2).
+        with_spectral_norm (bool, optional): Whether to use spectral
+            normalization. Defaults to True.
+        auto_sync_bn (bool, optional): Whether to use synchronized batch
+            normalization. Defaults to True.
+        blocks_cfg (dict, optional): Config for the convolution block. Defaults
+            to dict(type='BigGANGenResBlock').
+        arch_cfg (dict, optional): Config for the architecture of this
+            generator. Defaults to None.
+        out_norm_cfg (dict, optional): Config for the norm of output layer.
+            Defaults to dict(type='BN').
+        pretrained (str | dict, optional): Path for the pretrained model or
+            dict containing information for pretained models whose necessary
+            key is 'ckpt_path'. Besides, you can also provide 'prefix' to load
+            the generator part from the whole state dict. Defaults to None.
+        rgb2bgr (bool, optional): Whether to reformat the output channels
+                with order `bgr`. We provide several pre-trained BigGAN-Deep
+                weights whose output channels order is `rgb`. You can set
+                this argument to True to use the weights.
+    """
+
+    def __init__(self,
+                 output_scale,
+                 noise_size=120,
+                 num_classes=0,
+                 out_channels=3,
+                 base_channels=96,
+                 block_depth=2,
+                 input_scale=4,
+                 with_shared_embedding=True,
+                 shared_dim=128,
+                 sn_eps=1e-6,
+                 sn_style='ajbrock',
+                 init_type='ortho',
+                 concat_noise=True,
+                 act_cfg=dict(type='ReLU', inplace=False),
+                 upsample_cfg=dict(type='nearest', scale_factor=2),
+                 with_spectral_norm=True,
+                 auto_sync_bn=True,
+                 blocks_cfg=dict(type='BigGANDeepGenResBlock'),
+                 arch_cfg=None,
+                 out_norm_cfg=dict(type='BN'),
+                 pretrained=None,
+                 rgb2bgr=False):
+        super().__init__()
+        self.noise_size = noise_size
+        self.num_classes = num_classes
+        self.shared_dim = shared_dim
+        self.with_shared_embedding = with_shared_embedding
+        self.output_scale = output_scale
+        self.arch = arch_cfg if arch_cfg else self._get_default_arch_cfg(
+            self.output_scale, base_channels)
+        self.input_scale = input_scale
+        self.concat_noise = concat_noise
+        self.blocks_cfg = deepcopy(blocks_cfg)
+        self.upsample_cfg = deepcopy(upsample_cfg)
+        self.block_depth = block_depth
+        self.rgb2bgr = rgb2bgr
+        self.sn_style = sn_style
+
+        # Validity Check
+        # If 'num_classes' equals to zero, we shall set 'with_shared_embedding'
+        # to False.
+        if num_classes == 0:
+            assert not self.with_shared_embedding
+            assert not self.concat_noise
+        elif not self.with_shared_embedding:
+            # If not `with_shared_embedding`, we will use `nn.Embedding` to
+            # replace the original `Linear` layer in conditional BN.
+            # Meanwhile, we do not adopt split noises.
+            assert not self.concat_noise
+
+        # First linear layer
+        if self.concat_noise:
+            self.noise2feat = nn.Linear(
+                self.noise_size + self.shared_dim,
+                self.arch['in_channels'][0] * (self.input_scale**2))
+        else:
+            self.noise2feat = nn.Linear(
+                self.noise_size,
+                self.arch['in_channels'][0] * (self.input_scale**2))
+
+        if with_spectral_norm:
+            if sn_style == 'torch':
+                self.noise2feat = spectral_norm(self.noise2feat, eps=sn_eps)
+            elif sn_style == 'ajbrock':
+                self.noise2feat = SNLinear(
+                    self.noise_size +
+                    (self.shared_dim if self.concat_noise else 0),
+                    self.arch['in_channels'][0] * (self.input_scale**2),
+                    eps=sn_eps)
+            else:
+                NotImplementedError(f'{sn_style} style SN is not supported')
+
+        # If using 'shared_embedding', we will get an unified embedding of
+        # label for all blocks. If not, we just pass the label to each
+        # block.
+        if with_shared_embedding:
+            self.shared_embedding = nn.Embedding(num_classes, shared_dim)
+        else:
+            self.shared_embedding = nn.Identity()
+
+        if num_classes > 0:
+            if self.concat_noise:
+                self.dim_after_concat = (
+                    self.shared_dim + self.noise_size
+                    if self.with_shared_embedding else self.num_classes)
+            else:
+                self.dim_after_concat = (
+                    self.shared_dim
+                    if self.with_shared_embedding else self.num_classes)
+        else:
+            self.dim_after_concat = 0
+        self.blocks_cfg.update(
+            dict(
+                dim_after_concat=self.dim_after_concat,
+                act_cfg=act_cfg,
+                sn_eps=sn_eps,
+                sn_style=sn_style,
+                input_is_label=(num_classes > 0)
+                and (not with_shared_embedding),
+                with_spectral_norm=with_spectral_norm,
+                auto_sync_bn=auto_sync_bn))
+
+        self.conv_blocks = nn.ModuleList()
+        for index, out_ch in enumerate(self.arch['out_channels']):
+            for depth in range(self.block_depth):
+                # change args to adapt to current block
+                block_cfg_ = deepcopy(self.blocks_cfg)
+                block_cfg_.update(
+                    dict(
+                        in_channels=self.arch['in_channels'][index],
+                        out_channels=out_ch if depth == (self.block_depth - 1)
+                        else self.arch['in_channels'][index],
+                        upsample_cfg=self.upsample_cfg
+                        if self.arch['upsample'][index]
+                        and depth == (self.block_depth - 1) else None))
+                self.conv_blocks.append(build_module(block_cfg_))
+
+            if self.arch['attention'][index]:
+                self.conv_blocks.append(
+                    SelfAttentionBlock(
+                        out_ch,
+                        with_spectral_norm=with_spectral_norm,
+                        sn_eps=sn_eps,
+                        sn_style=sn_style))
+
+        self.output_layer = SNConvModule(
+            self.arch['out_channels'][-1],
+            out_channels,
+            kernel_size=3,
+            padding=1,
+            with_spectral_norm=with_spectral_norm,
+            spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style),
+            act_cfg=act_cfg,
+            norm_cfg=out_norm_cfg,
+            bias=True,
+            order=('norm', 'act', 'conv'))
+
+        self.init_weights(pretrained=pretrained, init_type=init_type)
+
+    def _get_default_arch_cfg(self, output_scale, base_channels):
+        """Look up the built-in generator layout for ``output_scale``.
+
+        Args:
+            output_scale (int): Target resolution; must be one of
+                32, 64, 128, 256 or 512.
+            base_channels (int): Factor every channel multiplier is scaled
+                by.
+
+        Returns:
+            dict: Per-stage lists stored under the keys ``in_channels``,
+                ``out_channels``, ``upsample``, ``resolution`` and
+                ``attention``.
+        """
+        assert output_scale in [32, 64, 128, 256, 512]
+
+        # scale -> (input multipliers, output multipliers, index of the
+        # stage whose ``attention`` flag is set, or None if no flag is set)
+        layouts = {
+            '32': ([4, 4, 4], [4, 4, 4], None),
+            '64': ([16, 16, 8, 4], [16, 8, 4, 2], 3),
+            '128': ([16, 16, 8, 4, 2], [16, 8, 4, 2, 1], 3),
+            '256': ([16, 16, 8, 8, 4, 2], [16, 8, 8, 4, 2, 1], 3),
+            '512': ([16, 16, 8, 8, 4, 2, 1], [16, 8, 8, 4, 2, 1, 1], 3),
+        }
+        in_ratios, out_ratios, attn_stage = layouts[str(output_scale)]
+        stage_ids = range(len(in_ratios))
+
+        return {
+            'in_channels': [base_channels * ratio for ratio in in_ratios],
+            'out_channels': [base_channels * ratio for ratio in out_ratios],
+            'upsample': [True for _ in stage_ids],
+            # every layout starts at resolution 8 and doubles per stage
+            'resolution': [8 * 2**stage for stage in stage_ids],
+            'attention': [stage == attn_stage for stage in stage_ids],
+        }
+
+    def forward(self,
+                noise,
+                label=None,
+                num_batches=0,
+                return_noise=False,
+                truncation=-1.0,
+                use_outside_embedding=False):
+        """Forward function.
+
+        Args:
+            noise (torch.Tensor | callable | None): You can directly give a
+                batch of noise through a ``torch.Tensor`` or offer a callable
+                function to sample a batch of noise data. Otherwise, the
+                ``None`` indicates to use the default noise sampler.
+            label (torch.Tensor | callable | None): You can directly give a
+                batch of label through a ``torch.Tensor`` or offer a callable
+                function to sample a batch of label data. Otherwise, the
+                ``None`` indicates to use the default label sampler.
+                Ignored when ``num_classes`` is 0. Defaults to None.
+            num_batches (int, optional): The batch size used when noise or
+                label has to be sampled. Defaults to 0.
+            return_noise (bool, optional): If True, ``noise_batch`` and
+                ``label`` will be returned in a dict with ``fake_img``.
+                Defaults to False.
+            truncation (float, optional): Truncation factor. If the value is
+                not less than 0., the noise is clamped to
+                ``[-truncation, truncation]``. Otherwise, the truncation
+                trick will not be adopted. Defaults to -1..
+            use_outside_embedding (bool, optional): Whether to use outside
+                embedding or use `shared_embedding`. Set to `True` if
+                embedding has already been performed outside this function.
+                Defaults to False.
+
+        Returns:
+            torch.Tensor | dict: If not ``return_noise``, only the output image
+                will be returned. Otherwise, a dict contains ``fake_img``,
+                ``noise_batch`` and ``label`` will be returned.
+        """
+        if isinstance(noise, torch.Tensor):
+            # Check the rank first so that a mis-shaped tensor triggers the
+            # descriptive message instead of an IndexError on ``shape[1]``.
+            assert noise.ndim == 2, ('The noise should be in shape of (n, c), '
+                                     f'but got {noise.shape}')
+            assert noise.shape[1] == self.noise_size
+            noise_batch = noise
+        # receive a noise generator and sample noise.
+        elif callable(noise):
+            assert num_batches > 0
+            noise_batch = noise((num_batches, self.noise_size))
+        # otherwise, we will adopt default noise sampler.
+        else:
+            assert num_batches > 0
+            noise_batch = torch.randn((num_batches, self.noise_size))
+        # perform truncation
+        if truncation >= 0.0:
+            noise_batch = torch.clamp(noise_batch, -1. * truncation,
+                                      1. * truncation)
+
+        if self.num_classes == 0:
+            # unconditional model: any given label is ignored
+            label_batch = None
+        elif isinstance(label, torch.Tensor):
+            # an outside embedding is already a vector per sample, so the
+            # 1-D check only applies to raw labels
+            if not use_outside_embedding:
+                assert label.ndim == 1, (
+                    'The label should be in shape of (n, ), '
+                    f'but got {label.shape}.')
+            label_batch = label
+        elif callable(label):
+            assert num_batches > 0
+            label_batch = label((num_batches, ))
+        else:
+            assert num_batches > 0
+            label_batch = torch.randint(0, self.num_classes, (num_batches, ))
+
+        # dirty code for putting data on the right device
+        device = get_module_device(self)
+        noise_batch = noise_batch.to(device)
+        if label_batch is None:
+            class_vector = None
+        else:
+            label_batch = label_batch.to(device)
+            if use_outside_embedding:
+                class_vector = label_batch
+            else:
+                class_vector = self.shared_embedding(label_batch)
+
+        # If 'concat noise', concat class vector and noise batch
+        if self.concat_noise:
+            # Without a class vector there is nothing to concatenate; fail
+            # here with a clear message rather than on an unbound ``z`` below.
+            assert class_vector is not None, (
+                '`concat_noise` requires a class vector, but no label is '
+                'available because `num_classes` is 0.')
+            z = torch.cat([noise_batch, class_vector], dim=1)
+            y = z
+        elif self.num_classes > 0:
+            z = noise_batch
+            y = class_vector
+        else:
+            z = noise_batch
+            y = None
+
+        # First linear layer
+        x = self.noise2feat(z)
+        # Reshape
+        # We use this conversion step to allow for loading TF weights
+        # TF convention on shape is [batch, height, width, channels]
+        # PT convention on shape is [batch, channels, height, width]
+        x = x.view(x.size(0), self.input_scale, self.input_scale, -1)
+        x = x.permute(0, 3, 1, 2).contiguous()
+        # Loop over blocks; attention blocks do not take the condition ``y``
+        for conv_block in self.conv_blocks:
+            if isinstance(conv_block, SelfAttentionBlock):
+                x = conv_block(x)
+            else:
+                x = conv_block(x, y)
+        # Apply batchnorm-relu-conv-tanh at output
+        x = self.output_layer(x)
+        out_img = torch.tanh(x)
+
+        if self.rgb2bgr:
+            # reverse the order of the first three channels
+            out_img = out_img[:, [2, 1, 0], ...]
+
+        if return_noise:
+            return dict(
+                fake_img=out_img, noise_batch=noise_batch, label=label_batch)
+
+        return out_img
+
+    def init_weights(self, pretrained=None, init_type='ortho'):
+        """Init weights for models.
+
+        Args:
+            pretrained (str | dict, optional): Path for the pretrained model or
+                dict containing information for pretrained models whose
+                necessary key is 'ckpt_path'. Besides, you can also provide
+                'prefix' to load the generator part from the whole state dict,
+                as well as 'map_location' (defaults to 'cpu') and 'strict'
+                (defaults to True). Defaults to None.
+            init_type (str, optional): The name of an initialization method:
+                ortho | N02 | xavier. Only used when ``pretrained`` is None.
+                Defaults to 'ortho'.
+
+        Raises:
+            NotImplementedError: If ``pretrained`` is None and ``init_type``
+                is not one of the supported names.
+            TypeError: If ``pretrained`` is neither a str, a dict nor None.
+        """
+        if isinstance(pretrained, str):
+            logger = get_root_logger()
+            load_checkpoint(self, pretrained, strict=False, logger=logger)
+        elif isinstance(pretrained, dict):
+            ckpt_path = pretrained.get('ckpt_path', None)
+            assert ckpt_path is not None
+            prefix = pretrained.get('prefix', '')
+            map_location = pretrained.get('map_location', 'cpu')
+            strict = pretrained.get('strict', True)
+            state_dict = _load_checkpoint_with_prefix(prefix, ckpt_path,
+                                                      map_location)
+            self.load_state_dict(state_dict, strict=strict)
+            mmcv.print_log(f'Load pretrained model from {ckpt_path}', 'mmgen')
+        elif pretrained is None:
+            for m in self.modules():
+                if isinstance(m, (nn.Conv2d, nn.Linear, nn.Embedding)):
+                    if init_type == 'ortho':
+                        nn.init.orthogonal_(m.weight)
+                    elif init_type == 'N02':
+                        normal_init(m, 0.0, 0.02)
+                    elif init_type == 'xavier':
+                        xavier_init(m)
+                    else:
+                        # One plain f-string: the previous backslash
+                        # continuation leaked indentation into the message.
+                        raise NotImplementedError(
+                            f'{init_type} initialization not supported now.')
+        else:
+            # dict is an accepted type as well, so name it in the message
+            raise TypeError('pretrained must be a str, a dict or None, but'
+                            f' got {type(pretrained)} instead.')
+
+
+@MODULES.register_module()
+class BigGANDeepDiscriminator(nn.Module):
+    """BigGAN-Deep Discriminator. The implementation refers to
+    https://github.com/ajbrock/BigGAN-PyTorch/blob/master/BigGANdeep.py # noqa.
+
+    The overall structure of BigGAN's discriminator is the same with
+    the projection discriminator.
+
+    The main difference between BigGAN and BigGAN-deep is that
+    BigGAN-deep uses deeper residual blocks to construct the whole
+    model.
+
+    More details can be found in: Large Scale GAN Training for High Fidelity
+    Natural Image Synthesis (ICLR2019).
+
+    The design of the model structure is highly corresponding to the input
+    resolution. For the original BigGAN-Deep discriminator, you can set
+    ``input_scale`` as you need and use the default value of ``arch_cfg`` and
+    ``blocks_cfg``. If you want to customize the model, you can set the
+    arguments in this way:
+
+    ``arch_cfg``: Config for the architecture of this discriminator. You can
+    refer the ``_default_arch_cfgs`` in the ``_get_default_arch_cfg`` function
+    to see the format of the ``arch_cfg``. Basically, you need to provide
+    information of each block such as the numbers of input and output
+    channels, whether to perform downsampling etc.
+
+    ``blocks_cfg``: Config for the convolution block. You can adjust block params
+    like ``channel_ratio`` here. You can also replace the block type
+    to your registered customized block. However, you should notice that some
+    params are shared between these blocks like ``act_cfg``, ``with_spectral_norm``,
+    ``sn_eps`` etc.
+
+    Args:
+        input_scale (int): The scale of the input image.
+        num_classes (int, optional): The number of conditional classes.
+            Defaults to 0.
+        in_channels (int, optional): The channel number of the input image.
+            Defaults to 3.
+        out_channels (int, optional): The channel number of the final output.
+            Defaults to 1.
+        base_channels (int, optional): The basic channel number of the
+            discriminator. The other layers contains channels based on this
+            number. Defaults to 96.
+        block_depth (int, optional): The repeat times of Residual Blocks in
+            each level of architecture. Defaults to 2.
+        sn_eps (float, optional): Epsilon value for spectral normalization.
+            Defaults to 1e-6.
+        sn_style (str, optional): The style of spectral normalization.
+            If set to `ajbrock`, implementation by
+            ajbrock(https://github.com/ajbrock/BigGAN-PyTorch/blob/master/layers.py)
+            will be adopted.
+            If set to `torch`, implementation by `PyTorch` will be adopted.
+            Defaults to `ajbrock`.
+        init_type (str, optional): The name of an initialization method:
+            ortho | N02 | xavier. Defaults to 'ortho'.
+        act_cfg (dict, optional): Config for the activation layer.
+            Defaults to dict(type='ReLU', inplace=False).
+        with_spectral_norm (bool, optional): Whether to use spectral
+            normalization. Defaults to True.
+        blocks_cfg (dict, optional): Config for the convolution block.
+            Defaults to dict(type='BigGANDeepDiscResBlock').
+        arch_cfg (dict, optional): Config for the architecture of this
+            discriminator. Defaults to None.
+        pretrained (str | dict, optional): Path for the pretrained model or
+            dict containing information for pretrained models whose necessary
+            key is 'ckpt_path'. Besides, you can also provide 'prefix' to load
+            the discriminator part from the whole state dict. Defaults to
+            None.
+    """
+
+    def __init__(self,
+                 input_scale,
+                 num_classes=0,
+                 in_channels=3,
+                 out_channels=1,
+                 base_channels=96,
+                 block_depth=2,
+                 sn_eps=1e-6,
+                 sn_style='ajbrock',
+                 init_type='ortho',
+                 act_cfg=dict(type='ReLU', inplace=False),
+                 with_spectral_norm=True,
+                 blocks_cfg=dict(type='BigGANDeepDiscResBlock'),
+                 arch_cfg=None,
+                 pretrained=None):
+        super().__init__()
+        self.num_classes = num_classes
+        self.out_channels = out_channels
+        self.input_scale = input_scale
+        self.in_channels = in_channels
+        self.base_channels = base_channels
+        self.block_depth = block_depth
+        self.arch = arch_cfg if arch_cfg else self._get_default_arch_cfg(
+            self.input_scale, self.base_channels)
+        # copy before updating so the (shared) default dict is never mutated
+        self.blocks_cfg = deepcopy(blocks_cfg)
+        self.blocks_cfg.update(
+            dict(
+                act_cfg=act_cfg,
+                sn_eps=sn_eps,
+                sn_style=sn_style,
+                with_spectral_norm=with_spectral_norm))
+
+        # Honor ``in_channels`` instead of a hard-coded 3 so that inputs
+        # with a different channel count are accepted.
+        self.input_conv = SNConvModule(
+            self.in_channels,
+            self.arch['in_channels'][0],
+            kernel_size=3,
+            padding=1,
+            with_spectral_norm=with_spectral_norm,
+            spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style),
+            act_cfg=None)
+
+        self.conv_blocks = nn.ModuleList()
+        for index, out_ch in enumerate(self.arch['out_channels']):
+            for depth in range(self.block_depth):
+                # change args to adapt to current block: only the first
+                # block of a stage changes the channel number and may
+                # downsample
+                block_cfg_ = deepcopy(self.blocks_cfg)
+                block_cfg_.update(
+                    dict(
+                        in_channels=self.arch['in_channels'][index]
+                        if depth == 0 else out_ch,
+                        out_channels=out_ch,
+                        with_downsample=self.arch['downsample'][index]
+                        and depth == 0))
+                self.conv_blocks.append(build_module(block_cfg_))
+            if self.arch['attention'][index]:
+                self.conv_blocks.append(
+                    SelfAttentionBlock(
+                        out_ch,
+                        with_spectral_norm=with_spectral_norm,
+                        sn_eps=sn_eps,
+                        sn_style=sn_style))
+
+        self.activate = build_activation_layer(act_cfg)
+
+        self.decision = nn.Linear(self.arch['out_channels'][-1], out_channels)
+        if with_spectral_norm:
+            if sn_style == 'torch':
+                self.decision = spectral_norm(self.decision, eps=sn_eps)
+            elif sn_style == 'ajbrock':
+                self.decision = SNLinear(
+                    self.arch['out_channels'][-1], out_channels, eps=sn_eps)
+            else:
+                raise NotImplementedError(
+                    f'{sn_style} style SN is not supported yet')
+
+        # The label projection only exists for conditional models.
+        if self.num_classes > 0:
+            self.proj_y = nn.Embedding(self.num_classes,
+                                       self.arch['out_channels'][-1])
+            if with_spectral_norm:
+                if sn_style == 'torch':
+                    self.proj_y = spectral_norm(self.proj_y, eps=sn_eps)
+                elif sn_style == 'ajbrock':
+                    self.proj_y = SNEmbedding(
+                        self.num_classes,
+                        self.arch['out_channels'][-1],
+                        eps=sn_eps)
+                else:
+                    raise NotImplementedError(
+                        f'{sn_style} style SN is not supported yet')
+
+        self.init_weights(pretrained=pretrained, init_type=init_type)
+
+    def _get_default_arch_cfg(self, input_scale, base_channels):
+        """Get the default architecture config for ``input_scale``.
+
+        Args:
+            input_scale (int): The scale of the input image. Must be one of
+                32, 64, 128, 256 and 512.
+            base_channels (int): The number every channel multiplier is
+                scaled by.
+
+        Returns:
+            dict: Lists stored under the keys ``in_channels``,
+                ``out_channels``, ``downsample``, ``resolution`` and
+                ``attention``.
+        """
+        assert input_scale in [32, 64, 128, 256, 512]
+        # NOTE: some 'downsample' / 'resolution' / 'attention' lists are one
+        # entry longer than the channel lists. ``__init__`` only indexes them
+        # by the position in 'out_channels', so trailing entries are unused.
+        _default_arch_cfgs = {
+            '32': {
+                'in_channels': [base_channels * item for item in [4, 4, 4]],
+                'out_channels': [base_channels * item for item in [4, 4, 4]],
+                'downsample': [True, True, False, False],
+                'resolution': [16, 8, 8, 8],
+                'attention': [False, False, False, False]
+            },
+            '64': {
+                'in_channels': [base_channels * item for item in [1, 2, 4, 8]],
+                'out_channels':
+                [base_channels * item for item in [2, 4, 8, 16]],
+                'downsample': [True] * 4 + [False],
+                'resolution': [32, 16, 8, 4, 4],
+                'attention': [False, False, False, False, False]
+            },
+            '128': {
+                'in_channels':
+                [base_channels * item for item in [1, 2, 4, 8, 16]],
+                'out_channels':
+                [base_channels * item for item in [2, 4, 8, 16, 16]],
+                'downsample': [True] * 5 + [False],
+                'resolution': [64, 32, 16, 8, 4, 4],
+                'attention': [True, False, False, False, False, False]
+            },
+            '256': {
+                'in_channels':
+                [base_channels * item for item in [1, 2, 4, 8, 8, 16]],
+                'out_channels':
+                [base_channels * item for item in [2, 4, 8, 8, 16, 16]],
+                'downsample': [True] * 6 + [False],
+                'resolution': [128, 64, 32, 16, 8, 4, 4],
+                'attention': [False, True, False, False, False, False]
+            },
+            '512': {
+                'in_channels':
+                [base_channels * item for item in [1, 1, 2, 4, 8, 8, 16]],
+                'out_channels':
+                [base_channels * item for item in [1, 2, 4, 8, 8, 16, 16]],
+                'downsample': [True] * 7 + [False],
+                'resolution': [256, 128, 64, 32, 16, 8, 4, 4],
+                'attention': [False, False, False, True, False, False, False]
+            }
+        }
+
+        return _default_arch_cfgs[str(input_scale)]
+
+    def forward(self, x, label=None):
+        """Forward function.
+
+        Args:
+            x (torch.Tensor): Fake or real image tensor.
+            label (torch.Tensor | None): Label Tensor. Required when
+                ``num_classes`` is larger than 0. Defaults to None.
+
+        Returns:
+            torch.Tensor: Prediction for the reality of the input image with
+                given label.
+        """
+        x0 = self.input_conv(x)
+        for conv_block in self.conv_blocks:
+            x0 = conv_block(x0)
+        x0 = self.activate(x0)
+        # sum over the two trailing (spatial) dimensions
+        x0 = torch.sum(x0, dim=[2, 3])
+        out = self.decision(x0)
+
+        if self.num_classes > 0:
+            # fail early with a readable message instead of deep inside the
+            # embedding lookup
+            assert label is not None, (
+                'A label is required when `num_classes` is larger than 0.')
+            # projection term: inner product of label embedding and feature
+            w_y = self.proj_y(label)
+            out = out + torch.sum(w_y * x0, dim=1, keepdim=True)
+        return out
+
+    def init_weights(self, pretrained=None, init_type='ortho'):
+        """Init weights for models.
+
+        Args:
+            pretrained (str | dict, optional): Path for the pretrained model or
+                dict containing information for pretrained models whose
+                necessary key is 'ckpt_path'. Besides, you can also provide
+                'prefix' to load the discriminator part from the whole state
+                dict, as well as 'map_location' (defaults to 'cpu') and
+                'strict' (defaults to True). Defaults to None.
+            init_type (str, optional): The name of an initialization method:
+                ortho | N02 | xavier. Only used when ``pretrained`` is None.
+                Defaults to 'ortho'.
+
+        Raises:
+            NotImplementedError: If ``pretrained`` is None and ``init_type``
+                is not one of the supported names.
+            TypeError: If ``pretrained`` is neither a str, a dict nor None.
+        """
+
+        if isinstance(pretrained, str):
+            logger = get_root_logger()
+            load_checkpoint(self, pretrained, strict=False, logger=logger)
+        elif isinstance(pretrained, dict):
+            ckpt_path = pretrained.get('ckpt_path', None)
+            assert ckpt_path is not None
+            prefix = pretrained.get('prefix', '')
+            map_location = pretrained.get('map_location', 'cpu')
+            strict = pretrained.get('strict', True)
+            state_dict = _load_checkpoint_with_prefix(prefix, ckpt_path,
+                                                      map_location)
+            self.load_state_dict(state_dict, strict=strict)
+            mmcv.print_log(f'Load pretrained model from {ckpt_path}', 'mmgen')
+        elif pretrained is None:
+            for m in self.modules():
+                if isinstance(m, (nn.Conv2d, nn.Linear, nn.Embedding)):
+                    if init_type == 'ortho':
+                        nn.init.orthogonal_(m.weight)
+                    elif init_type == 'N02':
+                        normal_init(m, 0.0, 0.02)
+                    elif init_type == 'xavier':
+                        xavier_init(m)
+                    else:
+                        # One plain f-string: the previous backslash
+                        # continuation leaked indentation into the message.
+                        raise NotImplementedError(
+                            f'{init_type} initialization not supported now.')
+        else:
+            # dict is an accepted type as well, so name it in the message
+            raise TypeError('pretrained must be a str, a dict or None, but'
+                            f' got {type(pretrained)} instead.')
--- a/build/lib/mmgen/models/architectures/biggan/modules.py
+++ b/build/lib/mmgen/models/architectures/biggan/modules.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from copy import deepcopy
+
+import torch
+import torch.distributed as dist
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import ConvModule
+from mmcv.cnn.bricks import build_activation_layer, build_upsample_layer
+from torch.nn import Parameter
+from torch.nn.modules.batchnorm import SyncBatchNorm
+from torch.nn.utils import spectral_norm
+
+from mmgen.models.builder import MODULES
+from .biggan_snmodule import SNConv2d, SNLinear
+
+
+class SNConvModule(ConvModule):
+    """``mmcv.cnn.ConvModule`` whose convolution can be spectrally normalized.
+
+    The parent module is always built with its own spectral normalization
+    switched off; afterwards the convolution layer is wrapped or replaced
+    here according to ``spectral_norm_cfg['sn_style']``. Spectral
+    normalization is proposed in: Spectral Normalization for Generative
+    Adversarial Networks.
+
+    Args:
+        with_spectral_norm (bool, optional): Whether to apply spectral
+            normalization to the convolution. Defaults to False.
+        spectral_norm_cfg (dict, optional): Options for spectral
+            normalization. The keys read here are ``eps`` (falls back to
+            1e-6) and ``sn_style`` (falls back to ``'torch'``).
+            Defaults to None.
+    """
+
+    def __init__(self,
+                 *args,
+                 with_spectral_norm=False,
+                 spectral_norm_cfg=None,
+                 **kwargs):
+        super().__init__(*args, with_spectral_norm=False, **kwargs)
+        self.with_spectral_norm = with_spectral_norm
+        if spectral_norm_cfg:
+            self.spectral_norm_cfg = deepcopy(spectral_norm_cfg)
+        else:
+            self.spectral_norm_cfg = dict()
+
+        self.sn_eps = self.spectral_norm_cfg.get('eps', 1e-6)
+        self.sn_style = self.spectral_norm_cfg.get('sn_style', 'torch')
+
+        if not self.with_spectral_norm:
+            return
+
+        if self.sn_style == 'torch':
+            self.conv = spectral_norm(self.conv, eps=self.sn_eps)
+            return
+
+        if self.sn_style != 'ajbrock':
+            raise NotImplementedError(
+                f'{self.sn_style} style spectral Norm is not supported yet')
+
+        # These options are dropped before the remaining keyword arguments
+        # are forwarded to SNConv2d.
+        self.snconv_kwargs = deepcopy(kwargs) if kwargs else dict()
+        for dropped_key in ('act_cfg', 'norm_cfg', 'order'):
+            self.snconv_kwargs.pop(dropped_key, None)
+        self.conv = SNConv2d(*args, **self.snconv_kwargs, eps=self.sn_eps)
+
+
+@MODULES.register_module()
+class BigGANGenResBlock(nn.Module):
+    """Residual block used in BigGAN's generator.
+
+    Args:
+        in_channels (int): The channel number of the input feature map.
+        out_channels (int): The channel number of the output feature map.
+        dim_after_concat (int): The channel number of the noise concatenated
+            with the class vector.
+        act_cfg (dict, optional): Config for the activation layer. Defaults to
+            dict(type='ReLU').
+        upsample_cfg (dict, optional): Config for the upsampling operation.
+            Defaults to dict(type='nearest', scale_factor=2).
+        sn_eps (float, optional): Epsilon value for spectral normalization.
+            Defaults to 1e-6.
+        sn_style (str, optional): The style of spectral normalization.
+            If set to `ajbrock`, implementation by
+            ajbrock(https://github.com/ajbrock/BigGAN-PyTorch/blob/master/layers.py)
+            will be adopted.
+            If set to `torch`, implementation by `PyTorch` will be adopted.
+            Defaults to `ajbrock`.
+        with_spectral_norm (bool, optional): Whether to use spectral
+            normalization in this block. Defaults to True.
+        input_is_label (bool, optional): Whether the input of BNs' linear layer
+            is raw label instead of class vector. Defaults to False.
+        auto_sync_bn (bool, optional): Whether to use synchronized batch
+            normalization. Defaults to True.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 dim_after_concat,
+                 act_cfg=dict(type='ReLU'),
+                 upsample_cfg=dict(type='nearest', scale_factor=2),
+                 sn_eps=1e-6,
+                 sn_style='ajbrock',
+                 with_spectral_norm=True,
+                 input_is_label=False,
+                 auto_sync_bn=True):
+        super().__init__()
+
+        def make_conv(conv_in, conv_out, kernel_size):
+            # Stride-1 conv without activation. ``kernel_size // 2`` gives
+            # padding 0 for the 1x1 shortcut and padding 1 for the 3x3 convs.
+            # A new ``spectral_norm_cfg`` dict is created on every call.
+            return SNConvModule(
+                in_channels=conv_in,
+                out_channels=conv_out,
+                kernel_size=kernel_size,
+                stride=1,
+                padding=kernel_size // 2,
+                act_cfg=None,
+                with_spectral_norm=with_spectral_norm,
+                spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style))
+
+        def make_cond_bn(num_features):
+            # Conditional BN whose linear/embedding input has
+            # ``dim_after_concat`` channels.
+            return BigGANConditionBN(
+                num_features,
+                dim_after_concat,
+                sn_eps=sn_eps,
+                sn_style=sn_style,
+                input_is_label=input_is_label,
+                with_spectral_norm=with_spectral_norm,
+                auto_sync_bn=auto_sync_bn)
+
+        self.activation = build_activation_layer(act_cfg)
+
+        # Keep a private copy of the config; ``None`` disables upsampling.
+        self.upsample_cfg = deepcopy(upsample_cfg)
+        self.with_upsample = upsample_cfg is not None
+        if self.with_upsample:
+            self.upsample_layer = build_upsample_layer(self.upsample_cfg)
+
+        # A 1x1 conv is used on the skip path whenever the block upsamples
+        # or changes the channel number.
+        self.learnable_sc = self.with_upsample or in_channels != out_channels
+        if self.learnable_sc:
+            self.shortcut = make_conv(in_channels, out_channels, 1)
+
+        # bn1 normalizes the block input, bn2 the output of conv1.
+        self.bn1 = make_cond_bn(in_channels)
+        self.bn2 = make_cond_bn(out_channels)
+
+        self.conv1 = make_conv(in_channels, out_channels, 3)
+        self.conv2 = make_conv(out_channels, out_channels, 3)
+
+    def forward(self, x, y):
+        """Run the residual block.
+
+        Args:
+            x (torch.Tensor): Input feature map tensor.
+            y (torch.Tensor): Label tensor or class embedding concatenated with
+                noise tensor. It is forwarded to both conditional BN layers.
+
+        Returns:
+            torch.Tensor: Sum of the residual branch and the skip branch.
+        """
+        skip = x
+        out = self.activation(self.bn1(x, y))
+        if self.with_upsample:
+            # Both branches are upsampled before any convolution is applied.
+            out = self.upsample_layer(out)
+            skip = self.upsample_layer(skip)
+        out = self.conv1(out)
+        out = self.activation(self.bn2(out, y))
+        out = self.conv2(out)
+        if self.learnable_sc:
+            skip = self.shortcut(skip)
+        return out + skip
+
+
+@MODULES.register_module()
+class BigGANConditionBN(nn.Module):
+    """Conditional Batch Normalization used in BigGAN.
+
+    When ``linear_input_channels`` is positive, the batch norm is built
+    without affine parameters and a per-sample gain and bias are predicted
+    from the conditioning input instead. Otherwise a plain affine batch norm
+    is used and the conditioning input is ignored.
+
+    Args:
+        num_features (int): The channel number of the input feature map tensor.
+            Must be positive.
+        linear_input_channels (int): The channel number of the linear layers'
+            input tensor (or the number of embeddings when ``input_is_label``
+            is True).
+        bn_eps (float, optional): Epsilon value for batch normalization.
+            Defaults to 1e-5.
+        sn_eps (float, optional): Epsilon value for spectral normalization.
+            Defaults to 1e-6.
+        sn_style (str, optional): The style of spectral normalization.
+            If set to `ajbrock`, implementation by
+            ajbrock(https://github.com/ajbrock/BigGAN-PyTorch/blob/master/layers.py)
+            will be adopted.
+            If set to `torch`, implementation by `PyTorch` will be adopted.
+            Defaults to `ajbrock`.
+        momentum (float, optional): The value used for the running_mean and
+            running_var computation. Defaults to 0.1.
+        input_is_label (bool, optional): Whether the input of BNs' linear layer
+            is raw label instead of class vector. Defaults to False.
+        with_spectral_norm (bool, optional): Whether to use spectral
+            normalization. Defaults to True.
+        auto_sync_bn (bool, optional): Whether to use synchronized batch
+            normalization. Defaults to True.
+    """
+
+    def __init__(self,
+                 num_features,
+                 linear_input_channels,
+                 bn_eps=1e-5,
+                 sn_eps=1e-6,
+                 sn_style='ajbrock',
+                 momentum=0.1,
+                 input_is_label=False,
+                 with_spectral_norm=True,
+                 auto_sync_bn=True):
+        super().__init__()
+        assert num_features > 0
+        # Conditioning is only enabled for a positive input size.
+        self.use_cbn = bool(linear_input_channels > 0)
+
+        # Prepare gain and bias layers
+        if self.use_cbn:
+            if input_is_label:
+                # Raw labels are looked up in embedding tables.
+                self.gain = nn.Embedding(linear_input_channels, num_features)
+                self.bias = nn.Embedding(linear_input_channels, num_features)
+            else:
+                self.gain = nn.Linear(
+                    linear_input_channels, num_features, bias=False)
+                self.bias = nn.Linear(
+                    linear_input_channels, num_features, bias=False)
+                # please pay attention if shared_embedding is False
+                if with_spectral_norm:
+                    if sn_style == 'torch':
+                        # Wrap the plain linear layers built above.
+                        self.gain = spectral_norm(self.gain, eps=sn_eps)
+                        self.bias = spectral_norm(self.bias, eps=sn_eps)
+                    elif sn_style == 'ajbrock':
+                        # Replace the plain linear layers built above.
+                        self.gain = SNLinear(
+                            linear_input_channels,
+                            num_features,
+                            bias=False,
+                            eps=sn_eps)
+                        self.bias = SNLinear(
+                            linear_input_channels,
+                            num_features,
+                            bias=False,
+                            eps=sn_eps)
+                    else:
+                        raise NotImplementedError('sn style')
+
+        # The BN's own affine parameters are dropped when gain and bias come
+        # from the conditioning input.
+        self.bn = nn.BatchNorm2d(
+            num_features,
+            eps=bn_eps,
+            momentum=momentum,
+            affine=not self.use_cbn)
+
+        if auto_sync_bn and dist.is_initialized():
+            self.bn = SyncBatchNorm.convert_sync_batchnorm(self.bn)
+
+    def forward(self, x, y):
+        """Normalize ``x`` and, if enabled, modulate it with ``y``.
+
+        Args:
+            x (torch.Tensor): Input feature map tensor.
+            y (torch.Tensor): Label tensor or class embedding concatenated with
+                noise tensor. Unused when conditioning is disabled.
+
+        Returns:
+            torch.Tensor: Output feature map tensor.
+        """
+        if not self.use_cbn:
+            return self.bn(x)
+
+        # Calculate class-conditional gains and biases, reshaped so that they
+        # broadcast over the two trailing dimensions of the feature map.
+        batch_size = y.size(0)
+        gain = (1. + self.gain(y)).view(batch_size, -1, 1, 1)
+        bias = self.bias(y).view(batch_size, -1, 1, 1)
+        return self.bn(x) * gain + bias
+
+
+@MODULES.register_module()
+class SelfAttentionBlock(nn.Module):
+    """Self-Attention block used in BigGAN.
+
+    Four bias-free 1x1 convolutions are used: ``theta`` and ``phi`` reduce the
+    channels by a factor of 8, ``g`` by a factor of 2, and ``o`` projects the
+    attended features back to ``in_channels``. The result is scaled by a
+    learnable scalar ``gamma`` (initialized to 0) and added to the input.
+
+    Args:
+        in_channels (int): The channel number of the input feature map.
+        with_spectral_norm (bool, optional): Whether to use spectral
+            normalization. Defaults to True.
+        sn_eps (float, optional): Epsilon value for spectral normalization.
+            Defaults to 1e-6.
+        sn_style (str, optional): The style of spectral normalization.
+            If set to `ajbrock`, implementation by
+            ajbrock(https://github.com/ajbrock/BigGAN-PyTorch/blob/master/layers.py)
+            will be adopted.
+            If set to `torch`, implementation by `PyTorch` will be adopted.
+            Defaults to `ajbrock`.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 with_spectral_norm=True,
+                 sn_eps=1e-6,
+                 sn_style='ajbrock'):
+        super().__init__()
+
+        self.in_channels = in_channels
+
+        def make_proj(proj_in, proj_out):
+            # Bias-free 1x1 projection without activation; a new
+            # ``spectral_norm_cfg`` dict is created on every call.
+            return SNConvModule(
+                proj_in,
+                proj_out,
+                kernel_size=1,
+                padding=0,
+                bias=False,
+                act_cfg=None,
+                with_spectral_norm=with_spectral_norm,
+                spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style))
+
+        self.theta = make_proj(self.in_channels, self.in_channels // 8)
+        self.phi = make_proj(self.in_channels, self.in_channels // 8)
+        self.g = make_proj(self.in_channels, self.in_channels // 2)
+        self.o = make_proj(self.in_channels // 2, self.in_channels)
+        # Learnable gain parameter
+        self.gamma = Parameter(torch.tensor(0.), requires_grad=True)
+
+    def forward(self, x):
+        """Apply self-attention and add the result to the input.
+
+        Args:
+            x (torch.Tensor): Input feature map tensor.
+
+        Returns:
+            torch.Tensor: Output feature map tensor.
+        """
+        height, width = x.shape[2], x.shape[3]
+        num_pixels = height * width
+        narrow_channels = self.in_channels // 8
+        half_channels = self.in_channels // 2
+
+        # Apply convs; phi and g are additionally 2x2 max-pooled.
+        theta = self.theta(x)
+        phi = F.max_pool2d(self.phi(x), [2, 2])
+        g = F.max_pool2d(self.g(x), [2, 2])
+
+        # Flatten the spatial dimensions; the pooled maps hold a quarter of
+        # the positions.
+        theta = theta.view(-1, narrow_channels, num_pixels)
+        phi = phi.view(-1, narrow_channels, num_pixels // 4)
+        g = g.view(-1, half_channels, num_pixels // 4)
+
+        # Matmul and softmax to get attention maps
+        scores = torch.bmm(theta.transpose(1, 2), phi)
+        beta = F.softmax(scores, -1)
+
+        # Attention map times g path, reshaped back to a feature map.
+        attended = torch.bmm(g, beta.transpose(1, 2))
+        attended = attended.view(-1, half_channels, height, width)
+        o = self.o(attended)
+        return self.gamma * o + x
+
+
+@MODULES.register_module()
+class BigGANDiscResBlock(nn.Module):
+    """Residual block used in BigGAN's discriminator.
+
+    The residual branch is two 3x3 convolutions with an optional 2x2 average
+    pooling at the end. The skip branch uses a 1x1 convolution whenever the
+    block downsamples or changes the channel number.
+
+    Args:
+        in_channels (int): The channel number of the input tensor.
+        out_channels (int): The channel number of the output tensor.
+        act_cfg (dict, optional): Config for the activation layer. Defaults to
+            dict(type='ReLU', inplace=False).
+        sn_eps (float, optional): Epsilon value for spectral normalization.
+            Defaults to 1e-6.
+        sn_style (str, optional): The style of spectral normalization.
+            If set to `ajbrock`, implementation by
+            ajbrock(https://github.com/ajbrock/BigGAN-PyTorch/blob/master/layers.py)
+            will be adopted.
+            If set to `torch`, implementation by `PyTorch` will be adopted.
+            Defaults to `ajbrock`.
+        with_downsample (bool, optional): Whether to use downsampling in this
+            block. Defaults to True.
+        with_spectral_norm (bool, optional): Whether to use spectral
+            normalization. Defaults to True.
+        is_head_block (bool, optional): Whether this block is the first block
+            of BigGAN. Defaults to False.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 act_cfg=dict(type='ReLU', inplace=False),
+                 sn_eps=1e-6,
+                 sn_style='ajbrock',
+                 with_downsample=True,
+                 with_spectral_norm=True,
+                 is_head_block=False):
+        super().__init__()
+
+        def make_conv(conv_in, conv_out, kernel_size):
+            # Stride-1 conv without activation. ``kernel_size // 2`` gives
+            # padding 0 for the 1x1 shortcut and padding 1 for the 3x3 convs.
+            return SNConvModule(
+                in_channels=conv_in,
+                out_channels=conv_out,
+                kernel_size=kernel_size,
+                stride=1,
+                padding=kernel_size // 2,
+                act_cfg=None,
+                with_spectral_norm=with_spectral_norm,
+                spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style))
+
+        self.activation = build_activation_layer(act_cfg)
+        self.with_downsample = with_downsample
+        self.is_head_block = is_head_block
+        if self.with_downsample:
+            self.downsample = nn.AvgPool2d(kernel_size=2, stride=2)
+
+        self.learnable_sc = self.with_downsample or in_channels != out_channels
+        if self.learnable_sc:
+            self.shortcut = make_conv(in_channels, out_channels, 1)
+
+        self.conv1 = make_conv(in_channels, out_channels, 3)
+        self.conv2 = make_conv(out_channels, out_channels, 3)
+
+    def forward_sc(self, x):
+        """Forward function of shortcut.
+
+        A head block pools first and then applies the 1x1 conv; every other
+        block applies the 1x1 conv first and then pools.
+
+        Args:
+            x (torch.Tensor): Input feature map tensor.
+
+        Returns:
+            torch.Tensor: Output tensor of shortcut.
+        """
+        # Collect the enabled steps in the non-head order, then flip the
+        # order for the head block.
+        steps = []
+        if self.learnable_sc:
+            steps.append(self.shortcut)
+        if self.with_downsample:
+            steps.append(self.downsample)
+        if self.is_head_block:
+            steps.reverse()
+
+        for step in steps:
+            x = step(x)
+        return x
+
+    def forward(self, x):
+        """Forward function.
+
+        Args:
+            x (torch.Tensor): Input feature map tensor.
+
+        Returns:
+            torch.Tensor: Output feature map tensor.
+        """
+        # The head block skips the activation in front of the first conv.
+        residual = x if self.is_head_block else self.activation(x)
+        residual = self.conv1(residual)
+        residual = self.conv2(self.activation(residual))
+        if self.with_downsample:
+            residual = self.downsample(residual)
+        skip = self.forward_sc(x)
+        return residual + skip
+
+
+@MODULES.register_module()
+class BigGANDeepGenResBlock(nn.Module):
+    """Residual block used in BigGAN-Deep's generator.
+
+    The residual branch is a bottleneck: a 1x1 conv down to
+    ``in_channels // channel_ratio`` hidden channels, two 3x3 convs at that
+    width and a 1x1 conv up to ``out_channels``. Each conv is preceded by a
+    conditional batch norm and the activation. The skip branch has no
+    parameters.
+
+    Args:
+        in_channels (int): The channel number of the input feature map.
+        out_channels (int): The channel number of the output feature map.
+        dim_after_concat (int): The channel number of the noise concatenated
+            with the class vector.
+        act_cfg (dict, optional): Config for the activation layer. Defaults to
+            dict(type='ReLU').
+        upsample_cfg (dict, optional): Config for the upsampling operation.
+            Defaults to dict(type='nearest', scale_factor=2).
+        sn_eps (float, optional): Epsilon value for spectral normalization.
+            Defaults to 1e-6.
+        sn_style (str, optional): The style of spectral normalization.
+            If set to `ajbrock`, implementation by
+            ajbrock(https://github.com/ajbrock/BigGAN-PyTorch/blob/master/layers.py)
+            will be adopted.
+            If set to `torch`, implementation by `PyTorch` will be adopted.
+            Defaults to `ajbrock`.
+        bn_eps (float, optional): Epsilon value for batch normalization.
+            Defaults to 1e-5.
+        with_spectral_norm (bool, optional): Whether to use spectral
+            normalization in this block. Defaults to True.
+        input_is_label (bool, optional): Whether the input of BNs' linear layer
+            is raw label instead of class vector. Defaults to False.
+        auto_sync_bn (bool, optional): Whether to use synchronized batch
+            normalization. Defaults to True.
+        channel_ratio (int, optional): The ratio of the input channels' number
+            to the hidden channels' number. Defaults to 4.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 dim_after_concat,
+                 act_cfg=dict(type='ReLU'),
+                 upsample_cfg=dict(type='nearest', scale_factor=2),
+                 sn_eps=1e-6,
+                 sn_style='ajbrock',
+                 bn_eps=1e-5,
+                 with_spectral_norm=True,
+                 input_is_label=False,
+                 auto_sync_bn=True,
+                 channel_ratio=4):
+        super().__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.hidden_channels = self.in_channels // channel_ratio
+        self.activation = build_activation_layer(act_cfg)
+
+        # Keep a private copy of the config; ``None`` disables upsampling.
+        self.upsample_cfg = deepcopy(upsample_cfg)
+        self.with_upsample = upsample_cfg is not None
+        if self.with_upsample:
+            self.upsample_layer = build_upsample_layer(self.upsample_cfg)
+
+        def make_cond_bn(num_features):
+            # ``num_features`` must match the channels of the tensor that the
+            # BN layer normalizes.
+            return BigGANConditionBN(
+                num_features,
+                dim_after_concat,
+                sn_eps=sn_eps,
+                sn_style=sn_style,
+                bn_eps=bn_eps,
+                input_is_label=input_is_label,
+                with_spectral_norm=with_spectral_norm,
+                auto_sync_bn=auto_sync_bn)
+
+        def make_conv(conv_in, conv_out, kernel_size):
+            # Stride-1 conv without activation. ``kernel_size // 2`` gives
+            # padding 0 for the 1x1 convs and padding 1 for the 3x3 convs.
+            return SNConvModule(
+                in_channels=conv_in,
+                out_channels=conv_out,
+                kernel_size=kernel_size,
+                stride=1,
+                padding=kernel_size // 2,
+                act_cfg=None,
+                with_spectral_norm=with_spectral_norm,
+                spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style))
+
+        # bn1 sees the block input; bn2, bn3 and bn4 see hidden-width
+        # feature maps.
+        self.bn1 = make_cond_bn(in_channels)
+        self.bn2 = make_cond_bn(self.hidden_channels)
+        self.bn3 = make_cond_bn(self.hidden_channels)
+        self.bn4 = make_cond_bn(self.hidden_channels)
+
+        self.conv1 = make_conv(in_channels, self.hidden_channels, 1)
+        self.conv2 = make_conv(self.hidden_channels, self.hidden_channels, 3)
+        self.conv3 = make_conv(self.hidden_channels, self.hidden_channels, 3)
+        self.conv4 = make_conv(self.hidden_channels, out_channels, 1)
+
+    def forward(self, x, y):
+        """Forward function.
+
+        Args:
+            x (torch.Tensor): Input feature map tensor.
+            y (torch.Tensor): Label tensor or class embedding concatenated with
+                noise tensor. It is forwarded to every conditional BN layer.
+
+        Returns:
+            torch.Tensor: Output feature map tensor.
+        """
+        skip = x
+
+        hidden = self.activation(self.bn1(x, y))
+        hidden = self.conv1(hidden)
+
+        hidden = self.activation(self.bn2(hidden, y))
+        # Drop the trailing channels of the skip branch if necessary
+        if self.in_channels != self.out_channels:
+            skip = skip[:, :self.out_channels]
+        # Upsample both the residual branch and the skip branch at this point
+        if self.with_upsample:
+            hidden = self.upsample_layer(hidden)
+            skip = self.upsample_layer(skip)
+        hidden = self.conv2(hidden)
+
+        hidden = self.activation(self.bn3(hidden, y))
+        hidden = self.conv3(hidden)
+
+        hidden = self.activation(self.bn4(hidden, y))
+        hidden = self.conv4(hidden)
+        return hidden + skip
+
+
+@MODULES.register_module()
+class BigGANDeepDiscResBlock(nn.Module):
+    """Residual block used in BigGAN-Deep's discriminator.
+
+    The residual branch is a bottleneck with
+    ``out_channels // channel_ratio`` hidden channels. When the channel number
+    changes, the skip branch concatenates the (optionally pooled) input with
+    ``out_channels - in_channels`` extra channels produced by a 1x1 conv.
+
+    Args:
+        in_channels (int): The channel number of the input tensor.
+        out_channels (int): The channel number of the output tensor.
+        channel_ratio (int, optional): The ratio of the output channels' number
+            to the hidden channels' number. Defaults to 4.
+        act_cfg (dict, optional): Config for the activation layer. Defaults to
+            dict(type='ReLU', inplace=False).
+        sn_eps (float, optional): Epsilon value for spectral normalization.
+            Defaults to 1e-6.
+        sn_style (str, optional): The style of spectral normalization.
+            If set to `ajbrock`, implementation by
+            ajbrock(https://github.com/ajbrock/BigGAN-PyTorch/blob/master/layers.py)
+            will be adopted.
+            If set to `torch`, implementation by `PyTorch` will be adopted.
+            Defaults to `ajbrock`.
+        with_downsample (bool, optional): Whether to use downsampling in this
+            block. Defaults to True.
+        with_spectral_norm (bool, optional): Whether to use spectral
+            normalization. Defaults to True.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 channel_ratio=4,
+                 act_cfg=dict(type='ReLU', inplace=False),
+                 sn_eps=1e-6,
+                 sn_style='ajbrock',
+                 with_downsample=True,
+                 with_spectral_norm=True):
+        super().__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.hidden_channels = self.out_channels // channel_ratio
+        self.activation = build_activation_layer(act_cfg)
+        self.with_downsample = with_downsample
+
+        if self.with_downsample:
+            self.downsample = nn.AvgPool2d(kernel_size=2, stride=2)
+
+        def make_plain_conv(conv_in, conv_out):
+            # 1x1 stride-1 conv without activation.
+            return SNConvModule(
+                in_channels=conv_in,
+                out_channels=conv_out,
+                kernel_size=1,
+                stride=1,
+                padding=0,
+                act_cfg=None,
+                with_spectral_norm=with_spectral_norm,
+                spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style))
+
+        def make_act_conv(conv_in, conv_out, kernel_size):
+            # Stride-1 conv configured with ``act_cfg`` and the
+            # ('act', 'conv', 'norm') order. ``kernel_size // 2`` gives
+            # padding 0 for 1x1 and padding 1 for 3x3 kernels.
+            return SNConvModule(
+                in_channels=conv_in,
+                out_channels=conv_out,
+                kernel_size=kernel_size,
+                stride=1,
+                padding=kernel_size // 2,
+                act_cfg=act_cfg,
+                with_spectral_norm=with_spectral_norm,
+                spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style),
+                order=('act', 'conv', 'norm'))
+
+        # The shortcut conv only produces the channels that are missing from
+        # the input; they are concatenated to it in ``forward_sc``.
+        self.learnable_sc = (in_channels != out_channels)
+        if self.learnable_sc:
+            self.shortcut = make_plain_conv(in_channels,
+                                            out_channels - in_channels)
+
+        self.conv1 = make_act_conv(in_channels, self.hidden_channels, 1)
+        self.conv2 = make_act_conv(self.hidden_channels,
+                                   self.hidden_channels, 3)
+        self.conv3 = make_act_conv(self.hidden_channels,
+                                   self.hidden_channels, 3)
+        self.conv4 = make_plain_conv(self.hidden_channels, out_channels)
+
+    def forward_sc(self, x):
+        """Forward function of shortcut.
+
+        Args:
+            x (torch.Tensor): Input feature map tensor.
+
+        Returns:
+            torch.Tensor: Output tensor of shortcut.
+        """
+        skip = self.downsample(x) if self.with_downsample else x
+        if not self.learnable_sc:
+            return skip
+        # Append the extra channels after the original ones.
+        extra = self.shortcut(skip)
+        return torch.cat([skip, extra], dim=1)
+
+    def forward(self, x):
+        """Forward function.
+
+        Args:
+            x (torch.Tensor): Input feature map tensor.
+
+        Returns:
+            torch.Tensor: Output feature map tensor.
+        """
+        residual = self.conv3(self.conv2(self.conv1(x)))
+        # conv4 is built without an activation, so it is applied explicitly
+        # here, before the optional pooling.
+        residual = self.activation(residual)
+        # downsample
+        if self.with_downsample:
+            residual = self.downsample(residual)
+        residual = self.conv4(residual)
+        skip = self.forward_sc(x)
+        return residual + skip
--- a/build/lib/mmgen/models/architectures/common.py
+++ b/build/lib/mmgen/models/architectures/common.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+
+
+def get_module_device(module):
+    """Get the device of a module.
+
+    The device is taken from the first parameter yielded by
+    ``module.parameters()``; other parameters are not inspected.
+
+    Args:
+        module (nn.Module): A module contains the parameters.
+
+    Returns:
+        torch.device | int: ``torch.device('cpu')`` when the first parameter
+            is not on CUDA, otherwise the value returned by that parameter's
+            ``get_device()`` (the integer CUDA device index).
+
+    Raises:
+        ValueError: If the module has no parameters.
+    """
+    # Fetch the first parameter once instead of re-creating the iterator for
+    # every check.
+    first_param = next(module.parameters(), None)
+    if first_param is None:
+        raise ValueError('The input module should contain parameters.')
+
+    if first_param.is_cuda:
+        return first_param.get_device()
+
+    return torch.device('cpu')
--- a/build/lib/mmgen/models/architectures/cyclegan/__init__.py
+++ b/build/lib/mmgen/models/architectures/cyclegan/__init__.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from .generator_discriminator import ResnetGenerator
+from .modules import ResidualBlockWithDropout
+
+__all__ = ['ResnetGenerator', 'ResidualBlockWithDropout']
--- a/build/lib/mmgen/models/architectures/cyclegan/generator_discriminator.py
+++ b/build/lib/mmgen/models/architectures/cyclegan/generator_discriminator.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch.nn as nn
+from mmcv.cnn import ConvModule
+from mmcv.runner import load_checkpoint
+
+from mmgen.models.architectures.pix2pix import generation_init_weights
+from mmgen.models.builder import MODULES
+from mmgen.utils import get_root_logger
+from .modules import ResidualBlockWithDropout
+
+
+@MODULES.register_module()
+class ResnetGenerator(nn.Module):
+    """Construct a Resnet-based generator that consists of residual blocks
+    between a few downsampling/upsampling operations.
+
+    The layers are, in order: a 7x7 conv to ``base_channels``, two stride-2
+    3x3 convs that double the channels each time, ``num_blocks`` residual
+    blocks, two stride-2 transposed convs that halve the channels each time,
+    and a final 7x7 conv with Tanh activation.
+
+    Args:
+        in_channels (int): Number of channels in input images.
+        out_channels (int): Number of channels in output images.
+        base_channels (int): Number of filters at the last conv layer.
+            Default: 64.
+        norm_cfg (dict): Config dict to build norm layer. Default:
+            `dict(type='IN')`.
+        use_dropout (bool): Whether to use dropout layers. Default: False.
+        num_blocks (int): Number of residual blocks. Default: 9.
+        padding_mode (str): The name of padding layer in conv layers:
+            'reflect' | 'replicate' | 'zeros'. Default: 'reflect'.
+        init_cfg (dict): Config dict for initialization.
+            `type`: The name of our initialization method. Default: 'normal'.
+            `gain`: Scaling factor for normal, xavier and orthogonal.
+            Default: 0.02.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 base_channels=64,
+                 norm_cfg=dict(type='IN'),
+                 use_dropout=False,
+                 num_blocks=9,
+                 padding_mode='reflect',
+                 init_cfg=dict(type='normal', gain=0.02)):
+        super().__init__()
+        assert num_blocks >= 0, ('Number of residual blocks must be '
+                                 f'non-negative, but got {num_blocks}.')
+        # The trailing space keeps "but" and "got" apart in the message.
+        assert isinstance(norm_cfg, dict), ("'norm_cfg' should be dict, but "
+                                            f'got {type(norm_cfg)}')
+        assert 'type' in norm_cfg, "'norm_cfg' must have key 'type'"
+        # We use norm layers in the resnet generator.
+        # Only for IN, use bias to follow cyclegan's original implementation.
+        use_bias = norm_cfg['type'] == 'IN'
+
+        # Stem: 7x7 conv from the image channels to ``base_channels``.
+        model = [
+            ConvModule(
+                in_channels=in_channels,
+                out_channels=base_channels,
+                kernel_size=7,
+                padding=3,
+                bias=use_bias,
+                norm_cfg=norm_cfg,
+                padding_mode=padding_mode)
+        ]
+
+        num_down = 2
+        # add downsampling layers
+        for i in range(num_down):
+            multiple = 2**i
+            model += [
+                ConvModule(
+                    in_channels=base_channels * multiple,
+                    out_channels=base_channels * multiple * 2,
+                    kernel_size=3,
+                    stride=2,
+                    padding=1,
+                    bias=use_bias,
+                    norm_cfg=norm_cfg)
+            ]
+
+        # add residual blocks
+        multiple = 2**num_down
+        model += [
+            ResidualBlockWithDropout(
+                base_channels * multiple,
+                padding_mode=padding_mode,
+                norm_cfg=norm_cfg,
+                use_dropout=use_dropout) for _ in range(num_blocks)
+        ]
+
+        # add upsampling layers
+        for i in range(num_down):
+            multiple = 2**(num_down - i)
+            model += [
+                ConvModule(
+                    in_channels=base_channels * multiple,
+                    out_channels=base_channels * multiple // 2,
+                    kernel_size=3,
+                    stride=2,
+                    padding=1,
+                    bias=use_bias,
+                    conv_cfg=dict(type='deconv', output_padding=1),
+                    norm_cfg=norm_cfg)
+            ]
+
+        # Output layer: 7x7 conv to the image channels, no norm, Tanh.
+        model += [
+            ConvModule(
+                in_channels=base_channels,
+                out_channels=out_channels,
+                kernel_size=7,
+                padding=3,
+                bias=True,
+                norm_cfg=None,
+                act_cfg=dict(type='Tanh'),
+                padding_mode=padding_mode)
+        ]
+
+        self.model = nn.Sequential(*model)
+        # Fall back to the documented defaults when ``init_cfg`` is None or
+        # lacks a key.
+        self.init_type = 'normal' if init_cfg is None else init_cfg.get(
+            'type', 'normal')
+        self.init_gain = 0.02 if init_cfg is None else init_cfg.get(
+            'gain', 0.02)
+
+    def forward(self, x):
+        """Forward function.
+
+        Args:
+            x (Tensor): Input tensor with shape (n, c, h, w).
+
+        Returns:
+            Tensor: Forward results.
+        """
+        return self.model(x)
+
+    def init_weights(self, pretrained=None, strict=True):
+        """Initialize weights for the model.
+
+        Args:
+            pretrained (str, optional): Path for pretrained weights. If given
+                None, pretrained weights will not be loaded. Default: None.
+            strict (bool, optional): Whether to allow different params for the
+                model and checkpoint. Default: True.
+
+        Raises:
+            TypeError: If ``pretrained`` is neither a str nor None.
+        """
+        if isinstance(pretrained, str):
+            logger = get_root_logger()
+            load_checkpoint(self, pretrained, strict=strict, logger=logger)
+        elif pretrained is None:
+            # No checkpoint: initialize with the type and gain taken from
+            # ``init_cfg`` in the constructor.
+            generation_init_weights(
+                self, init_type=self.init_type, init_gain=self.init_gain)
+        else:
+            raise TypeError("'pretrained' must be a str or None. "
+                            f'But received {type(pretrained)}.')
--- a/build/lib/mmgen/models/architectures/cyclegan/modules.py
+++ b/build/lib/mmgen/models/architectures/cyclegan/modules.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch.nn as nn
+from mmcv.cnn import ConvModule
+
+
+class ResidualBlockWithDropout(nn.Module):
+    """Define a Residual Block with dropout layers.
+
+    Ref:
+    Deep Residual Learning for Image Recognition
+
+    A residual block is a conv block with skip connections. When
+    ``use_dropout`` is True, a ``nn.Dropout(0.5)`` layer is inserted between
+    the two conv modules.
+
+    Args:
+        channels (int): Number of channels in the conv layer.
+        padding_mode (str): The name of padding layer:
+            'reflect' | 'replicate' | 'zeros'.
+        norm_cfg (dict): Config dict to build norm layer. It must contain
+            the key ``'type'``. Default: `dict(type='BN')`.
+        use_dropout (bool): Whether to use dropout layers. Default: True.
+    """
+
+    def __init__(self,
+                 channels,
+                 padding_mode,
+                 norm_cfg=dict(type='BN'),
+                 use_dropout=True):
+        super().__init__()
+        assert isinstance(norm_cfg, dict), ("'norm_cfg' should be dict, but "
+                                            f'got {type(norm_cfg)}')
+        assert 'type' in norm_cfg, "'norm_cfg' must have key 'type'"
+        # We use norm layers in the residual block with dropout layers.
+        # Only for IN, use bias to follow cyclegan's original implementation.
+        use_bias = norm_cfg['type'] == 'IN'
+
+        # First conv module keeps ConvModule's default activation.
+        block = [
+            ConvModule(
+                in_channels=channels,
+                out_channels=channels,
+                kernel_size=3,
+                padding=1,
+                bias=use_bias,
+                norm_cfg=norm_cfg,
+                padding_mode=padding_mode)
+        ]
+
+        if use_dropout:
+            block += [nn.Dropout(0.5)]
+
+        # Second conv module has no activation (``act_cfg=None``) so the
+        # residual sum in ``forward`` is not followed by a ReLU.
+        block += [
+            ConvModule(
+                in_channels=channels,
+                out_channels=channels,
+                kernel_size=3,
+                padding=1,
+                bias=use_bias,
+                norm_cfg=norm_cfg,
+                act_cfg=None,
+                padding_mode=padding_mode)
+        ]
+
+        self.block = nn.Sequential(*block)
+
+    def forward(self, x):
+        """Forward function. Add skip connections without final ReLU.
+
+        Args:
+            x (Tensor): Input tensor with shape (n, c, h, w).
+
+        Returns:
+            Tensor: ``x + self.block(x)``.
+        """
+        out = x + self.block(x)
+        return out
--- a/build/lib/mmgen/models/architectures/dcgan/__init__.py
+++ b/build/lib/mmgen/models/architectures/dcgan/__init__.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from .generator_discriminator import DCGANDiscriminator, DCGANGenerator
+
+__all__ = ['DCGANGenerator', 'DCGANDiscriminator']
--- a/build/lib/mmgen/models/architectures/dcgan/generator_discriminator.py
+++ b/build/lib/mmgen/models/architectures/dcgan/generator_discriminator.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import torch
+import torch.nn as nn
+from mmcv.cnn import ConvModule, normal_init
+from mmcv.runner import load_checkpoint
+from mmcv.utils.parrots_wrapper import _BatchNorm
+
+from mmgen.models.builder import MODULES
+from mmgen.utils import get_root_logger
+from ..common import get_module_device
+
+
+@MODULES.register_module()
+class DCGANGenerator(nn.Module):
+    """Generator for DCGAN.
+
+    Implementation Details for DCGAN architecture:
+
+    #. Adopt transposed convolution in the generator;
+    #. Use batchnorm in the generator except for the final output layer;
+    #. Use ReLU in the generator except for the final output layer.
+
+    More details can be found in the original paper:
+    Unsupervised Representation Learning with Deep Convolutional
+    Generative Adversarial Networks
+    http://arxiv.org/abs/1511.06434
+
+    Args:
+        output_scale (int): Output scale for the generated image. The
+            number of upsampling steps is computed as
+            ``int(log2(output_scale // input_scale))``, so only integers are
+            handled by this implementation.
+        out_channels (int, optional): The channel number of the output feature.
+            Default to 3.
+        base_channels (int, optional): The basic channel number of the
+            generator. The other layers contains channels based on this number.
+            Default to 1024.
+        input_scale (int, optional): Input scale of the feature map ahead
+            of the upsampling backbone. It is only used to derive the number
+            of upsampling steps; the first layer always applies a transposed
+            convolution with ``kernel_size=4``. Defaults to 4.
+        noise_size (int, optional): Size of the input noise
+            vector. Defaults to 100.
+        default_norm_cfg (dict, optional): Norm config for all of layers
+            except for the final output layer. Defaults to ``dict(type='BN')``.
+        default_act_cfg (dict, optional): Activation config for all of layers
+            except for the final output layer. Defaults to
+            ``dict(type='ReLU')``.
+        out_act_cfg (dict, optional): Activation config for the final output
+            layer. Defaults to ``dict(type='Tanh')``.
+        pretrained (str, optional): Path for the pretrained model. Default to
+            ``None``.
+    """
+
+    def __init__(self,
+                 output_scale,
+                 out_channels=3,
+                 base_channels=1024,
+                 input_scale=4,
+                 noise_size=100,
+                 default_norm_cfg=dict(type='BN'),
+                 default_act_cfg=dict(type='ReLU'),
+                 out_act_cfg=dict(type='Tanh'),
+                 pretrained=None):
+        super().__init__()
+        self.output_scale = output_scale
+        self.base_channels = base_channels
+        self.input_scale = input_scale
+        self.noise_size = noise_size
+
+        # the number of times for upsampling
+        self.num_upsamples = int(np.log2(output_scale // input_scale))
+
+        # output 4x4 feature map
+        self.noise2feat = ConvModule(
+            noise_size,
+            base_channels,
+            kernel_size=4,
+            stride=1,
+            padding=0,
+            conv_cfg=dict(type='ConvTranspose2d'),
+            norm_cfg=default_norm_cfg,
+            act_cfg=default_act_cfg)
+
+        # build up upsampling backbone (excluding the output layer)
+        # every step halves the channel number
+        upsampling = []
+        curr_channel = base_channels
+        for _ in range(self.num_upsamples - 1):
+            upsampling.append(
+                ConvModule(
+                    curr_channel,
+                    curr_channel // 2,
+                    kernel_size=4,
+                    stride=2,
+                    padding=1,
+                    conv_cfg=dict(type='ConvTranspose2d'),
+                    norm_cfg=default_norm_cfg,
+                    act_cfg=default_act_cfg))
+
+            curr_channel //= 2
+
+        self.upsampling = nn.Sequential(*upsampling)
+
+        # output layer (no norm, dedicated output activation)
+        self.output_layer = ConvModule(
+            curr_channel,
+            out_channels,
+            kernel_size=4,
+            stride=2,
+            padding=1,
+            conv_cfg=dict(type='ConvTranspose2d'),
+            norm_cfg=None,
+            act_cfg=out_act_cfg)
+
+        self.init_weights(pretrained=pretrained)
+
+    def forward(self, noise, num_batches=0, return_noise=False):
+        """Forward function.
+
+        Args:
+            noise (torch.Tensor | callable | None): You can directly give a
+                batch of noise through a ``torch.Tensor`` or offer a callable
+                function to sample a batch of noise data. Otherwise, the
+                ``None`` indicates to use the default noise sampler.
+            num_batches (int, optional): Batch size used when the noise has
+                to be sampled (callable or default sampler). It must be
+                positive in that case. Defaults to 0.
+            return_noise (bool, optional): If True, ``noise_batch`` will be
+                returned in a dict with ``fake_img``. Defaults to False.
+
+        Returns:
+            torch.Tensor | dict: If not ``return_noise``, only the output image
+                will be returned. Otherwise, a dict contains ``fake_img`` and
+                ``noise_batch`` will be returned.
+
+        Raises:
+            ValueError: If a noise tensor is neither 2-D nor 4-D.
+        """
+        # receive noise and conduct sanity check.
+        if isinstance(noise, torch.Tensor):
+            # Check the rank first so that a tensor of unexpected rank
+            # triggers the descriptive ValueError below instead of an
+            # IndexError (1-D input) or a bare assertion failure.
+            if noise.ndim == 2:
+                noise_batch = noise[:, :, None, None]
+            elif noise.ndim == 4:
+                noise_batch = noise
+            else:
+                raise ValueError('The noise should be in shape of (n, c) or '
+                                 f'(n, c, 1, 1), but got {noise.shape}')
+            assert noise_batch.shape[1] == self.noise_size
+        # receive a noise generator and sample noise.
+        elif callable(noise):
+            noise_generator = noise
+            assert num_batches > 0
+            noise_batch = noise_generator((num_batches, self.noise_size, 1, 1))
+        # otherwise, we will adopt default noise sampler.
+        else:
+            assert num_batches > 0
+            noise_batch = torch.randn((num_batches, self.noise_size, 1, 1))
+
+        # dirty code for putting data on the right device
+        noise_batch = noise_batch.to(get_module_device(self))
+
+        x = self.noise2feat(noise_batch)
+        x = self.upsampling(x)
+        x = self.output_layer(x)
+
+        if return_noise:
+            return dict(fake_img=x, noise_batch=noise_batch)
+
+        return x
+
+    def init_weights(self, pretrained=None):
+        """Init weights for models.
+
+        We just use the initialization method proposed in the original paper.
+
+        Args:
+            pretrained (str, optional): Path for pretrained weights. If given
+                None, pretrained weights will not be loaded. Defaults to None.
+
+        Raises:
+            TypeError: If ``pretrained`` is neither a str nor None.
+        """
+        if isinstance(pretrained, str):
+            logger = get_root_logger()
+            load_checkpoint(self, pretrained, strict=False, logger=logger)
+        elif pretrained is None:
+            for m in self.modules():
+                if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
+                    normal_init(m, 0, 0.02)
+                elif isinstance(m, _BatchNorm):
+                    nn.init.normal_(m.weight.data)
+                    nn.init.constant_(m.bias.data, 0)
+        else:
+            raise TypeError('pretrained must be a str or None but'
+                            f' got {type(pretrained)} instead.')
+
+
+@MODULES.register_module()
+class DCGANDiscriminator(nn.Module):
+    """Discriminator for DCGAN.
+
+    Implementation Details for DCGAN architecture:
+
+    #. Adopt convolution in the discriminator;
+    #. Use batchnorm in the discriminator except for the input and final \
+       output layer;
+    #. Use LeakyReLU in the discriminator except for the output layer.
+
+    Args:
+        input_scale (int): The scale of the input image.
+        output_scale (int): The final scale of the convolutional feature.
+        out_channels (int): The channel number of the final output layer.
+        in_channels (int, optional): The channel number of the input image.
+            Defaults to 3.
+        base_channels (int, optional): The basic channel number of the
+            discriminator. The other layers contains channels based on this
+            number. Defaults to 128.
+        default_norm_cfg (dict, optional): Norm config for all of layers
+            except for the first conv and the final output layer. Defaults to
+            ``dict(type='BN')``.
+        default_act_cfg (dict, optional): Activation config for all of layers
+            except for the final output layer. Defaults to
+            ``dict(type='LeakyReLU')``.
+        out_act_cfg (dict, optional): Activation config for the final output
+            layer. Defaults to ``None``.
+        pretrained (str, optional): Path for the pretrained model. Default to
+            ``None``.
+    """
+
+    def __init__(self,
+                 input_scale,
+                 output_scale,
+                 out_channels,
+                 in_channels=3,
+                 base_channels=128,
+                 default_norm_cfg=dict(type='BN'),
+                 default_act_cfg=dict(type='LeakyReLU'),
+                 out_act_cfg=None,
+                 pretrained=None):
+        super().__init__()
+        self.input_scale = input_scale
+        self.output_scale = output_scale
+        self.out_channels = out_channels
+        self.base_channels = base_channels
+
+        # the number of times for downsampling
+        self.num_downsamples = int(np.log2(input_scale // output_scale))
+
+        # build up downsampling backbone (excluding the output layer)
+        downsamples = []
+        # Start from the image channels so that ``curr_channels`` is defined
+        # even when no downsampling layer is built (num_downsamples == 0).
+        curr_channels = in_channels
+        for i in range(self.num_downsamples):
+            # remove norm for the first conv
+            norm_cfg_ = None if i == 0 else default_norm_cfg
+            next_channels = base_channels * 2**i
+
+            downsamples.append(
+                ConvModule(
+                    curr_channels,
+                    next_channels,
+                    kernel_size=4,
+                    stride=2,
+                    padding=1,
+                    conv_cfg=dict(type='Conv2d'),
+                    norm_cfg=norm_cfg_,
+                    act_cfg=default_act_cfg))
+            curr_channels = next_channels
+
+        self.downsamples = nn.Sequential(*downsamples)
+
+        # define output layer
+        self.output_layer = ConvModule(
+            curr_channels,
+            out_channels,
+            kernel_size=4,
+            stride=1,
+            padding=0,
+            conv_cfg=dict(type='Conv2d'),
+            norm_cfg=None,
+            act_cfg=out_act_cfg)
+
+        self.init_weights(pretrained=pretrained)
+
+    def forward(self, x):
+        """Forward function.
+
+        Args:
+            x (torch.Tensor): Fake or real image tensor.
+
+        Returns:
+            torch.Tensor: Prediction for the reality of the input image,
+                flattened to shape (n, -1).
+        """
+
+        n = x.shape[0]
+        x = self.downsamples(x)
+        x = self.output_layer(x)
+
+        # reshape to a flatten feature
+        return x.view(n, -1)
+
+    def init_weights(self, pretrained=None):
+        """Init weights for models.
+
+        We just use the initialization method proposed in the original paper.
+
+        Args:
+            pretrained (str, optional): Path for pretrained weights. If given
+                None, pretrained weights will not be loaded. Defaults to None.
+
+        Raises:
+            TypeError: If ``pretrained`` is neither a str nor None.
+        """
+        if isinstance(pretrained, str):
+            logger = get_root_logger()
+            load_checkpoint(self, pretrained, strict=False, logger=logger)
+        elif pretrained is None:
+            for m in self.modules():
+                if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
+                    normal_init(m, 0, 0.02)
+                elif isinstance(m, _BatchNorm):
+                    nn.init.normal_(m.weight.data)
+                    nn.init.constant_(m.bias.data, 0)
+        else:
+            raise TypeError('pretrained must be a str or None but'
+                            f' got {type(pretrained)} instead.')
--- a/build/lib/mmgen/models/architectures/ddpm/__init__.py
+++ b/build/lib/mmgen/models/architectures/ddpm/__init__.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from .denoising import DenoisingUnet
+from .modules import (DenoisingDownsample, DenoisingResBlock,
+                      DenoisingUpsample, TimeEmbedding)
+
+__all__ = [
+    'DenoisingUnet', 'TimeEmbedding', 'DenoisingDownsample',
+    'DenoisingUpsample', 'DenoisingResBlock'
+]
--- a/build/lib/mmgen/models/architectures/ddpm/denoising.py
+++ b/build/lib/mmgen/models/architectures/ddpm/denoising.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from copy import deepcopy
+
+import torch
+import torch.nn as nn
+from mmcv.cnn import constant_init
+from mmcv.cnn.bricks.conv_module import ConvModule
+from mmcv.runner import load_checkpoint
+
+from mmgen.models.builder import MODULES, build_module
+from mmgen.utils import get_root_logger
+from .modules import EmbedSequential, TimeEmbedding
+
+
+@MODULES.register_module()
+class DenoisingUnet(nn.Module):
+    """Denoising Unet. This network receives a diffused image ``x_t`` and
+    current timestep ``t``, and returns a ``output_dict`` corresponding to the
+    passed ``output_cfg``.
+
+    ``output_cfg`` defines the number of channels and the meaning of the
+    output. ``output_cfg`` mainly contains keys of ``mean`` and ``var``,
+    denoting how the network outputs mean and variance required for the
+    denoising process.
+    For ``mean``:
+    1. ``dict(mean='EPS')``: Model will predict noise added in the
+        diffusion process, and the ``output_dict`` will contain a key named
+        ``eps_t_pred``.
+    2. ``dict(mean='START_X')``: Model will direct predict the mean of the
+        original image `x_0`, and the ``output_dict`` will contain a key named
+        ``x_0_pred``.
+    3. ``dict(mean='X_TM1_PRED')``: Model will predict the mean of diffused
+        image at `t-1` timestep, and the ``output_dict`` will contain a key
+        named ``x_tm1_pred``.
+
+    For ``var``:
+    1. ``dict(var='FIXED_SMALL')`` or ``dict(var='FIXED_LARGE')``: Variance in
+        the denoising process is regarded as a fixed value. Therefore only
+        'mean' will be predicted, and the output channels will equal to the
+        input image (e.g., three channels for RGB image.)
+    2. ``dict(var='LEARNED')``: Model will predict `log_variance` in the
+        denoising process, and the ``output_dict`` will contain a key named
+        ``logvar``.
+    3. ``dict(var='LEARNED_RANGE')``: Model will predict an interpolation
+        factor and the `log_variance` will be calculated as
+        `factor * upper_bound + (1-factor) * lower_bound`. The ``output_dict``
+        will contain a key named ``factor``.
+
+    If ``var`` is not ``FIXED_SMALL`` or ``FIXED_LARGE``, the number of output
+    channels will be the double of input channels, where the first half part
+    contains predicted mean values and the other part is the predicted
+    variance values. Otherwise, the number of output channels equals to the
+    input channels, only containing the predicted mean values.
+
+    Args:
+        image_size (int | list[int]): The size of image to denoise.
+        in_channels (int, optional): The input channels of the input image.
+            Defaults as ``3``.
+        base_channels (int, optional): The basic channel number of the
+            generator. The other layers contain channels based on this number.
+            Defaults to ``128``.
+        resblocks_per_downsample (int, optional): Number of ResBlock used
+            between two downsample operations. The number of ResBlock between
+            upsample operations will be the same value to keep symmetry.
+            Defaults to 3.
+        num_timesteps (int, optional): The total timestep of the denoising
+            process and the diffusion process. Defaults to ``1000``.
+        use_rescale_timesteps (bool, optional): Whether rescale the input
+            timesteps in range of [0, 1000].  Defaults to ``True``.
+        dropout (float, optional): The probability of dropout operation of
+            each ResBlock. Pass ``0`` to do not use dropout. Defaults as 0.
+        embedding_channels (int, optional): The output channels of time
+            embedding layer and label embedding layer. If not passed (or
+            passed ``-1``), output channels of the embedding layers will set
+            as four times of ``base_channels``. Defaults to ``-1``.
+        num_classes (int, optional): The number of conditional classes. If set
+            to 0, this model will be degraded to an unconditional model.
+            Defaults to 0.
+        channels_cfg (list | dict[list], optional): Config for input channels
+            of the intermediate blocks. If list is passed, each element of
+            the list indicates the scale factor for the input channels of the
+            current block with regard to the ``base_channels``. For block
+            ``i``, the input and output channels should be
+            ``channels_cfg[i] * base_channels`` and
+            ``channels_cfg[i+1] * base_channels``. If dict is provided, the
+            key of the dict should be the image size and the corresponding
+            value should be a list to define channels. Default: Please refer
+            to ``_default_channels_cfg``.
+        output_cfg (dict, optional): Config for output variables. Defaults to
+            ``dict(mean='eps', var='learned_range')``.
+        norm_cfg (dict, optional): The config for normalization layers.
+            Defaults to ``dict(type='GN', num_groups=32)``.
+        act_cfg (dict, optional): The config for activation layers. Defaults
+            to ``dict(type='SiLU', inplace=False)``.
+        shortcut_kernel_size (int, optional): The kernel size for shortcut
+            conv in ResBlocks. It is used as the default when `resblock_cfg`
+            does not set ``shortcut_kernel_size`` itself. Defaults to `1`.
+        use_scale_shift_norm (bool, optional): Whether perform scale and shift
+            after normalization operation. Defaults to False.
+        num_heads (int, optional): The number of attention heads. Defaults to
+            4.
+        time_embedding_mode (str, optional): Embedding method of
+            ``time_embedding``. Defaults to 'sin'.
+        time_embedding_cfg (dict, optional): Config for ``time_embedding``.
+            Defaults to None.
+        resblock_cfg (dict, optional): Config for ResBlock. Defaults to
+            ``dict(type='DenoisingResBlock')``.
+        attention_cfg (dict, optional): Config for attention operation.
+            Defaults to ``dict(type='MultiHeadAttention')``.
+        upsample_conv (bool, optional): Whether use conv in upsample block.
+            Defaults to ``True``.
+        downsample_conv (bool, optional): Whether use conv operation in
+            downsample block.  Defaults to ``True``.
+        upsample_cfg (dict, optional): Config for upsample blocks.
+            Defaults to ``dict(type='DenoisingUpsample')``.
+        downsample_cfg (dict, optional): Config for downsample blocks.
+            Defaults to ``dict(type='DenoisingDownsample')``.
+        attention_res (int | list[int], optional): Resolution of feature maps
+            to apply attention operation. Defaults to ``[16, 8]``.
+        pretrained (str | dict, optional): Path for the pretrained model or
+            dict containing information for pretrained models whose necessary
+            key is 'ckpt_path'. Besides, you can also provide 'prefix' to load
+            the generator part from the whole state dict.  Defaults to None.
+    """
+
+    _default_channels_cfg = {
+        256: [1, 1, 2, 2, 4, 4],
+        64: [1, 2, 3, 4],
+        32: [1, 2, 2, 2]
+    }
+
+    def __init__(self,
+                 image_size,
+                 in_channels=3,
+                 base_channels=128,
+                 resblocks_per_downsample=3,
+                 num_timesteps=1000,
+                 use_rescale_timesteps=True,
+                 dropout=0,
+                 embedding_channels=-1,
+                 num_classes=0,
+                 channels_cfg=None,
+                 output_cfg=dict(mean='eps', var='learned_range'),
+                 norm_cfg=dict(type='GN', num_groups=32),
+                 act_cfg=dict(type='SiLU', inplace=False),
+                 shortcut_kernel_size=1,
+                 use_scale_shift_norm=False,
+                 num_heads=4,
+                 time_embedding_mode='sin',
+                 time_embedding_cfg=None,
+                 resblock_cfg=dict(type='DenoisingResBlock'),
+                 attention_cfg=dict(type='MultiHeadAttention'),
+                 downsample_conv=True,
+                 upsample_conv=True,
+                 downsample_cfg=dict(type='DenoisingDownsample'),
+                 upsample_cfg=dict(type='DenoisingUpsample'),
+                 attention_res=[16, 8],
+                 pretrained=None):
+        """Build the encoder, bottleneck, decoder and output conv.
+
+        The arguments are described in the class docstring.
+
+        Raises:
+            TypeError: If ``image_size`` is neither an int nor a list.
+            KeyError: If ``channels_cfg`` is a dict without an entry for
+                ``image_size``.
+            ValueError: If ``channels_cfg`` is neither a list nor a dict.
+        """
+
+        super().__init__()
+
+        self.num_classes = num_classes
+        self.num_timesteps = num_timesteps
+        self.use_rescale_timesteps = use_rescale_timesteps
+
+        self.output_cfg = deepcopy(output_cfg)
+        self.mean_mode = self.output_cfg.get('mean', 'eps')
+        self.var_mode = self.output_cfg.get('var', 'learned_range')
+
+        # double output_channels to output mean and var at same time
+        out_channels = in_channels if 'FIXED' in self.var_mode.upper() \
+            else 2 * in_channels
+        self.out_channels = out_channels
+
+        # check type of image_size
+        if not isinstance(image_size, (int, list)):
+            raise TypeError(
+                'Only support `int` and `list[int]` for `image_size`.')
+        if isinstance(image_size, list):
+            assert len(
+                image_size) == 2, 'The length of `image_size` should be 2.'
+            assert image_size[0] == image_size[
+                1], 'Width and height of the image should be same.'
+            image_size = image_size[0]
+        self.image_size = image_size
+
+        # resolve the per-level channel factors
+        channels_cfg = deepcopy(self._default_channels_cfg) \
+            if channels_cfg is None else deepcopy(channels_cfg)
+        if isinstance(channels_cfg, dict):
+            if image_size not in channels_cfg:
+                raise KeyError(f'`image_size={image_size}` is not found in '
+                               '`channels_cfg`, only support configs for '
+                               f'{list(channels_cfg)}')
+            self.channel_factor_list = channels_cfg[image_size]
+        elif isinstance(channels_cfg, list):
+            self.channel_factor_list = channels_cfg
+        else:
+            raise ValueError('Only support list or dict for `channels_cfg`, '
+                             f'receive {type(channels_cfg)}')
+
+        # -1 means "four times of base_channels"
+        embedding_channels = base_channels * 4 \
+            if embedding_channels == -1 else embedding_channels
+        self.time_embedding = TimeEmbedding(
+            base_channels,
+            embedding_channels=embedding_channels,
+            embedding_mode=time_embedding_mode,
+            embedding_cfg=time_embedding_cfg,
+            act_cfg=act_cfg)
+
+        # the label embedding only exists for conditional models
+        if self.num_classes != 0:
+            self.label_embedding = nn.Embedding(self.num_classes,
+                                                embedding_channels)
+
+        # Values given explicitly in ``resblock_cfg`` take precedence; the
+        # keyword arguments of this method only fill in missing keys.
+        self.resblock_cfg = deepcopy(resblock_cfg)
+        self.resblock_cfg.setdefault('dropout', dropout)
+        self.resblock_cfg.setdefault('norm_cfg', norm_cfg)
+        self.resblock_cfg.setdefault('act_cfg', act_cfg)
+        self.resblock_cfg.setdefault('embedding_channels', embedding_channels)
+        self.resblock_cfg.setdefault('use_scale_shift_norm',
+                                     use_scale_shift_norm)
+        self.resblock_cfg.setdefault('shortcut_kernel_size',
+                                     shortcut_kernel_size)
+
+        # ``attention_res`` is documented as ``int | list[int]``; wrap a bare
+        # int so that the comprehension below can iterate over it.
+        if isinstance(attention_res, int):
+            attention_res = [attention_res]
+
+        # get scales of ResBlock to apply attention
+        attention_scale = [image_size // int(res) for res in attention_res]
+        self.attention_cfg = deepcopy(attention_cfg)
+        self.attention_cfg.setdefault('num_heads', num_heads)
+        self.attention_cfg.setdefault('norm_cfg', norm_cfg)
+
+        self.downsample_cfg = deepcopy(downsample_cfg)
+        self.downsample_cfg.setdefault('with_conv', downsample_conv)
+        self.upsample_cfg = deepcopy(upsample_cfg)
+        self.upsample_cfg.setdefault('with_conv', upsample_conv)
+
+        # init the channel scale factor
+        scale = 1
+        self.in_blocks = nn.ModuleList([
+            EmbedSequential(
+                nn.Conv2d(in_channels, base_channels, 3, 1, padding=1))
+        ])
+        # output channels of every encoder block, consumed (in reverse) by
+        # the decoder to size its concatenated inputs
+        self.in_channels_list = [base_channels]
+
+        # construct the encoder part of Unet
+        for level, factor in enumerate(self.channel_factor_list):
+            in_channels_ = base_channels if level == 0 \
+                else base_channels * self.channel_factor_list[level - 1]
+            out_channels_ = base_channels * factor
+
+            for _ in range(resblocks_per_downsample):
+                layers = [
+                    build_module(self.resblock_cfg, {
+                        'in_channels': in_channels_,
+                        'out_channels': out_channels_
+                    })
+                ]
+                in_channels_ = out_channels_
+
+                if scale in attention_scale:
+                    layers.append(
+                        build_module(self.attention_cfg,
+                                     {'in_channels': in_channels_}))
+
+                self.in_channels_list.append(in_channels_)
+                self.in_blocks.append(EmbedSequential(*layers))
+
+            # no downsampling after the last level
+            if level != len(self.channel_factor_list) - 1:
+                self.in_blocks.append(
+                    EmbedSequential(
+                        build_module(self.downsample_cfg,
+                                     {'in_channels': in_channels_})))
+                self.in_channels_list.append(in_channels_)
+                scale *= 2
+
+        # construct the bottom part of Unet
+        self.mid_blocks = EmbedSequential(
+            build_module(self.resblock_cfg, {'in_channels': in_channels_}),
+            build_module(self.attention_cfg, {'in_channels': in_channels_}),
+            build_module(self.resblock_cfg, {'in_channels': in_channels_}),
+        )
+
+        # construct the decoder part of Unet
+        in_channels_list = deepcopy(self.in_channels_list)
+        self.out_blocks = nn.ModuleList()
+        for level, factor in enumerate(self.channel_factor_list[::-1]):
+            for idx in range(resblocks_per_downsample + 1):
+                # each decoder block receives the current feature
+                # concatenated with one stored encoder feature
+                layers = [
+                    build_module(
+                        self.resblock_cfg, {
+                            'in_channels':
+                            in_channels_ + in_channels_list.pop(),
+                            'out_channels': base_channels * factor
+                        })
+                ]
+                in_channels_ = base_channels * factor
+                if scale in attention_scale:
+                    layers.append(
+                        build_module(self.attention_cfg,
+                                     {'in_channels': in_channels_}))
+                # upsample at the end of every level except the last one
+                if (level != len(self.channel_factor_list) - 1
+                        and idx == resblocks_per_downsample):
+                    layers.append(
+                        build_module(self.upsample_cfg,
+                                     {'in_channels': in_channels_}))
+                    scale //= 2
+                self.out_blocks.append(EmbedSequential(*layers))
+
+        self.out = ConvModule(
+            in_channels=in_channels_,
+            out_channels=out_channels,
+            kernel_size=3,
+            padding=1,
+            act_cfg=act_cfg,
+            norm_cfg=norm_cfg,
+            bias=True,
+            order=('norm', 'act', 'conv'))
+
+        self.init_weights(pretrained)
+
+    def forward(self, x_t, t, label=None, return_noise=False):
+        """Run the denoising network on a diffused sample.
+
+        Args:
+            x_t (torch.Tensor): Diffused image at timestep `t` to denoise.
+            t (torch.Tensor): Current timestep.
+            label (torch.Tensor | None): Label data fed to
+                ``self.label_embedding``; its output is added to the time
+                embedding. If ``None``, only the time embedding is used.
+                Defaults to None.
+            return_noise (bool, optional): If True, ``x_t``, the (possibly
+                rescaled) ``t`` and, when ``self.num_classes > 0``,
+                ``label`` are stored in the returned dict as well.
+                Defaults to False.
+
+        Returns:
+            dict: The mean prediction is stored under ``'eps_t_pred'``,
+                ``'x_0_pred'`` or ``'x_tm1_pred'`` depending on
+                ``self.mean_mode``. When ``self.var_mode`` does not contain
+                ``'FIXED'``, the variance part of the output is stored
+                under ``'factor'`` or ``'logvar'``.
+        """
+        if self.use_rescale_timesteps:
+            t = t.float() * (1000.0 / self.num_timesteps)
+        embedding = self.time_embedding(t)
+
+        if label is not None:
+            assert hasattr(self, 'label_embedding')
+            embedding = self.label_embedding(label) + embedding
+
+        # encoder: remember every intermediate feature for skip connections
+        feat = x_t
+        skips = []
+        for down_block in self.in_blocks:
+            feat = down_block(feat, embedding)
+            skips.append(feat)
+
+        # bottleneck
+        feat = self.mid_blocks(feat, embedding)
+
+        # decoder: consume the skip features in reverse order
+        for up_block in self.out_blocks:
+            feat = up_block(torch.cat([feat, skips.pop()], dim=1), embedding)
+        outputs = self.out(feat)
+
+        output_dict = dict()
+        var_mode = self.var_mode.upper()
+        if 'FIXED' in var_mode:
+            mean = outputs
+        else:
+            # the output stacks the mean and the learned variance term
+            mean, var = outputs.split(self.out_channels // 2, dim=1)
+            if var_mode == 'LEARNED_RANGE':
+                # rescale [-1, 1] to [0, 1]
+                output_dict['factor'] = (var + 1) / 2
+            elif var_mode == 'LEARNED':
+                output_dict['logvar'] = var
+            else:
+                raise AttributeError(
+                    'Only support \'FIXED\', \'LEARNED_RANGE\' '
+                    'and \'LEARNED\' for variance output format. But receive '
+                    f'\'{self.var_mode}\'.')
+
+        mean_keys = {
+            'EPS': 'eps_t_pred',
+            'START_X': 'x_0_pred',
+            'PREVIOUS_X': 'x_tm1_pred',
+        }
+        mean_mode = self.mean_mode.upper()
+        if mean_mode not in mean_keys:
+            raise AttributeError(
+                'Only support \'EPS\', \'START_X\' and \'PREVIOUS_X\' for '
+                f'mean output format. But receive \'{self.mean_mode}\'.')
+        output_dict[mean_keys[mean_mode]] = mean
+
+        if return_noise:
+            output_dict['x_t'] = x_t
+            output_dict['t_rescaled'] = t
+            if self.num_classes > 0:
+                output_dict['label'] = label
+
+        return output_dict
+
+    def init_weights(self, pretrained=None):
+        """Initialize the weights of the network.
+
+        Without a checkpoint, the initialization follows Improved-DDPM and
+        zero-initializes selected layers.
+
+        Args:
+            pretrained (str, optional): Path for pretrained weights. If given
+                None, pretrained weights will not be loaded. Defaults to None.
+
+        Raises:
+            TypeError: If ``pretrained`` is neither a str nor None.
+        """
+        if pretrained is None:
+            # As Improved-DDPM, zero-initialization is applied to
+            #   second conv block in ResBlock (keywords: conv_2)
+            #   the output layer of the Unet (keywords: 'out' but
+            #     not 'out_blocks')
+            #   projection layer in Attention layer (keywords: proj)
+            for name, module in self.named_modules():
+                is_output_layer = 'out' in name and 'out_blocks' not in name
+                if isinstance(module, nn.Conv2d) and ('conv_2' in name
+                                                      or is_output_layer):
+                    constant_init(module, 0)
+                if isinstance(module, nn.Conv1d) and 'proj' in name:
+                    constant_init(module, 0)
+            return
+
+        if not isinstance(pretrained, str):
+            raise TypeError('pretrained must be a str or None but'
+                            f' got {type(pretrained)} instead.')
+
+        # non-strict loading: missing/unexpected keys are tolerated
+        logger = get_root_logger()
+        load_checkpoint(self, pretrained, strict=False, logger=logger)
--- a/build/lib/mmgen/models/architectures/ddpm/modules.py
+++ b/build/lib/mmgen/models/architectures/ddpm/modules.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from copy import deepcopy
+from functools import partial
+
+import mmcv
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import ACTIVATION_LAYERS
+from mmcv.cnn.bricks import build_activation_layer, build_norm_layer
+from mmcv.cnn.utils import constant_init
+from mmcv.utils import digit_version
+
+from mmgen.models.builder import MODULES, build_module
+
+
+class EmbedSequential(nn.Sequential):
+    """Sequential container that forwards an embedding to the children
+    that accept it.
+
+    Only ``DenoisingResBlock`` children receive the extra embedding
+    argument; every other child is called with the feature alone.
+
+    Modified from
+    https://github.com/openai/improved-diffusion/blob/main/improved_diffusion/unet.py#L35
+    """
+
+    def forward(self, x, y):
+        """Apply all children in order.
+
+        Args:
+            x (torch.Tensor): Input feature.
+            y (torch.Tensor): Embedding passed to ``DenoisingResBlock``
+                children.
+
+        Returns:
+            torch.Tensor: Output of the last child (``x`` itself if the
+                container is empty).
+        """
+        for layer in self:
+            takes_embedding = isinstance(layer, DenoisingResBlock)
+            x = layer(x, y) if takes_embedding else layer(x)
+        return x
+
+
+if 'SiLU' not in ACTIVATION_LAYERS:
+
+    @ACTIVATION_LAYERS.register_module()
+    class SiLU(nn.Module):
+        r"""Applies the Sigmoid Linear Unit (SiLU) function, element-wise.
+        The SiLU function is also known as the swish function.
+
+        This fallback is only registered when ``ACTIVATION_LAYERS`` does not
+        already contain a ``'SiLU'`` entry.
+
+        Args:
+            inplace (bool, optional): Use inplace operation or not. Ignored
+                for torch < 1.7.0, where ``x * sigmoid(x)`` is computed
+                out-of-place. Defaults to `False`.
+        """
+
+        def __init__(self, inplace=False):
+            super().__init__()
+            if digit_version(
+                    torch.__version__) < digit_version('1.7.0') and inplace:
+                # report the version string, not the `torch.version` module
+                mmcv.print_log('Inplace version of \'SiLU\' is not supported '
+                               'for torch < 1.7.0, found '
+                               f'\'{torch.__version__}\'.')
+            self.inplace = inplace
+
+        def forward(self, x):
+            """Forward function for SiLU.
+
+            Args:
+                x (torch.Tensor): Input tensor.
+
+            Returns:
+                torch.Tensor: Tensor after activation.
+            """
+
+            # `F.silu` is only used from torch 1.7.0 on; compute manually
+            # for older versions
+            if digit_version(torch.__version__) < digit_version('1.7.0'):
+                return x * torch.sigmoid(x)
+
+            return F.silu(x, inplace=self.inplace)
+
+
+@MODULES.register_module()
+class MultiHeadAttention(nn.Module):
+    """Self-attention block letting spatial positions attend to each other.
+
+    Originally ported from here, but adapted to the N-d case.
+    https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/models/unet.py#L66.  # noqa
+
+    Args:
+        in_channels (int): Channels of the input feature map.
+        num_heads (int, optional): Number of heads in the attention.
+            Defaults to 1.
+        norm_cfg (dict, optional): Config for normalization layer. Default
+            to ``dict(type='GN', num_groups=32)``
+    """
+
+    def __init__(self,
+                 in_channels,
+                 num_heads=1,
+                 norm_cfg=dict(type='GN', num_groups=32)):
+        super().__init__()
+        self.num_heads = num_heads
+        self.norm = build_norm_layer(norm_cfg, in_channels)[1]
+        # 1x1 convs: one produces stacked q/k/v, the other projects back
+        self.qkv = nn.Conv1d(in_channels, in_channels * 3, 1)
+        self.proj = nn.Conv1d(in_channels, in_channels, 1)
+        self.init_weights()
+
+    @staticmethod
+    def QKVAttention(qkv):
+        """Apply scaled dot-product attention to stacked q/k/v.
+
+        Args:
+            qkv (torch.Tensor): Tensor whose dim 1 is split into three
+                equal chunks used as query, key and value.
+
+        Returns:
+            torch.Tensor: Attention output with the shape of one chunk.
+        """
+        channel = qkv.shape[1] // 3
+        query, key, value = torch.chunk(qkv, 3, dim=1)
+        # scale q and k separately by channel ** -0.25
+        scale = 1 / np.sqrt(np.sqrt(channel))
+        logits = torch.einsum('bct,bcs->bts', query * scale, key * scale)
+        # softmax is evaluated in float32 and cast back afterwards
+        attn = torch.softmax(logits.float(), dim=-1).type(logits.dtype)
+        return torch.einsum('bts,bcs->bct', attn, value)
+
+    def forward(self, x):
+        """Forward function for multi head attention.
+
+        Args:
+            x (torch.Tensor): Input feature map.
+
+        Returns:
+            torch.Tensor: Feature map after attention.
+        """
+        batch, channels = x.shape[0], x.shape[1]
+        spatial = x.shape[2:]
+        # flatten all spatial dims into one token axis
+        x = x.reshape(batch, channels, -1)
+        qkv = self.qkv(self.norm(x))
+        qkv = qkv.reshape(batch * self.num_heads, -1, qkv.shape[2])
+        attended = self.QKVAttention(qkv)
+        attended = attended.reshape(batch, -1, attended.shape[-1])
+        attended = self.proj(attended)
+        # residual connection, then restore the original spatial layout
+        return (attended + x).reshape(batch, channels, *spatial)
+
+    def init_weights(self):
+        """Zero-initialize the output projection."""
+        constant_init(self.proj, 0)
+
+
+@MODULES.register_module()
+class TimeEmbedding(nn.Module):
+    """Two-level time embedding layer. Timesteps are first mapped by an
+    embedding function and the result is then fed to a small network.
+
+    Args:
+        in_channels (int): The channel number of the input feature map.
+        embedding_channels (int): The channel number of the output embedding.
+        embedding_mode (str, optional): Embedding mode for the time embedding.
+            Only ``'sin'`` (case-insensitive) is supported.
+            Defaults to 'sin'.
+        embedding_cfg (dict, optional): Extra keyword arguments for the
+            embedding function. Defaults to None.
+        act_cfg (dict, optional): Config for activation layer. Defaults to
+            ``dict(type='SiLU', inplace=False)``.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 embedding_channels,
+                 embedding_mode='sin',
+                 embedding_cfg=None,
+                 act_cfg=dict(type='SiLU', inplace=False)):
+        super().__init__()
+        self.blocks = nn.Sequential(
+            nn.Linear(in_channels, embedding_channels),
+            build_activation_layer(act_cfg),
+            nn.Linear(embedding_channels, embedding_channels))
+
+        # `dim` defaults to `in_channels`; user config may override it
+        embedding_cfg_ = {'dim': in_channels}
+        if embedding_cfg is not None:
+            embedding_cfg_.update(embedding_cfg)
+
+        if embedding_mode.upper() != 'SIN':
+            raise ValueError('Only support `SIN` for time embedding, '
+                             f'but receive {embedding_mode}.')
+        self.embedding_fn = partial(self.sinusodial_embedding,
+                                    **embedding_cfg_)
+
+    @staticmethod
+    def sinusodial_embedding(timesteps, dim, max_period=10000):
+        """Create sinusoidal timestep embeddings.
+
+        Args:
+            timesteps (torch.Tensor): Timestep to embedding. 1-D tensor shape
+                as ``[bz, ]``,  one per batch element.
+            dim (int): The dimension of the embedding.
+            max_period (int, optional): Controls the minimum frequency of the
+                embeddings. Defaults to ``10000``.
+
+        Returns:
+            torch.Tensor: Embedding results shape as `[bz, dim]`.
+        """
+        half = dim // 2
+        positions = torch.arange(start=0, end=half, dtype=torch.float32)
+        freqs = torch.exp(-np.log(max_period) * positions / half)
+        freqs = freqs.to(device=timesteps.device)
+        phase = timesteps[:, None].float() * freqs[None]
+        parts = [torch.cos(phase), torch.sin(phase)]
+        if dim % 2:
+            # odd `dim`: pad one zero column to reach the requested width
+            parts.append(torch.zeros_like(parts[0][:, :1]))
+        return torch.cat(parts, dim=-1)
+
+    def forward(self, t):
+        """Forward function for time embedding layer.
+
+        Args:
+            t (torch.Tensor): Input timesteps.
+
+        Returns:
+            torch.Tensor: Timesteps embedding.
+        """
+        raw_embedding = self.embedding_fn(t)
+        return self.blocks(raw_embedding)
+
+
+@MODULES.register_module()
+class DenoisingResBlock(nn.Module):
+    """Residual block of the denoising network. When `in_channels` differs
+    from `out_channels`, a learnable conv shortcut is used; otherwise the
+    shortcut is the identity.
+
+    Args:
+        in_channels (int): Number of channels of the input feature map.
+        embedding_channels (int): Number of channels of the input embedding.
+        use_scale_shift_norm (bool): Whether use scale-shift-norm in
+            `NormWithEmbedding` layer.
+        dropout (float): Probability of the dropout layers.
+        out_channels (int, optional): Number of output channels of the
+            ResBlock. If not defined, the output channels will equal to the
+            `in_channels`. Defaults to `None`.
+        norm_cfg (dict, optional): The config for the normalization layers.
+            Defaults to ``dict(type='GN', num_groups=32)``.
+        act_cfg (dict, optional): The config for the activation layers.
+            Defaults to ``dict(type='SiLU', inplace=False)``.
+        shortcut_kernel_size (int, optional): The kernel size for the shortcut
+            conv. Must be ``1`` or ``3``. Defaults to ``1``.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 embedding_channels,
+                 use_scale_shift_norm,
+                 dropout,
+                 out_channels=None,
+                 norm_cfg=dict(type='GN', num_groups=32),
+                 act_cfg=dict(type='SiLU', inplace=False),
+                 shortcut_kernel_size=1):
+        super().__init__()
+        if out_channels is None:
+            out_channels = in_channels
+
+        # work on a private copy of the norm config
+        _norm_cfg = deepcopy(norm_cfg)
+
+        # first stage: norm -> act -> 3x3 conv
+        first_norm = build_norm_layer(_norm_cfg, in_channels)[1]
+        self.conv_1 = nn.Sequential(
+            first_norm, build_activation_layer(act_cfg),
+            nn.Conv2d(in_channels, out_channels, 3, padding=1))
+
+        # normalization conditioned on the embedding
+        self.norm_with_embedding = build_module(
+            dict(type='NormWithEmbedding'),
+            default_args=dict(
+                in_channels=out_channels,
+                embedding_channels=embedding_channels,
+                use_scale_shift=use_scale_shift_norm,
+                norm_cfg=_norm_cfg))
+
+        # second stage: act -> dropout -> 3x3 conv
+        self.conv_2 = nn.Sequential(
+            build_activation_layer(act_cfg), nn.Dropout(dropout),
+            nn.Conv2d(out_channels, out_channels, 3, padding=1))
+
+        assert shortcut_kernel_size in [
+            1, 3
+        ], ('Only support `1` and `3` for `shortcut_kernel_size`, but '
+            f'receive {shortcut_kernel_size}.')
+
+        self.learnable_shortcut = out_channels != in_channels
+
+        if self.learnable_shortcut:
+            # padding keeps the spatial size for both kernel sizes
+            shortcut_padding = 0 if shortcut_kernel_size != 3 else 1
+            self.shortcut = nn.Conv2d(
+                in_channels,
+                out_channels,
+                shortcut_kernel_size,
+                padding=shortcut_padding)
+        self.init_weights()
+
+    def forward_shortcut(self, x):
+        """Return the shortcut branch: conv output or ``x`` unchanged."""
+        return self.shortcut(x) if self.learnable_shortcut else x
+
+    def forward(self, x, y):
+        """Forward function.
+
+        Args:
+            x (torch.Tensor): Input feature map tensor.
+            y (torch.Tensor): Shared time embedding or shared label embedding.
+
+        Returns:
+            torch.Tensor : Output feature map tensor.
+        """
+        identity = self.forward_shortcut(x)
+        out = self.conv_1(x)
+        out = self.norm_with_embedding(out, y)
+        out = self.conv_2(out)
+        return out + identity
+
+    def init_weights(self):
+        """Zero-initialize the last conv layer of the block."""
+        constant_init(self.conv_2[-1], 0)
+
+
+@MODULES.register_module()
+class NormWithEmbedding(nn.Module):
+    """Normalization with embedding layer. If `use_scale_shift == True`,
+    the embedding is chunked and used to re-scale and re-shift the
+    normalization output. Otherwise, the embedding is added to the input of
+    the normalization layer.
+
+    Args:
+        in_channels (int): Number of channels of the input feature map.
+        embedding_channels (int): Number of channels of the input embedding.
+        norm_cfg (dict, optional): Config for the normalization operation.
+            Defaults to `dict(type='GN', num_groups=32)`.
+        act_cfg (dict, optional): Config for the activation layer. Defaults
+            to `dict(type='SiLU', inplace=False)`.
+        use_scale_shift (bool): If True, the output of Embedding layer will be
+            split to 'scale' and 'shift' and map the output of normalization
+            layer to ``out * (1 + scale) + shift``. Otherwise, the output of
+            Embedding layer will be added with the input before normalization
+            operation. Defaults to True.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 embedding_channels,
+                 norm_cfg=dict(type='GN', num_groups=32),
+                 act_cfg=dict(type='SiLU', inplace=False),
+                 use_scale_shift=True):
+        super().__init__()
+        self.use_scale_shift = use_scale_shift
+        self.norm = build_norm_layer(norm_cfg, in_channels)[1]
+
+        # scale-shift mode needs two values (scale, shift) per channel
+        if use_scale_shift:
+            embedding_output = in_channels * 2
+        else:
+            embedding_output = in_channels
+        self.embedding_layer = nn.Sequential(
+            build_activation_layer(act_cfg),
+            nn.Linear(embedding_channels, embedding_output))
+
+    def forward(self, x, y):
+        """Forward function.
+
+        Args:
+            x (torch.Tensor): Input feature map tensor.
+            y (torch.Tensor): Shared time embedding or shared label embedding.
+
+        Returns:
+            torch.Tensor : Output feature map tensor.
+        """
+        # append two singleton dims so the embedding broadcasts over them
+        embedding = self.embedding_layer(y)[:, :, None, None]
+        if not self.use_scale_shift:
+            return self.norm(x + embedding)
+
+        scale, shift = torch.chunk(embedding, 2, dim=1)
+        normed = self.norm(x)
+        return normed * (1 + scale) + shift
+
+
+@MODULES.register_module()
+class DenoisingDownsample(nn.Module):
+    """Downsampling operation used in the denoising network. Support average
+    pooling and convolution for downsample operation.
+
+    Args:
+        in_channels (int): Number of channels of the input feature map to be
+            downsampled.
+        with_conv (bool, optional): Whether use convolution operation for
+            downsampling. If False, a 2x2 average pooling with stride 2 is
+            used instead. Defaults to `True`.
+    """
+
+    def __init__(self, in_channels, with_conv=True):
+        super().__init__()
+        if with_conv:
+            # 3x3 conv, stride 2, padding 1
+            self.downsample = nn.Conv2d(in_channels, in_channels, 3, 2, 1)
+        else:
+            # `kernel_size` is a required argument of `nn.AvgPool2d`;
+            # omitting it raised a TypeError for `with_conv=False`
+            self.downsample = nn.AvgPool2d(kernel_size=2, stride=2)
+
+    def forward(self, x):
+        """Forward function for downsampling operation.
+
+        Args:
+            x (torch.Tensor): Feature map to downsample.
+
+        Returns:
+            torch.Tensor: Feature map after downsampling.
+        """
+        return self.downsample(x)
+
+
+@MODULES.register_module()
+class DenoisingUpsample(nn.Module):
+    """Upsampling operation used in the denoising network. Allows users to
+    apply an additional convolution layer after the nearest interpolation
+    operation.
+
+    Args:
+        in_channels (int): Number of channels of the input feature map to be
+            upsampled.
+        with_conv (bool, optional): Whether apply an additional convolution
+            layer after upsampling.  Defaults to `True`.
+    """
+
+    def __init__(self, in_channels, with_conv=True):
+        super().__init__()
+        # always store the flag: `forward` reads it, and leaving it unset
+        # for `with_conv=False` raised an AttributeError
+        self.with_conv = with_conv
+        if with_conv:
+            self.conv = nn.Conv2d(in_channels, in_channels, 3, 1, 1)
+
+    def forward(self, x):
+        """Forward function for upsampling operation.
+
+        Args:
+            x (torch.Tensor): Feature map to upsample.
+
+        Returns:
+            torch.Tensor: Feature map after upsampling.
+        """
+        x = F.interpolate(x, scale_factor=2, mode='nearest')
+        if self.with_conv:
+            x = self.conv(x)
+        return x
--- a/build/lib/mmgen/models/architectures/fid_inception.py
+++ b/build/lib/mmgen/models/architectures/fid_inception.py
+# Copyright (c) OpenMMLab. All rights reserved.
+"""Inception networks used in calculating FID and Inception metrics.
+
+This code is modified from:
+https://github.com/rosinality/stylegan2-pytorch/blob/master/inception.py
+"""
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.utils.model_zoo import load_url
+from torchvision import models
+
+# Inception weights ported to PyTorch from
+# https://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz
+FID_WEIGHTS_URL = 'https://github.com/mseitzer/pytorch-fid/releases/download/fid_weights/pt_inception-2015-12-05-6726825d.pth'  # noqa: E501
+
+
+class InceptionV3(nn.Module):
+    """Pretrained InceptionV3 network returning feature maps."""
+
+    # Index of default block of inception to return,
+    # corresponds to output of final average pooling
+    DEFAULT_BLOCK_INDEX = 3
+
+    # Maps feature dimensionality to their output blocks indices
+    BLOCK_INDEX_BY_DIM = {
+        64: 0,  # First max pooling features
+        192: 1,  # Second max pooling features
+        768: 2,  # Pre-aux classifier features
+        2048: 3  # Final average pooling features
+    }
+
+    def __init__(self,
+                 output_blocks=[DEFAULT_BLOCK_INDEX],
+                 resize_input=True,
+                 normalize_input=True,
+                 requires_grad=False,
+                 use_fid_inception=True,
+                 load_fid_inception=True):
+        """Build pretrained InceptionV3.
+
+        Args:
+            output_blocks (list[int]): Indices of blocks to return features of.
+                Possible values are:
+                    - 0: corresponds to output of first max pooling
+                    - 1: corresponds to output of second max pooling
+                    - 2: corresponds to output which is fed to aux classifier
+                    - 3: corresponds to output of final average pooling
+            resize_input (bool): If true, bilinearly resizes input to width and
+                height 299 before feeding input to model. As the network
+                without fully connected layers is fully convolutional, it
+                should be able to handle inputs of arbitrary size, so resizing
+                might not be strictly needed.
+            normalize_input (bool): If true, scales the input from range (0, 1)
+                to the range the pretrained Inception network expects, namely
+                (-1, 1).
+            requires_grad (bool): If true, parameters of the model require
+                gradients. Possibly useful for finetuning the network.
+            use_fid_inception (bool): If true, uses the pretrained Inception
+                model used in Tensorflow's FID implementation. If false, uses
+                the pretrained Inception model available in torchvision. The
+                FID Inception model has different weights and a slightly
+                different structure from torchvision's Inception model. If you
+                want to compute FID scores, you are strongly advised to set
+                this parameter to true to get comparable results.
+            load_fid_inception (bool): Passed to ``fid_inception_v3`` to
+                decide whether the FID Inception weights are loaded. Only
+                used when ``use_fid_inception`` is true.
+        """
+        super().__init__()
+
+        self.resize_input = resize_input
+        self.normalize_input = normalize_input
+        self.output_blocks = sorted(output_blocks)
+        self.last_needed_block = max(output_blocks)
+
+        assert self.last_needed_block <= 3, \
+            'Last possible output block index is 3'
+
+        self.blocks = nn.ModuleList()
+
+        inception = (
+            fid_inception_v3(load_fid_inception)
+            if use_fid_inception else models.inception_v3(pretrained=True))
+
+        # Block 0: input to maxpool1 (always built)
+        self.blocks.append(
+            nn.Sequential(inception.Conv2d_1a_3x3, inception.Conv2d_2a_3x3,
+                          inception.Conv2d_2b_3x3,
+                          nn.MaxPool2d(kernel_size=3, stride=2)))
+
+        # Block 1: maxpool1 to maxpool2
+        if self.last_needed_block >= 1:
+            self.blocks.append(
+                nn.Sequential(inception.Conv2d_3b_1x1,
+                              inception.Conv2d_4a_3x3,
+                              nn.MaxPool2d(kernel_size=3, stride=2)))
+
+        # Block 2: maxpool2 to aux classifier
+        if self.last_needed_block >= 2:
+            self.blocks.append(
+                nn.Sequential(inception.Mixed_5b, inception.Mixed_5c,
+                              inception.Mixed_5d, inception.Mixed_6a,
+                              inception.Mixed_6b, inception.Mixed_6c,
+                              inception.Mixed_6d, inception.Mixed_6e))
+
+        # Block 3: aux classifier to final avgpool
+        if self.last_needed_block >= 3:
+            self.blocks.append(
+                nn.Sequential(inception.Mixed_7a, inception.Mixed_7b,
+                              inception.Mixed_7c,
+                              nn.AdaptiveAvgPool2d(output_size=(1, 1))))
+
+        for param in self.parameters():
+            param.requires_grad = requires_grad
+
+    def forward(self, inp):
+        """Get Inception feature maps.
+
+        Args:
+            inp (torch.Tensor): Input tensor of shape Bx3xHxW.
+                Values are expected to be in range (0, 1)
+
+        Returns:
+            list(torch.Tensor): Corresponding to the selected output \
+                block, sorted ascending by index.
+        """
+        feat = inp
+
+        if self.resize_input:
+            feat = F.interpolate(
+                feat, size=(299, 299), mode='bilinear', align_corners=False)
+
+        if self.normalize_input:
+            # Scale from range (0, 1) to range (-1, 1)
+            feat = 2 * feat - 1
+
+        collected = []
+        for block_idx, block in enumerate(self.blocks):
+            feat = block(feat)
+            if block_idx in self.output_blocks:
+                collected.append(feat)
+
+            # nothing after the last requested block is needed
+            if block_idx == self.last_needed_block:
+                break
+
+        return collected
+
+
+def fid_inception_v3(load_ckpt=True):
+    """Build pretrained Inception model for FID computation.
+
+    The Inception model for FID computation uses a different set of weights
+    and has a slightly different structure than torchvision's Inception.
+
+    This method first constructs torchvision's Inception and then patches the
+    necessary parts that are different in the FID Inception model.
+
+    Args:
+        load_ckpt (bool): If true, the weights at ``FID_WEIGHTS_URL`` are
+            loaded into the patched model. Defaults to True.
+
+    Returns:
+        nn.Module: The patched Inception model.
+    """
+    inception = models.inception_v3(
+        num_classes=1008, aux_logits=False, pretrained=False)
+
+    # (attribute name, replacement class, positional args, keyword args)
+    patches = (
+        ('Mixed_5b', FIDInceptionA, (192, ), dict(pool_features=32)),
+        ('Mixed_5c', FIDInceptionA, (256, ), dict(pool_features=64)),
+        ('Mixed_5d', FIDInceptionA, (288, ), dict(pool_features=64)),
+        ('Mixed_6b', FIDInceptionC, (768, ), dict(channels_7x7=128)),
+        ('Mixed_6c', FIDInceptionC, (768, ), dict(channels_7x7=160)),
+        ('Mixed_6d', FIDInceptionC, (768, ), dict(channels_7x7=160)),
+        ('Mixed_6e', FIDInceptionC, (768, ), dict(channels_7x7=192)),
+        ('Mixed_7b', FIDInceptionE_1, (1280, ), dict()),
+        ('Mixed_7c', FIDInceptionE_2, (2048, ), dict()),
+    )
+    for attr_name, block_cls, args, kwargs in patches:
+        setattr(inception, attr_name, block_cls(*args, **kwargs))
+
+    if not load_ckpt:
+        return inception
+
+    inception.load_state_dict(load_url(FID_WEIGHTS_URL, progress=True))
+    return inception
+
+
+class FIDInceptionA(models.inception.InceptionA):
+    """InceptionA block patched for FID computation."""
+
+    def __init__(self, in_channels, pool_features):
+        super().__init__(in_channels, pool_features)
+
+    def forward(self, x):
+        """Get InceptionA feature maps.
+
+        Args:
+            x (torch.Tensor): Input tensor of shape BxCxHxW.
+
+        Returns:
+            torch.Tensor: Feature Maps of x outputted by this block.
+        """
+        out_1x1 = self.branch1x1(x)
+
+        out_5x5 = self.branch5x5_2(self.branch5x5_1(x))
+
+        out_3x3dbl = x
+        for conv in (self.branch3x3dbl_1, self.branch3x3dbl_2,
+                     self.branch3x3dbl_3):
+            out_3x3dbl = conv(out_3x3dbl)
+
+        # Patch: Tensorflow's average pool does not use the padded zero's in
+        # its average calculation
+        pooled = F.avg_pool2d(
+            x, kernel_size=3, stride=1, padding=1, count_include_pad=False)
+        out_pool = self.branch_pool(pooled)
+
+        # concatenate all branches along the channel axis
+        return torch.cat([out_1x1, out_5x5, out_3x3dbl, out_pool], 1)
+
+
+class FIDInceptionC(models.inception.InceptionC):
+    """InceptionC block patched for FID computation."""
+
+    def __init__(self, in_channels, channels_7x7):
+        super().__init__(in_channels, channels_7x7)
+
+    def forward(self, x):
+        """Get InceptionC feature maps.
+
+        Args:
+            x (torch.Tensor): Input tensor of shape BxCxHxW.
+
+        Returns:
+            torch.Tensor: Feature Maps of x outputted by this block.
+        """
+        out_1x1 = self.branch1x1(x)
+
+        out_7x7 = x
+        for conv in (self.branch7x7_1, self.branch7x7_2, self.branch7x7_3):
+            out_7x7 = conv(out_7x7)
+
+        out_7x7dbl = x
+        for conv in (self.branch7x7dbl_1, self.branch7x7dbl_2,
+                     self.branch7x7dbl_3, self.branch7x7dbl_4,
+                     self.branch7x7dbl_5):
+            out_7x7dbl = conv(out_7x7dbl)
+
+        # Patch: Tensorflow's average pool does not use the padded zero's in
+        # its average calculation
+        pooled = F.avg_pool2d(
+            x, kernel_size=3, stride=1, padding=1, count_include_pad=False)
+        out_pool = self.branch_pool(pooled)
+
+        # concatenate all branches along the channel axis
+        return torch.cat([out_1x1, out_7x7, out_7x7dbl, out_pool], 1)
+
+
+class FIDInceptionE_1(models.inception.InceptionE):
+    """First InceptionE block patched for FID computation."""
+
+    def __init__(self, in_channels):
+        super().__init__(in_channels)
+
+    def forward(self, x):
+        """Get first InceptionE feature maps.
+
+        Args:
+            x (torch.Tensor): Input tensor of shape BxCxHxW.
+
+        Returns:
+            torch.Tensor: Feature Maps of x outputted by this block.
+        """
+        out_1x1 = self.branch1x1(x)
+
+        # 3x3 branch: shared stem followed by two parallel convs
+        stem_3x3 = self.branch3x3_1(x)
+        out_3x3 = torch.cat(
+            [self.branch3x3_2a(stem_3x3),
+             self.branch3x3_2b(stem_3x3)], 1)
+
+        # double 3x3 branch: two stem convs, then two parallel convs
+        stem_3x3dbl = self.branch3x3dbl_2(self.branch3x3dbl_1(x))
+        out_3x3dbl = torch.cat([
+            self.branch3x3dbl_3a(stem_3x3dbl),
+            self.branch3x3dbl_3b(stem_3x3dbl)
+        ], 1)
+
+        # Patch: Tensorflow's average pool does not use the padded zero's in
+        # its average calculation
+        pooled = F.avg_pool2d(
+            x, kernel_size=3, stride=1, padding=1, count_include_pad=False)
+        out_pool = self.branch_pool(pooled)
+
+        return torch.cat([out_1x1, out_3x3, out_3x3dbl, out_pool], 1)
+
+
+class FIDInceptionE_2(models.inception.InceptionE):
+    """Second InceptionE block patched for FID computation."""
+
+    def __init__(self, in_channels):
+        super().__init__(in_channels)
+
+    def forward(self, x):
+        """Get second InceptionE feature maps.
+
+        Args:
+            x (torch.Tensor): Input tensor of shape BxCxHxW.
+
+        Returns:
+            torch.Tensor: Feature Maps of x outputted by this block.
+        """
+        out_1x1 = self.branch1x1(x)
+
+        # 3x3 branch: shared stem followed by two parallel convs
+        stem_3x3 = self.branch3x3_1(x)
+        out_3x3 = torch.cat(
+            [self.branch3x3_2a(stem_3x3),
+             self.branch3x3_2b(stem_3x3)], 1)
+
+        # double 3x3 branch: two stem convs, then two parallel convs
+        stem_3x3dbl = self.branch3x3dbl_2(self.branch3x3dbl_1(x))
+        out_3x3dbl = torch.cat([
+            self.branch3x3dbl_3a(stem_3x3dbl),
+            self.branch3x3dbl_3b(stem_3x3dbl)
+        ], 1)
+
+        # Patch: The FID Inception model uses max pooling instead of average
+        # pooling. This is likely an error in this specific Inception
+        # implementation, as other Inception models use average pooling here
+        # (which matches the description in the paper).
+        pooled = F.max_pool2d(x, kernel_size=3, stride=1, padding=1)
+        out_pool = self.branch_pool(pooled)
+
+        return torch.cat([out_1x1, out_3x3, out_3x3dbl, out_pool], 1)
--- a/build/lib/mmgen/models/architectures/lpips/__init__.py
+++ b/build/lib/mmgen/models/architectures/lpips/__init__.py
+# Copyright (c) OpenMMLab. All rights reserved.
+r"""
+    The lpips module was adapted from https://github.com/rosinality/stylegan2-pytorch/tree/master/lpips ,  # noqa
+    and you can see the original implementation at https://github.com/richzhang/PerceptualSimilarity/tree/master/lpips  # noqa
+"""
+from .perceptual_loss import PerceptualLoss
+
+__all__ = ['PerceptualLoss']