Add another part of the mmgeneration code

b7536f78 · limm · 57e0e891 · b7536f78 · b7536f78 · b7536f78
Commit b7536f78 authored Jun 16, 2025 by limm
20 changed files
--- a/mmgen/datasets/pipelines/loading.py
+++ b/mmgen/datasets/pipelines/loading.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import mmcv
+import numpy as np
+from mmcv.fileio import FileClient
+
+from ..builder import PIPELINES
+
+
+@PIPELINES.register_module()
+class LoadImageFromFile:
+    """Load image from file.
+
+    The path is read from ``results[f'{key}_path']``. The decoded image is
+    stored in ``results[key]``, the path (converted to ``str``) is written
+    back to ``results[f'{key}_path']`` and the image shape is stored in
+    ``results[f'{key}_ori_shape']``.
+
+    Args:
+        io_backend (str): io backend where images are stored.
+            Default: 'disk'.
+        key (str): Keys in results to find corresponding path. Default: 'gt'.
+        flag (str): Loading flag for images. Default: 'color'.
+        channel_order (str): Order of channel, candidates are 'bgr' and 'rgb'.
+            Default: 'bgr'.
+        backend (str | None): The image decoding backend type. Options are
+            `cv2`, `pillow`, `turbojpeg`, `None`. If backend is None, the
+            global imread_backend specified by ``mmcv.use_backend()`` will be
+            used. Default: None.
+        save_original_img (bool): If True, maintain a copy of the image in
+            ``results`` dict with name of ``f'ori_{key}'``. Default: False.
+        kwargs (dict): Args for file client.
+    """
+
+    def __init__(self,
+                 io_backend='disk',
+                 key='gt',
+                 flag='color',
+                 channel_order='bgr',
+                 backend=None,
+                 save_original_img=False,
+                 **kwargs):
+        self.io_backend = io_backend
+        self.key = key
+        self.flag = flag
+        self.save_original_img = save_original_img
+        self.channel_order = channel_order
+        self.backend = backend
+        self.kwargs = kwargs
+        # The file client is created lazily on the first ``__call__``.
+        self.file_client = None
+
+    def __call__(self, results):
+        """Call function.
+
+        Args:
+            results (dict): A dict containing the necessary information and
+                data for augmentation. It must contain the key
+                ``f'{self.key}_path'``.
+
+        Returns:
+            dict: A dict containing the processed data and information.
+        """
+        if self.file_client is None:
+            self.file_client = FileClient(self.io_backend, **self.kwargs)
+        # The path may be any object convertible with ``str``.
+        filepath = str(results[f'{self.key}_path'])
+        img_bytes = self.file_client.get(filepath)
+        img = mmcv.imfrombytes(
+            img_bytes,
+            flag=self.flag,
+            channel_order=self.channel_order,
+            backend=self.backend)  # HWC
+
+        results[self.key] = img
+        results[f'{self.key}_path'] = filepath
+        results[f'{self.key}_ori_shape'] = img.shape
+        if self.save_original_img:
+            # Independent copy, not an alias of ``results[self.key]``.
+            results[f'ori_{self.key}'] = img.copy()
+
+        return results
+
+    def __repr__(self):
+        """Return the class name followed by the main settings.
+
+        Note that ``channel_order`` and ``backend`` are not included.
+        """
+        repr_str = self.__class__.__name__
+        repr_str += (
+            f'(io_backend={self.io_backend}, key={self.key}, '
+            f'flag={self.flag}, save_original_img={self.save_original_img})')
+        return repr_str
+
+
+@PIPELINES.register_module()
+class LoadPairedImageFromFile(LoadImageFromFile):
+    """Load a pair of images from file.
+
+    Each sample contains a pair of images, which are concatenated in the w
+    dimension (a|b). This is a special loading class for generation paired
+    dataset. It loads a pair of images as the common loader does and crops
+    it into two images with the same shape in different domains.
+
+    Required key is "pair_path". Added or modified keys are "pair",
+    "pair_ori_shape", "ori_pair", "img_{domain_a}", "img_{domain_b}",
+    "img_{domain_a}_path", "img_{domain_b}_path", "img_{domain_a}_ori_shape",
+    "img_{domain_b}_ori_shape", "ori_img_{domain_a}" and
+    "ori_img_{domain_b}".
+
+    Args:
+        io_backend (str): io backend where images are store. Default: 'disk'.
+        key (str): Keys in results to find corresponding path. Default: 'gt'.
+        domain_a (str, optional): One of the paired image domain.
+            Defaults to None.
+        domain_b (str, optional): The other image domain.
+            Defaults to None.
+        flag (str): Loading flag for images. Default: 'color'.
+        channel_order (str): Order of channel, candidates are 'bgr' and 'rgb'.
+            Default: 'bgr'.
+        save_original_img (bool): If True, maintain a copy of the image in
+            `results` dict with name of `f'ori_{key}'`. Default: False.
+        kwargs (dict): Args for file client.
+    """
+
+    def __init__(self,
+                 io_backend='disk',
+                 key='pair',
+                 domain_a=None,
+                 domain_b=None,
+                 flag='color',
+                 channel_order='bgr',
+                 backend=None,
+                 save_original_img=False,
+                 **kwargs):
+        super().__init__(
+            io_backend,
+            key=key,
+            flag=flag,
+            channel_order=channel_order,
+            backend=backend,
+            save_original_img=save_original_img,
+            **kwargs)
+        assert isinstance(domain_a, str)
+        assert isinstance(domain_b, str)
+        self.domain_a = domain_a
+        self.domain_b = domain_b
+
+    def __call__(self, results):
+        """Call function.
+
+        Args:
+            results (dict): A dict containing the necessary information and
+                data for augmentation.
+
+        Returns:
+            dict: A dict containing the processed data and information.
+        """
+        if self.file_client is None:
+            self.file_client = FileClient(self.io_backend, **self.kwargs)
+        filepath = str(results[f'{self.key}_path'])
+        img_bytes = self.file_client.get(filepath)
+        img = mmcv.imfrombytes(img_bytes, flag=self.flag)  # HWC, BGR
+        if img.ndim == 2:
+            img = np.expand_dims(img, axis=2)
+
+        results[self.key] = img
+        results[f'{self.key}_path'] = filepath
+        results[f'{self.key}_ori_shape'] = img.shape
+        if self.save_original_img:
+            results[f'ori_{self.key}'] = img.copy()
+
+        # crop pair into a and b
+        w = img.shape[1]
+        if w % 2 != 0:
+            raise ValueError(
+                f'The width of image pair must be even number, but got {w}.')
+        new_w = w // 2
+        img_a = img[:, :new_w, :]
+        img_b = img[:, new_w:, :]
+
+        results[f'img_{self.domain_a}'] = img_a
+        results[f'img_{self.domain_b}'] = img_b
+        results[f'img_{self.domain_a}_path'] = filepath
+        results[f'img_{self.domain_b}_path'] = filepath
+        results[f'img_{self.domain_a}_ori_shape'] = img_a.shape
+        results[f'img_{self.domain_b}_ori_shape'] = img_b.shape
+        if self.save_original_img:
+            results[f'ori_img_{self.domain_a}'] = img_a.copy()
+            results[f'ori_img_{self.domain_b}'] = img_b.copy()
+
+        return results
--- a/mmgen/datasets/pipelines/normalize.py
+++ b/mmgen/datasets/pipelines/normalize.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import mmcv
+import numpy as np
+
+from ..builder import PIPELINES
+
+
+@PIPELINES.register_module()
+class Normalize:
+    """Normalize images with the given mean and std value.
+
+    Required keys are the keys in attribute "keys", added or modified keys are
+    the keys in attribute "keys" and the single key 'img_norm_cfg'.
+    It also supports normalizing a list of images.
+
+    Args:
+        keys (Sequence[str]): The images to be normalized.
+        mean (np.ndarray): Mean values of different channels.
+        std (np.ndarray): Std values of different channels.
+        to_rgb (bool): Whether to convert channels from BGR to RGB.
+    """
+
+    def __init__(self, keys, mean, std, to_rgb=False):
+        self.keys = keys
+        # ``mean`` and ``std`` are stored as float32 arrays.
+        self.mean = np.array(mean, dtype=np.float32)
+        self.std = np.array(std, dtype=np.float32)
+        self.to_rgb = to_rgb
+
+    def __call__(self, results):
+        """Call function.
+
+        Args:
+            results (dict): A dict containing the necessary information and
+                data for augmentation.
+
+        Returns:
+            dict: A dict containing the processed data and information.
+        """
+        for key in self.keys:
+            if isinstance(results[key], list):
+                # A list is normalized element by element.
+                results[key] = [
+                    mmcv.imnormalize(v, self.mean, self.std, self.to_rgb)
+                    for v in results[key]
+                ]
+            else:
+                results[key] = mmcv.imnormalize(results[key], self.mean,
+                                                self.std, self.to_rgb)
+
+        # One shared entry for all keys; it is not written per key.
+        results['img_norm_cfg'] = dict(
+            mean=self.mean, std=self.std, to_rgb=self.to_rgb)
+        return results
+
+    def __repr__(self):
+        """Return the class name followed by keys, mean, std and to_rgb."""
+        repr_str = self.__class__.__name__
+        repr_str += (f'(keys={self.keys}, mean={self.mean}, std={self.std}, '
+                     f'to_rgb={self.to_rgb})')
+
+        return repr_str
+
+
+@PIPELINES.register_module()
+class RescaleToZeroOne:
+    """Transform the images into a range between 0 and 1.
+
+    Required keys are the keys in attribute "keys", added or modified keys are
+    the keys in attribute "keys".
+    It also supports rescaling a list of images.
+
+    Args:
+        keys (Sequence[str]): The images to be transformed.
+    """
+
+    def __init__(self, keys):
+        self.keys = keys
+
+    def __call__(self, results):
+        """Call function.
+
+        Args:
+            results (dict): A dict containing the necessary information and
+                data for augmentation.
+
+        Returns:
+            dict: A dict containing the processed data and information.
+        """
+        for key in self.keys:
+            if isinstance(results[key], list):
+                results[key] = [
+                    v.astype(np.float32) / 255. for v in results[key]
+                ]
+            else:
+                results[key] = results[key].astype(np.float32) / 255.
+        return results
+
+    def __repr__(self):
+        return self.__class__.__name__ + f'(keys={self.keys})'
--- a/mmgen/datasets/quick_test_dataset.py
+++ b/mmgen/datasets/quick_test_dataset.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+from torch.utils.data import Dataset
+
+from .builder import DATASETS
+
+
+@DATASETS.register_module()
+class QuickTestImageDataset(Dataset):
+    """Dataset for quickly testing the correctness.
+
+    Args:
+        size (tuple[int]): The size of the images. Defaults to `None`.
+    """
+
+    def __init__(self, *args, size=None, **kwargs):
+        super().__init__()
+        self.size = size
+        self.img_tensor = torch.randn(3, self.size[0], self.size[1])
+
+    def __len__(self):
+        return 10000
+
+    def __getitem__(self, idx):
+        return dict(real_img=self.img_tensor)
--- a/mmgen/datasets/samplers/__init__.py
+++ b/mmgen/datasets/samplers/__init__.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from .distributed_sampler import DistributedSampler
+
+__all__ = ['DistributedSampler']
--- a/mmgen/datasets/samplers/distributed_sampler.py
+++ b/mmgen/datasets/samplers/distributed_sampler.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from __future__ import division
+
+import numpy as np
+import torch
+from torch.utils.data import DistributedSampler as _DistributedSampler
+
+from mmgen.utils import sync_random_seed
+
+
+class DistributedSampler(_DistributedSampler):
+    """DistributedSampler inheriting from
+    `torch.utils.data.DistributedSampler`.
+
+    In pytorch of lower versions, there is no `shuffle` argument. This child
+    class will port one to DistributedSampler.
+    """
+
+    def __init__(self,
+                 dataset,
+                 num_replicas=None,
+                 rank=None,
+                 shuffle=True,
+                 samples_per_gpu=1,
+                 seed=None):
+        super().__init__(dataset, num_replicas=num_replicas, rank=rank)
+
+        self.shuffle = shuffle
+        self.samples_per_gpu = samples_per_gpu
+        # fix the bug of the official implementation
+        self.num_samples_per_replica = int(
+            int(
+                np.ceil(
+                    len(self.dataset) * 1.0 / self.num_replicas /
+                    samples_per_gpu)))
+        self.num_samples = self.num_samples_per_replica * self.samples_per_gpu
+        self.total_size = self.num_samples * self.num_replicas
+
+        # to avoid padding bug when meeting too small dataset
+        if len(dataset) < self.num_replicas * samples_per_gpu:
+            raise ValueError(
+                'You may use too small dataset and our distributed '
+                'sampler cannot pad your dataset correctly. We highly '
+                'recommend you to use fewer GPUs to finish your work')
+        # In distributed sampling, different ranks should sample
+        # non-overlapped data in the dataset. Therefore, this function
+        # is used to make sure that each rank shuffles the data indices
+        # in the same order based on the same seed. Then different ranks
+        # could use different indices to select non-overlapped data from the
+        # same data list.
+        self.seed = sync_random_seed(seed)
+
+    def update_sampler(self, dataset, samples_per_gpu=None):
+        self.dataset = dataset
+        if samples_per_gpu is not None:
+            self.samples_per_gpu = samples_per_gpu
+        # fix the bug of the official implementation
+        self.num_samples_per_replica = int(
+            int(
+                np.ceil(
+                    len(self.dataset) * 1.0 / self.num_replicas /
+                    self.samples_per_gpu)))
+        self.num_samples = self.num_samples_per_replica * self.samples_per_gpu
+        self.total_size = self.num_samples * self.num_replicas
+
+        # to avoid padding bug when meeting too small dataset
+        if len(dataset) < self.num_replicas * self.samples_per_gpu:
+            raise ValueError(
+                'You may use too small dataset and our distributed '
+                'sampler cannot pad your dataset correctly. We highly '
+                'recommend you to use fewer GPUs to finish your work')
+
+    def __iter__(self):
+        # deterministically shuffle based on epoch
+        if self.shuffle:
+            g = torch.Generator()
+            # When :attr:`shuffle=True`, this ensures all replicas
+            # use a different random ordering for each epoch.
+            # Otherwise, the next iteration of this sampler will
+            # yield the same ordering.
+            g.manual_seed(self.seed + self.epoch)
+            indices = torch.randperm(len(self.dataset), generator=g).tolist()
+        else:
+            indices = torch.arange(len(self.dataset)).tolist()
+
+        # add extra samples to make it evenly divisible
+        indices += indices[:(self.total_size - len(indices))]
+        assert len(indices) == self.total_size
+
+        # subsample
+        indices = indices[self.rank:self.total_size:self.num_replicas]
+        assert len(indices) == self.num_samples
+
+        return iter(indices)
--- a/mmgen/datasets/singan_dataset.py
+++ b/mmgen/datasets/singan_dataset.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import mmcv
+import numpy as np
+import torch
+from torch.utils.data import Dataset
+
+from .builder import DATASETS
+
+
+def create_real_pyramid(real, min_size, max_size, scale_factor_init):
+    """Create image pyramid.
+
+    This function is modified from the official implementation:
+    https://github.com/tamarott/SinGAN/blob/master/SinGAN/functions.py#L221
+
+    In this implementation, we adopt the rescaling function from MMCV.
+    Args:
+        real (np.array): The real image array.
+        min_size (int): The minimum size for the image pyramid.
+        max_size (int): The maximum size for the image pyramid.
+        scale_factor_init (float): The initial scale factor.
+    """
+
+    num_scales = int(
+        np.ceil(
+            np.log(np.power(min_size / min(real.shape[0], real.shape[1]), 1)) /
+            np.log(scale_factor_init))) + 1
+
+    scale2stop = int(
+        np.ceil(
+            np.log(
+                min([max_size, max([real.shape[0], real.shape[1]])]) /
+                max([real.shape[0], real.shape[1]])) /
+            np.log(scale_factor_init)))
+
+    stop_scale = num_scales - scale2stop
+
+    scale1 = min(max_size / max([real.shape[0], real.shape[1]]), 1)
+    real_max = mmcv.imrescale(real, scale1)
+    scale_factor = np.power(
+        min_size / (min(real_max.shape[0], real_max.shape[1])),
+        1 / (stop_scale))
+
+    scale2stop = int(
+        np.ceil(
+            np.log(
+                min([max_size, max([real.shape[0], real.shape[1]])]) /
+                max([real.shape[0], real.shape[1]])) /
+            np.log(scale_factor_init)))
+    stop_scale = num_scales - scale2stop
+
+    reals = []
+    for i in range(stop_scale + 1):
+        scale = np.power(scale_factor, stop_scale - i)
+        curr_real = mmcv.imrescale(real, scale)
+        reals.append(curr_real)
+
+    return reals, scale_factor, stop_scale
+
+
+@DATASETS.register_module()
+class SinGANDataset(Dataset):
+    """SinGAN Dataset.
+
+    In this dataset, we create an image pyramid and save it in the cache.
+
+    Args:
+        img_path (str): Path to the single image file.
+        min_size (int): Min size of the image pyramid. Here, the number will be
+            set to the ``min(H, W)``.
+        max_size (int): Max size of the image pyramid. Here, the number will be
+            set to the ``max(H, W)``.
+        scale_factor_init (float): Rescale factor. Note that the actual factor
+            we use may be a little bit different from this value.
+        num_samples (int, optional): The number of samples (length) in this
+            dataset. A negative value makes the length ``int(1e6)``.
+            Defaults to -1.
+    """
+
+    def __init__(self,
+                 img_path,
+                 min_size,
+                 max_size,
+                 scale_factor_init,
+                 num_samples=-1):
+        self.img_path = img_path
+        assert mmcv.is_filepath(self.img_path)
+        # The whole pyramid is built once here and cached in ``data_dict``.
+        self.load_annotations(min_size, max_size, scale_factor_init)
+        self.num_samples = num_samples
+
+    def load_annotations(self, min_size, max_size, scale_factor_init):
+        """Load annotations for SinGAN Dataset.
+
+        Reads the image, builds the pyramid and fills ``self.data_dict`` with
+        one ``real_scale{i}`` tensor per level plus an all-zero
+        ``input_sample`` shaped like ``real_scale0``.
+
+        Args:
+            min_size (int): The minimum size for the image pyramid.
+            max_size (int): The maximum size for the image pyramid.
+            scale_factor_init (float): The initial scale factor.
+        """
+        real = mmcv.imread(self.img_path)
+        self.reals, self.scale_factor, self.stop_scale = create_real_pyramid(
+            real, min_size, max_size, scale_factor_init)
+
+        self.data_dict = {}
+
+        for i, real in enumerate(self.reals):
+            self.data_dict[f'real_scale{i}'] = self._img2tensor(real)
+
+        self.data_dict['input_sample'] = torch.zeros_like(
+            self.data_dict['real_scale0'])
+
+    def _img2tensor(self, img):
+        """Convert an image array to a float32 tensor.
+
+        The last axis is moved to the front and the values are mapped with
+        ``(x / 255 - 0.5) * 2``.
+        """
+        img = torch.from_numpy(img).to(torch.float32).permute(2, 0,
+                                                              1).contiguous()
+        # Maps [0, 255] to [-1, 1] -- assumes 8-bit input; TODO confirm.
+        img = (img / 255 - 0.5) * 2
+
+        return img
+
+    def __getitem__(self, index):
+        """Return the cached dict; ``index`` is ignored.
+
+        The same dict object is returned on every call.
+        """
+        return self.data_dict
+
+    def __len__(self):
+        """Return ``num_samples``, or ``int(1e6)`` if it is negative."""
+        return int(1e6) if self.num_samples < 0 else self.num_samples
--- a/mmgen/datasets/unconditional_image_dataset.py
+++ b/mmgen/datasets/unconditional_image_dataset.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+
+import mmcv
+from torch.utils.data import Dataset
+
+from .builder import DATASETS
+from .pipelines import Compose
+
+
+@DATASETS.register_module()
+class UnconditionalImageDataset(Dataset):
+    """Unconditional Image Dataset.
+
+    This dataset contains raw images for training unconditional GANs. Given
+    a root dir, we will recursively find all images in this root. The
+    transformation on data is defined by the pipeline.
+
+    Args:
+        imgs_root (str): Root path for unconditional images.
+        pipeline (list[dict | callable]): A sequence of data transforms.
+        test_mode (bool, optional): If True, the dataset will work in test
+            mode. Otherwise, in train mode. Default to False.
+    """
+
+    _VALID_IMG_SUFFIX = ('.jpg', '.png', '.jpeg', '.JPEG')
+
+    def __init__(self, imgs_root, pipeline, test_mode=False):
+        super().__init__()
+        self.imgs_root = imgs_root
+        self.pipeline = Compose(pipeline)
+        self.test_mode = test_mode
+        self.load_annotations()
+
+        # print basic dataset information to check the validity
+        mmcv.print_log(repr(self), 'mmgen')
+
+    def load_annotations(self):
+        """Load annotations."""
+        # recursively find all of the valid images from imgs_root
+        imgs_list = mmcv.scandir(
+            self.imgs_root, self._VALID_IMG_SUFFIX, recursive=True)
+        self.imgs_list = [osp.join(self.imgs_root, x) for x in imgs_list]
+
+    def prepare_train_data(self, idx):
+        """Prepare training data.
+
+        Args:
+            idx (int): Index of current batch.
+
+        Returns:
+            dict: Prepared training data batch.
+        """
+        results = dict(real_img_path=self.imgs_list[idx])
+        return self.pipeline(results)
+
+    def prepare_test_data(self, idx):
+        """Prepare testing data.
+
+        Args:
+            idx (int): Index of current batch.
+
+        Returns:
+            dict: Prepared training data batch.
+        """
+        results = dict(real_img_path=self.imgs_list[idx])
+        return self.pipeline(results)
+
+    def __len__(self):
+        return len(self.imgs_list)
+
+    def __getitem__(self, idx):
+        if not self.test_mode:
+            return self.prepare_train_data(idx)
+
+        return self.prepare_test_data(idx)
+
+    def __repr__(self):
+        dataset_name = self.__class__
+        imgs_root = self.imgs_root
+        num_imgs = len(self)
+        return (f'dataset_name: {dataset_name}, total {num_imgs} images in '
+                f'imgs_root: {imgs_root}')
--- a/mmgen/datasets/unpaired_image_dataset.py
+++ b/mmgen/datasets/unpaired_image_dataset.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+from pathlib import Path
+
+import numpy as np
+from mmcv import scandir
+from torch.utils.data import Dataset
+
+from .builder import DATASETS
+from .pipelines import Compose
+
+# File suffixes accepted as images by ``UnpairedImageDataset.scan_folder``;
+# lower- and upper-case spellings are listed explicitly.
+IMG_EXTENSIONS = ('.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG', '.ppm',
+                  '.PPM', '.bmp', '.BMP', '.tif', '.TIF', '.tiff', '.TIFF')
+
+
+@DATASETS.register_module()
+class UnpairedImageDataset(Dataset):
+    """General unpaired image folder dataset for image generation.
+
+    It assumes that the training directory of images from domain A is
+    '/path/to/data/trainA', and that from domain B is '/path/to/data/trainB',
+    respectively. '/path/to/data' can be initialized by args 'dataroot'.
+    During test time, the directory is '/path/to/data/testA' and
+    '/path/to/data/testB', respectively.
+
+    Args:
+        dataroot (str | :obj:`Path`): Path to the folder root of unpaired
+            images.
+        pipeline (List[dict | callable]): A sequence of data transformations.
+        test_mode (bool): Store `True` when building test dataset.
+            Default: `False`.
+        domain_a (str, optional): Domain of images in trainA / testA.
+            Although the default is None, a ``str`` must be given.
+        domain_b (str, optional): Domain of images in trainB / testB.
+            Although the default is None, a ``str`` must be given.
+    """
+
+    def __init__(self,
+                 dataroot,
+                 pipeline,
+                 test_mode=False,
+                 domain_a=None,
+                 domain_b=None):
+        super().__init__()
+        phase = 'test' if test_mode else 'train'
+        self.dataroot_a = osp.join(str(dataroot), phase + 'A')
+        self.dataroot_b = osp.join(str(dataroot), phase + 'B')
+        self.data_infos_a = self.load_annotations(self.dataroot_a)
+        self.data_infos_b = self.load_annotations(self.dataroot_b)
+        self.len_a = len(self.data_infos_a)
+        self.len_b = len(self.data_infos_b)
+        self.test_mode = test_mode
+        self.pipeline = Compose(pipeline)
+        # The domain names are checked only after both folders were scanned.
+        assert isinstance(domain_a, str)
+        assert isinstance(domain_b, str)
+        self.domain_a = domain_a
+        self.domain_b = domain_b
+
+    def load_annotations(self, dataroot):
+        """Load unpaired image paths of one domain.
+
+        Args:
+            dataroot (str): Path to the folder root for unpaired images of
+                one domain.
+
+        Returns:
+            list[dict]: List that contains unpaired image paths of one domain,
+                sorted by path. Each item is ``dict(path=...)``.
+        """
+        data_infos = []
+        paths = sorted(self.scan_folder(dataroot))
+        for path in paths:
+            data_infos.append(dict(path=path))
+        return data_infos
+
+    def prepare_train_data(self, idx):
+        """Prepare unpaired training data.
+
+        The image of domain A is selected by ``idx`` (wrapped around the
+        number of A images); the image of domain B is drawn at random.
+
+        Args:
+            idx (int): Index of current batch.
+
+        Returns:
+            dict: Prepared training data batch.
+        """
+        img_a_path = self.data_infos_a[idx % self.len_a]['path']
+        # Random partner from domain B, independent of ``idx``.
+        idx_b = np.random.randint(0, self.len_b)
+        img_b_path = self.data_infos_b[idx_b]['path']
+        results = dict()
+        results[f'img_{self.domain_a}_path'] = img_a_path
+        results[f'img_{self.domain_b}_path'] = img_b_path
+        return self.pipeline(results)
+
+    def prepare_test_data(self, idx):
+        """Prepare unpaired test data.
+
+        Both images are selected by ``idx``, wrapped around the number of
+        images of the respective domain.
+
+        Args:
+            idx (int): Index of current batch.
+
+        Returns:
+            dict: Prepared test data batch, i.e. the output of the pipeline.
+        """
+        img_a_path = self.data_infos_a[idx % self.len_a]['path']
+        img_b_path = self.data_infos_b[idx % self.len_b]['path']
+        results = dict()
+        results[f'img_{self.domain_a}_path'] = img_a_path
+        results[f'img_{self.domain_b}_path'] = img_b_path
+        return self.pipeline(results)
+
+    def __len__(self):
+        """Return the size of the larger of the two domains."""
+        return max(self.len_a, self.len_b)
+
+    @staticmethod
+    def scan_folder(path):
+        """Obtain image path list (including sub-folders) from a given folder.
+
+        Args:
+            path (str | :obj:`Path`): Folder path.
+
+        Returns:
+            list[str]: Image list obtained from the given folder.
+
+        Raises:
+            TypeError: If ``path`` is neither a ``str`` nor a ``Path``.
+            AssertionError: If no file with a suffix in ``IMG_EXTENSIONS``
+                is found.
+        """
+
+        if isinstance(path, (str, Path)):
+            path = str(path)
+        else:
+            raise TypeError("'path' must be a str or a Path object, "
+                            f'but received {type(path)}.')
+
+        images = scandir(path, suffix=IMG_EXTENSIONS, recursive=True)
+        images = [osp.join(path, v) for v in images]
+        assert images, f'{path} has no valid image file.'
+        return images
+
+    def __getitem__(self, idx):
+        """Get item at each call.
+
+        Args:
+            idx (int): Index for getting each item.
+
+        Returns:
+            dict: Training data if ``test_mode`` is False, otherwise test
+                data.
+        """
+        if not self.test_mode:
+            return self.prepare_train_data(idx)
+
+        return self.prepare_test_data(idx)
--- a/mmgen/models/__init__.py
+++ b/mmgen/models/__init__.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from .architectures import *  # noqa: F401, F403
+from .builder import MODELS, MODULES, build_model, build_module
+from .common import *  # noqa: F401, F403
+from .diffusions import *  # noqa: F401, F403
+from .gans import *  # noqa: F401, F403
+from .losses import *  # noqa: F401, F403
+from .misc import *  # noqa: F401, F403
+from .translation_models import *  # noqa: F401, F403
+
+__all__ = ['build_model', 'MODELS', 'build_module', 'MODULES']
--- a/mmgen/models/architectures/__init__.py
+++ b/mmgen/models/architectures/__init__.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from .arcface import IDLossModel
+from .biggan import (BigGANDeepDiscriminator, BigGANDeepGenerator,
+                     BigGANDiscriminator, BigGANGenerator, SNConvModule)
+from .cyclegan import ResnetGenerator
+from .dcgan import DCGANDiscriminator, DCGANGenerator
+from .ddpm import DenoisingUnet
+from .fid_inception import InceptionV3
+from .lpips import PerceptualLoss
+from .lsgan import LSGANDiscriminator, LSGANGenerator
+from .pggan import (EqualizedLR, EqualizedLRConvDownModule,
+                    EqualizedLRConvModule, EqualizedLRConvUpModule,
+                    EqualizedLRLinearModule, MiniBatchStddevLayer,
+                    PGGANDiscriminator, PGGANGenerator, PGGANNoiseTo2DFeat,
+                    PixelNorm, equalized_lr)
+from .pix2pix import PatchDiscriminator, generation_init_weights
+from .positional_encoding import CatersianGrid, SinusoidalPositionalEmbedding
+from .singan import SinGANMultiScaleDiscriminator, SinGANMultiScaleGenerator
+from .sngan_proj import ProjDiscriminator, SNGANGenerator
+from .stylegan import (MSStyleGAN2Discriminator, MSStyleGANv2Generator,
+                       StyleGAN1Discriminator, StyleGAN2Discriminator,
+                       StyleGANv1Generator, StyleGANv2Generator,
+                       StyleGANv3Generator)
+from .wgan_gp import WGANGPDiscriminator, WGANGPGenerator
+
+__all__ = [
+    'DCGANGenerator', 'DCGANDiscriminator', 'EqualizedLR',
+    'EqualizedLRConvModule', 'equalized_lr', 'EqualizedLRLinearModule',
+    'EqualizedLRConvUpModule', 'EqualizedLRConvDownModule', 'PixelNorm',
+    'MiniBatchStddevLayer', 'PGGANNoiseTo2DFeat', 'PGGANGenerator',
+    'PGGANDiscriminator', 'InceptionV3', 'SinGANMultiScaleDiscriminator',
+    'SinGANMultiScaleGenerator', 'CatersianGrid',
+    'SinusoidalPositionalEmbedding', 'StyleGAN2Discriminator',
+    'StyleGANv2Generator', 'StyleGANv1Generator', 'StyleGAN1Discriminator',
+    'MSStyleGAN2Discriminator', 'MSStyleGANv2Generator',
+    'generation_init_weights', 'PatchDiscriminator', 'ResnetGenerator',
+    'PerceptualLoss', 'WGANGPDiscriminator', 'WGANGPGenerator',
+    'LSGANDiscriminator', 'LSGANGenerator', 'ProjDiscriminator',
+    'SNGANGenerator', 'BigGANGenerator', 'SNConvModule', 'BigGANDiscriminator',
+    'BigGANDeepGenerator', 'BigGANDeepDiscriminator', 'DenoisingUnet',
+    'StyleGANv3Generator', 'IDLossModel'
+]
--- a/mmgen/models/architectures/arcface/__init__.py
+++ b/mmgen/models/architectures/arcface/__init__.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from .id_loss import IDLossModel
+
+__all__ = ['IDLossModel']
--- a/mmgen/models/architectures/arcface/helpers.py
+++ b/mmgen/models/architectures/arcface/helpers.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from collections import namedtuple
+
+import torch
+from torch.nn import (AdaptiveAvgPool2d, BatchNorm2d, Conv2d, MaxPool2d,
+                      Module, PReLU, ReLU, Sequential, Sigmoid)
+
+# yapf: disable
+"""
+ArcFace implementation from [TreB1eN](https://github.com/TreB1eN/InsightFace_Pytorch) # isort:skip  # noqa
+"""
+# yapf: enable
+
+
+class Flatten(Module):
+    """Flatten Module."""
+
+    def forward(self, input):
+        return input.view(input.size(0), -1)
+
+
+def l2_norm(input, axis=1):
+    """l2 normalization.
+
+    Args:
+        input (torch.Tensor): The input tensor.
+        axis (int, optional): Specifies which axis of input to calculate the
+            norm across. Defaults to 1.
+
+    Returns:
+        Tensor: Tensor after L2 normalization per-instance.
+    """
+    norm = torch.norm(input, 2, axis, True)
+    output = torch.div(input, norm)
+    return output
+
+
+class Bottleneck(namedtuple('Block', ['in_channel', 'depth', 'stride'])):
+    """A named tuple describing a ResNet block.
+
+    Fields are ``in_channel``, ``depth`` and ``stride``, in this order.
+    """
+
+
+def get_block(in_channel, depth, num_units, stride=2):
+    """Build the unit configs of one stage.
+
+    The first unit maps ``in_channel`` to ``depth`` with the given
+    ``stride``; the remaining ``num_units - 1`` units keep ``depth``
+    channels and use stride 1.
+
+    Args:
+        in_channel (int): Input channels.
+        depth (int): Output channels.
+        num_units (int): Number of unit modules.
+        stride (int, optional): Conv2d stride of the first unit.
+            Defaults to 2.
+
+    Returns:
+        list: A list of unit modules' config.
+    """
+    first_unit = Bottleneck(in_channel, depth, stride)
+    following_units = [
+        Bottleneck(depth, depth, 1) for _ in range(num_units - 1)
+    ]
+    return [first_unit] + following_units
+
+
+def get_blocks(num_layers):
+    """Return the per-stage block configs of the backbone.
+
+    Args:
+        num_layers (int): Number of ConvBlock layers in backbone.
+
+    Raises:
+        ValueError: `num_layers` must be one of [50, 100, 152].
+
+    Returns:
+        list: A list of block configs, one list of units per stage.
+    """
+    # (num_layers, number of units in each of the four stages).
+    supported = (
+        (50, (3, 4, 14, 3)),
+        (100, (3, 13, 30, 3)),
+        (152, (3, 8, 36, 3)),
+    )
+    stage_units = None
+    for depth_key, units in supported:
+        if num_layers == depth_key:
+            stage_units = units
+            break
+    if stage_units is None:
+        raise ValueError(
+            'Invalid number of layers: {}. Must be one of [50, 100, 152]'.
+            format(num_layers))
+
+    # (in_channel, depth) of the four stages is the same for every variant.
+    stage_channels = ((64, 64), (64, 128), (128, 256), (256, 512))
+    return [
+        get_block(in_channel=in_ch, depth=out_ch, num_units=n_units)
+        for (in_ch, out_ch), n_units in zip(stage_channels, stage_units)
+    ]
+
+
+class SEModule(Module):
+    """Squeeze-and-Excitation channel gate.
+
+    Args:
+        channels (int): Input channels.
+        reduction (int): Intermediate channels reduction ratio.
+    """
+
+    def __init__(self, channels, reduction):
+        super().__init__()
+        squeezed_channels = channels // reduction
+        self.avg_pool = AdaptiveAvgPool2d(1)
+        self.fc1 = Conv2d(
+            channels, squeezed_channels, kernel_size=1, padding=0, bias=False)
+        self.relu = ReLU(inplace=True)
+        self.fc2 = Conv2d(
+            squeezed_channels, channels, kernel_size=1, padding=0, bias=False)
+        self.sigmoid = Sigmoid()
+
+    def forward(self, x):
+        """Scale ``x`` by a sigmoid gate computed from its pooled features."""
+        gate = self.avg_pool(x)
+        gate = self.relu(self.fc1(gate))
+        gate = self.sigmoid(self.fc2(gate))
+        return x * gate
+
+
+class bottleneck_IR(Module):
+    """Intermediate Resblock of bottleneck.
+
+    The output is the sum of a residual branch (BN - 3x3 conv - PReLU -
+    strided 3x3 conv - BN) and a shortcut branch.
+
+    Args:
+        in_channel (int): Input channels.
+        depth (int): Output channels.
+        stride (int): Conv2d stride.
+    """
+
+    def __init__(self, in_channel, depth, stride):
+        super().__init__()
+        if in_channel == depth:
+            # Channels already match: a 1x1 max-pool only applies the stride.
+            shortcut = MaxPool2d(1, stride)
+        else:
+            shortcut = Sequential(
+                Conv2d(in_channel, depth, (1, 1), stride, bias=False),
+                BatchNorm2d(depth))
+        self.shortcut_layer = shortcut
+
+        residual_ops = [
+            BatchNorm2d(in_channel),
+            Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False),
+            PReLU(depth),
+            Conv2d(depth, depth, (3, 3), stride, 1, bias=False),
+            BatchNorm2d(depth),
+        ]
+        self.res_layer = Sequential(*residual_ops)
+
+    def forward(self, x):
+        """Forward function."""
+        identity = self.shortcut_layer(x)
+        residual = self.res_layer(x)
+        return residual + identity
+
+
+class bottleneck_IR_SE(Module):
+    """Intermediate Resblock of bottleneck with SEModule.
+
+    The residual branch ends with an ``SEModule`` (reduction 16) before it
+    is added to the shortcut branch.
+
+    Args:
+        in_channel (int): Input channels.
+        depth (int): Output channels.
+        stride (int): Conv2d stride.
+    """
+
+    def __init__(self, in_channel, depth, stride):
+        super().__init__()
+        if in_channel == depth:
+            # Channels already match: a 1x1 max-pool only applies the stride.
+            shortcut = MaxPool2d(1, stride)
+        else:
+            shortcut = Sequential(
+                Conv2d(in_channel, depth, (1, 1), stride, bias=False),
+                BatchNorm2d(depth))
+        self.shortcut_layer = shortcut
+
+        residual_ops = [
+            BatchNorm2d(in_channel),
+            Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False),
+            PReLU(depth),
+            Conv2d(depth, depth, (3, 3), stride, 1, bias=False),
+            BatchNorm2d(depth),
+            SEModule(depth, 16),
+        ]
+        self.res_layer = Sequential(*residual_ops)
+
+    def forward(self, x):
+        """Forward function."""
+        identity = self.shortcut_layer(x)
+        residual = self.res_layer(x)
+        return residual + identity
--- a/mmgen/models/architectures/arcface/id_loss.py
+++ b/mmgen/models/architectures/arcface/id_loss.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import mmcv
+import torch
+from torch import nn
+
+from mmgen.models.builder import MODULES
+from .model_irse import Backbone
+
+
+@MODULES.register_module('ArcFace')
+class IDLossModel(nn.Module):
+    """Identity loss computed from ArcFace (IR-SE50) face embeddings.
+
+    Args:
+        ir_se50_weights (str, optional): URL of the IR-SE50 state dict,
+            passed to ``torch.hub.load_state_dict_from_url``. If None,
+            ``_ir_se50_url`` is used. Defaults to None.
+        device (str, optional): Device the face network is moved to.
+            Defaults to 'cuda'.
+    """
+    # ir se50 weight download link
+    _ir_se50_url = 'https://gg0ltg.by.files.1drv.com/y4m3fNNszG03z9n8JQ7EhdtQKW8tQVQMFBisPVRgoXi_UfP8pKSSqv8RJNmHy2JampcPmEazo_Mx6NTFSqBpZmhPniROm9uNoghnzaavvYpxkCfiNmDH9YyIF3g-0nwt6bsjk2X80JDdL5z88OAblSDmB-kuQkWSWvA9BM3Xt8DHMCY8lO4HOQCZ5YWUtFyPAVwEyzTGDM-JRA5EJoN2bF1cg'  # noqa
+
+    def __init__(self, ir_se50_weights=None, device='cuda'):
+        super(IDLossModel, self).__init__()
+        mmcv.print_log('Loading ResNet ArcFace', 'mmgen')
+        self.facenet = Backbone(
+            input_size=112, num_layers=50, drop_ratio=0.6, mode='ir_se')
+        if ir_se50_weights is None:
+            ir_se50_weights = self._ir_se50_url
+        # Deserialize onto the CPU first: without ``map_location`` a
+        # checkpoint stored from GPU tensors cannot be loaded on a host
+        # without CUDA. The network is moved to ``device`` below.
+        self.facenet.load_state_dict(
+            torch.hub.load_state_dict_from_url(
+                ir_se50_weights, map_location='cpu'))
+        self.pool = torch.nn.AdaptiveAvgPool2d((256, 256))
+        self.face_pool = torch.nn.AdaptiveAvgPool2d((112, 112))
+        self.facenet = self.facenet.eval().to(device)
+
+    def extract_feats(self, x):
+        """Extract face embeddings from a batch of images.
+
+        Args:
+            x (torch.Tensor): Image batch; indexed as a 4D tensor whose last
+                two axes are spatial.
+
+        Returns:
+            torch.Tensor: Output of the face network for the cropped and
+                resized input.
+        """
+        # Only the size of axis 2 is checked before pooling to 256x256.
+        if x.shape[2] != 256:
+            x = self.pool(x)
+        x = x[:, :, 35:223, 32:220]  # Crop interesting region
+        x = self.face_pool(x)
+        x_feats = self.facenet(x)
+        return x_feats
+
+    def forward(self, pred=None, gt=None):
+        """Compute the identity loss between ``pred`` and ``gt``.
+
+        Args:
+            pred (torch.Tensor): Predicted images.
+            gt (torch.Tensor): Target images; its first axis gives the
+                number of samples.
+
+        Returns:
+            tuple: ``(loss, sim_improvement)``. ``loss`` is the mean over
+                samples of ``1 - <feat(pred), feat(gt)>``. The features come
+                out of the backbone L2-normalized, so the dot product is a
+                cosine similarity. ``sim_improvement`` is never accumulated
+                and is always ``0.0``.
+        """
+        n_samples = gt.shape[0]
+        y_feats = self.extract_feats(
+            gt)  # Otherwise use the feature from there
+        y_hat_feats = self.extract_feats(pred)
+        # The target features act as constants; no gradient flows into gt.
+        y_feats = y_feats.detach()
+        loss = 0
+        sim_improvement = 0
+        count = 0
+        for i in range(n_samples):
+            diff_target = y_hat_feats[i].dot(y_feats[i])
+            loss += 1 - diff_target
+            count += 1
+
+        return loss / count, sim_improvement / count
--- a/mmgen/models/architectures/arcface/model_irse.py
+++ b/mmgen/models/architectures/arcface/model_irse.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from torch.nn import (BatchNorm1d, BatchNorm2d, Conv2d, Dropout, Linear,
+                      Module, PReLU, Sequential)
+
+from .helpers import (Flatten, bottleneck_IR, bottleneck_IR_SE, get_blocks,
+                      l2_norm)
+
+# yapf: disable
+"""
+Modified Backbone implementation from [TreB1eN](https://github.com/TreB1eN/InsightFace_Pytorch) # isort:skip  # noqa
+"""
+# yapf: enable
+
+
+class Backbone(Module):
+    """ArcFace backbone built from IR / IR-SE residual units.
+
+    Many face-recognition repos follow this implementation, and so do we.
+    Ref: https://github.com/orpatashnik/StyleCLIP/blob/main/models/facial_recognition/helpers.py # noqa
+
+    Args:
+        input_size (int): Input size of image, 112 or 224.
+        num_layers (int): Number of layers in backbone: 50, 100 or 152.
+        mode (str, optional): Bottleneck mode. If set to 'ir_se', then
+            SEModule will be applied. Defaults to 'ir'.
+        drop_ratio (float, optional): Dropout ratio. Defaults to 0.4.
+        affine (bool, optional): Whether to use affine in BatchNorm1d.
+            Defaults to True.
+    """
+
+    def __init__(self,
+                 input_size,
+                 num_layers,
+                 mode='ir',
+                 drop_ratio=0.4,
+                 affine=True):
+        super().__init__()
+        assert input_size in [112, 224], 'input_size should be 112 or 224'
+        assert num_layers in [50, 100,
+                              152], 'num_layers should be 50, 100 or 152'
+        assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se'
+        blocks = get_blocks(num_layers)
+        if mode == 'ir':
+            unit_module = bottleneck_IR
+        elif mode == 'ir_se':
+            unit_module = bottleneck_IR_SE
+
+        stem = [
+            Conv2d(3, 64, (3, 3), 1, 1, bias=False),
+            BatchNorm2d(64),
+            PReLU(64),
+        ]
+        self.input_layer = Sequential(*stem)
+
+        # Side length of the 512-channel map that feeds the linear head:
+        # 7 for 112-pixel inputs, 14 otherwise.
+        feat_size = 7 if input_size == 112 else 14
+        head = [
+            BatchNorm2d(512),
+            Dropout(drop_ratio),
+            Flatten(),
+            Linear(512 * feat_size * feat_size, 512),
+            BatchNorm1d(512, affine=affine),
+        ]
+        self.output_layer = Sequential(*head)
+
+        units = [
+            unit_module(cfg.in_channel, cfg.depth, cfg.stride)
+            for stage in blocks for cfg in stage
+        ]
+        self.body = Sequential(*units)
+
+    def forward(self, x):
+        """Return the L2-normalized embedding of ``x``."""
+        feat = self.input_layer(x)
+        feat = self.body(feat)
+        feat = self.output_layer(feat)
+        return l2_norm(feat)
+
+
+def IR_50(input_size):
+    """Build an 'ir' Backbone with ``num_layers=50`` and ``affine=False``."""
+    return Backbone(
+        input_size, num_layers=50, mode='ir', drop_ratio=0.4, affine=False)
+
+
+def IR_101(input_size):
+    """Build the ir-101 model.
+
+    The Backbone is created with ``num_layers=100``, ``mode='ir'`` and
+    ``affine=False``.
+    """
+    return Backbone(
+        input_size, num_layers=100, mode='ir', drop_ratio=0.4, affine=False)
+
+
+def IR_152(input_size):
+    """Build an 'ir' Backbone with ``num_layers=152`` and ``affine=False``."""
+    return Backbone(
+        input_size, num_layers=152, mode='ir', drop_ratio=0.4, affine=False)
+
+
+def IR_SE_50(input_size):
+    """Build an 'ir_se' Backbone with ``num_layers=50``, ``affine=False``."""
+    return Backbone(
+        input_size, num_layers=50, mode='ir_se', drop_ratio=0.4, affine=False)
+
+
+def IR_SE_101(input_size):
+    """Build the ir_se-101 model.
+
+    The Backbone is created with ``num_layers=100``, ``mode='ir_se'`` and
+    ``affine=False``.
+    """
+    return Backbone(
+        input_size, num_layers=100, mode='ir_se', drop_ratio=0.4, affine=False)
+
+
+def IR_SE_152(input_size):
+    """Build an 'ir_se' Backbone with ``num_layers=152``, ``affine=False``."""
+    return Backbone(
+        input_size, num_layers=152, mode='ir_se', drop_ratio=0.4, affine=False)
--- a/mmgen/models/architectures/biggan/__init__.py
+++ b/mmgen/models/architectures/biggan/__init__.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from .generator_discriminator import BigGANDiscriminator, BigGANGenerator
+from .generator_discriminator_deep import (BigGANDeepDiscriminator,
+                                           BigGANDeepGenerator)
+from .modules import (BigGANConditionBN, BigGANDeepDiscResBlock,
+                      BigGANDeepGenResBlock, BigGANDiscResBlock,
+                      BigGANGenResBlock, SelfAttentionBlock, SNConvModule)
+
+__all__ = [
+    'BigGANGenerator', 'BigGANGenResBlock', 'BigGANConditionBN',
+    'BigGANDiscriminator', 'SelfAttentionBlock', 'BigGANDiscResBlock',
+    'BigGANDeepDiscriminator', 'BigGANDeepGenerator', 'BigGANDeepDiscResBlock',
+    'BigGANDeepGenResBlock', 'SNConvModule'
+]
--- a/mmgen/models/architectures/biggan/biggan_snmodule.py
+++ b/mmgen/models/architectures/biggan/biggan_snmodule.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+# yapf:disable
+'''
+    Ref: Functions in this file are borrowed from https://github.com/ajbrock/BigGAN-PyTorch/blob/master/layers.py # noqa
+'''
+# yapf:enable
+
+
+def proj(x, y):
+    """Project x onto y.
+
+    Args:
+        x (torch.Tensor): Vector to be projected; used as a 2D matrix by
+            ``torch.mm``.
+        y (torch.Tensor): Direction vector; used as a 2D matrix by
+            ``torch.mm``.
+
+    Returns:
+        torch.Tensor: Projection of x onto y.
+    """
+    overlap = torch.mm(y, x.t())
+    y_sq_norm = torch.mm(y, y.t())
+    return overlap * y / y_sq_norm
+
+
+def gram_schmidt(x, ys):
+    """Remove from x its projection onto every vector in ys.
+
+    Args:
+        x (torch.Tensor): Vector to be orthogonalized.
+        ys (list[torch.Tensor]): Vectors to orthogonalize against, applied
+            one after another in list order.
+
+    Returns:
+        torch.Tensor: Result of Gram–Schmidt orthogonalization. ``x`` is
+            returned unchanged when ``ys`` is empty.
+    """
+    remainder = x
+    for basis in ys:
+        remainder = remainder - proj(remainder, basis)
+    return remainder
+
+
+def power_iteration(weight, u_list, update=True, eps=1e-12):
+    """Power iteration method for calculating spectral norm.
+
+    One step is run per vector in ``u_list``. Each new vector is
+    orthogonalized against the ones already produced in this call, so the
+    first entry is never deflated and later entries are.
+
+    Args:
+        weight (torch.Tensor): Module weight as a 2D matrix.
+        u_list (list[torch.Tensor]): List of left singular vectors. Its
+            length is the number of singular values that are estimated.
+        update (bool, optional): Whether to write the refreshed left
+            singular vectors back into ``u_list`` in place.
+            Defaults to True.
+        eps (float, optional): Vector normalization epsilon.
+            Defaults to 1e-12.
+
+    Returns:
+        tuple[list[torch.Tensor]]: ``(svs, us, vs)``: the singular values,
+            the left singular vectors and the right singular vectors.
+    """
+    us, vs, svs = [], [], []
+    for i, u in enumerate(u_list):
+        # The singular-vector estimates are treated as constants, so only
+        # this part runs without gradient tracking.
+        with torch.no_grad():
+            v = torch.matmul(u, weight)
+            v = F.normalize(gram_schmidt(v, vs), eps=eps)
+            vs += [v]
+            u = torch.matmul(v, weight.t())
+            u = F.normalize(gram_schmidt(u, us), eps=eps)
+            us += [u]
+            if update:
+                u_list[i][:] = u
+        # The singular value is computed with gradient tracking enabled so
+        # that ``weight / sv`` back-propagates through ``sv`` to ``weight``.
+        svs += [
+            torch.squeeze(torch.matmul(torch.matmul(v, weight.t()), u.t()))
+        ]
+    return svs, us, vs
+
+
+class SpectralNorm(object):
+    """Spectral normalization base class.
+
+    This class is a mixin: it calls ``self.register_buffer`` and reads
+    ``self.weight`` and ``self.training``, which must be provided by the
+    layer class it is combined with.
+
+    Args:
+        num_svs (int): Number of singular values. Must be at least 1.
+        num_iters (int): Number of power iterations per step. Must be at
+            least 1.
+        num_outputs (int): Number of output channels.
+        transpose (bool, optional): If set to `True`, weight
+            matrix will be transposed before power iteration.
+            Defaults to False.
+        eps (float, optional): Vector Normalization epsilon for
+            avoiding divide by zero. Defaults to 1e-12.
+
+    Raises:
+        ValueError: If ``num_svs`` or ``num_iters`` is smaller than 1.
+    """
+
+    def __init__(self,
+                 num_svs,
+                 num_iters,
+                 num_outputs,
+                 transpose=False,
+                 eps=1e-12):
+        # Without at least one singular value and one iteration,
+        # ``sn_weight`` has nothing to divide by.
+        if num_svs < 1:
+            raise ValueError(f'num_svs must be >= 1, but got {num_svs}')
+        if num_iters < 1:
+            raise ValueError(f'num_iters must be >= 1, but got {num_iters}')
+        self.num_iters = num_iters
+        self.num_svs = num_svs
+        self.transpose = transpose
+        self.eps = eps
+        # Register a singular vector for each sv
+        for i in range(self.num_svs):
+            self.register_buffer('u%d' % i, torch.randn(1, num_outputs))
+            self.register_buffer('sv%d' % i, torch.ones(1))
+
+    @property
+    def u(self):
+        """Get left singular vectors."""
+        return [getattr(self, 'u%d' % i) for i in range(self.num_svs)]
+
+    @property
+    def sv(self):
+        """Get singular values."""
+        return [getattr(self, 'sv%d' % i) for i in range(self.num_svs)]
+
+    def sn_weight(self):
+        """Compute the spectrally-normalized weight.
+
+        Returns:
+            torch.Tensor: ``self.weight`` divided by the first estimated
+                singular value.
+        """
+        W_mat = self.weight.view(self.weight.size(0), -1)
+        if self.transpose:
+            W_mat = W_mat.t()
+        # Apply num_iters power iterations
+        for _ in range(self.num_iters):
+            svs, us, vs = power_iteration(
+                W_mat, self.u, update=self.training, eps=self.eps)
+        # Update the svs
+        if self.training:
+            with torch.no_grad():
+                for i, sv in enumerate(svs):
+                    self.sv[i][:] = sv
+        # ``svs[0]`` comes from the only vector that is not orthogonalized
+        # against any other, i.e. the estimate of the dominant singular
+        # value. The remaining entries are extra tracked values and must
+        # not be used as the normalizer.
+        return self.weight / svs[0]
+
+
+class SNConv2d(nn.Conv2d, SpectralNorm):
+    """2D convolution whose weight is spectrally normalized on every call.
+
+    Args:
+        in_channels (int): Number of channels in the input feature map.
+        out_channels (int): Number of channels produced by the convolution.
+        kernel_size (int): Size of the convolving kernel.
+        stride (int, optional): Stride of the convolution. Defaults to 1.
+        padding (int, optional): Zero-padding added to both sides of
+            the input. Defaults to 0.
+        dilation (int, optional): Spacing between kernel elements.
+            Defaults to 1.
+        groups (int, optional): Number of blocked connections from input
+            channels to output channels. Defaults to 1.
+        bias (bool, optional): Whether to use bias parameter.
+            Defaults to True.
+        num_svs (int): Number of singular values. Defaults to 1.
+        num_iters (int): Number of power iterations per step.
+            Defaults to 1.
+        eps (float, optional): Vector Normalization epsilon for
+            avoiding divide by zero. Defaults to 1e-12.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 groups=1,
+                 bias=True,
+                 num_svs=1,
+                 num_iters=1,
+                 eps=1e-12):
+        # The conv layer is initialized first so that ``register_buffer``
+        # is available when the spectral-norm buffers are created.
+        nn.Conv2d.__init__(
+            self,
+            in_channels,
+            out_channels,
+            kernel_size,
+            stride=stride,
+            padding=padding,
+            dilation=dilation,
+            groups=groups,
+            bias=bias)
+        SpectralNorm.__init__(
+            self, num_svs, num_iters, num_outputs=out_channels, eps=eps)
+
+    def forward(self, x):
+        """Convolve ``x`` with the spectrally-normalized weight."""
+        normalized_weight = self.sn_weight()
+        return F.conv2d(x, normalized_weight, self.bias, self.stride,
+                        self.padding, self.dilation, self.groups)
+
+
+class SNLinear(nn.Linear, SpectralNorm):
+    """Linear layer whose weight is spectrally normalized on every call.
+
+    Args:
+        in_features (int): Number of channels in the input feature.
+        out_features (int): Number of channels in the out feature.
+        bias (bool, optional): Whether to use bias parameter.
+            Defaults to True.
+        num_svs (int): Number of singular values. Defaults to 1.
+        num_iters (int): Number of power iterations per step.
+            Defaults to 1.
+        eps (float, optional): Vector Normalization epsilon for
+            avoiding divide by zero. Defaults to 1e-12.
+    """
+
+    def __init__(self,
+                 in_features,
+                 out_features,
+                 bias=True,
+                 num_svs=1,
+                 num_iters=1,
+                 eps=1e-12):
+        # The linear layer is initialized first so that ``register_buffer``
+        # is available when the spectral-norm buffers are created.
+        nn.Linear.__init__(self, in_features, out_features, bias=bias)
+        SpectralNorm.__init__(
+            self, num_svs, num_iters, num_outputs=out_features, eps=eps)
+
+    def forward(self, x):
+        """Apply the linear map with the spectrally-normalized weight."""
+        normalized_weight = self.sn_weight()
+        return F.linear(x, normalized_weight, self.bias)
+
+
+# For convenience, the spectral-norm buffers are sized by num_embeddings
+# rather than by embedding_dim.
+class SNEmbedding(nn.Embedding, SpectralNorm):
+    """Embedding layer whose weight is spectrally normalized on every call.
+
+    Args:
+        num_embeddings (int): Size of the dictionary of embeddings.
+        embedding_dim (int): The size of each embedding vector.
+        padding_idx (int, optional): If specified, the entries at
+            padding_idx do not contribute to the gradient; therefore,
+            the embedding vector at padding_idx is not updated during
+            training, i.e. it remains as a fixed "pad". For a newly
+            constructed Embedding, the embedding vector at padding_idx
+            will default to all zeros, but can be updated to another value
+            to be used as the padding vector. Defaults to None.
+        max_norm (float, optional): If given, each embedding vector with
+            norm larger than max_norm is renormalized to have norm
+            max_norm. Defaults to None.
+        norm_type (int, optional): The p of the p-norm to compute for
+            the max_norm option. Default 2.
+        scale_grad_by_freq (bool, optional): If given, this will scale
+            gradients by the inverse of frequency of the words in the
+            mini-batch. Default False.
+        sparse (bool, optional): If True, gradient w.r.t. weight matrix
+            will be a sparse tensor. See Notes for more details regarding
+            sparse gradients. Defaults to False.
+        _weight (torch.Tensor, optional): Initial Weight. Defaults to None.
+        num_svs (int): Number of singular values. Defaults to 1.
+        num_iters (int): Number of power iterations per step.
+            Defaults to 1.
+        eps (float, optional): Vector Normalization epsilon for
+            avoiding divide by zero. Defaults to 1e-12.
+    """
+
+    def __init__(self,
+                 num_embeddings,
+                 embedding_dim,
+                 padding_idx=None,
+                 max_norm=None,
+                 norm_type=2,
+                 scale_grad_by_freq=False,
+                 sparse=False,
+                 _weight=None,
+                 num_svs=1,
+                 num_iters=1,
+                 eps=1e-12):
+        # The embedding is initialized first so that ``register_buffer``
+        # is available when the spectral-norm buffers are created.
+        nn.Embedding.__init__(
+            self,
+            num_embeddings,
+            embedding_dim,
+            padding_idx=padding_idx,
+            max_norm=max_norm,
+            norm_type=norm_type,
+            scale_grad_by_freq=scale_grad_by_freq,
+            sparse=sparse,
+            _weight=_weight)
+        SpectralNorm.__init__(
+            self, num_svs, num_iters, num_outputs=num_embeddings, eps=eps)
+
+    def forward(self, x):
+        """Look up ``x`` in the spectrally-normalized embedding table."""
+        normalized_weight = self.sn_weight()
+        return F.embedding(x, normalized_weight)
--- a/mmgen/models/architectures/biggan/generator_discriminator.py
+++ b/mmgen/models/architectures/biggan/generator_discriminator.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from copy import deepcopy
+
+import mmcv
+import torch
+import torch.nn as nn
+from mmcv.cnn import normal_init, xavier_init
+from mmcv.cnn.bricks import build_activation_layer
+from mmcv.runner import load_checkpoint
+from mmcv.runner.checkpoint import _load_checkpoint_with_prefix
+from torch.nn.utils import spectral_norm
+
+from mmgen.models.builder import MODULES, build_module
+from mmgen.utils import get_root_logger
+from ..common import get_module_device
+from .biggan_snmodule import SNEmbedding, SNLinear
+from .modules import SelfAttentionBlock, SNConvModule
+
+
+@MODULES.register_module()
+class BigGANGenerator(nn.Module):
+    """BigGAN Generator. The implementation refers to
+    https://github.com/ajbrock/BigGAN-PyTorch/blob/master/BigGAN.py # noqa.
+
+    In BigGAN, we use a SAGAN-based architecture composing of an self-attention
+    block and number of convolutional residual blocks with spectral
+    normalization.
+
+    More details can be found in: Large Scale GAN Training for High Fidelity
+    Natural Image Synthesis (ICLR2019).
+
+    The design of the model structure is highly corresponding to the output
+    resolution. For the original BigGAN's generator, you can set ``output_scale``
+    as you need and use the default value of ``arch_cfg`` and ``blocks_cfg``.
+    If you want to customize the model, you can set the arguments in this way:
+
+    ``arch_cfg``: Config for the architecture of this generator. You can refer
+    the ``_default_arch_cfgs`` in the ``_get_default_arch_cfg`` function to see
+    the format of the ``arch_cfg``. Basically, you need to provide information
+    of each block such as the numbers of input and output channels, whether to
+    perform upsampling, etc.
+
+    ``blocks_cfg``: Config for the convolution block. You can replace the block
+    type to your registered customized block and adjust block params here.
+    However, you should notice that some params are shared among these blocks
+    like ``act_cfg``, ``with_spectral_norm``, ``sn_eps``, etc.
+
+    Args:
+        output_scale (int): Output scale for the generated image.
+        noise_size (int, optional): Size of the input noise vector. Defaults
+            to 120.
+        num_classes (int, optional): The number of conditional classes. If set
+            to 0, this model will be degraded to an unconditional model.
+            Defaults to 0.
+        out_channels (int, optional): Number of channels in output images.
+            Defaults to 3.
+        base_channels (int, optional): The basic channel number of the
+            generator. The other layers contains channels based on this number.
+            Defaults to 96.
+        input_scale (int, optional): The scale of the input 2D feature map.
+            Defaults to 4.
+        with_shared_embedding (bool, optional): Whether to use shared
+            embedding. Defaults to True.
+        shared_dim (int, optional): The output channels of shared embedding.
+            Defaults to 128.
+        sn_eps (float, optional): Epsilon value for spectral normalization.
+            Defaults to 1e-6.
+        sn_style (str, optional): The style of spectral normalization.
+            If set to `ajbrock`, implementation by
+            ajbrock(https://github.com/ajbrock/BigGAN-PyTorch/blob/master/layers.py)
+            will be adopted.
+            If set to `torch`, implementation by `PyTorch` will be adopted.
+            Defaults to `ajbrock`.
+        init_type (str, optional): The name of an initialization method:
+            ortho | N02 | xavier. Defaults to 'ortho'.
+        split_noise (bool, optional): Whether to split input noise vector.
+            Defaults to True.
+        act_cfg (dict, optional): Config for the activation layer. Defaults to
+            dict(type='ReLU').
+        upsample_cfg (dict, optional): Config for the upsampling operation.
+            Defaults to dict(type='nearest', scale_factor=2).
+        with_spectral_norm (bool, optional): Whether to use spectral
+            normalization. Defaults to True.
+        auto_sync_bn (bool, optional): Whether to use synchronized batch
+            normalization. Defaults to True.
+        blocks_cfg (dict, optional): Config for the convolution block. Defaults
+            to dict(type='BigGANGenResBlock').
+        arch_cfg (dict, optional): Config for the architecture of this
+            generator. Defaults to None.
+        out_norm_cfg (dict, optional): Config for the norm of output layer.
+            Defaults to dict(type='BN').
+        pretrained (str | dict, optional): Path for the pretrained model or
+            dict containing information for pretained models whose necessary
+            key is 'ckpt_path'. Besides, you can also provide 'prefix' to load
+            the generator part from the whole state dict. Defaults to None.
+        rgb2bgr (bool, optional): Whether to reformat the output channels
+                with order `bgr`. We provide several pre-trained BigGAN
+                weights whose output channels order is `rgb`. You can set
+                this argument to True to use the weights.
+    """
+
+    def __init__(self,
+                 output_scale,
+                 noise_size=120,
+                 num_classes=0,
+                 out_channels=3,
+                 base_channels=96,
+                 input_scale=4,
+                 with_shared_embedding=True,
+                 shared_dim=128,
+                 sn_eps=1e-6,
+                 sn_style='ajbrock',
+                 init_type='ortho',
+                 split_noise=True,
+                 act_cfg=dict(type='ReLU'),
+                 upsample_cfg=dict(type='nearest', scale_factor=2),
+                 with_spectral_norm=True,
+                 auto_sync_bn=True,
+                 blocks_cfg=dict(type='BigGANGenResBlock'),
+                 arch_cfg=None,
+                 out_norm_cfg=dict(type='BN'),
+                 pretrained=None,
+                 rgb2bgr=False):
+        super().__init__()
+        self.noise_size = noise_size
+        self.num_classes = num_classes
+        self.shared_dim = shared_dim
+        self.with_shared_embedding = with_shared_embedding
+        self.output_scale = output_scale
+        self.arch = arch_cfg if arch_cfg else self._get_default_arch_cfg(
+            self.output_scale, base_channels)
+        self.input_scale = input_scale
+        self.split_noise = split_noise
+        self.blocks_cfg = deepcopy(blocks_cfg)
+        self.upsample_cfg = deepcopy(upsample_cfg)
+        self.rgb2bgr = rgb2bgr
+        self.sn_style = sn_style
+
+        # Validity Check
+        # If 'num_classes' equals to zero, we shall set 'with_shared_embedding'
+        # to False.
+        if num_classes == 0:
+            assert not self.with_shared_embedding
+        else:
+            if not self.with_shared_embedding:
+                # If not `with_shared_embedding`, we will use `nn.Embedding` to
+                # replace the original `Linear` layer in conditional BN.
+                # Meanwhile, we do not adopt split noises.
+                assert not self.split_noise
+
+        # If using split latents, we may need to adjust noise_size
+        if self.split_noise:
+            # Number of places z slots into
+            self.num_slots = len(self.arch['in_channels']) + 1
+            self.noise_chunk_size = self.noise_size // self.num_slots
+            # Recalculate latent dimensionality for even splitting into chunks
+            self.noise_size = self.noise_chunk_size * self.num_slots
+        else:
+            self.num_slots = 1
+            self.noise_chunk_size = 0
+
+        # First linear layer
+        self.noise2feat = nn.Linear(
+            self.noise_size // self.num_slots,
+            self.arch['in_channels'][0] * (self.input_scale**2))
+        if with_spectral_norm:
+            if sn_style == 'torch':
+                self.noise2feat = spectral_norm(self.noise2feat, eps=sn_eps)
+            elif sn_style == 'ajbrock':
+                self.noise2feat = SNLinear(
+                    self.noise_size // self.num_slots,
+                    self.arch['in_channels'][0] * (self.input_scale**2),
+                    eps=sn_eps)
+            else:
+                raise NotImplementedError(f'Your {sn_style} is not supported')
+
+        # If using 'shared_embedding', we will get an unified embedding of
+        # label for all blocks. If not, we just pass the label to each
+        # block.
+        if with_shared_embedding:
+            self.shared_embedding = nn.Embedding(num_classes, shared_dim)
+        else:
+            self.shared_embedding = nn.Identity()
+
+        if num_classes > 0:
+            self.dim_after_concat = (
+                self.shared_dim + self.noise_chunk_size
+                if self.with_shared_embedding else self.num_classes)
+        else:
+            self.dim_after_concat = self.noise_chunk_size
+
+        self.blocks_cfg.update(
+            dict(
+                dim_after_concat=self.dim_after_concat,
+                act_cfg=act_cfg,
+                sn_eps=sn_eps,
+                sn_style=sn_style,
+                input_is_label=(num_classes > 0)
+                and (not with_shared_embedding),
+                with_spectral_norm=with_spectral_norm,
+                auto_sync_bn=auto_sync_bn))
+
+        self.conv_blocks = nn.ModuleList()
+        for index, out_ch in enumerate(self.arch['out_channels']):
+            # change args to adapt to current block
+            self.blocks_cfg.update(
+                dict(
+                    in_channels=self.arch['in_channels'][index],
+                    out_channels=out_ch,
+                    upsample_cfg=self.upsample_cfg
+                    if self.arch['upsample'][index] else None))
+            self.conv_blocks.append(build_module(self.blocks_cfg))
+            if self.arch['attention'][index]:
+                self.conv_blocks.append(
+                    SelfAttentionBlock(
+                        out_ch,
+                        with_spectral_norm=with_spectral_norm,
+                        sn_eps=sn_eps,
+                        sn_style=sn_style))
+
+        self.output_layer = SNConvModule(
+            self.arch['out_channels'][-1],
+            out_channels,
+            kernel_size=3,
+            padding=1,
+            with_spectral_norm=with_spectral_norm,
+            spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style),
+            act_cfg=act_cfg,
+            norm_cfg=out_norm_cfg,
+            bias=True,
+            order=('norm', 'act', 'conv'))
+
+        self.init_weights(pretrained=pretrained, init_type=init_type)
+
+    def _get_default_arch_cfg(self, output_scale, base_channels):
+        assert output_scale in [32, 64, 128, 256, 512]
+        _default_arch_cfgs = {
+            '32': {
+                'in_channels': [base_channels * item for item in [4, 4, 4]],
+                'out_channels': [base_channels * item for item in [4, 4, 4]],
+                'upsample': [True] * 3,
+                'resolution': [8, 16, 32],
+                'attention': [False, False, False]
+            },
+            '64': {
+                'in_channels':
+                [base_channels * item for item in [16, 16, 8, 4]],
+                'out_channels':
+                [base_channels * item for item in [16, 8, 4, 2]],
+                'upsample': [True] * 4,
+                'resolution': [8, 16, 32, 64],
+                'attention': [False, False, False, True]
+            },
+            '128': {
+                'in_channels':
+                [base_channels * item for item in [16, 16, 8, 4, 2]],
+                'out_channels':
+                [base_channels * item for item in [16, 8, 4, 2, 1]],
+                'upsample': [True] * 5,
+                'resolution': [8, 16, 32, 64, 128],
+                'attention': [False, False, False, True, False]
+            },
+            '256': {
+                'in_channels':
+                [base_channels * item for item in [16, 16, 8, 8, 4, 2]],
+                'out_channels':
+                [base_channels * item for item in [16, 8, 8, 4, 2, 1]],
+                'upsample': [True] * 6,
+                'resolution': [8, 16, 32, 64, 128, 256],
+                'attention': [False, False, False, True, False, False]
+            },
+            '512': {
+                'in_channels':
+                [base_channels * item for item in [16, 16, 8, 8, 4, 2, 1]],
+                'out_channels':
+                [base_channels * item for item in [16, 8, 8, 4, 2, 1, 1]],
+                'upsample': [True] * 7,
+                'resolution': [8, 16, 32, 64, 128, 256, 512],
+                'attention': [False, False, False, True, False, False, False]
+            }
+        }
+
+        return _default_arch_cfgs[str(output_scale)]
+
+    def forward(self,
+                noise,
+                label=None,
+                num_batches=0,
+                return_noise=False,
+                truncation=-1.0,
+                use_outside_embedding=False):
+        """Forward function.
+
+        Args:
+            noise (torch.Tensor | callable | None): You can directly give a
+                batch of noise through a ``torch.Tensor`` or offer a callable
+                function to sample a batch of noise data. Otherwise, the
+                ``None`` indicates to use the default noise sampler.
+            label (torch.Tensor | callable | None): You can directly give a
+                batch of label through a ``torch.Tensor`` or offer a callable
+                function to sample a batch of label data. Otherwise, the
+                ``None`` indicates to use the default label sampler.
+                Defaults to None.
+            num_batches (int, optional): The number of batch size.
+                Defaults to 0.
+            return_noise (bool, optional): If True, ``noise_batch`` and
+                ``label`` will be returned in a dict with ``fake_img``.
+                Defaults to False.
+            truncation (float, optional): Truncation factor. Give value not
+                less than 0., the truncation trick will be adopted.
+                Otherwise, the truncation trick will not be adopted.
+                Defaults to -1..
+            use_outside_embedding (bool, optional): Whether to use outside
+                embedding or use `shared_embedding`. Set to `True` if
+                embedding has already be performed outside this function.
+                Default to False.
+
+        Returns:
+            torch.Tensor | dict: If not ``return_noise``, only the output image
+                will be returned. Otherwise, a dict contains ``fake_img``,
+                ``noise_batch`` and ``label`` will be returned.
+        """
+        if isinstance(noise, torch.Tensor):
+            assert noise.shape[1] == self.noise_size
+            assert noise.ndim == 2, ('The noise should be in shape of (n, c), '
+                                     f'but got {noise.shape}')
+            noise_batch = noise
+        # receive a noise generator and sample noise.
+        elif callable(noise):
+            noise_generator = noise
+            assert num_batches > 0
+            noise_batch = noise_generator((num_batches, self.noise_size))
+        # otherwise, we will adopt default noise sampler.
+        else:
+            assert num_batches > 0
+            noise_batch = torch.randn((num_batches, self.noise_size))
+
+        # perform truncation
+        if truncation >= 0.0:
+            noise_batch = torch.clamp(noise_batch, -1. * truncation,
+                                      1. * truncation)
+
+        if self.num_classes == 0:
+            label_batch = None
+
+        elif isinstance(label, torch.Tensor):
+            if not use_outside_embedding:
+                assert label.ndim == 1, (
+                    'The label shoube be in shape of (n, )'
+                    f'but got {label.shape}.')
+            label_batch = label
+        elif callable(label):
+            label_generator = label
+            assert num_batches > 0
+            label_batch = label_generator((num_batches, ))
+        else:
+            assert num_batches > 0
+            label_batch = torch.randint(0, self.num_classes, (num_batches, ))
+
+        # dirty code for putting data on the right device
+        noise_batch = noise_batch.to(get_module_device(self))
+        if label_batch is not None:
+            label_batch = label_batch.to(get_module_device(self))
+            if not use_outside_embedding:
+                class_vector = self.shared_embedding(label_batch)
+            else:
+                class_vector = label_batch
+        else:
+            class_vector = None
+        # If 'split noise', concat class vector and noise chunk
+        if self.split_noise:
+            zs = torch.split(noise_batch, self.noise_chunk_size, dim=1)
+            z = zs[0]
+            if class_vector is not None:
+                ys = [torch.cat([class_vector, item], 1) for item in zs[1:]]
+            else:
+                ys = zs[1:]
+        else:
+            ys = [class_vector] * len(self.conv_blocks)
+            z = noise_batch
+
+        # First linear layer
+        x = self.noise2feat(z)
+        # Reshape
+        x = x.view(x.size(0), -1, self.input_scale, self.input_scale)
+
+        # Loop over blocks
+        counter = 0
+        for conv_block in self.conv_blocks:
+            if isinstance(conv_block, SelfAttentionBlock):
+                x = conv_block(x)
+            else:
+                x = conv_block(x, ys[counter])
+                counter += 1
+
+        # Apply batchnorm-relu-conv-tanh at output
+        out_img = torch.tanh(self.output_layer(x))
+
+        if self.rgb2bgr:
+            out_img = out_img[:, [2, 1, 0], ...]
+
+        if return_noise:
+            output = dict(
+                fake_img=out_img, noise_batch=noise_batch, label=label_batch)
+            return output
+
+        return out_img
+
+    def init_weights(self, pretrained=None, init_type='ortho'):
+        """Init weights for models.
+
+        Args:
+            pretrained (str | dict, optional): Path for the pretrained model or
+                dict containing information for pretained models whose
+                necessary key is 'ckpt_path'. Besides, you can also provide
+                'prefix' to load the generator part from the whole state dict.
+                Defaults to None.
+            init_type (str, optional): The name of an initialization method:
+                ortho | N02 | xavier. Defaults to 'ortho'.
+        """
+        if isinstance(pretrained, str):
+            logger = get_root_logger()
+            load_checkpoint(self, pretrained, strict=False, logger=logger)
+        elif isinstance(pretrained, dict):
+            ckpt_path = pretrained.get('ckpt_path', None)
+            assert ckpt_path is not None
+            prefix = pretrained.get('prefix', '')
+            map_location = pretrained.get('map_location', 'cpu')
+            strict = pretrained.get('strict', True)
+            state_dict = _load_checkpoint_with_prefix(prefix, ckpt_path,
+                                                      map_location)
+            self.load_state_dict(state_dict, strict=strict)
+            mmcv.print_log(f'Load pretrained model from {ckpt_path}', 'mmgen')
+        elif pretrained is None:
+            for m in self.modules():
+                if isinstance(m, (nn.Conv2d, nn.Linear, nn.Embedding)):
+                    if init_type == 'ortho':
+                        nn.init.orthogonal_(m.weight)
+                    elif init_type == 'N02':
+                        normal_init(m, 0.0, 0.02)
+                    elif init_type == 'xavier':
+                        xavier_init(m)
+                    else:
+                        raise NotImplementedError(
+                            f'{init_type} initialization \
+                            not supported now.')
+        else:
+            raise TypeError('pretrained must be a str or None but'
+                            f' got {type(pretrained)} instead.')
+
+
+@MODULES.register_module()
+class BigGANDiscriminator(nn.Module):
+    """BigGAN Discriminator. The implementation refers to
+    https://github.com/ajbrock/BigGAN-PyTorch/blob/master/BigGAN.py # noqa.
+
+    In BigGAN, we use a SAGAN-based architecture composing of an self-attention
+    block and number of convolutional residual blocks with spectral
+    normalization.
+
+    More details can be found in: Large Scale GAN Training for High Fidelity
+    Natural Image Synthesis (ICLR2019).
+
+    The design of the model structure is highly corresponding to the output
+    resolution. For the original BigGAN's generator, you can set ``output_scale``
+    as you need and use the default value of ``arch_cfg`` and ``blocks_cfg``.
+    If you want to customize the model, you can set the arguments in this way:
+
+    ``arch_cfg``: Config for the architecture of this generator. You can refer
+    the ``_default_arch_cfgs`` in the ``_get_default_arch_cfg`` function to see
+    the format of the ``arch_cfg``. Basically, you need to provide information
+    of each block such as the numbers of input and output channels, whether to
+    perform upsampling, etc.
+
+    ``blocks_cfg``: Config for the convolution block. You can replace the block
+    type to your registered customized block and adjust block params here.
+    However, you should notice that some params are shared among these blocks
+    like ``act_cfg``, ``with_spectral_norm``, ``sn_eps``, etc.
+
+    Args:
+        input_scale (int): The scale of the input image.
+        num_classes (int, optional): The number of conditional classes.
+            Defaults to 0.
+        in_channels (int, optional): The channel number of the input image.
+            Defaults to 3.
+        out_channels (int, optional): The channel number of the final output.
+            Defaults to 1.
+        base_channels (int, optional): The basic channel number of the
+            discriminator. The other layers contains channels based on this
+            number. Defaults to 96.
+        sn_eps (float, optional): Epsilon value for spectral normalization.
+            Defaults to 1e-6.
+        sn_style (str, optional): The style of spectral normalization.
+            If set to `ajbrock`, implementation by
+            ajbrock(https://github.com/ajbrock/BigGAN-PyTorch/blob/master/layers.py)
+            will be adopted.
+            If set to `torch`, implementation by `PyTorch` will be adopted.
+            Defaults to `ajbrock`.
+        init_type (str, optional): The name of an initialization method:
+            ortho | N02 | xavier. Defaults to 'ortho'.
+        act_cfg (dict, optional): Config for the activation layer.
+            Defaults to dict(type='ReLU').
+        with_spectral_norm (bool, optional): Whether to use spectral
+            normalization. Defaults to True.
+        blocks_cfg (dict, optional): Config for the convolution block.
+            Defaults to dict(type='BigGANDiscResBlock').
+        arch_cfg (dict, optional): Config for the architecture of this
+            discriminator. Defaults to None.
+        pretrained (str | dict, optional): Path for the pretrained model or
+            dict containing information for pretained models whose necessary
+            key is 'ckpt_path'. Besides, you can also provide 'prefix' to load
+            the generator part from the whole state dict. Defaults to None.
+    """
+
+    def __init__(self,
+                 input_scale,
+                 num_classes=0,
+                 in_channels=3,
+                 out_channels=1,
+                 base_channels=96,
+                 sn_eps=1e-6,
+                 sn_style='ajbrock',
+                 init_type='ortho',
+                 act_cfg=dict(type='ReLU'),
+                 with_spectral_norm=True,
+                 blocks_cfg=dict(type='BigGANDiscResBlock'),
+                 arch_cfg=None,
+                 pretrained=None):
+        super().__init__()
+        self.num_classes = num_classes
+        self.out_channels = out_channels
+        self.input_scale = input_scale
+        self.in_channels = in_channels
+        self.base_channels = base_channels
+        self.arch = arch_cfg if arch_cfg else self._get_default_arch_cfg(
+            self.input_scale, self.in_channels, self.base_channels)
+        self.blocks_cfg = deepcopy(blocks_cfg)
+        self.blocks_cfg.update(
+            dict(
+                act_cfg=act_cfg,
+                sn_eps=sn_eps,
+                sn_style=sn_style,
+                with_spectral_norm=with_spectral_norm))
+        self.sn_style = sn_style
+
+        self.conv_blocks = nn.ModuleList()
+        for index, out_ch in enumerate(self.arch['out_channels']):
+            # change args to adapt to current block
+            self.blocks_cfg.update(
+                dict(
+                    in_channels=self.arch['in_channels'][index],
+                    out_channels=out_ch,
+                    with_downsample=self.arch['downsample'][index],
+                    is_head_block=(index == 0)))
+            self.conv_blocks.append(build_module(self.blocks_cfg))
+            if self.arch['attention'][index]:
+                self.conv_blocks.append(
+                    SelfAttentionBlock(
+                        out_ch,
+                        with_spectral_norm=with_spectral_norm,
+                        sn_eps=sn_eps,
+                        sn_style=sn_style))
+
+        self.activate = build_activation_layer(act_cfg)
+
+        self.decision = nn.Linear(self.arch['out_channels'][-1], out_channels)
+        if with_spectral_norm:
+            if sn_style == 'torch':
+                self.decision = spectral_norm(self.decision, eps=sn_eps)
+            elif sn_style == 'ajbrock':
+                self.decision = SNLinear(
+                    self.arch['out_channels'][-1], out_channels, eps=sn_eps)
+            else:
+                raise NotImplementedError('sn style')
+
+        if self.num_classes > 0:
+            self.proj_y = nn.Embedding(self.num_classes,
+                                       self.arch['out_channels'][-1])
+            if with_spectral_norm:
+                if sn_style == 'torch':
+                    self.proj_y = spectral_norm(self.proj_y, eps=sn_eps)
+                elif sn_style == 'ajbrock':
+                    self.proj_y = SNEmbedding(
+                        self.num_classes,
+                        self.arch['out_channels'][-1],
+                        eps=sn_eps)
+                else:
+                    raise NotImplementedError('sn style')
+        self.init_weights(pretrained=pretrained, init_type=init_type)
+
+    def _get_default_arch_cfg(self, input_scale, in_channels, base_channels):
+        assert input_scale in [32, 64, 128, 256, 512]
+        _default_arch_cfgs = {
+            '32': {
+                'in_channels':
+                [in_channels] + [base_channels * item for item in [4, 4, 4]],
+                'out_channels':
+                [base_channels * item for item in [4, 4, 4, 4]],
+                'downsample': [True, True, False, False],
+                'resolution': [16, 8, 8, 8],
+                'attention': [False, False, False, False]
+            },
+            '64': {
+                'in_channels': [in_channels] +
+                [base_channels * item for item in [1, 2, 4, 8]],
+                'out_channels':
+                [base_channels * item for item in [1, 2, 4, 8, 16]],
+                'downsample': [True] * 4 + [False],
+                'resolution': [32, 16, 8, 4, 4],
+                'attention': [False, False, False, False, False]
+            },
+            '128': {
+                'in_channels': [in_channels] +
+                [base_channels * item for item in [1, 2, 4, 8, 16]],
+                'out_channels':
+                [base_channels * item for item in [1, 2, 4, 8, 16, 16]],
+                'downsample': [True] * 5 + [False],
+                'resolution': [64, 32, 16, 8, 4, 4],
+                'attention': [True, False, False, False, False, False]
+            },
+            '256': {
+                'in_channels': [in_channels] +
+                [base_channels * item for item in [1, 2, 4, 8, 8, 16]],
+                'out_channels':
+                [base_channels * item for item in [1, 2, 4, 8, 8, 16, 16]],
+                'downsample': [True] * 6 + [False],
+                'resolution': [128, 64, 32, 16, 8, 4, 4],
+                'attention': [False, True, False, False, False, False]
+            },
+            '512': {
+                'in_channels': [in_channels] +
+                [base_channels * item for item in [1, 1, 2, 4, 8, 8, 16]],
+                'out_channels':
+                [base_channels * item for item in [1, 1, 2, 4, 8, 8, 16, 16]],
+                'downsample': [True] * 7 + [False],
+                'resolution': [256, 128, 64, 32, 16, 8, 4, 4],
+                'attention': [False, False, False, True, False, False, False]
+            }
+        }
+
+        return _default_arch_cfgs[str(input_scale)]
+
+    def forward(self, x, label=None):
+        """Forward function.
+
+        Args:
+            x (torch.Tensor): Fake or real image tensor.
+            label (torch.Tensor | None): Label Tensor. Defaults to None.
+
+        Returns:
+            torch.Tensor: Prediction for the reality of the input image with
+                given label.
+        """
+        x0 = x
+        for conv_block in self.conv_blocks:
+            x0 = conv_block(x0)
+        x0 = self.activate(x0)
+        x0 = torch.sum(x0, dim=[2, 3])
+        out = self.decision(x0)
+
+        if self.num_classes > 0:
+            w_y = self.proj_y(label)
+            out = out + torch.sum(w_y * x0, dim=1, keepdim=True)
+        return out
+
+    def init_weights(self, pretrained=None, init_type='ortho'):
+        """Init weights for models.
+
+        Args:
+            pretrained (str | dict, optional): Path for the pretrained model or
+                dict containing information for pretained models whose
+                necessary key is 'ckpt_path'. Besides, you can also provide
+                'prefix' to load the generator part from the whole state dict.
+                Defaults to None.
+            init_type (str, optional): The name of an initialization method:
+                ortho | N02 | xavier. Defaults to 'ortho'.
+        """
+
+        if isinstance(pretrained, str):
+            logger = get_root_logger()
+            load_checkpoint(self, pretrained, strict=False, logger=logger)
+        elif isinstance(pretrained, dict):
+            ckpt_path = pretrained.get('ckpt_path', None)
+            assert ckpt_path is not None
+            prefix = pretrained.get('prefix', '')
+            map_location = pretrained.get('map_location', 'cpu')
+            strict = pretrained.get('strict', True)
+            state_dict = _load_checkpoint_with_prefix(prefix, ckpt_path,
+                                                      map_location)
+            self.load_state_dict(state_dict, strict=strict)
+            mmcv.print_log(f'Load pretrained model from {ckpt_path}', 'mmgen')
+        elif pretrained is None:
+            for m in self.modules():
+                if isinstance(m, (nn.Conv2d, nn.Linear, nn.Embedding)):
+                    if init_type == 'ortho':
+                        nn.init.orthogonal_(m.weight)
+                    elif init_type == 'N02':
+                        normal_init(m, 0.0, 0.02)
+                    elif init_type == 'xavier':
+                        xavier_init(m)
+                    else:
+                        raise NotImplementedError(
+                            f'{init_type} initialization \
+                            not supported now.')
+        else:
+            raise TypeError('pretrained must be a str or None but'
+                            f' got {type(pretrained)} instead.')
--- a/mmgen/models/architectures/biggan/generator_discriminator_deep.py
+++ b/mmgen/models/architectures/biggan/generator_discriminator_deep.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from copy import deepcopy
+
+import mmcv
+import torch
+import torch.nn as nn
+from mmcv.cnn import normal_init, xavier_init
+from mmcv.cnn.bricks import build_activation_layer
+from mmcv.runner import load_checkpoint
+from mmcv.runner.checkpoint import _load_checkpoint_with_prefix
+from torch.nn.utils import spectral_norm
+
+from mmgen.models.builder import MODULES, build_module
+from mmgen.utils import get_root_logger
+from ..common import get_module_device
+from .biggan_snmodule import SNEmbedding, SNLinear
+from .modules import SelfAttentionBlock, SNConvModule
+
+
+@MODULES.register_module()
+class BigGANDeepGenerator(nn.Module):
+    """BigGAN-Deep Generator. The implementation refers to
+    https://github.com/ajbrock/BigGAN-PyTorch/blob/master/BigGANdeep.py # noqa.
+
+    In BigGAN, we use a SAGAN-based architecture composing of an
+    self-attention block and number of convolutional residual blocks
+    with spectral normalization. BigGAN-deep follow the same architecture.
+
+    The main difference between BigGAN and BigGAN-deep is that
+    BigGAN-deep uses deeper residual blocks to construct the whole
+    model.
+
+    More details can be found in: Large Scale GAN Training for High Fidelity
+    Natural Image Synthesis (ICLR2019).
+
+    The design of the model structure is highly corresponding to the output
+    resolution. For the original BigGAN-Deep's generator, you can set ``output_scale``
+    as you need and use the default value of ``arch_cfg`` and ``blocks_cfg``.
+    If you want to customize the model, you can set the arguments in this way:
+
+    ``arch_cfg``: Config for the architecture of this generator. You can refer
+    the ``_default_arch_cfgs`` in the ``_get_default_arch_cfg`` function to see
+    the format of the ``arch_cfg``. Basically, you need to provide information
+    of each block such as the numbers of input and output channels, whether to
+    perform upsampling, etc.
+
+    ``blocks_cfg``: Config for the convolution block. You can adjust block params
+    like ``channel_ratio`` here. You can also replace the block type
+    to your registered customized block. However, you should notice that some
+    params are shared among these blocks like ``act_cfg``, ``with_spectral_norm``,
+    ``sn_eps``, etc.
+
+    Args:
+        output_scale (int): Output scale for the generated image.
+        noise_size (int, optional): Size of the input noise vector. Defaults
+            to 120.
+        num_classes (int, optional): The number of conditional classes. If set
+            to 0, this model will be degraded to an unconditional model.
+            Defaults to 0.
+        out_channels (int, optional): Number of channels in output images.
+            Defaults to 3.
+        base_channels (int, optional): The basic channel number of the
+            generator. The other layers contains channels based on this number.
+            Defaults to 96.
+        block_depth (int, optional): The repeat times of Residual Blocks in
+            each level of architecture. Defaults to 2.
+        input_scale (int, optional): The scale of the input 2D feature map.
+            Defaults to 4.
+        with_shared_embedding (bool, optional): Whether to use shared
+            embedding. Defaults to True.
+        shared_dim (int, optional): The output channels of shared embedding.
+            Defaults to 128.
+        sn_eps (float, optional): Epsilon value for spectral normalization.
+            Defaults to 1e-6.
+        sn_style (str, optional): The style of spectral normalization.
+            If set to `ajbrock`, implementation by
+            ajbrock(https://github.com/ajbrock/BigGAN-PyTorch/blob/master/layers.py)
+            will be adopted.
+            If set to `torch`, implementation by `PyTorch` will be adopted.
+            Defaults to `ajbrock`.
+        init_type (str, optional): The name of an initialization method:
+            ortho | N02 | xavier. Defaults to 'ortho'.
+        concat_noise (bool, optional): Whether to concat input noise vector
+            with class vector. Defaults to True.
+        act_cfg (dict, optional): Config for the activation layer. Defaults to
+            dict(type='ReLU', inplace=False).
+        upsample_cfg (dict, optional): Config for the upsampling operation.
+            Defaults to dict(type='nearest', scale_factor=2).
+        with_spectral_norm (bool, optional): Whether to use spectral
+            normalization. Defaults to True.
+        auto_sync_bn (bool, optional): Whether to use synchronized batch
+            normalization. Defaults to True.
+        blocks_cfg (dict, optional): Config for the convolution block. Defaults
+            to dict(type='BigGANDeepGenResBlock').
+        arch_cfg (dict, optional): Config for the architecture of this
+            generator. Defaults to None.
+        out_norm_cfg (dict, optional): Config for the norm of output layer.
+            Defaults to dict(type='BN').
+        pretrained (str | dict, optional): Path for the pretrained model or
+            dict containing information for pretained models whose necessary
+            key is 'ckpt_path'. Besides, you can also provide 'prefix' to load
+            the generator part from the whole state dict. Defaults to None.
+        rgb2bgr (bool, optional): Whether to reformat the output channels
+                with order `bgr`. We provide several pre-trained BigGAN-Deep
+                weights whose output channels order is `rgb`. You can set
+                this argument to True to use the weights.
+    """
+
+    def __init__(self,
+                 output_scale,
+                 noise_size=120,
+                 num_classes=0,
+                 out_channels=3,
+                 base_channels=96,
+                 block_depth=2,
+                 input_scale=4,
+                 with_shared_embedding=True,
+                 shared_dim=128,
+                 sn_eps=1e-6,
+                 sn_style='ajbrock',
+                 init_type='ortho',
+                 concat_noise=True,
+                 act_cfg=dict(type='ReLU', inplace=False),
+                 upsample_cfg=dict(type='nearest', scale_factor=2),
+                 with_spectral_norm=True,
+                 auto_sync_bn=True,
+                 blocks_cfg=dict(type='BigGANDeepGenResBlock'),
+                 arch_cfg=None,
+                 out_norm_cfg=dict(type='BN'),
+                 pretrained=None,
+                 rgb2bgr=False):
+        super().__init__()
+        self.noise_size = noise_size
+        self.num_classes = num_classes
+        self.shared_dim = shared_dim
+        self.with_shared_embedding = with_shared_embedding
+        self.output_scale = output_scale
+        self.arch = arch_cfg if arch_cfg else self._get_default_arch_cfg(
+            self.output_scale, base_channels)
+        self.input_scale = input_scale
+        self.concat_noise = concat_noise
+        self.blocks_cfg = deepcopy(blocks_cfg)
+        self.upsample_cfg = deepcopy(upsample_cfg)
+        self.block_depth = block_depth
+        self.rgb2bgr = rgb2bgr
+        self.sn_style = sn_style
+
+        # Validity Check
+        # If 'num_classes' equals to zero, we shall set 'with_shared_embedding'
+        # to False.
+        if num_classes == 0:
+            assert not self.with_shared_embedding
+            assert not self.concat_noise
+        elif not self.with_shared_embedding:
+            # If not `with_shared_embedding`, we will use `nn.Embedding` to
+            # replace the original `Linear` layer in conditional BN.
+            # Meanwhile, we do not adopt split noises.
+            assert not self.concat_noise
+
+        # First linear layer
+        if self.concat_noise:
+            self.noise2feat = nn.Linear(
+                self.noise_size + self.shared_dim,
+                self.arch['in_channels'][0] * (self.input_scale**2))
+        else:
+            self.noise2feat = nn.Linear(
+                self.noise_size,
+                self.arch['in_channels'][0] * (self.input_scale**2))
+
+        if with_spectral_norm:
+            if sn_style == 'torch':
+                self.noise2feat = spectral_norm(self.noise2feat, eps=sn_eps)
+            elif sn_style == 'ajbrock':
+                self.noise2feat = SNLinear(
+                    self.noise_size +
+                    (self.shared_dim if self.concat_noise else 0),
+                    self.arch['in_channels'][0] * (self.input_scale**2),
+                    eps=sn_eps)
+            else:
+                NotImplementedError(f'{sn_style} style SN is not supported')
+
+        # If using 'shared_embedding', we will get an unified embedding of
+        # label for all blocks. If not, we just pass the label to each
+        # block.
+        if with_shared_embedding:
+            self.shared_embedding = nn.Embedding(num_classes, shared_dim)
+        else:
+            self.shared_embedding = nn.Identity()
+
+        if num_classes > 0:
+            if self.concat_noise:
+                self.dim_after_concat = (
+                    self.shared_dim + self.noise_size
+                    if self.with_shared_embedding else self.num_classes)
+            else:
+                self.dim_after_concat = (
+                    self.shared_dim
+                    if self.with_shared_embedding else self.num_classes)
+        else:
+            self.dim_after_concat = 0
+        self.blocks_cfg.update(
+            dict(
+                dim_after_concat=self.dim_after_concat,
+                act_cfg=act_cfg,
+                sn_eps=sn_eps,
+                sn_style=sn_style,
+                input_is_label=(num_classes > 0)
+                and (not with_shared_embedding),
+                with_spectral_norm=with_spectral_norm,
+                auto_sync_bn=auto_sync_bn))
+
+        self.conv_blocks = nn.ModuleList()
+        for index, out_ch in enumerate(self.arch['out_channels']):
+            for depth in range(self.block_depth):
+                # change args to adapt to current block
+                block_cfg_ = deepcopy(self.blocks_cfg)
+                block_cfg_.update(
+                    dict(
+                        in_channels=self.arch['in_channels'][index],
+                        out_channels=out_ch if depth == (self.block_depth - 1)
+                        else self.arch['in_channels'][index],
+                        upsample_cfg=self.upsample_cfg
+                        if self.arch['upsample'][index]
+                        and depth == (self.block_depth - 1) else None))
+                self.conv_blocks.append(build_module(block_cfg_))
+
+            if self.arch['attention'][index]:
+                self.conv_blocks.append(
+                    SelfAttentionBlock(
+                        out_ch,
+                        with_spectral_norm=with_spectral_norm,
+                        sn_eps=sn_eps,
+                        sn_style=sn_style))
+
+        self.output_layer = SNConvModule(
+            self.arch['out_channels'][-1],
+            out_channels,
+            kernel_size=3,
+            padding=1,
+            with_spectral_norm=with_spectral_norm,
+            spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style),
+            act_cfg=act_cfg,
+            norm_cfg=out_norm_cfg,
+            bias=True,
+            order=('norm', 'act', 'conv'))
+
+        self.init_weights(pretrained=pretrained, init_type=init_type)
+
+    def _get_default_arch_cfg(self, output_scale, base_channels):
+        assert output_scale in [32, 64, 128, 256, 512]
+        _default_arch_cfgs = {
+            '32': {
+                'in_channels': [base_channels * item for item in [4, 4, 4]],
+                'out_channels': [base_channels * item for item in [4, 4, 4]],
+                'upsample': [True] * 3,
+                'resolution': [8, 16, 32],
+                'attention': [False, False, False]
+            },
+            '64': {
+                'in_channels':
+                [base_channels * item for item in [16, 16, 8, 4]],
+                'out_channels':
+                [base_channels * item for item in [16, 8, 4, 2]],
+                'upsample': [True] * 4,
+                'resolution': [8, 16, 32, 64],
+                'attention': [False, False, False, True]
+            },
+            '128': {
+                'in_channels':
+                [base_channels * item for item in [16, 16, 8, 4, 2]],
+                'out_channels':
+                [base_channels * item for item in [16, 8, 4, 2, 1]],
+                'upsample': [True] * 5,
+                'resolution': [8, 16, 32, 64, 128],
+                'attention': [False, False, False, True, False]
+            },
+            '256': {
+                'in_channels':
+                [base_channels * item for item in [16, 16, 8, 8, 4, 2]],
+                'out_channels':
+                [base_channels * item for item in [16, 8, 8, 4, 2, 1]],
+                'upsample': [True] * 6,
+                'resolution': [8, 16, 32, 64, 128, 256],
+                'attention': [False, False, False, True, False, False]
+            },
+            '512': {
+                'in_channels':
+                [base_channels * item for item in [16, 16, 8, 8, 4, 2, 1]],
+                'out_channels':
+                [base_channels * item for item in [16, 8, 8, 4, 2, 1, 1]],
+                'upsample': [True] * 7,
+                'resolution': [8, 16, 32, 64, 128, 256, 512],
+                'attention': [False, False, False, True, False, False, False]
+            }
+        }
+
+        return _default_arch_cfgs[str(output_scale)]
+
+    def forward(self,
+                noise,
+                label=None,
+                num_batches=0,
+                return_noise=False,
+                truncation=-1.0,
+                use_outside_embedding=False):
+        """Forward function.
+
+        Args:
+            noise (torch.Tensor | callable | None): You can directly give a
+                batch of noise through a ``torch.Tensor`` or offer a callable
+                function to sample a batch of noise data. Otherwise, the
+                ``None`` indicates to use the default noise sampler.
+            label (torch.Tensor | callable | None): You can directly give a
+                batch of label through a ``torch.Tensor`` or offer a callable
+                function to sample a batch of label data. Otherwise, the
+                ``None`` indicates to use the default label sampler.
+                Defaults to None.
+            num_batches (int, optional): The number of batch size.
+                Defaults to 0.
+            return_noise (bool, optional): If True, ``noise_batch`` and
+                ``label`` will be returned in a dict with ``fake_img``.
+                Defaults to False.
+            truncation (float, optional): Truncation factor. Give value not
+                less than 0., the truncation trick will be adopted.
+                Otherwise, the truncation trick will not be adopted.
+                Defaults to -1..
+            use_outside_embedding (bool, optional): Whether to use outside
+                embedding or use `shared_embedding`. Set to `True` if
+                embedding has already be performed outside this function.
+                Default to False.
+
+        Returns:
+            torch.Tensor | dict: If not ``return_noise``, only the output image
+                will be returned. Otherwise, a dict contains ``fake_img``,
+                ``noise_batch`` and ``label`` will be returned.
+        """
+        if isinstance(noise, torch.Tensor):
+            assert noise.shape[1] == self.noise_size
+            assert noise.ndim == 2, ('The noise should be in shape of (n, c), '
+                                     f'but got {noise.shape}')
+            noise_batch = noise
+        # receive a noise generator and sample noise.
+        elif callable(noise):
+            noise_generator = noise
+            assert num_batches > 0
+            noise_batch = noise_generator((num_batches, self.noise_size))
+        # otherwise, we will adopt default noise sampler.
+        else:
+            assert num_batches > 0
+            noise_batch = torch.randn((num_batches, self.noise_size))
+        # perform truncation
+        if truncation >= 0.0:
+            noise_batch = torch.clamp(noise_batch, -1. * truncation,
+                                      1. * truncation)
+
+        if self.num_classes == 0:
+            label_batch = None
+
+        elif isinstance(label, torch.Tensor):
+            if not use_outside_embedding:
+                assert label.ndim == 1, (
+                    'The label shoube be in shape of (n, )'
+                    f'but got {label.shape}.')
+            label_batch = label
+        elif callable(label):
+            label_generator = label
+            assert num_batches > 0
+            label_batch = label_generator((num_batches, ))
+        else:
+            assert num_batches > 0
+            label_batch = torch.randint(0, self.num_classes, (num_batches, ))
+
+        # dirty code for putting data on the right device
+        noise_batch = noise_batch.to(get_module_device(self))
+        if label_batch is not None:
+            label_batch = label_batch.to(get_module_device(self))
+            if not use_outside_embedding:
+                class_vector = self.shared_embedding(label_batch)
+            else:
+                class_vector = label_batch
+        else:
+            class_vector = None
+
+        # If 'concat noise', concat class vector and noise batch
+        if self.concat_noise:
+            if class_vector is not None:
+                z = torch.cat([noise_batch, class_vector], dim=1)
+                y = z
+        elif self.num_classes > 0:
+            z = noise_batch
+            y = class_vector
+        else:
+            z = noise_batch
+            y = None
+
+        # First linear layer
+        x = self.noise2feat(z)
+        # Reshape
+        # We use this conversion step to allow for loading TF weights
+        # TF convention on shape is [batch, height, width, channels]
+        # PT convention on shape is [batch, channels, height, width]
+        x = x.view(x.size(0), self.input_scale, self.input_scale, -1)
+        x = x.permute(0, 3, 1, 2).contiguous()
+        # Loop over blocks
+        for idx, conv_block in enumerate(self.conv_blocks):
+            if isinstance(conv_block, SelfAttentionBlock):
+                x = conv_block(x)
+            else:
+                x = conv_block(x, y)
+        # Apply batchnorm-relu-conv-tanh at output
+        x = self.output_layer(x)
+        out_img = torch.tanh(x)
+
+        if self.rgb2bgr:
+            out_img = out_img[:, [2, 1, 0], ...]
+
+        if return_noise:
+            output = dict(
+                fake_img=out_img, noise_batch=noise_batch, label=label_batch)
+            return output
+
+        return out_img
+
+    def init_weights(self, pretrained=None, init_type='ortho'):
+        """Init weights for models.
+
+        Args:
+            pretrained (str | dict, optional): Path for the pretrained model or
+                dict containing information for pretained models whose
+                necessary key is 'ckpt_path'. Besides, you can also provide
+                'prefix' to load the generator part from the whole state dict.
+                Defaults to None.
+            init_type (str, optional): The name of an initialization method:
+                ortho | N02 | xavier. Defaults to 'ortho'.
+        """
+        if isinstance(pretrained, str):
+            logger = get_root_logger()
+            load_checkpoint(self, pretrained, strict=False, logger=logger)
+        elif isinstance(pretrained, dict):
+            ckpt_path = pretrained.get('ckpt_path', None)
+            assert ckpt_path is not None
+            prefix = pretrained.get('prefix', '')
+            map_location = pretrained.get('map_location', 'cpu')
+            strict = pretrained.get('strict', True)
+            state_dict = _load_checkpoint_with_prefix(prefix, ckpt_path,
+                                                      map_location)
+            self.load_state_dict(state_dict, strict=strict)
+            mmcv.print_log(f'Load pretrained model from {ckpt_path}', 'mmgen')
+        elif pretrained is None:
+            for m in self.modules():
+                if isinstance(m, (nn.Conv2d, nn.Linear, nn.Embedding)):
+                    if init_type == 'ortho':
+                        nn.init.orthogonal_(m.weight)
+                    elif init_type == 'N02':
+                        normal_init(m, 0.0, 0.02)
+                    elif init_type == 'xavier':
+                        xavier_init(m)
+                    else:
+                        raise NotImplementedError(
+                            f'{init_type} initialization \
+                            not supported now.')
+        else:
+            raise TypeError('pretrained must be a str or None but'
+                            f' got {type(pretrained)} instead.')
+
+
+@MODULES.register_module()
+class BigGANDeepDiscriminator(nn.Module):
+    """BigGAN-Deep Discriminator. The implementation refers to
+    https://github.com/ajbrock/BigGAN-PyTorch/blob/master/BigGANdeep.py # noqa.
+
+    The overall structure of BigGAN's discriminator is the same with
+    the projection discriminator.
+
+    The main difference between BigGAN and BigGAN-deep is that
+    BigGAN-deep use more deeper residual blocks to construct the whole
+    model.
+
+    More details can be found in: Large Scale GAN Training for High Fidelity
+    Natural Image Synthesis (ICLR2019).
+
+    The design of the model structure is highly corresponding to the output
+    resolution. For origin BigGAN-Deep's generator, you can set ``output_scale``
+    as you need and use the default value of ``arch_cfg`` and ``blocks_cfg``.
+    If you want to customize the model, you can set the arguments in this way:
+
+    ``arch_cfg``: Config for the architecture of this generator. You can refer
+    the ``_default_arch_cfgs`` in the ``_get_default_arch_cfg`` function to see
+    the format of the ``arch_cfg``. Basically, you need to provide information
+    of each block such as the numbers of input and output channels, whether to
+    perform upsampling etc.
+
+    ``blocks_cfg``: Config for the convolution block. You can adjust block params
+    like ``channel_ratio`` here. You can also replace the block type
+    to your registered customized block. However, you should notice that some
+    params are shared between these blocks like ``act_cfg``, ``with_spectral_norm``,
+    ``sn_eps`` etc.
+
+    Args:
+        input_scale (int): The scale of the input image.
+        num_classes (int, optional): The number of conditional classes.
+            Defaults to 0.
+        in_channels (int, optional): The channel number of the input image.
+            Defaults to 3.
+        out_channels (int, optional): The channel number of the final output.
+            Defaults to 1.
+        base_channels (int, optional): The basic channel number of the
+            discriminator. The other layers contains channels based on this
+            number. Defaults to 96.
+        block_depth (int, optional): The repeat times of Residual Blocks in
+            each level of architecture. Defaults to 2.
+        sn_eps (float, optional): Epsilon value for spectral normalization.
+            Defaults to 1e-6.
+        sn_style (str, optional): The style of spectral normalization.
+            If set to `ajbrock`, implementation by
+            ajbrock(https://github.com/ajbrock/BigGAN-PyTorch/blob/master/layers.py)
+            will be adopted.
+            If set to `torch`, implementation by `PyTorch` will be adopted.
+            Defaults to `ajbrock`.
+        init_type (str, optional): The name of an initialization method:
+            ortho | N02 | xavier. Defaults to 'ortho'.
+        act_cfg (dict, optional): Config for the activation layer.
+            Defaults to dict(type='ReLU').
+        with_spectral_norm (bool, optional): Whether to use spectral
+            normalization. Defaults to True.
+        blocks_cfg (dict, optional): Config for the convolution block.
+            Defaults to dict(type='BigGANDiscResBlock').
+        arch_cfg (dict, optional): Config for the architecture of this
+            discriminator. Defaults to None.
+        pretrained (str | dict, optional): Path for the pretrained model or
+            dict containing information for pretained models whose necessary
+            key is 'ckpt_path'. Besides, you can also provide 'prefix' to load
+            the generator part from the whole state dict. Defaults to None.
+    """
+
+    def __init__(self,
+                 input_scale,
+                 num_classes=0,
+                 in_channels=3,
+                 out_channels=1,
+                 base_channels=96,
+                 block_depth=2,
+                 sn_eps=1e-6,
+                 sn_style='ajbrock',
+                 init_type='ortho',
+                 act_cfg=dict(type='ReLU', inplace=False),
+                 with_spectral_norm=True,
+                 blocks_cfg=dict(type='BigGANDeepDiscResBlock'),
+                 arch_cfg=None,
+                 pretrained=None):
+        super().__init__()
+        self.num_classes = num_classes
+        self.out_channels = out_channels
+        self.input_scale = input_scale
+        self.in_channels = in_channels
+        self.base_channels = base_channels
+        self.block_depth = block_depth
+        self.arch = arch_cfg if arch_cfg else self._get_default_arch_cfg(
+            self.input_scale, self.base_channels)
+        self.blocks_cfg = deepcopy(blocks_cfg)
+        self.blocks_cfg.update(
+            dict(
+                act_cfg=act_cfg,
+                sn_eps=sn_eps,
+                sn_style=sn_style,
+                with_spectral_norm=with_spectral_norm))
+
+        self.input_conv = SNConvModule(
+            3,
+            self.arch['in_channels'][0],
+            kernel_size=3,
+            padding=1,
+            with_spectral_norm=with_spectral_norm,
+            spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style),
+            act_cfg=None)
+
+        self.conv_blocks = nn.ModuleList()
+        for index, out_ch in enumerate(self.arch['out_channels']):
+            for depth in range(self.block_depth):
+                # change args to adapt to current block
+                block_cfg_ = deepcopy(self.blocks_cfg)
+                block_cfg_.update(
+                    dict(
+                        in_channels=self.arch['in_channels'][index]
+                        if depth == 0 else out_ch,
+                        out_channels=out_ch,
+                        with_downsample=self.arch['downsample'][index]
+                        and depth == 0))
+                self.conv_blocks.append(build_module(block_cfg_))
+            if self.arch['attention'][index]:
+                self.conv_blocks.append(
+                    SelfAttentionBlock(
+                        out_ch,
+                        with_spectral_norm=with_spectral_norm,
+                        sn_eps=sn_eps,
+                        sn_style=sn_style))
+
+        self.activate = build_activation_layer(act_cfg)
+
+        self.decision = nn.Linear(self.arch['out_channels'][-1], out_channels)
+        if with_spectral_norm:
+            if sn_style == 'torch':
+                self.decision = spectral_norm(self.decision, eps=sn_eps)
+            elif sn_style == 'ajbrock':
+                self.decision = SNLinear(
+                    self.arch['out_channels'][-1], out_channels, eps=sn_eps)
+            else:
+                raise NotImplementedError(
+                    f'{sn_style} style SN is not supported yet')
+
+        if self.num_classes > 0:
+            self.proj_y = nn.Embedding(self.num_classes,
+                                       self.arch['out_channels'][-1])
+            if with_spectral_norm:
+                if sn_style == 'torch':
+                    self.proj_y = spectral_norm(self.proj_y, eps=sn_eps)
+                elif sn_style == 'ajbrock':
+                    self.proj_y = SNEmbedding(
+                        self.num_classes,
+                        self.arch['out_channels'][-1],
+                        eps=sn_eps)
+                else:
+                    raise NotImplementedError(
+                        f'{sn_style} style SN is not supported yet')
+
+        self.init_weights(pretrained=pretrained, init_type=init_type)
+
+    def _get_default_arch_cfg(self, input_scale, base_channels):
+        assert input_scale in [32, 64, 128, 256, 512]
+        _default_arch_cfgs = {
+            '32': {
+                'in_channels': [base_channels * item for item in [4, 4, 4]],
+                'out_channels': [base_channels * item for item in [4, 4, 4]],
+                'downsample': [True, True, False, False],
+                'resolution': [16, 8, 8, 8],
+                'attention': [False, False, False, False]
+            },
+            '64': {
+                'in_channels': [base_channels * item for item in [1, 2, 4, 8]],
+                'out_channels':
+                [base_channels * item for item in [2, 4, 8, 16]],
+                'downsample': [True] * 4 + [False],
+                'resolution': [32, 16, 8, 4, 4],
+                'attention': [False, False, False, False, False]
+            },
+            '128': {
+                'in_channels':
+                [base_channels * item for item in [1, 2, 4, 8, 16]],
+                'out_channels':
+                [base_channels * item for item in [2, 4, 8, 16, 16]],
+                'downsample': [True] * 5 + [False],
+                'resolution': [64, 32, 16, 8, 4, 4],
+                'attention': [True, False, False, False, False, False]
+            },
+            '256': {
+                'in_channels':
+                [base_channels * item for item in [1, 2, 4, 8, 8, 16]],
+                'out_channels':
+                [base_channels * item for item in [2, 4, 8, 8, 16, 16]],
+                'downsample': [True] * 6 + [False],
+                'resolution': [128, 64, 32, 16, 8, 4, 4],
+                'attention': [False, True, False, False, False, False]
+            },
+            '512': {
+                'in_channels':
+                [base_channels * item for item in [1, 1, 2, 4, 8, 8, 16]],
+                'out_channels':
+                [base_channels * item for item in [1, 2, 4, 8, 8, 16, 16]],
+                'downsample': [True] * 7 + [False],
+                'resolution': [256, 128, 64, 32, 16, 8, 4, 4],
+                'attention': [False, False, False, True, False, False, False]
+            }
+        }
+
+        return _default_arch_cfgs[str(input_scale)]
+
+    def forward(self, x, label=None):
+        """Forward function.
+
+        Args:
+            x (torch.Tensor): Fake or real image tensor.
+            label (torch.Tensor | None): Label Tensor. Defaults to None.
+
+        Returns:
+            torch.Tensor: Prediction for the reality of the input image with
+                given label.
+        """
+        x0 = self.input_conv(x)
+        for conv_block in self.conv_blocks:
+            x0 = conv_block(x0)
+        x0 = self.activate(x0)
+        x0 = torch.sum(x0, dim=[2, 3])
+        out = self.decision(x0)
+
+        if self.num_classes > 0:
+            w_y = self.proj_y(label)
+            out = out + torch.sum(w_y * x0, dim=1, keepdim=True)
+        return out
+
+    def init_weights(self, pretrained=None, init_type='ortho'):
+        """Init weights for models.
+
+        Args:
+            pretrained (str | dict, optional): Path for the pretrained model or
+                dict containing information for pretained models whose
+                necessary key is 'ckpt_path'. Besides, you can also provide
+                'prefix' to load the generator part from the whole state dict.
+                Defaults to None.
+            init_type (str, optional): The name of an initialization method:
+                ortho | N02 | xavier. Defaults to 'ortho'.
+        """
+
+        if isinstance(pretrained, str):
+            logger = get_root_logger()
+            load_checkpoint(self, pretrained, strict=False, logger=logger)
+        elif isinstance(pretrained, dict):
+            ckpt_path = pretrained.get('ckpt_path', None)
+            assert ckpt_path is not None
+            prefix = pretrained.get('prefix', '')
+            map_location = pretrained.get('map_location', 'cpu')
+            strict = pretrained.get('strict', True)
+            state_dict = _load_checkpoint_with_prefix(prefix, ckpt_path,
+                                                      map_location)
+            self.load_state_dict(state_dict, strict=strict)
+            mmcv.print_log(f'Load pretrained model from {ckpt_path}', 'mmgen')
+        elif pretrained is None:
+            for m in self.modules():
+                if isinstance(m, (nn.Conv2d, nn.Linear, nn.Embedding)):
+                    if init_type == 'ortho':
+                        nn.init.orthogonal_(m.weight)
+                    elif init_type == 'N02':
+                        normal_init(m, 0.0, 0.02)
+                    elif init_type == 'xavier':
+                        xavier_init(m)
+                    else:
+                        raise NotImplementedError(
+                            f'{init_type} initialization \
+                            not supported now.')
+        else:
+            raise TypeError('pretrained must be a str or None but'
+                            f' got {type(pretrained)} instead.')
--- a/mmgen/models/architectures/biggan/modules.py
+++ b/mmgen/models/architectures/biggan/modules.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from copy import deepcopy
+
+import torch
+import torch.distributed as dist
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import ConvModule
+from mmcv.cnn.bricks import build_activation_layer, build_upsample_layer
+from torch.nn import Parameter
+from torch.nn.modules.batchnorm import SyncBatchNorm
+from torch.nn.utils import spectral_norm
+
+from mmgen.models.builder import MODULES
+from .biggan_snmodule import SNConv2d, SNLinear
+
+
+class SNConvModule(ConvModule):
+    """Spectral Normalization ConvModule.
+
+    In this module, we inherit default ``mmcv.cnn.ConvModule`` and adopt
+    spectral normalization. The spectral normalization is proposed in:
+    Spectral Normalization for Generative Adversarial Networks.
+
+    Args:
+        with_spectral_norm (bool, optional): Whether to use Spectral
+            Normalization. Defaults to False.
+        spectral_norm_cfg (dict, optional): Config for Spectral Normalization.
+            Defaults to None.
+    """
+
+    def __init__(self,
+                 *args,
+                 with_spectral_norm=False,
+                 spectral_norm_cfg=None,
+                 **kwargs):
+        super().__init__(*args, with_spectral_norm=False, **kwargs)
+        self.with_spectral_norm = with_spectral_norm
+        self.spectral_norm_cfg = deepcopy(
+            spectral_norm_cfg) if spectral_norm_cfg else dict()
+
+        self.sn_eps = self.spectral_norm_cfg.get('eps', 1e-6)
+        self.sn_style = self.spectral_norm_cfg.get('sn_style', 'torch')
+
+        if self.with_spectral_norm:
+            if self.sn_style == 'torch':
+                self.conv = spectral_norm(self.conv, eps=self.sn_eps)
+            elif self.sn_style == 'ajbrock':
+                self.snconv_kwargs = deepcopy(kwargs) if kwargs else dict()
+                if 'act_cfg' in self.snconv_kwargs.keys():
+                    self.snconv_kwargs.pop('act_cfg')
+                if 'norm_cfg' in self.snconv_kwargs.keys():
+                    self.snconv_kwargs.pop('norm_cfg')
+                if 'order' in self.snconv_kwargs.keys():
+                    self.snconv_kwargs.pop('order')
+                self.conv = SNConv2d(
+                    *args, **self.snconv_kwargs, eps=self.sn_eps)
+            else:
+                raise NotImplementedError(
+                    f'{self.sn_style} style spectral Norm is not supported yet'
+                )
+
+
+@MODULES.register_module()
+class BigGANGenResBlock(nn.Module):
+    """Residual block used in BigGAN's generator.
+
+    Args:
+        in_channels (int): The channel number of the input feature map.
+        out_channels (int): The channel number of the output feature map.
+        dim_after_concat (int): The channel number of the noise concatenated
+            with the class vector.
+        act_cfg (dict, optional): Config for the activation layer. Defaults to
+            dict(type='ReLU').
+        upsample_cfg (dict, optional): Config for the upsampling operation.
+            Defaults to dict(type='nearest', scale_factor=2).
+        sn_eps (float, optional): Epsilon value for spectral normalization.
+            Defaults to 1e-6.
+        sn_style (str, optional): The style of spectral normalization.
+            If set to `ajbrock`, implementation by
+            ajbrock(https://github.com/ajbrock/BigGAN-PyTorch/blob/master/layers.py)
+            will be adopted.
+            If set to `torch`, implementation by `PyTorch` will be adopted.
+            Defaults to `ajbrock`.
+        with_spectral_norm (bool, optional): Whether to use spectral
+            normalization in this block. Defaults to True.
+        input_is_label (bool, optional): Whether the input of BNs' linear layer
+            is raw label instead of class vector. Defaults to False.
+        auto_sync_bn (bool, optional): Whether to use synchronized batch
+            normalization. Defaults to True.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 dim_after_concat,
+                 act_cfg=dict(type='ReLU'),
+                 upsample_cfg=dict(type='nearest', scale_factor=2),
+                 sn_eps=1e-6,
+                 sn_style='ajbrock',
+                 with_spectral_norm=True,
+                 input_is_label=False,
+                 auto_sync_bn=True):
+        super().__init__()
+        self.activation = build_activation_layer(act_cfg)
+        self.upsample_cfg = deepcopy(upsample_cfg)
+        self.with_upsample = upsample_cfg is not None
+        if self.with_upsample:
+            self.upsample_layer = build_upsample_layer(self.upsample_cfg)
+        self.learnable_sc = in_channels != out_channels or self.with_upsample
+        if self.learnable_sc:
+            self.shortcut = SNConvModule(
+                in_channels=in_channels,
+                out_channels=out_channels,
+                kernel_size=1,
+                stride=1,
+                padding=0,
+                act_cfg=None,
+                with_spectral_norm=with_spectral_norm,
+                spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style))
+        # Here in_channels of BigGANGenResBlock equal to num_features of
+        # BigGANConditionBN
+        self.bn1 = BigGANConditionBN(
+            in_channels,
+            dim_after_concat,
+            sn_eps=sn_eps,
+            sn_style=sn_style,
+            input_is_label=input_is_label,
+            with_spectral_norm=with_spectral_norm,
+            auto_sync_bn=auto_sync_bn)
+        # Here out_channels of BigGANGenResBlock equal to num_features of
+        # BigGANConditionBN
+        self.bn2 = BigGANConditionBN(
+            out_channels,
+            dim_after_concat,
+            sn_eps=sn_eps,
+            sn_style=sn_style,
+            input_is_label=input_is_label,
+            with_spectral_norm=with_spectral_norm,
+            auto_sync_bn=auto_sync_bn)
+
+        self.conv1 = SNConvModule(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            act_cfg=None,
+            with_spectral_norm=with_spectral_norm,
+            spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style))
+
+        self.conv2 = SNConvModule(
+            in_channels=out_channels,
+            out_channels=out_channels,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            act_cfg=None,
+            with_spectral_norm=with_spectral_norm,
+            spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style))
+
+    def forward(self, x, y):
+        """Forward function.
+
+        Args:
+            x (torch.Tensor): Input feature map tensor.
+            y (torch.Tensor): Label tensor or class embedding concatenated with
+                noise tensor.
+
+        Returns:
+            torch.Tensor: Output feature map tensor.
+        """
+        x0 = self.bn1(x, y)
+        x0 = self.activation(x0)
+        if self.with_upsample:
+            x0 = self.upsample_layer(x0)
+            x = self.upsample_layer(x)
+        x0 = self.conv1(x0)
+        x0 = self.bn2(x0, y)
+        x0 = self.activation(x0)
+        x0 = self.conv2(x0)
+        if self.learnable_sc:
+            x = self.shortcut(x)
+        return x0 + x
+
+
+@MODULES.register_module()
+class BigGANConditionBN(nn.Module):
+    """Conditional Batch Normalization used in BigGAN.
+
+    Args:
+        num_features (int): The channel number of the input feature map tensor.
+        linear_input_channels (int): The channel number of the linear layers'
+            input tensor.
+        bn_eps (float, optional): Epsilon value for batch normalization.
+            Defaults to 1e-5.
+        sn_eps (float, optional): Epsilon value for spectral normalization.
+            Defaults to 1e-6.
+        sn_style (str, optional): The style of spectral normalization.
+            If set to `ajbrock`, implementation by
+            ajbrock(https://github.com/ajbrock/BigGAN-PyTorch/blob/master/layers.py)
+            will be adopted.
+            If set to `torch`, implementation by `PyTorch` will be adopted.
+            Defaults to `ajbrock`.
+        momentum (float, optional): The value used for the running_mean and
+            running_var computation. Defaults to 0.1.
+        input_is_label (bool, optional): Whether the input of BNs' linear layer
+            is raw label instead of class vector. Defaults to False.
+        with_spectral_norm (bool, optional): Whether to use spectral
+            normalization. Defaults to True.
+        auto_sync_bn (bool, optional): Whether to use synchronized batch
+            normalization. Defaults to True.
+    """
+
+    def __init__(self,
+                 num_features,
+                 linear_input_channels,
+                 bn_eps=1e-5,
+                 sn_eps=1e-6,
+                 sn_style='ajbrock',
+                 momentum=0.1,
+                 input_is_label=False,
+                 with_spectral_norm=True,
+                 auto_sync_bn=True):
+        super().__init__()
+        assert num_features > 0
+        if linear_input_channels > 0:
+            self.use_cbn = True
+        else:
+            self.use_cbn = False
+        # Prepare gain and bias layers
+        if self.use_cbn:
+            if not input_is_label:
+                self.gain = nn.Linear(
+                    linear_input_channels, num_features, bias=False)
+                self.bias = nn.Linear(
+                    linear_input_channels, num_features, bias=False)
+                # please pay attention if shared_embedding is False
+                if with_spectral_norm:
+                    if sn_style == 'torch':
+                        self.gain = spectral_norm(self.gain, eps=sn_eps)
+                        self.bias = spectral_norm(self.bias, eps=sn_eps)
+                    elif sn_style == 'ajbrock':
+                        self.gain = SNLinear(
+                            linear_input_channels,
+                            num_features,
+                            bias=False,
+                            eps=sn_eps)
+                        self.bias = SNLinear(
+                            linear_input_channels,
+                            num_features,
+                            bias=False,
+                            eps=sn_eps)
+                    else:
+                        raise NotImplementedError('sn style')
+            else:
+                self.gain = nn.Embedding(linear_input_channels, num_features)
+                self.bias = nn.Embedding(linear_input_channels, num_features)
+
+        self.bn = nn.BatchNorm2d(
+            num_features,
+            eps=bn_eps,
+            momentum=momentum,
+            affine=not self.use_cbn)
+
+        if auto_sync_bn and dist.is_initialized():
+            self.bn = SyncBatchNorm.convert_sync_batchnorm(self.bn)
+
+    def forward(self, x, y):
+        """Forward function.
+
+        Args:
+            x (torch.Tensor): Input feature map tensor.
+            y (torch.Tensor): Label tensor or class embedding concatenated with
+                noise tensor.
+
+        Returns:
+            torch.Tensor: Output feature map tensor.
+        """
+        if self.use_cbn:
+            # Calculate class-conditional gains and biases
+            gain = (1. + self.gain(y)).view(y.size(0), -1, 1, 1)
+            bias = self.bias(y).view(y.size(0), -1, 1, 1)
+            out = self.bn(x)
+            out = out * gain + bias
+        else:
+            out = self.bn(x)
+        return out
+
+
+@MODULES.register_module()
+class SelfAttentionBlock(nn.Module):
+    """Self-Attention block used in BigGAN.
+
+    Args:
+        in_channels (int): The channel number of the input feature map.
+        with_spectral_norm (bool, optional): Whether to use spectral
+            normalization. Defaults to True.
+        sn_eps (float, optional): Epsilon value for spectral normalization.
+            Defaults to 1e-6.
+        sn_style (str, optional): The style of spectral normalization.
+            If set to `ajbrock`, implementation by
+            ajbrock(https://github.com/ajbrock/BigGAN-PyTorch/blob/master/layers.py)
+            will be adopted.
+            If set to `torch`, implementation by `PyTorch` will be adopted.
+            Defaults to `ajbrock`.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 with_spectral_norm=True,
+                 sn_eps=1e-6,
+                 sn_style='ajbrock'):
+        super(SelfAttentionBlock, self).__init__()
+
+        self.in_channels = in_channels
+        self.theta = SNConvModule(
+            self.in_channels,
+            self.in_channels // 8,
+            kernel_size=1,
+            padding=0,
+            bias=False,
+            act_cfg=None,
+            with_spectral_norm=with_spectral_norm,
+            spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style))
+        self.phi = SNConvModule(
+            self.in_channels,
+            self.in_channels // 8,
+            kernel_size=1,
+            padding=0,
+            bias=False,
+            act_cfg=None,
+            with_spectral_norm=with_spectral_norm,
+            spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style))
+        self.g = SNConvModule(
+            self.in_channels,
+            self.in_channels // 2,
+            kernel_size=1,
+            padding=0,
+            bias=False,
+            act_cfg=None,
+            with_spectral_norm=with_spectral_norm,
+            spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style))
+        self.o = SNConvModule(
+            self.in_channels // 2,
+            self.in_channels,
+            kernel_size=1,
+            padding=0,
+            bias=False,
+            act_cfg=None,
+            with_spectral_norm=with_spectral_norm,
+            spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style))
+        # Learnable gain parameter
+        self.gamma = Parameter(torch.tensor(0.), requires_grad=True)
+
+    def forward(self, x):
+        """Forward function.
+
+        Args:
+            x (torch.Tensor): Input feature map tensor.
+
+        Returns:
+            torch.Tensor: Output feature map tensor.
+        """
+        # Apply convs
+        theta = self.theta(x)
+        phi = F.max_pool2d(self.phi(x), [2, 2])
+        g = F.max_pool2d(self.g(x), [2, 2])
+        # Perform reshapes
+        theta = theta.view(-1, self.in_channels // 8, x.shape[2] * x.shape[3])
+        phi = phi.view(-1, self.in_channels // 8, x.shape[2] * x.shape[3] // 4)
+        g = g.view(-1, self.in_channels // 2, x.shape[2] * x.shape[3] // 4)
+        # Matmul and softmax to get attention maps
+        beta = F.softmax(torch.bmm(theta.transpose(1, 2), phi), -1)
+        # Attention map times g path
+        o = self.o(
+            torch.bmm(g, beta.transpose(1, 2)).view(-1, self.in_channels // 2,
+                                                    x.shape[2], x.shape[3]))
+        return self.gamma * o + x
+
+
+@MODULES.register_module()
+class BigGANDiscResBlock(nn.Module):
+    """Residual block used in BigGAN's discriminator.
+
+    Args:
+        in_channels (int): The channel number of the input tensor.
+        out_channels (int): The channel number of the output tensor.
+        act_cfg (dict, optional): Config for the activation layer. Defaults to
+            dict(type='ReLU', inplace=False).
+        sn_eps (float, optional): Epsilon value for spectral normalization.
+            Defaults to 1e-6.
+        sn_style (str, optional): The style of spectral normalization.
+            If set to `ajbrock`, implementation by
+            ajbrock(https://github.com/ajbrock/BigGAN-PyTorch/blob/master/layers.py)
+            will be adopted.
+            If set to `torch`, implementation by `PyTorch` will be adopted.
+            Defaults to `ajbrock`.
+        with_downsample (bool, optional): Whether to use downsampling in this
+            block. Defaults to True.
+        with_spectral_norm (bool, optional): Whether to use spectral
+            normalization. Defaults to True.
+        is_head_block (bool, optional): Whether this block is the first block
+            of BigGAN. Defaults to False.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 act_cfg=dict(type='ReLU', inplace=False),
+                 sn_eps=1e-6,
+                 sn_style='ajbrock',
+                 with_downsample=True,
+                 with_spectral_norm=True,
+                 is_head_block=False):
+        super().__init__()
+        self.activation = build_activation_layer(act_cfg)
+        self.with_downsample = with_downsample
+        self.is_head_block = is_head_block
+        if self.with_downsample:
+            self.downsample = nn.AvgPool2d(kernel_size=2, stride=2)
+        self.learnable_sc = in_channels != out_channels or self.with_downsample
+        if self.learnable_sc:
+            self.shortcut = SNConvModule(
+                in_channels=in_channels,
+                out_channels=out_channels,
+                kernel_size=1,
+                stride=1,
+                padding=0,
+                act_cfg=None,
+                with_spectral_norm=with_spectral_norm,
+                spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style))
+
+        self.conv1 = SNConvModule(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            act_cfg=None,
+            with_spectral_norm=with_spectral_norm,
+            spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style))
+
+        self.conv2 = SNConvModule(
+            in_channels=out_channels,
+            out_channels=out_channels,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            act_cfg=None,
+            with_spectral_norm=with_spectral_norm,
+            spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style))
+
+    def forward_sc(self, x):
+        """Forward function of shortcut.
+
+        Args:
+            x (torch.Tensor): Input feature map tensor.
+
+        Returns:
+            torch.Tensor: Output tensor of shortcut.
+        """
+        if self.is_head_block:
+            if self.with_downsample:
+                x = self.downsample(x)
+            if self.learnable_sc:
+                x = self.shortcut(x)
+        else:
+            if self.learnable_sc:
+                x = self.shortcut(x)
+            if self.with_downsample:
+                x = self.downsample(x)
+        return x
+
+    def forward(self, x):
+        """Forward function.
+
+        Args:
+            x (torch.Tensor): Input feature map tensor.
+
+        Returns:
+            torch.Tensor: Output feature map tensor.
+        """
+        if self.is_head_block:
+            x0 = x
+        else:
+            x0 = self.activation(x)
+        x0 = self.conv1(x0)
+        x0 = self.activation(x0)
+        x0 = self.conv2(x0)
+        if self.with_downsample:
+            x0 = self.downsample(x0)
+        x1 = self.forward_sc(x)
+        return x0 + x1
+
+
+@MODULES.register_module()
+class BigGANDeepGenResBlock(nn.Module):
+    """Residual block used in BigGAN-Deep's generator.
+
+    Args:
+        in_channels (int): The channel number of the input feature map.
+        out_channels (int): The channel number of the output feature map.
+        dim_after_concat (int): The channel number of the noise concatenated
+            with the class vector.
+        act_cfg (dict, optional): Config for the activation layer. Defaults to
+            dict(type='ReLU').
+        upsample_cfg (dict, optional): Config for the upsampling operation.
+            Defaults to dict(type='nearest', scale_factor=2).
+        sn_eps (float, optional): Epsilon value for spectral normalization.
+            Defaults to 1e-6.
+        sn_style (str, optional): The style of spectral normalization.
+            If set to `ajbrock`, implementation by
+            ajbrock(https://github.com/ajbrock/BigGAN-PyTorch/blob/master/layers.py)
+            will be adopted.
+            If set to `torch`, implementation by `PyTorch` will be adopted.
+            Defaults to `ajbrock`.
+        bn_eps (float, optional): Epsilon value for batch normalization.
+            Defaults to 1e-5.
+        with_spectral_norm (bool, optional): Whether to use spectral
+            normalization in this block. Defaults to True.
+        input_is_label (bool, optional): Whether the input of BNs' linear layer
+            is raw label instead of class vector. Defaults to False.
+        auto_sync_bn (bool, optional): Whether to use synchronized batch
+            normalization. Defaults to True.
+        channel_ratio (int, optional): The ratio of the input channels' number
+            to the hidden channels' number. Defaults to 4.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 dim_after_concat,
+                 act_cfg=dict(type='ReLU'),
+                 upsample_cfg=dict(type='nearest', scale_factor=2),
+                 sn_eps=1e-6,
+                 sn_style='ajbrock',
+                 bn_eps=1e-5,
+                 with_spectral_norm=True,
+                 input_is_label=False,
+                 auto_sync_bn=True,
+                 channel_ratio=4):
+        super().__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.hidden_channels = self.in_channels // channel_ratio
+        self.activation = build_activation_layer(act_cfg)
+        self.upsample_cfg = deepcopy(upsample_cfg)
+        self.with_upsample = upsample_cfg is not None
+        if self.with_upsample:
+            self.upsample_layer = build_upsample_layer(self.upsample_cfg)
+        # Here in_channels of BigGANGenResBlock equal to num_features of
+        # BigGANConditionBN
+        self.bn1 = BigGANConditionBN(
+            in_channels,
+            dim_after_concat,
+            sn_eps=sn_eps,
+            sn_style=sn_style,
+            bn_eps=bn_eps,
+            input_is_label=input_is_label,
+            with_spectral_norm=with_spectral_norm,
+            auto_sync_bn=auto_sync_bn)
+        # Here out_channels of BigGANGenResBlock equal to num_features of
+        # BigGANConditionBN
+        self.bn2 = BigGANConditionBN(
+            self.hidden_channels,
+            dim_after_concat,
+            sn_eps=sn_eps,
+            sn_style=sn_style,
+            bn_eps=bn_eps,
+            input_is_label=input_is_label,
+            with_spectral_norm=with_spectral_norm,
+            auto_sync_bn=auto_sync_bn)
+
+        self.bn3 = BigGANConditionBN(
+            self.hidden_channels,
+            dim_after_concat,
+            sn_eps=sn_eps,
+            sn_style=sn_style,
+            bn_eps=bn_eps,
+            input_is_label=input_is_label,
+            with_spectral_norm=with_spectral_norm,
+            auto_sync_bn=auto_sync_bn)
+
+        self.bn4 = BigGANConditionBN(
+            self.hidden_channels,
+            dim_after_concat,
+            sn_eps=sn_eps,
+            sn_style=sn_style,
+            bn_eps=bn_eps,
+            input_is_label=input_is_label,
+            with_spectral_norm=with_spectral_norm,
+            auto_sync_bn=auto_sync_bn)
+
+        self.conv1 = SNConvModule(
+            in_channels=in_channels,
+            out_channels=self.hidden_channels,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            act_cfg=None,
+            with_spectral_norm=with_spectral_norm,
+            spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style))
+
+        self.conv2 = SNConvModule(
+            in_channels=self.hidden_channels,
+            out_channels=self.hidden_channels,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            act_cfg=None,
+            with_spectral_norm=with_spectral_norm,
+            spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style))
+
+        self.conv3 = SNConvModule(
+            in_channels=self.hidden_channels,
+            out_channels=self.hidden_channels,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            act_cfg=None,
+            with_spectral_norm=with_spectral_norm,
+            spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style))
+
+        self.conv4 = SNConvModule(
+            in_channels=self.hidden_channels,
+            out_channels=out_channels,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            act_cfg=None,
+            with_spectral_norm=with_spectral_norm,
+            spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style))
+
+    def forward(self, x, y):
+        """Forward function.
+
+        Args:
+            x (torch.Tensor): Input feature map tensor.
+            y (torch.Tensor): Label tensor or class embedding concatenated with
+                noise tensor.
+
+        Returns:
+            torch.Tensor: Output feature map tensor.
+        """
+        x0 = self.bn1(x, y)
+        x0 = self.activation(x0)
+        x0 = self.conv1(x0)
+
+        x0 = self.bn2(x0, y)
+        x0 = self.activation(x0)
+        # Drop channels in x  if necessary
+        if self.in_channels != self.out_channels:
+            x = x[:, :self.out_channels]
+        # unsample both h and x at this point
+        if self.with_upsample:
+            x0 = self.upsample_layer(x0)
+            x = self.upsample_layer(x)
+        x0 = self.conv2(x0)
+
+        x0 = self.bn3(x0, y)
+        x0 = self.activation(x0)
+        x0 = self.conv3(x0)
+
+        x0 = self.bn4(x0, y)
+        x0 = self.activation(x0)
+        x0 = self.conv4(x0)
+        return x0 + x
+
+
+@MODULES.register_module()
+class BigGANDeepDiscResBlock(nn.Module):
+    """Residual block used in BigGAN-Deep's discriminator.
+
+    Args:
+        in_channels (int): The channel number of the input tensor.
+        out_channels (int): The channel number of the output tensor.
+        channel_ratio (int, optional): The ratio of the input channels' number
+            to the hidden channels' number. Defaults to 4.
+        act_cfg (dict, optional): Config for the activation layer. Defaults to
+            dict(type='ReLU', inplace=False).
+        sn_eps (float, optional): Epsilon value for spectral normalization.
+            Defaults to 1e-6.
+        sn_style (str, optional): The style of spectral normalization.
+            If set to `ajbrock`, implementation by
+            ajbrock(https://github.com/ajbrock/BigGAN-PyTorch/blob/master/layers.py)
+            will be adopted.
+            If set to `torch`, implementation by `PyTorch` will be adopted.
+            Defaults to `ajbrock`.
+        with_downsample (bool, optional): Whether to use downsampling in this
+            block. Defaults to True.
+        with_spectral_norm (bool, optional): Whether to use spectral
+            normalization. Defaults to True.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 channel_ratio=4,
+                 act_cfg=dict(type='ReLU', inplace=False),
+                 sn_eps=1e-6,
+                 sn_style='ajbrock',
+                 with_downsample=True,
+                 with_spectral_norm=True):
+        super().__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.hidden_channels = self.out_channels // channel_ratio
+        self.activation = build_activation_layer(act_cfg)
+        self.with_downsample = with_downsample
+
+        if self.with_downsample:
+            self.downsample = nn.AvgPool2d(kernel_size=2, stride=2)
+
+        self.learnable_sc = (in_channels != out_channels)
+        if self.learnable_sc:
+            self.shortcut = SNConvModule(
+                in_channels=in_channels,
+                out_channels=out_channels - in_channels,
+                kernel_size=1,
+                stride=1,
+                padding=0,
+                act_cfg=None,
+                with_spectral_norm=with_spectral_norm,
+                spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style))
+
+        self.conv1 = SNConvModule(
+            in_channels=in_channels,
+            out_channels=self.hidden_channels,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            act_cfg=act_cfg,
+            with_spectral_norm=with_spectral_norm,
+            spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style),
+            order=('act', 'conv', 'norm'))
+
+        self.conv2 = SNConvModule(
+            in_channels=self.hidden_channels,
+            out_channels=self.hidden_channels,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            act_cfg=act_cfg,
+            with_spectral_norm=with_spectral_norm,
+            spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style),
+            order=('act', 'conv', 'norm'))
+
+        self.conv3 = SNConvModule(
+            in_channels=self.hidden_channels,
+            out_channels=self.hidden_channels,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            act_cfg=act_cfg,
+            with_spectral_norm=with_spectral_norm,
+            spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style),
+            order=('act', 'conv', 'norm'))
+
+        self.conv4 = SNConvModule(
+            in_channels=self.hidden_channels,
+            out_channels=out_channels,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            act_cfg=None,
+            with_spectral_norm=with_spectral_norm,
+            spectral_norm_cfg=dict(eps=sn_eps, sn_style=sn_style))
+
+    def forward_sc(self, x):
+        """Forward function of shortcut.
+
+        Args:
+            x (torch.Tensor): Input feature map tensor.
+
+        Returns:
+            torch.Tensor: Output tensor of shortcut.
+        """
+        if self.with_downsample:
+            x = self.downsample(x)
+        if self.learnable_sc:
+            x0 = self.shortcut(x)
+            x = torch.cat([x, x0], dim=1)
+        return x
+
+    def forward(self, x):
+        """Forward function.
+
+        Args:
+            x (torch.Tensor): Input feature map tensor.
+
+        Returns:
+            torch.Tensor: Output feature map tensor.
+        """
+
+        x0 = self.conv1(x)
+        x0 = self.conv2(x0)
+        x0 = self.conv3(x0)
+        x0 = self.activation(x0)
+        # downsample
+        if self.with_downsample:
+            x0 = self.downsample(x0)
+        x0 = self.conv4(x0)
+        x1 = self.forward_sc(x)
+        return x0 + x1
--- a/mmgen/models/architectures/common.py
+++ b/mmgen/models/architectures/common.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+
+
+def get_module_device(module):
+    """Get the device of a module.
+
+    Args:
+        module (nn.Module): A module contains the parameters.
+
+    Returns:
+        torch.device: The device of the module.
+    """
+    try:
+        next(module.parameters())
+    except StopIteration:
+        raise ValueError('The input module should contain parameters.')
+
+    if next(module.parameters()).is_cuda:
+        return next(module.parameters()).get_device()
+
+    return torch.device('cpu')