stylegan2_mmcv

1401de15 · dongchy920 · 1401de15 · 1401de15 · 1401de15 · 1401de15
Commit 1401de15 authored Jun 28, 2024 by dongchy920
20 changed files
--- a/build/lib/mmgen/ops/__init__.py
+++ b/build/lib/mmgen/ops/__init__.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from .conv2d_gradfix import conv2d, conv_transpose2d
+from .stylegan3.ops import bias_act, filtered_lrelu
+
+__all__ = ['conv2d', 'conv_transpose2d', 'filtered_lrelu', 'bias_act']
--- a/build/lib/mmgen/ops/conv2d_gradfix.py
+++ b/build/lib/mmgen/ops/conv2d_gradfix.py
+# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
+#
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto.  Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+"""Custom replacement for `torch.nn.functional.conv2d` that supports
+arbitrarily high order gradients with zero performance penalty."""
+
+import contextlib
+
+import torch
+
+enabled = True
+weight_gradients_disabled = False
+
+
+@contextlib.contextmanager
+def no_weight_gradients(disable=True):
+    """Context manager that suppresses weight gradients of the custom conv ops.
+
+    While the context is active and ``disable`` is true, the module-level
+    flag ``weight_gradients_disabled`` is set to ``True``; ``Conv2d.backward``
+    checks this flag before computing ``grad_weight``. The previous value of
+    the flag is restored on exit.
+
+    Args:
+        disable (bool): When false the flag is left untouched, making the
+            context a no-op.
+    """
+    global weight_gradients_disabled
+    old = weight_gradients_disabled
+    if disable:
+        weight_gradients_disabled = True
+    try:
+        yield
+    finally:
+        # Restore the flag even if the body raises; otherwise one failed
+        # step would leave weight gradients disabled for the whole process.
+        weight_gradients_disabled = old
+
+
+def conv2d(input,
+           weight,
+           bias=None,
+           stride=1,
+           padding=0,
+           dilation=1,
+           groups=1):
+    """2D convolution that routes through the gradfix op when applicable.
+
+    Takes the same arguments as `torch.nn.functional.conv2d`. When
+    `_should_use_custom_op` accepts ``input`` the call goes through the
+    autograd function built by `_conv2d_gradfix`; otherwise it falls back to
+    the stock PyTorch implementation.
+    """
+    if not _should_use_custom_op(input):
+        return torch.nn.functional.conv2d(
+            input=input,
+            weight=weight,
+            bias=bias,
+            stride=stride,
+            padding=padding,
+            dilation=dilation,
+            groups=groups)
+
+    # The non-transposed op is always built with zero output padding.
+    gradfix_op = _conv2d_gradfix(
+        transpose=False,
+        weight_shape=weight.shape,
+        stride=stride,
+        padding=padding,
+        output_padding=0,
+        dilation=dilation,
+        groups=groups)
+    return gradfix_op.apply(input, weight, bias)
+
+
+def conv_transpose2d(input,
+                     weight,
+                     bias=None,
+                     stride=1,
+                     padding=0,
+                     output_padding=0,
+                     groups=1,
+                     dilation=1):
+    """Transposed 2D convolution that uses the gradfix op when applicable.
+
+    Takes the same arguments as `torch.nn.functional.conv_transpose2d`.
+    When `_should_use_custom_op` accepts ``input`` the call goes through the
+    autograd function built by `_conv2d_gradfix`; otherwise it falls back to
+    the stock PyTorch implementation.
+    """
+    if not _should_use_custom_op(input):
+        return torch.nn.functional.conv_transpose2d(
+            input=input,
+            weight=weight,
+            bias=bias,
+            stride=stride,
+            padding=padding,
+            output_padding=output_padding,
+            groups=groups,
+            dilation=dilation)
+
+    gradfix_op = _conv2d_gradfix(
+        transpose=True,
+        weight_shape=weight.shape,
+        stride=stride,
+        padding=padding,
+        output_padding=output_padding,
+        groups=groups,
+        dilation=dilation)
+    return gradfix_op.apply(input, weight, bias)
+
+
+def _should_use_custom_op(input):
+    """Return True when the gradfix op should replace the stock convolution.
+
+    That is the case only if the module-level ``enabled`` switch is on,
+    cuDNN is enabled in PyTorch, and ``input`` lives on a CUDA device.
+    """
+    assert isinstance(input, torch.Tensor)
+    backend_ready = enabled and torch.backends.cudnn.enabled
+    if not backend_ready:
+        return False
+    return input.device.type == 'cuda'
+
+
+def _tuple_of_ints(xs, ndim):
+    """Normalize ``xs`` to a tuple of ``ndim`` ints.
+
+    A list/tuple is converted to a tuple as-is; any other value is repeated
+    ``ndim`` times. The result must have exactly ``ndim`` entries, all of
+    them ``int``.
+    """
+    if isinstance(xs, (tuple, list)):
+        values = tuple(xs)
+    else:
+        values = (xs, ) * ndim
+    assert len(values) == ndim
+    for value in values:
+        assert isinstance(value, int)
+    return values
+
+
+_conv2d_gradfix_cache = dict()
+_null_tensor = torch.empty([0])
+
+
+def _conv2d_gradfix(transpose, weight_shape, stride, padding, output_padding,
+                    dilation, groups):
+    """Build (or fetch from cache) the autograd function for one conv setup.
+
+    Args:
+        transpose: Whether the forward op is a transposed convolution.
+        weight_shape: Shape of the weight tensor; must have ``ndim + 2``
+            (i.e. 4) entries.
+        stride: Int or 2-element list/tuple of ints.
+        padding: Int or 2-element list/tuple of ints.
+        output_padding: Int or 2-element list/tuple of ints; must be all
+            zeros unless ``transpose`` is true.
+        dilation: Int or 2-element list/tuple of ints.
+        groups: Number of groups; must be >= 1.
+
+    Returns:
+        A `torch.autograd.Function` subclass (``Conv2d``) specialized for
+        these arguments. One class is created per distinct argument
+        combination and stored in ``_conv2d_gradfix_cache``.
+    """
+    # Parse arguments.
+    ndim = 2
+    weight_shape = tuple(weight_shape)
+    stride = _tuple_of_ints(stride, ndim)
+    padding = _tuple_of_ints(padding, ndim)
+    output_padding = _tuple_of_ints(output_padding, ndim)
+    dilation = _tuple_of_ints(dilation, ndim)
+
+    # Lookup from cache.
+    # The key is built from the normalized (hashable) arguments, so equal
+    # configurations share one Function class.
+    key = (transpose, weight_shape, stride, padding, output_padding, dilation,
+           groups)
+    if key in _conv2d_gradfix_cache:
+        return _conv2d_gradfix_cache[key]
+
+    # Validate arguments.
+
+    assert groups >= 1
+    assert len(weight_shape) == ndim + 2
+    assert all(stride[i] >= 1 for i in range(ndim))
+    assert all(padding[i] >= 0 for i in range(ndim))
+    assert all(dilation[i] >= 0 for i in range(ndim))
+    if not transpose:
+        assert all(output_padding[i] == 0 for i in range(ndim))
+    else:  # transpose
+        assert all(0 <= output_padding[i] < max(stride[i], dilation[i])
+                   for i in range(ndim))
+
+    # Helpers.
+    # Keyword arguments shared by every convolution call made below.
+    common_kwargs = dict(
+        stride=stride, padding=padding, dilation=dilation, groups=groups)
+
+    def calc_output_padding(input_shape, output_shape):
+        # Output padding to use for the opposite-direction op that computes
+        # the input gradient. When this op is itself transposed, the
+        # opposite op is a plain convolution and takes no output padding.
+        if transpose:
+            return [0, 0]
+        return [
+            input_shape[i + 2] - (output_shape[i + 2] - 1) * stride[i] -
+            (1 - 2 * padding[i]) - dilation[i] * (weight_shape[i + 2] - 1)
+            for i in range(ndim)
+        ]
+
+    # Forward & backward.
+    class Conv2d(torch.autograd.Function):
+
+        @staticmethod
+        def forward(ctx, input, weight, bias):
+            assert weight.shape == weight_shape
+            # Each tensor is saved only if the gradient that needs it can be
+            # requested; otherwise an empty placeholder is stored.
+            ctx.save_for_backward(
+                input if weight.requires_grad else _null_tensor,
+                weight if input.requires_grad else _null_tensor,
+            )
+            ctx.input_shape = input.shape
+
+            # Simple 1x1 convolution => cuBLAS (only on Volta, not on Ampere).
+            # Taken only for devices with compute capability below (8, 0).
+            if weight_shape[2:] == stride == dilation == (
+                    1, 1) and padding == (
+                        0, 0) and torch.cuda.get_device_capability(
+                            input.device) < (8, 0):
+                a = weight.reshape(groups, weight_shape[0] // groups,
+                                   weight_shape[1])
+                b = input.reshape(input.shape[0], groups,
+                                  input.shape[1] // groups, -1)
+                c = (a.transpose(1, 2) if transpose else a) @ b.permute(
+                    1, 2, 0, 3).flatten(2)
+                c = c.reshape(-1, input.shape[0],
+                              *input.shape[2:]).transpose(0, 1)
+                c = c if bias is None else c + bias.unsqueeze(0).unsqueeze(
+                    2).unsqueeze(3)
+                # Keep channels_last layout when the input's channel stride
+                # is 1, otherwise return a contiguous tensor.
+                return c.contiguous(
+                    memory_format=(torch.channels_last if input.stride(1) ==
+                                   1 else torch.contiguous_format))
+
+            # General case => cuDNN.
+            if transpose:
+                return torch.nn.functional.conv_transpose2d(
+                    input=input,
+                    weight=weight,
+                    bias=bias,
+                    output_padding=output_padding,
+                    **common_kwargs)
+            return torch.nn.functional.conv2d(
+                input=input, weight=weight, bias=bias, **common_kwargs)
+
+        @staticmethod
+        def backward(ctx, grad_output):
+            input, weight = ctx.saved_tensors
+            input_shape = ctx.input_shape
+            grad_input = None
+            grad_weight = None
+            grad_bias = None
+
+            if ctx.needs_input_grad[0]:
+                # The input gradient is computed by the op with the opposite
+                # `transpose` flag, obtained from this same factory so it is
+                # again an autograd Function.
+                p = calc_output_padding(
+                    input_shape=input_shape, output_shape=grad_output.shape)
+                op = _conv2d_gradfix(
+                    transpose=(not transpose),
+                    weight_shape=weight_shape,
+                    output_padding=p,
+                    **common_kwargs)
+                grad_input = op.apply(grad_output, weight, None)
+                assert grad_input.shape == input_shape
+
+            # Skipped (grad_weight stays None) while the module-level
+            # `weight_gradients_disabled` flag is set.
+            if ctx.needs_input_grad[1] and not weight_gradients_disabled:
+                grad_weight = Conv2dGradWeight.apply(grad_output, input)
+                assert grad_weight.shape == weight_shape
+
+            if ctx.needs_input_grad[2]:
+                # Sum over every dimension except dim 1.
+                grad_bias = grad_output.sum([0, 2, 3])
+
+            return grad_input, grad_weight, grad_bias
+
+    # Gradient with respect to the weights.
+    class Conv2dGradWeight(torch.autograd.Function):
+
+        @staticmethod
+        def forward(ctx, grad_output, input):
+            ctx.save_for_backward(
+                grad_output if input.requires_grad else _null_tensor,
+                input if grad_output.requires_grad else _null_tensor,
+            )
+            ctx.grad_output_shape = grad_output.shape
+            ctx.input_shape = input.shape
+
+            # Simple 1x1 convolution => cuBLAS (on both Volta and Ampere).
+            if weight_shape[2:] == stride == dilation == (
+                    1, 1) and padding == (0, 0):
+                a = grad_output.reshape(grad_output.shape[0], groups,
+                                        grad_output.shape[1] // groups,
+                                        -1).permute(1, 2, 0, 3).flatten(2)
+                b = input.reshape(input.shape[0], groups,
+                                  input.shape[1] // groups,
+                                  -1).permute(1, 2, 0, 3).flatten(2)
+                c = (b @ a.transpose(1, 2) if transpose else
+                     a @ b.transpose(1, 2)).reshape(weight_shape)
+                return c.contiguous(
+                    memory_format=(torch.channels_last if input.stride(1) ==
+                                   1 else torch.contiguous_format))
+
+            # General case => cuDNN.
+            # NOTE(review): relies on the private `torch._C._jit_get_operation`
+            # API and on these aten op names being registered; presumably
+            # tied to specific PyTorch versions - confirm before upgrading.
+            name = ('aten::cudnn_convolution_transpose_backward_weight' if
+                    transpose else 'aten::cudnn_convolution_backward_weight')
+            flags = [
+                torch.backends.cudnn.benchmark,
+                torch.backends.cudnn.deterministic,
+                torch.backends.cudnn.allow_tf32
+            ]
+            return torch._C._jit_get_operation(name)(weight_shape, grad_output,
+                                                     input, padding, stride,
+                                                     dilation, groups, *flags)
+
+        @staticmethod
+        def backward(ctx, grad2_grad_weight):
+            grad_output, input = ctx.saved_tensors
+            grad_output_shape = ctx.grad_output_shape
+            input_shape = ctx.input_shape
+            grad2_grad_output = None
+            grad2_input = None
+
+            if ctx.needs_input_grad[0]:
+                # Re-run the forward op with the incoming gradient acting as
+                # the weight.
+                grad2_grad_output = Conv2d.apply(input, grad2_grad_weight,
+                                                 None)
+                assert grad2_grad_output.shape == grad_output_shape
+
+            if ctx.needs_input_grad[1]:
+                # Same opposite-direction construction as in Conv2d.backward.
+                p = calc_output_padding(
+                    input_shape=input_shape, output_shape=grad_output_shape)
+                op = _conv2d_gradfix(
+                    transpose=(not transpose),
+                    weight_shape=weight_shape,
+                    output_padding=p,
+                    **common_kwargs)
+                grad2_input = op.apply(grad_output, grad2_grad_weight, None)
+                assert grad2_input.shape == input_shape
+
+            return grad2_grad_output, grad2_input
+
+    # Remember the specialized class so later calls with the same arguments
+    # reuse it.
+    _conv2d_gradfix_cache[key] = Conv2d
+    return Conv2d
--- a/build/lib/mmgen/ops/stylegan3/__init__.py
+++ b/build/lib/mmgen/ops/stylegan3/__init__.py
+# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
+#
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto.  Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+# Re-export `filtered_lrelu` from the `ops` subpackage.
+from .ops import filtered_lrelu
+
+__all__ = ['filtered_lrelu']
--- a/build/lib/mmgen/ops/stylegan3/custom_ops.py
+++ b/build/lib/mmgen/ops/stylegan3/custom_ops.py
+# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
+#
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto.  Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+import glob
+import hashlib
+import importlib
+import os
+import re
+import shutil
+import uuid
+
+import torch
+import torch.utils.cpp_extension
+
+# Global options.
+
+verbosity = 'brief'  # Verbosity level: 'none', 'brief', 'full'
+
+# Internal helper funcs.
+
+
+def _find_compiler_bindir():
+    """Locate a Visual Studio compiler directory on the local disk.
+
+    The glob patterns are tried in order; for the first pattern with any
+    match, the last match in sorted order is returned. Returns ``None`` when
+    nothing matches.
+    """
+    candidate_globs = (
+        'C:/Program Files (x86)/Microsoft Visual Studio/*/Professional/VC/Tools/MSVC/*/bin/Hostx64/x64',  # noqa
+        'C:/Program Files (x86)/Microsoft Visual Studio/*/BuildTools/VC/Tools/MSVC/*/bin/Hostx64/x64',  # noqa
+        'C:/Program Files (x86)/Microsoft Visual Studio/*/Community/VC/Tools/MSVC/*/bin/Hostx64/x64',  # noqa
+        'C:/Program Files (x86)/Microsoft Visual Studio */vc/bin',
+    )
+    for candidate in candidate_globs:
+        found = sorted(glob.glob(candidate))
+        if found:
+            return found[-1]
+    return None
+
+
+def _get_mangled_gpu_name():
+    """Return the lower-cased CUDA device name made safe for directory names.
+
+    Every character other than ``a-z``, ``0-9``, ``_`` and ``-`` is replaced
+    with ``-``.
+    """
+    device_name = torch.cuda.get_device_name().lower()
+    return ''.join(
+        ch if re.match('[a-z0-9_-]+', ch) else '-' for ch in device_name)
+
+
+# Main entry point for compiling and loading C++/CUDA plugins.
+
+_cached_plugins = dict()
+
+
+def get_plugin(module_name,
+               sources,
+               headers=None,
+               source_dir=None,
+               **build_kwargs):
+    """Compile (if needed), load and cache a C++/CUDA extension module.
+
+    Args:
+        module_name: Name passed to `torch.utils.cpp_extension.load` and
+            used as the key of the in-process plugin cache.
+        sources: Source file names to compile.
+        headers: Optional header file names; they are hashed and copied
+            together with the sources but are not passed to the build call.
+        source_dir: If given, joined in front of every entry of `sources`
+            and `headers`.
+        **build_kwargs: Forwarded to `torch.utils.cpp_extension.load`.
+
+    Returns:
+        The imported extension module.
+
+    Raises:
+        RuntimeError: On Windows, when `cl.exe` is not on PATH and
+            `_find_compiler_bindir` finds no compiler directory.
+        Exception: Any error raised while building or importing is
+            re-raised after the status line is completed.
+    """
+    assert verbosity in ['none', 'brief', 'full']
+    if headers is None:
+        headers = []
+    if source_dir is not None:
+        sources = [os.path.join(source_dir, fname) for fname in sources]
+        headers = [os.path.join(source_dir, fname) for fname in headers]
+
+    # Already cached?
+    if module_name in _cached_plugins:
+        return _cached_plugins[module_name]
+
+    # Print status.
+    if verbosity == 'full':
+        print(f'Setting up PyTorch plugin "{module_name}"...')
+    elif verbosity == 'brief':
+        # No newline: the line is finished with 'Done.' or 'Failed!' below.
+        print(
+            f'Setting up PyTorch plugin "{module_name}"... ',
+            end='',
+            flush=True)
+    verbose_build = (verbosity == 'full')
+
+    # Compile and load.
+    try:  # pylint: disable=too-many-nested-blocks
+        # Make sure we can find the necessary compiler binaries.
+        if os.name == 'nt' and os.system('where cl.exe >nul 2>nul') != 0:
+            compiler_bindir = _find_compiler_bindir()
+            if compiler_bindir is None:
+                raise RuntimeError(
+                    'Could not find MSVC/GCC/CLANG installation on this '
+                    f'computer. Check _find_compiler_bindir() in "{__file__}".'
+                )
+            # NOTE: modifies PATH for the whole process.
+            os.environ['PATH'] += ';' + compiler_bindir
+
+        # Some containers set TORCH_CUDA_ARCH_LIST to a list that can either
+        # break the build or unnecessarily restrict what's available to nvcc.
+        # Unset it to let nvcc decide based on what's available on the
+        # machine.
+        # NOTE: the variable is set to an empty string (not removed) and the
+        # previous value is not restored afterwards.
+        os.environ['TORCH_CUDA_ARCH_LIST'] = ''
+
+        # Incremental build md5sum trickery.  Copies all the input source files
+        # into a cached build directory under a combined md5 digest of the
+        # input source files.  Copying is done only if the combined digest has
+        # changed.
+        # This keeps input file timestamps and filenames the same as in
+        # previous extension builds, allowing for fast incremental rebuilds.
+        #
+        # This optimization is done only in case all the source files reside in
+        # a single directory (just for simplicity) and if the
+        # TORCH_EXTENSIONS_DIR environment variable is set (we take this as a
+        # signal that the user actually cares about this.)
+        #
+        # EDIT: We now do it regardless of TORCH_EXTENSIONS_DIR, in order to
+        # work around the *.cu dependency bug in ninja config.
+
+        all_source_files = sorted(sources + headers)
+        all_source_dirs = set(
+            os.path.dirname(fname) for fname in all_source_files)
+        if len(all_source_dirs
+               ) == 1:  # and ('TORCH_EXTENSIONS_DIR' in os.environ):
+
+            # Compute combined hash digest for all source files.
+            hash_md5 = hashlib.md5()
+            for src in all_source_files:
+                with open(src, 'rb') as f:
+                    hash_md5.update(f.read())
+
+            # Select cached build directory name.
+            # The directory name combines the source digest with the mangled
+            # GPU name.
+            source_digest = hash_md5.hexdigest()
+            build_top_dir = torch.utils.cpp_extension._get_build_directory(
+                module_name, verbose=verbose_build)
+            cached_build_dir = os.path.join(
+                build_top_dir, f'{source_digest}-{_get_mangled_gpu_name()}')
+
+            if not os.path.isdir(cached_build_dir):
+                # Populate a uniquely named temp dir first, then move it into
+                # place with a single rename.
+                tmpdir = f'{build_top_dir}/srctmp-{uuid.uuid4().hex}'
+                os.makedirs(tmpdir)
+                for src in all_source_files:
+                    shutil.copyfile(
+                        src, os.path.join(tmpdir, os.path.basename(src)))
+                try:
+                    os.replace(tmpdir, cached_build_dir)  # atomic
+                except OSError:
+                    # source directory already exists
+                    # delete tmpdir and its contents.
+                    shutil.rmtree(tmpdir)
+                    # If the target still does not exist, the rename failed
+                    # for some other reason: propagate the error.
+                    if not os.path.isdir(cached_build_dir):
+                        raise
+
+            # Compile.
+            # Only the source files (not the headers) are handed to the build.
+            cached_sources = [
+                os.path.join(cached_build_dir, os.path.basename(fname))
+                for fname in sources
+            ]
+            torch.utils.cpp_extension.load(
+                name=module_name,
+                build_directory=cached_build_dir,
+                verbose=verbose_build,
+                sources=cached_sources,
+                **build_kwargs)
+        else:
+            # Sources span several directories: build from the original
+            # locations without the digest-named cache directory.
+            torch.utils.cpp_extension.load(
+                name=module_name,
+                verbose=verbose_build,
+                sources=sources,
+                **build_kwargs)
+
+        # Load.
+        module = importlib.import_module(module_name)
+
+    except Exception as err:
+        # Finish the 'brief' status line before propagating the error.
+        if verbosity == 'brief':
+            print('Failed!')
+        raise err
+
+    # Print status and add to cache dict.
+    if verbosity == 'full':
+        print(f'Done setting up PyTorch plugin "{module_name}".')
+    elif verbosity == 'brief':
+        print('Done.')
+    _cached_plugins[module_name] = module
+    return module
--- a/build/lib/mmgen/ops/stylegan3/ops/__init__.py
+++ b/build/lib/mmgen/ops/stylegan3/ops/__init__.py
+# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
+#
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto.  Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+# empty
--- a/build/lib/mmgen/ops/stylegan3/ops/bias_act.py
+++ b/build/lib/mmgen/ops/stylegan3/ops/bias_act.py
+# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
+#
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto.  Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+"""Custom PyTorch ops for efficient bias and activation."""
+
+import os
+from typing import Any
+
+import numpy as np
+import torch
+
+from .. import custom_ops
+
+
+class EasyDict(dict):
+    """A ``dict`` whose items can also be read, written and deleted as
+    attributes (``d.key`` is equivalent to ``d['key']``)."""
+
+    def __getattr__(self, name: str) -> Any:
+        # Translate a missing key into the exception attribute access is
+        # expected to raise.
+        try:
+            value = self[name]
+        except KeyError:
+            raise AttributeError(name)
+        return value
+
+    def __setattr__(self, name: str, value: Any) -> None:
+        self.__setitem__(name, value)
+
+    def __delattr__(self, name: str) -> None:
+        self.__delitem__(name)
+
+
+# Registry of supported activation functions, keyed by name. Each entry is
+# an EasyDict with the fields:
+#   func         - reference implementation, called as func(x, alpha=...)
+#                  by `_bias_act_ref`.
+#   def_alpha    - alpha used when the caller passes `alpha=None`.
+#   def_gain     - gain used when the caller passes `gain=None`.
+#   cuda_idx     - integer id handed to the CUDA plugin (presumably selects
+#                  the activation inside the kernel - confirm in bias_act.cu).
+#   ref          - which tensors the CUDA op saves for its backward pass:
+#                  contains 'x' (input), 'y' (output) or is '' (neither).
+#   has_2nd_grad - whether the CUDA backward op computes an extra
+#                  second-order term for this activation.
+activation_funcs = {
+    'linear':
+    EasyDict(
+        func=lambda x, **_: x,
+        def_alpha=0,
+        def_gain=1,
+        cuda_idx=1,
+        ref='',
+        has_2nd_grad=False),
+    'relu':
+    EasyDict(
+        func=lambda x, **_: torch.nn.functional.relu(x),
+        def_alpha=0,
+        def_gain=np.sqrt(2),
+        cuda_idx=2,
+        ref='y',
+        has_2nd_grad=False),
+    'lrelu':
+    EasyDict(
+        func=lambda x, alpha, **_: torch.nn.functional.leaky_relu(x, alpha),
+        def_alpha=0.2,
+        def_gain=np.sqrt(2),
+        cuda_idx=3,
+        ref='y',
+        has_2nd_grad=False),
+    'tanh':
+    EasyDict(
+        func=lambda x, **_: torch.tanh(x),
+        def_alpha=0,
+        def_gain=1,
+        cuda_idx=4,
+        ref='y',
+        has_2nd_grad=True),
+    'sigmoid':
+    EasyDict(
+        func=lambda x, **_: torch.sigmoid(x),
+        def_alpha=0,
+        def_gain=1,
+        cuda_idx=5,
+        ref='y',
+        has_2nd_grad=True),
+    'elu':
+    EasyDict(
+        func=lambda x, **_: torch.nn.functional.elu(x),
+        def_alpha=0,
+        def_gain=1,
+        cuda_idx=6,
+        ref='y',
+        has_2nd_grad=True),
+    'selu':
+    EasyDict(
+        func=lambda x, **_: torch.nn.functional.selu(x),
+        def_alpha=0,
+        def_gain=1,
+        cuda_idx=7,
+        ref='y',
+        has_2nd_grad=True),
+    'softplus':
+    EasyDict(
+        func=lambda x, **_: torch.nn.functional.softplus(x),
+        def_alpha=0,
+        def_gain=1,
+        cuda_idx=8,
+        ref='y',
+        has_2nd_grad=True),
+    'swish':
+    EasyDict(
+        func=lambda x, **_: torch.sigmoid(x) * x,
+        def_alpha=0,
+        def_gain=np.sqrt(2),
+        cuda_idx=9,
+        ref='x',
+        has_2nd_grad=True),
+}
+
+_plugin = None
+_null_tensor = torch.empty([0])
+
+
+def _init():
+    """Load the `bias_act_plugin` extension on first use.
+
+    The plugin is built from the sources that sit next to this file and is
+    stored in the module-level ``_plugin``. Always returns ``True``.
+    """
+    global _plugin
+    if _plugin is not None:
+        return True
+    plugin_dir = os.path.dirname(__file__)
+    _plugin = custom_ops.get_plugin(
+        module_name='bias_act_plugin',
+        sources=['bias_act.cpp', 'bias_act.cu'],
+        headers=['bias_act.h'],
+        source_dir=plugin_dir,
+        extra_cuda_cflags=['--use_fast_math'],
+    )
+    return True
+
+
+def bias_act(x,
+             b=None,
+             dim=1,
+             act='linear',
+             alpha=None,
+             gain=None,
+             clamp=None,
+             impl='cuda'):
+    r"""Fused bias addition, activation, gain scaling and clamping.
+
+    The bias `b` is added to `x`, the activation `act` is applied, the
+    result is multiplied by `gain` and finally clamped. Every step is
+    optional. The fused op is usually much faster than the equivalent chain
+    of standard PyTorch ops and supports first and second order gradients
+    (but not third order).
+
+    Args:
+        x:      Input activation tensor of any shape.
+        b:      1D bias tensor of the same type as `x`, or `None` for no
+                bias. Its length must equal the size of `x` along `dim`.
+        dim:    Dimension of `x` that the elements of `b` correspond to.
+                Ignored when `b` is `None`.
+        act:    Name of the activation function, e.g. `"relu"`, `"lrelu"`,
+                `"tanh"`, `"sigmoid"`, `"swish"`; `"linear"` disables the
+                activation. See `activation_funcs` for the full list.
+                `None` is not allowed.
+        alpha:  Shape parameter of the activation, or `None` for the
+                activation's default.
+        gain:   Output scaling factor, or `None` for the activation's
+                default (see `activation_funcs`). If unsure, consider
+                specifying 1.
+        clamp:  Clamp the output to `[-clamp, +clamp]`; `None` (default)
+                disables clamping.
+        impl:   Implementation to use, `"ref"` or `"cuda"` (default).
+
+    Returns:
+        Tensor of the same shape and datatype as `x`.
+    """
+    assert isinstance(x, torch.Tensor)
+    assert impl in ['ref', 'cuda']
+    act_kwargs = dict(dim=dim, act=act, alpha=alpha, gain=gain, clamp=clamp)
+
+    # The CUDA path needs a CUDA tensor and a successfully loaded plugin;
+    # `_init()` is only evaluated when the first two conditions hold.
+    use_cuda_kernel = (
+        impl == 'cuda' and x.device.type == 'cuda' and _init())
+    if not use_cuda_kernel:
+        return _bias_act_ref(x=x, b=b, **act_kwargs)
+
+    fused_op = _bias_act_cuda(**act_kwargs)
+    return fused_op.apply(x, b)
+
+
+def _bias_act_ref(x,
+                  b=None,
+                  dim=1,
+                  act='linear',
+                  alpha=None,
+                  gain=None,
+                  clamp=None):
+    """Slow reference implementation of `bias_act()` built from standard
+    PyTorch ops.
+
+    Takes the same arguments as `bias_act()` (except `impl`) and returns the
+    processed tensor.
+    """
+    assert isinstance(x, torch.Tensor)
+    assert clamp is None or clamp >= 0
+    spec = activation_funcs[act]
+
+    # Resolve defaults; a negative clamp value means "no clamping".
+    alpha = float(spec.def_alpha if alpha is None else alpha)
+    gain = float(spec.def_gain if gain is None else gain)
+    clamp = float(-1 if clamp is None else clamp)
+
+    # Add bias, broadcast along every dimension except `dim`.
+    if b is not None:
+        assert isinstance(b, torch.Tensor) and b.ndim == 1
+        assert 0 <= dim < x.ndim
+        assert b.shape[0] == x.shape[dim]
+        bias_shape = [-1 if axis == dim else 1 for axis in range(x.ndim)]
+        x = x + b.reshape(bias_shape)
+
+    # Evaluate activation function.
+    x = spec.func(x, alpha=alpha)
+
+    # Scale by gain.
+    if gain != 1:
+        x = x * gain
+
+    # Clamp.
+    if clamp >= 0:
+        # pylint: disable=invalid-unary-operand-type
+        x = x.clamp(-clamp, clamp)
+    return x
+
+
+_bias_act_cuda_cache = dict()
+
+
+def _bias_act_cuda(dim=1, act='linear', alpha=None, gain=None, clamp=None):
+    """Fast CUDA implementation of `bias_act()` using custom ops.
+
+    Returns a `torch.autograd.Function` subclass (``BiasActCuda``)
+    specialized for the given arguments; call ``.apply(x, b)`` on it. One
+    class is created per distinct ``(dim, act, alpha, gain, clamp)``
+    combination and kept in ``_bias_act_cuda_cache``.
+    """
+    # Parse arguments.
+    # `None` values fall back to the activation's defaults; a clamp of -1
+    # means "no clamping".
+    assert clamp is None or clamp >= 0
+    spec = activation_funcs[act]
+    alpha = float(alpha if alpha is not None else spec.def_alpha)
+    gain = float(gain if gain is not None else spec.def_gain)
+    clamp = float(clamp if clamp is not None else -1)
+
+    # Lookup from cache.
+    key = (dim, act, alpha, gain, clamp)
+    if key in _bias_act_cuda_cache:
+        return _bias_act_cuda_cache[key]
+
+    # Forward op.
+    class BiasActCuda(torch.autograd.Function):
+
+        @staticmethod
+        def forward(ctx, x, b):  # pylint: disable=arguments-differ
+            # Remember the layout so gradients can be made contiguous in the
+            # same memory format.
+            ctx.memory_format = torch.channels_last if x.ndim > 2 and x.stride(
+                1) == 1 else torch.contiguous_format
+            x = x.contiguous(memory_format=ctx.memory_format)
+            b = b.contiguous() if b is not None else _null_tensor
+            y = x
+            # The plugin is called only when there is something to do;
+            # otherwise the input is returned as-is.
+            # NOTE(review): the integer after the tensor arguments (0 here,
+            # 1 and 2 below) presumably selects forward / first-order /
+            # second-order gradient in the plugin - confirm in bias_act.cpp.
+            if act != 'linear' or gain != 1 or clamp >= 0 or (
+                    b is not _null_tensor):
+                y = _plugin.bias_act(x, b, _null_tensor, _null_tensor,
+                                     _null_tensor, 0, dim, spec.cuda_idx,
+                                     alpha, gain, clamp)
+            # Save only what `spec.ref` / `spec.has_2nd_grad` call for;
+            # everything else is replaced by an empty placeholder.
+            ctx.save_for_backward(
+                x if 'x' in spec.ref or spec.has_2nd_grad else _null_tensor,
+                b if 'x' in spec.ref or spec.has_2nd_grad else _null_tensor,
+                y if 'y' in spec.ref else _null_tensor)
+            return y
+
+        @staticmethod
+        def backward(ctx, dy):  # pylint: disable=arguments-differ
+            dy = dy.contiguous(memory_format=ctx.memory_format)
+            x, b, y = ctx.saved_tensors
+            dx = None
+            db = None
+
+            # dx is needed for the bias gradient as well, so compute it when
+            # either gradient is requested.
+            if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]:
+                dx = dy
+                if act != 'linear' or gain != 1 or clamp >= 0:
+                    dx = BiasActCudaGrad.apply(dy, x, b, y)
+
+            if ctx.needs_input_grad[1]:
+                # Reduce over every dimension except `dim`.
+                db = dx.sum([i for i in range(dx.ndim) if i != dim])
+
+            return dx, db
+
+    # Backward op.
+    class BiasActCudaGrad(torch.autograd.Function):
+
+        @staticmethod
+        def forward(ctx, dy, x, b, y):  # pylint: disable=arguments-differ
+            ctx.memory_format = torch.channels_last if dy.ndim > 2 and (
+                dy.stride(1) == 1) else torch.contiguous_format
+            dx = _plugin.bias_act(dy, b, x, y, _null_tensor, 1, dim,
+                                  spec.cuda_idx, alpha, gain, clamp)
+            # dy is kept only when a second-order term will be computed.
+            ctx.save_for_backward(dy if spec.has_2nd_grad else _null_tensor, x,
+                                  b, y)
+            return dx
+
+        @staticmethod
+        def backward(ctx, d_dx):  # pylint: disable=arguments-differ
+            d_dx = d_dx.contiguous(memory_format=ctx.memory_format)
+            dy, x, b, y = ctx.saved_tensors
+            d_dy = None
+            d_x = None
+            d_b = None
+            d_y = None
+
+            if ctx.needs_input_grad[0]:
+                # Gradient w.r.t. dy: apply this same op to d_dx.
+                d_dy = BiasActCudaGrad.apply(d_dx, x, b, y)
+
+            # Gradients w.r.t. x and b exist only for activations flagged
+            # with `has_2nd_grad`; otherwise they stay None.
+            if spec.has_2nd_grad and (ctx.needs_input_grad[1]
+                                      or ctx.needs_input_grad[2]):
+                d_x = _plugin.bias_act(d_dx, b, x, y, dy, 2, dim,
+                                       spec.cuda_idx, alpha, gain, clamp)
+
+            if spec.has_2nd_grad and ctx.needs_input_grad[2]:
+                d_b = d_x.sum([i for i in range(d_x.ndim) if i != dim])
+
+            # d_y is never computed and is always returned as None.
+            return d_dy, d_x, d_b, d_y
+
+    # Add to cache.
+    _bias_act_cuda_cache[key] = BiasActCuda
+    return BiasActCuda
--- a/build/lib/mmgen/ops/stylegan3/ops/filtered_lrelu.py
+++ b/build/lib/mmgen/ops/stylegan3/ops/filtered_lrelu.py
+# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
+#
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto.  Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+import os
+import warnings
+
+import numpy as np
+import torch
+
+from .. import custom_ops
+from . import bias_act, upfirdn2d
+
+_plugin = None
+
+
+def _init():
+    """Load the `filtered_lrelu_plugin` extension on first use.
+
+    The plugin is built from the sources that sit next to this file and is
+    stored in the module-level ``_plugin``. Always returns ``True``.
+    """
+    global _plugin
+    if _plugin is not None:
+        return True
+    plugin_dir = os.path.dirname(__file__)
+    _plugin = custom_ops.get_plugin(
+        module_name='filtered_lrelu_plugin',
+        sources=[
+            'filtered_lrelu.cpp', 'filtered_lrelu_wr.cu',
+            'filtered_lrelu_rd.cu', 'filtered_lrelu_ns.cu'
+        ],
+        headers=['filtered_lrelu.h', 'filtered_lrelu.cu'],
+        source_dir=plugin_dir,
+        extra_cuda_cflags=['--use_fast_math'],
+    )
+    return True
+
+
+def _get_filter_size(f):
+    """Return ``(width, height)`` of FIR filter ``f``.
+
+    ``None`` yields ``(1, 1)``. Otherwise ``f`` must be a 1D or 2D tensor;
+    the width is its last dimension and the height its first, so a 1D
+    filter reports the same value for both.
+    """
+    if f is None:
+        return 1, 1
+    assert isinstance(f, torch.Tensor)
+    assert 1 <= f.ndim <= 2
+    width = f.shape[-1]
+    height = f.shape[0]
+    return width, height
+
+
+def _parse_padding(padding):
+    """Expand ``padding`` to the 4-tuple ``(px0, px1, py0, py1)``.
+
+    Accepts a single int (used for all four sides), a 2-element sequence
+    ``[px, py]`` (each value used for both sides of its axis), or a
+    4-element sequence that is returned as plain ints.
+    """
+    if isinstance(padding, int):
+        padding = [padding, padding]
+    assert isinstance(padding, (list, tuple))
+    assert all(isinstance(x, (int, np.integer)) for x in padding)
+    pads = list(map(int, padding))
+    if len(pads) == 2:
+        pad_x, pad_y = pads
+        pads = [pad_x, pad_x, pad_y, pad_y]
+    # Unpacking enforces that exactly four values remain.
+    px0, px1, py0, py1 = pads
+    return px0, px1, py0, py1
+
+
+def filtered_lrelu(x,
+                   fu=None,
+                   fd=None,
+                   b=None,
+                   up=1,
+                   down=1,
+                   padding=0,
+                   gain=np.sqrt(2),
+                   slope=0.2,
+                   clamp=None,
+                   flip_filter=False,
+                   impl='cuda'):
+    r"""Fused bias / upsample / leaky ReLU / downsample for 2D image batches.
+
+    For every channel the op performs, in order:
+
+    1. Add the channel-specific bias `b`, when given.
+    2. Upsample by inserting `up - 1` zeros after each pixel.
+    3. Zero-pad each side by `padding`; negative padding crops instead.
+    4. Convolve with the upsampling FIR filter `fu`, keeping only outputs
+       whose footprint lies inside the input image.
+    5. Multiply by `gain`.
+    6. Apply leaky ReLU with negative-side slope `slope`.
+    7. Clamp to `[-clamp, +clamp]`, when `clamp` is given.
+    8. Convolve with the downsampling FIR filter `fd`, keeping only outputs
+       whose footprint lies inside the input image.
+    9. Downsample by keeping every `down`-th pixel.
+
+    The fused op is considerably more efficient than running the same steps
+    with standard PyTorch ops and supports gradients of arbitrary order.
+
+    The CUDA path is taken only when `impl == 'cuda'`, `x` lives on a CUDA
+    device and the plugin initialises; otherwise the reference
+    implementation is used.
+
+    Args:
+        x:           Float32/float16/float64 input tensor of the shape
+                     `[batch_size, num_channels, in_height, in_width]`.
+        fu:          Float32 upsampling FIR filter of the shape
+                     `[filter_height, filter_width]` (non-separable),
+                     `[filter_taps]` (separable), or
+                     `None` (identity).
+        fd:          Float32 downsampling FIR filter of the shape
+                     `[filter_height, filter_width]` (non-separable),
+                     `[filter_taps]` (separable), or
+                     `None` (identity).
+        b:           Bias vector, or `None` to disable. Must be a 1D tensor of
+                     the same type as `x`, with length equal to the channel
+                     dimension of `x`.
+        up:          Integer upsampling factor (default: 1).
+        down:        Integer downsampling factor (default: 1).
+        padding:     Padding with respect to the upsampled image. Can be a
+                     single number or a list/tuple `[x, y]` or `[x_before,
+                     x_after, y_before, y_after]` (default: 0).
+        gain:        Overall scaling factor for signal magnitude (default:
+                     sqrt(2)).
+        slope:       Slope on the negative side of leaky ReLU (default: 0.2).
+        clamp:       Maximum magnitude for leaky ReLU output (default: None).
+        flip_filter: False = convolution, True = correlation (default: False).
+        impl:        Implementation to use. Can be `'ref'` or `'cuda'`
+                     (default: `'cuda'`).
+
+    Returns:
+        Tensor of the shape `[batch_size, num_channels, out_height,
+        out_width]`.
+    """
+    assert isinstance(x, torch.Tensor)
+    assert impl in ['ref', 'cuda']
+
+    # Options shared by both implementations.
+    op_kwargs = dict(
+        up=up,
+        down=down,
+        padding=padding,
+        gain=gain,
+        slope=slope,
+        clamp=clamp,
+        flip_filter=flip_filter)
+
+    if impl == 'cuda' and x.device.type == 'cuda' and _init():
+        cuda_op = _filtered_lrelu_cuda(**op_kwargs)
+        # No sign tensor is supplied and the sign offsets are zero.
+        return cuda_op.apply(x, fu, fd, b, None, 0, 0)
+    return _filtered_lrelu_ref(x, fu=fu, fd=fd, b=b, **op_kwargs)
+
+
+def _filtered_lrelu_ref(x,
+                        fu=None,
+                        fd=None,
+                        b=None,
+                        up=1,
+                        down=1,
+                        padding=0,
+                        gain=np.sqrt(2),
+                        slope=0.2,
+                        clamp=None,
+                        flip_filter=False):
+    """Reference implementation of `filtered_lrelu()`.
+
+    Slow and memory-inefficient: it chains the existing `upfirdn2d()` and
+    `bias_act()` ops instead of using the fused kernel. Arguments have the
+    same meaning as in `filtered_lrelu()`.
+    """
+    # Validate arguments.
+    assert isinstance(x, torch.Tensor) and x.ndim == 4
+    fu_w, fu_h = _get_filter_size(fu)
+    fd_w, fd_h = _get_filter_size(fd)
+    if b is not None:
+        assert isinstance(b, torch.Tensor) and b.dtype == x.dtype
+    assert isinstance(up, int) and up >= 1
+    assert isinstance(down, int) and down >= 1
+    px0, px1, py0, py1 = _parse_padding(padding)
+    assert gain == float(gain) and gain > 0
+    assert slope == float(slope) and slope >= 0
+    assert clamp is None or (clamp == float(clamp) and clamp >= 0)
+
+    # Expected output geometry, used only for the sanity checks at the end.
+    expected_dtype = x.dtype
+    num_images, num_channels, in_h, in_w = x.shape
+    shrink_w = (fu_w - 1) + (fd_w - 1)
+    shrink_h = (fu_h - 1) + (fd_h - 1)
+    out_w = (in_w * up + (px0 + px1) - shrink_w + (down - 1)) // down
+    out_h = (in_h * up + (py0 + py1) - shrink_h + (down - 1)) // down
+
+    # Step 1: bias only.
+    y = bias_act.bias_act(x=x, b=b)
+    # Step 2: upsample with `fu`.
+    y = upfirdn2d.upfirdn2d(
+        x=y,
+        f=fu,
+        up=up,
+        padding=[px0, px1, py0, py1],
+        gain=up**2,
+        flip_filter=flip_filter)
+    # Step 3: gain, leaky ReLU and clamp (no bias is passed here).
+    y = bias_act.bias_act(
+        x=y, act='lrelu', alpha=slope, gain=gain, clamp=clamp)
+    # Step 4: downsample with `fd`.
+    y = upfirdn2d.upfirdn2d(x=y, f=fd, down=down, flip_filter=flip_filter)
+
+    assert y.shape == (num_images, num_channels, out_h, out_w)
+    assert y.dtype == expected_dtype
+    return y
+
+
+# Maps a tuple of parsed arguments to the autograd Function class built for
+# them by `_filtered_lrelu_cuda()`.
+_filtered_lrelu_cuda_cache = dict()
+
+
+def _filtered_lrelu_cuda(up=1,
+                         down=1,
+                         padding=0,
+                         gain=np.sqrt(2),
+                         slope=0.2,
+                         clamp=None,
+                         flip_filter=False):
+    """Fast CUDA implementation of `filtered_lrelu()` using custom ops.
+
+    Validates the arguments and returns a `torch.autograd.Function` subclass
+    specialised for them. The class is stored in
+    `_filtered_lrelu_cuda_cache`, so a later call with the same parsed
+    arguments returns the same class.
+
+    Args:
+        up:          Integer upsampling factor, must be >= 1.
+        down:        Integer downsampling factor, must be >= 1.
+        padding:     Padding in any form accepted by `_parse_padding()`.
+        gain:        Positive scaling factor.
+        slope:       Non-negative leaky ReLU slope.
+        clamp:       Non-negative clamp magnitude, or `None`, which is
+                     converted to `float('inf')`.
+        flip_filter: Forwarded to the plugin / `upfirdn2d()` calls.
+
+    Returns:
+        The `FilteredLReluCuda` class. Invoke it as
+        `.apply(x, fu, fd, b, si, sx, sy)`.
+    """
+    assert isinstance(up, int) and up >= 1
+    assert isinstance(down, int) and down >= 1
+    px0, px1, py0, py1 = _parse_padding(padding)
+    assert gain == float(gain) and gain > 0
+    gain = float(gain)
+    assert slope == float(slope) and slope >= 0
+    slope = float(slope)
+    assert clamp is None or (clamp == float(clamp) and clamp >= 0)
+    # A missing clamp is represented as +inf from here on.
+    clamp = float(clamp if clamp is not None else 'inf')
+
+    # Lookup from cache.
+    key = (up, down, px0, px1, py0, py1, gain, slope, clamp, flip_filter)
+    if key in _filtered_lrelu_cuda_cache:
+        return _filtered_lrelu_cuda_cache[key]
+
+    # Forward op.
+    class FilteredLReluCuda(torch.autograd.Function):
+
+        @staticmethod
+        def forward(ctx, x, fu, fd, b, si, sx, sy):
+            # pylint: disable=arguments-differ
+            # x: 4-D input; fu/fd: up/downsampling filters or None;
+            # b: bias or None; si: sign tensor or None; sx/sy: sign offsets
+            # that are passed through to the plugin.
+            assert isinstance(x, torch.Tensor) and x.ndim == 4
+
+            # Replace empty up/downsample kernels with full 1x1 kernels
+            # (faster than separable).
+            if fu is None:
+                fu = torch.ones([1, 1], dtype=torch.float32, device=x.device)
+            if fd is None:
+                fd = torch.ones([1, 1], dtype=torch.float32, device=x.device)
+            assert 1 <= fu.ndim <= 2
+            assert 1 <= fd.ndim <= 2
+
+            # Replace separable 1x1 kernels with full 1x1 kernels when scale
+            # factor is 1.
+            if up == 1 and fu.ndim == 1 and fu.shape[0] == 1:
+                fu = fu.square()[None]
+            if down == 1 and fd.ndim == 1 and fd.shape[0] == 1:
+                fd = fd.square()[None]
+
+            # Missing sign input tensor.
+            if si is None:
+                si = torch.empty([0])
+
+            # Missing bias tensor.
+            if b is None:
+                b = torch.zeros([x.shape[1]], dtype=x.dtype, device=x.device)
+
+            # Construct internal sign tensor only if gradients are needed.
+            write_signs = (si.numel() == 0) and (x.requires_grad
+                                                 or b.requires_grad)
+
+            # Warn if input storage strides are not in decreasing order due to
+            # e.g. channels-last layout.
+            strides = [x.stride(i) for i in range(x.ndim) if x.size(i) > 1]
+            if any(a < b for a, b in zip(strides[:-1], strides[1:])):
+                warnings.warn(
+                    'low-performance memory layout detected in filtered_lrelu '
+                    'input', RuntimeWarning)
+
+            # Call C++/Cuda plugin if datatype is supported.
+            if x.dtype in [torch.float16, torch.float32]:
+                if torch.cuda.current_stream(
+                        x.device) != torch.cuda.default_stream(x.device):
+                    warnings.warn(
+                        'filtered_lrelu called with non-default cuda stream '
+                        'but concurrent execution is not supported',
+                        RuntimeWarning)
+                y, so, return_code = _plugin.filtered_lrelu(
+                    x, fu, fd, b, si, up, down, px0, px1, py0, py1, sx, sy,
+                    gain, slope, clamp, flip_filter, write_signs)
+            else:
+                # Other dtypes (e.g. float64) always take the fallback below.
+                return_code = -1
+
+            # No Cuda kernel found? Fall back to generic implementation.
+            # Still more memory efficient than the reference implementation
+            # because only the bit-packed sign tensor is retained for gradient
+            # computation.
+            if return_code < 0:
+                warnings.warn(
+                    'filtered_lrelu called with parameters that have no '
+                    'optimized CUDA kernel, using generic fallback',
+                    RuntimeWarning)
+
+                y = x.add(b.unsqueeze(-1).unsqueeze(-1))  # Add bias.
+                y = upfirdn2d.upfirdn2d(
+                    x=y,
+                    f=fu,
+                    up=up,
+                    padding=[px0, px1, py0, py1],
+                    gain=up**2,
+                    flip_filter=flip_filter)  # Upsample.
+                # Activation function and sign handling. Modifies y in-place.
+                so = _plugin.filtered_lrelu_act_(y, si, sx, sy, gain, slope,
+                                                 clamp, write_signs)
+                y = upfirdn2d.upfirdn2d(
+                    x=y, f=fd, down=down,
+                    flip_filter=flip_filter)  # Downsample.
+
+            # Prepare for gradient computation.
+            # Keep the caller-supplied sign tensor when it is non-empty,
+            # otherwise the sign tensor returned by the plugin.
+            ctx.save_for_backward(fu, fd, (si if si.numel() else so))
+            ctx.x_shape = x.shape
+            ctx.y_shape = y.shape
+            ctx.s_ofs = sx, sy
+            return y
+
+        @staticmethod
+        def backward(ctx, dy):  # pylint: disable=arguments-differ
+            fu, fd, si = ctx.saved_tensors
+            _, _, xh, xw = ctx.x_shape
+            _, _, yh, yw = ctx.y_shape
+            sx, sy = ctx.s_ofs
+            # One gradient slot per forward input; the trailing numbers give
+            # the index into ctx.needs_input_grad. Only x (0) and b (3) may
+            # request a gradient; the asserts reject every other input.
+            dx = None  # 0
+            dfu = None
+            assert not ctx.needs_input_grad[1]
+            dfd = None
+            assert not ctx.needs_input_grad[2]
+            db = None  # 3
+            dsi = None
+            assert not ctx.needs_input_grad[4]
+            dsx = None
+            assert not ctx.needs_input_grad[5]
+            dsy = None
+            assert not ctx.needs_input_grad[6]
+
+            if ctx.needs_input_grad[0] or ctx.needs_input_grad[3]:
+                # Padding for the gradient op, derived from the filter sizes,
+                # the saved input/output shapes and the forward padding.
+                pp = [
+                    (fu.shape[-1] - 1) + (fd.shape[-1] - 1) - px0,
+                    xw * up - yw * down + px0 - (up - 1),
+                    (fu.shape[0] - 1) + (fd.shape[0] - 1) - py0,
+                    xh * up - yh * down + py0 - (up - 1),
+                ]
+                gg = gain * (up**2) / (down**2)
+                ff = (not flip_filter)
+                # Shift the sign offsets by the upsampling filter extent and
+                # the forward padding.
+                sx = sx - (fu.shape[-1] - 1) + px0
+                sy = sy - (fu.shape[0] - 1) + py0
+                # The gradient is computed by the same op with up/down and
+                # fu/fd swapped, the filter flip inverted, no bias, no clamp,
+                # and the saved sign tensor passed in as `si`.
+                dx = _filtered_lrelu_cuda(
+                    up=down,
+                    down=up,
+                    padding=pp,
+                    gain=gg,
+                    slope=slope,
+                    clamp=None,
+                    flip_filter=ff).apply(dy, fd, fu, None, si, sx, sy)
+
+            if ctx.needs_input_grad[3]:
+                # Bias gradient: reduce dx over every dimension but channels.
+                db = dx.sum([0, 2, 3])
+
+            return dx, dfu, dfd, db, dsi, dsx, dsy
+
+    # Add to cache.
+    _filtered_lrelu_cuda_cache[key] = FilteredLReluCuda
+    return FilteredLReluCuda
--- a/build/lib/mmgen/ops/stylegan3/ops/upfirdn2d.py
+++ b/build/lib/mmgen/ops/stylegan3/ops/upfirdn2d.py
+# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
+#
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto.  Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+"""Custom PyTorch ops for efficient resampling of 2D images."""
+
+import os
+
+import numpy as np
+import torch
+
+from ... import conv2d
+from .. import custom_ops
+
+# Plugin object returned by `custom_ops.get_plugin`; None until `_init()` runs.
+_plugin = None
+
+
+def _init():
+    """Load the `upfirdn2d_plugin` extension on first use.
+
+    Returns:
+        bool: Always True, so the call can sit inside a boolean expression.
+    """
+    global _plugin
+    if _plugin is not None:
+        return True
+
+    plugin_sources = ['upfirdn2d.cpp', 'upfirdn2d.cu']
+    _plugin = custom_ops.get_plugin(
+        module_name='upfirdn2d_plugin',
+        sources=plugin_sources,
+        headers=['upfirdn2d.h'],
+        source_dir=os.path.dirname(__file__),
+        extra_cuda_cflags=['--use_fast_math'],
+    )
+    return True
+
+
+def _parse_scaling(scaling):
+    """Normalize a scaling factor into the pair `(sx, sy)`.
+
+    A single int is used for both axes; a list/tuple must hold exactly two
+    ints. Both factors must be at least 1.
+    """
+    pair = [scaling, scaling] if isinstance(scaling, int) else scaling
+    assert isinstance(pair, (list, tuple))
+    for factor in pair:
+        assert isinstance(factor, int)
+    scale_x, scale_y = pair
+    assert min(scale_x, scale_y) >= 1
+    return scale_x, scale_y
+
+
+def _parse_padding(padding):
+    """Parse padding into the tuple `(padx0, padx1, pady0, pady1)`.
+
+    Args:
+        padding: A single int, a two-element `[x, y]` list/tuple, or a
+            four-element `[x_before, x_after, y_before, y_after]`
+            list/tuple. Entries may be Python ints or NumPy integers, the
+            same as the padding parser in `filtered_lrelu.py` accepts.
+
+    Returns:
+        tuple[int, int, int, int]: Padding as plain Python ints.
+
+    Raises:
+        AssertionError: If `padding` is not an int/list/tuple of integers.
+        ValueError: If a list/tuple has neither two nor four entries.
+    """
+    if isinstance(padding, int):
+        padding = [padding, padding]
+    assert isinstance(padding, (list, tuple))
+    assert all(isinstance(x, (int, np.integer)) for x in padding)
+    # Convert to built-in ints so NumPy scalars behave like plain ints in
+    # the cache key and plugin arguments built from these values.
+    padding = [int(x) for x in padding]
+    if len(padding) == 2:
+        padx, pady = padding
+        padding = [padx, padx, pady, pady]
+    padx0, padx1, pady0, pady1 = padding
+    return padx0, padx1, pady0, pady1
+
+
+def _get_filter_size(f):
+    """Return `(width, height)` of filter kernel `f` as Python ints.
+
+    `None` is treated as a 1x1 filter. A 1-D filter reports its length for
+    both width and height.
+    """
+    if f is None:
+        return 1, 1
+    assert isinstance(f, torch.Tensor) and f.ndim in [1, 2]
+    width, height = int(f.shape[-1]), int(f.shape[0])
+    assert min(width, height) >= 1
+    return width, height
+
+
+def setup_filter(f,
+                 device=torch.device('cpu'),
+                 normalize=True,
+                 flip_filter=False,
+                 gain=1,
+                 separable=None):
+    r"""Convenience function to setup 2D FIR filter for `upfirdn2d()`.
+
+    The input is never modified: normalization is done out of place, so a
+    float32 tensor or array passed as `f` keeps its original values.
+
+    Args:
+        f:           Torch tensor, numpy array, or python list of the shape
+                     `[filter_height, filter_width]` (non-separable),
+                     `[filter_taps]` (separable),
+                     `[]` (impulse), or
+                     `None` (identity).
+        device:      Result device (default: cpu).
+        normalize:   Normalize the filter so that it retains the magnitude
+                     for constant input signal (DC)? (default: True).
+        flip_filter: Flip the filter? (default: False).
+        gain:        Overall scaling factor for signal magnitude (default: 1).
+        separable:   Return a separable filter? (default: select
+                     automatically, i.e. only for 1-D input with at least
+                     8 taps).
+
+    Returns:
+        Float32 tensor of the shape
+        `[filter_height, filter_width]` (non-separable) or
+        `[filter_taps]` (separable).
+    """
+    # Validate.
+    if f is None:
+        f = 1
+    f = torch.as_tensor(f, dtype=torch.float32)
+    assert f.ndim in [0, 1, 2]
+    assert f.numel() > 0
+    if f.ndim == 0:
+        f = f[np.newaxis]
+
+    # Separable?
+    if separable is None:
+        separable = (f.ndim == 1 and f.numel() >= 8)
+    if f.ndim == 1 and not separable:
+        # Expand the 1-D taps into the equivalent 2-D outer-product kernel.
+        f = f.ger(f)
+    assert f.ndim == (1 if separable else 2)
+
+    # Apply normalize, flip, gain, and device.
+    if normalize:
+        # `torch.as_tensor` may return the caller's own storage (float32
+        # tensor / numpy array), so an in-place `f /= ...` would silently
+        # overwrite the caller's data. Divide out of place instead.
+        f = f / f.sum()
+    if flip_filter:
+        f = f.flip(list(range(f.ndim)))
+    # A separable filter is applied once per axis, so each pass carries
+    # sqrt(gain); a 2-D filter carries the full gain.
+    f = f * (gain**(f.ndim / 2))
+    f = f.to(device=device)
+    return f
+
+
+def upfirdn2d(x,
+              f,
+              up=1,
+              down=1,
+              padding=0,
+              flip_filter=False,
+              gain=1,
+              impl='cuda'):
+    r"""Pad, upsample, filter, and downsample a batch of 2D images.
+
+    For every channel the op performs, in order:
+
+    1. Upsample by inserting N-1 zeros after each pixel (`up`).
+    2. Zero-pad each side by `padding`; negative padding crops instead.
+    3. Convolve with the 2D FIR filter `f`, keeping only outputs whose
+       footprint lies inside the input image.
+    4. Downsample by keeping every Nth pixel (`down`).
+
+    This sequence closely resembles `scipy.signal.upfirdn()`.
+
+    The fused op is considerably more efficient than running the same steps
+    with standard PyTorch ops and supports gradients of arbitrary order.
+
+    The CUDA path is taken only when `impl == 'cuda'`, `x` lives on a CUDA
+    device and the plugin initialises; otherwise the reference
+    implementation is used.
+
+    Args:
+        x:           Float32/float64/float16 input tensor of the shape
+                     `[batch_size, num_channels, in_height, in_width]`.
+        f:           Float32 FIR filter of the shape
+                     `[filter_height, filter_width]` (non-separable),
+                     `[filter_taps]` (separable), or
+                     `None` (identity).
+        up:          Integer upsampling factor. Can be a single int or a
+                     list/tuple `[x, y]` (default: 1).
+        down:        Integer downsampling factor. Can be a single int or a
+                     list/tuple `[x, y]` (default: 1).
+        padding:     Padding with respect to the upsampled image. Can be a
+                     single number or a list/tuple `[x, y]` or
+                     `[x_before, x_after, y_before, y_after]` (default: 0).
+        flip_filter: False = convolution, True = correlation (default: False).
+        gain:        Overall scaling factor for signal magnitude (default: 1).
+        impl:        Implementation to use. Can be `'ref'` or `'cuda'`
+                     (default: `'cuda'`).
+
+    Returns:
+        Tensor of the shape
+        `[batch_size, num_channels, out_height, out_width]`.
+    """
+    assert isinstance(x, torch.Tensor)
+    assert impl in ['ref', 'cuda']
+
+    # Options shared by both implementations.
+    op_kwargs = dict(
+        up=up,
+        down=down,
+        padding=padding,
+        flip_filter=flip_filter,
+        gain=gain)
+
+    if impl == 'cuda' and x.device.type == 'cuda' and _init():
+        cuda_op = _upfirdn2d_cuda(**op_kwargs)
+        return cuda_op.apply(x, f)
+    return _upfirdn2d_ref(x, f, **op_kwargs)
+
+
+def _upfirdn2d_ref(x, f, up=1, down=1, padding=0, flip_filter=False, gain=1):
+    """Slow reference implementation of `upfirdn2d()` using standard PyTorch
+    ops.
+
+    Args:
+        x:           4-D input tensor; its shape is unpacked as
+                     `[batch_size, num_channels, in_height, in_width]`.
+        f:           Float32 filter tensor (1-D or 2-D) that does not require
+                     grad, or `None` for a 1x1 filter of ones.
+        up:          Upsampling factor accepted by `_parse_scaling()`.
+        down:        Downsampling factor accepted by `_parse_scaling()`.
+        padding:     Padding accepted by `_parse_padding()`; negative values
+                     crop.
+        flip_filter: When False the filter is flipped before the conv call.
+        gain:        Scaling factor folded into the filter.
+
+    Returns:
+        The filtered tensor after zero-insertion, padding/cropping,
+        filtering and strided subsampling.
+    """
+    # Validate arguments.
+    assert isinstance(x, torch.Tensor) and x.ndim == 4
+    if f is None:
+        f = torch.ones([1, 1], dtype=torch.float32, device=x.device)
+    assert isinstance(f, torch.Tensor) and f.ndim in [1, 2]
+    assert f.dtype == torch.float32 and not f.requires_grad
+    batch_size, num_channels, in_height, in_width = x.shape
+    upx, upy = _parse_scaling(up)
+    downx, downy = _parse_scaling(down)
+    padx0, padx1, pady0, pady1 = _parse_padding(padding)
+
+    # Check that upsampled buffer is not smaller than the filter.
+    upW = in_width * upx + padx0 + padx1
+    upH = in_height * upy + pady0 + pady1
+    assert upW >= f.shape[-1] and upH >= f.shape[0]
+
+    # Upsample by inserting zeros.
+    # Give every pixel its own trailing unit axis, pad those axes with
+    # zeros up to upx / upy, then fold them back into width / height.
+    x = x.reshape([batch_size, num_channels, in_height, 1, in_width, 1])
+    x = torch.nn.functional.pad(x, [0, upx - 1, 0, 0, 0, upy - 1])
+    x = x.reshape([batch_size, num_channels, in_height * upy, in_width * upx])
+
+    # Pad or crop.
+    # Positive amounts are applied by `pad`, negative amounts by the slice.
+    x = torch.nn.functional.pad(
+        x, [max(padx0, 0),
+            max(padx1, 0),
+            max(pady0, 0),
+            max(pady1, 0)])
+    x = x[:, :,
+          max(-pady0, 0):x.shape[2] - max(-pady1, 0),
+          max(-padx0, 0):x.shape[3] - max(-padx1, 0)]
+
+    # Setup filter.
+    # A 1-D filter is applied twice below, so each pass carries sqrt(gain);
+    # a 2-D filter carries the full gain.
+    f = f * (gain**(f.ndim / 2))
+    f = f.to(x.dtype)
+    # NOTE(review): presumably `conv2d` correlates like
+    # torch.nn.functional.conv2d, hence the flip for true convolution --
+    # confirm against the project's conv2d wrapper.
+    if not flip_filter:
+        f = f.flip(list(range(f.ndim)))
+
+    # Convolve with the filter.
+    # One copy of the filter per channel; `groups=num_channels` makes the
+    # convolution depthwise.
+    f = f[np.newaxis, np.newaxis].repeat([num_channels, 1] + [1] * f.ndim)
+    if f.ndim == 4:
+        x = conv2d(input=x, weight=f, groups=num_channels)
+    else:
+        # Separable filter: one pass along width, one along height.
+        x = conv2d(input=x, weight=f.unsqueeze(2), groups=num_channels)
+        x = conv2d(input=x, weight=f.unsqueeze(3), groups=num_channels)
+
+    # Downsample by throwing away pixels.
+    x = x[:, :, ::downy, ::downx]
+    return x
+
+
+# Maps a tuple of parsed arguments to the autograd Function class built for
+# them by `_upfirdn2d_cuda()`.
+_upfirdn2d_cuda_cache = dict()
+
+
+def _upfirdn2d_cuda(up=1, down=1, padding=0, flip_filter=False, gain=1):
+    """Fast CUDA implementation of `upfirdn2d()` using custom ops.
+
+    Parses the arguments and returns a `torch.autograd.Function` subclass
+    specialised for them. The class is stored in `_upfirdn2d_cuda_cache`, so
+    a later call with the same parsed arguments returns the same class.
+
+    Args:
+        up:          Upsampling factor accepted by `_parse_scaling()`.
+        down:        Downsampling factor accepted by `_parse_scaling()`.
+        padding:     Padding accepted by `_parse_padding()`.
+        flip_filter: Forwarded to the plugin call.
+        gain:        Scaling factor forwarded to the plugin call.
+
+    Returns:
+        The `Upfirdn2dCuda` class. Invoke it as `.apply(x, f)`.
+    """
+    # Parse arguments.
+    upx, upy = _parse_scaling(up)
+    downx, downy = _parse_scaling(down)
+    padx0, padx1, pady0, pady1 = _parse_padding(padding)
+
+    # Lookup from cache.
+    key = (upx, upy, downx, downy, padx0, padx1, pady0, pady1, flip_filter,
+           gain)
+    if key in _upfirdn2d_cuda_cache:
+        return _upfirdn2d_cuda_cache[key]
+
+    # Forward op.
+    class Upfirdn2dCuda(torch.autograd.Function):
+
+        @staticmethod
+        def forward(ctx, x, f):  # pylint: disable=arguments-differ
+            assert isinstance(x, torch.Tensor) and x.ndim == 4
+            if f is None:
+                f = torch.ones([1, 1], dtype=torch.float32, device=x.device)
+            if f.ndim == 1 and f.shape[0] == 1:
+                f = f.square().unsqueeze(
+                    0)  # Convert separable-1 into full-1x1.
+            assert isinstance(f, torch.Tensor) and f.ndim in [1, 2]
+            y = x
+            if f.ndim == 2:
+                y = _plugin.upfirdn2d(y, f, upx, upy, downx, downy, padx0,
+                                      padx1, pady0, pady1, flip_filter, gain)
+            else:
+                # Separable filter: the first call uses only the x-axis
+                # scaling/padding with unit gain, the second only the
+                # y-axis parameters and carries the gain.
+                y = _plugin.upfirdn2d(y, f.unsqueeze(0), upx, 1, downx, 1,
+                                      padx0, padx1, 0, 0, flip_filter, 1.0)
+                y = _plugin.upfirdn2d(y, f.unsqueeze(1), 1, upy, 1, downy, 0,
+                                      0, pady0, pady1, flip_filter, gain)
+            ctx.save_for_backward(f)
+            ctx.x_shape = x.shape
+            return y
+
+        @staticmethod
+        def backward(ctx, dy):  # pylint: disable=arguments-differ
+            f, = ctx.saved_tensors
+            _, _, ih, iw = ctx.x_shape
+            _, _, oh, ow = dy.shape
+            fw, fh = _get_filter_size(f)
+            # Padding for the gradient op, derived from the filter size, the
+            # input/output shapes and the forward padding.
+            p = [
+                fw - padx0 - 1,
+                iw * upx - ow * downx + padx0 - upx + 1,
+                fh - pady0 - 1,
+                ih * upy - oh * downy + pady0 - upy + 1,
+            ]
+            dx = None
+            df = None
+
+            if ctx.needs_input_grad[0]:
+                # The gradient w.r.t. x is the same op with up/down swapped
+                # and the filter flip inverted.
+                dx = _upfirdn2d_cuda(
+                    up=down,
+                    down=up,
+                    padding=p,
+                    flip_filter=(not flip_filter),
+                    gain=gain).apply(dy, f)
+
+            # No gradient is produced for the filter.
+            assert not ctx.needs_input_grad[1]
+            return dx, df
+
+    # Add to cache.
+    _upfirdn2d_cuda_cache[key] = Upfirdn2dCuda
+    return Upfirdn2dCuda
+
+
+def filter2d(x, f, padding=0, flip_filter=False, gain=1, impl='cuda'):
+    r"""Filter a batch of 2D images using the given 2D FIR filter.
+
+    By default the result is padded so that its shape matches the input.
+    User-specified padding is added on top of that; negative values crop.
+    Pixels outside the image are assumed to be zero.
+
+    Args:
+        x:           Float32/float64/float16 input tensor of the shape
+                     `[batch_size, num_channels, in_height, in_width]`.
+        f:           Float32 FIR filter of the shape
+                     `[filter_height, filter_width]` (non-separable),
+                     `[filter_taps]` (separable), or
+                     `None` (identity).
+        padding:     Padding with respect to the output. Can be a single number
+                     or a list/tuple `[x, y]` or `[x_before, x_after, y_before,
+                     y_after]` (default: 0).
+        flip_filter: False = convolution, True = correlation (default: False).
+        gain:        Overall scaling factor for signal magnitude (default: 1).
+        impl:        Implementation to use. Can be `'ref'` or `'cuda'`
+                     (default: `'cuda'`).
+
+    Returns:
+        Tensor of the shape
+        `[batch_size, num_channels, out_height, out_width]`.
+    """
+    extra_x0, extra_x1, extra_y0, extra_y1 = _parse_padding(padding)
+    filt_w, filt_h = _get_filter_size(f)
+
+    # Split the filter extent between the two sides of each axis.
+    total_pad = [
+        extra_x0 + filt_w // 2,
+        extra_x1 + (filt_w - 1) // 2,
+        extra_y0 + filt_h // 2,
+        extra_y1 + (filt_h - 1) // 2,
+    ]
+    return upfirdn2d(
+        x,
+        f,
+        padding=total_pad,
+        flip_filter=flip_filter,
+        gain=gain,
+        impl=impl)
+
+
+def upsample2d(x, f, up=2, padding=0, flip_filter=False, gain=1, impl='cuda'):
+    r"""Upsample a batch of 2D images using the given 2D FIR filter.
+
+    By default the result is padded so that its shape is a multiple of the
+    input. User-specified padding is added on top of that; negative values
+    crop. Pixels outside the image are assumed to be zero.
+
+    Args:
+        x:           Float32/float64/float16 input tensor of the shape
+                     `[batch_size, num_channels, in_height, in_width]`.
+        f:           Float32 FIR filter of the shape
+                     `[filter_height, filter_width]` (non-separable),
+                     `[filter_taps]` (separable), or
+                     `None` (identity).
+        up:          Integer upsampling factor. Can be a single int or a
+                     list/tuple `[x, y]` (default: 2).
+        padding:     Padding with respect to the output. Can be a single number
+                     or a list/tuple `[x, y]` or `[x_before, x_after, y_before,
+                     y_after]` (default: 0).
+        flip_filter: False = convolution, True = correlation (default: False).
+        gain:        Overall scaling factor for signal magnitude (default: 1).
+        impl:        Implementation to use. Can be `'ref'` or `'cuda'`
+                     (default: `'cuda'`).
+
+    Returns:
+        Tensor of the shape
+        `[batch_size, num_channels, out_height, out_width]`.
+    """
+    scale_x, scale_y = _parse_scaling(up)
+    extra_x0, extra_x1, extra_y0, extra_y1 = _parse_padding(padding)
+    filt_w, filt_h = _get_filter_size(f)
+
+    total_pad = [
+        extra_x0 + (filt_w + scale_x - 1) // 2,
+        extra_x1 + (filt_w - scale_x) // 2,
+        extra_y0 + (filt_h + scale_y - 1) // 2,
+        extra_y1 + (filt_h - scale_y) // 2,
+    ]
+    # The gain is scaled by the product of the two upsampling factors.
+    scaled_gain = gain * scale_x * scale_y
+    return upfirdn2d(
+        x,
+        f,
+        up=up,
+        padding=total_pad,
+        flip_filter=flip_filter,
+        gain=scaled_gain,
+        impl=impl)
+
+
+def downsample2d(x,
+                 f,
+                 down=2,
+                 padding=0,
+                 flip_filter=False,
+                 gain=1,
+                 impl='cuda'):
+    r"""Downsample a batch of 2D images using the given 2D FIR filter.
+
+    By default the result is padded so that its shape is a fraction of the
+    input. User-specified padding is added on top of that; negative values
+    crop. Pixels outside the image are assumed to be zero.
+
+    Args:
+        x:           Float32/float64/float16 input tensor of the shape
+                     `[batch_size, num_channels, in_height, in_width]`.
+        f:           Float32 FIR filter of the shape
+                     `[filter_height, filter_width]` (non-separable),
+                     `[filter_taps]` (separable), or
+                     `None` (identity).
+        down:        Integer downsampling factor. Can be a single int or a
+                     list/tuple `[x, y]` (default: 2).
+        padding:     Padding with respect to the input. Can be a single number
+                     or a list/tuple `[x, y]` or `[x_before, x_after, y_before,
+                     y_after]` (default: 0).
+        flip_filter: False = convolution, True = correlation (default: False).
+        gain:        Overall scaling factor for signal magnitude (default: 1).
+        impl:        Implementation to use. Can be `'ref'` or `'cuda'`
+                     (default: `'cuda'`).
+
+    Returns:
+        Tensor of the shape
+        `[batch_size, num_channels, out_height, out_width]`.
+    """
+    step_x, step_y = _parse_scaling(down)
+    extra_x0, extra_x1, extra_y0, extra_y1 = _parse_padding(padding)
+    filt_w, filt_h = _get_filter_size(f)
+
+    total_pad = [
+        extra_x0 + (filt_w - step_x + 1) // 2,
+        extra_x1 + (filt_w - step_x) // 2,
+        extra_y0 + (filt_h - step_y + 1) // 2,
+        extra_y1 + (filt_h - step_y) // 2,
+    ]
+    return upfirdn2d(
+        x,
+        f,
+        down=down,
+        padding=total_pad,
+        flip_filter=flip_filter,
+        gain=gain,
+        impl=impl)
--- a/build/lib/mmgen/utils/__init__.py
+++ b/build/lib/mmgen/utils/__init__.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from .collect_env import collect_env
+from .dist_util import check_dist_init, sync_random_seed
+from .io_utils import MMGEN_CACHE_DIR, download_from_url
+from .logger import get_root_logger
+
+__all__ = [
+    'collect_env', 'get_root_logger', 'download_from_url', 'check_dist_init',
+    'MMGEN_CACHE_DIR', 'sync_random_seed'
+]
--- a/build/lib/mmgen/utils/collect_env.py
+++ b/build/lib/mmgen/utils/collect_env.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import subprocess
+import sys
+from collections import defaultdict
+
+import cv2
+import mmcv
+import torch
+import torchvision
+from mmcv.utils import get_build_config, get_git_hash
+
+import mmgen
+
+
+def collect_env():
+    """Collect the information of the running environments.
+
+    Returns:
+        dict: Maps a human-readable label (e.g. `'Python'`, `'CUDA_HOME'`,
+        `'GCC'`, `'PyTorch'`, `'MMCV'`, `'MMGen'`) to the detected value.
+        Tools that cannot be queried are reported with a placeholder string
+        instead of raising.
+    """
+    env_info = {}
+    env_info['sys.platform'] = sys.platform
+    env_info['Python'] = sys.version.replace('\n', '')
+
+    cuda_available = torch.cuda.is_available()
+    env_info['CUDA available'] = cuda_available
+
+    if cuda_available:
+        # Probe for the helper instead of comparing version strings:
+        # `'1.3.9' < '1.3.11'` is False under lexicographic comparison,
+        # which sent such mmcv releases down the wrong import path.
+        try:
+            from mmcv.utils.parrots_wrapper import _get_cuda_home
+        except ImportError:
+            from mmcv.utils.parrots_wrapper import CUDA_HOME
+        else:
+            CUDA_HOME = _get_cuda_home()
+
+        env_info['CUDA_HOME'] = CUDA_HOME
+
+        if CUDA_HOME is not None and osp.isdir(CUDA_HOME):
+            try:
+                nvcc = osp.join(CUDA_HOME, 'bin/nvcc')
+                nvcc = subprocess.check_output(
+                    '"{}" -V | tail -n1'.format(nvcc), shell=True)
+                nvcc = nvcc.decode('utf-8').strip()
+            except subprocess.SubprocessError:
+                nvcc = 'Not Available'
+            env_info['NVCC'] = nvcc
+
+        # Group device indices by device name.
+        devices = defaultdict(list)
+        for k in range(torch.cuda.device_count()):
+            devices[torch.cuda.get_device_name(k)].append(str(k))
+        for devname, devids in devices.items():
+            env_info['GPU ' + ','.join(devids)] = devname
+
+    # Guard the gcc probe the same way as the nvcc probe so a failing
+    # command does not abort the whole report.
+    try:
+        gcc = subprocess.check_output('gcc --version | head -n1', shell=True)
+        gcc = gcc.decode('utf-8').strip()
+    except subprocess.SubprocessError:
+        gcc = 'n/a'
+    env_info['GCC'] = gcc
+
+    env_info['PyTorch'] = torch.__version__
+    env_info['PyTorch compiling details'] = get_build_config()
+
+    env_info['TorchVision'] = torchvision.__version__
+
+    env_info['OpenCV'] = cv2.__version__
+
+    env_info['MMCV'] = mmcv.__version__
+    env_info['MMGen'] = f'{mmgen.__version__}+{get_git_hash()[:7]}'
+    try:
+        from mmcv.ops import get_compiler_version, get_compiling_cuda_version
+    except ImportError:
+        env_info['MMCV Compiler'] = 'n/a'
+        env_info['MMCV CUDA Compiler'] = 'n/a'
+    else:
+        env_info['MMCV Compiler'] = get_compiler_version()
+        env_info['MMCV CUDA Compiler'] = get_compiling_cuda_version()
+
+    return env_info
+
+
+if __name__ == '__main__':
+    # print one "name: value" line per collected item
+    for key, value in collect_env().items():
+        print(f'{key}: {value}')
--- a/build/lib/mmgen/utils/dist_util.py
+++ b/build/lib/mmgen/utils/dist_util.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import torch
+import torch.distributed as dist
+from mmcv.runner import get_dist_info
+
+
+def check_dist_init():
+    """Return whether ``torch.distributed`` is available and initialized."""
+    if not dist.is_available():
+        return False
+    return dist.is_initialized()
+
+
+def sync_random_seed(seed=None, device='cuda'):
+    """Make sure different ranks share the same seed.
+
+    All workers must call this function, otherwise it will deadlock. It is
+    generally used in `DistributedSampler`, where the seed has to be
+    identical across all processes of the distributed group: every rank
+    shuffles the data indices in the same order based on the same seed and
+    then picks its own, non-overlapping part of that list.
+
+    Args:
+        seed (int, Optional): The seed. When None, a random one is drawn
+            with ``np.random.randint(2**31)``. Default to None.
+        device (str): The device where the seed will be put on.
+            Default to 'cuda'.
+
+    Returns:
+        int: Seed to be used. With a world size of 1 this is the local seed;
+            otherwise it is the value broadcast from rank 0.
+    """
+    if seed is None:
+        seed = np.random.randint(2**31)
+    assert isinstance(seed, int)
+
+    rank, world_size = get_dist_info()
+    if world_size == 1:
+        # nothing to synchronize
+        return seed
+
+    # Rank 0 contributes the real seed; the other ranks only provide a
+    # placeholder that the broadcast overwrites.
+    payload = seed if rank == 0 else 0
+    random_num = torch.tensor(payload, dtype=torch.int32, device=device)
+    dist.broadcast(random_num, src=0)
+    return random_num.item()
--- a/build/lib/mmgen/utils/io_utils.py
+++ b/build/lib/mmgen/utils/io_utils.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import hashlib
+import os
+
+import click
+import mmcv
+import requests
+import torch.distributed as dist
+from mmcv.runner import get_dist_info
+from requests.exceptions import InvalidURL, RequestException, Timeout
+
+# Cache directory under the user's home directory (the value keeps its
+# trailing slash); used as the default `dest_dir` of `download_from_url`.
+MMGEN_CACHE_DIR = os.path.expanduser('~') + '/.cache/openmmlab/mmgen/'
+
+
+def get_content_from_url(url, timeout=15, stream=False):
+    """Get content from url.
+
+    Args:
+        url (str): Url for getting content.
+        timeout (int): Set the socket timeout. Default: 15.
+        stream (bool): Forwarded to ``requests.get`` as ``stream``.
+            Default: False.
+
+    Returns:
+        requests.Response: The response of the GET request. The HTTP status
+            code is not checked here.
+
+    Raises:
+        requests.exceptions.RequestException: Errors raised by
+            ``requests.get`` (including ``InvalidURL`` and ``Timeout``) are
+            propagated unchanged, as is any other exception.
+    """
+    # No local handling: every error is left to the caller.
+    return requests.get(url, timeout=timeout, stream=stream)
+
+
+def download_from_url(url,
+                      dest_path=None,
+                      dest_dir=MMGEN_CACHE_DIR,
+                      hash_prefix=None):
+    """Download object at the given URL to a local path.
+
+    An already existing ``dest_path`` is returned without downloading.
+    Otherwise only rank 0 downloads, and when more than one process is
+    running the processes meet at a barrier afterwards.
+
+    Args:
+        url (str): URL of the object to download.
+        dest_path (str): Path where object will be saved. When None, the
+            last ``/``-separated part of ``url`` is used as the file name
+            inside ``dest_dir``.
+        dest_dir (str): The directory of the destination. Defaults to
+            ``'~/.cache/openmmlab/mmgen/'``.
+        hash_prefix (string, optional): If not None, the SHA256 downloaded
+            file should start with `hash_prefix`. Default: None.
+
+    Returns:
+        str: path for the downloaded file.
+
+    Raises:
+        requests.exceptions.HTTPError: If the server answers with an error
+            status code.
+        RuntimeError: If ``hash_prefix`` is given and the SHA256 digest of
+            the downloaded data does not start with it.
+    """
+    # get the exact destination path
+    if dest_path is None:
+        filename = url.split('/')[-1]
+        dest_path = os.path.join(dest_dir, filename)
+
+    # expand a leading ``~`` (or ``~user``) to the home directory
+    dest_path = os.path.expanduser(dest_path)
+
+    # avoid downloading an existing file
+    if os.path.exists(dest_path):
+        return dest_path
+
+    rank, ws = get_dist_info()
+
+    # only download from the master process
+    if rank == 0:
+        mmcv.mkdir_or_exist(os.path.dirname(dest_path))
+
+        sha256 = hashlib.sha256() if hash_prefix is not None else None
+
+        # Write to a temporary sibling file first, so that an interrupted or
+        # corrupted download never leaves something at ``dest_path`` that a
+        # later call (or another rank) would take for a finished download.
+        tmp_path = dest_path + '.part'
+        finished = False
+
+        response = get_content_from_url(url, stream=True)
+        try:
+            # fail on an HTTP error status instead of saving the error page
+            response.raise_for_status()
+
+            # ``content-length`` may be missing; the progress bar then runs
+            # without a known length.
+            size = response.headers.get('content-length')
+            length = None if size is None else -(-int(size) // 1024)
+
+            with open(tmp_path, 'wb') as fw:
+                content_iter = response.iter_content(chunk_size=1024)
+                with click.progressbar(content_iter, length=length) as chunks:
+                    for chunk in chunks:
+                        if chunk:
+                            fw.write(chunk)
+                            if sha256 is not None:
+                                sha256.update(chunk)
+
+            if sha256 is not None:
+                digest = sha256.hexdigest()
+                if digest[:len(hash_prefix)] != hash_prefix:
+                    raise RuntimeError(
+                        'invalid hash value, expected '
+                        f'"{hash_prefix}", but got "{digest}"')
+
+            # publish the verified file in one step
+            os.replace(tmp_path, dest_path)
+            finished = True
+        finally:
+            response.close()
+            if not finished and os.path.exists(tmp_path):
+                os.remove(tmp_path)
+
+    # sync the other processes
+    # NOTE(review): a rank that already sees the file returns above without
+    # reaching this barrier -- confirm callers cannot hit that race.
+    if ws > 1:
+        dist.barrier()
+
+    return dest_path
--- a/build/lib/mmgen/utils/logger.py
+++ b/build/lib/mmgen/utils/logger.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import logging
+
+from mmcv.utils import get_logger
+
+
+def get_root_logger(log_file=None, log_level=logging.INFO, file_mode='w'):
+    """Return the logger named ``'mmgen'``, creating it on first use.
+
+    The work is delegated to ``mmcv.utils.get_logger``. An already
+    initialized logger is returned as is; otherwise one or two handlers are
+    added: always a StreamHandler, plus a FileHandler when `log_file` is
+    specified and the process rank is 0.
+
+    Args:
+        log_file (str | None): The log filename. If specified, a FileHandler
+            will be added to the logger. Defaults to ``None``.
+        log_level (int): The logger level. Only the process of rank 0 is
+            affected; other processes set the level to "Error" and are thus
+            silent most of the time. Defaults to ``logging.INFO``.
+        file_mode (str): The file mode used in opening log file.
+            Defaults to 'w'.
+
+    Returns:
+        logging.Logger: The expected logger.
+    """
+    logger = get_logger('mmgen', log_file, log_level, file_mode=file_mode)
+    return logger
--- a/build/lib/mmgen/version.py
+++ b/build/lib/mmgen/version.py
+# Copyright (c) OpenMMLab. All rights reserved.
+# Package version string; parsed into `version_info` at the end of this file.
+__version__ = '0.7.3'
+
+
+def parse_version_info(version_str):
+    """Turn a dotted version string into a tuple.
+
+    Purely numeric components become ints. A component containing ``rc``
+    (e.g. ``'0rc1'``) contributes the int before ``rc`` followed by the
+    string ``'rc<suffix>'``. Any other component is dropped.
+
+    Args:
+        version_str (str): Version string.
+
+    Returns:
+        tuple: Version information in tuple.
+    """
+    parts = []
+    for component in version_str.split('.'):
+        if component.isdigit():
+            parts.append(int(component))
+            continue
+        if 'rc' in component:
+            number, candidate = component.split('rc')[:2]
+            parts.extend((int(number), f'rc{candidate}'))
+    return tuple(parts)
+
+
+# Tuple form of `__version__`, computed once when the module is imported.
+version_info = parse_version_info(__version__)
--- a/configs/_base_/datasets/Inception_Score.py
+++ b/configs/_base_/datasets/Inception_Score.py
+dataset_type = 'UnconditionalImageDataset'
+
+# Note that the `Resize` operation must use the `pillow` backend with
+# `bicubic` interpolation for a correct IS evaluation.
+val_pipeline = [
+    dict(
+        type='LoadImageFromFile',
+        key='real_img',
+        io_backend='disk',
+    ),
+    dict(
+        type='Resize',
+        keys=['real_img'],
+        scale=(299, 299),
+        backend='pillow',
+        interpolation='bicubic'),
+    dict(
+        type='Normalize',
+        keys=['real_img'],
+        mean=[127.5] * 3,
+        std=[127.5] * 3,
+        to_rgb=True),
+    dict(type='ImageToTensor', keys=['real_img']),
+    dict(type='Collect', keys=['real_img'], meta_keys=['real_img_path'])
+]
+
+# Only a `val` split is defined. `samples_per_gpu` and `imgs_root` are `None`
+# here -- presumably set by the config that uses this file (TODO confirm).
+data = dict(
+    samples_per_gpu=None,
+    workers_per_gpu=4,
+    val=dict(type=dataset_type, imgs_root=None, pipeline=val_pipeline))
--- a/configs/_base_/datasets/cifar10.py
+++ b/configs/_base_/datasets/cifar10.py
+dataset_type = 'mmcls.CIFAR10'
+
+# Different from mmcls, we adopt the setting used in BigGAN.
+# Note that the pipelines below are from MMClassification. Importantly,
+# `to_rgb` is set to `True` to convert the images to BGR order: the default
+# channel order in Cifar10 is RGB, so it has to be converted to BGR.
+img_norm_cfg = dict(
+    mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True)
+train_pipeline = [
+    dict(type='RandomCrop', size=32, padding=4),
+    dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='ImageToTensor', keys=['img']),
+    dict(type='ToTensor', keys=['gt_label']),
+    dict(type='Collect', keys=['img', 'gt_label'])
+]
+test_pipeline = [
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='ImageToTensor', keys=['img']),
+    dict(type='Collect', keys=['img'])
+]
+
+# Different from the classification task, the val/test splits also use the
+# training part, which is the same as in StyleGAN-ADA.
+data = dict(
+    samples_per_gpu=None,
+    workers_per_gpu=4,
+    train=dict(
+        type=dataset_type, data_prefix='data/cifar10',
+        pipeline=train_pipeline),
+    val=dict(
+        type=dataset_type, data_prefix='data/cifar10', pipeline=test_pipeline),
+    test=dict(
+        type=dataset_type, data_prefix='data/cifar10', pipeline=test_pipeline))
--- a/configs/_base_/datasets/cifar10_inception_stat.py
+++ b/configs/_base_/datasets/cifar10_inception_stat.py
+dataset_type = 'mmcls.CIFAR10'
+
+# This config is used to extract the inception stat of the CIFAR dataset.
+
+# Different from mmcls, we adopt the setting used in BigGAN.
+# Note that the pipelines below are from MMClassification.
+# The default channel order in Cifar10 is RGB, so `to_rgb` is set to `False`
+# here.
+img_norm_cfg = dict(
+    mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=False)
+train_pipeline = [
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='ImageToTensor', keys=['img']),
+    dict(type='ToTensor', keys=['gt_label']),
+    dict(type='Collect', keys=['img', 'gt_label'])
+]
+test_pipeline = [
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='ImageToTensor', keys=['img']),
+    dict(type='Collect', keys=['img'])
+]
+
+# Different from the classification task, the val/test splits also use the
+# training part, which is the same as in StyleGAN-ADA.
+data = dict(
+    samples_per_gpu=None,
+    workers_per_gpu=4,
+    train=dict(
+        type=dataset_type, data_prefix='data/cifar10',
+        pipeline=train_pipeline),
+    val=dict(
+        type=dataset_type, data_prefix='data/cifar10', pipeline=test_pipeline),
+    test=dict(
+        type=dataset_type, data_prefix='data/cifar10', pipeline=test_pipeline))
--- a/configs/_base_/datasets/cifar10_noaug.py
+++ b/configs/_base_/datasets/cifar10_noaug.py
+dataset_type = 'mmcls.CIFAR10'
+
+# Different from mmcls, we adopt the setting used in BigGAN.
+# Note that the pipelines below are from MMClassification. Importantly,
+# `to_rgb` is set to `True` to convert the images to BGR order: the default
+# channel order in Cifar10 is RGB, so it has to be converted to BGR.
+
+# Cifar dataset w/o augmentations: the `RandomFlip` and `RandomCrop`
+# augmentations are removed.
+img_norm_cfg = dict(
+    mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True)
+train_pipeline = [
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='ImageToTensor', keys=['img']),
+    dict(type='ToTensor', keys=['gt_label']),
+    dict(type='Collect', keys=['img', 'gt_label'])
+]
+test_pipeline = [
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='ImageToTensor', keys=['img']),
+    dict(type='Collect', keys=['img'])
+]
+
+# Different from the classification task, the val/test splits also use the
+# training part, which is the same as in StyleGAN-ADA.
+# The training split is wrapped in a `RepeatDataset` with `times=500`.
+data = dict(
+    samples_per_gpu=None,
+    workers_per_gpu=4,
+    train=dict(
+        type='RepeatDataset',
+        times=500,
+        dataset=dict(
+            type=dataset_type,
+            data_prefix='data/cifar10',
+            pipeline=train_pipeline)),
+    val=dict(
+        type=dataset_type, data_prefix='data/cifar10', pipeline=test_pipeline),
+    test=dict(
+        type=dataset_type, data_prefix='data/cifar10', pipeline=test_pipeline))
--- a/configs/_base_/datasets/cifar10_nopad.py
+++ b/configs/_base_/datasets/cifar10_nopad.py
+dataset_type = 'mmcls.CIFAR10'
+
+# Different from mmcls, we adopt the setting used in BigGAN.
+# Note that the pipelines below are from MMClassification. Importantly,
+# `to_rgb` is set to `True` to convert the images to BGR order: the default
+# channel order in Cifar10 is RGB, so it has to be converted to BGR.
+img_norm_cfg = dict(
+    mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True)
+# This training pipeline contains no `RandomCrop`; the horizontal
+# `RandomFlip` is its only augmentation.
+train_pipeline = [
+    dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='ImageToTensor', keys=['img']),
+    dict(type='ToTensor', keys=['gt_label']),
+    dict(type='Collect', keys=['img', 'gt_label'])
+]
+test_pipeline = [
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='ImageToTensor', keys=['img']),
+    dict(type='Collect', keys=['img'])
+]
+
+# Different from the classification task, the val/test splits also use the
+# training part, which is the same as in StyleGAN-ADA.
+data = dict(
+    samples_per_gpu=None,
+    workers_per_gpu=4,
+    train=dict(
+        type=dataset_type, data_prefix='data/cifar10',
+        pipeline=train_pipeline),
+    val=dict(
+        type=dataset_type, data_prefix='data/cifar10', pipeline=test_pipeline),
+    test=dict(
+        type=dataset_type, data_prefix='data/cifar10', pipeline=test_pipeline))
--- a/configs/_base_/datasets/cifar10_random_noise.py
+++ b/configs/_base_/datasets/cifar10_random_noise.py
+dataset_type = 'mmcls.CIFAR10'
+
+# Cifar dataset without flip/crop augmentation.
+# Different from mmcls, we adopt the setting used in BigGAN.
+# Note that the pipelines below are from MMClassification. Importantly,
+# `to_rgb` is set to `True` to convert the images to BGR order: the default
+# channel order in Cifar10 is RGB, so it has to be converted to BGR.
+
+# Follow the pipeline in
+# https://github.com/pfnet-research/sngan_projection/blob/master/datasets/cifar10.py
+# Only the `RandomImgNoise` augmentation is adopted.
+img_norm_cfg = dict(
+    mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True)
+train_pipeline = [
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='RandomImgNoise', keys=['img']),
+    dict(type='ImageToTensor', keys=['img']),
+    dict(type='ToTensor', keys=['gt_label']),
+    dict(type='Collect', keys=['img', 'gt_label'])
+]
+test_pipeline = [
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='ImageToTensor', keys=['img']),
+    dict(type='Collect', keys=['img'])
+]
+
+# Different from the classification task, the val/test splits also use the
+# training part, which is the same as in StyleGAN-ADA.
+data = dict(
+    samples_per_gpu=None,
+    workers_per_gpu=4,
+    train=dict(
+        type=dataset_type, data_prefix='data/cifar10',
+        pipeline=train_pipeline),
+    val=dict(
+        type=dataset_type, data_prefix='data/cifar10', pipeline=test_pipeline),
+    test=dict(
+        type=dataset_type, data_prefix='data/cifar10', pipeline=test_pipeline))