Commit fdeee889 authored by limm

release v1.6.1 of mmcv

parent df465820
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Any
import torch
import torch.nn as nn
from torch.autograd import Function
......@@ -30,9 +32,10 @@ class RoIAlignFunction(Function):
mode_s=pool_mode,
aligned_i=aligned)
else:
from torch.onnx.symbolic_opset9 import sub, squeeze
from torch.onnx.symbolic_helper import _slice_helper
from torch.onnx import TensorProtoDataType
from torch.onnx.symbolic_helper import _slice_helper
from torch.onnx.symbolic_opset9 import squeeze, sub
# batch_indices = rois[:, 0].long()
batch_indices = _slice_helper(
g, rois, axes=[1], starts=[0], ends=[1])
......@@ -61,14 +64,14 @@ class RoIAlignFunction(Function):
mode_s=pool_mode)
@staticmethod
def forward(ctx,
input,
rois,
output_size,
spatial_scale=1.0,
sampling_ratio=0,
pool_mode='avg',
aligned=True):
def forward(ctx: Any,
input: torch.Tensor,
rois: torch.Tensor,
output_size: int,
spatial_scale: float = 1.0,
sampling_ratio: int = 0,
pool_mode: str = 'avg',
aligned: bool = True) -> torch.Tensor:
ctx.output_size = _pair(output_size)
ctx.spatial_scale = spatial_scale
ctx.sampling_ratio = sampling_ratio
......@@ -107,7 +110,7 @@ class RoIAlignFunction(Function):
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
def backward(ctx: Any, grad_output: torch.Tensor) -> tuple:
rois, argmax_y, argmax_x = ctx.saved_tensors
grad_input = grad_output.new_zeros(ctx.input_shape)
# A complex head architecture may produce a non-contiguous grad_output.
......@@ -174,13 +177,13 @@ class RoIAlign(nn.Module):
},
cls_name='RoIAlign')
def __init__(self,
output_size,
spatial_scale=1.0,
sampling_ratio=0,
pool_mode='avg',
aligned=True,
use_torchvision=False):
super(RoIAlign, self).__init__()
output_size: tuple,
spatial_scale: float = 1.0,
sampling_ratio: int = 0,
pool_mode: str = 'avg',
aligned: bool = True,
use_torchvision: bool = False):
super().__init__()
self.output_size = _pair(output_size)
self.spatial_scale = float(spatial_scale)
......@@ -189,7 +192,7 @@ class RoIAlign(nn.Module):
self.aligned = aligned
self.use_torchvision = use_torchvision
def forward(self, input, rois):
def forward(self, input: torch.Tensor, rois: torch.Tensor) -> torch.Tensor:
"""
Args:
input: NCHW images
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Any, Optional, Tuple, Union
import torch
import torch.nn as nn
from torch.autograd import Function
from torch.nn.modules.utils import _pair
from ..utils import ext_loader
from ..utils import deprecated_api_warning, ext_loader
ext_module = ext_loader.load_ext(
'_ext', ['roi_align_rotated_forward', 'roi_align_rotated_backward'])
......@@ -11,80 +15,70 @@ ext_module = ext_loader.load_ext(
class RoIAlignRotatedFunction(Function):
@staticmethod
def symbolic(g, features, rois, out_size, spatial_scale, sample_num,
def symbolic(g, input, rois, output_size, spatial_scale, sampling_ratio,
aligned, clockwise):
if isinstance(out_size, int):
out_h = out_size
out_w = out_size
elif isinstance(out_size, tuple):
assert len(out_size) == 2
assert isinstance(out_size[0], int)
assert isinstance(out_size[1], int)
out_h, out_w = out_size
if isinstance(output_size, int):
out_h = output_size
out_w = output_size
elif isinstance(output_size, tuple):
assert len(output_size) == 2
assert isinstance(output_size[0], int)
assert isinstance(output_size[1], int)
out_h, out_w = output_size
else:
raise TypeError(
'"out_size" must be an integer or tuple of integers')
'"output_size" must be an integer or tuple of integers')
return g.op(
'mmcv::MMCVRoIAlignRotated',
features,
input,
rois,
output_height_i=out_h,
output_width_i=out_w,
spatial_scale_f=spatial_scale,
sampling_ratio_i=sample_num,
sampling_ratio_i=sampling_ratio,
aligned_i=aligned,
clockwise_i=clockwise)
@staticmethod
def forward(ctx,
features,
rois,
out_size,
spatial_scale,
sample_num=0,
aligned=True,
clockwise=False):
if isinstance(out_size, int):
out_h = out_size
out_w = out_size
elif isinstance(out_size, tuple):
assert len(out_size) == 2
assert isinstance(out_size[0], int)
assert isinstance(out_size[1], int)
out_h, out_w = out_size
else:
raise TypeError(
'"out_size" must be an integer or tuple of integers')
def forward(ctx: Any,
input: torch.Tensor,
rois: torch.Tensor,
output_size: Union[int, tuple],
spatial_scale: float,
sampling_ratio: int = 0,
aligned: bool = True,
clockwise: bool = False) -> torch.Tensor:
ctx.output_size = _pair(output_size)
ctx.spatial_scale = spatial_scale
ctx.sample_num = sample_num
ctx.sampling_ratio = sampling_ratio
ctx.aligned = aligned
ctx.clockwise = clockwise
ctx.save_for_backward(rois)
ctx.feature_size = features.size()
ctx.feature_size = input.size()
batch_size, num_channels, data_height, data_width = features.size()
batch_size, num_channels, data_height, data_width = input.size()
num_rois = rois.size(0)
output = features.new_zeros(num_rois, num_channels, out_h, out_w)
output = input.new_zeros(num_rois, num_channels, ctx.output_size[0],
ctx.output_size[1])
ext_module.roi_align_rotated_forward(
features,
input,
rois,
output,
pooled_height=out_h,
pooled_width=out_w,
spatial_scale=spatial_scale,
sample_num=sample_num,
aligned=aligned,
clockwise=clockwise)
pooled_height=ctx.output_size[0],
pooled_width=ctx.output_size[1],
spatial_scale=ctx.spatial_scale,
sampling_ratio=ctx.sampling_ratio,
aligned=ctx.aligned,
clockwise=ctx.clockwise)
return output
@staticmethod
def backward(ctx, grad_output):
def backward(
ctx: Any, grad_output: torch.Tensor
) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], None, None,
None, None, None]:
feature_size = ctx.feature_size
spatial_scale = ctx.spatial_scale
aligned = ctx.aligned
clockwise = ctx.clockwise
sample_num = ctx.sample_num
rois = ctx.saved_tensors[0]
assert feature_size is not None
batch_size, num_channels, data_height, data_width = feature_size
......@@ -103,10 +97,10 @@ class RoIAlignRotatedFunction(Function):
grad_input,
pooled_height=out_h,
pooled_width=out_w,
spatial_scale=spatial_scale,
sample_num=sample_num,
aligned=aligned,
clockwise=clockwise)
spatial_scale=ctx.spatial_scale,
sampling_ratio=ctx.sampling_ratio,
aligned=ctx.aligned,
clockwise=ctx.clockwise)
return grad_input, grad_rois, None, None, None, None, None
......@@ -121,9 +115,9 @@ class RoIAlignRotated(nn.Module):
w, h, angle). The angle is in radians.
Args:
out_size (tuple): h, w
output_size (tuple): h, w
spatial_scale (float): scale the input boxes by this number
sample_num (int): number of inputs samples to take for each
sampling_ratio (int): number of input samples to take for each
output sample. 0 to take samples densely for current models.
aligned (bool): if False, use the legacy implementation in
MMDetection. If True, align the results more precisely.
......@@ -156,22 +150,37 @@ class RoIAlignRotated(nn.Module):
performance if ROIAlign is used together with conv layers.
"""
@deprecated_api_warning(
{
'out_size': 'output_size',
'sample_num': 'sampling_ratio'
},
cls_name='RoIAlignRotated')
def __init__(self,
out_size,
spatial_scale,
sample_num=0,
aligned=True,
clockwise=False):
super(RoIAlignRotated, self).__init__()
self.out_size = out_size
output_size: Union[int, tuple],
spatial_scale: float,
sampling_ratio: int = 0,
aligned: bool = True,
clockwise: bool = False):
super().__init__()
self.output_size = _pair(output_size)
self.spatial_scale = float(spatial_scale)
self.sample_num = int(sample_num)
self.sampling_ratio = int(sampling_ratio)
self.aligned = aligned
self.clockwise = clockwise
def forward(self, features, rois):
return RoIAlignRotatedFunction.apply(features, rois, self.out_size,
def forward(self, input: torch.Tensor, rois: torch.Tensor) -> torch.Tensor:
return RoIAlignRotatedFunction.apply(input, rois, self.output_size,
self.spatial_scale,
self.sample_num, self.aligned,
self.sampling_ratio, self.aligned,
self.clockwise)
def __repr__(self):
s = self.__class__.__name__
s += f'(output_size={self.output_size}, '
s += f'spatial_scale={self.spatial_scale}, '
s += f'sampling_ratio={self.sampling_ratio}, '
s += f'aligned={self.aligned}, '
s += f'clockwise={self.clockwise})'
return s
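# --- Editor's usage sketch (illustration, not part of this diff). Assumes a
# CUDA build of mmcv with the compiled `_ext` extension; each roi is
# (batch_idx, cx, cy, w, h, angle) with the angle in radians.
#     >>> import torch
#     >>> from mmcv.ops import RoIAlignRotated
#     >>> feats = torch.rand(1, 16, 32, 32).cuda()
#     >>> rois = torch.tensor([[0., 16., 16., 8., 8., 0.3]]).cuda()
#     >>> layer = RoIAlignRotated(output_size=7, spatial_scale=1.0)
#     >>> layer(feats, rois).shape
#     torch.Size([1, 16, 7, 7])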
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Any, Tuple, Union
import torch
import torch.nn as nn
from torch.autograd import Function
......@@ -23,7 +25,11 @@ class RoIPoolFunction(Function):
spatial_scale_f=spatial_scale)
@staticmethod
def forward(ctx, input, rois, output_size, spatial_scale=1.0):
def forward(ctx: Any,
input: torch.Tensor,
rois: torch.Tensor,
output_size: Union[int, tuple],
spatial_scale: float = 1.0) -> torch.Tensor:
ctx.output_size = _pair(output_size)
ctx.spatial_scale = spatial_scale
ctx.input_shape = input.size()
......@@ -49,7 +55,9 @@ class RoIPoolFunction(Function):
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
def backward(
ctx: Any, grad_output: torch.Tensor
) -> Tuple[torch.Tensor, None, None, None]:
rois, argmax = ctx.saved_tensors
grad_input = grad_output.new_zeros(ctx.input_shape)
......@@ -70,13 +78,15 @@ roi_pool = RoIPoolFunction.apply
class RoIPool(nn.Module):
def __init__(self, output_size, spatial_scale=1.0):
super(RoIPool, self).__init__()
def __init__(self,
output_size: Union[int, tuple],
spatial_scale: float = 1.0):
super().__init__()
self.output_size = _pair(output_size)
self.spatial_scale = float(spatial_scale)
def forward(self, input, rois):
def forward(self, input: torch.Tensor, rois: torch.Tensor) -> torch.Tensor:
return roi_pool(input, rois, self.output_size, self.spatial_scale)
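# --- Editor's usage sketch (illustration, not part of this diff). Assumes a
# CUDA build with the compiled extension; each roi is (batch_idx, x1, y1,
# x2, y2) in the scale of the input feature map.
#     >>> import torch
#     >>> from mmcv.ops import RoIPool
#     >>> feats = torch.rand(1, 8, 16, 16).cuda()
#     >>> rois = torch.tensor([[0., 0., 0., 7., 7.]]).cuda()
#     >>> pool = RoIPool(output_size=(4, 4), spatial_scale=1.0)
#     >>> pool(feats, rois).shape
#     torch.Size([1, 8, 4, 4])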
def __repr__(self):
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Any, Tuple, Union
import torch
from torch import nn as nn
from torch.autograd import Function
......@@ -25,7 +27,10 @@ class RoIAwarePool3d(nn.Module):
Default: 'max'.
"""
def __init__(self, out_size, max_pts_per_voxel=128, mode='max'):
def __init__(self,
out_size: Union[int, tuple],
max_pts_per_voxel: int = 128,
mode: str = 'max'):
super().__init__()
self.out_size = out_size
......@@ -34,7 +39,8 @@ class RoIAwarePool3d(nn.Module):
pool_mapping = {'max': 0, 'avg': 1}
self.mode = pool_mapping[mode]
def forward(self, rois, pts, pts_feature):
def forward(self, rois: torch.Tensor, pts: torch.Tensor,
pts_feature: torch.Tensor) -> torch.Tensor:
"""
Args:
rois (torch.Tensor): [N, 7], in LiDAR coordinate,
......@@ -43,7 +49,8 @@ class RoIAwarePool3d(nn.Module):
pts_feature (torch.Tensor): [npoints, C], features of input points.
Returns:
pooled_features (torch.Tensor): [N, out_x, out_y, out_z, C]
torch.Tensor: Pooled features whose shape is
[N, out_x, out_y, out_z, C].
"""
return RoIAwarePool3dFunction.apply(rois, pts, pts_feature,
......@@ -54,8 +61,9 @@ class RoIAwarePool3d(nn.Module):
class RoIAwarePool3dFunction(Function):
@staticmethod
def forward(ctx, rois, pts, pts_feature, out_size, max_pts_per_voxel,
mode):
def forward(ctx: Any, rois: torch.Tensor, pts: torch.Tensor,
pts_feature: torch.Tensor, out_size: Union[int, tuple],
max_pts_per_voxel: int, mode: int) -> torch.Tensor:
"""
Args:
rois (torch.Tensor): [N, 7], in LiDAR coordinate,
......@@ -70,8 +78,8 @@ class RoIAwarePool3dFunction(Function):
pool).
Returns:
pooled_features (torch.Tensor): [N, out_x, out_y, out_z, C], output
pooled features.
torch.Tensor: Pooled features whose shape is
[N, out_x, out_y, out_z, C].
"""
if isinstance(out_size, int):
......@@ -107,7 +115,9 @@ class RoIAwarePool3dFunction(Function):
return pooled_features
@staticmethod
def backward(ctx, grad_out):
def backward(
ctx: Any, grad_out: torch.Tensor
) -> Tuple[None, None, torch.Tensor, None, None, None]:
ret = ctx.roiaware_pool3d_for_backward
pts_idx_of_voxels, argmax, mode, num_pts, num_channels = ret
......
from typing import Any, Tuple
import torch
from torch import nn as nn
from torch.autograd import Function
......@@ -17,11 +20,12 @@ class RoIPointPool3d(nn.Module):
Default: 512.
"""
def __init__(self, num_sampled_points=512):
def __init__(self, num_sampled_points: int = 512):
super().__init__()
self.num_sampled_points = num_sampled_points
def forward(self, points, point_features, boxes3d):
def forward(self, points: torch.Tensor, point_features: torch.Tensor,
boxes3d: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
"""
Args:
points (torch.Tensor): Input points whose shape is (B, N, C).
......@@ -30,9 +34,9 @@ class RoIPointPool3d(nn.Module):
boxes3d (B, M, 7), Input bounding boxes whose shape is (B, M, 7).
Returns:
pooled_features (torch.Tensor): The output pooled features whose
shape is (B, M, 512, 3 + C).
pooled_empty_flag (torch.Tensor): Empty flag whose shape is (B, M).
tuple[torch.Tensor]: A tuple containing two elements. The first is
the pooled features whose shape is (B, M, 512, 3 + C). The second
is an empty flag whose shape is (B, M).
"""
return RoIPointPool3dFunction.apply(points, point_features, boxes3d,
self.num_sampled_points)
......@@ -41,7 +45,13 @@ class RoIPointPool3d(nn.Module):
class RoIPointPool3dFunction(Function):
@staticmethod
def forward(ctx, points, point_features, boxes3d, num_sampled_points=512):
def forward(
ctx: Any,
points: torch.Tensor,
point_features: torch.Tensor,
boxes3d: torch.Tensor,
num_sampled_points: int = 512
) -> Tuple[torch.Tensor, torch.Tensor]:
"""
Args:
points (torch.Tensor): Input points whose shape is (B, N, C).
......@@ -52,9 +62,9 @@ class RoIPointPool3dFunction(Function):
Default: 512.
Returns:
pooled_features (torch.Tensor): The output pooled features whose
shape is (B, M, 512, 3 + C).
pooled_empty_flag (torch.Tensor): Empty flag whose shape is (B, M).
tuple[torch.Tensor]: A tuple containing two elements. The first is
the pooled features whose shape is (B, M, 512, 3 + C). The second
is an empty flag whose shape is (B, M).
"""
assert len(points.shape) == 3 and points.shape[2] == 3
batch_size, boxes_num, feature_len = points.shape[0], boxes3d.shape[
......@@ -73,5 +83,5 @@ class RoIPointPool3dFunction(Function):
return pooled_features, pooled_empty_flag
@staticmethod
def backward(ctx, grad_out):
def backward(ctx: Any, grad_out: torch.Tensor) -> torch.Tensor:
raise NotImplementedError
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Any
import torch
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from ..utils import ext_loader
ext_module = ext_loader.load_ext(
'_ext',
['rotated_feature_align_forward', 'rotated_feature_align_backward'])
class RotatedFeatureAlignFunction(Function):
"""Using the feature interpolation to obtain the position information
correspond to the refined rotate anchors and reconstruct the feature maps
in pixel-wise manner to achieve feature alignment.
The details are described in the paper
`R3Det: Refined Single-Stage Detector with Feature Refinement for Rotating
Object <https://arxiv.org/abs/1908.05612>`_.
"""
@staticmethod
def symbolic(g, features, best_rbboxes, spatial_scale, points):
assert points in [1, 5]
return g.op(
'mmcv::MMCVRotatedFeatureAlign',
features,
best_rbboxes,
spatial_scale_f=spatial_scale,
points_i=points)
@staticmethod
def forward(ctx: Any, features: torch.Tensor, best_rbboxes: torch.Tensor,
spatial_scale: float, points: int) -> torch.Tensor:
"""
Args:
features (torch.Tensor): Input features with shape [N,C,H,W].
best_rbboxes (torch.Tensor): Refined rotate anchors with
shape [N,H,W,5]. Coordinate format (cx,cy,h,w,a).
spatial_scale (float): The scale of feature map size and
input image size.
points (int, optional): The number of sample points.
Only 1 and 5 are supported. Defaults to 1.
Returns:
torch.Tensor: Refined features with shape [N,C,H,W].
"""
ctx.spatial_scale = spatial_scale
ctx.points = points
ctx.save_for_backward(best_rbboxes)
assert points in [1, 5]
output = torch.zeros_like(features)
ext_module.rotated_feature_align_forward(
features,
best_rbboxes,
output,
spatial_scale=spatial_scale,
points=points)
return output
@staticmethod
@once_differentiable
def backward(ctx: Any, grad_output: torch.Tensor) -> tuple:
"""
Args:
grad_output (torch.Tensor): The gradient of output features
with shape [N,C,H,W].
Returns:
torch.Tensor: The gradient of input features with shape [N,C,H,W].
"""
best_rbboxes = ctx.saved_tensors[0]
points = ctx.points
spatial_scale = ctx.spatial_scale
grad_input = None
if ctx.needs_input_grad[0]:
grad_input = torch.zeros_like(grad_output)
ext_module.rotated_feature_align_backward(
grad_output.contiguous(),
best_rbboxes,
grad_input,
spatial_scale=spatial_scale,
points=points)
return grad_input, None, None, None
def rotated_feature_align(features: torch.Tensor,
best_rbboxes: torch.Tensor,
spatial_scale: float = 1 / 8,
points: int = 1) -> torch.Tensor:
return RotatedFeatureAlignFunction.apply(features, best_rbboxes,
spatial_scale, points)
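# --- Editor's usage sketch (illustration, not part of this diff). Assumes a
# CUDA build with the compiled extension; `best_rbboxes` carries one refined
# (cx, cy, h, w, a) box per feature-map location, as documented above, and
# the output keeps the input shape.
#     >>> import torch
#     >>> from mmcv.ops import rotated_feature_align
#     >>> feats = torch.rand(1, 16, 8, 8).cuda()
#     >>> rbboxes = torch.rand(1, 8, 8, 5).cuda()
#     >>> rotated_feature_align(feats, rbboxes, spatial_scale=1 / 8,
#     ...                       points=1).shape
#     torch.Size([1, 16, 8, 8])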
......@@ -12,8 +12,9 @@ from mmcv.utils import TORCH_VERSION, digit_version
class SAConv2d(ConvAWS2d):
"""SAC (Switchable Atrous Convolution)
This is an implementation of SAC in DetectoRS
(https://arxiv.org/pdf/2006.02334.pdf).
This is an implementation of `DetectoRS: Detecting Objects with Recursive
Feature Pyramid and Switchable Atrous Convolution
<https://arxiv.org/abs/2006.02334>`_.
Args:
in_channels (int): Number of channels in the input image
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Any, List, Optional, Tuple
import torch
import torch.nn.functional as F
from torch import nn
from torch.autograd import Function
......@@ -13,7 +16,10 @@ ext_module = ext_loader.load_ext(
class _DynamicScatter(Function):
@staticmethod
def forward(ctx, feats, coors, reduce_type='max'):
def forward(ctx: Any,
feats: torch.Tensor,
coors: torch.Tensor,
reduce_type: str = 'max') -> Tuple[torch.Tensor, torch.Tensor]:
"""convert kitti points(N, >=3) to voxels.
Args:
......@@ -25,10 +31,10 @@ class _DynamicScatter(Function):
'mean'. Default: 'max'.
Returns:
voxel_feats (torch.Tensor): [M, C]. Reduced features, input
features that shares the same voxel coordinates are reduced to
one row.
voxel_coors (torch.Tensor): [M, ndim]. Voxel coordinates.
tuple[torch.Tensor]: A tuple containing two elements. The first is
the voxel features with shape [M, C], reduced from the input
features that share the same voxel coordinates. The second is the
voxel coordinates with shape [M, ndim].
"""
results = ext_module.dynamic_point_to_voxel_forward(
feats, coors, reduce_type)
......@@ -41,7 +47,9 @@ class _DynamicScatter(Function):
return voxel_feats, voxel_coors
@staticmethod
def backward(ctx, grad_voxel_feats, grad_voxel_coors=None):
def backward(ctx: Any,
grad_voxel_feats: torch.Tensor,
grad_voxel_coors: Optional[torch.Tensor] = None) -> tuple:
(feats, voxel_feats, point2voxel_map,
voxel_points_count) = ctx.saved_tensors
grad_feats = torch.zeros_like(feats)
......@@ -72,14 +80,17 @@ class DynamicScatter(nn.Module):
into voxel.
"""
def __init__(self, voxel_size, point_cloud_range, average_points: bool):
def __init__(self, voxel_size: List, point_cloud_range: List,
average_points: bool):
super().__init__()
self.voxel_size = voxel_size
self.point_cloud_range = point_cloud_range
self.average_points = average_points
def forward_single(self, points, coors):
def forward_single(
self, points: torch.Tensor,
coors: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
"""Scatters points into voxels.
Args:
......@@ -88,14 +99,16 @@ class DynamicScatter(nn.Module):
multi-dim voxel index) of each points.
Returns:
voxel_feats (torch.Tensor): Reduced features, input features that
shares the same voxel coordinates are reduced to one row.
voxel_coors (torch.Tensor): Voxel coordinates.
tuple[torch.Tensor]: A tuple containing two elements. The first is
the voxel features with shape [M, C], reduced from the input
features that share the same voxel coordinates. The second is the
voxel coordinates with shape [M, ndim].
"""
reduce = 'mean' if self.average_points else 'max'
return dynamic_scatter(points.contiguous(), coors.contiguous(), reduce)
def forward(self, points, coors):
def forward(self, points: torch.Tensor,
coors: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
"""Scatters points/features into voxels.
Args:
......@@ -104,9 +117,10 @@ class DynamicScatter(nn.Module):
multi-dim voxel index) of each points.
Returns:
voxel_feats (torch.Tensor): Reduced features, input features that
shares the same voxel coordinates are reduced to one row.
voxel_coors (torch.Tensor): Voxel coordinates.
tuple[torch.Tensor]: A tuple containing two elements. The first is
the voxel features with shape [M, C], reduced from the input
features that share the same voxel coordinates. The second is the
voxel coordinates with shape [M, ndim].
"""
if coors.size(-1) == 3:
return self.forward_single(points, coors)
......@@ -117,8 +131,7 @@ class DynamicScatter(nn.Module):
inds = torch.where(coors[:, 0] == i)
voxel, voxel_coor = self.forward_single(
points[inds], coors[inds][:, 1:])
coor_pad = nn.functional.pad(
voxel_coor, (1, 0), mode='constant', value=i)
coor_pad = F.pad(voxel_coor, (1, 0), mode='constant', value=i)
voxel_coors.append(coor_pad)
voxels.append(voxel)
features = torch.cat(voxels, dim=0)
......
# Copyright 2019 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import numpy as np
import torch
from torch.nn import init
from torch.nn.parameter import Parameter
from ..cnn import CONV_LAYERS
from . import sparse_functional as Fsp
from . import sparse_ops as ops
from .sparse_modules import SparseModule
from .sparse_structure import SparseConvTensor
def _calculate_fan_in_and_fan_out_hwio(tensor):
dimensions = tensor.ndimension()
if dimensions < 2:
raise ValueError('fan in and fan out can not be computed for tensor '
'with fewer than 2 dimensions')
if dimensions == 2: # Linear
fan_in = tensor.size(-2)
fan_out = tensor.size(-1)
else:
num_input_fmaps = tensor.size(-2)
num_output_fmaps = tensor.size(-1)
receptive_field_size = 1
if tensor.dim() > 2:
receptive_field_size = tensor[..., 0, 0].numel()
fan_in = num_input_fmaps * receptive_field_size
fan_out = num_output_fmaps * receptive_field_size
return fan_in, fan_out
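# --- Editor's worked example: for an HWIO weight of shape
# (kh, kw, Cin, Cout) = (3, 3, 16, 32), the receptive field size is
# 3 * 3 = 9, so fan_in = 16 * 9 = 144 and fan_out = 32 * 9 = 288.
#     >>> w = torch.empty(3, 3, 16, 32)
#     >>> _calculate_fan_in_and_fan_out_hwio(w)
#     (144, 288)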
class SparseConvolution(SparseModule):
def __init__(self,
ndim,
in_channels,
out_channels,
kernel_size=3,
stride=1,
padding=0,
dilation=1,
groups=1,
bias=True,
subm=False,
output_padding=0,
transposed=False,
inverse=False,
indice_key=None,
fused_bn=False):
super().__init__()
assert groups == 1
if not isinstance(kernel_size, (list, tuple)):
kernel_size = [kernel_size] * ndim
if not isinstance(stride, (list, tuple)):
stride = [stride] * ndim
if not isinstance(padding, (list, tuple)):
padding = [padding] * ndim
if not isinstance(dilation, (list, tuple)):
dilation = [dilation] * ndim
if not isinstance(output_padding, (list, tuple)):
output_padding = [output_padding] * ndim
for d, s in zip(dilation, stride):
assert any([s == 1, d == 1]), 'simultaneous stride > 1 and dilation > 1 is not supported'
self.ndim = ndim
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = kernel_size
self.conv1x1 = np.prod(kernel_size) == 1
self.stride = stride
self.padding = padding
self.dilation = dilation
self.transposed = transposed
self.inverse = inverse
self.output_padding = output_padding
self.groups = groups
self.subm = subm
self.indice_key = indice_key
self.fused_bn = fused_bn
self.weight = Parameter(
torch.Tensor(*kernel_size, in_channels, out_channels))
if bias:
self.bias = Parameter(torch.Tensor(out_channels))
else:
self.register_parameter('bias', None)
self.reset_parameters()
def reset_parameters(self):
init.kaiming_uniform_(self.weight, a=math.sqrt(5))
if self.bias is not None:
fan_in, _ = _calculate_fan_in_and_fan_out_hwio(self.weight)
bound = 1 / math.sqrt(fan_in)
init.uniform_(self.bias, -bound, bound)
def forward(self, input):
assert isinstance(input, SparseConvTensor)
features = input.features
device = features.device
indices = input.indices
spatial_shape = input.spatial_shape
batch_size = input.batch_size
if not self.subm:
if self.transposed:
out_spatial_shape = ops.get_deconv_output_size(
spatial_shape, self.kernel_size, self.stride, self.padding,
self.dilation, self.output_padding)
else:
out_spatial_shape = ops.get_conv_output_size(
spatial_shape, self.kernel_size, self.stride, self.padding,
self.dilation)
else:
out_spatial_shape = spatial_shape
if self.conv1x1:
features = torch.mm(
input.features,
self.weight.view(self.in_channels, self.out_channels))
if self.bias is not None:
features += self.bias
out_tensor = SparseConvTensor(features, input.indices,
input.spatial_shape,
input.batch_size)
out_tensor.indice_dict = input.indice_dict
out_tensor.grid = input.grid
return out_tensor
data = input.find_indice_pair(self.indice_key)
if self.inverse:
assert data is not None and self.indice_key is not None
_, outids, indice_pairs, indice_pair_num, out_spatial_shape = data
assert indice_pairs.shape[0] == np.prod(
self.kernel_size
), 'inverse conv must have the same kernel size as its paired conv'
else:
if self.indice_key is not None and data is not None:
outids, _, indice_pairs, indice_pair_num, _ = data
else:
outids, indice_pairs, indice_pair_num = ops.get_indice_pairs(
indices,
batch_size,
spatial_shape,
self.kernel_size,
self.stride,
self.padding,
self.dilation,
self.output_padding,
self.subm,
self.transposed,
grid=input.grid)
input.indice_dict[self.indice_key] = (outids, indices,
indice_pairs,
indice_pair_num,
spatial_shape)
if self.fused_bn:
assert self.bias is not None
out_features = ops.fused_indice_conv(features, self.weight,
self.bias,
indice_pairs.to(device),
indice_pair_num,
outids.shape[0], self.inverse,
self.subm)
else:
if self.subm:
out_features = Fsp.indice_subm_conv(features, self.weight,
indice_pairs.to(device),
indice_pair_num,
outids.shape[0])
else:
if self.inverse:
out_features = Fsp.indice_inverse_conv(
features, self.weight, indice_pairs.to(device),
indice_pair_num, outids.shape[0])
else:
out_features = Fsp.indice_conv(features, self.weight,
indice_pairs.to(device),
indice_pair_num,
outids.shape[0])
if self.bias is not None:
out_features += self.bias
out_tensor = SparseConvTensor(out_features, outids, out_spatial_shape,
batch_size)
out_tensor.indice_dict = input.indice_dict
out_tensor.grid = input.grid
return out_tensor
@CONV_LAYERS.register_module()
class SparseConv2d(SparseConvolution):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
bias=True,
indice_key=None):
super().__init__(
2,
in_channels,
out_channels,
kernel_size,
stride,
padding,
dilation,
groups,
bias,
indice_key=indice_key)
@CONV_LAYERS.register_module()
class SparseConv3d(SparseConvolution):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
bias=True,
indice_key=None):
super().__init__(
3,
in_channels,
out_channels,
kernel_size,
stride,
padding,
dilation,
groups,
bias,
indice_key=indice_key)
@CONV_LAYERS.register_module()
class SparseConv4d(SparseConvolution):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
bias=True,
indice_key=None):
super().__init__(
4,
in_channels,
out_channels,
kernel_size,
stride,
padding,
dilation,
groups,
bias,
indice_key=indice_key)
@CONV_LAYERS.register_module()
class SparseConvTranspose2d(SparseConvolution):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
bias=True,
indice_key=None):
super().__init__(
2,
in_channels,
out_channels,
kernel_size,
stride,
padding,
dilation,
groups,
bias,
transposed=True,
indice_key=indice_key)
@CONV_LAYERS.register_module()
class SparseConvTranspose3d(SparseConvolution):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
bias=True,
indice_key=None):
super().__init__(
3,
in_channels,
out_channels,
kernel_size,
stride,
padding,
dilation,
groups,
bias,
transposed=True,
indice_key=indice_key)
@CONV_LAYERS.register_module()
class SparseInverseConv2d(SparseConvolution):
def __init__(self,
in_channels,
out_channels,
kernel_size,
indice_key=None,
bias=True):
super().__init__(
2,
in_channels,
out_channels,
kernel_size,
bias=bias,
inverse=True,
indice_key=indice_key)
@CONV_LAYERS.register_module()
class SparseInverseConv3d(SparseConvolution):
def __init__(self,
in_channels,
out_channels,
kernel_size,
indice_key=None,
bias=True):
super().__init__(
3,
in_channels,
out_channels,
kernel_size,
bias=bias,
inverse=True,
indice_key=indice_key)
@CONV_LAYERS.register_module()
class SubMConv2d(SparseConvolution):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
bias=True,
indice_key=None):
super().__init__(
2,
in_channels,
out_channels,
kernel_size,
stride,
padding,
dilation,
groups,
bias,
True,
indice_key=indice_key)
@CONV_LAYERS.register_module()
class SubMConv3d(SparseConvolution):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
bias=True,
indice_key=None):
super().__init__(
3,
in_channels,
out_channels,
kernel_size,
stride,
padding,
dilation,
groups,
bias,
True,
indice_key=indice_key)
@CONV_LAYERS.register_module()
class SubMConv4d(SparseConvolution):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
bias=True,
indice_key=None):
super().__init__(
4,
in_channels,
out_channels,
kernel_size,
stride,
padding,
dilation,
groups,
bias,
True,
indice_key=indice_key)
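# --- Editor's usage sketch (illustration, not part of this diff). Assumes a
# CUDA build of mmcv that exports these names from mmcv.ops; a submanifold
# conv keeps the set of active sites unchanged.
#     >>> import torch
#     >>> from mmcv.ops import SparseConvTensor, SubMConv3d
#     >>> feats = torch.rand(4, 16).cuda()                 # 4 active sites
#     >>> idx = torch.tensor([[0, 0, 0, 0], [0, 1, 1, 1],
#     ...                     [0, 2, 3, 4], [0, 7, 7, 7]]).int().cuda()
#     >>> x = SparseConvTensor(feats, idx, [8, 8, 8], batch_size=1)
#     >>> conv = SubMConv3d(16, 32, kernel_size=3, padding=1,
#     ...                   indice_key='subm1').cuda()
#     >>> conv(x).features.shape                           # same 4 sites
#     torch.Size([4, 32])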
# Copyright 2019 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any
import torch
from torch.autograd import Function
from . import sparse_ops as ops
class SparseConvFunction(Function):
"""Sparse Convolution.
Please refer to `SECOND <https://www.mdpi.com/1424-8220/18/10/3337>`_ for
more details.
"""
@staticmethod
def forward(ctx: Any, features: torch.Tensor, filters: torch.nn.Parameter,
indice_pairs: torch.Tensor, indice_pair_num: torch.Tensor,
num_activate_out: int) -> torch.Tensor:
"""
Args:
features (torch.Tensor): Features to be convolved.
filters (torch.nn.parameter.Parameter): Convolution filters.
indice_pairs (torch.Tensor): Indice pairs between input locations
and output locations.
indice_pair_num (torch.Tensor): Number of indice pairs.
num_activate_out (int): Number of activated output locations.
Returns:
torch.Tensor: Output features from gather-gemm-scatter.
"""
ctx.save_for_backward(indice_pairs, indice_pair_num, features, filters)
return ops.indice_conv(features, filters, indice_pairs,
indice_pair_num, num_activate_out, False)
@staticmethod
def backward(ctx: Any, grad_output: torch.Tensor) -> tuple:
indice_pairs, indice_pair_num, features, filters = ctx.saved_tensors
input_bp, filters_bp = ops.indice_conv_backward(
features, filters, grad_output, indice_pairs, indice_pair_num,
False)
return input_bp, filters_bp, None, None, None
class SparseInverseConvFunction(Function):
@staticmethod
def forward(ctx: Any, features: torch.Tensor, filters: torch.nn.Parameter,
indice_pairs: torch.Tensor, indice_pair_num: torch.Tensor,
num_activate_out: int) -> torch.Tensor:
"""
Args:
features (torch.Tensor): Features to be convolved.
filters (torch.nn.parameter.Parameter): Convolution filters.
indice_pairs (torch.Tensor): Indice pairs between input locations
and output locations.
indice_pair_num (torch.Tensor): Number of indice pairs.
num_activate_out (int): Number of activated output locations.
Returns:
torch.Tensor: Output features from gather-gemm-scatter.
"""
ctx.save_for_backward(indice_pairs, indice_pair_num, features, filters)
return ops.indice_conv(features, filters, indice_pairs,
indice_pair_num, num_activate_out, True, False)
@staticmethod
def backward(ctx: Any, grad_output: torch.Tensor) -> tuple:
indice_pairs, indice_pair_num, features, filters = ctx.saved_tensors
input_bp, filters_bp = ops.indice_conv_backward(
features, filters, grad_output, indice_pairs, indice_pair_num,
True, False)
return input_bp, filters_bp, None, None, None
class SubMConvFunction(Function):
@staticmethod
def forward(ctx: Any, features: torch.Tensor, filters: torch.nn.Parameter,
indice_pairs: torch.Tensor, indice_pair_num: torch.Tensor,
num_activate_out: int) -> torch.Tensor:
"""
Args:
features (torch.Tensor): Features to be convolved.
filters (torch.nn.parameter.Parameter): Convolution filters.
indice_pairs (torch.Tensor): Indice pairs between input locations
and output locations.
indice_pair_num (torch.Tensor): Number of indice pairs.
num_activate_out (int): Number of activated output locations.
Returns:
torch.Tensor: Output features from gather-gemm-scatter.
"""
ctx.save_for_backward(indice_pairs, indice_pair_num, features, filters)
return ops.indice_conv(features, filters, indice_pairs,
indice_pair_num, num_activate_out, False, True)
@staticmethod
def backward(ctx: Any, grad_output: torch.Tensor) -> tuple:
indice_pairs, indice_pair_num, features, filters = ctx.saved_tensors
input_bp, filters_bp = ops.indice_conv_backward(
features, filters, grad_output, indice_pairs, indice_pair_num,
False, True)
return input_bp, filters_bp, None, None, None
class SparseMaxPoolFunction(Function):
@staticmethod
def forward(ctx, features: torch.Tensor, indice_pairs: torch.Tensor,
indice_pair_num: torch.Tensor,
num_activate_out: int) -> torch.Tensor:
"""
Args:
features (torch.Tensor): Features to be pooled.
indice_pairs (torch.Tensor): Indice pairs between input locations
and output locations.
indice_pair_num (torch.Tensor): Number of indice pairs.
num_activate_out (int): Number of activated output locations.
Returns:
torch.Tensor: Output features from sparse maxpooling.
"""
out = ops.indice_maxpool(features, indice_pairs, indice_pair_num,
num_activate_out)
ctx.save_for_backward(indice_pairs, indice_pair_num, features, out)
return out
@staticmethod
def backward(ctx: Any, grad_output: torch.Tensor) -> tuple:
indice_pairs, indice_pair_num, features, out = ctx.saved_tensors
input_bp = ops.indice_maxpool_backward(features, out, grad_output,
indice_pairs, indice_pair_num)
return input_bp, None, None, None
indice_conv = SparseConvFunction.apply
indice_inverse_conv = SparseInverseConvFunction.apply
indice_subm_conv = SubMConvFunction.apply
indice_maxpool = SparseMaxPoolFunction.apply
# Copyright 2019 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
from collections import OrderedDict
from typing import Any, List, Optional, Union
import torch
from torch import nn
from .sparse_structure import SparseConvTensor
def is_spconv_module(module: nn.Module) -> bool:
spconv_modules = (SparseModule, )
return isinstance(module, spconv_modules)
def is_sparse_conv(module: nn.Module) -> bool:
from .sparse_conv import SparseConvolution
return isinstance(module, SparseConvolution)
def _mean_update(vals: Union[int, List], m_vals: Union[int, List],
t: float) -> List:
outputs = []
if not isinstance(vals, list):
vals = [vals]
if not isinstance(m_vals, list):
m_vals = [m_vals]
for val, m_val in zip(vals, m_vals):
output = t / float(t + 1) * m_val + 1 / float(t + 1) * val
outputs.append(output)
if len(outputs) == 1:
outputs = outputs[0]
return outputs
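# --- Editor's worked example: after t previous samples the helper keeps a
# running mean, output = t / (t + 1) * m_val + 1 / (t + 1) * val.
#     >>> _mean_update(4, 2, 1)            # 0.5 * 2 + 0.5 * 4
#     3.0
#     >>> _mean_update([4, 8], [2, 2], 3)  # 0.75 * m_val + 0.25 * val
#     [2.5, 3.5]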
class SparseModule(nn.Module):
"""place holder, All module subclass from this will take sptensor in
SparseSequential."""
pass
class SparseSequential(SparseModule):
r"""A sequential container.
Modules will be added to it in the order they are passed in the
constructor.
Alternatively, an ordered dict of modules can also be passed in.
To make it easier to understand, here is a small example::
Example:
>>> # using Sequential:
>>> from mmcv.ops import SparseSequential
>>> model = SparseSequential(
SparseConv2d(1,20,5),
nn.ReLU(),
SparseConv2d(20,64,5),
nn.ReLU()
)
>>> # using Sequential with OrderedDict
>>> model = SparseSequential(OrderedDict([
('conv1', SparseConv2d(1,20,5)),
('relu1', nn.ReLU()),
('conv2', SparseConv2d(20,64,5)),
('relu2', nn.ReLU())
]))
>>> # using Sequential with kwargs(python 3.6+)
>>> model = SparseSequential(
conv1=SparseConv2d(1,20,5),
relu1=nn.ReLU(),
conv2=SparseConv2d(20,64,5),
relu2=nn.ReLU()
)
"""
def __init__(self, *args, **kwargs):
super().__init__()
if len(args) == 1 and isinstance(args[0], OrderedDict):
for key, module in args[0].items():
self.add_module(key, module)
else:
for idx, module in enumerate(args):
self.add_module(str(idx), module)
for name, module in kwargs.items():
if sys.version_info < (3, 6):
raise ValueError('kwargs only supported in py36+')
if name in self._modules:
raise ValueError('name exists.')
self.add_module(name, module)
self._sparity_dict = {}
def __getitem__(self, idx: int) -> nn.Module:
if not (-len(self) <= idx < len(self)):
raise IndexError(f'index {idx} is out of range')
if idx < 0:
idx += len(self)
it = iter(self._modules.values())
for i in range(idx):
next(it)
return next(it)
def __len__(self):
return len(self._modules)
@property
def sparity_dict(self):
return self._sparity_dict
def add(self, module: Any, name: Optional[str] = None) -> None:
if name is None:
name = str(len(self._modules))
if name in self._modules:
raise KeyError('name exists')
self.add_module(name, module)
def forward(self, input: torch.Tensor) -> torch.Tensor:
for k, module in self._modules.items():
if is_spconv_module(module):
assert isinstance(input, SparseConvTensor)
self._sparity_dict[k] = input.sparity
input = module(input)
else:
if isinstance(input, SparseConvTensor):
if input.indices.shape[0] != 0:
input.features = module(input.features)
else:
input = module(input)
return input
def fused(self):
from .sparse_conv import SparseConvolution
mods = [v for k, v in self._modules.items()]
fused_mods = []
idx = 0
while idx < len(mods):
if is_sparse_conv(mods[idx]):
if idx < len(mods) - 1 and isinstance(mods[idx + 1],
nn.BatchNorm1d):
new_module = SparseConvolution(
ndim=mods[idx].ndim,
in_channels=mods[idx].in_channels,
out_channels=mods[idx].out_channels,
kernel_size=mods[idx].kernel_size,
stride=mods[idx].stride,
padding=mods[idx].padding,
dilation=mods[idx].dilation,
groups=mods[idx].groups,
bias=True,
subm=mods[idx].subm,
output_padding=mods[idx].output_padding,
transposed=mods[idx].transposed,
inverse=mods[idx].inverse,
indice_key=mods[idx].indice_key,
fused_bn=True,
)
new_module.load_state_dict(mods[idx].state_dict(), False)
new_module.to(mods[idx].weight.device)
conv = new_module
bn = mods[idx + 1]
conv.bias.data.zero_()
# fold BN into the conv: w' = w * gamma / sqrt(var + eps),
# b' = (b - mean) * gamma / sqrt(var + eps) + beta
conv.weight.data[:] = conv.weight.data * bn.weight.data / (
torch.sqrt(bn.running_var + bn.eps))
conv.bias.data[:] = (
conv.bias.data - bn.running_mean) * bn.weight.data / (
torch.sqrt(bn.running_var + bn.eps)) + bn.bias.data
fused_mods.append(conv)
idx += 2
else:
fused_mods.append(mods[idx])
idx += 1
else:
fused_mods.append(mods[idx])
idx += 1
return SparseSequential(*fused_mods)
class ToDense(SparseModule):
"""convert SparseConvTensor to NCHW dense tensor."""
def forward(self, x: SparseConvTensor):
return x.dense()
class RemoveGrid(SparseModule):
"""remove pre-allocated grid buffer."""
def forward(self, x: SparseConvTensor):
x.grid = None
return x
# Copyright 2019 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import torch
from ..utils import ext_loader
ext_module = ext_loader.load_ext('_ext', [
'get_indice_pairs_2d_forward', 'get_indice_pairs_3d_forward',
'get_indice_pairs_4d_forward', 'get_indice_pairs_2d_backward',
'get_indice_pairs_3d_backward', 'indice_conv_forward',
'indice_conv_backward', 'fused_indice_conv_forward',
'indice_maxpool_forward', 'indice_maxpool_backward'
])
def get_conv_output_size(input_size, kernel_size, stride, padding, dilation):
ndim = len(input_size)
output_size = []
for i in range(ndim):
size = (input_size[i] + 2 * padding[i] - dilation[i] *
(kernel_size[i] - 1) - 1) // stride[i] + 1
if kernel_size[i] == -1:
output_size.append(1)
else:
output_size.append(size)
return output_size
def get_deconv_output_size(input_size, kernel_size, stride, padding, dilation,
output_padding):
ndim = len(input_size)
output_size = []
for i in range(ndim):
if kernel_size[i] == -1:
raise ValueError("deconv don't support kernel_size < 0")
size = (input_size[i] - 1) * stride[i] - 2 * padding[i] + kernel_size[
i] + output_padding[i]
output_size.append(size)
return output_size
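# --- Editor's worked example: a 32-long axis with kernel 3, stride 2,
# padding 1, dilation 1 gives (32 + 2 - 2 - 1) // 2 + 1 = 16, and the
# deconv size inverts it: (16 - 1) * 2 - 2 + 3 + 1 = 32 with
# output_padding 1.
#     >>> get_conv_output_size([32, 32], [3, 3], [2, 2], [1, 1], [1, 1])
#     [16, 16]
#     >>> get_deconv_output_size([16, 16], [3, 3], [2, 2], [1, 1], [1, 1],
#     ...                        [1, 1])
#     [32, 32]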
def get_indice_pairs(indices,
batch_size,
spatial_shape,
ksize=3,
stride=1,
padding=0,
dilation=1,
out_padding=0,
subm=False,
transpose=False,
grid=None):
ndim = indices.shape[1] - 1
if not isinstance(ksize, (list, tuple)):
ksize = [ksize] * ndim
if not isinstance(stride, (list, tuple)):
stride = [stride] * ndim
if not isinstance(padding, (list, tuple)):
padding = [padding] * ndim
if not isinstance(dilation, (list, tuple)):
dilation = [dilation] * ndim
if not isinstance(out_padding, (list, tuple)):
out_padding = [out_padding] * ndim
for d, s in zip(dilation, stride):
assert any([s == 1, d == 1]), 'simultaneous stride > 1 and dilation > 1 is not supported'
if not subm:
if transpose:
out_shape = get_deconv_output_size(spatial_shape, ksize, stride,
padding, dilation, out_padding)
else:
out_shape = get_conv_output_size(spatial_shape, ksize, stride,
padding, dilation)
else:
out_shape = spatial_shape
if grid is None:
if ndim == 2:
get_indice_pairs_func = ext_module.get_indice_pairs_2d_forward
elif ndim == 3:
get_indice_pairs_func = ext_module.get_indice_pairs_3d_forward
elif ndim == 4:
get_indice_pairs_func = ext_module.get_indice_pairs_4d_forward
else:
raise NotImplementedError
return get_indice_pairs_func(indices, batch_size, out_shape,
spatial_shape, ksize, stride, padding,
dilation, out_padding, int(subm),
int(transpose))
else:
if ndim == 2:
get_indice_pairs_func = ext_module.get_indice_pairs_2d_backward
elif ndim == 3:
get_indice_pairs_func = ext_module.get_indice_pairs_3d_backward
else:
raise NotImplementedError
return get_indice_pairs_func(indices, grid, batch_size, out_shape,
spatial_shape, ksize, stride, padding,
dilation, out_padding, int(subm),
int(transpose))
def indice_conv(features,
filters,
indice_pairs,
indice_pair_num,
num_activate_out,
inverse=False,
subm=False):
if filters.dtype == torch.float32 or filters.dtype == torch.half:
return ext_module.indice_conv_forward(features, filters, indice_pairs,
indice_pair_num,
num_activate_out, int(inverse),
int(subm))
else:
raise NotImplementedError
def fused_indice_conv(features, filters, bias, indice_pairs, indice_pair_num,
num_activate_out, inverse, subm):
if features.dtype == torch.half or filters.dtype == torch.float32:
func = ext_module.fused_indice_conv_forward
else:
raise NotImplementedError
return func(features, filters, bias, indice_pairs, indice_pair_num,
num_activate_out, int(inverse), int(subm))
def indice_conv_backward(features,
filters,
out_bp,
indice_pairs,
indice_pair_num,
inverse=False,
subm=False):
if filters.dtype == torch.float32 or filters.dtype == torch.half:
return ext_module.indice_conv_backward(features, filters, out_bp,
indice_pairs, indice_pair_num,
int(inverse), int(subm))
else:
raise NotImplementedError
def indice_maxpool(features, indice_pairs, indice_pair_num, num_activate_out):
if features.dtype == torch.float32 or features.dtype == torch.half:
return ext_module.indice_maxpool_forward(features, indice_pairs,
indice_pair_num,
num_activate_out)
else:
raise NotImplementedError
def indice_maxpool_backward(features, out_features, out_bp, indice_pairs,
indice_pair_num):
if features.dtype == torch.float32 or features.dtype == torch.half:
return ext_module.indice_maxpool_backward(features, out_features,
out_bp, indice_pairs,
indice_pair_num)
else:
raise NotImplementedError
# Copyright 2019 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# import sparse_functional as Fsp
# import sparse_ops as ops
from .sparse_functional import indice_maxpool
from .sparse_modules import SparseModule
from .sparse_ops import get_conv_output_size, get_indice_pairs
from .sparse_structure import SparseConvTensor
class SparseMaxPool(SparseModule):
def __init__(self,
ndim,
kernel_size,
stride=1,
padding=0,
dilation=1,
subm=False):
super().__init__()
if not isinstance(kernel_size, (list, tuple)):
kernel_size = [kernel_size] * ndim
if not isinstance(stride, (list, tuple)):
stride = [stride] * ndim
if not isinstance(padding, (list, tuple)):
padding = [padding] * ndim
if not isinstance(dilation, (list, tuple)):
dilation = [dilation] * ndim
self.ndim = ndim
self.kernel_size = kernel_size
self.stride = stride
self.padding = padding
self.subm = subm
self.dilation = dilation
def forward(self, input):
assert isinstance(input, SparseConvTensor)
features = input.features
device = features.device
indices = input.indices
spatial_shape = input.spatial_shape
batch_size = input.batch_size
if not self.subm:
out_spatial_shape = get_conv_output_size(spatial_shape,
self.kernel_size,
self.stride, self.padding,
self.dilation)
else:
out_spatial_shape = spatial_shape
outids, indice_pairs, indice_pairs_num = get_indice_pairs(
indices, batch_size, spatial_shape, self.kernel_size, self.stride,
self.padding, self.dilation, 0, self.subm)
out_features = indice_maxpool(features, indice_pairs.to(device),
indice_pairs_num.to(device),
outids.shape[0])
out_tensor = SparseConvTensor(out_features, outids, out_spatial_shape,
batch_size)
out_tensor.indice_dict = input.indice_dict
out_tensor.grid = input.grid
return out_tensor
class SparseMaxPool2d(SparseMaxPool):
def __init__(self, kernel_size, stride=1, padding=0, dilation=1):
super().__init__(2, kernel_size, stride, padding, dilation)
class SparseMaxPool3d(SparseMaxPool):
def __init__(self, kernel_size, stride=1, padding=0, dilation=1):
super().__init__(3, kernel_size, stride, padding, dilation)
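# --- Editor's usage sketch (illustration, not part of this diff). Assumes a
# CUDA build with the compiled extension. Each spatial axis follows
# get_conv_output_size: (8 - (2 - 1) - 1) // 2 + 1 = 4.
#     >>> import torch
#     >>> feats = torch.rand(2, 16).cuda()
#     >>> idx = torch.tensor([[0, 0, 0, 0], [0, 4, 4, 4]]).int().cuda()
#     >>> x = SparseConvTensor(feats, idx, [8, 8, 8], batch_size=1)
#     >>> pool = SparseMaxPool3d(kernel_size=2, stride=2)
#     >>> pool(x).spatial_shape
#     [4, 4, 4]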
from typing import List, Optional, Tuple, Union
import numpy as np
import torch
def scatter_nd(indices: torch.Tensor, updates: torch.Tensor,
shape: torch.Tensor) -> torch.Tensor:
"""pytorch edition of tensorflow scatter_nd.
this function don't contain except handle code. so use this carefully when
indice repeats, don't support repeat add which is supported in tensorflow.
"""
ret = torch.zeros(*shape, dtype=updates.dtype, device=updates.device)
ndim = indices.shape[-1]
output_shape = list(indices.shape[:-1]) + shape[indices.shape[-1]:]
flatted_indices = indices.view(-1, ndim)
slices = [flatted_indices[:, i] for i in range(ndim)]
slices += [Ellipsis]
ret[slices] = updates.view(*output_shape)
return ret
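# --- Editor's worked example: the last index dimension addresses rows of
# the output; repeated indices overwrite rather than accumulate.
#     >>> idx = torch.tensor([[0], [2]])
#     >>> scatter_nd(idx, torch.tensor([1., 2.]), [4])
#     tensor([1., 0., 2., 0.])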
class SparseConvTensor:
def __init__(self,
features: torch.Tensor,
indices: torch.Tensor,
spatial_shape: Union[List, Tuple],
batch_size: int,
grid: Optional[torch.Tensor] = None):
self.features = features
self.indices = indices
if self.indices.dtype != torch.int32:
self.indices = self.indices.int()
self.spatial_shape = spatial_shape
self.batch_size = batch_size
self.indice_dict: dict = {}
self.grid = grid
@property
def spatial_size(self):
return np.prod(self.spatial_shape)
def find_indice_pair(self, key):
if key is None:
return None
if key in self.indice_dict:
return self.indice_dict[key]
return None
def dense(self, channels_first: bool = True) -> torch.Tensor:
output_shape = [self.batch_size] + list(
self.spatial_shape) + [self.features.shape[1]]
res = scatter_nd(self.indices.long(), self.features, output_shape)
if not channels_first:
return res
ndim = len(self.spatial_shape)
trans_params = list(range(0, ndim + 1))
trans_params.insert(1, ndim + 1)
return res.permute(*trans_params).contiguous()
@property
def sparity(self):
return (self.indices.shape[0] / np.prod(self.spatial_shape) /
self.batch_size)
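# --- Editor's usage sketch: two active sites in a 2x2 grid scatter into an
# NCHW dense tensor; sparity is active sites / (spatial size * batch).
#     >>> feats = torch.tensor([[1., 1.], [2., 2.]])
#     >>> idx = torch.tensor([[0, 0, 0], [0, 1, 1]], dtype=torch.int32)
#     >>> x = SparseConvTensor(feats, idx, [2, 2], batch_size=1)
#     >>> x.dense().shape
#     torch.Size([1, 2, 2, 2])
#     >>> x.sparity
#     0.5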
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional
import torch
import torch.distributed as dist
import torch.nn.functional as F
......@@ -35,8 +37,10 @@ class SyncBatchNormFunction(Function):
stats_mode=stats_mode)
@staticmethod
def forward(self, input, running_mean, running_var, weight, bias, momentum,
eps, group, group_size, stats_mode):
def forward(self, input: torch.Tensor, running_mean: torch.Tensor,
running_var: torch.Tensor, weight: torch.Tensor,
bias: torch.Tensor, momentum: float, eps: float, group: int,
group_size: int, stats_mode: str) -> torch.Tensor:
self.momentum = momentum
self.eps = eps
self.group = group
......@@ -126,7 +130,7 @@ class SyncBatchNormFunction(Function):
@staticmethod
@once_differentiable
def backward(self, grad_output):
def backward(self, grad_output: torch.Tensor) -> tuple:
norm, std, weight = self.saved_tensors
grad_weight = torch.zeros_like(weight)
grad_bias = torch.zeros_like(weight)
......@@ -191,14 +195,14 @@ class SyncBatchNorm(Module):
"""
def __init__(self,
num_features,
eps=1e-5,
momentum=0.1,
affine=True,
track_running_stats=True,
group=None,
stats_mode='default'):
super(SyncBatchNorm, self).__init__()
num_features: int,
eps: float = 1e-5,
momentum: float = 0.1,
affine: bool = True,
track_running_stats: bool = True,
group: Optional[int] = None,
stats_mode: str = 'default'):
super().__init__()
self.num_features = num_features
self.eps = eps
self.momentum = momentum
......@@ -239,7 +243,7 @@ class SyncBatchNorm(Module):
self.weight.data.uniform_()  # pytorch uses ones_()
self.bias.data.zero_()
def forward(self, input):
def forward(self, input: torch.Tensor) -> torch.Tensor:
if input.dim() < 2:
raise ValueError(
f'expected at least 2D input, got {input.dim()}D input')
......
from typing import Tuple
from typing import Any, Tuple
import torch
from torch.autograd import Function
......@@ -17,18 +17,19 @@ class ThreeInterpolate(Function):
"""
@staticmethod
def forward(ctx, features: torch.Tensor, indices: torch.Tensor,
def forward(ctx: Any, features: torch.Tensor, indices: torch.Tensor,
weight: torch.Tensor) -> torch.Tensor:
"""
Args:
features (Tensor): (B, C, M) Features descriptors to be
interpolated
indices (Tensor): (B, n, 3) index three nearest neighbors
of the target features in features
weight (Tensor): (B, n, 3) weights of interpolation
features (torch.Tensor): (B, C, M) Features descriptors to be
interpolated.
indices (torch.Tensor): (B, n, 3) indices of three nearest
neighbor features for the target features.
weight (torch.Tensor): (B, n, 3) weights of three nearest
neighbor features for the target features.
Returns:
Tensor: (B, C, N) tensor of the interpolated features
torch.Tensor: (B, C, N) tensor of the interpolated features
"""
assert features.is_contiguous()
assert indices.is_contiguous()
......@@ -49,10 +50,10 @@ class ThreeInterpolate(Function):
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
"""
Args:
grad_out (Tensor): (B, C, N) tensor with gradients of outputs
grad_out (torch.Tensor): (B, C, N) tensor with gradients of outputs
Returns:
Tensor: (B, C, M) tensor with gradients of features
torch.Tensor: (B, C, M) tensor with gradients of features
"""
idx, weight, m = ctx.three_interpolate_for_backward
B, c, n = grad_out.size()
......
from typing import Tuple
from typing import Any, Tuple
import torch
from torch.autograd import Function
......@@ -16,18 +16,18 @@ class ThreeNN(Function):
"""
@staticmethod
def forward(ctx, target: torch.Tensor,
def forward(ctx: Any, target: torch.Tensor,
source: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
"""
Args:
target (Tensor): shape (B, N, 3), points set that needs to
target (torch.Tensor): shape (B, N, 3), points set that needs to
find the nearest neighbors.
source (Tensor): shape (B, M, 3), points set that is used
source (torch.Tensor): shape (B, M, 3), points set that is used
to find the nearest neighbors of points in target set.
Returns:
Tensor: shape (B, N, 3), L2 distance of each point in target
set to their corresponding nearest neighbors.
torch.Tensor: shape (B, N, 3), L2 distance of each point in the
target set to its corresponding three nearest neighbors.
"""
target = target.contiguous()
source = source.contiguous()
......
......@@ -18,6 +18,10 @@ class TINShiftFunction(Function):
@staticmethod
def forward(ctx, input, shift):
if input.size(0) != shift.size(0):
raise ValueError(
'The first dim (batch) of `input` and `shift` should be '
f'the same, but got {input.size(0)} and {shift.size(0)}.')
C = input.size(2)
num_segments = shift.size(1)
if C // num_segments <= 0 or C % num_segments != 0:
......@@ -51,7 +55,9 @@ class TINShift(nn.Module):
Temporal Interlace shift is a differentiable temporal-wise frame shifting
which is proposed in "Temporal Interlacing Network"
Please refer to https://arxiv.org/abs/2001.06499 for more details.
Please refer to `Temporal Interlacing Network
<https://arxiv.org/abs/2001.06499>`_ for more details.
Code is modified from https://github.com/mit-han-lab/temporal-shift-module
"""
......@@ -59,8 +65,9 @@ class TINShift(nn.Module):
"""Perform temporal interlace shift.
Args:
input (Tensor): Feature map with shape [N, num_segments, C, H * W].
shift (Tensor): Shift tensor with shape [N, num_segments].
input (torch.Tensor): Feature map with shape
[N, num_segments, C, H * W].
shift (torch.Tensor): Shift tensor with shape [N, num_segments].
Returns:
Feature map after temporal interlace shift.
......
......@@ -95,6 +95,8 @@
# =======================================================================
from typing import Any, List, Tuple, Union
import torch
from torch.autograd import Function
from torch.nn import functional as F
......@@ -108,8 +110,10 @@ upfirdn2d_ext = ext_loader.load_ext('_ext', ['upfirdn2d'])
class UpFirDn2dBackward(Function):
@staticmethod
def forward(ctx, grad_output, kernel, grad_kernel, up, down, pad, g_pad,
in_size, out_size):
def forward(ctx: Any, grad_output: torch.Tensor, kernel: torch.Tensor,
grad_kernel: torch.Tensor, up: tuple, down: tuple, pad: tuple,
g_pad: tuple, in_size: Union[List, Tuple],
out_size: Union[List, Tuple]) -> torch.Tensor:
up_x, up_y = up
down_x, down_y = down
......@@ -149,7 +153,7 @@ class UpFirDn2dBackward(Function):
return grad_input
@staticmethod
def backward(ctx, gradgrad_input):
def backward(ctx: Any, gradgrad_input: torch.Tensor) -> tuple:
kernel, = ctx.saved_tensors
gradgrad_input = gradgrad_input.reshape(-1, ctx.in_size[2],
......@@ -177,7 +181,8 @@ class UpFirDn2dBackward(Function):
class UpFirDn2d(Function):
@staticmethod
def forward(ctx, input, kernel, up, down, pad):
def forward(ctx: Any, input: torch.Tensor, kernel: torch.Tensor, up: tuple,
down: tuple, pad: tuple) -> torch.Tensor:
up_x, up_y = up
down_x, down_y = down
pad_x0, pad_x1, pad_y0, pad_y1 = pad
......@@ -222,7 +227,7 @@ class UpFirDn2d(Function):
return out
@staticmethod
def backward(ctx, grad_output):
def backward(ctx: Any, grad_output: torch.Tensor) -> tuple:
kernel, grad_kernel = ctx.saved_tensors
grad_input = UpFirDn2dBackward.apply(
......@@ -240,7 +245,12 @@ class UpFirDn2d(Function):
return grad_input, None, None, None, None
def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)):
def upfirdn2d(
input: torch.Tensor,
kernel: torch.Tensor,
up: Union[int, tuple] = 1,
down: Union[int, tuple] = 1,
pad: tuple = (0, 0)) -> torch.Tensor: # noqa E125
"""UpFRIDn for 2d features.
UpFIRDn is short for upsample, apply FIR filter and downsample. More
......@@ -248,8 +258,8 @@ def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)):
https://www.mathworks.com/help/signal/ref/upfirdn.html
Args:
input (Tensor): Tensor with shape of (n, c, h, w).
kernel (Tensor): Filter kernel.
input (torch.Tensor): Tensor with shape of (n, c, h, w).
kernel (torch.Tensor): Filter kernel.
up (int | tuple[int], optional): Upsampling factor. If given a number,
we will use this factor for both the height and width sides.
Defaults to 1.
......@@ -260,18 +270,18 @@ def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)):
(x_pad_0, x_pad_1, y_pad_0, y_pad_1). Defaults to (0, 0).
Returns:
Tensor: Tensor after UpFIRDn.
torch.Tensor: Tensor after UpFIRDn.
"""
if input.device.type == 'cpu':
if len(pad) == 2:
pad = (pad[0], pad[1], pad[0], pad[1])
pad = (pad[0], pad[1], pad[0], pad[1]) # type: ignore
up = to_2tuple(up)
_up = to_2tuple(up)
down = to_2tuple(down)
_down = to_2tuple(down)
out = upfirdn2d_native(input, kernel, up[0], up[1], down[0], down[1],
pad[0], pad[1], pad[2], pad[3])
out = upfirdn2d_native(input, kernel, _up[0], _up[1], _down[0],
_down[1], pad[0], pad[1], pad[2], pad[3])
else:
_up = to_2tuple(up)
......@@ -287,8 +297,9 @@ def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)):
return out
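A hedged usage sketch of the function documented above (the kernel and factors are illustrative; a separable 4-tap binomial filter is a common choice in StyleGAN2-style code). With up=2 and pad=(1, 2), a 16x16 input maps to 32x32:

import torch
from mmcv.ops import upfirdn2d

k = torch.tensor([1., 3., 3., 1.])
kernel = k[None, :] * k[:, None]
kernel = kernel / kernel.sum()  # normalized 2D FIR filter
x = torch.randn(1, 3, 16, 16)
out = upfirdn2d(x, kernel, up=2, down=1, pad=(1, 2))
print(out.shape)  # torch.Size([1, 3, 32, 32])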
def upfirdn2d_native(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1,
pad_y0, pad_y1):
def upfirdn2d_native(input: torch.Tensor, kernel: torch.Tensor, up_x: int,
up_y: int, down_x: int, down_y: int, pad_x0: int,
pad_x1: int, pad_y0: int, pad_y1: int) -> torch.Tensor:
_, channel, in_h, in_w = input.shape
input = input.reshape(-1, in_h, in_w, 1)
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Any, List, Tuple, Union
import torch
from torch import nn
from torch.autograd import Function
......@@ -13,12 +15,14 @@ ext_module = ext_loader.load_ext(
class _Voxelization(Function):
@staticmethod
def forward(ctx,
points,
voxel_size,
coors_range,
max_points=35,
max_voxels=20000):
def forward(
ctx: Any,
points: torch.Tensor,
voxel_size: Union[tuple, float],
coors_range: Union[tuple, float],
max_points: int = 35,
max_voxels: int = 20000,
deterministic: bool = True) -> Union[Tuple[torch.Tensor], Tuple]:
"""Convert kitti points(N, >=3) to voxels.
Args:
......@@ -34,15 +38,24 @@ class _Voxelization(Function):
for SECOND, 20000 is a good choice. Users should shuffle points
before calling this function because max_voxels may drop points.
Default: 20000.
deterministic (bool): Whether to invoke the deterministic version
of hard voxelization. The non-deterministic version is
considerably faster but is not deterministic; this only affects
hard voxelization. Default: True. For more information on this
argument and the implementation insights, please refer to the
following links:
https://github.com/open-mmlab/mmdetection3d/issues/894
https://github.com/open-mmlab/mmdetection3d/pull/904
This is an experimental feature, and we would appreciate it if
you could share failing cases with us.
Returns:
voxels_out (torch.Tensor): Output voxels with the shape of [M,
max_points, ndim]. Only contain points and returned when
max_points != -1.
coors_out (torch.Tensor): Output coordinates with the shape of
[M, 3].
num_points_per_voxel_out (torch.Tensor): Num points per voxel with
the shape of [M]. Only returned when max_points != -1.
tuple[torch.Tensor]: A tuple containing three elements. The first
is the output voxels with the shape of [M, max_points, n_dim],
which only contains points and is only returned when
max_points != -1. The second is the voxel coordinates with the
shape of [M, 3]. The last is the number of points per voxel with
the shape of [M], which is only returned when max_points != -1.
"""
if max_points == -1 or max_voxels == -1:
coors = points.new_zeros(size=(points.size(0), 3), dtype=torch.int)
......@@ -70,7 +83,8 @@ class _Voxelization(Function):
voxel_num,
max_points=max_points,
max_voxels=max_voxels,
NDim=3)
NDim=3,
deterministic=deterministic)
# select the valid voxels
voxels_out = voxels[:voxel_num]
coors_out = coors[:voxel_num]
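A hedged sketch of the functional interface defined by this class (values are illustrative; hard voxelization on a CUDA build is assumed, and voxelization is the _Voxelization.apply alias defined below):

import torch
from mmcv.ops import voxelization  # alias of _Voxelization.apply

points = torch.rand(1000, 4, device='cuda')  # x, y, z, feature
voxels, coors, num_points_per_voxel = voxelization(
    points,
    [0.5, 0.5, 0.5],             # voxel_size
    [0., 0., 0., 20., 20., 4.],  # coors_range
    35,                          # max_points per voxel
    20000,                       # max_voxels
    True)                        # deterministic
# voxels: [M, 35, 4], coors: [M, 3], num_points_per_voxel: [M]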
......@@ -84,8 +98,8 @@ voxelization = _Voxelization.apply
class Voxelization(nn.Module):
"""Convert kitti points(N, >=3) to voxels.
Please refer to `PVCNN <https://arxiv.org/abs/1907.03739>`_ for more
details.
Please refer to `Point-Voxel CNN for Efficient 3D Deep Learning
<https://arxiv.org/abs/1907.03739>`_ for more details.
Args:
voxel_size (tuple or float): The size of voxel with the shape of [3].
......@@ -100,10 +114,30 @@ class Voxelization(nn.Module):
"""
def __init__(self,
voxel_size,
point_cloud_range,
max_num_points,
max_voxels=20000):
voxel_size: List,
point_cloud_range: List,
max_num_points: int,
max_voxels: Union[tuple, int] = 20000,
deterministic: bool = True):
"""
Args:
voxel_size (list): List [x, y, z], the size of a voxel along each
of the three dimensions.
point_cloud_range (list):
[x_min, y_min, z_min, x_max, y_max, z_max]
max_num_points (int): Max number of points per voxel.
max_voxels (tuple or int): Max number of voxels at
(training, testing) time.
deterministic (bool): Whether to invoke the deterministic version
of hard voxelization. The non-deterministic version is
considerably faster but is not deterministic; this only affects
hard voxelization. Default: True. For more information on this
argument and the implementation insights, please refer to the
following links:
https://github.com/open-mmlab/mmdetection3d/issues/894
https://github.com/open-mmlab/mmdetection3d/pull/904
This is an experimental feature, and we would appreciate it if
you could share failing cases with us.
"""
super().__init__()
self.voxel_size = voxel_size
......@@ -113,12 +147,14 @@ class Voxelization(nn.Module):
self.max_voxels = max_voxels
else:
self.max_voxels = _pair(max_voxels)
self.deterministic = deterministic
point_cloud_range = torch.tensor(
point_cloud_range, dtype=torch.float32)
voxel_size = torch.tensor(voxel_size, dtype=torch.float32)
grid_size = (point_cloud_range[3:] -
point_cloud_range[:3]) / voxel_size
grid_size = (
point_cloud_range[3:] - # type: ignore
point_cloud_range[:3]) / voxel_size # type: ignore
grid_size = torch.round(grid_size).long()
input_feat_shape = grid_size[:2]
self.grid_size = grid_size
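Worked example of the grid_size computation above, using a common KITTI-style configuration (the values are illustrative):

import torch

point_cloud_range = torch.tensor([0., -40., -3., 70.4, 40., 1.])
voxel_size = torch.tensor([0.05, 0.05, 0.1])
grid_size = torch.round(
    (point_cloud_range[3:] - point_cloud_range[:3]) / voxel_size).long()
print(grid_size)  # tensor([1408, 1600, 40]) -> [w, h, d]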
......@@ -126,14 +162,15 @@ class Voxelization(nn.Module):
# [w, h, d] -> [d, h, w]
self.pcd_shape = [*input_feat_shape, 1][::-1]
def forward(self, input):
def forward(self, input: torch.Tensor) -> torch.Tensor:
if self.training:
max_voxels = self.max_voxels[0]
else:
max_voxels = self.max_voxels[1]
return voxelization(input, self.voxel_size, self.point_cloud_range,
self.max_num_points, max_voxels)
self.max_num_points, max_voxels,
self.deterministic)
def __repr__(self):
s = self.__class__.__name__ + '('
......@@ -141,5 +178,6 @@ class Voxelization(nn.Module):
s += ', point_cloud_range=' + str(self.point_cloud_range)
s += ', max_num_points=' + str(self.max_num_points)
s += ', max_voxels=' + str(self.max_voxels)
s += ', deterministic=' + str(self.deterministic)
s += ')'
return s
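Finally, a hedged end-to-end sketch of the module interface (shapes and ranges are illustrative; as in forward above, training/testing mode selects the first/second entry of max_voxels):

import torch
from mmcv.ops import Voxelization

voxel_layer = Voxelization(
    voxel_size=[0.5, 0.5, 0.5],
    point_cloud_range=[0., 0., 0., 20., 20., 4.],
    max_num_points=35,
    max_voxels=(16000, 40000),  # (training, testing)
    deterministic=True)
voxel_layer.eval()  # use the testing cap (40000)
points = torch.rand(1000, 4, device='cuda')
voxels, coors, num_points_per_voxel = voxel_layer(points)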