Commit fdeee889 authored by limm

release v1.6.1 of mmcv

parent df465820
......@@ -18,11 +18,11 @@ class FurthestPointSampling(Function):
num_points: int) -> torch.Tensor:
"""
Args:
points_xyz (Tensor): (B, N, 3) where N > num_points.
points_xyz (torch.Tensor): (B, N, 3) where N > num_points.
num_points (int): Number of points in the sampled set.
Returns:
Tensor: (B, num_points) indices of the sampled points.
torch.Tensor: (B, num_points) indices of the sampled points.
"""
assert points_xyz.is_contiguous()
......@@ -56,11 +56,12 @@ class FurthestPointSamplingWithDist(Function):
num_points: int) -> torch.Tensor:
"""
Args:
points_dist (Tensor): (B, N, N) Distance between each point pair.
points_dist (torch.Tensor): (B, N, N) Distance between each point
pair.
num_points (int): Number of points in the sampled set.
Returns:
Tensor: (B, num_points) indices of the sampled points.
torch.Tensor: (B, num_points) indices of the sampled points.
"""
assert points_dist.is_contiguous()
......
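A usage sketch for the sampling ops documented above (illustrative only; it assumes a CUDA build of mmcv that exports `furthest_point_sample` from `mmcv.ops`):

import torch
from mmcv.ops import furthest_point_sample

points_xyz = torch.rand(2, 1024, 3).cuda()    # (B, N, 3), N > num_points
idx = furthest_point_sample(points_xyz, 128)  # (B, 128) indices of sampled points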
......@@ -113,7 +113,8 @@ class FusedBiasLeakyReLUFunctionBackward(Function):
"""
@staticmethod
def forward(ctx, grad_output, out, negative_slope, scale):
def forward(ctx, grad_output: torch.Tensor, out: torch.Tensor,
negative_slope: float, scale: float) -> tuple:
ctx.save_for_backward(out)
ctx.negative_slope = negative_slope
ctx.scale = scale
......@@ -139,7 +140,8 @@ class FusedBiasLeakyReLUFunctionBackward(Function):
return grad_input, grad_bias
@staticmethod
def backward(ctx, gradgrad_input, gradgrad_bias):
def backward(ctx, gradgrad_input: torch.Tensor,
gradgrad_bias: nn.Parameter) -> tuple:
out, = ctx.saved_tensors
# The second order derivative, in fact, contains two parts, while the
......@@ -160,7 +162,8 @@ class FusedBiasLeakyReLUFunctionBackward(Function):
class FusedBiasLeakyReLUFunction(Function):
@staticmethod
def forward(ctx, input, bias, negative_slope, scale):
def forward(ctx, input: torch.Tensor, bias: nn.Parameter,
negative_slope: float, scale: float) -> torch.Tensor:
empty = input.new_empty(0)
out = ext_module.fused_bias_leakyrelu(
......@@ -178,7 +181,7 @@ class FusedBiasLeakyReLUFunction(Function):
return out
@staticmethod
def backward(ctx, grad_output):
def backward(ctx, grad_output: torch.Tensor) -> tuple:
out, = ctx.saved_tensors
grad_input, grad_bias = FusedBiasLeakyReLUFunctionBackward.apply(
......@@ -188,51 +191,59 @@ class FusedBiasLeakyReLUFunction(Function):
class FusedBiasLeakyReLU(nn.Module):
"""Fused bias leaky ReLU.
r"""Fused bias leaky ReLU.
This function is introduced in the StyleGAN2:
http://arxiv.org/abs/1912.04958
`Analyzing and Improving the Image Quality of StyleGAN
<http://arxiv.org/abs/1912.04958>`_
The bias term comes from the convolution operation. In addition, to keep
the variance of the feature map or gradients unchanged, they also adopt a
scale similar to that of Kaiming initialization. However, since the
:math:`1+{alpha}^2` : is too small, we can just ignore it. Therefore, the
final scale is just :math:`\sqrt{2}`:. Of course, you may change it with # noqa: W605, E501
:math:`1+{alpha}^2` is too small, we can just ignore it. Therefore, the
final scale is just :math:`\sqrt{2}`. Of course, you may change it with
your own scale.
TODO: Implement the CPU version.
Args:
channel (int): The channel number of the feature map.
num_channels (int): The channel number of the feature map.
negative_slope (float, optional): Same as nn.LeakyReLU.
Defaults to 0.2.
scale (float, optional): A scalar to adjust the variance of the feature
map. Defaults to 2**0.5.
"""
def __init__(self, num_channels, negative_slope=0.2, scale=2**0.5):
super(FusedBiasLeakyReLU, self).__init__()
def __init__(self,
num_channels: int,
negative_slope: float = 0.2,
scale: float = 2**0.5):
super().__init__()
self.bias = nn.Parameter(torch.zeros(num_channels))
self.negative_slope = negative_slope
self.scale = scale
def forward(self, input):
def forward(self, input: torch.Tensor) -> torch.Tensor:
return fused_bias_leakyrelu(input, self.bias, self.negative_slope,
self.scale)
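A hedged usage sketch of the module above (illustrative; the op has no CPU implementation per the TODO, so a CUDA build of mmcv is assumed):

import torch
from mmcv.ops import FusedBiasLeakyReLU

act = FusedBiasLeakyReLU(num_channels=64).cuda()
x = torch.randn(2, 64, 32, 32).cuda()
y = act(x)  # same shape as x: fused bias add + leaky ReLU + sqrt(2) scaling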
def fused_bias_leakyrelu(input, bias, negative_slope=0.2, scale=2**0.5):
"""Fused bias leaky ReLU function.
def fused_bias_leakyrelu(input: torch.Tensor,
bias: nn.Parameter,
negative_slope: float = 0.2,
scale: float = 2**0.5) -> torch.Tensor:
r"""Fused bias leaky ReLU function.
This function is introduced in the StyleGAN2:
http://arxiv.org/abs/1912.04958
`Analyzing and Improving the Image Quality of StyleGAN
<http://arxiv.org/abs/1912.04958>`_
The bias term comes from the convolution operation. In addition, to keep
the variance of the feature map or gradients unchanged, they also adopt a
scale similar to that of Kaiming initialization. However, since the
:math:`1+{alpha}^2` : is too small, we can just ignore it. Therefore, the
final scale is just :math:`\sqrt{2}`:. Of course, you may change it with # noqa: W605, E501
:math:`1+{alpha}^2` is too small, we can just ignore it. Therefore, the
final scale is just :math:`\sqrt{2}`. Of course, you may change it with
your own scale.
Args:
......@@ -254,7 +265,10 @@ def fused_bias_leakyrelu(input, bias, negative_slope=0.2, scale=2**0.5):
negative_slope, scale)
def bias_leakyrelu_ref(x, bias, negative_slope=0.2, scale=2**0.5):
def bias_leakyrelu_ref(x: torch.Tensor,
bias: nn.Parameter,
negative_slope: float = 0.2,
scale: float = 2**0.5) -> torch.Tensor:
if bias is not None:
assert bias.ndim == 1
......
from typing import Tuple
import torch
from torch.autograd import Function
......@@ -15,18 +17,18 @@ class GatherPoints(Function):
indices: torch.Tensor) -> torch.Tensor:
"""
Args:
features (Tensor): (B, C, N) features to gather.
indices (Tensor): (B, M) where M is the number of points.
features (torch.Tensor): (B, C, N) features to gather.
indices (torch.Tensor): (B, M) where M is the number of points.
Returns:
Tensor: (B, C, M) where M is the number of points.
torch.Tensor: (B, C, M) where M is the number of points.
"""
assert features.is_contiguous()
assert indices.is_contiguous()
B, npoint = indices.size()
_, C, N = features.size()
output = torch.cuda.FloatTensor(B, C, npoint)
output = features.new_zeros((B, C, npoint))
ext_module.gather_points_forward(
features, indices, output, b=B, c=C, n=N, npoints=npoint)
......@@ -37,11 +39,11 @@ class GatherPoints(Function):
return output
@staticmethod
def backward(ctx, grad_out):
def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, None]:
idx, C, N = ctx.for_backwards
B, npoint = idx.size()
grad_features = torch.cuda.FloatTensor(B, C, N).zero_()
grad_features = grad_out.new_zeros((B, C, N))
grad_out_data = grad_out.data.contiguous()
ext_module.gather_points_backward(
grad_out_data,
......
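A usage sketch (illustrative; assumes a CUDA build of mmcv exporting `gather_points` from `mmcv.ops`):

import torch
from mmcv.ops import gather_points

features = torch.rand(2, 16, 1024).cuda()            # (B, C, N)
indices = torch.randint(0, 1024, (2, 128),
                        dtype=torch.int32).cuda()    # (B, M)
out = gather_points(features, indices)               # (B, C, M) == (2, 16, 128)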
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Tuple
from typing import Optional, Tuple, Union
import torch
from torch import nn as nn
......@@ -37,15 +37,15 @@ class QueryAndGroup(nn.Module):
"""
def __init__(self,
max_radius,
sample_num,
min_radius=0,
use_xyz=True,
return_grouped_xyz=False,
normalize_xyz=False,
uniform_sample=False,
return_unique_cnt=False,
return_grouped_idx=False):
max_radius: float,
sample_num: int,
min_radius: float = 0.,
use_xyz: bool = True,
return_grouped_xyz: bool = False,
normalize_xyz: bool = False,
uniform_sample: bool = False,
return_unique_cnt: bool = False,
return_grouped_idx: bool = False):
super().__init__()
self.max_radius = max_radius
self.min_radius = min_radius
......@@ -64,15 +64,24 @@ class QueryAndGroup(nn.Module):
assert not self.normalize_xyz, \
'can not normalize grouped xyz when max_radius is None'
def forward(self, points_xyz, center_xyz, features=None):
def forward(
self,
points_xyz: torch.Tensor,
center_xyz: torch.Tensor,
features: Optional[torch.Tensor] = None,
) -> Union[torch.Tensor, Tuple]:
"""
Args:
points_xyz (Tensor): (B, N, 3) xyz coordinates of the features.
center_xyz (Tensor): (B, npoint, 3) coordinates of the centriods.
features (Tensor): (B, C, N) Descriptors of the features.
points_xyz (torch.Tensor): (B, N, 3) xyz coordinates of the
points.
center_xyz (torch.Tensor): (B, npoint, 3) coordinates of the
centroids.
features (torch.Tensor): (B, C, N) The features of grouped
points.
Returns:
Tensor: (B, 3 + C, npoint, sample_num) Grouped feature.
Tuple | torch.Tensor: (B, 3 + C, npoint, sample_num) Grouped
concatenated coordinates and features of points.
"""
# if self.max_radius is None, we will perform kNN instead of ball query
# idx is of shape [B, npoint, sample_num]
......@@ -145,7 +154,7 @@ class GroupAll(nn.Module):
def forward(self,
xyz: torch.Tensor,
new_xyz: torch.Tensor,
features: torch.Tensor = None):
features: Optional[torch.Tensor] = None) -> torch.Tensor:
"""
Args:
xyz (Tensor): (B, N, 3) xyz coordinates of the features.
......@@ -206,8 +215,7 @@ class GroupingOperation(Function):
return output
@staticmethod
def backward(ctx,
grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, None]:
"""
Args:
grad_out (Tensor): (B, C, npoint, nsample) tensor of the gradients
......
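A usage sketch for the grouping utilities above (illustrative; assumes a CUDA build of mmcv exporting `QueryAndGroup` from `mmcv.ops`):

import torch
from mmcv.ops import QueryAndGroup

grouper = QueryAndGroup(max_radius=0.2, sample_num=16, use_xyz=True)
points_xyz = torch.rand(2, 1024, 3).cuda()   # (B, N, 3)
center_xyz = torch.rand(2, 128, 3).cuda()    # (B, npoint, 3)
features = torch.rand(2, 32, 1024).cuda()    # (B, C, N)
grouped = grouper(points_xyz, center_xyz, features)
# with use_xyz=True: (B, 3 + C, npoint, sample_num) == (2, 35, 128, 16)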
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from typing import Optional
import torch
from torch import Tensor
from ..utils import ext_loader
ext_module = ext_loader.load_ext('_ext', [
'iou3d_boxes_iou_bev_forward', 'iou3d_nms_forward',
'iou3d_nms_normal_forward'
'iou3d_boxes_overlap_bev_forward', 'iou3d_nms3d_forward',
'iou3d_nms3d_normal_forward'
])
def boxes_iou_bev(boxes_a, boxes_b):
"""Calculate boxes IoU in the Bird's Eye View.
def boxes_overlap_bev(boxes_a: Tensor, boxes_b: Tensor) -> Tensor:
"""Calculate boxes BEV overlap.
Args:
boxes_a (torch.Tensor): Input boxes a with shape (M, 7).
boxes_b (torch.Tensor): Input boxes b with shape (N, 7).
Returns:
torch.Tensor: BEV overlap result with shape (M, N).
"""
ans_overlap = boxes_a.new_zeros(
torch.Size((boxes_a.shape[0], boxes_b.shape[0])))
ext_module.iou3d_boxes_overlap_bev_forward(boxes_a.contiguous(),
boxes_b.contiguous(),
ans_overlap)
return ans_overlap
def boxes_iou3d(boxes_a: Tensor, boxes_b: Tensor) -> Tensor:
"""Calculate boxes 3D IoU.
Args:
boxes_a (torch.Tensor): Input boxes a with shape (M, 5).
boxes_b (torch.Tensor): Input boxes b with shape (N, 5).
boxes_a (torch.Tensor): Input boxes a with shape (M, 7).
boxes_b (torch.Tensor): Input boxes b with shape (N, 7).
Returns:
ans_iou (torch.Tensor): IoU result with shape (M, N).
torch.Tensor: 3D IoU result with shape (M, N).
"""
assert boxes_a.shape[1] == boxes_b.shape[1] == 7,\
'Input boxes shape should be (N, 7)'
boxes_a_height_max = (boxes_a[:, 2] + boxes_a[:, 5] / 2).view(-1, 1)
boxes_a_height_min = (boxes_a[:, 2] - boxes_a[:, 5] / 2).view(-1, 1)
boxes_b_height_max = (boxes_b[:, 2] + boxes_b[:, 5] / 2).view(1, -1)
boxes_b_height_min = (boxes_b[:, 2] - boxes_b[:, 5] / 2).view(1, -1)
overlaps_bev = boxes_a.new_zeros(
torch.Size((boxes_a.shape[0], boxes_b.shape[0])))
ext_module.iou3d_boxes_overlap_bev_forward(boxes_a.contiguous(),
boxes_b.contiguous(),
overlaps_bev)
max_of_min = torch.max(boxes_a_height_min, boxes_b_height_min)
min_of_max = torch.min(boxes_a_height_max, boxes_b_height_max)
overlaps_h = torch.clamp(min_of_max - max_of_min, min=0)
overlaps_3d = overlaps_bev * overlaps_h
vol_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1)
vol_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1)
iou3d = overlaps_3d / torch.clamp(vol_a + vol_b - overlaps_3d, min=1e-6)
return iou3d
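A sanity-check sketch for `boxes_iou3d` (illustrative; assumes a CUDA build of mmcv):

import torch
from mmcv.ops import boxes_iou3d

# (x, y, z, dx, dy, dz, heading); the first two boxes coincide and the third
# is far away, so the diagonal of the IoU matrix is 1 and iou[0, 2] is 0.
boxes = torch.tensor([[0., 0., 0., 2., 2., 2., 0.],
                      [0., 0., 0., 2., 2., 2., 0.],
                      [10., 10., 10., 2., 2., 2., 0.]]).cuda()
iou = boxes_iou3d(boxes, boxes)  # (3, 3)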
def nms3d(boxes: Tensor, scores: Tensor, iou_threshold: float) -> Tensor:
"""3D NMS function GPU implementation (for BEV boxes).
Args:
boxes (torch.Tensor): Input boxes with the shape of (N, 7)
([x, y, z, dx, dy, dz, heading]).
scores (torch.Tensor): Scores of boxes with the shape of (N).
iou_threshold (float): Overlap threshold of NMS.
Returns:
torch.Tensor: Indexes after NMS.
"""
assert boxes.size(1) == 7, 'Input boxes shape should be (N, 7)'
order = scores.sort(0, descending=True)[1]
boxes = boxes[order].contiguous()
keep = torch.zeros(boxes.size(0), dtype=torch.long)
num_out = torch.zeros(size=(), dtype=torch.long)
ext_module.iou3d_nms3d_forward(
boxes, keep, num_out, nms_overlap_thresh=iou_threshold)
keep = order[keep[:num_out].cuda(boxes.device)].contiguous()
return keep
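A usage sketch (illustrative; assumes a CUDA build of mmcv exporting `nms3d`):

import torch
from mmcv.ops import nms3d

boxes = torch.tensor([[0.0, 0., 0., 2., 2., 2., 0.],
                      [0.1, 0., 0., 2., 2., 2., 0.],
                      [10., 10., 10., 2., 2., 2., 0.]]).cuda()
scores = torch.tensor([0.9, 0.8, 0.7]).cuda()
keep = nms3d(boxes, scores, iou_threshold=0.3)
# the second box overlaps the first almost completely and is suppressed,
# so keep should be tensor([0, 2])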
def nms3d_normal(boxes: Tensor, scores: Tensor,
iou_threshold: float) -> Tensor:
"""Normal 3D NMS function GPU implementation. The overlap of two boxes for
IoU calculation is defined as the exact overlapping area of the two boxes
WITH their yaw angle set to 0.
Args:
boxes (torch.Tensor): Input boxes with shape (N, 7).
([x, y, z, dx, dy, dz, heading]).
scores (torch.Tensor): Scores of predicted boxes with shape (N).
iou_threshold (float): Overlap threshold of NMS.
Returns:
torch.Tensor: Remaining indices with scores in descending order.
"""
assert boxes.shape[1] == 7, 'Input boxes shape should be (N, 7)'
order = scores.sort(0, descending=True)[1]
boxes = boxes[order].contiguous()
keep = torch.zeros(boxes.size(0), dtype=torch.long)
num_out = torch.zeros(size=(), dtype=torch.long)
ext_module.iou3d_nms3d_normal_forward(
boxes, keep, num_out, nms_overlap_thresh=iou_threshold)
return order[keep[:num_out].cuda(boxes.device)].contiguous()
def _xyxyr2xywhr(boxes: Tensor) -> Tensor:
"""Convert [x1, y1, x2, y2, heading] box to [x, y, dx, dy, heading] box.
Args:
boxes (torch.Tensor): Input boxes with shape (N, 5).
Returns:
torch.Tensor: Converted boxes with shape (N, 5).
"""
warnings.warn(
'This function is deprecated and will be removed in the future.',
DeprecationWarning)
return torch.stack(
((boxes[:, 0] + boxes[:, 2]) / 2, (boxes[:, 1] + boxes[:, 3]) / 2,
boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1], boxes[:, 4]),
dim=-1)
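For example, the corner-format box [x1, y1, x2, y2, ry] = [0, 0, 4, 2, 0.3] converts to the center-format box [(0 + 4) / 2, (0 + 2) / 2, 4 - 0, 2 - 0, 0.3] = [2, 1, 4, 2, 0.3].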
def boxes_iou_bev(boxes_a: Tensor, boxes_b: Tensor) -> Tensor:
"""Calculate boxes IoU in the Bird's Eye View.
Args:
boxes_a (torch.Tensor): Input boxes a with shape (M, 5)
([x1, y1, x2, y2, ry]).
boxes_b (torch.Tensor): Input boxes b with shape (N, 5)
([x1, y1, x2, y2, ry]).
Returns:
torch.Tensor: IoU result with shape (M, N).
"""
from .box_iou_rotated import box_iou_rotated
warnings.warn(
'`iou3d.boxes_iou_bev` is deprecated and will be removed in'
' the future. Please, use `box_iou_rotated.box_iou_rotated`.',
DeprecationWarning)
return box_iou_rotated(_xyxyr2xywhr(boxes_a), _xyxyr2xywhr(boxes_b))
def nms_bev(boxes, scores, thresh, pre_max_size=None, post_max_size=None):
"""NMS function GPU implementation (for BEV boxes). The overlap of two
def nms_bev(boxes: Tensor,
scores: Tensor,
thresh: float,
pre_max_size: Optional[int] = None,
post_max_size: Optional[int] = None) -> Tensor:
"""NMS function GPU implementation (for BEV boxes).
The overlap of two
boxes for IoU calculation is defined as the exact overlapping area of the
two boxes. In this function, one can also set ``pre_max_size`` and
``post_max_size``.
Args:
boxes (torch.Tensor): Input boxes with the shape of [N, 5]
boxes (torch.Tensor): Input boxes with the shape of (N, 5)
([x1, y1, x2, y2, ry]).
scores (torch.Tensor): Scores of boxes with the shape of [N].
scores (torch.Tensor): Scores of boxes with the shape of (N,).
thresh (float): Overlap threshold of NMS.
pre_max_size (int, optional): Max size of boxes before NMS.
Default: None.
post_max_size (int, optional): Max size of boxes after NMS.
Default: None.
Returns:
torch.Tensor: Indexes after NMS.
"""
assert boxes.size(1) == 5, 'Input boxes shape should be [N, 5]'
from .nms import nms_rotated
warnings.warn(
'`iou3d.nms_bev` is deprecated and will be removed in'
' the future. Please, use `nms.nms_rotated`.', DeprecationWarning)
assert boxes.size(1) == 5, 'Input boxes shape should be (N, 5)'
order = scores.sort(0, descending=True)[1]
if pre_max_size is not None:
order = order[:pre_max_size]
boxes = boxes[order].contiguous()
boxes = _xyxyr2xywhr(boxes)[order]
scores = scores[order]
keep = nms_rotated(boxes, scores, thresh)[1]
keep = order[keep]
keep = torch.zeros(boxes.size(0), dtype=torch.long)
num_out = torch.zeros(size=(), dtype=torch.long)
ext_module.iou3d_nms_forward(
boxes, keep, num_out, nms_overlap_thresh=thresh)
keep = order[keep[:num_out].cuda(boxes.device)].contiguous()
if post_max_size is not None:
keep = keep[:post_max_size]
return keep
def nms_normal_bev(boxes, scores, thresh):
"""Normal NMS function GPU implementation (for BEV boxes). The overlap of
def nms_normal_bev(boxes: Tensor, scores: Tensor, thresh: float) -> Tensor:
"""Normal NMS function GPU implementation (for BEV boxes).
The overlap of
two boxes for IoU calculation is defined as the exact overlapping area of
the two boxes WITH their yaw angle set to 0.
Args:
boxes (torch.Tensor): Input boxes with shape (N, 5).
scores (torch.Tensor): Scores of predicted boxes with shape (N).
boxes (torch.Tensor): Input boxes with shape (N, 5)
([x1, y1, x2, y2, ry]).
scores (torch.Tensor): Scores of predicted boxes with shape (N,).
thresh (float): Overlap threshold of NMS.
Returns:
torch.Tensor: Remaining indices with scores in descending order.
"""
assert boxes.shape[1] == 5, 'Input boxes shape should be [N, 5]'
order = scores.sort(0, descending=True)[1]
from .nms import nms
boxes = boxes[order].contiguous()
warnings.warn(
'`iou3d.nms_normal_bev` is deprecated and will be removed in'
' the future. Please, use `nms.nms`.', DeprecationWarning)
assert boxes.shape[1] == 5, 'Input boxes shape should be (N, 5)'
keep = torch.zeros(boxes.size(0), dtype=torch.long)
num_out = torch.zeros(size=(), dtype=torch.long)
ext_module.iou3d_nms_normal_forward(
boxes, keep, num_out, nms_overlap_thresh=thresh)
return order[keep[:num_out].cuda(boxes.device)].contiguous()
return nms(boxes[:, :-1], scores, thresh)[1]
from typing import Optional
import torch
from torch.autograd import Function
......@@ -8,6 +10,7 @@ ext_module = ext_loader.load_ext('_ext', ['knn_forward'])
class KNN(Function):
r"""KNN (CUDA) based on heap data structure.
Modified from `PAConv <https://github.com/CVMI-Lab/PAConv/tree/main/
scene_seg/lib/pointops/src/knnquery_heap>`_.
......@@ -18,15 +21,15 @@ class KNN(Function):
def forward(ctx,
k: int,
xyz: torch.Tensor,
center_xyz: torch.Tensor = None,
center_xyz: Optional[torch.Tensor] = None,
transposed: bool = False) -> torch.Tensor:
"""
Args:
k (int): number of nearest neighbors.
xyz (Tensor): (B, N, 3) if transposed == False, else (B, 3, N).
xyz coordinates of the features.
center_xyz (Tensor, optional): (B, npoint, 3) if transposed ==
False, else (B, 3, npoint). centers of the knn query.
xyz (torch.Tensor): (B, N, 3) if transposed == False, else
(B, 3, N). xyz coordinates of the features.
center_xyz (torch.Tensor, optional): (B, npoint, 3) if transposed
is False, else (B, 3, npoint). centers of the knn query.
Default: None.
transposed (bool, optional): whether the input tensors are
transposed. Should not explicitly use this keyword when
......@@ -34,8 +37,8 @@ class KNN(Function):
Default: False.
Returns:
Tensor: (B, k, npoint) tensor with the indices of
the features that form k-nearest neighbours.
torch.Tensor: (B, k, npoint) tensor with the indices of the
features that form k-nearest neighbours.
"""
assert (k > 0) & (k < 100), 'k should be in range(0, 100)'
......
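A usage sketch (illustrative; assumes a CUDA build of mmcv where `knn = KNN.apply` is exported from `mmcv.ops`):

import torch
from mmcv.ops import knn

xyz = torch.rand(2, 1024, 3).cuda()        # (B, N, 3)
center_xyz = torch.rand(2, 128, 3).cuda()  # (B, npoint, 3)
idx = knn(8, xyz, center_xyz, False)       # (B, 8, npoint) neighbour indices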
# Copyright (c) OpenMMLab. All rights reserved.
import math
from typing import Optional, Tuple, Union
import torch
import torch.nn as nn
......@@ -27,7 +28,13 @@ class MaskedConv2dFunction(Function):
stride_i=stride)
@staticmethod
def forward(ctx, features, mask, weight, bias, padding=0, stride=1):
def forward(ctx,
features: torch.Tensor,
mask: torch.Tensor,
weight: torch.nn.Parameter,
bias: torch.nn.Parameter,
padding: int = 0,
stride: int = 1) -> torch.Tensor:
assert mask.dim() == 3 and mask.size(0) == 1
assert features.dim() == 4 and features.size(0) == 1
assert features.size()[2:] == mask.size()[1:]
......@@ -61,7 +68,6 @@ class MaskedConv2dFunction(Function):
kernel_w=kernel_w,
pad_h=pad_h,
pad_w=pad_w)
masked_output = torch.addmm(1, bias[:, None], 1,
weight.view(out_channel, -1), data_col)
ext_module.masked_col2im_forward(
......@@ -76,7 +82,7 @@ class MaskedConv2dFunction(Function):
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
def backward(ctx, grad_output: torch.Tensor) -> tuple:
return (None, ) * 5
......@@ -91,21 +97,22 @@ class MaskedConv2d(nn.Conv2d):
"""
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
bias=True):
super(MaskedConv2d,
self).__init__(in_channels, out_channels, kernel_size, stride,
padding, dilation, groups, bias)
def forward(self, input, mask=None):
in_channels: int,
out_channels: int,
kernel_size: Union[int, Tuple[int, ...]],
stride: int = 1,
padding: int = 0,
dilation: int = 1,
groups: int = 1,
bias: bool = True):
super().__init__(in_channels, out_channels, kernel_size, stride,
padding, dilation, groups, bias)
def forward(self,
input: torch.Tensor,
mask: Optional[torch.Tensor] = None) -> torch.Tensor:
if mask is None: # fallback to the normal Conv2d
return super(MaskedConv2d, self).forward(input)
return super().forward(input)
else:
return masked_conv2d(input, mask, self.weight, self.bias,
self.padding)
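A usage sketch for the module above (illustrative; assumes a CUDA build of mmcv; note the op asserts a batch size of 1):

import torch
from mmcv.ops import MaskedConv2d

conv = MaskedConv2d(3, 8, kernel_size=3, padding=1).cuda()
x = torch.rand(1, 3, 16, 16).cuda()
mask = (torch.rand(1, 16, 16) > 0.5).float().cuda()  # (1, H, W)
y = conv(x, mask)  # convolution evaluated only at positive mask locations
y_full = conv(x)   # mask=None falls back to an ordinary Conv2d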
# Copyright (c) OpenMMLab. All rights reserved.
import math
from abc import abstractmethod
from typing import Optional
import torch
import torch.nn as nn
......@@ -18,7 +20,7 @@ class BaseMergeCell(nn.Module):
another convolution layer.
Args:
in_channels (int): number of input channels in out_conv layer.
fused_channels (int): number of input channels in out_conv layer.
out_channels (int): number of output channels in out_conv layer.
with_out_conv (bool): Whether to use out_conv layer.
out_conv_cfg (dict): Config dict for convolution layer, which should
......@@ -41,19 +43,19 @@ class BaseMergeCell(nn.Module):
"""
def __init__(self,
fused_channels=256,
out_channels=256,
with_out_conv=True,
out_conv_cfg=dict(
fused_channels: Optional[int] = 256,
out_channels: Optional[int] = 256,
with_out_conv: bool = True,
out_conv_cfg: dict = dict(
groups=1, kernel_size=3, padding=1, bias=True),
out_norm_cfg=None,
out_conv_order=('act', 'conv', 'norm'),
with_input1_conv=False,
with_input2_conv=False,
input_conv_cfg=None,
input_norm_cfg=None,
upsample_mode='nearest'):
super(BaseMergeCell, self).__init__()
out_norm_cfg: Optional[dict] = None,
out_conv_order: tuple = ('act', 'conv', 'norm'),
with_input1_conv: bool = False,
with_input2_conv: bool = False,
input_conv_cfg: Optional[dict] = None,
input_norm_cfg: Optional[dict] = None,
upsample_mode: str = 'nearest'):
super().__init__()
assert upsample_mode in ['nearest', 'bilinear']
self.with_out_conv = with_out_conv
self.with_input1_conv = with_input1_conv
......@@ -62,8 +64,8 @@ class BaseMergeCell(nn.Module):
if self.with_out_conv:
self.out_conv = ConvModule(
fused_channels,
out_channels,
fused_channels, # type: ignore
out_channels, # type: ignore
**out_conv_cfg,
norm_cfg=out_norm_cfg,
order=out_conv_order)
......@@ -95,12 +97,25 @@ class BaseMergeCell(nn.Module):
elif x.shape[-2:] < size:
return F.interpolate(x, size=size, mode=self.upsample_mode)
else:
assert x.shape[-2] % size[-2] == 0 and x.shape[-1] % size[-1] == 0
kernel_size = x.shape[-1] // size[-1]
if x.shape[-2] % size[-2] != 0 or x.shape[-1] % size[-1] != 0:
h, w = x.shape[-2:]
target_h, target_w = size
pad_h = math.ceil(h / target_h) * target_h - h
pad_w = math.ceil(w / target_w) * target_w - w
pad_l = pad_w // 2
pad_r = pad_w - pad_l
pad_t = pad_h // 2
pad_b = pad_h - pad_t
pad = (pad_l, pad_r, pad_t, pad_b)
x = F.pad(x, pad, mode='constant', value=0.0)
kernel_size = (x.shape[-2] // size[-2], x.shape[-1] // size[-1])
x = F.max_pool2d(x, kernel_size=kernel_size, stride=kernel_size)
return x
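For instance, with this padding branch a 15 x 15 input resized to out_size (4, 4) is padded by pad_h = pad_w = ceil(15 / 4) * 4 - 15 = 1 up to 16 x 16 and then max-pooled with kernel (4, 4) down to 4 x 4, a case the previous assert would simply have rejected.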
def forward(self, x1, x2, out_size=None):
def forward(self,
x1: torch.Tensor,
x2: torch.Tensor,
out_size: Optional[tuple] = None) -> torch.Tensor:
assert x1.shape[:2] == x2.shape[:2]
assert out_size is None or len(out_size) == 2
if out_size is None: # resize to larger one
......@@ -120,8 +135,8 @@ class BaseMergeCell(nn.Module):
class SumCell(BaseMergeCell):
def __init__(self, in_channels, out_channels, **kwargs):
super(SumCell, self).__init__(in_channels, out_channels, **kwargs)
def __init__(self, in_channels: int, out_channels: int, **kwargs):
super().__init__(in_channels, out_channels, **kwargs)
def _binary_op(self, x1, x2):
return x1 + x2
......@@ -129,9 +144,8 @@ class SumCell(BaseMergeCell):
class ConcatCell(BaseMergeCell):
def __init__(self, in_channels, out_channels, **kwargs):
super(ConcatCell, self).__init__(in_channels * 2, out_channels,
**kwargs)
def __init__(self, in_channels: int, out_channels: int, **kwargs):
super().__init__(in_channels * 2, out_channels, **kwargs)
def _binary_op(self, x1, x2):
ret = torch.cat([x1, x2], dim=1)
......@@ -140,7 +154,10 @@ class ConcatCell(BaseMergeCell):
class GlobalPoolingCell(BaseMergeCell):
def __init__(self, in_channels=None, out_channels=None, **kwargs):
def __init__(self,
in_channels: Optional[int] = None,
out_channels: Optional[int] = None,
**kwargs):
super().__init__(in_channels, out_channels, **kwargs)
self.global_pool = nn.AdaptiveAvgPool2d((1, 1))
......
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from ..utils import ext_loader
ext_module = ext_loader.load_ext('_ext', ['min_area_polygons'])
def min_area_polygons(pointsets: torch.Tensor) -> torch.Tensor:
"""Find the smallest polygons that surrounds all points in the point sets.
Args:
pointsets (Tensor): point sets with shape (N, 18).
Returns:
torch.Tensor: Return the smallest polygons with shape (N, 8).
"""
polygons = pointsets.new_zeros((pointsets.size(0), 8))
ext_module.min_area_polygons(pointsets, polygons)
return polygons
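A usage sketch (illustrative; assumes a CUDA build of mmcv exporting `min_area_polygons`):

import torch
from mmcv.ops import min_area_polygons

pointsets = torch.rand(4, 18).cuda()     # 9 (x, y) points per set
polygons = min_area_polygons(pointsets)  # (4, 8): four corners per polygon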
# Copyright (c) OpenMMLab. All rights reserved.
import math
from typing import Optional, Tuple, Union
import torch
import torch.nn as nn
......@@ -35,16 +36,16 @@ class ModulatedDeformConv2dFunction(Function):
@staticmethod
def forward(ctx,
input,
offset,
mask,
weight,
bias=None,
stride=1,
padding=0,
dilation=1,
groups=1,
deform_groups=1):
input: torch.Tensor,
offset: torch.Tensor,
mask: torch.Tensor,
weight: nn.Parameter,
bias: Optional[nn.Parameter] = None,
stride: int = 1,
padding: int = 0,
dilation: int = 1,
groups: int = 1,
deform_groups: int = 1) -> torch.Tensor:
if input is not None and input.dim() != 4:
raise ValueError(
f'Expected 4D tensor as input, got {input.dim()}D tensor \
......@@ -66,6 +67,7 @@ class ModulatedDeformConv2dFunction(Function):
# whatever the pytorch version is.
input = input.type_as(offset)
weight = weight.type_as(input)
bias = bias.type_as(input) # type: ignore
ctx.save_for_backward(input, offset, mask, weight, bias)
output = input.new_empty(
ModulatedDeformConv2dFunction._output_size(ctx, input, weight))
......@@ -94,7 +96,7 @@ class ModulatedDeformConv2dFunction(Function):
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
def backward(ctx, grad_output: torch.Tensor) -> tuple:
input, offset, mask, weight, bias = ctx.saved_tensors
grad_input = torch.zeros_like(input)
grad_offset = torch.zeros_like(offset)
......@@ -158,16 +160,16 @@ class ModulatedDeformConv2d(nn.Module):
@deprecated_api_warning({'deformable_groups': 'deform_groups'},
cls_name='ModulatedDeformConv2d')
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
deform_groups=1,
bias=True):
super(ModulatedDeformConv2d, self).__init__()
in_channels: int,
out_channels: int,
kernel_size: Union[int, Tuple[int]],
stride: int = 1,
padding: int = 0,
dilation: int = 1,
groups: int = 1,
deform_groups: int = 1,
bias: Union[bool, str] = True):
super().__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = _pair(kernel_size)
......@@ -198,7 +200,8 @@ class ModulatedDeformConv2d(nn.Module):
if self.bias is not None:
self.bias.data.zero_()
def forward(self, x, offset, mask):
def forward(self, x: torch.Tensor, offset: torch.Tensor,
mask: torch.Tensor) -> torch.Tensor:
return modulated_deform_conv2d(x, offset, mask, self.weight, self.bias,
self.stride, self.padding,
self.dilation, self.groups,
......@@ -226,7 +229,7 @@ class ModulatedDeformConv2dPack(ModulatedDeformConv2d):
_version = 2
def __init__(self, *args, **kwargs):
super(ModulatedDeformConv2dPack, self).__init__(*args, **kwargs)
super().__init__(*args, **kwargs)
self.conv_offset = nn.Conv2d(
self.in_channels,
self.deform_groups * 3 * self.kernel_size[0] * self.kernel_size[1],
......@@ -237,13 +240,13 @@ class ModulatedDeformConv2dPack(ModulatedDeformConv2d):
bias=True)
self.init_weights()
def init_weights(self):
super(ModulatedDeformConv2dPack, self).init_weights()
def init_weights(self) -> None:
super().init_weights()
if hasattr(self, 'conv_offset'):
self.conv_offset.weight.data.zero_()
self.conv_offset.bias.data.zero_()
def forward(self, x):
def forward(self, x: torch.Tensor) -> torch.Tensor: # type: ignore
out = self.conv_offset(x)
o1, o2, mask = torch.chunk(out, 3, dim=1)
offset = torch.cat((o1, o2), dim=1)
......
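A usage sketch for the packed module above (illustrative; assumes a CUDA build of mmcv):

import torch
from mmcv.ops import ModulatedDeformConv2dPack

dcn = ModulatedDeformConv2dPack(
    3, 8, kernel_size=3, padding=1, deform_groups=1).cuda()
x = torch.rand(1, 3, 32, 32).cuda()
y = dcn(x)  # offsets and modulation mask are predicted by dcn.conv_offset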
# Copyright (c) OpenMMLab. All rights reserved.
import math
import warnings
from typing import Optional, no_type_check
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd.function import Function, once_differentiable
import mmcv
from mmcv import deprecated_api_warning
from mmcv.cnn import constant_init, xavier_init
from mmcv.cnn.bricks.registry import ATTENTION
......@@ -20,27 +22,30 @@ ext_module = ext_loader.load_ext(
class MultiScaleDeformableAttnFunction(Function):
@staticmethod
def forward(ctx, value, value_spatial_shapes, value_level_start_index,
sampling_locations, attention_weights, im2col_step):
def forward(ctx, value: torch.Tensor, value_spatial_shapes: torch.Tensor,
value_level_start_index: torch.Tensor,
sampling_locations: torch.Tensor,
attention_weights: torch.Tensor,
im2col_step: torch.Tensor) -> torch.Tensor:
"""GPU version of multi-scale deformable attention.
Args:
value (Tensor): The value has shape
value (torch.Tensor): The value has shape
(bs, num_keys, num_heads, embed_dims//num_heads)
value_spatial_shapes (Tensor): Spatial shape of
value_spatial_shapes (torch.Tensor): Spatial shape of
each feature map, has shape (num_levels, 2),
last dimension 2 represents (h, w)
sampling_locations (Tensor): The location of sampling points,
sampling_locations (torch.Tensor): The location of sampling points,
has shape
(bs, num_queries, num_heads, num_levels, num_points, 2),
the last dimension 2 represents (x, y).
attention_weights (Tensor): The weight of sampling points used
when calculate the attention, has shape
attention_weights (torch.Tensor): The weight of sampling points
used when calculating the attention, has shape
(bs, num_queries, num_heads, num_levels, num_points),
im2col_step (Tensor): The step used in image to column.
im2col_step (torch.Tensor): The step used in image to column.
Returns:
Tensor: has shape (bs, num_queries, embed_dims)
torch.Tensor: has shape (bs, num_queries, embed_dims)
"""
ctx.im2col_step = im2col_step
......@@ -58,16 +63,14 @@ class MultiScaleDeformableAttnFunction(Function):
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
def backward(ctx, grad_output: torch.Tensor) -> tuple:
"""GPU version of backward function.
Args:
grad_output (Tensor): Gradient
of output tensor of forward.
grad_output (torch.Tensor): Gradient of output tensor of forward.
Returns:
Tuple[Tensor]: Gradient
of input tensors in forward.
tuple[Tensor]: Gradient of input tensors in forward.
"""
value, value_spatial_shapes, value_level_start_index,\
sampling_locations, attention_weights = ctx.saved_tensors
......@@ -91,26 +94,28 @@ class MultiScaleDeformableAttnFunction(Function):
grad_sampling_loc, grad_attn_weight, None
def multi_scale_deformable_attn_pytorch(value, value_spatial_shapes,
sampling_locations, attention_weights):
def multi_scale_deformable_attn_pytorch(
value: torch.Tensor, value_spatial_shapes: torch.Tensor,
sampling_locations: torch.Tensor,
attention_weights: torch.Tensor) -> torch.Tensor:
"""CPU version of multi-scale deformable attention.
Args:
value (Tensor): The value has shape
(bs, num_keys, mum_heads, embed_dims//num_heads)
value_spatial_shapes (Tensor): Spatial shape of
value (torch.Tensor): The value has shape
(bs, num_keys, num_heads, embed_dims//num_heads)
value_spatial_shapes (torch.Tensor): Spatial shape of
each feature map, has shape (num_levels, 2),
last dimension 2 represents (h, w)
sampling_locations (Tensor): The location of sampling points,
sampling_locations (torch.Tensor): The location of sampling points,
has shape
(bs, num_queries, num_heads, num_levels, num_points, 2),
the last dimension 2 represents (x, y).
attention_weights (Tensor): The weight of sampling points used
attention_weights (torch.Tensor): The weight of sampling points used
when calculating the attention, has shape
(bs, num_queries, num_heads, num_levels, num_points),
Returns:
Tensor: has shape (bs, num_queries, embed_dims)
torch.Tensor: has shape (bs, num_queries, embed_dims)
"""
bs, _, num_heads, embed_dims = value.shape
......@@ -180,15 +185,15 @@ class MultiScaleDeformableAttention(BaseModule):
"""
def __init__(self,
embed_dims=256,
num_heads=8,
num_levels=4,
num_points=4,
im2col_step=64,
dropout=0.1,
batch_first=False,
norm_cfg=None,
init_cfg=None):
embed_dims: int = 256,
num_heads: int = 8,
num_levels: int = 4,
num_points: int = 4,
im2col_step: int = 64,
dropout: float = 0.1,
batch_first: bool = False,
norm_cfg: Optional[dict] = None,
init_cfg: Optional[mmcv.ConfigDict] = None):
super().__init__(init_cfg)
if embed_dims % num_heads != 0:
raise ValueError(f'embed_dims must be divisible by num_heads, '
......@@ -227,7 +232,7 @@ class MultiScaleDeformableAttention(BaseModule):
self.output_proj = nn.Linear(embed_dims, embed_dims)
self.init_weights()
def init_weights(self):
def init_weights(self) -> None:
"""Default initialization for Parameters of Module."""
constant_init(self.sampling_offsets, 0.)
thetas = torch.arange(
......@@ -247,53 +252,53 @@ class MultiScaleDeformableAttention(BaseModule):
xavier_init(self.output_proj, distribution='uniform', bias=0.)
self._is_init = True
@no_type_check
@deprecated_api_warning({'residual': 'identity'},
cls_name='MultiScaleDeformableAttention')
def forward(self,
query,
key=None,
value=None,
identity=None,
query_pos=None,
key_padding_mask=None,
reference_points=None,
spatial_shapes=None,
level_start_index=None,
**kwargs):
query: torch.Tensor,
key: Optional[torch.Tensor] = None,
value: Optional[torch.Tensor] = None,
identity: Optional[torch.Tensor] = None,
query_pos: Optional[torch.Tensor] = None,
key_padding_mask: Optional[torch.Tensor] = None,
reference_points: Optional[torch.Tensor] = None,
spatial_shapes: Optional[torch.Tensor] = None,
level_start_index: Optional[torch.Tensor] = None,
**kwargs) -> torch.Tensor:
"""Forward Function of MultiScaleDeformAttention.
Args:
query (Tensor): Query of Transformer with shape
query (torch.Tensor): Query of Transformer with shape
(num_query, bs, embed_dims).
key (Tensor): The key tensor with shape
key (torch.Tensor): The key tensor with shape
`(num_key, bs, embed_dims)`.
value (Tensor): The value tensor with shape
value (torch.Tensor): The value tensor with shape
`(num_key, bs, embed_dims)`.
identity (Tensor): The tensor used for addition, with the
identity (torch.Tensor): The tensor used for addition, with the
same shape as `query`. Default None. If None,
`query` will be used.
query_pos (Tensor): The positional encoding for `query`.
query_pos (torch.Tensor): The positional encoding for `query`.
Default: None.
key_pos (Tensor): The positional encoding for `key`. Default
None.
reference_points (Tensor): The normalized reference
key_padding_mask (torch.Tensor): ByteTensor for `query`, with
shape [bs, num_key].
reference_points (torch.Tensor): The normalized reference
points with shape (bs, num_query, num_levels, 2),
all elements are in range [0, 1], top-left (0, 0),
bottom-right (1, 1), including padding area;
or (N, Length_{query}, num_levels, 4), with two
additional dimensions (w, h) to form reference boxes.
key_padding_mask (Tensor): ByteTensor for `query`, with
shape [bs, num_key].
spatial_shapes (Tensor): Spatial shape of features in
spatial_shapes (torch.Tensor): Spatial shape of features in
different levels. With shape (num_levels, 2),
last dimension represents (h, w).
level_start_index (Tensor): The start index of each level.
level_start_index (torch.Tensor): The start index of each level.
A tensor has shape ``(num_levels, )`` and can be represented
as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...].
Returns:
Tensor: forwarded results with shape [num_query, bs, embed_dims].
torch.Tensor: forwarded results with shape
[num_query, bs, embed_dims].
"""
if value is None:
......
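A shape-level sketch of the pure-PyTorch path above (illustrative; runs on CPU; the import path assumes mmcv/ops/multi_scale_deform_attn.py):

import torch
from mmcv.ops.multi_scale_deform_attn import \
    multi_scale_deformable_attn_pytorch

bs, num_heads, head_dims, num_queries, num_points = 2, 4, 8, 10, 4
spatial_shapes = torch.tensor([[8, 8], [4, 4]])  # (num_levels, 2)
num_keys = int((spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum())  # 80
value = torch.rand(bs, num_keys, num_heads, head_dims)
sampling_locations = torch.rand(bs, num_queries, num_heads, 2, num_points, 2)
attention_weights = torch.softmax(
    torch.rand(bs, num_queries, num_heads, 2 * num_points), -1).reshape(
        bs, num_queries, num_heads, 2, num_points)
out = multi_scale_deformable_attn_pytorch(
    value, spatial_shapes, sampling_locations, attention_weights)
assert out.shape == (2, 10, num_heads * head_dims)  # (bs, num_queries, 32)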
import os
from typing import Any, Dict, List, Optional, Tuple, Union
import numpy as np
import torch
from torch import Tensor
from mmcv.utils import deprecated_api_warning
from ..utils import ext_loader
......@@ -14,8 +16,8 @@ ext_module = ext_loader.load_ext(
class NMSop(torch.autograd.Function):
@staticmethod
def forward(ctx, bboxes, scores, iou_threshold, offset, score_threshold,
max_num):
def forward(ctx: Any, bboxes: Tensor, scores: Tensor, iou_threshold: float,
offset: int, score_threshold: float, max_num: int) -> Tensor:
is_filtering_by_score = score_threshold > 0
if is_filtering_by_score:
valid_mask = scores > score_threshold
......@@ -48,6 +50,7 @@ class NMSop(torch.autograd.Function):
offset_i=int(offset))
else:
from torch.onnx.symbolic_opset9 import select, squeeze, unsqueeze
from ..onnx.onnx_utils.symbolic_helper import _size_helper
boxes = unsqueeze(g, bboxes, 0)
......@@ -82,8 +85,9 @@ class NMSop(torch.autograd.Function):
class SoftNMSop(torch.autograd.Function):
@staticmethod
def forward(ctx, boxes, scores, iou_threshold, sigma, min_score, method,
offset):
def forward(ctx: Any, boxes: Tensor, scores: Tensor, iou_threshold: float,
sigma: float, min_score: float, method: int,
offset: int) -> Tuple[Tensor, Tensor]:
dets = boxes.new_empty((boxes.size(0), 5), device='cpu')
inds = ext_module.softnms(
boxes.cpu(),
......@@ -114,8 +118,16 @@ class SoftNMSop(torch.autograd.Function):
return nms_out
array_like_type = Union[Tensor, np.ndarray]
@deprecated_api_warning({'iou_thr': 'iou_threshold'})
def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1):
def nms(boxes: array_like_type,
scores: array_like_type,
iou_threshold: float,
offset: int = 0,
score_threshold: float = 0,
max_num: int = -1) -> Tuple[array_like_type, array_like_type]:
"""Dispatch to either CPU or GPU NMS implementations.
The input can be either torch tensor or numpy array. GPU NMS will be used
......@@ -131,8 +143,8 @@ def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1):
max_num (int): maximum number of boxes after NMS.
Returns:
tuple: kept dets(boxes and scores) and indice, which is always the \
same data type as the input.
tuple: kept dets (boxes and scores) and indices, which always have
the same data type as the input.
Example:
>>> boxes = np.array([[49.1, 32.4, 51.0, 35.9],
......@@ -148,8 +160,8 @@ def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1):
>>> dets, inds = nms(boxes, scores, iou_threshold)
>>> assert len(inds) == len(dets) == 3
"""
assert isinstance(boxes, (torch.Tensor, np.ndarray))
assert isinstance(scores, (torch.Tensor, np.ndarray))
assert isinstance(boxes, (Tensor, np.ndarray))
assert isinstance(scores, (Tensor, np.ndarray))
is_numpy = False
if isinstance(boxes, np.ndarray):
is_numpy = True
......@@ -160,16 +172,8 @@ def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1):
assert boxes.size(0) == scores.size(0)
assert offset in (0, 1)
if torch.__version__ == 'parrots':
indata_list = [boxes, scores]
indata_dict = {
'iou_threshold': float(iou_threshold),
'offset': int(offset)
}
inds = ext_module.nms(*indata_list, **indata_dict)
else:
inds = NMSop.apply(boxes, scores, iou_threshold, offset,
score_threshold, max_num)
inds = NMSop.apply(boxes, scores, iou_threshold, offset, score_threshold,
max_num)
dets = torch.cat((boxes[inds], scores[inds].reshape(-1, 1)), dim=1)
if is_numpy:
dets = dets.cpu().numpy()
......@@ -178,19 +182,19 @@ def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1):
@deprecated_api_warning({'iou_thr': 'iou_threshold'})
def soft_nms(boxes,
scores,
iou_threshold=0.3,
sigma=0.5,
min_score=1e-3,
method='linear',
offset=0):
def soft_nms(boxes: array_like_type,
scores: array_like_type,
iou_threshold: float = 0.3,
sigma: float = 0.5,
min_score: float = 1e-3,
method: str = 'linear',
offset: int = 0) -> Tuple[array_like_type, array_like_type]:
"""Dispatch to only CPU Soft NMS implementations.
The input can be either a torch tensor or numpy array.
The returned type will always be the same as inputs.
Arguments:
Args:
boxes (torch.Tensor or np.ndarray): boxes in shape (N, 4).
scores (torch.Tensor or np.ndarray): scores in shape (N, ).
iou_threshold (float): IoU threshold for NMS.
......@@ -200,8 +204,8 @@ def soft_nms(boxes,
offset (int, 0 or 1): boxes' width or height is (x2 - x1 + offset).
Returns:
tuple: kept dets(boxes and scores) and indice, which is always the \
same data type as the input.
tuple: kept dets (boxes and scores) and indices, which always have
the same data type as the input.
Example:
>>> boxes = np.array([[4., 3., 5., 3.],
......@@ -216,8 +220,8 @@ def soft_nms(boxes,
>>> assert len(inds) == len(dets) == 5
"""
assert isinstance(boxes, (torch.Tensor, np.ndarray))
assert isinstance(scores, (torch.Tensor, np.ndarray))
assert isinstance(boxes, (Tensor, np.ndarray))
assert isinstance(scores, (Tensor, np.ndarray))
is_numpy = False
if isinstance(boxes, np.ndarray):
is_numpy = True
......@@ -257,46 +261,85 @@ def soft_nms(boxes,
return dets.to(device=boxes.device), inds.to(device=boxes.device)
def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False):
"""Performs non-maximum suppression in a batched fashion.
def batched_nms(boxes: Tensor,
scores: Tensor,
idxs: Tensor,
nms_cfg: Optional[Dict],
class_agnostic: bool = False) -> Tuple[Tensor, Tensor]:
r"""Performs non-maximum suppression in a batched fashion.
Modified from https://github.com/pytorch/vision/blob
/505cd6957711af790211896d32b40291bea1bc21/torchvision/ops/boxes.py#L39.
Modified from `torchvision/ops/boxes.py#L39
<https://github.com/pytorch/vision/blob/
505cd6957711af790211896d32b40291bea1bc21/torchvision/ops/boxes.py#L39>`_.
In order to perform NMS independently per class, we add an offset to all
the boxes. The offset is dependent only on the class idx, and is large
enough so that boxes from different classes do not overlap.
Arguments:
boxes (torch.Tensor): boxes in shape (N, 4).
Note:
In v1.4.1 and later, ``batched_nms`` supports skipping the NMS and
returns sorted raw results when `nms_cfg` is None.
Args:
boxes (torch.Tensor): boxes in shape (N, 4) or (N, 5).
scores (torch.Tensor): scores in shape (N, ).
idxs (torch.Tensor): each index value corresponds to a bbox cluster,
and NMS will not be applied between elements of different idxs,
shape (N, ).
nms_cfg (dict): specify nms type and other parameters like iou_thr.
Possible keys includes the following.
nms_cfg (dict, optional): Supports skipping the nms when `nms_cfg`
is None, otherwise it should specify nms type and other
parameters like `iou_thr`. Possible keys include the following.
- iou_thr (float): IoU threshold used for NMS.
- iou_threshold (float): IoU threshold used for NMS.
- split_thr (float): threshold number of boxes. In some cases the
number of boxes is large (e.g., 200k). To avoid OOM during
training, the users could set `split_thr` to a small value.
If the number of boxes is greater than the threshold, it will
perform NMS on each group of boxes separately and sequentially.
Defaults to 10000.
number of boxes is large (e.g., 200k). To avoid OOM during
training, the users could set `split_thr` to a small value.
If the number of boxes is greater than the threshold, it will
perform NMS on each group of boxes separately and sequentially.
Defaults to 10000.
class_agnostic (bool): if true, nms is class agnostic,
i.e. IoU thresholding happens over all boxes,
regardless of the predicted class.
regardless of the predicted class. Defaults to False.
Returns:
tuple: kept dets and indice.
- boxes (Tensor): Bboxes with score after nms, has shape
(num_bboxes, 5). last dimension 5 arrange as
(x1, y1, x2, y2, score)
- keep (Tensor): The indices of remaining boxes in input
boxes.
"""
# skip nms when nms_cfg is None
if nms_cfg is None:
scores, inds = scores.sort(descending=True)
boxes = boxes[inds]
return torch.cat([boxes, scores[:, None]], -1), inds
nms_cfg_ = nms_cfg.copy()
class_agnostic = nms_cfg_.pop('class_agnostic', class_agnostic)
if class_agnostic:
boxes_for_nms = boxes
else:
max_coordinate = boxes.max()
offsets = idxs.to(boxes) * (max_coordinate + torch.tensor(1).to(boxes))
boxes_for_nms = boxes + offsets[:, None]
# When using rotated boxes, only apply offsets on center.
if boxes.size(-1) == 5:
# Strictly, the maximum coordinates of the rotating box
# (x,y,w,h,a) should be calculated by polygon coordinates.
# But the conversion from rotated box to polygon will
# slow down the speed.
# So we use max(x,y) + max(w,h) as the max coordinate,
# which is larger than the polygon max coordinate
# max(x1, y1, x2, y2, x3, y3, x4, y4)
max_coordinate = boxes[..., :2].max() + boxes[..., 2:4].max()
offsets = idxs.to(boxes) * (
max_coordinate + torch.tensor(1).to(boxes))
boxes_ctr_for_nms = boxes[..., :2] + offsets[:, None]
boxes_for_nms = torch.cat([boxes_ctr_for_nms, boxes[..., 2:5]],
dim=-1)
else:
max_coordinate = boxes.max()
offsets = idxs.to(boxes) * (
max_coordinate + torch.tensor(1).to(boxes))
boxes_for_nms = boxes + offsets[:, None]
nms_type = nms_cfg_.pop('type', 'nms')
nms_op = eval(nms_type)
......@@ -306,12 +349,13 @@ def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False):
if boxes_for_nms.shape[0] < split_thr or torch.onnx.is_in_onnx_export():
dets, keep = nms_op(boxes_for_nms, scores, **nms_cfg_)
boxes = boxes[keep]
# -1 indexing works abnormally in TensorRT
# This assumes `dets` has 5 dimensions where
# This assumes `dets` has arbitrary dimensions where
# the last dimension is score.
# TODO: more elegant way to handle the dimension issue.
# Some type of nms would reweight the score, such as SoftNMS
scores = dets[:, 4]
# Currently it supports bounding boxes [x1, y1, x2, y2, score] or
# rotated boxes [cx, cy, w, h, angle_radian, score].
scores = dets[:, -1]
else:
max_num = nms_cfg_.pop('max_num', -1)
total_mask = scores.new_zeros(scores.size(), dtype=torch.bool)
......@@ -333,31 +377,33 @@ def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False):
boxes = boxes[:max_num]
scores = scores[:max_num]
return torch.cat([boxes, scores[:, None]], -1), keep
boxes = torch.cat([boxes, scores[:, None]], -1)
return boxes, keep
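A usage sketch of the class-offset trick above (illustrative; the plain `nms` op also runs on CPU):

import torch
from mmcv.ops import batched_nms

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],
                      [0., 0., 10., 10.]])
scores = torch.tensor([0.9, 0.8, 0.7])
idxs = torch.tensor([0, 0, 1])  # class ids; NMS never crosses classes
dets, keep = batched_nms(boxes, scores, idxs,
                         dict(type='nms', iou_threshold=0.5))
# box 1 (IoU ~ 0.68 with box 0, same class) is suppressed; box 2 survives
# because its class offset places it in a disjoint coordinate range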
def nms_match(dets, iou_threshold):
def nms_match(dets: array_like_type,
iou_threshold: float) -> List[array_like_type]:
"""Matched dets into different groups by NMS.
NMS match is Similar to NMS but when a bbox is suppressed, nms match will
record the indice of suppressed bbox and form a group with the indice of
kept bbox. In each group, indice is sorted as score order.
Arguments:
Args:
dets (torch.Tensor | np.ndarray): Det boxes with scores, shape (N, 5).
iou_thr (float): IoU thresh for NMS.
iou_threshold (float): IoU thresh for NMS.
Returns:
List[torch.Tensor | np.ndarray]: The outer list corresponds different
matched group, the inner Tensor corresponds the indices for a group
in score order.
list[torch.Tensor | np.ndarray]: The outer list corresponds to different
matched groups, and the inner Tensor holds the indices for a group
in score order.
"""
if dets.shape[0] == 0:
matched = []
else:
assert dets.shape[-1] == 5, 'inputs dets.shape should be (N, 5), ' \
f'but get {dets.shape}'
if isinstance(dets, torch.Tensor):
if isinstance(dets, Tensor):
dets_t = dets.detach().cpu()
else:
dets_t = torch.from_numpy(dets)
......@@ -365,15 +411,19 @@ def nms_match(dets, iou_threshold):
indata_dict = {'iou_threshold': float(iou_threshold)}
matched = ext_module.nms_match(*indata_list, **indata_dict)
if torch.__version__ == 'parrots':
matched = matched.tolist()
matched = matched.tolist() # type: ignore
if isinstance(dets, torch.Tensor):
if isinstance(dets, Tensor):
return [dets.new_tensor(m, dtype=torch.long) for m in matched]
else:
return [np.array(m, dtype=np.int) for m in matched]
return [np.array(m, dtype=int) for m in matched]
def nms_rotated(dets, scores, iou_threshold, labels=None):
def nms_rotated(dets: Tensor,
scores: Tensor,
iou_threshold: float,
labels: Optional[Tensor] = None,
clockwise: bool = True) -> Tuple[Tensor, Tensor]:
"""Performs non-maximum suppression (NMS) on the rotated boxes according to
their intersection-over-union (IoU).
......@@ -381,23 +431,33 @@ def nms_rotated(dets, scores, iou_threshold, labels=None):
IoU greater than iou_threshold with another (higher scoring) rotated box.
Args:
boxes (Tensor): Rotated boxes in shape (N, 5). They are expected to \
be in (x_ctr, y_ctr, width, height, angle_radian) format.
scores (Tensor): scores in shape (N, ).
dets (torch.Tensor): Rotated boxes in shape (N, 5).
They are expected to be in
(x_ctr, y_ctr, width, height, angle_radian) format.
scores (torch.Tensor): scores in shape (N, ).
iou_threshold (float): IoU thresh for NMS.
labels (Tensor): boxes' label in shape (N,).
labels (torch.Tensor, optional): boxes' label in shape (N,).
clockwise (bool): flag indicating whether the positive angular
orientation is clockwise. Default: True.
`New in version 1.4.3.`
Returns:
tuple: kept dets(boxes and scores) and indice, which is always the \
same data type as the input.
tuple: kept dets (boxes and scores) and indices, which are always the
same data type as the input.
"""
if dets.shape[0] == 0:
return dets, None
if not clockwise:
flip_mat = dets.new_ones(dets.shape[-1])
flip_mat[-1] = -1
dets_cw = dets * flip_mat
else:
dets_cw = dets
multi_label = labels is not None
if multi_label:
dets_wl = torch.cat((dets, labels.unsqueeze(1)), 1)
dets_wl = torch.cat((dets_cw, labels.unsqueeze(1)), 1) # type: ignore
else:
dets_wl = dets
dets_wl = dets_cw
_, order = scores.sort(0, descending=True)
dets_sorted = dets_wl.index_select(0, order)
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Union
import numpy as np
import torch
from torch import Tensor
from ..utils import ext_loader
ext_module = ext_loader.load_ext('_ext', ['pixel_group'])
def pixel_group(score, mask, embedding, kernel_label, kernel_contour,
kernel_region_num, distance_threshold):
def pixel_group(
score: Union[np.ndarray, Tensor],
mask: Union[np.ndarray, Tensor],
embedding: Union[np.ndarray, Tensor],
kernel_label: Union[np.ndarray, Tensor],
kernel_contour: Union[np.ndarray, Tensor],
kernel_region_num: int,
distance_threshold: float,
) -> List[List[float]]:
"""Group pixels into text instances, which is widely used text detection
methods.
Arguments:
score (np.array or Tensor): The foreground score with size hxw.
score (np.array or torch.Tensor): The foreground score with size hxw.
mask (np.array or Tensor): The foreground mask with size hxw.
embedding (np.array or Tensor): The embedding with size hxwxc to
embedding (np.array or torch.Tensor): The embedding with size hxwxc to
distinguish instances.
kernel_label (np.array or Tensor): The instance kernel index with
kernel_label (np.array or torch.Tensor): The instance kernel index with
size hxw.
kernel_contour (np.array or torch.Tensor): The kernel contour with
size hxw.
kernel_contour (np.array or Tensor): The kernel contour with size hxw.
kernel_region_num (int): The instance kernel region number.
distance_threshold (float): The embedding distance threshold between
kernel and pixel in one instance.
Returns:
pixel_assignment (List[List[float]]): The instance coordinate list.
Each element consists of averaged confidence, pixel number, and
coordinates (x_i, y_i for all pixels) in order.
list[list[float]]: The instance coordinates and attributes list. Each
element consists of averaged confidence, pixel number, and coordinates
(x_i, y_i for all pixels) in order.
"""
assert isinstance(score, (torch.Tensor, np.ndarray))
assert isinstance(mask, (torch.Tensor, np.ndarray))
......
# Modified from https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend # noqa
from os import path as osp
from typing import Tuple, Union
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from torch.nn.modules.utils import _pair
from torch.onnx.operators import shape_as_tensor
def bilinear_grid_sample(im, grid, align_corners=False):
def bilinear_grid_sample(im: Tensor,
grid: Tensor,
align_corners: bool = False) -> Tensor:
"""Given an input and a flow-field grid, computes the output using input
values and pixel locations from grid. Supported only bilinear interpolation
method to sample the input pixels.
......@@ -17,11 +21,12 @@ def bilinear_grid_sample(im, grid, align_corners=False):
Args:
im (torch.Tensor): Input feature map, shape (N, C, H, W)
grid (torch.Tensor): Point coordinates, shape (N, Hg, Wg, 2)
align_corners {bool}: If set to True, the extrema (-1 and 1) are
align_corners (bool): If set to True, the extrema (-1 and 1) are
considered as referring to the center points of the input’s
corner pixels. If set to False, they are instead considered as
referring to the corner points of the input’s corner pixels,
making the sampling more resolution agnostic.
Returns:
torch.Tensor: A tensor with sampled points, shape (N, C, Hg, Wg)
"""
......@@ -84,47 +89,52 @@ def bilinear_grid_sample(im, grid, align_corners=False):
return (Ia * wa + Ib * wb + Ic * wc + Id * wd).reshape(n, c, gh, gw)
def is_in_onnx_export_without_custom_ops():
def is_in_onnx_export_without_custom_ops() -> bool:
from mmcv.ops import get_onnxruntime_op_path
ort_custom_op_path = get_onnxruntime_op_path()
return torch.onnx.is_in_onnx_export(
) and not osp.exists(ort_custom_op_path)
def normalize(grid):
def normalize(grid: Tensor) -> Tensor:
"""Normalize input grid from [-1, 1] to [0, 1]
Args:
grid (Tensor): The grid to be normalize, range [-1, 1].
grid (torch.Tensor): The grid to be normalized, range [-1, 1].
Returns:
Tensor: Normalized grid, range [0, 1].
torch.Tensor: Normalized grid, range [0, 1].
"""
return (grid + 1.0) / 2.0
def denormalize(grid):
def denormalize(grid: Tensor) -> Tensor:
"""Denormalize input grid from range [0, 1] to [-1, 1]
Args:
grid (Tensor): The grid to be denormalize, range [0, 1].
grid (torch.Tensor): The grid to be denormalized, range [0, 1].
Returns:
Tensor: Denormalized grid, range [-1, 1].
torch.Tensor: Denormalized grid, range [-1, 1].
"""
return grid * 2.0 - 1.0
def generate_grid(num_grid, size, device):
def generate_grid(num_grid: int, size: Tuple[int, int],
device: torch.device) -> Tensor:
"""Generate regular square grid of points in [0, 1] x [0, 1] coordinate
space.
Args:
num_grid (int): The number of grids to sample, one for each region.
size (tuple(int, int)): The side size of the regular grid.
size (tuple[int, int]): The side size of the regular grid.
device (torch.device): Desired device of returned tensor.
Returns:
(torch.Tensor): A tensor of shape (num_grid, size[0]*size[1], 2) that
contains coordinates for the regular grids.
torch.Tensor: A tensor of shape (num_grid, size[0]*size[1], 2) that
contains coordinates for the regular grids.
"""
affine_trans = torch.tensor([[[1., 0., 0.], [0., 1., 0.]]], device=device)
......@@ -134,16 +144,17 @@ def generate_grid(num_grid, size, device):
return grid.view(1, -1, 2).expand(num_grid, -1, -1)
def rel_roi_point_to_abs_img_point(rois, rel_roi_points):
def rel_roi_point_to_abs_img_point(rois: Tensor,
rel_roi_points: Tensor) -> Tensor:
"""Convert roi based relative point coordinates to image based absolute
point coordinates.
Args:
rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5)
rel_roi_points (Tensor): Point coordinates inside RoI, relative to
RoI, location, range (0, 1), shape (N, P, 2)
rois (torch.Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5)
rel_roi_points (torch.Tensor): Point coordinates inside RoI, relative
to RoI, location, range (0, 1), shape (N, P, 2)
Returns:
Tensor: Image based absolute point coordinates, shape (N, P, 2)
torch.Tensor: Image based absolute point coordinates, shape (N, P, 2)
"""
with torch.no_grad():
......@@ -165,12 +176,13 @@ def rel_roi_point_to_abs_img_point(rois, rel_roi_points):
return abs_img_points
def get_shape_from_feature_map(x):
def get_shape_from_feature_map(x: Tensor) -> Tensor:
"""Get spatial resolution of input feature map considering exporting to
onnx mode.
Args:
x (torch.Tensor): Input tensor, shape (N, C, H, W)
Returns:
torch.Tensor: Spatial resolution (width, height), shape (1, 1, 2)
"""
......@@ -183,19 +195,22 @@ def get_shape_from_feature_map(x):
return img_shape
def abs_img_point_to_rel_img_point(abs_img_points, img, spatial_scale=1.):
def abs_img_point_to_rel_img_point(abs_img_points: Tensor,
img: Union[tuple, Tensor],
spatial_scale: float = 1.) -> Tensor:
"""Convert image based absolute point coordinates to image based relative
coordinates for sampling.
Args:
abs_img_points (Tensor): Image based absolute point coordinates,
abs_img_points (torch.Tensor): Image based absolute point coordinates,
shape (N, P, 2)
img (tuple/Tensor): (height, width) of image or feature map.
spatial_scale (float): Scale points by this factor. Default: 1.
img (tuple or torch.Tensor): (height, width) of image or feature map.
spatial_scale (float, optional): Scale points by this factor.
Default: 1.
Returns:
Tensor: Image based relative point coordinates for sampling,
shape (N, P, 2)
torch.Tensor: Image based relative point coordinates for sampling,
shape (N, P, 2).
"""
assert (isinstance(img, tuple) and len(img) == 2) or \
......@@ -213,23 +228,24 @@ def abs_img_point_to_rel_img_point(abs_img_points, img, spatial_scale=1.):
return abs_img_points / scale * spatial_scale
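# Illustrative sketch: absolute coordinates are divided by (width, height),
# so (20, 30) on an image of (h=100, w=200) maps to (0.1, 0.3);
# spatial_scale would further rescale to feature-map units.
abs_points = torch.tensor([[[20., 30.]]])
rel_points = abs_img_point_to_rel_img_point(abs_points, img=(100, 200))
assert torch.allclose(rel_points, torch.tensor([[[0.1, 0.3]]]))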
def rel_roi_point_to_rel_img_point(rois,
rel_roi_points,
img,
spatial_scale=1.):
def rel_roi_point_to_rel_img_point(rois: Tensor,
rel_roi_points: Tensor,
img: Union[tuple, Tensor],
spatial_scale: float = 1.) -> Tensor:
"""Convert roi based relative point coordinates to image based absolute
point coordinates.
Args:
rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5)
rel_roi_points (Tensor): Point coordinates inside RoI, relative to
RoI, location, range (0, 1), shape (N, P, 2)
img (tuple/Tensor): (height, width) of image or feature map.
spatial_scale (float): Scale points by this factor. Default: 1.
rois (torch.Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5)
rel_roi_points (torch.Tensor): Point coordinates inside RoI, relative
to RoI, location, range (0, 1), shape (N, P, 2)
img (tuple or torch.Tensor): (height, width) of image or feature map.
spatial_scale (float, optional): Scale points by this factor.
Default: 1.
Returns:
Tensor: Image based relative point coordinates for sampling,
shape (N, P, 2)
torch.Tensor: Image based relative point coordinates for sampling,
shape (N, P, 2).
"""
abs_img_point = rel_roi_point_to_abs_img_point(rois, rel_roi_points)
......@@ -239,20 +255,25 @@ def rel_roi_point_to_rel_img_point(rois,
return rel_img_point
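# Illustrative sketch: this is just the composition of the two helpers
# above, producing normalized coordinates ready for point_sample.
rois = torch.tensor([[10., 10., 30., 50.]])
rel_points = torch.tensor([[[0.5, 0.5]]])
pts = rel_roi_point_to_rel_img_point(rois, rel_points, img=(100, 200))
assert torch.allclose(pts, torch.tensor([[[0.1, 0.3]]]))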
def point_sample(input, points, align_corners=False, **kwargs):
def point_sample(input: Tensor,
points: Tensor,
align_corners: bool = False,
**kwargs) -> Tensor:
"""A wrapper around :func:`grid_sample` to support 3D point_coords tensors
Unlike :func:`torch.nn.functional.grid_sample` it assumes point_coords to
lie inside ``[0, 1] x [0, 1]`` square.
Args:
input (Tensor): Feature map, shape (N, C, H, W).
points (Tensor): Image based absolute point coordinates (normalized),
range [0, 1] x [0, 1], shape (N, P, 2) or (N, Hgrid, Wgrid, 2).
align_corners (bool): Whether align_corners. Default: False
input (torch.Tensor): Feature map, shape (N, C, H, W).
points (torch.Tensor): Image based absolute point coordinates
(normalized), range [0, 1] x [0, 1], shape (N, P, 2) or
(N, Hgrid, Wgrid, 2).
align_corners (bool, optional): Whether to align corners in
:func:`grid_sample`. Default: False.
Returns:
Tensor: Features of `point` on `input`, shape (N, C, P) or
(N, C, Hgrid, Wgrid).
torch.Tensor: Features of `point` on `input`, shape (N, C, P) or
(N, C, Hgrid, Wgrid).
"""
add_dim = False
......@@ -275,7 +296,10 @@ def point_sample(input, points, align_corners=False, **kwargs):
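# Illustrative usage sketch: sample a 4x4 feature map at one normalized
# point; an (N, P, 2) coordinate input yields an (N, C, P) output.
feat = torch.arange(16, dtype=torch.float32).view(1, 1, 4, 4)
pts = torch.tensor([[[0.5, 0.5]]])
out = point_sample(feat, pts)
assert out.shape == (1, 1, 1)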
class SimpleRoIAlign(nn.Module):
def __init__(self, output_size, spatial_scale, aligned=True):
def __init__(self,
output_size: Tuple[int],
spatial_scale: float,
aligned: bool = True) -> None:
"""Simple RoI align in PointRend, faster than standard RoIAlign.
Args:
......@@ -286,14 +310,14 @@ class SimpleRoIAlign(nn.Module):
If True, align the results more perfectly.
"""
super(SimpleRoIAlign, self).__init__()
super().__init__()
self.output_size = _pair(output_size)
self.spatial_scale = float(spatial_scale)
# to be consistent with other RoI ops
self.use_torchvision = False
self.aligned = aligned
def forward(self, features, rois):
def forward(self, features: Tensor, rois: Tensor) -> Tensor:
num_imgs = features.size(0)
num_rois = rois.size(0)
rel_roi_points = generate_grid(
......@@ -329,7 +353,7 @@ class SimpleRoIAlign(nn.Module):
return roi_feats
def __repr__(self):
def __repr__(self) -> str:
format_str = self.__class__.__name__
format_str += '(output_size={}, spatial_scale={}'.format(
self.output_size, self.spatial_scale)
......
import torch
from torch import Tensor
from ..utils import ext_loader
......@@ -8,17 +9,18 @@ ext_module = ext_loader.load_ext('_ext', [
])
def points_in_boxes_part(points, boxes):
def points_in_boxes_part(points: Tensor, boxes: Tensor) -> Tensor:
"""Find the box in which each point is (CUDA).
Args:
points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH coordinate
points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH coordinate.
boxes (torch.Tensor): [B, T, 7],
num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz] in
LiDAR/DEPTH coordinate, (x, y, z) is the bottom center
LiDAR/DEPTH coordinate, (x, y, z) is the bottom center.
Returns:
box_idxs_of_pts (torch.Tensor): (B, M), default background = -1
torch.Tensor: Return the box indices of points with the shape of
(B, M). Default background = -1.
"""
assert points.shape[0] == boxes.shape[0], \
'Points and boxes should have the same batch size, ' \
......@@ -55,7 +57,7 @@ def points_in_boxes_part(points, boxes):
return box_idxs_of_pts
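# Illustrative usage sketch, assuming a CUDA build of the _ext module:
# each point gets the index of the box containing it, or -1 for background.
# Box layout: (x, y, z, x_size, y_size, z_size, rz), bottom-centered.
if torch.cuda.is_available():
    pts = torch.rand(1, 128, 3).cuda()
    boxes = torch.tensor([[[0.5, 0.5, 0.5, 1., 1., 1., 0.]]]).cuda()
    idx = points_in_boxes_part(pts, boxes)  # (1, 128)
    assert idx.shape == (1, 128)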
def points_in_boxes_cpu(points, boxes):
def points_in_boxes_cpu(points: Tensor, boxes: Tensor) -> Tensor:
"""Find all boxes in which each point is (CPU). The CPU version of
:meth:`points_in_boxes_all`.
......@@ -67,7 +69,8 @@ def points_in_boxes_cpu(points, boxes):
(x, y, z) is the bottom center.
Returns:
box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0.
torch.Tensor: Return the box indices of points with the shape of
(B, M, T). Default background = 0.
"""
assert points.shape[0] == boxes.shape[0], \
'Points and boxes should have the same batch size, ' \
......@@ -92,7 +95,7 @@ def points_in_boxes_cpu(points, boxes):
return point_indices
def points_in_boxes_all(points, boxes):
def points_in_boxes_all(points: Tensor, boxes: Tensor) -> Tensor:
"""Find all boxes in which each point is (CUDA).
Args:
......@@ -102,7 +105,8 @@ def points_in_boxes_all(points, boxes):
(x, y, z) is the bottom center.
Returns:
box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0.
torch.Tensor: Return the box indices of points with the shape of
(B, M, T). Default background = 0.
"""
assert boxes.shape[0] == points.shape[0], \
'Points and boxes should have the same batch size, ' \
......
import torch
from torch import Tensor
from ..utils import ext_loader
ext_module = ext_loader.load_ext('_ext', ['points_in_polygons_forward'])
def points_in_polygons(points: Tensor, polygons: Tensor) -> Tensor:
"""Judging whether points are inside polygons, which is used in the ATSS
assignment for the rotated boxes.
It should be noted that when the point is just at the polygon boundary, the
judgment will be inaccurate, but the effect on assignment is limited.
Args:
points (torch.Tensor): It has shape (B, 2), indicating (x, y).
B means the number of predicted points.
polygons (torch.Tensor): It has shape (M, 8), indicating
(x1, y1, x2, y2, x3, y3, x4, y4). M means the number of
ground truth polygons.
Returns:
torch.Tensor: Return the result with the shape of (B, M),
1 indicates that the point is inside the polygon,
0 indicates that the point is outside the polygon.
"""
assert points.shape[1] == 2, \
'points dimension should be 2, ' \
f'but got unexpected shape {points.shape[1]}'
assert polygons.shape[1] == 8, \
'polygons dimension should be 8, ' \
f'but got unexpected shape {polygons.shape[1]}'
output = torch.full([points.shape[0], polygons.shape[0]],
0.).cuda().float()
ext_module.points_in_polygons_forward(points.contiguous(),
polygons.contiguous(), output)
return output
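# Illustrative usage sketch (CUDA only, since the output buffer is
# allocated on the GPU): the unit square contains (0.5, 0.5) but not
# (2.0, 2.0).
if torch.cuda.is_available():
    pts = torch.tensor([[0.5, 0.5], [2.0, 2.0]]).cuda()
    polygons = torch.tensor([[0., 0., 1., 0., 1., 1., 0., 1.]]).cuda()
    inside = points_in_polygons(pts, polygons)  # (2, 1) of 1. / 0.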
from typing import List
import torch
from torch import Tensor
from torch import nn as nn
from mmcv.runner import force_fp32
......@@ -8,17 +9,19 @@ from .furthest_point_sample import (furthest_point_sample,
furthest_point_sample_with_dist)
def calc_square_dist(point_feat_a, point_feat_b, norm=True):
def calc_square_dist(point_feat_a: Tensor,
point_feat_b: Tensor,
norm: bool = True) -> Tensor:
"""Calculating square distance between a and b.
Args:
point_feat_a (Tensor): (B, N, C) Feature vector of each point.
point_feat_b (Tensor): (B, M, C) Feature vector of each point.
norm (Bool, optional): Whether to normalize the distance.
point_feat_a (torch.Tensor): (B, N, C) Feature vector of each point.
point_feat_b (torch.Tensor): (B, M, C) Feature vector of each point.
norm (bool, optional): Whether to normalize the distance.
Default: True.
Returns:
Tensor: (B, N, M) Distance between each pair points.
torch.Tensor: (B, N, M) Square distance between each point pair.
"""
num_channel = point_feat_a.shape[-1]
# [bs, n, 1]
......@@ -34,7 +37,7 @@ def calc_square_dist(point_feat_a, point_feat_b, norm=True):
return dist
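# Illustrative sanity sketch: with norm=False the result should match the
# squared pairwise Euclidean distance computed by torch.cdist.
a = torch.rand(2, 5, 3)
b = torch.rand(2, 7, 3)
d = calc_square_dist(a, b, norm=False)
assert torch.allclose(d, torch.cdist(a, b) ** 2, atol=1e-5)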
def get_sampler_cls(sampler_type):
def get_sampler_cls(sampler_type: str) -> nn.Module:
"""Get the type and mode of points sampler.
Args:
......@@ -74,7 +77,7 @@ class PointsSampler(nn.Module):
def __init__(self,
num_point: List[int],
fps_mod_list: List[str] = ['D-FPS'],
fps_sample_range_list: List[int] = [-1]):
fps_sample_range_list: List[int] = [-1]) -> None:
super().__init__()
# FPS would be applied to different fps_mod in the list,
# so the length of the num_point should be equal to
......@@ -89,18 +92,18 @@ class PointsSampler(nn.Module):
self.fp16_enabled = False
@force_fp32()
def forward(self, points_xyz, features):
def forward(self, points_xyz: Tensor, features: Tensor) -> Tensor:
"""
Args:
points_xyz (Tensor): (B, N, 3) xyz coordinates of the features.
features (Tensor): (B, C, N) Descriptors of the features.
points_xyz (torch.Tensor): (B, N, 3) xyz coordinates of
the points.
features (torch.Tensor): (B, C, N) features of the points.
Returns:
Tensor: (B, npoint, sample_num) Indices of sampled points.
torch.Tensor: (B, npoint, sample_num) Indices of sampled points.
"""
indices = []
last_fps_end_index = 0
for fps_sample_range, sampler, npoint in zip(
self.fps_sample_range_list, self.samplers, self.num_point):
assert fps_sample_range < points_xyz.shape[1]
......@@ -112,8 +115,8 @@ class PointsSampler(nn.Module):
else:
sample_features = None
else:
sample_points_xyz = \
points_xyz[:, last_fps_end_index:fps_sample_range]
sample_points_xyz = points_xyz[:, last_fps_end_index:
fps_sample_range]
if features is not None:
sample_features = features[:, :, last_fps_end_index:
fps_sample_range]
......@@ -124,7 +127,7 @@ class PointsSampler(nn.Module):
npoint)
indices.append(fps_idx + last_fps_end_index)
last_fps_end_index += fps_sample_range
last_fps_end_index = fps_sample_range
indices = torch.cat(indices, dim=1)
return indices
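# Illustrative usage sketch, assuming a CUDA build of
# furthest_point_sample: F-FPS on the first 256 points, D-FPS on the rest.
if torch.cuda.is_available():
    sampler = PointsSampler(num_point=[64, 64],
                            fps_mod_list=['F-FPS', 'D-FPS'],
                            fps_sample_range_list=[256, -1])
    xyz = torch.rand(2, 1024, 3).cuda()
    features = torch.rand(2, 16, 1024).cuda()
    indices = sampler(xyz, features)  # (2, 128)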
......@@ -133,10 +136,10 @@ class PointsSampler(nn.Module):
class DFPSSampler(nn.Module):
"""Using Euclidean distances of points for FPS."""
def __init__(self):
def __init__(self) -> None:
super().__init__()
def forward(self, points, features, npoint):
def forward(self, points: Tensor, features: Tensor, npoint: int) -> Tensor:
"""Sampling points with D-FPS."""
fps_idx = furthest_point_sample(points.contiguous(), npoint)
return fps_idx
......@@ -145,10 +148,10 @@ class DFPSSampler(nn.Module):
class FFPSSampler(nn.Module):
"""Using feature distances for FPS."""
def __init__(self):
def __init__(self) -> None:
super().__init__()
def forward(self, points, features, npoint):
def forward(self, points: Tensor, features: Tensor, npoint: int) -> Tensor:
"""Sampling points with F-FPS."""
assert features is not None, \
'feature input to FFPS_Sampler should not be None'
......@@ -162,10 +165,10 @@ class FFPSSampler(nn.Module):
class FSSampler(nn.Module):
"""Using F-FPS and D-FPS simultaneously."""
def __init__(self):
def __init__(self) -> None:
super().__init__()
def forward(self, points, features, npoint):
def forward(self, points: Tensor, features: Tensor, npoint: int) -> Tensor:
"""Sampling points with FS_Sampling."""
assert features is not None, \
'feature input to FS_Sampler should not be None'
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Tuple, Union
import torch
import torch.nn as nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair
from ..utils import ext_loader
ext_module = ext_loader.load_ext(
'_ext',
['prroi_pool_forward', 'prroi_pool_backward', 'prroi_pool_coor_backward'])
class PrRoIPoolFunction(Function):
@staticmethod
def symbolic(g, features, rois, output_size, spatial_scale):
return g.op(
'mmcv::PrRoIPool',
features,
rois,
pooled_height_i=int(output_size[0]),
pooled_width_i=int(output_size[1]),
spatial_scale_f=float(spatial_scale))
@staticmethod
def forward(ctx,
features: torch.Tensor,
rois: torch.Tensor,
output_size: Tuple,
spatial_scale: float = 1.0) -> torch.Tensor:
if 'FloatTensor' not in features.type(
) or 'FloatTensor' not in rois.type():
raise ValueError(
'Precise RoI Pooling only takes float input, got '
f'{features.type()} for features and {rois.type()} for rois.')
pooled_height = int(output_size[0])
pooled_width = int(output_size[1])
spatial_scale = float(spatial_scale)
features = features.contiguous()
rois = rois.contiguous()
output_shape = (rois.size(0), features.size(1), pooled_height,
pooled_width)
output = features.new_zeros(output_shape)
params = (pooled_height, pooled_width, spatial_scale)
ext_module.prroi_pool_forward(features, rois, output, *params)
ctx.params = params
# everything here is contiguous.
ctx.save_for_backward(features, rois, output)
return output
@staticmethod
@once_differentiable
def backward(
ctx, grad_output: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor, None, None, None]:
features, rois, output = ctx.saved_tensors
grad_input = grad_output.new_zeros(*features.shape)
grad_coor = grad_output.new_zeros(*rois.shape)
if features.requires_grad:
grad_output = grad_output.contiguous()
ext_module.prroi_pool_backward(grad_output, rois, grad_input,
*ctx.params)
if rois.requires_grad:
grad_output = grad_output.contiguous()
ext_module.prroi_pool_coor_backward(output, grad_output, features,
rois, grad_coor, *ctx.params)
return grad_input, grad_coor, None, None, None
prroi_pool = PrRoIPoolFunction.apply
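# Illustrative usage sketch, assuming a CUDA build of the _ext module and
# the usual mmcv roi layout (batch_index, x1, y1, x2, y2); rois must be
# float tensors, as checked in forward above.
if torch.cuda.is_available():
    feats = torch.rand(1, 16, 32, 32).cuda()
    rois = torch.tensor([[0., 4., 4., 20., 20.]]).cuda()
    pooled = prroi_pool(feats, rois, (7, 7), 1.0)  # (1, 16, 7, 7)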
class PrRoIPool(nn.Module):
"""The operation of precision RoI pooling. The implementation of PrRoIPool
is modified from https://github.com/vacancy/PreciseRoIPooling/
Precise RoI Pooling (PrRoIPool) is an integration-based (bilinear
interpolation) average pooling method for RoI Pooling. It avoids any
quantization and has a continuous gradient on bounding box coordinates.
It is:
1. different from the original RoI Pooling proposed in Fast R-CNN. PrRoI
Pooling uses average pooling instead of max pooling for each bin and has a
continuous gradient on bounding box coordinates. That is, one can take the
derivatives of some loss function w.r.t the coordinates of each RoI and
optimize the RoI coordinates.
2. different from the RoI Align proposed in Mask R-CNN. PrRoI Pooling uses
a full integration-based average pooling instead of sampling a constant
number of points. This makes the gradient w.r.t. the coordinates
continuous.
Args:
output_size (Union[int, tuple]): h, w.
spatial_scale (float, optional): scale the input boxes by this number.
Defaults to 1.0.
"""
def __init__(self,
output_size: Union[int, tuple],
spatial_scale: float = 1.0):
super().__init__()
self.output_size = _pair(output_size)
self.spatial_scale = float(spatial_scale)
def forward(self, features: torch.Tensor,
rois: torch.Tensor) -> torch.Tensor:
"""Forward function.
Args:
features (torch.Tensor): The feature map.
rois (torch.Tensor): The RoI bboxes in [tl_x, tl_y, br_x, br_y]
format.
Returns:
torch.Tensor: The pooled results.
"""
return prroi_pool(features, rois, self.output_size, self.spatial_scale)
def __repr__(self):
s = self.__class__.__name__
s += f'(output_size={self.output_size}, '
s += f'spatial_scale={self.spatial_scale})'
return s
# Modified from https://github.com/hszhao/semseg/blob/master/lib/psa
from typing import Optional, Tuple
import torch
from torch import nn
from torch.autograd import Function
from torch.nn.modules.utils import _pair
......@@ -20,7 +23,8 @@ class PSAMaskFunction(Function):
mask_size_i=mask_size)
@staticmethod
def forward(ctx, input, psa_type, mask_size):
def forward(ctx, input: torch.Tensor, psa_type: str,
mask_size: int) -> torch.Tensor:
ctx.psa_type = psa_type
ctx.mask_size = _pair(mask_size)
ctx.save_for_backward(input)
......@@ -45,7 +49,9 @@ class PSAMaskFunction(Function):
return output
@staticmethod
def backward(ctx, grad_output):
def backward(
ctx, grad_output: torch.Tensor
) -> Tuple[torch.Tensor, None, None, None]:
input = ctx.saved_tensors[0]
psa_type = ctx.psa_type
h_mask, w_mask = ctx.mask_size
......@@ -71,8 +77,8 @@ psa_mask = PSAMaskFunction.apply
class PSAMask(nn.Module):
def __init__(self, psa_type, mask_size=None):
super(PSAMask, self).__init__()
def __init__(self, psa_type: str, mask_size: Optional[tuple] = None):
super().__init__()
assert psa_type in ['collect', 'distribute']
if psa_type == 'collect':
psa_type_enum = 0
......@@ -82,7 +88,7 @@ class PSAMask(nn.Module):
self.mask_size = mask_size
self.psa_type = psa_type
def forward(self, input):
def forward(self, input: torch.Tensor) -> torch.Tensor:
return psa_mask(input, self.psa_type_enum, self.mask_size)
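# Illustrative usage sketch, assuming a CPU kernel of the psamask op is
# available and that mask_size equals the feature size, so the input has
# H*W channels and the output keeps shape (N, H*W, H, W).
m = PSAMask('collect', mask_size=(16, 16))
x = torch.rand(1, 16 * 16, 16, 16)
y = m(x)  # (1, 256, 16, 16)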
def __repr__(self):
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Any, Optional, Tuple, Union
import torch
import torch.nn as nn
from torch.autograd import Function
from ..utils import ext_loader, is_tuple_of
ext_module = ext_loader.load_ext(
'_ext', ['riroi_align_rotated_forward', 'riroi_align_rotated_backward'])
class RiRoIAlignRotatedFunction(Function):
@staticmethod
def forward(ctx: Any,
features: torch.Tensor,
rois: torch.Tensor,
out_size: Union[int, tuple],
spatial_scale: float,
num_samples: int = 0,
num_orientations: int = 8,
clockwise: bool = False) -> torch.Tensor:
if isinstance(out_size, int):
out_h = out_size
out_w = out_size
elif is_tuple_of(out_size, int):
assert len(out_size) == 2
out_h, out_w = out_size
else:
raise TypeError(
f'"out_size" should be an integer or tuple of integers,'
f' but got {out_size}')
ctx.spatial_scale = spatial_scale
ctx.num_samples = num_samples
ctx.num_orientations = num_orientations
ctx.clockwise = clockwise
ctx.save_for_backward(rois)
ctx.feature_size = features.size()
batch_size, num_channels, _, _ = features.size()
num_rois = rois.size(0)
output = features.new_zeros(num_rois, num_channels, out_h, out_w)
ext_module.riroi_align_rotated_forward(
features,
rois,
output,
pooled_height=out_h,
pooled_width=out_w,
spatial_scale=spatial_scale,
num_samples=num_samples,
num_orientations=num_orientations,
clockwise=clockwise)
return output
@staticmethod
def backward(
ctx: Any, grad_output: torch.Tensor
) -> Optional[Tuple[torch.Tensor, None, None, None, None, None, None]]:
feature_size = ctx.feature_size
spatial_scale = ctx.spatial_scale
num_orientations = ctx.num_orientations
clockwise = ctx.clockwise
num_samples = ctx.num_samples
rois = ctx.saved_tensors[0]
assert feature_size is not None
batch_size, num_channels, feature_h, feature_w = feature_size
out_w = grad_output.size(3)
out_h = grad_output.size(2)
grad_input = None
if ctx.needs_input_grad[0]:
grad_input = rois.new_zeros(batch_size, num_channels, feature_h,
feature_w)
ext_module.riroi_align_rotated_backward(
grad_output.contiguous(),
rois,
grad_input,
pooled_height=out_h,
pooled_width=out_w,
spatial_scale=spatial_scale,
num_samples=num_samples,
num_orientations=num_orientations,
clockwise=clockwise)
return grad_input, None, None, None, None, None, None
return None
riroi_align_rotated = RiRoIAlignRotatedFunction.apply
class RiRoIAlignRotated(nn.Module):
"""Rotation-invariant RoI align pooling layer for rotated proposals.
It accepts a feature map of shape (N, C, H, W) and rois with shape
(n, 6) with each roi decoded as (batch_index, center_x, center_y,
w, h, angle). The angle is in radian.
The details are described in the paper `ReDet: A Rotation-equivariant
Detector for Aerial Object Detection <https://arxiv.org/abs/2103.07733>`_.
Args:
out_size (tuple): fixed dimensional RoI output with shape (h, w).
spatial_scale (float): scale the input boxes by this number.
num_samples (int): number of input samples to take for each
output sample. 0 to take samples densely for current models.
num_orientations (int): number of oriented channels.
clockwise (bool): If True, the angle in each proposal follows a
clockwise fashion in image space, otherwise, the angle is
counterclockwise. Default: False.
"""
def __init__(self,
out_size: tuple,
spatial_scale: float,
num_samples: int = 0,
num_orientations: int = 8,
clockwise: bool = False):
super().__init__()
self.out_size = out_size
self.spatial_scale = float(spatial_scale)
self.num_samples = int(num_samples)
self.num_orientations = int(num_orientations)
self.clockwise = clockwise
def forward(self, features: torch.Tensor,
rois: torch.Tensor) -> torch.Tensor:
return RiRoIAlignRotatedFunction.apply(features, rois, self.out_size,
self.spatial_scale,
self.num_samples,
self.num_orientations,
self.clockwise)
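# Illustrative usage sketch, assuming a CUDA build of the _ext module:
# rois are (batch_index, cx, cy, w, h, angle) with the angle in radian,
# and the channel count is assumed divisible by num_orientations.
if torch.cuda.is_available():
    feats = torch.rand(1, 8, 32, 32).cuda()
    rois = torch.tensor([[0., 16., 16., 8., 8., 0.3]]).cuda()
    layer = RiRoIAlignRotated((7, 7), spatial_scale=1.0, num_orientations=8)
    out = layer(feats, rois)  # (1, 8, 7, 7)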