Commit fdeee889 authored by limm

release v1.6.1 of mmcv

parent df465820
@@ -18,11 +18,11 @@ class FurthestPointSampling(Function):
                 num_points: int) -> torch.Tensor:
         """
         Args:
-            points_xyz (Tensor): (B, N, 3) where N > num_points.
+            points_xyz (torch.Tensor): (B, N, 3) where N > num_points.
             num_points (int): Number of points in the sampled set.

         Returns:
-            Tensor: (B, num_points) indices of the sampled points.
+            torch.Tensor: (B, num_points) indices of the sampled points.
         """
         assert points_xyz.is_contiguous()
@@ -56,11 +56,12 @@ class FurthestPointSamplingWithDist(Function):
                 num_points: int) -> torch.Tensor:
         """
         Args:
-            points_dist (Tensor): (B, N, N) Distance between each point pair.
+            points_dist (torch.Tensor): (B, N, N) Distance between each point
+                pair.
             num_points (int): Number of points in the sampled set.

         Returns:
-            Tensor: (B, num_points) indices of the sampled points.
+            torch.Tensor: (B, num_points) indices of the sampled points.
         """
         assert points_dist.is_contiguous()
...
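For reference, a minimal usage sketch of the op touched above (not part of the commit; assumes a CUDA build of mmcv-full with the `_ext` ops compiled):

    import torch
    from mmcv.ops import furthest_point_sample

    xyz = torch.rand(2, 1024, 3).cuda()    # (B, N, 3), contiguous
    idx = furthest_point_sample(xyz, 128)  # (B, 128) sampled indices, per the docstring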
@@ -113,7 +113,8 @@ class FusedBiasLeakyReLUFunctionBackward(Function):
     """

     @staticmethod
-    def forward(ctx, grad_output, out, negative_slope, scale):
+    def forward(ctx, grad_output: torch.Tensor, out: torch.Tensor,
+                negative_slope: float, scale: float) -> tuple:
         ctx.save_for_backward(out)
         ctx.negative_slope = negative_slope
         ctx.scale = scale
@@ -139,7 +140,8 @@ class FusedBiasLeakyReLUFunctionBackward(Function):
         return grad_input, grad_bias

     @staticmethod
-    def backward(ctx, gradgrad_input, gradgrad_bias):
+    def backward(ctx, gradgrad_input: torch.Tensor,
+                 gradgrad_bias: nn.Parameter) -> tuple:
         out, = ctx.saved_tensors
         # The second order deviation, in fact, contains two parts, while the
@@ -160,7 +162,8 @@ class FusedBiasLeakyReLUFunctionBackward(Function):
 class FusedBiasLeakyReLUFunction(Function):

     @staticmethod
-    def forward(ctx, input, bias, negative_slope, scale):
+    def forward(ctx, input: torch.Tensor, bias: nn.Parameter,
+                negative_slope: float, scale: float) -> torch.Tensor:
         empty = input.new_empty(0)

         out = ext_module.fused_bias_leakyrelu(
@@ -178,7 +181,7 @@ class FusedBiasLeakyReLUFunction(Function):
         return out

     @staticmethod
-    def backward(ctx, grad_output):
+    def backward(ctx, grad_output: torch.Tensor) -> tuple:
         out, = ctx.saved_tensors
         grad_input, grad_bias = FusedBiasLeakyReLUFunctionBackward.apply(
@@ -188,51 +191,59 @@ class FusedBiasLeakyReLUFunction(Function):
 class FusedBiasLeakyReLU(nn.Module):
-    """Fused bias leaky ReLU.
+    r"""Fused bias leaky ReLU.

     This function is introduced in the StyleGAN2:
-    http://arxiv.org/abs/1912.04958
+    `Analyzing and Improving the Image Quality of StyleGAN
+    <http://arxiv.org/abs/1912.04958>`_

     The bias term comes from the convolution operation. In addition, to keep
     the variance of the feature map or gradients unchanged, they also adopt a
     scale similarly with Kaiming initialization. However, since the
-    :math:`1+{alpha}^2` : is too small, we can just ignore it. Therefore, the
-    final scale is just :math:`\sqrt{2}`:. Of course, you may change it with  # noqa: W605, E501
+    :math:`1+{alpha}^2` is too small, we can just ignore it. Therefore, the
+    final scale is just :math:`\sqrt{2}`. Of course, you may change it with
     your own scale.

     TODO: Implement the CPU version.

     Args:
-        channel (int): The channel number of the feature map.
+        num_channels (int): The channel number of the feature map.
         negative_slope (float, optional): Same as nn.LeakyRelu.
             Defaults to 0.2.
         scale (float, optional): A scalar to adjust the variance of the feature
             map. Defaults to 2**0.5.
     """

-    def __init__(self, num_channels, negative_slope=0.2, scale=2**0.5):
-        super(FusedBiasLeakyReLU, self).__init__()
+    def __init__(self,
+                 num_channels: int,
+                 negative_slope: float = 0.2,
+                 scale: float = 2**0.5):
+        super().__init__()
         self.bias = nn.Parameter(torch.zeros(num_channels))
         self.negative_slope = negative_slope
         self.scale = scale

-    def forward(self, input):
+    def forward(self, input: torch.Tensor) -> torch.Tensor:
         return fused_bias_leakyrelu(input, self.bias, self.negative_slope,
                                     self.scale)

-def fused_bias_leakyrelu(input, bias, negative_slope=0.2, scale=2**0.5):
-    """Fused bias leaky ReLU function.
+def fused_bias_leakyrelu(input: torch.Tensor,
+                         bias: nn.Parameter,
+                         negative_slope: float = 0.2,
+                         scale: float = 2**0.5) -> torch.Tensor:
+    r"""Fused bias leaky ReLU function.

     This function is introduced in the StyleGAN2:
-    http://arxiv.org/abs/1912.04958
+    `Analyzing and Improving the Image Quality of StyleGAN
+    <http://arxiv.org/abs/1912.04958>`_

     The bias term comes from the convolution operation. In addition, to keep
     the variance of the feature map or gradients unchanged, they also adopt a
     scale similarly with Kaiming initialization. However, since the
-    :math:`1+{alpha}^2` : is too small, we can just ignore it. Therefore, the
-    final scale is just :math:`\sqrt{2}`:. Of course, you may change it with  # noqa: W605, E501
+    :math:`1+{alpha}^2` is too small, we can just ignore it. Therefore, the
+    final scale is just :math:`\sqrt{2}`. Of course, you may change it with
     your own scale.

     Args:
@@ -254,7 +265,10 @@ def fused_bias_leakyrelu(input, bias, negative_slope=0.2, scale=2**0.5):
                                                negative_slope, scale)

-def bias_leakyrelu_ref(x, bias, negative_slope=0.2, scale=2**0.5):
+def bias_leakyrelu_ref(x: torch.Tensor,
+                       bias: nn.Parameter,
+                       negative_slope: float = 0.2,
+                       scale: float = 2**0.5) -> torch.Tensor:
     if bias is not None:
         assert bias.ndim == 1
...
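A minimal sketch of the newly typed API (not part of the commit; assumes a CUDA build, since the CPU path is still a TODO):

    import torch
    from mmcv.ops import FusedBiasLeakyReLU, fused_bias_leakyrelu

    act = FusedBiasLeakyReLU(num_channels=64).cuda()
    x = torch.randn(2, 64, 32, 32).cuda()
    y = act(x)  # same shape as x
    # or the functional form with an explicit bias parameter
    y2 = fused_bias_leakyrelu(x, act.bias, negative_slope=0.2, scale=2**0.5)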
+from typing import Tuple
+
 import torch
 from torch.autograd import Function
@@ -15,18 +17,18 @@ class GatherPoints(Function):
                 indices: torch.Tensor) -> torch.Tensor:
         """
         Args:
-            features (Tensor): (B, C, N) features to gather.
-            indices (Tensor): (B, M) where M is the number of points.
+            features (torch.Tensor): (B, C, N) features to gather.
+            indices (torch.Tensor): (B, M) where M is the number of points.

         Returns:
-            Tensor: (B, C, M) where M is the number of points.
+            torch.Tensor: (B, C, M) where M is the number of points.
         """
         assert features.is_contiguous()
         assert indices.is_contiguous()

         B, npoint = indices.size()
         _, C, N = features.size()
-        output = torch.cuda.FloatTensor(B, C, npoint)
+        output = features.new_zeros((B, C, npoint))

         ext_module.gather_points_forward(
             features, indices, output, b=B, c=C, n=N, npoints=npoint)
@@ -37,11 +39,11 @@ class GatherPoints(Function):
         return output

     @staticmethod
-    def backward(ctx, grad_out):
+    def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, None]:
         idx, C, N = ctx.for_backwards
         B, npoint = idx.size()

-        grad_features = torch.cuda.FloatTensor(B, C, N).zero_()
+        grad_features = grad_out.new_zeros((B, C, N))
         grad_out_data = grad_out.data.contiguous()

         ext_module.gather_points_backward(
             grad_out_data,
...
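Note the switch from `torch.cuda.FloatTensor(...)` to `new_zeros(...)`, which allocates on the same device and dtype as the input. A hedged usage sketch (not part of the commit; assumes a CUDA build, and that the extension expects int32 indices):

    import torch
    from mmcv.ops import gather_points

    feats = torch.rand(2, 16, 1024).cuda()                # (B, C, N)
    idx = torch.randint(0, 1024, (2, 128)).int().cuda()   # (B, M), int32
    gathered = gather_points(feats, idx)                  # (B, C, 128)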
 # Copyright (c) OpenMMLab. All rights reserved.
-from typing import Tuple
+from typing import Optional, Tuple, Union

 import torch
 from torch import nn as nn
@@ -37,15 +37,15 @@ class QueryAndGroup(nn.Module):
     """

     def __init__(self,
-                 max_radius,
-                 sample_num,
-                 min_radius=0,
-                 use_xyz=True,
-                 return_grouped_xyz=False,
-                 normalize_xyz=False,
-                 uniform_sample=False,
-                 return_unique_cnt=False,
-                 return_grouped_idx=False):
+                 max_radius: float,
+                 sample_num: int,
+                 min_radius: float = 0.,
+                 use_xyz: bool = True,
+                 return_grouped_xyz: bool = False,
+                 normalize_xyz: bool = False,
+                 uniform_sample: bool = False,
+                 return_unique_cnt: bool = False,
+                 return_grouped_idx: bool = False):
         super().__init__()
         self.max_radius = max_radius
         self.min_radius = min_radius
@@ -64,15 +64,24 @@ class QueryAndGroup(nn.Module):
             assert not self.normalize_xyz, \
                 'can not normalize grouped xyz when max_radius is None'

-    def forward(self, points_xyz, center_xyz, features=None):
+    def forward(
+        self,
+        points_xyz: torch.Tensor,
+        center_xyz: torch.Tensor,
+        features: Optional[torch.Tensor] = None,
+    ) -> Union[torch.Tensor, Tuple]:
         """
         Args:
-            points_xyz (Tensor): (B, N, 3) xyz coordinates of the features.
-            center_xyz (Tensor): (B, npoint, 3) coordinates of the centriods.
-            features (Tensor): (B, C, N) Descriptors of the features.
+            points_xyz (torch.Tensor): (B, N, 3) xyz coordinates of the
+                points.
+            center_xyz (torch.Tensor): (B, npoint, 3) coordinates of the
+                centriods.
+            features (torch.Tensor): (B, C, N) The features of grouped
+                points.

         Returns:
-            Tensor: (B, 3 + C, npoint, sample_num) Grouped feature.
+            Tuple | torch.Tensor: (B, 3 + C, npoint, sample_num) Grouped
+            concatenated coordinates and features of points.
         """
         # if self.max_radius is None, we will perform kNN instead of ball query
         # idx is of shape [B, npoint, sample_num]
@@ -145,7 +154,7 @@ class GroupAll(nn.Module):
     def forward(self,
                 xyz: torch.Tensor,
                 new_xyz: torch.Tensor,
-                features: torch.Tensor = None):
+                features: Optional[torch.Tensor] = None) -> torch.Tensor:
         """
         Args:
             xyz (Tensor): (B, N, 3) xyz coordinates of the features.
@@ -206,8 +215,7 @@ class GroupingOperation(Function):
         return output

     @staticmethod
-    def backward(ctx,
-                 grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+    def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, None]:
         """
         Args:
             grad_out (Tensor): (B, C, npoint, nsample) tensor of the gradients
...
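A hedged usage sketch for the retyped `QueryAndGroup` (not part of the commit; assumes a CUDA build and that the class is re-exported from `mmcv.ops`):

    import torch
    from mmcv.ops import QueryAndGroup

    grouper = QueryAndGroup(max_radius=0.2, sample_num=16, use_xyz=True)
    points = torch.rand(2, 1024, 3).cuda()      # (B, N, 3)
    centers = points[:, :128, :].contiguous()   # (B, npoint, 3)
    feats = torch.rand(2, 32, 1024).cuda()      # (B, C, N)
    grouped = grouper(points, centers, feats)   # (B, 3 + 32, 128, 16)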
 # Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+from typing import Optional
+
 import torch
+from torch import Tensor

 from ..utils import ext_loader

 ext_module = ext_loader.load_ext('_ext', [
-    'iou3d_boxes_iou_bev_forward', 'iou3d_nms_forward',
-    'iou3d_nms_normal_forward'
+    'iou3d_boxes_overlap_bev_forward', 'iou3d_nms3d_forward',
+    'iou3d_nms3d_normal_forward'
 ])

-def boxes_iou_bev(boxes_a, boxes_b):
-    """Calculate boxes IoU in the Bird's Eye View.
+def boxes_overlap_bev(boxes_a: Tensor, boxes_b: Tensor) -> Tensor:
+    """Calculate boxes BEV overlap.
+
+    Args:
+        boxes_a (torch.Tensor): Input boxes a with shape (M, 7).
+        boxes_b (torch.Tensor): Input boxes b with shape (N, 7).
+
+    Returns:
+        torch.Tensor: BEV overlap result with shape (M, N).
+    """
+    ans_overlap = boxes_a.new_zeros(
+        torch.Size((boxes_a.shape[0], boxes_b.shape[0])))
+    ext_module.iou3d_boxes_overlap_bev_forward(boxes_a.contiguous(),
+                                               boxes_b.contiguous(),
+                                               ans_overlap)
+    return ans_overlap
+
+
+def boxes_iou3d(boxes_a: Tensor, boxes_b: Tensor) -> Tensor:
+    """Calculate boxes 3D IoU.

     Args:
-        boxes_a (torch.Tensor): Input boxes a with shape (M, 5).
-        boxes_b (torch.Tensor): Input boxes b with shape (N, 5).
+        boxes_a (torch.Tensor): Input boxes a with shape (M, 7).
+        boxes_b (torch.Tensor): Input boxes b with shape (N, 7).

     Returns:
-        ans_iou (torch.Tensor): IoU result with shape (M, N).
+        torch.Tensor: 3D IoU result with shape (M, N).
     """
-    ans_iou = boxes_a.new_zeros(
+    assert boxes_a.shape[1] == boxes_b.shape[1] == 7,\
+        'Input boxes shape should be (N, 7)'
+    boxes_a_height_max = (boxes_a[:, 2] + boxes_a[:, 5] / 2).view(-1, 1)
+    boxes_a_height_min = (boxes_a[:, 2] - boxes_a[:, 5] / 2).view(-1, 1)
+    boxes_b_height_max = (boxes_b[:, 2] + boxes_b[:, 5] / 2).view(1, -1)
+    boxes_b_height_min = (boxes_b[:, 2] - boxes_b[:, 5] / 2).view(1, -1)
+
+    overlaps_bev = boxes_a.new_zeros(
         torch.Size((boxes_a.shape[0], boxes_b.shape[0])))
-    ext_module.iou3d_boxes_iou_bev_forward(boxes_a.contiguous(),
-                                           boxes_b.contiguous(), ans_iou)
-    return ans_iou
+    ext_module.iou3d_boxes_overlap_bev_forward(boxes_a.contiguous(),
+                                               boxes_b.contiguous(),
+                                               overlaps_bev)
+
+    max_of_min = torch.max(boxes_a_height_min, boxes_b_height_min)
+    min_of_max = torch.min(boxes_a_height_max, boxes_b_height_max)
+    overlaps_h = torch.clamp(min_of_max - max_of_min, min=0)
+
+    overlaps_3d = overlaps_bev * overlaps_h
+    vol_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1)
+    vol_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1)
+    iou3d = overlaps_3d / torch.clamp(vol_a + vol_b - overlaps_3d, min=1e-6)
+    return iou3d
+
+
+def nms3d(boxes: Tensor, scores: Tensor, iou_threshold: float) -> Tensor:
+    """3D NMS function GPU implementation (for BEV boxes).
+
+    Args:
+        boxes (torch.Tensor): Input boxes with the shape of (N, 7)
+            ([x, y, z, dx, dy, dz, heading]).
+        scores (torch.Tensor): Scores of boxes with the shape of (N).
+        iou_threshold (float): Overlap threshold of NMS.
+
+    Returns:
+        torch.Tensor: Indexes after NMS.
+    """
+    assert boxes.size(1) == 7, 'Input boxes shape should be (N, 7)'
+    order = scores.sort(0, descending=True)[1]
+    boxes = boxes[order].contiguous()
+
+    keep = torch.zeros(boxes.size(0), dtype=torch.long)
+    num_out = torch.zeros(size=(), dtype=torch.long)
+    ext_module.iou3d_nms3d_forward(
+        boxes, keep, num_out, nms_overlap_thresh=iou_threshold)
+    keep = order[keep[:num_out].cuda(boxes.device)].contiguous()
+    return keep
+
+
+def nms3d_normal(boxes: Tensor, scores: Tensor,
+                 iou_threshold: float) -> Tensor:
+    """Normal 3D NMS function GPU implementation. The overlap of two boxes for
+    IoU calculation is defined as the exact overlapping area of the two boxes
+    WITH their yaw angle set to 0.
+
+    Args:
+        boxes (torch.Tensor): Input boxes with shape (N, 7).
+            ([x, y, z, dx, dy, dz, heading]).
+        scores (torch.Tensor): Scores of predicted boxes with shape (N).
+        iou_threshold (float): Overlap threshold of NMS.
+
+    Returns:
+        torch.Tensor: Remaining indices with scores in descending order.
+    """
+    assert boxes.shape[1] == 7, 'Input boxes shape should be (N, 7)'
+    order = scores.sort(0, descending=True)[1]
+    boxes = boxes[order].contiguous()
+
+    keep = torch.zeros(boxes.size(0), dtype=torch.long)
+    num_out = torch.zeros(size=(), dtype=torch.long)
+    ext_module.iou3d_nms3d_normal_forward(
+        boxes, keep, num_out, nms_overlap_thresh=iou_threshold)
+    return order[keep[:num_out].cuda(boxes.device)].contiguous()
+
+
+def _xyxyr2xywhr(boxes: Tensor) -> Tensor:
+    """Convert [x1, y1, x2, y2, heading] box to [x, y, dx, dy, heading] box.
+
+    Args:
+        box (torch.Tensor): Input boxes with shape (N, 5).
+
+    Returns:
+        torch.Tensor: Converted boxes with shape (N, 7).
+    """
+    warnings.warn(
+        'This function is deprecated and will be removed in the future.',
+        DeprecationWarning)
+    return torch.stack(
+        ((boxes[:, 0] + boxes[:, 2]) / 2, (boxes[:, 1] + boxes[:, 3]) / 2,
+         boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1], boxes[:, 4]),
+        dim=-1)
+
+
+def boxes_iou_bev(boxes_a: Tensor, boxes_b: Tensor) -> Tensor:
+    """Calculate boxes IoU in the Bird's Eye View.
+
+    Args:
+        boxes_a (torch.Tensor): Input boxes a with shape (M, 5)
+            ([x1, y1, x2, y2, ry]).
+        boxes_b (torch.Tensor): Input boxes b with shape (N, 5)
+            ([x1, y1, x2, y2, ry]).
+
+    Returns:
+        torch.Tensor: IoU result with shape (M, N).
+    """
+    from .box_iou_rotated import box_iou_rotated
+
+    warnings.warn(
+        '`iou3d.boxes_iou_bev` is deprecated and will be removed in'
+        ' the future. Please, use `box_iou_rotated.box_iou_rotated`.',
+        DeprecationWarning)
+    return box_iou_rotated(_xyxyr2xywhr(boxes_a), _xyxyr2xywhr(boxes_b))

-def nms_bev(boxes, scores, thresh, pre_max_size=None, post_max_size=None):
-    """NMS function GPU implementation (for BEV boxes). The overlap of two
+def nms_bev(boxes: Tensor,
+            scores: Tensor,
+            thresh: float,
+            pre_max_size: Optional[int] = None,
+            post_max_size: Optional[int] = None) -> Tensor:
+    """NMS function GPU implementation (for BEV boxes).
+
+    The overlap of two
     boxes for IoU calculation is defined as the exact overlapping area of the
     two boxes. In this function, one can also set ``pre_max_size`` and
     ``post_max_size``.

     Args:
-        boxes (torch.Tensor): Input boxes with the shape of [N, 5]
+        boxes (torch.Tensor): Input boxes with the shape of (N, 5)
             ([x1, y1, x2, y2, ry]).
-        scores (torch.Tensor): Scores of boxes with the shape of [N].
+        scores (torch.Tensor): Scores of boxes with the shape of (N,).
         thresh (float): Overlap threshold of NMS.
         pre_max_size (int, optional): Max size of boxes before NMS.
             Default: None.
         post_max_size (int, optional): Max size of boxes after NMS.
             Default: None.

     Returns:
         torch.Tensor: Indexes after NMS.
     """
-    assert boxes.size(1) == 5, 'Input boxes shape should be [N, 5]'
+    from .nms import nms_rotated
+    warnings.warn(
+        '`iou3d.nms_bev` is deprecated and will be removed in'
+        ' the future. Please, use `nms.nms_rotated`.', DeprecationWarning)
+    assert boxes.size(1) == 5, 'Input boxes shape should be (N, 5)'
     order = scores.sort(0, descending=True)[1]
     if pre_max_size is not None:
         order = order[:pre_max_size]
-    boxes = boxes[order].contiguous()
-
-    keep = torch.zeros(boxes.size(0), dtype=torch.long)
-    num_out = torch.zeros(size=(), dtype=torch.long)
-    ext_module.iou3d_nms_forward(
-        boxes, keep, num_out, nms_overlap_thresh=thresh)
-    keep = order[keep[:num_out].cuda(boxes.device)].contiguous()
+    boxes = _xyxyr2xywhr(boxes)[order]
+    scores = scores[order]
+
+    keep = nms_rotated(boxes, scores, thresh)[1]
+    keep = order[keep]
     if post_max_size is not None:
         keep = keep[:post_max_size]
     return keep

-def nms_normal_bev(boxes, scores, thresh):
-    """Normal NMS function GPU implementation (for BEV boxes). The overlap of
+def nms_normal_bev(boxes: Tensor, scores: Tensor, thresh: float) -> Tensor:
+    """Normal NMS function GPU implementation (for BEV boxes).
+
+    The overlap of
     two boxes for IoU calculation is defined as the exact overlapping area of
     the two boxes WITH their yaw angle set to 0.

     Args:
-        boxes (torch.Tensor): Input boxes with shape (N, 5).
-        scores (torch.Tensor): Scores of predicted boxes with shape (N).
+        boxes (torch.Tensor): Input boxes with shape (N, 5)
+            ([x1, y1, x2, y2, ry]).
+        scores (torch.Tensor): Scores of predicted boxes with shape (N,).
         thresh (float): Overlap threshold of NMS.

     Returns:
         torch.Tensor: Remaining indices with scores in descending order.
     """
-    assert boxes.shape[1] == 5, 'Input boxes shape should be [N, 5]'
-    order = scores.sort(0, descending=True)[1]
-    boxes = boxes[order].contiguous()
-
-    keep = torch.zeros(boxes.size(0), dtype=torch.long)
-    num_out = torch.zeros(size=(), dtype=torch.long)
-    ext_module.iou3d_nms_normal_forward(
-        boxes, keep, num_out, nms_overlap_thresh=thresh)
-    return order[keep[:num_out].cuda(boxes.device)].contiguous()
+    from .nms import nms
+
+    warnings.warn(
+        '`iou3d.nms_normal_bev` is deprecated and will be removed in'
+        ' the future. Please, use `nms.nms`.', DeprecationWarning)
+    assert boxes.shape[1] == 5, 'Input boxes shape should be (N, 5)'
+
+    return nms(boxes[:, :-1], scores, thresh)[1]
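A hedged usage sketch of the renamed 3D API introduced above (not part of the commit; assumes a CUDA build, with boxes as (x, y, z, dx, dy, dz, heading)):

    import torch
    from mmcv.ops import boxes_iou3d, nms3d

    boxes = torch.rand(32, 7).cuda()   # (N, 7)
    scores = torch.rand(32).cuda()
    iou = boxes_iou3d(boxes, boxes)    # (32, 32) 3D IoU matrix
    keep = nms3d(boxes, scores, iou_threshold=0.3)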
+from typing import Optional
+
 import torch
 from torch.autograd import Function
@@ -8,6 +10,7 @@ ext_module = ext_loader.load_ext('_ext', ['knn_forward'])
 class KNN(Function):
     r"""KNN (CUDA) based on heap data structure.
+
     Modified from `PAConv <https://github.com/CVMI-Lab/PAConv/tree/main/
     scene_seg/lib/pointops/src/knnquery_heap>`_.
@@ -18,15 +21,15 @@ class KNN(Function):
     def forward(ctx,
                 k: int,
                 xyz: torch.Tensor,
-                center_xyz: torch.Tensor = None,
+                center_xyz: Optional[torch.Tensor] = None,
                 transposed: bool = False) -> torch.Tensor:
         """
         Args:
             k (int): number of nearest neighbors.
-            xyz (Tensor): (B, N, 3) if transposed == False, else (B, 3, N).
-                xyz coordinates of the features.
-            center_xyz (Tensor, optional): (B, npoint, 3) if transposed ==
-                False, else (B, 3, npoint). centers of the knn query.
+            xyz (torch.Tensor): (B, N, 3) if transposed == False, else
+                (B, 3, N). xyz coordinates of the features.
+            center_xyz (torch.Tensor, optional): (B, npoint, 3) if transposed
+                is False, else (B, 3, npoint). centers of the knn query.
                 Default: None.
             transposed (bool, optional): whether the input tensors are
                 transposed. Should not explicitly use this keyword when
@@ -34,8 +37,8 @@ class KNN(Function):
                 Default: False.

         Returns:
-            Tensor: (B, k, npoint) tensor with the indices of
-                the features that form k-nearest neighbours.
+            torch.Tensor: (B, k, npoint) tensor with the indices of the
+            features that form k-nearest neighbours.
         """
         assert (k > 0) & (k < 100), 'k should be in range(0, 100)'
...
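A minimal sketch of the op above (not part of the commit; assumes a CUDA build):

    import torch
    from mmcv.ops import knn

    xyz = torch.rand(2, 1024, 3).cuda()      # (B, N, 3)
    centers = xyz[:, :64, :].contiguous()    # (B, npoint, 3)
    idx = knn(8, xyz, centers)               # (B, 8, 64) neighbour indices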
 # Copyright (c) OpenMMLab. All rights reserved.
 import math
+from typing import Optional, Tuple, Union

 import torch
 import torch.nn as nn
@@ -27,7 +28,13 @@ class MaskedConv2dFunction(Function):
             stride_i=stride)

     @staticmethod
-    def forward(ctx, features, mask, weight, bias, padding=0, stride=1):
+    def forward(ctx,
+                features: torch.Tensor,
+                mask: torch.Tensor,
+                weight: torch.nn.Parameter,
+                bias: torch.nn.Parameter,
+                padding: int = 0,
+                stride: int = 1) -> torch.Tensor:
         assert mask.dim() == 3 and mask.size(0) == 1
         assert features.dim() == 4 and features.size(0) == 1
         assert features.size()[2:] == mask.size()[1:]
@@ -61,7 +68,6 @@ class MaskedConv2dFunction(Function):
                 kernel_w=kernel_w,
                 pad_h=pad_h,
                 pad_w=pad_w)
-
             masked_output = torch.addmm(1, bias[:, None], 1,
                                         weight.view(out_channel, -1), data_col)
             ext_module.masked_col2im_forward(
@@ -76,7 +82,7 @@ class MaskedConv2dFunction(Function):

     @staticmethod
     @once_differentiable
-    def backward(ctx, grad_output):
+    def backward(ctx, grad_output: torch.Tensor) -> tuple:
         return (None, ) * 5
@@ -91,21 +97,22 @@ class MaskedConv2d(nn.Module):
     """

     def __init__(self,
-                 in_channels,
-                 out_channels,
-                 kernel_size,
-                 stride=1,
-                 padding=0,
-                 dilation=1,
-                 groups=1,
-                 bias=True):
-        super(MaskedConv2d,
-              self).__init__(in_channels, out_channels, kernel_size, stride,
-                             padding, dilation, groups, bias)
+                 in_channels: int,
+                 out_channels: int,
+                 kernel_size: Union[int, Tuple[int, ...]],
+                 stride: int = 1,
+                 padding: int = 0,
+                 dilation: int = 1,
+                 groups: int = 1,
+                 bias: bool = True):
+        super().__init__(in_channels, out_channels, kernel_size, stride,
+                         padding, dilation, groups, bias)

-    def forward(self, input, mask=None):
+    def forward(self,
+                input: torch.Tensor,
+                mask: Optional[torch.Tensor] = None) -> torch.Tensor:
         if mask is None:  # fallback to the normal Conv2d
-            return super(MaskedConv2d, self).forward(input)
+            return super().forward(input)
         else:
             return masked_conv2d(input, mask, self.weight, self.bias,
                                  self.padding)
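A hedged sketch of the fallback behavior typed above (not part of the commit; assumes a CUDA build; note the op only supports batch size 1):

    import torch
    from mmcv.ops import MaskedConv2d

    conv = MaskedConv2d(16, 32, kernel_size=3, padding=1).cuda()
    x = torch.rand(1, 16, 64, 64).cuda()
    mask = (torch.rand(1, 64, 64) > 0.5).float().cuda()
    y = conv(x, mask)   # conv evaluated only at masked positions
    y_full = conv(x)    # mask=None falls back to a regular Conv2d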
 # Copyright (c) OpenMMLab. All rights reserved.
+import math
 from abc import abstractmethod
+from typing import Optional

 import torch
 import torch.nn as nn
@@ -18,7 +20,7 @@ class BaseMergeCell(nn.Module):
         another convolution layer.

     Args:
-        in_channels (int): number of input channels in out_conv layer.
+        fused_channels (int): number of input channels in out_conv layer.
         out_channels (int): number of output channels in out_conv layer.
         with_out_conv (bool): Whether to use out_conv layer
         out_conv_cfg (dict): Config dict for convolution layer, which should
@@ -41,19 +43,19 @@ class BaseMergeCell(nn.Module):
     """

     def __init__(self,
-                 fused_channels=256,
-                 out_channels=256,
-                 with_out_conv=True,
-                 out_conv_cfg=dict(
+                 fused_channels: Optional[int] = 256,
+                 out_channels: Optional[int] = 256,
+                 with_out_conv: bool = True,
+                 out_conv_cfg: dict = dict(
                      groups=1, kernel_size=3, padding=1, bias=True),
-                 out_norm_cfg=None,
-                 out_conv_order=('act', 'conv', 'norm'),
-                 with_input1_conv=False,
-                 with_input2_conv=False,
-                 input_conv_cfg=None,
-                 input_norm_cfg=None,
-                 upsample_mode='nearest'):
-        super(BaseMergeCell, self).__init__()
+                 out_norm_cfg: Optional[dict] = None,
+                 out_conv_order: tuple = ('act', 'conv', 'norm'),
+                 with_input1_conv: bool = False,
+                 with_input2_conv: bool = False,
+                 input_conv_cfg: Optional[dict] = None,
+                 input_norm_cfg: Optional[dict] = None,
+                 upsample_mode: str = 'nearest'):
+        super().__init__()
         assert upsample_mode in ['nearest', 'bilinear']
         self.with_out_conv = with_out_conv
         self.with_input1_conv = with_input1_conv
@@ -62,8 +64,8 @@ class BaseMergeCell(nn.Module):
         if self.with_out_conv:
             self.out_conv = ConvModule(
-                fused_channels,
-                out_channels,
+                fused_channels,  # type: ignore
+                out_channels,  # type: ignore
                 **out_conv_cfg,
                 norm_cfg=out_norm_cfg,
                 order=out_conv_order)
@@ -95,12 +97,25 @@ class BaseMergeCell(nn.Module):
         elif x.shape[-2:] < size:
             return F.interpolate(x, size=size, mode=self.upsample_mode)
         else:
-            assert x.shape[-2] % size[-2] == 0 and x.shape[-1] % size[-1] == 0
-            kernel_size = x.shape[-1] // size[-1]
+            if x.shape[-2] % size[-2] != 0 or x.shape[-1] % size[-1] != 0:
+                h, w = x.shape[-2:]
+                target_h, target_w = size
+                pad_h = math.ceil(h / target_h) * target_h - h
+                pad_w = math.ceil(w / target_w) * target_w - w
+                pad_l = pad_w // 2
+                pad_r = pad_w - pad_l
+                pad_t = pad_h // 2
+                pad_b = pad_h - pad_t
+                pad = (pad_l, pad_r, pad_t, pad_b)
+                x = F.pad(x, pad, mode='constant', value=0.0)
+            kernel_size = (x.shape[-2] // size[-2], x.shape[-1] // size[-1])
             x = F.max_pool2d(x, kernel_size=kernel_size, stride=kernel_size)
             return x

-    def forward(self, x1, x2, out_size=None):
+    def forward(self,
+                x1: torch.Tensor,
+                x2: torch.Tensor,
+                out_size: Optional[tuple] = None) -> torch.Tensor:
         assert x1.shape[:2] == x2.shape[:2]
         assert out_size is None or len(out_size) == 2
         if out_size is None:  # resize to larger one
@@ -120,8 +135,8 @@ class BaseMergeCell(nn.Module):
 class SumCell(BaseMergeCell):

-    def __init__(self, in_channels, out_channels, **kwargs):
-        super(SumCell, self).__init__(in_channels, out_channels, **kwargs)
+    def __init__(self, in_channels: int, out_channels: int, **kwargs):
+        super().__init__(in_channels, out_channels, **kwargs)

     def _binary_op(self, x1, x2):
         return x1 + x2
@@ -129,9 +144,8 @@ class SumCell(BaseMergeCell):
 class ConcatCell(BaseMergeCell):

-    def __init__(self, in_channels, out_channels, **kwargs):
-        super(ConcatCell, self).__init__(in_channels * 2, out_channels,
-                                         **kwargs)
+    def __init__(self, in_channels: int, out_channels: int, **kwargs):
+        super().__init__(in_channels * 2, out_channels, **kwargs)

     def _binary_op(self, x1, x2):
         ret = torch.cat([x1, x2], dim=1)
@@ -140,7 +154,10 @@ class ConcatCell(BaseMergeCell):
 class GlobalPoolingCell(BaseMergeCell):

-    def __init__(self, in_channels=None, out_channels=None, **kwargs):
+    def __init__(self,
+                 in_channels: Optional[int] = None,
+                 out_channels: Optional[int] = None,
+                 **kwargs):
         super().__init__(in_channels, out_channels, **kwargs)
         self.global_pool = nn.AdaptiveAvgPool2d((1, 1))
...
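The `_resize` change above replaces a hard divisibility assert with symmetric zero-padding before max-pooling. A standalone sketch of that arithmetic in plain PyTorch (hypothetical shapes, not the mmcv code itself):

    import math
    import torch
    import torch.nn.functional as F

    x = torch.rand(1, 8, 30, 30)   # 30 is not divisible by the target size 8
    target_h = target_w = 8
    h, w = x.shape[-2:]
    pad_h = math.ceil(h / target_h) * target_h - h   # 2
    pad_w = math.ceil(w / target_w) * target_w - w   # 2
    x = F.pad(x, (pad_w // 2, pad_w - pad_w // 2,
                  pad_h // 2, pad_h - pad_h // 2), value=0.0)  # now 32x32
    kernel = (x.shape[-2] // target_h, x.shape[-1] // target_w)  # (4, 4)
    out = F.max_pool2d(x, kernel_size=kernel, stride=kernel)     # (1, 8, 8, 8)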
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+
+from ..utils import ext_loader
+
+ext_module = ext_loader.load_ext('_ext', ['min_area_polygons'])
+
+
+def min_area_polygons(pointsets: torch.Tensor) -> torch.Tensor:
+    """Find the smallest polygons that surrounds all points in the point sets.
+
+    Args:
+        pointsets (Tensor): point sets with shape (N, 18).
+
+    Returns:
+        torch.Tensor: Return the smallest polygons with shape (N, 8).
+    """
+    polygons = pointsets.new_zeros((pointsets.size(0), 8))
+    ext_module.min_area_polygons(pointsets, polygons)
+    return polygons
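A hedged usage sketch for this new op (not part of the commit; assumes a CUDA build; each point set is nine (x, y) pairs flattened to 18 values, each polygon four corners flattened to 8):

    import torch
    from mmcv.ops import min_area_polygons

    pointsets = torch.rand(4, 18).cuda()      # (N, 18)
    polygons = min_area_polygons(pointsets)   # (N, 8)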
 # Copyright (c) OpenMMLab. All rights reserved.
 import math
+from typing import Optional, Tuple, Union

 import torch
 import torch.nn as nn
@@ -35,16 +36,16 @@ class ModulatedDeformConv2dFunction(Function):

     @staticmethod
     def forward(ctx,
-                input,
-                offset,
-                mask,
-                weight,
-                bias=None,
-                stride=1,
-                padding=0,
-                dilation=1,
-                groups=1,
-                deform_groups=1):
+                input: torch.Tensor,
+                offset: torch.Tensor,
+                mask: torch.Tensor,
+                weight: nn.Parameter,
+                bias: Optional[nn.Parameter] = None,
+                stride: int = 1,
+                padding: int = 0,
+                dilation: int = 1,
+                groups: int = 1,
+                deform_groups: int = 1) -> torch.Tensor:
         if input is not None and input.dim() != 4:
             raise ValueError(
                 f'Expected 4D tensor as input, got {input.dim()}D tensor \
@@ -66,6 +67,7 @@ class ModulatedDeformConv2dFunction(Function):
         # whatever the pytorch version is.
         input = input.type_as(offset)
         weight = weight.type_as(input)
+        bias = bias.type_as(input)  # type: ignore
         ctx.save_for_backward(input, offset, mask, weight, bias)
         output = input.new_empty(
             ModulatedDeformConv2dFunction._output_size(ctx, input, weight))
@@ -94,7 +96,7 @@ class ModulatedDeformConv2dFunction(Function):

     @staticmethod
     @once_differentiable
-    def backward(ctx, grad_output):
+    def backward(ctx, grad_output: torch.Tensor) -> tuple:
         input, offset, mask, weight, bias = ctx.saved_tensors
         grad_input = torch.zeros_like(input)
         grad_offset = torch.zeros_like(offset)
@@ -158,16 +160,16 @@ class ModulatedDeformConv2d(nn.Module):
     @deprecated_api_warning({'deformable_groups': 'deform_groups'},
                             cls_name='ModulatedDeformConv2d')
     def __init__(self,
-                 in_channels,
-                 out_channels,
-                 kernel_size,
-                 stride=1,
-                 padding=0,
-                 dilation=1,
-                 groups=1,
-                 deform_groups=1,
-                 bias=True):
-        super(ModulatedDeformConv2d, self).__init__()
+                 in_channels: int,
+                 out_channels: int,
+                 kernel_size: Union[int, Tuple[int]],
+                 stride: int = 1,
+                 padding: int = 0,
+                 dilation: int = 1,
+                 groups: int = 1,
+                 deform_groups: int = 1,
+                 bias: Union[bool, str] = True):
+        super().__init__()
         self.in_channels = in_channels
         self.out_channels = out_channels
         self.kernel_size = _pair(kernel_size)
@@ -198,7 +200,8 @@ class ModulatedDeformConv2d(nn.Module):
         if self.bias is not None:
             self.bias.data.zero_()

-    def forward(self, x, offset, mask):
+    def forward(self, x: torch.Tensor, offset: torch.Tensor,
+                mask: torch.Tensor) -> torch.Tensor:
         return modulated_deform_conv2d(x, offset, mask, self.weight, self.bias,
                                        self.stride, self.padding,
                                        self.dilation, self.groups,
@@ -226,7 +229,7 @@ class ModulatedDeformConv2dPack(ModulatedDeformConv2d):
     _version = 2

     def __init__(self, *args, **kwargs):
-        super(ModulatedDeformConv2dPack, self).__init__(*args, **kwargs)
+        super().__init__(*args, **kwargs)
         self.conv_offset = nn.Conv2d(
             self.in_channels,
             self.deform_groups * 3 * self.kernel_size[0] * self.kernel_size[1],
@@ -237,13 +240,13 @@ class ModulatedDeformConv2dPack(ModulatedDeformConv2d):
             bias=True)
         self.init_weights()

-    def init_weights(self):
-        super(ModulatedDeformConv2dPack, self).init_weights()
+    def init_weights(self) -> None:
+        super().init_weights()
         if hasattr(self, 'conv_offset'):
             self.conv_offset.weight.data.zero_()
             self.conv_offset.bias.data.zero_()

-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:  # type: ignore
         out = self.conv_offset(x)
         o1, o2, mask = torch.chunk(out, 3, dim=1)
         offset = torch.cat((o1, o2), dim=1)
...
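A hedged usage sketch of the packed variant, which predicts its own offsets and modulation masks (not part of the commit; assumes a CUDA build):

    import torch
    from mmcv.ops import ModulatedDeformConv2dPack

    dcn = ModulatedDeformConv2dPack(
        16, 32, kernel_size=3, padding=1, deform_groups=1).cuda()
    x = torch.rand(2, 16, 28, 28).cuda()
    y = dcn(x)   # (2, 32, 28, 28); offsets/masks come from dcn.conv_offset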
 # Copyright (c) OpenMMLab. All rights reserved.
 import math
 import warnings
+from typing import Optional, no_type_check

 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from torch.autograd.function import Function, once_differentiable

+import mmcv
 from mmcv import deprecated_api_warning
 from mmcv.cnn import constant_init, xavier_init
 from mmcv.cnn.bricks.registry import ATTENTION
@@ -20,27 +22,30 @@ ext_module = ext_loader.load_ext(
 class MultiScaleDeformableAttnFunction(Function):

     @staticmethod
-    def forward(ctx, value, value_spatial_shapes, value_level_start_index,
-                sampling_locations, attention_weights, im2col_step):
+    def forward(ctx, value: torch.Tensor, value_spatial_shapes: torch.Tensor,
+                value_level_start_index: torch.Tensor,
+                sampling_locations: torch.Tensor,
+                attention_weights: torch.Tensor,
+                im2col_step: torch.Tensor) -> torch.Tensor:
         """GPU version of multi-scale deformable attention.

         Args:
-            value (Tensor): The value has shape
+            value (torch.Tensor): The value has shape
                 (bs, num_keys, mum_heads, embed_dims//num_heads)
-            value_spatial_shapes (Tensor): Spatial shape of
+            value_spatial_shapes (torch.Tensor): Spatial shape of
                 each feature map, has shape (num_levels, 2),
                 last dimension 2 represent (h, w)
-            sampling_locations (Tensor): The location of sampling points,
+            sampling_locations (torch.Tensor): The location of sampling points,
                 has shape
                 (bs ,num_queries, num_heads, num_levels, num_points, 2),
                 the last dimension 2 represent (x, y).
-            attention_weights (Tensor): The weight of sampling points used
-                when calculate the attention, has shape
+            attention_weights (torch.Tensor): The weight of sampling points
+                used when calculate the attention, has shape
                 (bs ,num_queries, num_heads, num_levels, num_points),
-            im2col_step (Tensor): The step used in image to column.
+            im2col_step (torch.Tensor): The step used in image to column.

         Returns:
-            Tensor: has shape (bs, num_queries, embed_dims)
+            torch.Tensor: has shape (bs, num_queries, embed_dims)
         """
         ctx.im2col_step = im2col_step
@@ -58,16 +63,14 @@ class MultiScaleDeformableAttnFunction(Function):

     @staticmethod
     @once_differentiable
-    def backward(ctx, grad_output):
+    def backward(ctx, grad_output: torch.Tensor) -> tuple:
         """GPU version of backward function.

         Args:
-            grad_output (Tensor): Gradient
-                of output tensor of forward.
+            grad_output (torch.Tensor): Gradient of output tensor of forward.

         Returns:
-            Tuple[Tensor]: Gradient
-                of input tensors in forward.
+            tuple[Tensor]: Gradient of input tensors in forward.
         """
         value, value_spatial_shapes, value_level_start_index,\
             sampling_locations, attention_weights = ctx.saved_tensors
@@ -91,26 +94,28 @@ class MultiScaleDeformableAttnFunction(Function):
             grad_sampling_loc, grad_attn_weight, None

-def multi_scale_deformable_attn_pytorch(value, value_spatial_shapes,
-                                        sampling_locations, attention_weights):
+def multi_scale_deformable_attn_pytorch(
+        value: torch.Tensor, value_spatial_shapes: torch.Tensor,
+        sampling_locations: torch.Tensor,
+        attention_weights: torch.Tensor) -> torch.Tensor:
     """CPU version of multi-scale deformable attention.

     Args:
-        value (Tensor): The value has shape
-            (bs, num_keys, mum_heads, embed_dims//num_heads)
-        value_spatial_shapes (Tensor): Spatial shape of
+        value (torch.Tensor): The value has shape
+            (bs, num_keys, num_heads, embed_dims//num_heads)
+        value_spatial_shapes (torch.Tensor): Spatial shape of
             each feature map, has shape (num_levels, 2),
             last dimension 2 represent (h, w)
-        sampling_locations (Tensor): The location of sampling points,
+        sampling_locations (torch.Tensor): The location of sampling points,
             has shape
             (bs ,num_queries, num_heads, num_levels, num_points, 2),
            the last dimension 2 represent (x, y).
-        attention_weights (Tensor): The weight of sampling points used
+        attention_weights (torch.Tensor): The weight of sampling points used
            when calculate the attention, has shape
            (bs ,num_queries, num_heads, num_levels, num_points),

     Returns:
-        Tensor: has shape (bs, num_queries, embed_dims)
+        torch.Tensor: has shape (bs, num_queries, embed_dims)
     """

     bs, _, num_heads, embed_dims = value.shape
@@ -180,15 +185,15 @@ class MultiScaleDeformableAttention(BaseModule):
     """

     def __init__(self,
-                 embed_dims=256,
-                 num_heads=8,
-                 num_levels=4,
-                 num_points=4,
-                 im2col_step=64,
-                 dropout=0.1,
-                 batch_first=False,
-                 norm_cfg=None,
-                 init_cfg=None):
+                 embed_dims: int = 256,
+                 num_heads: int = 8,
+                 num_levels: int = 4,
+                 num_points: int = 4,
+                 im2col_step: int = 64,
+                 dropout: float = 0.1,
+                 batch_first: bool = False,
+                 norm_cfg: Optional[dict] = None,
+                 init_cfg: Optional[mmcv.ConfigDict] = None):
         super().__init__(init_cfg)
         if embed_dims % num_heads != 0:
             raise ValueError(f'embed_dims must be divisible by num_heads, '
@@ -227,7 +232,7 @@ class MultiScaleDeformableAttention(BaseModule):
         self.output_proj = nn.Linear(embed_dims, embed_dims)
         self.init_weights()

-    def init_weights(self):
+    def init_weights(self) -> None:
         """Default initialization for Parameters of Module."""
         constant_init(self.sampling_offsets, 0.)
         thetas = torch.arange(
@@ -247,53 +252,53 @@ class MultiScaleDeformableAttention(BaseModule):
         xavier_init(self.output_proj, distribution='uniform', bias=0.)
         self._is_init = True

+    @no_type_check
     @deprecated_api_warning({'residual': 'identity'},
                             cls_name='MultiScaleDeformableAttention')
     def forward(self,
-                query,
-                key=None,
-                value=None,
-                identity=None,
-                query_pos=None,
-                key_padding_mask=None,
-                reference_points=None,
-                spatial_shapes=None,
-                level_start_index=None,
-                **kwargs):
+                query: torch.Tensor,
+                key: Optional[torch.Tensor] = None,
+                value: Optional[torch.Tensor] = None,
+                identity: Optional[torch.Tensor] = None,
+                query_pos: Optional[torch.Tensor] = None,
+                key_padding_mask: Optional[torch.Tensor] = None,
+                reference_points: Optional[torch.Tensor] = None,
+                spatial_shapes: Optional[torch.Tensor] = None,
+                level_start_index: Optional[torch.Tensor] = None,
+                **kwargs) -> torch.Tensor:
         """Forward Function of MultiScaleDeformAttention.

         Args:
-            query (Tensor): Query of Transformer with shape
+            query (torch.Tensor): Query of Transformer with shape
                 (num_query, bs, embed_dims).
-            key (Tensor): The key tensor with shape
+            key (torch.Tensor): The key tensor with shape
                 `(num_key, bs, embed_dims)`.
-            value (Tensor): The value tensor with shape
+            value (torch.Tensor): The value tensor with shape
                 `(num_key, bs, embed_dims)`.
-            identity (Tensor): The tensor used for addition, with the
+            identity (torch.Tensor): The tensor used for addition, with the
                 same shape as `query`. Default None. If None,
                 `query` will be used.
-            query_pos (Tensor): The positional encoding for `query`.
+            query_pos (torch.Tensor): The positional encoding for `query`.
                 Default: None.
-            key_pos (Tensor): The positional encoding for `key`. Default
-                None.
-            reference_points (Tensor): The normalized reference
+            key_padding_mask (torch.Tensor): ByteTensor for `query`, with
+                shape [bs, num_key].
+            reference_points (torch.Tensor): The normalized reference
                 points with shape (bs, num_query, num_levels, 2),
                 all elements is range in [0, 1], top-left (0,0),
                 bottom-right (1, 1), including padding area.
                 or (N, Length_{query}, num_levels, 4), add
                 additional two dimensions is (w, h) to
                 form reference boxes.
-            key_padding_mask (Tensor): ByteTensor for `query`, with
-                shape [bs, num_key].
-            spatial_shapes (Tensor): Spatial shape of features in
+            spatial_shapes (torch.Tensor): Spatial shape of features in
                 different levels. With shape (num_levels, 2),
                 last dimension represents (h, w).
-            level_start_index (Tensor): The start index of each level.
+            level_start_index (torch.Tensor): The start index of each level.
                 A tensor has shape ``(num_levels, )`` and can be represented
                 as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...].

         Returns:
-            Tensor: forwarded results with shape [num_query, bs, embed_dims].
+            torch.Tensor: forwarded results with shape
+            [num_query, bs, embed_dims].
         """

         if value is None:
...
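A hedged sketch exercising the pure-PyTorch fallback, using the shapes documented above (not part of the commit; module path assumed from the class location):

    import torch
    from mmcv.ops.multi_scale_deform_attn import \
        multi_scale_deformable_attn_pytorch

    bs, num_heads, dim_per_head = 1, 4, 8
    shapes = torch.tensor([[16, 16], [8, 8]])              # (num_levels, 2)
    num_keys = int((shapes[:, 0] * shapes[:, 1]).sum())    # 320
    value = torch.rand(bs, num_keys, num_heads, dim_per_head)
    locs = torch.rand(bs, 100, num_heads, 2, 4, 2)         # 100 queries, 4 points
    weights = torch.softmax(torch.rand(bs, 100, num_heads, 2 * 4), -1)
    weights = weights.view(bs, 100, num_heads, 2, 4)
    out = multi_scale_deformable_attn_pytorch(value, shapes, locs, weights)
    # out: (1, 100, num_heads * dim_per_head) == (1, 100, 32)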
import os import os
from typing import Any, Dict, List, Optional, Tuple, Union
import numpy as np import numpy as np
import torch import torch
from torch import Tensor
from mmcv.utils import deprecated_api_warning from mmcv.utils import deprecated_api_warning
from ..utils import ext_loader from ..utils import ext_loader
...@@ -14,8 +16,8 @@ ext_module = ext_loader.load_ext( ...@@ -14,8 +16,8 @@ ext_module = ext_loader.load_ext(
class NMSop(torch.autograd.Function): class NMSop(torch.autograd.Function):
@staticmethod @staticmethod
def forward(ctx, bboxes, scores, iou_threshold, offset, score_threshold, def forward(ctx: Any, bboxes: Tensor, scores: Tensor, iou_threshold: float,
max_num): offset: int, score_threshold: float, max_num: int) -> Tensor:
is_filtering_by_score = score_threshold > 0 is_filtering_by_score = score_threshold > 0
if is_filtering_by_score: if is_filtering_by_score:
valid_mask = scores > score_threshold valid_mask = scores > score_threshold
...@@ -48,6 +50,7 @@ class NMSop(torch.autograd.Function): ...@@ -48,6 +50,7 @@ class NMSop(torch.autograd.Function):
offset_i=int(offset)) offset_i=int(offset))
else: else:
from torch.onnx.symbolic_opset9 import select, squeeze, unsqueeze from torch.onnx.symbolic_opset9 import select, squeeze, unsqueeze
from ..onnx.onnx_utils.symbolic_helper import _size_helper from ..onnx.onnx_utils.symbolic_helper import _size_helper
boxes = unsqueeze(g, bboxes, 0) boxes = unsqueeze(g, bboxes, 0)
...@@ -82,8 +85,9 @@ class NMSop(torch.autograd.Function): ...@@ -82,8 +85,9 @@ class NMSop(torch.autograd.Function):
class SoftNMSop(torch.autograd.Function): class SoftNMSop(torch.autograd.Function):
@staticmethod @staticmethod
def forward(ctx, boxes, scores, iou_threshold, sigma, min_score, method, def forward(ctx: Any, boxes: Tensor, scores: Tensor, iou_threshold: float,
offset): sigma: float, min_score: float, method: int,
offset: int) -> Tuple[Tensor, Tensor]:
dets = boxes.new_empty((boxes.size(0), 5), device='cpu') dets = boxes.new_empty((boxes.size(0), 5), device='cpu')
inds = ext_module.softnms( inds = ext_module.softnms(
boxes.cpu(), boxes.cpu(),
...@@ -114,8 +118,16 @@ class SoftNMSop(torch.autograd.Function): ...@@ -114,8 +118,16 @@ class SoftNMSop(torch.autograd.Function):
return nms_out return nms_out
array_like_type = Union[Tensor, np.ndarray]
@deprecated_api_warning({'iou_thr': 'iou_threshold'}) @deprecated_api_warning({'iou_thr': 'iou_threshold'})
def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1): def nms(boxes: array_like_type,
scores: array_like_type,
iou_threshold: float,
offset: int = 0,
score_threshold: float = 0,
max_num: int = -1) -> Tuple[array_like_type, array_like_type]:
"""Dispatch to either CPU or GPU NMS implementations. """Dispatch to either CPU or GPU NMS implementations.
The input can be either torch tensor or numpy array. GPU NMS will be used The input can be either torch tensor or numpy array. GPU NMS will be used
...@@ -131,8 +143,8 @@ def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1): ...@@ -131,8 +143,8 @@ def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1):
max_num (int): maximum number of boxes after NMS. max_num (int): maximum number of boxes after NMS.
Returns: Returns:
tuple: kept dets(boxes and scores) and indice, which is always the \ tuple: kept dets (boxes and scores) and indice, which always have
same data type as the input. the same data type as the input.
Example: Example:
>>> boxes = np.array([[49.1, 32.4, 51.0, 35.9], >>> boxes = np.array([[49.1, 32.4, 51.0, 35.9],
...@@ -148,8 +160,8 @@ def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1): ...@@ -148,8 +160,8 @@ def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1):
>>> dets, inds = nms(boxes, scores, iou_threshold) >>> dets, inds = nms(boxes, scores, iou_threshold)
>>> assert len(inds) == len(dets) == 3 >>> assert len(inds) == len(dets) == 3
""" """
assert isinstance(boxes, (torch.Tensor, np.ndarray)) assert isinstance(boxes, (Tensor, np.ndarray))
assert isinstance(scores, (torch.Tensor, np.ndarray)) assert isinstance(scores, (Tensor, np.ndarray))
is_numpy = False is_numpy = False
if isinstance(boxes, np.ndarray): if isinstance(boxes, np.ndarray):
is_numpy = True is_numpy = True
...@@ -160,16 +172,8 @@ def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1): ...@@ -160,16 +172,8 @@ def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1):
assert boxes.size(0) == scores.size(0) assert boxes.size(0) == scores.size(0)
assert offset in (0, 1) assert offset in (0, 1)
if torch.__version__ == 'parrots': inds = NMSop.apply(boxes, scores, iou_threshold, offset, score_threshold,
indata_list = [boxes, scores] max_num)
indata_dict = {
'iou_threshold': float(iou_threshold),
'offset': int(offset)
}
inds = ext_module.nms(*indata_list, **indata_dict)
else:
inds = NMSop.apply(boxes, scores, iou_threshold, offset,
score_threshold, max_num)
dets = torch.cat((boxes[inds], scores[inds].reshape(-1, 1)), dim=1) dets = torch.cat((boxes[inds], scores[inds].reshape(-1, 1)), dim=1)
if is_numpy: if is_numpy:
dets = dets.cpu().numpy() dets = dets.cpu().numpy()
...@@ -178,19 +182,19 @@ def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1): ...@@ -178,19 +182,19 @@ def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1):
@deprecated_api_warning({'iou_thr': 'iou_threshold'}) @deprecated_api_warning({'iou_thr': 'iou_threshold'})
def soft_nms(boxes, def soft_nms(boxes: array_like_type,
scores, scores: array_like_type,
iou_threshold=0.3, iou_threshold: float = 0.3,
sigma=0.5, sigma: float = 0.5,
min_score=1e-3, min_score: float = 1e-3,
method='linear', method: str = 'linear',
offset=0): offset: int = 0) -> Tuple[array_like_type, array_like_type]:
"""Dispatch to only CPU Soft NMS implementations. """Dispatch to only CPU Soft NMS implementations.
The input can be either a torch tensor or numpy array. The input can be either a torch tensor or numpy array.
The returned type will always be the same as inputs. The returned type will always be the same as inputs.
Arguments: Args:
boxes (torch.Tensor or np.ndarray): boxes in shape (N, 4). boxes (torch.Tensor or np.ndarray): boxes in shape (N, 4).
scores (torch.Tensor or np.ndarray): scores in shape (N, ). scores (torch.Tensor or np.ndarray): scores in shape (N, ).
iou_threshold (float): IoU threshold for NMS. iou_threshold (float): IoU threshold for NMS.
...@@ -200,8 +204,8 @@ def soft_nms(boxes, ...@@ -200,8 +204,8 @@ def soft_nms(boxes,
offset (int, 0 or 1): boxes' width or height is (x2 - x1 + offset). offset (int, 0 or 1): boxes' width or height is (x2 - x1 + offset).
Returns: Returns:
tuple: kept dets(boxes and scores) and indice, which is always the \ tuple: kept dets (boxes and scores) and indice, which always have
same data type as the input. the same data type as the input.
Example: Example:
>>> boxes = np.array([[4., 3., 5., 3.], >>> boxes = np.array([[4., 3., 5., 3.],
...@@ -216,8 +220,8 @@ def soft_nms(boxes, ...@@ -216,8 +220,8 @@ def soft_nms(boxes,
>>> assert len(inds) == len(dets) == 5 >>> assert len(inds) == len(dets) == 5
""" """
assert isinstance(boxes, (torch.Tensor, np.ndarray)) assert isinstance(boxes, (Tensor, np.ndarray))
assert isinstance(scores, (torch.Tensor, np.ndarray)) assert isinstance(scores, (Tensor, np.ndarray))
is_numpy = False is_numpy = False
if isinstance(boxes, np.ndarray): if isinstance(boxes, np.ndarray):
is_numpy = True is_numpy = True
...@@ -257,46 +261,85 @@ def soft_nms(boxes, ...@@ -257,46 +261,85 @@ def soft_nms(boxes,
return dets.to(device=boxes.device), inds.to(device=boxes.device) return dets.to(device=boxes.device), inds.to(device=boxes.device)
def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False): def batched_nms(boxes: Tensor,
"""Performs non-maximum suppression in a batched fashion. scores: Tensor,
idxs: Tensor,
nms_cfg: Optional[Dict],
class_agnostic: bool = False) -> Tuple[Tensor, Tensor]:
r"""Performs non-maximum suppression in a batched fashion.
Modified from https://github.com/pytorch/vision/blob Modified from `torchvision/ops/boxes.py#L39
/505cd6957711af790211896d32b40291bea1bc21/torchvision/ops/boxes.py#L39. <https://github.com/pytorch/vision/blob/
505cd6957711af790211896d32b40291bea1bc21/torchvision/ops/boxes.py#L39>`_.
In order to perform NMS independently per class, we add an offset to all In order to perform NMS independently per class, we add an offset to all
the boxes. The offset is dependent only on the class idx, and is large the boxes. The offset is dependent only on the class idx, and is large
enough so that boxes from different classes do not overlap. enough so that boxes from different classes do not overlap.
Arguments: Note:
boxes (torch.Tensor): boxes in shape (N, 4). In v1.4.1 and later, ``batched_nms`` supports skipping the NMS and
returns sorted raw results when `nms_cfg` is None.
Args:
boxes (torch.Tensor): boxes in shape (N, 4) or (N, 5).
scores (torch.Tensor): scores in shape (N, ). scores (torch.Tensor): scores in shape (N, ).
idxs (torch.Tensor): each index value correspond to a bbox cluster, idxs (torch.Tensor): each index value corresponds to a bbox cluster,
and NMS will not be applied between elements of different idxs, and NMS will not be applied between elements of different idxs,
shape (N, ). shape (N, ).
nms_cfg (dict): specify nms type and other parameters like iou_thr. nms_cfg (dict, optional): Supports skipping the nms when `nms_cfg`
Possible keys includes the following. is None, otherwise it should specify nms type and other
parameters like `iou_thr`. Possible keys include the following.
- iou_thr (float): IoU threshold used for NMS. - iou_threshold (float): IoU threshold used for NMS.
- split_thr (float): threshold number of boxes. In some cases the - split_thr (float): threshold number of boxes. In some cases the
number of boxes is large (e.g., 200k). To avoid OOM during number of boxes is large (e.g., 200k). To avoid OOM during
training, the users could set `split_thr` to a small value. training, the users could set `split_thr` to a small value.
If the number of boxes is greater than the threshold, it will If the number of boxes is greater than the threshold, it will
perform NMS on each group of boxes separately and sequentially. perform NMS on each group of boxes separately and sequentially.
Defaults to 10000. Defaults to 10000.
class_agnostic (bool): if true, nms is class agnostic, class_agnostic (bool): if true, nms is class agnostic,
i.e. IoU thresholding happens over all boxes, i.e. IoU thresholding happens over all boxes,
regardless of the predicted class. regardless of the predicted class. Defaults to False.
Returns: Returns:
tuple: kept dets and indices. tuple: kept dets and indices.
- boxes (Tensor): Bboxes with score after nms, has shape
(num_bboxes, 5). last dimension 5 arrange as
(x1, y1, x2, y2, score)
- keep (Tensor): The indices of remaining boxes in input
boxes.
""" """
# skip nms when nms_cfg is None
if nms_cfg is None:
scores, inds = scores.sort(descending=True)
boxes = boxes[inds]
return torch.cat([boxes, scores[:, None]], -1), inds
nms_cfg_ = nms_cfg.copy() nms_cfg_ = nms_cfg.copy()
class_agnostic = nms_cfg_.pop('class_agnostic', class_agnostic) class_agnostic = nms_cfg_.pop('class_agnostic', class_agnostic)
if class_agnostic: if class_agnostic:
boxes_for_nms = boxes boxes_for_nms = boxes
else: else:
max_coordinate = boxes.max() # When using rotated boxes, only apply offsets on center.
offsets = idxs.to(boxes) * (max_coordinate + torch.tensor(1).to(boxes)) if boxes.size(-1) == 5:
boxes_for_nms = boxes + offsets[:, None] # Strictly, the maximum coordinates of the rotated box
# (x,y,w,h,a) should be calculated by polygon coordinates.
# But the conversion from rotated box to polygon will
# slow down the speed.
# So we use max(x,y) + max(w,h) as max coordinate
# which is larger than polygon max coordinate
# max(x1, y1, x2, y2, x3, y3, x4, y4)
max_coordinate = boxes[..., :2].max() + boxes[..., 2:4].max()
offsets = idxs.to(boxes) * (
max_coordinate + torch.tensor(1).to(boxes))
boxes_ctr_for_nms = boxes[..., :2] + offsets[:, None]
boxes_for_nms = torch.cat([boxes_ctr_for_nms, boxes[..., 2:5]],
dim=-1)
else:
max_coordinate = boxes.max()
offsets = idxs.to(boxes) * (
max_coordinate + torch.tensor(1).to(boxes))
boxes_for_nms = boxes + offsets[:, None]
nms_type = nms_cfg_.pop('type', 'nms') nms_type = nms_cfg_.pop('type', 'nms')
nms_op = eval(nms_type) nms_op = eval(nms_type)
...@@ -306,12 +349,13 @@ def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False): ...@@ -306,12 +349,13 @@ def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False):
if boxes_for_nms.shape[0] < split_thr or torch.onnx.is_in_onnx_export(): if boxes_for_nms.shape[0] < split_thr or torch.onnx.is_in_onnx_export():
dets, keep = nms_op(boxes_for_nms, scores, **nms_cfg_) dets, keep = nms_op(boxes_for_nms, scores, **nms_cfg_)
boxes = boxes[keep] boxes = boxes[keep]
# -1 indexing works abnormal in TensorRT
# This assumes `dets` has 5 dimensions where # This assumes `dets` has arbitrary dimensions where
# the last dimension is score. # the last dimension is score.
# TODO: more elegant way to handle the dimension issue. # Currently it supports bounding boxes [x1, y1, x2, y2, score] or
# Some type of nms would reweight the score, such as SoftNMS # rotated boxes [cx, cy, w, h, angle_radian, score].
scores = dets[:, 4]
scores = dets[:, -1]
else: else:
max_num = nms_cfg_.pop('max_num', -1) max_num = nms_cfg_.pop('max_num', -1)
total_mask = scores.new_zeros(scores.size(), dtype=torch.bool) total_mask = scores.new_zeros(scores.size(), dtype=torch.bool)
...@@ -333,31 +377,33 @@ def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False): ...@@ -333,31 +377,33 @@ def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False):
boxes = boxes[:max_num] boxes = boxes[:max_num]
scores = scores[:max_num] scores = scores[:max_num]
return torch.cat([boxes, scores[:, None]], -1), keep boxes = torch.cat([boxes, scores[:, None]], -1)
return boxes, keep
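The class-offset trick used above is worth seeing in isolation: shifting each class's boxes by idx * (max_coordinate + 1) places every class on a disjoint coordinate range, so one class-agnostic NMS pass never suppresses across classes. A minimal sketch with assumed values:

import torch

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.]])  # heavily overlapping pair
idxs = torch.tensor([0, 1])                 # but different classes
max_coordinate = boxes.max()
offsets = idxs.to(boxes) * (max_coordinate + 1)
boxes_for_nms = boxes + offsets[:, None]
# The class-1 box is moved to [13., 13., 23., 23.]; its IoU with the
# class-0 box becomes 0, so NMS keeps both detections.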
def nms_match(dets, iou_threshold): def nms_match(dets: array_like_type,
iou_threshold: float) -> List[array_like_type]:
"""Matched dets into different groups by NMS. """Matched dets into different groups by NMS.
NMS match is Similar to NMS but when a bbox is suppressed, nms match will NMS match is Similar to NMS but when a bbox is suppressed, nms match will
record the indice of suppressed bbox and form a group with the indice of record the indice of suppressed bbox and form a group with the indice of
kept bbox. In each group, indice is sorted as score order. kept bbox. In each group, indice is sorted as score order.
Arguments: Args:
dets (torch.Tensor | np.ndarray): Det boxes with scores, shape (N, 5). dets (torch.Tensor | np.ndarray): Det boxes with scores, shape (N, 5).
iou_thr (float): IoU thresh for NMS. iou_threshold (float): IoU thresh for NMS.
Returns: Returns:
List[torch.Tensor | np.ndarray]: The outer list corresponds different list[torch.Tensor | np.ndarray]: The outer list corresponds to the
matched group, the inner Tensor corresponds the indices for a group different matched groups, the inner Tensor holds the indices for a
in score order. group in score order.
""" """
if dets.shape[0] == 0: if dets.shape[0] == 0:
matched = [] matched = []
else: else:
assert dets.shape[-1] == 5, 'inputs dets.shape should be (N, 5), ' \ assert dets.shape[-1] == 5, 'inputs dets.shape should be (N, 5), ' \
f'but get {dets.shape}' f'but get {dets.shape}'
if isinstance(dets, torch.Tensor): if isinstance(dets, Tensor):
dets_t = dets.detach().cpu() dets_t = dets.detach().cpu()
else: else:
dets_t = torch.from_numpy(dets) dets_t = torch.from_numpy(dets)
...@@ -365,15 +411,19 @@ def nms_match(dets, iou_threshold): ...@@ -365,15 +411,19 @@ def nms_match(dets, iou_threshold):
indata_dict = {'iou_threshold': float(iou_threshold)} indata_dict = {'iou_threshold': float(iou_threshold)}
matched = ext_module.nms_match(*indata_list, **indata_dict) matched = ext_module.nms_match(*indata_list, **indata_dict)
if torch.__version__ == 'parrots': if torch.__version__ == 'parrots':
matched = matched.tolist() matched = matched.tolist() # type: ignore
if isinstance(dets, torch.Tensor): if isinstance(dets, Tensor):
return [dets.new_tensor(m, dtype=torch.long) for m in matched] return [dets.new_tensor(m, dtype=torch.long) for m in matched]
else: else:
return [np.array(m, dtype=np.int) for m in matched] return [np.array(m, dtype=int) for m in matched]
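A usage sketch for `nms_match` (assumes a build with the compiled `_ext` module; the grouping in the comment is what greedy NMS at this threshold would produce for these values):

import numpy as np
from mmcv.ops import nms_match

dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.9],
                 [49.3, 32.9, 51.0, 35.3, 0.8],
                 [35.3, 11.5, 39.9, 14.5, 0.6]], dtype=np.float32)
groups = nms_match(dets, 0.5)
# Box 1 overlaps box 0 with IoU ~0.61 > 0.5, so it is grouped under
# the higher-scoring box 0: roughly [array([0, 1]), array([2])].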
def nms_rotated(dets, scores, iou_threshold, labels=None): def nms_rotated(dets: Tensor,
scores: Tensor,
iou_threshold: float,
labels: Optional[Tensor] = None,
clockwise: bool = True) -> Tuple[Tensor, Tensor]:
"""Performs non-maximum suppression (NMS) on the rotated boxes according to """Performs non-maximum suppression (NMS) on the rotated boxes according to
their intersection-over-union (IoU). their intersection-over-union (IoU).
...@@ -381,23 +431,33 @@ def nms_rotated(dets, scores, iou_threshold, labels=None): ...@@ -381,23 +431,33 @@ def nms_rotated(dets, scores, iou_threshold, labels=None):
IoU greater than iou_threshold with another (higher scoring) rotated box. IoU greater than iou_threshold with another (higher scoring) rotated box.
Args: Args:
boxes (Tensor): Rotated boxes in shape (N, 5). They are expected to \ dets (torch.Tensor): Rotated boxes in shape (N, 5).
be in (x_ctr, y_ctr, width, height, angle_radian) format. They are expected to be in
scores (Tensor): scores in shape (N, ). (x_ctr, y_ctr, width, height, angle_radian) format.
scores (torch.Tensor): scores in shape (N, ).
iou_threshold (float): IoU thresh for NMS. iou_threshold (float): IoU thresh for NMS.
labels (Tensor): boxes' label in shape (N,). labels (torch.Tensor, optional): boxes' label in shape (N,).
clockwise (bool): flag indicating whether the positive angular
orientation is clockwise. default True.
`New in version 1.4.3.`
Returns: Returns:
tuple: kept dets(boxes and scores) and indice, which is always the \ tuple: kept dets(boxes and scores) and indice, which is always the
same data type as the input. same data type as the input.
""" """
if dets.shape[0] == 0: if dets.shape[0] == 0:
return dets, None return dets, None
if not clockwise:
flip_mat = dets.new_ones(dets.shape[-1])
flip_mat[-1] = -1
dets_cw = dets * flip_mat
else:
dets_cw = dets
multi_label = labels is not None multi_label = labels is not None
if multi_label: if multi_label:
dets_wl = torch.cat((dets, labels.unsqueeze(1)), 1) dets_wl = torch.cat((dets_cw, labels.unsqueeze(1)), 1) # type: ignore
else: else:
dets_wl = dets dets_wl = dets_cw
_, order = scores.sort(0, descending=True) _, order = scores.sort(0, descending=True)
dets_sorted = dets_wl.index_select(0, order) dets_sorted = dets_wl.index_select(0, order)
......
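The `clockwise` handling introduced above only negates the last (angle) channel; a standalone sketch of the same flip with made-up box values:

import torch

dets = torch.tensor([[5., 5., 4., 2., 0.3]])  # (cx, cy, w, h, angle)
flip_mat = dets.new_ones(dets.shape[-1])
flip_mat[-1] = -1
dets_cw = dets * flip_mat
# Only the angle sign changes: (5, 5, 4, 2, -0.3). Centers and sizes
# are untouched, mapping a counterclockwise angle convention to the
# clockwise one the kernel expects.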
# Copyright (c) OpenMMLab. All rights reserved. # Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Union
import numpy as np import numpy as np
import torch import torch
from torch import Tensor
from ..utils import ext_loader from ..utils import ext_loader
ext_module = ext_loader.load_ext('_ext', ['pixel_group']) ext_module = ext_loader.load_ext('_ext', ['pixel_group'])
def pixel_group(score, mask, embedding, kernel_label, kernel_contour, def pixel_group(
kernel_region_num, distance_threshold): score: Union[np.ndarray, Tensor],
mask: Union[np.ndarray, Tensor],
embedding: Union[np.ndarray, Tensor],
kernel_label: Union[np.ndarray, Tensor],
kernel_contour: Union[np.ndarray, Tensor],
kernel_region_num: int,
distance_threshold: float,
) -> List[List[float]]:
"""Group pixels into text instances, which is widely used text detection """Group pixels into text instances, which is widely used text detection
methods. methods.
Arguments: Arguments:
score (np.array or Tensor): The foreground score with size hxw. score (np.array or torch.Tensor): The foreground score with size hxw.
mask (np.array or Tensor): The foreground mask with size hxw. mask (np.array or Tensor): The foreground mask with size hxw.
embedding (np.array or Tensor): The embedding with size hxwxc to embedding (np.array or torch.Tensor): The embedding with size hxwxc to
distinguish instances. distinguish instances.
kernel_label (np.array or Tensor): The instance kernel index with kernel_label (np.array or torch.Tensor): The instance kernel index with
size hxw.
kernel_contour (np.array or torch.Tensor): The kernel contour with
size hxw. size hxw.
kernel_contour (np.array or Tensor): The kernel contour with size hxw.
kernel_region_num (int): The instance kernel region number. kernel_region_num (int): The instance kernel region number.
distance_threshold (float): The embedding distance threshold between distance_threshold (float): The embedding distance threshold between
kernel and pixel in one instance. kernel and pixel in one instance.
Returns: Returns:
pixel_assignment (List[List[float]]): The instance coordinate list. list[list[float]]: The instance coordinates and attributes list. Each
Each element consists of averaged confidence, pixel number, and element consists of averaged confidence, pixel number, and coordinates
coordinates (x_i, y_i for all pixels) in order. (x_i, y_i for all pixels) in order.
""" """
assert isinstance(score, (torch.Tensor, np.ndarray)) assert isinstance(score, (torch.Tensor, np.ndarray))
assert isinstance(mask, (torch.Tensor, np.ndarray)) assert isinstance(mask, (torch.Tensor, np.ndarray))
......
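A shape-oriented usage sketch for `pixel_group` (values are random, a compiled `_ext` module is assumed, and the dtype choices follow the docstring rather than being verified here):

import numpy as np
from mmcv.ops import pixel_group

h, w, c = 32, 32, 4
score = np.random.rand(h, w).astype(np.float32)
mask = score > 0.5
embedding = np.random.rand(h, w, c).astype(np.float32)
kernel_label = np.random.randint(0, 2, (h, w)).astype(np.int32)
kernel_contour = np.zeros((h, w), dtype=np.uint8)
groups = pixel_group(score, mask, embedding, kernel_label,
                     kernel_contour, 2, 0.8)
# Per the docstring, groups[i] is
# [avg_confidence, pixel_count, x0, y0, x1, y1, ...].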
# Modified from https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend # noqa # Modified from https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend # noqa
from os import path as osp from os import path as osp
from typing import Tuple, Union
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from torch import Tensor
from torch.nn.modules.utils import _pair from torch.nn.modules.utils import _pair
from torch.onnx.operators import shape_as_tensor from torch.onnx.operators import shape_as_tensor
def bilinear_grid_sample(im, grid, align_corners=False): def bilinear_grid_sample(im: Tensor,
grid: Tensor,
align_corners: bool = False) -> Tensor:
"""Given an input and a flow-field grid, computes the output using input """Given an input and a flow-field grid, computes the output using input
values and pixel locations from grid. Only the bilinear interpolation values and pixel locations from grid. Only the bilinear interpolation
method is supported to sample the input pixels. method is supported to sample the input pixels.
...@@ -17,11 +21,12 @@ def bilinear_grid_sample(im, grid, align_corners=False): ...@@ -17,11 +21,12 @@ def bilinear_grid_sample(im, grid, align_corners=False):
Args: Args:
im (torch.Tensor): Input feature map, shape (N, C, H, W) im (torch.Tensor): Input feature map, shape (N, C, H, W)
grid (torch.Tensor): Point coordinates, shape (N, Hg, Wg, 2) grid (torch.Tensor): Point coordinates, shape (N, Hg, Wg, 2)
align_corners {bool}: If set to True, the extrema (-1 and 1) are align_corners (bool): If set to True, the extrema (-1 and 1) are
considered as referring to the center points of the input’s considered as referring to the center points of the input’s
corner pixels. If set to False, they are instead considered as corner pixels. If set to False, they are instead considered as
referring to the corner points of the input’s corner pixels, referring to the corner points of the input’s corner pixels,
making the sampling more resolution agnostic. making the sampling more resolution agnostic.
Returns: Returns:
torch.Tensor: A tensor with sampled points, shape (N, C, Hg, Wg) torch.Tensor: A tensor with sampled points, shape (N, C, Hg, Wg)
""" """
...@@ -84,47 +89,52 @@ def bilinear_grid_sample(im, grid, align_corners=False): ...@@ -84,47 +89,52 @@ def bilinear_grid_sample(im, grid, align_corners=False):
return (Ia * wa + Ib * wb + Ic * wc + Id * wd).reshape(n, c, gh, gw) return (Ia * wa + Ib * wb + Ic * wc + Id * wd).reshape(n, c, gh, gw)
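Since `bilinear_grid_sample` exists as an ONNX-exportable stand-in for `F.grid_sample`, the natural usage sketch is a parity check (random inputs; the import path is assumed from this file's location in `mmcv/ops/point_sample.py`, and zero padding is assumed to match):

import torch
import torch.nn.functional as F
from mmcv.ops.point_sample import bilinear_grid_sample

im = torch.randn(1, 3, 8, 8)
grid = torch.rand(1, 4, 4, 2) * 2 - 1  # normalized coords in [-1, 1]
out = bilinear_grid_sample(im, grid, align_corners=False)
ref = F.grid_sample(im, grid, mode='bilinear',
                    padding_mode='zeros', align_corners=False)
assert torch.allclose(out, ref, atol=1e-5)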
def is_in_onnx_export_without_custom_ops(): def is_in_onnx_export_without_custom_ops() -> bool:
from mmcv.ops import get_onnxruntime_op_path from mmcv.ops import get_onnxruntime_op_path
ort_custom_op_path = get_onnxruntime_op_path() ort_custom_op_path = get_onnxruntime_op_path()
return torch.onnx.is_in_onnx_export( return torch.onnx.is_in_onnx_export(
) and not osp.exists(ort_custom_op_path) ) and not osp.exists(ort_custom_op_path)
def normalize(grid): def normalize(grid: Tensor) -> Tensor:
"""Normalize input grid from [-1, 1] to [0, 1] """Normalize input grid from [-1, 1] to [0, 1]
Args: Args:
grid (Tensor): The grid to be normalize, range [-1, 1]. grid (torch.Tensor): The grid to be normalized, range [-1, 1].
Returns: Returns:
Tensor: Normalized grid, range [0, 1]. torch.Tensor: Normalized grid, range [0, 1].
""" """
return (grid + 1.0) / 2.0 return (grid + 1.0) / 2.0
def denormalize(grid): def denormalize(grid: Tensor) -> Tensor:
"""Denormalize input grid from range [0, 1] to [-1, 1] """Denormalize input grid from range [0, 1] to [-1, 1]
Args: Args:
grid (Tensor): The grid to be denormalize, range [0, 1]. grid (torch.Tensor): The grid to be denormalized, range [0, 1].
Returns: Returns:
Tensor: Denormalized grid, range [-1, 1]. torch.Tensor: Denormalized grid, range [-1, 1].
""" """
return grid * 2.0 - 1.0 return grid * 2.0 - 1.0
def generate_grid(num_grid, size, device): def generate_grid(num_grid: int, size: Tuple[int, int],
device: torch.device) -> Tensor:
"""Generate regular square grid of points in [0, 1] x [0, 1] coordinate """Generate regular square grid of points in [0, 1] x [0, 1] coordinate
space. space.
Args: Args:
num_grid (int): The number of grids to sample, one for each region. num_grid (int): The number of grids to sample, one for each region.
size (tuple(int, int)): The side size of the regular grid. size (tuple[int, int]): The side size of the regular grid.
device (torch.device): Desired device of returned tensor. device (torch.device): Desired device of returned tensor.
Returns: Returns:
(torch.Tensor): A tensor of shape (num_grid, size[0]*size[1], 2) that torch.Tensor: A tensor of shape (num_grid, size[0]*size[1], 2) that
contains coordinates for the regular grids. contains coordinates for the regular grids.
""" """
affine_trans = torch.tensor([[[1., 0., 0.], [0., 1., 0.]]], device=device) affine_trans = torch.tensor([[[1., 0., 0.], [0., 1., 0.]]], device=device)
...@@ -134,16 +144,17 @@ def generate_grid(num_grid, size, device): ...@@ -134,16 +144,17 @@ def generate_grid(num_grid, size, device):
return grid.view(1, -1, 2).expand(num_grid, -1, -1) return grid.view(1, -1, 2).expand(num_grid, -1, -1)
def rel_roi_point_to_abs_img_point(rois, rel_roi_points): def rel_roi_point_to_abs_img_point(rois: Tensor,
rel_roi_points: Tensor) -> Tensor:
"""Convert roi based relative point coordinates to image based absolute """Convert roi based relative point coordinates to image based absolute
point coordinates. point coordinates.
Args: Args:
rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5) rois (torch.Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5)
rel_roi_points (Tensor): Point coordinates inside RoI, relative to rel_roi_points (torch.Tensor): Point coordinates inside RoI, relative
RoI, location, range (0, 1), shape (N, P, 2) to RoI, location, range (0, 1), shape (N, P, 2)
Returns: Returns:
Tensor: Image based absolute point coordinates, shape (N, P, 2) torch.Tensor: Image based absolute point coordinates, shape (N, P, 2)
""" """
with torch.no_grad(): with torch.no_grad():
...@@ -165,12 +176,13 @@ def rel_roi_point_to_abs_img_point(rois, rel_roi_points): ...@@ -165,12 +176,13 @@ def rel_roi_point_to_abs_img_point(rois, rel_roi_points):
return abs_img_points return abs_img_points
def get_shape_from_feature_map(x): def get_shape_from_feature_map(x: Tensor) -> Tensor:
"""Get spatial resolution of input feature map considering exporting to """Get spatial resolution of input feature map considering exporting to
onnx mode. onnx mode.
Args: Args:
x (torch.Tensor): Input tensor, shape (N, C, H, W) x (torch.Tensor): Input tensor, shape (N, C, H, W)
Returns: Returns:
torch.Tensor: Spatial resolution (width, height), shape (1, 1, 2) torch.Tensor: Spatial resolution (width, height), shape (1, 1, 2)
""" """
...@@ -183,19 +195,22 @@ def get_shape_from_feature_map(x): ...@@ -183,19 +195,22 @@ def get_shape_from_feature_map(x):
return img_shape return img_shape
def abs_img_point_to_rel_img_point(abs_img_points, img, spatial_scale=1.): def abs_img_point_to_rel_img_point(abs_img_points: Tensor,
img: Union[tuple, Tensor],
spatial_scale: float = 1.) -> Tensor:
"""Convert image based absolute point coordinates to image based relative """Convert image based absolute point coordinates to image based relative
coordinates for sampling. coordinates for sampling.
Args: Args:
abs_img_points (Tensor): Image based absolute point coordinates, abs_img_points (torch.Tensor): Image based absolute point coordinates,
shape (N, P, 2) shape (N, P, 2)
img (tuple/Tensor): (height, width) of image or feature map. img (tuple or torch.Tensor): (height, width) of image or feature map.
spatial_scale (float): Scale points by this factor. Default: 1. spatial_scale (float, optional): Scale points by this factor.
Default: 1.
Returns: Returns:
Tensor: Image based relative point coordinates for sampling, Tensor: Image based relative point coordinates for sampling, shape
shape (N, P, 2) (N, P, 2).
""" """
assert (isinstance(img, tuple) and len(img) == 2) or \ assert (isinstance(img, tuple) and len(img) == 2) or \
...@@ -213,23 +228,24 @@ def abs_img_point_to_rel_img_point(abs_img_points, img, spatial_scale=1.): ...@@ -213,23 +228,24 @@ def abs_img_point_to_rel_img_point(abs_img_points, img, spatial_scale=1.):
return abs_img_points / scale * spatial_scale return abs_img_points / scale * spatial_scale
def rel_roi_point_to_rel_img_point(rois, def rel_roi_point_to_rel_img_point(rois: Tensor,
rel_roi_points, rel_roi_points: Tensor,
img, img: Union[tuple, Tensor],
spatial_scale=1.): spatial_scale: float = 1.) -> Tensor:
"""Convert roi based relative point coordinates to image based absolute """Convert roi based relative point coordinates to image based absolute
point coordinates. point coordinates.
Args: Args:
rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5) rois (torch.Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5)
rel_roi_points (Tensor): Point coordinates inside RoI, relative to rel_roi_points (torch.Tensor): Point coordinates inside RoI, relative
RoI, location, range (0, 1), shape (N, P, 2) to RoI, location, range (0, 1), shape (N, P, 2)
img (tuple/Tensor): (height, width) of image or feature map. img (tuple or torch.Tensor): (height, width) of image or feature map.
spatial_scale (float): Scale points by this factor. Default: 1. spatial_scale (float, optional): Scale points by this factor.
Default: 1.
Returns: Returns:
Tensor: Image based relative point coordinates for sampling, torch.Tensor: Image based relative point coordinates for sampling,
shape (N, P, 2) shape (N, P, 2).
""" """
abs_img_point = rel_roi_point_to_abs_img_point(rois, rel_roi_points) abs_img_point = rel_roi_point_to_abs_img_point(rois, rel_roi_points)
...@@ -239,20 +255,25 @@ def rel_roi_point_to_rel_img_point(rois, ...@@ -239,20 +255,25 @@ def rel_roi_point_to_rel_img_point(rois,
return rel_img_point return rel_img_point
def point_sample(input, points, align_corners=False, **kwargs): def point_sample(input: Tensor,
points: Tensor,
align_corners: bool = False,
**kwargs) -> Tensor:
"""A wrapper around :func:`grid_sample` to support 3D point_coords tensors """A wrapper around :func:`grid_sample` to support 3D point_coords tensors
Unlike :func:`torch.nn.functional.grid_sample` it assumes point_coords to Unlike :func:`torch.nn.functional.grid_sample` it assumes point_coords to
lie inside ``[0, 1] x [0, 1]`` square. lie inside ``[0, 1] x [0, 1]`` square.
Args: Args:
input (Tensor): Feature map, shape (N, C, H, W). input (torch.Tensor): Feature map, shape (N, C, H, W).
points (Tensor): Image based absolute point coordinates (normalized), points (torch.Tensor): Image based absolute point coordinates
range [0, 1] x [0, 1], shape (N, P, 2) or (N, Hgrid, Wgrid, 2). (normalized), range [0, 1] x [0, 1], shape (N, P, 2) or
align_corners (bool): Whether align_corners. Default: False (N, Hgrid, Wgrid, 2).
align_corners (bool, optional): Whether align_corners.
Default: False
Returns: Returns:
Tensor: Features of `point` on `input`, shape (N, C, P) or torch.Tensor: Features of `point` on `input`, shape (N, C, P) or
(N, C, Hgrid, Wgrid). (N, C, Hgrid, Wgrid).
""" """
add_dim = False add_dim = False
...@@ -275,7 +296,10 @@ def point_sample(input, points, align_corners=False, **kwargs): ...@@ -275,7 +296,10 @@ def point_sample(input, points, align_corners=False, **kwargs):
class SimpleRoIAlign(nn.Module): class SimpleRoIAlign(nn.Module):
def __init__(self, output_size, spatial_scale, aligned=True): def __init__(self,
output_size: Tuple[int],
spatial_scale: float,
aligned: bool = True) -> None:
"""Simple RoI align in PointRend, faster than standard RoIAlign. """Simple RoI align in PointRend, faster than standard RoIAlign.
Args: Args:
...@@ -286,14 +310,14 @@ class SimpleRoIAlign(nn.Module): ...@@ -286,14 +310,14 @@ class SimpleRoIAlign(nn.Module):
If True, align the results more perfectly. If True, align the results more perfectly.
""" """
super(SimpleRoIAlign, self).__init__() super().__init__()
self.output_size = _pair(output_size) self.output_size = _pair(output_size)
self.spatial_scale = float(spatial_scale) self.spatial_scale = float(spatial_scale)
# to be consistent with other RoI ops # to be consistent with other RoI ops
self.use_torchvision = False self.use_torchvision = False
self.aligned = aligned self.aligned = aligned
def forward(self, features, rois): def forward(self, features: Tensor, rois: Tensor) -> Tensor:
num_imgs = features.size(0) num_imgs = features.size(0)
num_rois = rois.size(0) num_rois = rois.size(0)
rel_roi_points = generate_grid( rel_roi_points = generate_grid(
...@@ -329,7 +353,7 @@ class SimpleRoIAlign(nn.Module): ...@@ -329,7 +353,7 @@ class SimpleRoIAlign(nn.Module):
return roi_feats return roi_feats
def __repr__(self): def __repr__(self) -> str:
format_str = self.__class__.__name__ format_str = self.__class__.__name__
format_str += '(output_size={}, spatial_scale={}'.format( format_str += '(output_size={}, spatial_scale={}'.format(
self.output_size, self.spatial_scale) self.output_size, self.spatial_scale)
......
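A usage sketch for `SimpleRoIAlign`; it is pure PyTorch, so this runs on CPU (rois carry the batch index in column 0, as with other mmcv RoI ops):

import torch
from mmcv.ops import SimpleRoIAlign

roi_align = SimpleRoIAlign(output_size=7, spatial_scale=1.0)
feats = torch.randn(2, 16, 32, 32)
rois = torch.tensor([[0., 4., 4., 20., 20.],
                     [1., 0., 0., 15., 10.]])  # (num_rois, 5)
out = roi_align(feats, rois)
# Expected shape: (num_rois, C, 7, 7) -> (2, 16, 7, 7)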
import torch import torch
from torch import Tensor
from ..utils import ext_loader from ..utils import ext_loader
...@@ -8,17 +9,18 @@ ext_module = ext_loader.load_ext('_ext', [ ...@@ -8,17 +9,18 @@ ext_module = ext_loader.load_ext('_ext', [
]) ])
def points_in_boxes_part(points, boxes): def points_in_boxes_part(points: Tensor, boxes: Tensor) -> Tensor:
"""Find the box in which each point is (CUDA). """Find the box in which each point is (CUDA).
Args: Args:
points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH coordinate points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH coordinate.
boxes (torch.Tensor): [B, T, 7], boxes (torch.Tensor): [B, T, 7],
num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz] in num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz] in
LiDAR/DEPTH coordinate, (x, y, z) is the bottom center LiDAR/DEPTH coordinate, (x, y, z) is the bottom center.
Returns: Returns:
box_idxs_of_pts (torch.Tensor): (B, M), default background = -1 torch.Tensor: Return the box indices of points with the shape of
(B, M). Default background = -1.
""" """
assert points.shape[0] == boxes.shape[0], \ assert points.shape[0] == boxes.shape[0], \
'Points and boxes should have the same batch size, ' \ 'Points and boxes should have the same batch size, ' \
...@@ -55,7 +57,7 @@ def points_in_boxes_part(points, boxes): ...@@ -55,7 +57,7 @@ def points_in_boxes_part(points, boxes):
return box_idxs_of_pts return box_idxs_of_pts
def points_in_boxes_cpu(points, boxes): def points_in_boxes_cpu(points: Tensor, boxes: Tensor) -> Tensor:
"""Find all boxes in which each point is (CPU). The CPU version of """Find all boxes in which each point is (CPU). The CPU version of
:meth:`points_in_boxes_all`. :meth:`points_in_boxes_all`.
...@@ -67,7 +69,8 @@ def points_in_boxes_cpu(points, boxes): ...@@ -67,7 +69,8 @@ def points_in_boxes_cpu(points, boxes):
(x, y, z) is the bottom center. (x, y, z) is the bottom center.
Returns: Returns:
box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0. torch.Tensor: Return the box indices of points with the shape of
(B, M, T). Default background = 0.
""" """
assert points.shape[0] == boxes.shape[0], \ assert points.shape[0] == boxes.shape[0], \
'Points and boxes should have the same batch size, ' \ 'Points and boxes should have the same batch size, ' \
...@@ -92,7 +95,7 @@ def points_in_boxes_cpu(points, boxes): ...@@ -92,7 +95,7 @@ def points_in_boxes_cpu(points, boxes):
return point_indices return point_indices
def points_in_boxes_all(points, boxes): def points_in_boxes_all(points: Tensor, boxes: Tensor) -> Tensor:
"""Find all boxes in which each point is (CUDA). """Find all boxes in which each point is (CUDA).
Args: Args:
...@@ -102,7 +105,8 @@ def points_in_boxes_all(points, boxes): ...@@ -102,7 +105,8 @@ def points_in_boxes_all(points, boxes):
(x, y, z) is the bottom center. (x, y, z) is the bottom center.
Returns: Returns:
box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0. torch.Tensor: Return the box indices of points with the shape of
(B, M, T). Default background = 0.
""" """
assert boxes.shape[0] == points.shape[0], \ assert boxes.shape[0] == points.shape[0], \
'Points and boxes should have the same batch size, ' \ 'Points and boxes should have the same batch size, ' \
......
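A usage sketch for `points_in_boxes_cpu` (assumes the compiled `_ext` module; each box is [x, y, z, x_size, y_size, z_size, rz] with (x, y, z) at the bottom center):

import torch
from mmcv.ops import points_in_boxes_cpu

points = torch.tensor([[[0.5, 0.5, 0.5],
                        [5.0, 5.0, 5.0]]])              # (B=1, M=2, 3)
boxes = torch.tensor([[[0., 0., 0., 2., 2., 2., 0.]]])  # (B=1, T=1, 7)
idx = points_in_boxes_cpu(points, boxes)                # (B, M, T)
# The first point lies inside the box (idx[0, 0, 0] == 1), the
# second outside (idx[0, 1, 0] == 0).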
import torch
from torch import Tensor
from ..utils import ext_loader
ext_module = ext_loader.load_ext('_ext', ['points_in_polygons_forward'])
def points_in_polygons(points: Tensor, polygons: Tensor) -> Tensor:
"""Judging whether points are inside polygons, which is used in the ATSS
assignment for the rotated boxes.
It should be noted that when the point is just at the polygon boundary, the
judgment will be inaccurate, but the effect on assignment is limited.
Args:
points (torch.Tensor): It has shape (B, 2), indicating (x, y).
B means the number of predicted points.
polygons (torch.Tensor): It has shape (M, 8), indicating
(x1, y1, x2, y2, x3, y3, x4, y4). M means the number of
ground truth polygons.
Returns:
torch.Tensor: Return the result with the shape of (B, M),
1 indicates that the point is inside the polygon,
0 indicates that the point is outside the polygon.
"""
assert points.shape[1] == 2, \
'points dimension should be 2, ' \
f'but got unexpected shape {points.shape[1]}'
assert polygons.shape[1] == 8, \
'polygons dimension should be 8, ' \
f'but got unexpected shape {polygons.shape[1]}'
output = torch.full([points.shape[0], polygons.shape[0]],
0.).cuda().float()
ext_module.points_in_polygons_forward(points.contiguous(),
polygons.contiguous(), output)
return output
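A usage sketch for `points_in_polygons` (the op is CUDA-only, so a GPU build of mmcv is assumed):

import torch
from mmcv.ops import points_in_polygons

# A unit square given as four corners (x1, y1, ..., x4, y4).
polygons = torch.tensor([[0., 0., 1., 0., 1., 1., 0., 1.]]).cuda()
points = torch.tensor([[0.5, 0.5],
                       [2.0, 2.0]]).cuda()
out = points_in_polygons(points, polygons)
# Per the docstring: 1 where the point is inside the polygon,
# 0 where it is outside -> [[1.], [0.]].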
from typing import List from typing import List
import torch import torch
from torch import Tensor
from torch import nn as nn from torch import nn as nn
from mmcv.runner import force_fp32 from mmcv.runner import force_fp32
...@@ -8,17 +9,19 @@ from .furthest_point_sample import (furthest_point_sample, ...@@ -8,17 +9,19 @@ from .furthest_point_sample import (furthest_point_sample,
furthest_point_sample_with_dist) furthest_point_sample_with_dist)
def calc_square_dist(point_feat_a, point_feat_b, norm=True): def calc_square_dist(point_feat_a: Tensor,
point_feat_b: Tensor,
norm: bool = True) -> Tensor:
"""Calculating square distance between a and b. """Calculating square distance between a and b.
Args: Args:
point_feat_a (Tensor): (B, N, C) Feature vector of each point. point_feat_a (torch.Tensor): (B, N, C) Feature vector of each point.
point_feat_b (Tensor): (B, M, C) Feature vector of each point. point_feat_b (torch.Tensor): (B, M, C) Feature vector of each point.
norm (Bool, optional): Whether to normalize the distance. norm (bool, optional): Whether to normalize the distance.
Default: True. Default: True.
Returns: Returns:
Tensor: (B, N, M) Distance between each pair points. torch.Tensor: (B, N, M) Square distance between each point pair.
""" """
num_channel = point_feat_a.shape[-1] num_channel = point_feat_a.shape[-1]
# [bs, n, 1] # [bs, n, 1]
...@@ -34,7 +37,7 @@ def calc_square_dist(point_feat_a, point_feat_b, norm=True): ...@@ -34,7 +37,7 @@ def calc_square_dist(point_feat_a, point_feat_b, norm=True):
return dist return dist
def get_sampler_cls(sampler_type): def get_sampler_cls(sampler_type: str) -> nn.Module:
"""Get the type and mode of points sampler. """Get the type and mode of points sampler.
Args: Args:
...@@ -74,7 +77,7 @@ class PointsSampler(nn.Module): ...@@ -74,7 +77,7 @@ class PointsSampler(nn.Module):
def __init__(self, def __init__(self,
num_point: List[int], num_point: List[int],
fps_mod_list: List[str] = ['D-FPS'], fps_mod_list: List[str] = ['D-FPS'],
fps_sample_range_list: List[int] = [-1]): fps_sample_range_list: List[int] = [-1]) -> None:
super().__init__() super().__init__()
# FPS would be applied to different fps_mod in the list, # FPS would be applied to different fps_mod in the list,
# so the length of the num_point should be equal to # so the length of the num_point should be equal to
...@@ -89,18 +92,18 @@ class PointsSampler(nn.Module): ...@@ -89,18 +92,18 @@ class PointsSampler(nn.Module):
self.fp16_enabled = False self.fp16_enabled = False
@force_fp32() @force_fp32()
def forward(self, points_xyz, features): def forward(self, points_xyz: Tensor, features: Tensor) -> Tensor:
""" """
Args: Args:
points_xyz (Tensor): (B, N, 3) xyz coordinates of the features. points_xyz (torch.Tensor): (B, N, 3) xyz coordinates of
features (Tensor): (B, C, N) Descriptors of the features. the points.
features (torch.Tensor): (B, C, N) features of the points.
Returns: Returns:
Tensor: (B, npoint, sample_num) Indices of sampled points. torch.Tensor: (B, npoint, sample_num) Indices of sampled points.
""" """
indices = [] indices = []
last_fps_end_index = 0 last_fps_end_index = 0
for fps_sample_range, sampler, npoint in zip( for fps_sample_range, sampler, npoint in zip(
self.fps_sample_range_list, self.samplers, self.num_point): self.fps_sample_range_list, self.samplers, self.num_point):
assert fps_sample_range < points_xyz.shape[1] assert fps_sample_range < points_xyz.shape[1]
...@@ -112,8 +115,8 @@ class PointsSampler(nn.Module): ...@@ -112,8 +115,8 @@ class PointsSampler(nn.Module):
else: else:
sample_features = None sample_features = None
else: else:
sample_points_xyz = \ sample_points_xyz = points_xyz[:, last_fps_end_index:
points_xyz[:, last_fps_end_index:fps_sample_range] fps_sample_range]
if features is not None: if features is not None:
sample_features = features[:, :, last_fps_end_index: sample_features = features[:, :, last_fps_end_index:
fps_sample_range] fps_sample_range]
...@@ -124,7 +127,7 @@ class PointsSampler(nn.Module): ...@@ -124,7 +127,7 @@ class PointsSampler(nn.Module):
npoint) npoint)
indices.append(fps_idx + last_fps_end_index) indices.append(fps_idx + last_fps_end_index)
last_fps_end_index += fps_sample_range last_fps_end_index = fps_sample_range
indices = torch.cat(indices, dim=1) indices = torch.cat(indices, dim=1)
return indices return indices
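A usage sketch for `PointsSampler` (furthest point sampling runs through CUDA kernels, so a GPU build is assumed; with these settings the result is a (B, 256) index tensor):

import torch
from mmcv.ops import PointsSampler

points_xyz = torch.rand(2, 1024, 3).cuda()  # (B, N, 3)
features = torch.rand(2, 16, 1024).cuda()   # (B, C, N)
sampler = PointsSampler(num_point=[256],
                        fps_mod_list=['D-FPS'],
                        fps_sample_range_list=[-1])
idx = sampler(points_xyz, features)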
...@@ -133,10 +136,10 @@ class PointsSampler(nn.Module): ...@@ -133,10 +136,10 @@ class PointsSampler(nn.Module):
class DFPSSampler(nn.Module): class DFPSSampler(nn.Module):
"""Using Euclidean distances of points for FPS.""" """Using Euclidean distances of points for FPS."""
def __init__(self): def __init__(self) -> None:
super().__init__() super().__init__()
def forward(self, points, features, npoint): def forward(self, points: Tensor, features: Tensor, npoint: int) -> Tensor:
"""Sampling points with D-FPS.""" """Sampling points with D-FPS."""
fps_idx = furthest_point_sample(points.contiguous(), npoint) fps_idx = furthest_point_sample(points.contiguous(), npoint)
return fps_idx return fps_idx
...@@ -145,10 +148,10 @@ class DFPSSampler(nn.Module): ...@@ -145,10 +148,10 @@ class DFPSSampler(nn.Module):
class FFPSSampler(nn.Module): class FFPSSampler(nn.Module):
"""Using feature distances for FPS.""" """Using feature distances for FPS."""
def __init__(self): def __init__(self) -> None:
super().__init__() super().__init__()
def forward(self, points, features, npoint): def forward(self, points: Tensor, features: Tensor, npoint: int) -> Tensor:
"""Sampling points with F-FPS.""" """Sampling points with F-FPS."""
assert features is not None, \ assert features is not None, \
'feature input to FFPS_Sampler should not be None' 'feature input to FFPS_Sampler should not be None'
...@@ -162,10 +165,10 @@ class FFPSSampler(nn.Module): ...@@ -162,10 +165,10 @@ class FFPSSampler(nn.Module):
class FSSampler(nn.Module): class FSSampler(nn.Module):
"""Using F-FPS and D-FPS simultaneously.""" """Using F-FPS and D-FPS simultaneously."""
def __init__(self): def __init__(self) -> None:
super().__init__() super().__init__()
def forward(self, points, features, npoint): def forward(self, points: Tensor, features: Tensor, npoint: int) -> Tensor:
"""Sampling points with FS_Sampling.""" """Sampling points with FS_Sampling."""
assert features is not None, \ assert features is not None, \
'feature input to FS_Sampler should not be None' 'feature input to FS_Sampler should not be None'
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Tuple, Union
import torch
import torch.nn as nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair
from ..utils import ext_loader
ext_module = ext_loader.load_ext(
'_ext',
['prroi_pool_forward', 'prroi_pool_backward', 'prroi_pool_coor_backward'])
class PrRoIPoolFunction(Function):
@staticmethod
def symbolic(g, features, rois, output_size, spatial_scale):
return g.op(
'mmcv::PrRoIPool',
features,
rois,
pooled_height_i=int(output_size[0]),
pooled_width_i=int(output_size[1]),
spatial_scale_f=float(spatial_scale))
@staticmethod
def forward(ctx,
features: torch.Tensor,
rois: torch.Tensor,
output_size: Tuple,
spatial_scale: float = 1.0) -> torch.Tensor:
if 'FloatTensor' not in features.type(
) or 'FloatTensor' not in rois.type():
raise ValueError(
'Precise RoI Pooling only takes float input, got '
f'{features.type()} for features and {rois.type()} for rois.')
pooled_height = int(output_size[0])
pooled_width = int(output_size[1])
spatial_scale = float(spatial_scale)
features = features.contiguous()
rois = rois.contiguous()
output_shape = (rois.size(0), features.size(1), pooled_height,
pooled_width)
output = features.new_zeros(output_shape)
params = (pooled_height, pooled_width, spatial_scale)
ext_module.prroi_pool_forward(features, rois, output, *params)
ctx.params = params
# everything here is contiguous.
ctx.save_for_backward(features, rois, output)
return output
@staticmethod
@once_differentiable
def backward(
ctx, grad_output: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor, None, None, None]:
features, rois, output = ctx.saved_tensors
grad_input = grad_output.new_zeros(*features.shape)
grad_coor = grad_output.new_zeros(*rois.shape)
if features.requires_grad:
grad_output = grad_output.contiguous()
ext_module.prroi_pool_backward(grad_output, rois, grad_input,
*ctx.params)
if rois.requires_grad:
grad_output = grad_output.contiguous()
ext_module.prroi_pool_coor_backward(output, grad_output, features,
rois, grad_coor, *ctx.params)
return grad_input, grad_coor, None, None, None
prroi_pool = PrRoIPoolFunction.apply
class PrRoIPool(nn.Module):
"""The operation of precision RoI pooling. The implementation of PrRoIPool
is modified from https://github.com/vacancy/PreciseRoIPooling/
Precise RoI Pooling (PrRoIPool) is an integration-based (bilinear
interpolation) average pooling method for RoI Pooling. It avoids any
quantization and has a continuous gradient on bounding box coordinates.
It is:
1. different from the original RoI Pooling proposed in Fast R-CNN. PrRoI
Pooling uses average pooling instead of max pooling for each bin and has a
continuous gradient on bounding box coordinates. That is, one can take the
derivatives of some loss function w.r.t. the coordinates of each RoI and
optimize the RoI coordinates.
2. different from the RoI Align proposed in Mask R-CNN. PrRoI Pooling uses
a full integration-based average pooling instead of sampling a constant
number of points. This makes the gradient w.r.t. the coordinates
continuous.
Args:
output_size (Union[int, tuple]): h, w.
spatial_scale (float, optional): scale the input boxes by this number.
Defaults to 1.0.
"""
def __init__(self,
output_size: Union[int, tuple],
spatial_scale: float = 1.0):
super().__init__()
self.output_size = _pair(output_size)
self.spatial_scale = float(spatial_scale)
def forward(self, features: torch.Tensor,
rois: torch.Tensor) -> torch.Tensor:
"""Forward function.
Args:
features (torch.Tensor): The feature map.
rois (torch.Tensor): The RoI bboxes in [tl_x, tl_y, br_x, br_y]
format.
Returns:
torch.Tensor: The pooled results.
"""
return prroi_pool(features, rois, self.output_size, self.spatial_scale)
def __repr__(self):
s = self.__class__.__name__
s += f'(output_size={self.output_size}, '
s += f'spatial_scale={self.spatial_scale})'
return s
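The distinguishing property of PrRoIPool, the continuous gradient w.r.t. box coordinates, suggests the following usage sketch (the op is CUDA-only, so a GPU build is assumed; rois are taken as (batch_idx, x1, y1, x2, y2) as with other mmcv RoI ops):

import torch
from mmcv.ops import PrRoIPool

pool = PrRoIPool(output_size=7, spatial_scale=1.0)
feats = torch.randn(1, 16, 32, 32, device='cuda', requires_grad=True)
rois = torch.tensor([[0., 4., 4., 20., 20.]], device='cuda',
                    requires_grad=True)
out = pool(feats, rois)   # (1, 16, 7, 7)
out.sum().backward()
# rois.grad is populated: the loss is differentiable w.r.t. the box
# coordinates themselves, not just the features.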
# Modified from https://github.com/hszhao/semseg/blob/master/lib/psa # Modified from https://github.com/hszhao/semseg/blob/master/lib/psa
from typing import Optional, Tuple
import torch
from torch import nn from torch import nn
from torch.autograd import Function from torch.autograd import Function
from torch.nn.modules.utils import _pair from torch.nn.modules.utils import _pair
...@@ -20,7 +23,8 @@ class PSAMaskFunction(Function): ...@@ -20,7 +23,8 @@ class PSAMaskFunction(Function):
mask_size_i=mask_size) mask_size_i=mask_size)
@staticmethod @staticmethod
def forward(ctx, input, psa_type, mask_size): def forward(ctx, input: torch.Tensor, psa_type: str,
mask_size: int) -> torch.Tensor:
ctx.psa_type = psa_type ctx.psa_type = psa_type
ctx.mask_size = _pair(mask_size) ctx.mask_size = _pair(mask_size)
ctx.save_for_backward(input) ctx.save_for_backward(input)
...@@ -45,7 +49,9 @@ class PSAMaskFunction(Function): ...@@ -45,7 +49,9 @@ class PSAMaskFunction(Function):
return output return output
@staticmethod @staticmethod
def backward(ctx, grad_output): def backward(
ctx, grad_output: torch.Tensor
) -> Tuple[torch.Tensor, None, None, None]:
input = ctx.saved_tensors[0] input = ctx.saved_tensors[0]
psa_type = ctx.psa_type psa_type = ctx.psa_type
h_mask, w_mask = ctx.mask_size h_mask, w_mask = ctx.mask_size
...@@ -71,8 +77,8 @@ psa_mask = PSAMaskFunction.apply ...@@ -71,8 +77,8 @@ psa_mask = PSAMaskFunction.apply
class PSAMask(nn.Module): class PSAMask(nn.Module):
def __init__(self, psa_type, mask_size=None): def __init__(self, psa_type: str, mask_size: Optional[tuple] = None):
super(PSAMask, self).__init__() super().__init__()
assert psa_type in ['collect', 'distribute'] assert psa_type in ['collect', 'distribute']
if psa_type == 'collect': if psa_type == 'collect':
psa_type_enum = 0 psa_type_enum = 0
...@@ -82,7 +88,7 @@ class PSAMask(nn.Module): ...@@ -82,7 +88,7 @@ class PSAMask(nn.Module):
self.mask_size = mask_size self.mask_size = mask_size
self.psa_type = psa_type self.psa_type = psa_type
def forward(self, input): def forward(self, input: torch.Tensor) -> torch.Tensor:
return psa_mask(input, self.psa_type_enum, self.mask_size) return psa_mask(input, self.psa_type_enum, self.mask_size)
def __repr__(self): def __repr__(self):
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Any, Optional, Tuple, Union
import torch
import torch.nn as nn
from torch.autograd import Function
from ..utils import ext_loader, is_tuple_of
ext_module = ext_loader.load_ext(
'_ext', ['riroi_align_rotated_forward', 'riroi_align_rotated_backward'])
class RiRoIAlignRotatedFunction(Function):
@staticmethod
def forward(ctx: Any,
features: torch.Tensor,
rois: torch.Tensor,
out_size: Union[int, tuple],
spatial_scale: float,
num_samples: int = 0,
num_orientations: int = 8,
clockwise: bool = False) -> torch.Tensor:
if isinstance(out_size, int):
out_h = out_size
out_w = out_size
elif is_tuple_of(out_size, int):
assert len(out_size) == 2
out_h, out_w = out_size
else:
raise TypeError(
f'"out_size" should be an integer or tuple of integers,'
f' but got {out_size}')
ctx.spatial_scale = spatial_scale
ctx.num_samples = num_samples
ctx.num_orientations = num_orientations
ctx.clockwise = clockwise
ctx.save_for_backward(rois)
ctx.feature_size = features.size()
batch_size, num_channels, _, _ = features.size()
num_rois = rois.size(0)
output = features.new_zeros(num_rois, num_channels, out_h, out_w)
ext_module.riroi_align_rotated_forward(
features,
rois,
output,
pooled_height=out_h,
pooled_width=out_w,
spatial_scale=spatial_scale,
num_samples=num_samples,
num_orientations=num_orientations,
clockwise=clockwise)
return output
@staticmethod
def backward(
ctx: Any, grad_output: torch.Tensor
) -> Optional[Tuple[torch.Tensor, None, None, None, None, None, None]]:
feature_size = ctx.feature_size
spatial_scale = ctx.spatial_scale
num_orientations = ctx.num_orientations
clockwise = ctx.clockwise
num_samples = ctx.num_samples
rois = ctx.saved_tensors[0]
assert feature_size is not None
batch_size, num_channels, feature_h, feature_w = feature_size
out_w = grad_output.size(3)
out_h = grad_output.size(2)
grad_input = None
if ctx.needs_input_grad[0]:
grad_input = rois.new_zeros(batch_size, num_channels, feature_h,
feature_w)
ext_module.riroi_align_rotated_backward(
grad_output.contiguous(),
rois,
grad_input,
pooled_height=out_h,
pooled_width=out_w,
spatial_scale=spatial_scale,
num_samples=num_samples,
num_orientations=num_orientations,
clockwise=clockwise)
return grad_input, None, None, None, None, None, None
return None
riroi_align_rotated = RiRoIAlignRotatedFunction.apply
class RiRoIAlignRotated(nn.Module):
"""Rotation-invariant RoI align pooling layer for rotated proposals.
It accepts a feature map of shape (N, C, H, W) and rois with shape
(n, 6) with each roi decoded as (batch_index, center_x, center_y,
w, h, angle). The angle is in radian.
The details are described in the paper `ReDet: A Rotation-equivariant
Detector for Aerial Object Detection <https://arxiv.org/abs/2103.07733>`_.
Args:
out_size (tuple): fixed dimensional RoI output with shape (h, w).
spatial_scale (float): scale the input boxes by this number.
num_samples (int): number of input samples to take for each
output sample. 0 to take samples densely for current models.
num_orientations (int): number of oriented channels.
clockwise (bool): If True, the angle in each proposal follows a
clockwise fashion in image space, otherwise, the angle is
counterclockwise. Default: False.
"""
def __init__(self,
out_size: tuple,
spatial_scale: float,
num_samples: int = 0,
num_orientations: int = 8,
clockwise: bool = False):
super().__init__()
self.out_size = out_size
self.spatial_scale = float(spatial_scale)
self.num_samples = int(num_samples)
self.num_orientations = int(num_orientations)
self.clockwise = clockwise
def forward(self, features: torch.Tensor,
rois: torch.Tensor) -> torch.Tensor:
return RiRoIAlignRotatedFunction.apply(features, rois, self.out_size,
self.spatial_scale,
self.num_samples,
self.num_orientations,
self.clockwise)
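A shape-oriented usage sketch for `RiRoIAlignRotated` (CUDA-only; the channel count is assumed to be a multiple of `num_orientations`, and each roi is (batch_idx, cx, cy, w, h, angle) with the angle in radians):

import torch
from mmcv.ops import RiRoIAlignRotated

align = RiRoIAlignRotated(out_size=(7, 7), spatial_scale=1.0,
                          num_samples=2, num_orientations=8)
feats = torch.randn(1, 8, 32, 32, device='cuda')
rois = torch.tensor([[0., 16., 16., 10., 6., 0.3]], device='cuda')
out = align(feats, rois)
# Expected shape: (num_rois, C, 7, 7) -> (1, 8, 7, 7)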