Commit fdeee889 authored by limm

release v1.6.1 of mmcv

parent df465820
......@@ -18,11 +18,11 @@ class FurthestPointSampling(Function):
num_points: int) -> torch.Tensor:
"""
Args:
points_xyz (Tensor): (B, N, 3) where N > num_points.
points_xyz (torch.Tensor): (B, N, 3) where N > num_points.
num_points (int): Number of points in the sampled set.
Returns:
Tensor: (B, num_points) indices of the sampled points.
torch.Tensor: (B, num_points) indices of the sampled points.
"""
assert points_xyz.is_contiguous()
......@@ -56,11 +56,12 @@ class FurthestPointSamplingWithDist(Function):
num_points: int) -> torch.Tensor:
"""
Args:
points_dist (Tensor): (B, N, N) Distance between each point pair.
points_dist (torch.Tensor): (B, N, N) Distance between each point
pair.
num_points (int): Number of points in the sampled set.
Returns:
Tensor: (B, num_points) indices of the sampled points.
torch.Tensor: (B, num_points) indices of the sampled points.
"""
assert points_dist.is_contiguous()
......
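A usage sketch for the sampling ops documented above (illustrative only; it assumes a CUDA build of mmcv that exports `furthest_point_sample` from `mmcv.ops`):

import torch
from mmcv.ops import furthest_point_sample

points_xyz = torch.rand(2, 1024, 3).cuda()    # (B, N, 3), N > num_points
idx = furthest_point_sample(points_xyz, 128)  # (B, 128) indices of sampled points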
......@@ -113,7 +113,8 @@ class FusedBiasLeakyReLUFunctionBackward(Function):
"""
@staticmethod
def forward(ctx, grad_output, out, negative_slope, scale):
def forward(ctx, grad_output: torch.Tensor, out: torch.Tensor,
negative_slope: float, scale: float) -> tuple:
ctx.save_for_backward(out)
ctx.negative_slope = negative_slope
ctx.scale = scale
......@@ -139,7 +140,8 @@ class FusedBiasLeakyReLUFunctionBackward(Function):
return grad_input, grad_bias
@staticmethod
def backward(ctx, gradgrad_input, gradgrad_bias):
def backward(ctx, gradgrad_input: torch.Tensor,
gradgrad_bias: nn.Parameter) -> tuple:
out, = ctx.saved_tensors
# The second order derivative, in fact, contains two parts, while the
......@@ -160,7 +162,8 @@ class FusedBiasLeakyReLUFunctionBackward(Function):
class FusedBiasLeakyReLUFunction(Function):
@staticmethod
def forward(ctx, input, bias, negative_slope, scale):
def forward(ctx, input: torch.Tensor, bias: nn.Parameter,
negative_slope: float, scale: float) -> torch.Tensor:
empty = input.new_empty(0)
out = ext_module.fused_bias_leakyrelu(
......@@ -178,7 +181,7 @@ class FusedBiasLeakyReLUFunction(Function):
return out
@staticmethod
def backward(ctx, grad_output):
def backward(ctx, grad_output: torch.Tensor) -> tuple:
out, = ctx.saved_tensors
grad_input, grad_bias = FusedBiasLeakyReLUFunctionBackward.apply(
......@@ -188,51 +191,59 @@ class FusedBiasLeakyReLUFunction(Function):
class FusedBiasLeakyReLU(nn.Module):
"""Fused bias leaky ReLU.
r"""Fused bias leaky ReLU.
This function is introduced in the StyleGAN2:
http://arxiv.org/abs/1912.04958
`Analyzing and Improving the Image Quality of StyleGAN
<http://arxiv.org/abs/1912.04958>`_
The bias term comes from the convolution operation. In addition, to keep
the variance of the feature map or gradients unchanged, they also adopt a
scale similar to that of Kaiming initialization. However, since the
:math:`1+{alpha}^2` : is too small, we can just ignore it. Therefore, the
final scale is just :math:`\sqrt{2}`:. Of course, you may change it with # noqa: W605, E501
:math:`1+{alpha}^2` is too small, we can just ignore it. Therefore, the
final scale is just :math:`\sqrt{2}`. Of course, you may change it with
your own scale.
TODO: Implement the CPU version.
Args:
channel (int): The channel number of the feature map.
num_channels (int): The channel number of the feature map.
negative_slope (float, optional): Same as nn.LeakyReLU.
Defaults to 0.2.
scale (float, optional): A scalar to adjust the variance of the feature
map. Defaults to 2**0.5.
"""
def __init__(self, num_channels, negative_slope=0.2, scale=2**0.5):
super(FusedBiasLeakyReLU, self).__init__()
def __init__(self,
num_channels: int,
negative_slope: float = 0.2,
scale: float = 2**0.5):
super().__init__()
self.bias = nn.Parameter(torch.zeros(num_channels))
self.negative_slope = negative_slope
self.scale = scale
def forward(self, input):
def forward(self, input: torch.Tensor) -> torch.Tensor:
return fused_bias_leakyrelu(input, self.bias, self.negative_slope,
self.scale)
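A hedged usage sketch of the module above (illustrative; the op has no CPU implementation per the TODO, so a CUDA build of mmcv is assumed):

import torch
from mmcv.ops import FusedBiasLeakyReLU

act = FusedBiasLeakyReLU(num_channels=64).cuda()
x = torch.randn(2, 64, 32, 32).cuda()
y = act(x)  # same shape as x: fused bias add + leaky ReLU + sqrt(2) scaling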
def fused_bias_leakyrelu(input, bias, negative_slope=0.2, scale=2**0.5):
"""Fused bias leaky ReLU function.
def fused_bias_leakyrelu(input: torch.Tensor,
bias: nn.Parameter,
negative_slope: float = 0.2,
scale: float = 2**0.5) -> torch.Tensor:
r"""Fused bias leaky ReLU function.
This function is introduced in the StyleGAN2:
http://arxiv.org/abs/1912.04958
`Analyzing and Improving the Image Quality of StyleGAN
<http://arxiv.org/abs/1912.04958>`_
The bias term comes from the convolution operation. In addition, to keep
the variance of the feature map or gradients unchanged, they also adopt a
scale similar to that of Kaiming initialization. However, since the
:math:`1+{alpha}^2` : is too small, we can just ignore it. Therefore, the
final scale is just :math:`\sqrt{2}`:. Of course, you may change it with # noqa: W605, E501
:math:`1+{alpha}^2` is too small, we can just ignore it. Therefore, the
final scale is just :math:`\sqrt{2}`. Of course, you may change it with
your own scale.
Args:
......@@ -254,7 +265,10 @@ def fused_bias_leakyrelu(input, bias, negative_slope=0.2, scale=2**0.5):
negative_slope, scale)
def bias_leakyrelu_ref(x, bias, negative_slope=0.2, scale=2**0.5):
def bias_leakyrelu_ref(x: torch.Tensor,
bias: nn.Parameter,
negative_slope: float = 0.2,
scale: float = 2**0.5) -> torch.Tensor:
if bias is not None:
assert bias.ndim == 1
......
from typing import Tuple
import torch
from torch.autograd import Function
......@@ -15,18 +17,18 @@ class GatherPoints(Function):
indices: torch.Tensor) -> torch.Tensor:
"""
Args:
features (Tensor): (B, C, N) features to gather.
indices (Tensor): (B, M) where M is the number of points.
features (torch.Tensor): (B, C, N) features to gather.
indices (torch.Tensor): (B, M) where M is the number of points.
Returns:
Tensor: (B, C, M) where M is the number of points.
torch.Tensor: (B, C, M) where M is the number of points.
"""
assert features.is_contiguous()
assert indices.is_contiguous()
B, npoint = indices.size()
_, C, N = features.size()
output = torch.cuda.FloatTensor(B, C, npoint)
output = features.new_zeros((B, C, npoint))
ext_module.gather_points_forward(
features, indices, output, b=B, c=C, n=N, npoints=npoint)
......@@ -37,11 +39,11 @@ class GatherPoints(Function):
return output
@staticmethod
def backward(ctx, grad_out):
def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, None]:
idx, C, N = ctx.for_backwards
B, npoint = idx.size()
grad_features = torch.cuda.FloatTensor(B, C, N).zero_()
grad_features = grad_out.new_zeros((B, C, N))
grad_out_data = grad_out.data.contiguous()
ext_module.gather_points_backward(
grad_out_data,
......
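A usage sketch (illustrative; assumes a CUDA build of mmcv exporting `gather_points` from `mmcv.ops`):

import torch
from mmcv.ops import gather_points

features = torch.rand(2, 16, 1024).cuda()            # (B, C, N)
indices = torch.randint(0, 1024, (2, 128),
                        dtype=torch.int32).cuda()    # (B, M)
out = gather_points(features, indices)               # (B, C, M) == (2, 16, 128)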
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Tuple
from typing import Optional, Tuple, Union
import torch
from torch import nn as nn
......@@ -37,15 +37,15 @@ class QueryAndGroup(nn.Module):
"""
def __init__(self,
max_radius,
sample_num,
min_radius=0,
use_xyz=True,
return_grouped_xyz=False,
normalize_xyz=False,
uniform_sample=False,
return_unique_cnt=False,
return_grouped_idx=False):
max_radius: float,
sample_num: int,
min_radius: float = 0.,
use_xyz: bool = True,
return_grouped_xyz: bool = False,
normalize_xyz: bool = False,
uniform_sample: bool = False,
return_unique_cnt: bool = False,
return_grouped_idx: bool = False):
super().__init__()
self.max_radius = max_radius
self.min_radius = min_radius
......@@ -64,15 +64,24 @@ class QueryAndGroup(nn.Module):
assert not self.normalize_xyz, \
'can not normalize grouped xyz when max_radius is None'
def forward(self, points_xyz, center_xyz, features=None):
def forward(
self,
points_xyz: torch.Tensor,
center_xyz: torch.Tensor,
features: Optional[torch.Tensor] = None,
) -> Union[torch.Tensor, Tuple]:
"""
Args:
points_xyz (Tensor): (B, N, 3) xyz coordinates of the features.
center_xyz (Tensor): (B, npoint, 3) coordinates of the centriods.
features (Tensor): (B, C, N) Descriptors of the features.
points_xyz (torch.Tensor): (B, N, 3) xyz coordinates of the
points.
center_xyz (torch.Tensor): (B, npoint, 3) coordinates of the
centroids.
features (torch.Tensor): (B, C, N) The features of grouped
points.
Returns:
Tensor: (B, 3 + C, npoint, sample_num) Grouped feature.
Tuple | torch.Tensor: (B, 3 + C, npoint, sample_num) Grouped
concatenated coordinates and features of points.
"""
# if self.max_radius is None, we will perform kNN instead of ball query
# idx is of shape [B, npoint, sample_num]
......@@ -145,7 +154,7 @@ class GroupAll(nn.Module):
def forward(self,
xyz: torch.Tensor,
new_xyz: torch.Tensor,
features: torch.Tensor = None):
features: Optional[torch.Tensor] = None) -> torch.Tensor:
"""
Args:
xyz (Tensor): (B, N, 3) xyz coordinates of the features.
......@@ -206,8 +215,7 @@ class GroupingOperation(Function):
return output
@staticmethod
def backward(ctx,
grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, None]:
"""
Args:
grad_out (Tensor): (B, C, npoint, nsample) tensor of the gradients
......
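A usage sketch for the grouping utilities above (illustrative; assumes a CUDA build of mmcv exporting `QueryAndGroup` from `mmcv.ops`):

import torch
from mmcv.ops import QueryAndGroup

grouper = QueryAndGroup(max_radius=0.2, sample_num=16, use_xyz=True)
points_xyz = torch.rand(2, 1024, 3).cuda()   # (B, N, 3)
center_xyz = torch.rand(2, 128, 3).cuda()    # (B, npoint, 3)
features = torch.rand(2, 32, 1024).cuda()    # (B, C, N)
grouped = grouper(points_xyz, center_xyz, features)
# with use_xyz=True: (B, 3 + C, npoint, sample_num) == (2, 35, 128, 16)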
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from typing import Optional
import torch
from torch import Tensor
from ..utils import ext_loader
ext_module = ext_loader.load_ext('_ext', [
'iou3d_boxes_iou_bev_forward', 'iou3d_nms_forward',
'iou3d_nms_normal_forward'
'iou3d_boxes_overlap_bev_forward', 'iou3d_nms3d_forward',
'iou3d_nms3d_normal_forward'
])
def boxes_iou_bev(boxes_a, boxes_b):
"""Calculate boxes IoU in the Bird's Eye View.
def boxes_overlap_bev(boxes_a: Tensor, boxes_b: Tensor) -> Tensor:
"""Calculate boxes BEV overlap.
Args:
boxes_a (torch.Tensor): Input boxes a with shape (M, 7).
boxes_b (torch.Tensor): Input boxes b with shape (N, 7).
Returns:
torch.Tensor: BEV overlap result with shape (M, N).
"""
ans_overlap = boxes_a.new_zeros(
torch.Size((boxes_a.shape[0], boxes_b.shape[0])))
ext_module.iou3d_boxes_overlap_bev_forward(boxes_a.contiguous(),
boxes_b.contiguous(),
ans_overlap)
return ans_overlap
def boxes_iou3d(boxes_a: Tensor, boxes_b: Tensor) -> Tensor:
"""Calculate boxes 3D IoU.
Args:
boxes_a (torch.Tensor): Input boxes a with shape (M, 5).
boxes_b (torch.Tensor): Input boxes b with shape (N, 5).
boxes_a (torch.Tensor): Input boxes a with shape (M, 7).
boxes_b (torch.Tensor): Input boxes b with shape (N, 7).
Returns:
ans_iou (torch.Tensor): IoU result with shape (M, N).
torch.Tensor: 3D IoU result with shape (M, N).
"""
assert boxes_a.shape[1] == boxes_b.shape[1] == 7,\
'Input boxes shape should be (N, 7)'
boxes_a_height_max = (boxes_a[:, 2] + boxes_a[:, 5] / 2).view(-1, 1)
boxes_a_height_min = (boxes_a[:, 2] - boxes_a[:, 5] / 2).view(-1, 1)
boxes_b_height_max = (boxes_b[:, 2] + boxes_b[:, 5] / 2).view(1, -1)
boxes_b_height_min = (boxes_b[:, 2] - boxes_b[:, 5] / 2).view(1, -1)
overlaps_bev = boxes_a.new_zeros(
torch.Size((boxes_a.shape[0], boxes_b.shape[0])))
ext_module.iou3d_boxes_overlap_bev_forward(boxes_a.contiguous(),
boxes_b.contiguous(),
overlaps_bev)
max_of_min = torch.max(boxes_a_height_min, boxes_b_height_min)
min_of_max = torch.min(boxes_a_height_max, boxes_b_height_max)
overlaps_h = torch.clamp(min_of_max - max_of_min, min=0)
overlaps_3d = overlaps_bev * overlaps_h
vol_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1)
vol_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1)
iou3d = overlaps_3d / torch.clamp(vol_a + vol_b - overlaps_3d, min=1e-6)
return iou3d
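A sanity-check sketch for `boxes_iou3d` (illustrative; assumes a CUDA build of mmcv):

import torch
from mmcv.ops import boxes_iou3d

# (x, y, z, dx, dy, dz, heading); the first two boxes coincide and the third
# is far away, so the diagonal of the IoU matrix is 1 and iou[0, 2] is 0.
boxes = torch.tensor([[0., 0., 0., 2., 2., 2., 0.],
                      [0., 0., 0., 2., 2., 2., 0.],
                      [10., 10., 10., 2., 2., 2., 0.]]).cuda()
iou = boxes_iou3d(boxes, boxes)  # (3, 3)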
def nms3d(boxes: Tensor, scores: Tensor, iou_threshold: float) -> Tensor:
"""3D NMS function GPU implementation (for BEV boxes).
Args:
boxes (torch.Tensor): Input boxes with the shape of (N, 7)
([x, y, z, dx, dy, dz, heading]).
scores (torch.Tensor): Scores of boxes with the shape of (N).
iou_threshold (float): Overlap threshold of NMS.
Returns:
torch.Tensor: Indexes after NMS.
"""
assert boxes.size(1) == 7, 'Input boxes shape should be (N, 7)'
order = scores.sort(0, descending=True)[1]
boxes = boxes[order].contiguous()
keep = torch.zeros(boxes.size(0), dtype=torch.long)
num_out = torch.zeros(size=(), dtype=torch.long)
ext_module.iou3d_nms3d_forward(
boxes, keep, num_out, nms_overlap_thresh=iou_threshold)
keep = order[keep[:num_out].cuda(boxes.device)].contiguous()
return keep
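A usage sketch (illustrative; assumes a CUDA build of mmcv exporting `nms3d`):

import torch
from mmcv.ops import nms3d

boxes = torch.tensor([[0.0, 0., 0., 2., 2., 2., 0.],
                      [0.1, 0., 0., 2., 2., 2., 0.],
                      [10., 10., 10., 2., 2., 2., 0.]]).cuda()
scores = torch.tensor([0.9, 0.8, 0.7]).cuda()
keep = nms3d(boxes, scores, iou_threshold=0.3)
# the second box overlaps the first almost completely and is suppressed,
# so keep should be tensor([0, 2])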
def nms3d_normal(boxes: Tensor, scores: Tensor,
iou_threshold: float) -> Tensor:
"""Normal 3D NMS function GPU implementation. The overlap of two boxes for
IoU calculation is defined as the exact overlapping area of the two boxes
WITH their yaw angle set to 0.
Args:
boxes (torch.Tensor): Input boxes with shape (N, 7).
([x, y, z, dx, dy, dz, heading]).
scores (torch.Tensor): Scores of predicted boxes with shape (N).
iou_threshold (float): Overlap threshold of NMS.
Returns:
torch.Tensor: Remaining indices with scores in descending order.
"""
assert boxes.shape[1] == 7, 'Input boxes shape should be (N, 7)'
order = scores.sort(0, descending=True)[1]
boxes = boxes[order].contiguous()
keep = torch.zeros(boxes.size(0), dtype=torch.long)
num_out = torch.zeros(size=(), dtype=torch.long)
ext_module.iou3d_nms3d_normal_forward(
boxes, keep, num_out, nms_overlap_thresh=iou_threshold)
return order[keep[:num_out].cuda(boxes.device)].contiguous()
def _xyxyr2xywhr(boxes: Tensor) -> Tensor:
"""Convert [x1, y1, x2, y2, heading] box to [x, y, dx, dy, heading] box.
Args:
boxes (torch.Tensor): Input boxes with shape (N, 5).
Returns:
torch.Tensor: Converted boxes with shape (N, 5).
"""
warnings.warn(
'This function is deprecated and will be removed in the future.',
DeprecationWarning)
return torch.stack(
((boxes[:, 0] + boxes[:, 2]) / 2, (boxes[:, 1] + boxes[:, 3]) / 2,
boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1], boxes[:, 4]),
dim=-1)
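For example, the corner-format box [x1, y1, x2, y2, ry] = [0, 0, 4, 2, 0.3] converts to the center-format box [(0 + 4) / 2, (0 + 2) / 2, 4 - 0, 2 - 0, 0.3] = [2, 1, 4, 2, 0.3].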
def boxes_iou_bev(boxes_a: Tensor, boxes_b: Tensor) -> Tensor:
"""Calculate boxes IoU in the Bird's Eye View.
Args:
boxes_a (torch.Tensor): Input boxes a with shape (M, 5)
([x1, y1, x2, y2, ry]).
boxes_b (torch.Tensor): Input boxes b with shape (N, 5)
([x1, y1, x2, y2, ry]).
Returns:
torch.Tensor: IoU result with shape (M, N).
"""
from .box_iou_rotated import box_iou_rotated
warnings.warn(
'`iou3d.boxes_iou_bev` is deprecated and will be removed in'
' the future. Please, use `box_iou_rotated.box_iou_rotated`.',
DeprecationWarning)
return box_iou_rotated(_xyxyr2xywhr(boxes_a), _xyxyr2xywhr(boxes_b))
def nms_bev(boxes, scores, thresh, pre_max_size=None, post_max_size=None):
"""NMS function GPU implementation (for BEV boxes). The overlap of two
def nms_bev(boxes: Tensor,
scores: Tensor,
thresh: float,
pre_max_size: Optional[int] = None,
post_max_size: Optional[int] = None) -> Tensor:
"""NMS function GPU implementation (for BEV boxes).
The overlap of two
boxes for IoU calculation is defined as the exact overlapping area of the
two boxes. In this function, one can also set ``pre_max_size`` and
``post_max_size``.
Args:
boxes (torch.Tensor): Input boxes with the shape of [N, 5]
boxes (torch.Tensor): Input boxes with the shape of (N, 5)
([x1, y1, x2, y2, ry]).
scores (torch.Tensor): Scores of boxes with the shape of [N].
scores (torch.Tensor): Scores of boxes with the shape of (N,).
thresh (float): Overlap threshold of NMS.
pre_max_size (int, optional): Max size of boxes before NMS.
Default: None.
post_max_size (int, optional): Max size of boxes after NMS.
Default: None.
Returns:
torch.Tensor: Indexes after NMS.
"""
assert boxes.size(1) == 5, 'Input boxes shape should be [N, 5]'
from .nms import nms_rotated
warnings.warn(
'`iou3d.nms_bev` is deprecated and will be removed in'
' the future. Please, use `nms.nms_rotated`.', DeprecationWarning)
assert boxes.size(1) == 5, 'Input boxes shape should be (N, 5)'
order = scores.sort(0, descending=True)[1]
if pre_max_size is not None:
order = order[:pre_max_size]
boxes = boxes[order].contiguous()
boxes = _xyxyr2xywhr(boxes)[order]
scores = scores[order]
keep = nms_rotated(boxes, scores, thresh)[1]
keep = order[keep]
keep = torch.zeros(boxes.size(0), dtype=torch.long)
num_out = torch.zeros(size=(), dtype=torch.long)
ext_module.iou3d_nms_forward(
boxes, keep, num_out, nms_overlap_thresh=thresh)
keep = order[keep[:num_out].cuda(boxes.device)].contiguous()
if post_max_size is not None:
keep = keep[:post_max_size]
return keep
def nms_normal_bev(boxes, scores, thresh):
"""Normal NMS function GPU implementation (for BEV boxes). The overlap of
def nms_normal_bev(boxes: Tensor, scores: Tensor, thresh: float) -> Tensor:
"""Normal NMS function GPU implementation (for BEV boxes).
The overlap of
two boxes for IoU calculation is defined as the exact overlapping area of
the two boxes WITH their yaw angle set to 0.
Args:
boxes (torch.Tensor): Input boxes with shape (N, 5).
scores (torch.Tensor): Scores of predicted boxes with shape (N).
boxes (torch.Tensor): Input boxes with shape (N, 5)
([x1, y1, x2, y2, ry]).
scores (torch.Tensor): Scores of predicted boxes with shape (N,).
thresh (float): Overlap threshold of NMS.
Returns:
torch.Tensor: Remaining indices with scores in descending order.
"""
assert boxes.shape[1] == 5, 'Input boxes shape should be [N, 5]'
order = scores.sort(0, descending=True)[1]
from .nms import nms
boxes = boxes[order].contiguous()
warnings.warn(
'`iou3d.nms_normal_bev` is deprecated and will be removed in'
' the future. Please, use `nms.nms`.', DeprecationWarning)
assert boxes.shape[1] == 5, 'Input boxes shape should be (N, 5)'
keep = torch.zeros(boxes.size(0), dtype=torch.long)
num_out = torch.zeros(size=(), dtype=torch.long)
ext_module.iou3d_nms_normal_forward(
boxes, keep, num_out, nms_overlap_thresh=thresh)
return order[keep[:num_out].cuda(boxes.device)].contiguous()
return nms(boxes[:, :-1], scores, thresh)[1]
from typing import Optional
import torch
from torch.autograd import Function
......@@ -8,6 +10,7 @@ ext_module = ext_loader.load_ext('_ext', ['knn_forward'])
class KNN(Function):
r"""KNN (CUDA) based on heap data structure.
Modified from `PAConv <https://github.com/CVMI-Lab/PAConv/tree/main/
scene_seg/lib/pointops/src/knnquery_heap>`_.
......@@ -18,15 +21,15 @@ class KNN(Function):
def forward(ctx,
k: int,
xyz: torch.Tensor,
center_xyz: torch.Tensor = None,
center_xyz: Optional[torch.Tensor] = None,
transposed: bool = False) -> torch.Tensor:
"""
Args:
k (int): number of nearest neighbors.
xyz (Tensor): (B, N, 3) if transposed == False, else (B, 3, N).
xyz coordinates of the features.
center_xyz (Tensor, optional): (B, npoint, 3) if transposed ==
False, else (B, 3, npoint). centers of the knn query.
xyz (torch.Tensor): (B, N, 3) if transposed == False, else
(B, 3, N). xyz coordinates of the features.
center_xyz (torch.Tensor, optional): (B, npoint, 3) if transposed
is False, else (B, 3, npoint). centers of the knn query.
Default: None.
transposed (bool, optional): whether the input tensors are
transposed. Should not explicitly use this keyword when
......@@ -34,8 +37,8 @@ class KNN(Function):
Default: False.
Returns:
Tensor: (B, k, npoint) tensor with the indices of
the features that form k-nearest neighbours.
torch.Tensor: (B, k, npoint) tensor with the indices of the
features that form k-nearest neighbours.
"""
assert (k > 0) & (k < 100), 'k should be in range(0, 100)'
......
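A usage sketch (illustrative; assumes a CUDA build of mmcv where `knn = KNN.apply` is exported from `mmcv.ops`):

import torch
from mmcv.ops import knn

xyz = torch.rand(2, 1024, 3).cuda()        # (B, N, 3)
center_xyz = torch.rand(2, 128, 3).cuda()  # (B, npoint, 3)
idx = knn(8, xyz, center_xyz, False)       # (B, 8, npoint) neighbour indices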
# Copyright (c) OpenMMLab. All rights reserved.
import math
from typing import Optional, Tuple, Union
import torch
import torch.nn as nn
......@@ -27,7 +28,13 @@ class MaskedConv2dFunction(Function):
stride_i=stride)
@staticmethod
def forward(ctx, features, mask, weight, bias, padding=0, stride=1):
def forward(ctx,
features: torch.Tensor,
mask: torch.Tensor,
weight: torch.nn.Parameter,
bias: torch.nn.Parameter,
padding: int = 0,
stride: int = 1) -> torch.Tensor:
assert mask.dim() == 3 and mask.size(0) == 1
assert features.dim() == 4 and features.size(0) == 1
assert features.size()[2:] == mask.size()[1:]
......@@ -61,7 +68,6 @@ class MaskedConv2dFunction(Function):
kernel_w=kernel_w,
pad_h=pad_h,
pad_w=pad_w)
masked_output = torch.addmm(1, bias[:, None], 1,
weight.view(out_channel, -1), data_col)
ext_module.masked_col2im_forward(
......@@ -76,7 +82,7 @@ class MaskedConv2dFunction(Function):
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
def backward(ctx, grad_output: torch.Tensor) -> tuple:
return (None, ) * 5
......@@ -91,21 +97,22 @@ class MaskedConv2d(nn.Conv2d):
"""
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
bias=True):
super(MaskedConv2d,
self).__init__(in_channels, out_channels, kernel_size, stride,
padding, dilation, groups, bias)
def forward(self, input, mask=None):
in_channels: int,
out_channels: int,
kernel_size: Union[int, Tuple[int, ...]],
stride: int = 1,
padding: int = 0,
dilation: int = 1,
groups: int = 1,
bias: bool = True):
super().__init__(in_channels, out_channels, kernel_size, stride,
padding, dilation, groups, bias)
def forward(self,
input: torch.Tensor,
mask: Optional[torch.Tensor] = None) -> torch.Tensor:
if mask is None: # fallback to the normal Conv2d
return super(MaskedConv2d, self).forward(input)
return super().forward(input)
else:
return masked_conv2d(input, mask, self.weight, self.bias,
self.padding)
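A usage sketch for the module above (illustrative; assumes a CUDA build of mmcv; note the op asserts a batch size of 1):

import torch
from mmcv.ops import MaskedConv2d

conv = MaskedConv2d(3, 8, kernel_size=3, padding=1).cuda()
x = torch.rand(1, 3, 16, 16).cuda()
mask = (torch.rand(1, 16, 16) > 0.5).float().cuda()  # (1, H, W)
y = conv(x, mask)  # convolution evaluated only at positive mask locations
y_full = conv(x)   # mask=None falls back to an ordinary Conv2d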
# Copyright (c) OpenMMLab. All rights reserved.
import math
from abc import abstractmethod
from typing import Optional
import torch
import torch.nn as nn
......@@ -18,7 +20,7 @@ class BaseMergeCell(nn.Module):
another convolution layer.
Args:
in_channels (int): number of input channels in out_conv layer.
fused_channels (int): number of input channels in out_conv layer.
out_channels (int): number of output channels in out_conv layer.
with_out_conv (bool): Whether to use out_conv layer.
out_conv_cfg (dict): Config dict for convolution layer, which should
......@@ -41,19 +43,19 @@ class BaseMergeCell(nn.Module):
"""
def __init__(self,
fused_channels=256,
out_channels=256,
with_out_conv=True,
out_conv_cfg=dict(
fused_channels: Optional[int] = 256,
out_channels: Optional[int] = 256,
with_out_conv: bool = True,
out_conv_cfg: dict = dict(
groups=1, kernel_size=3, padding=1, bias=True),
out_norm_cfg=None,
out_conv_order=('act', 'conv', 'norm'),
with_input1_conv=False,
with_input2_conv=False,
input_conv_cfg=None,
input_norm_cfg=None,
upsample_mode='nearest'):
super(BaseMergeCell, self).__init__()
out_norm_cfg: Optional[dict] = None,
out_conv_order: tuple = ('act', 'conv', 'norm'),
with_input1_conv: bool = False,
with_input2_conv: bool = False,
input_conv_cfg: Optional[dict] = None,
input_norm_cfg: Optional[dict] = None,
upsample_mode: str = 'nearest'):
super().__init__()
assert upsample_mode in ['nearest', 'bilinear']
self.with_out_conv = with_out_conv
self.with_input1_conv = with_input1_conv
......@@ -62,8 +64,8 @@ class BaseMergeCell(nn.Module):
if self.with_out_conv:
self.out_conv = ConvModule(
fused_channels,
out_channels,
fused_channels, # type: ignore
out_channels, # type: ignore
**out_conv_cfg,
norm_cfg=out_norm_cfg,
order=out_conv_order)
......@@ -95,12 +97,25 @@ class BaseMergeCell(nn.Module):
elif x.shape[-2:] < size:
return F.interpolate(x, size=size, mode=self.upsample_mode)
else:
assert x.shape[-2] % size[-2] == 0 and x.shape[-1] % size[-1] == 0
kernel_size = x.shape[-1] // size[-1]
if x.shape[-2] % size[-2] != 0 or x.shape[-1] % size[-1] != 0:
h, w = x.shape[-2:]
target_h, target_w = size
pad_h = math.ceil(h / target_h) * target_h - h
pad_w = math.ceil(w / target_w) * target_w - w
pad_l = pad_w // 2
pad_r = pad_w - pad_l
pad_t = pad_h // 2
pad_b = pad_h - pad_t
pad = (pad_l, pad_r, pad_t, pad_b)
x = F.pad(x, pad, mode='constant', value=0.0)
kernel_size = (x.shape[-2] // size[-2], x.shape[-1] // size[-1])
x = F.max_pool2d(x, kernel_size=kernel_size, stride=kernel_size)
return x
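For instance, with this padding branch a 15 x 15 input resized to out_size (4, 4) is padded by pad_h = pad_w = ceil(15 / 4) * 4 - 15 = 1 up to 16 x 16 and then max-pooled with kernel (4, 4) down to 4 x 4, a case the previous assert would simply have rejected.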
def forward(self, x1, x2, out_size=None):
def forward(self,
x1: torch.Tensor,
x2: torch.Tensor,
out_size: Optional[tuple] = None) -> torch.Tensor:
assert x1.shape[:2] == x2.shape[:2]
assert out_size is None or len(out_size) == 2
if out_size is None: # resize to larger one
......@@ -120,8 +135,8 @@ class BaseMergeCell(nn.Module):
class SumCell(BaseMergeCell):
def __init__(self, in_channels, out_channels, **kwargs):
super(SumCell, self).__init__(in_channels, out_channels, **kwargs)
def __init__(self, in_channels: int, out_channels: int, **kwargs):
super().__init__(in_channels, out_channels, **kwargs)
def _binary_op(self, x1, x2):
return x1 + x2
......@@ -129,9 +144,8 @@ class SumCell(BaseMergeCell):
class ConcatCell(BaseMergeCell):
def __init__(self, in_channels, out_channels, **kwargs):
super(ConcatCell, self).__init__(in_channels * 2, out_channels,
**kwargs)
def __init__(self, in_channels: int, out_channels: int, **kwargs):
super().__init__(in_channels * 2, out_channels, **kwargs)
def _binary_op(self, x1, x2):
ret = torch.cat([x1, x2], dim=1)
......@@ -140,7 +154,10 @@ class ConcatCell(BaseMergeCell):
class GlobalPoolingCell(BaseMergeCell):
def __init__(self, in_channels=None, out_channels=None, **kwargs):
def __init__(self,
in_channels: Optional[int] = None,
out_channels: Optional[int] = None,
**kwargs):
super().__init__(in_channels, out_channels, **kwargs)
self.global_pool = nn.AdaptiveAvgPool2d((1, 1))
......
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from ..utils import ext_loader
ext_module = ext_loader.load_ext('_ext', ['min_area_polygons'])
def min_area_polygons(pointsets: torch.Tensor) -> torch.Tensor:
"""Find the smallest polygons that surrounds all points in the point sets.
Args:
pointsets (Tensor): point sets with shape (N, 18).
Returns:
torch.Tensor: Return the smallest polygons with shape (N, 8).
"""
polygons = pointsets.new_zeros((pointsets.size(0), 8))
ext_module.min_area_polygons(pointsets, polygons)
return polygons
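A usage sketch (illustrative; assumes a CUDA build of mmcv exporting `min_area_polygons`):

import torch
from mmcv.ops import min_area_polygons

pointsets = torch.rand(4, 18).cuda()     # 9 (x, y) points per set
polygons = min_area_polygons(pointsets)  # (4, 8): four corners per polygon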
# Copyright (c) OpenMMLab. All rights reserved.
import math
from typing import Optional, Tuple, Union
import torch
import torch.nn as nn
......@@ -35,16 +36,16 @@ class ModulatedDeformConv2dFunction(Function):
@staticmethod
def forward(ctx,
input,
offset,
mask,
weight,
bias=None,
stride=1,
padding=0,
dilation=1,
groups=1,
deform_groups=1):
input: torch.Tensor,
offset: torch.Tensor,
mask: torch.Tensor,
weight: nn.Parameter,
bias: Optional[nn.Parameter] = None,
stride: int = 1,
padding: int = 0,
dilation: int = 1,
groups: int = 1,
deform_groups: int = 1) -> torch.Tensor:
if input is not None and input.dim() != 4:
raise ValueError(
f'Expected 4D tensor as input, got {input.dim()}D tensor \
......@@ -66,6 +67,7 @@ class ModulatedDeformConv2dFunction(Function):
# whatever the pytorch version is.
input = input.type_as(offset)
weight = weight.type_as(input)
bias = bias.type_as(input) # type: ignore
ctx.save_for_backward(input, offset, mask, weight, bias)
output = input.new_empty(
ModulatedDeformConv2dFunction._output_size(ctx, input, weight))
......@@ -94,7 +96,7 @@ class ModulatedDeformConv2dFunction(Function):
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
def backward(ctx, grad_output: torch.Tensor) -> tuple:
input, offset, mask, weight, bias = ctx.saved_tensors
grad_input = torch.zeros_like(input)
grad_offset = torch.zeros_like(offset)
......@@ -158,16 +160,16 @@ class ModulatedDeformConv2d(nn.Module):
@deprecated_api_warning({'deformable_groups': 'deform_groups'},
cls_name='ModulatedDeformConv2d')
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
deform_groups=1,
bias=True):
super(ModulatedDeformConv2d, self).__init__()
in_channels: int,
out_channels: int,
kernel_size: Union[int, Tuple[int]],
stride: int = 1,
padding: int = 0,
dilation: int = 1,
groups: int = 1,
deform_groups: int = 1,
bias: Union[bool, str] = True):
super().__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = _pair(kernel_size)
......@@ -198,7 +200,8 @@ class ModulatedDeformConv2d(nn.Module):
if self.bias is not None:
self.bias.data.zero_()
def forward(self, x, offset, mask):
def forward(self, x: torch.Tensor, offset: torch.Tensor,
mask: torch.Tensor) -> torch.Tensor:
return modulated_deform_conv2d(x, offset, mask, self.weight, self.bias,
self.stride, self.padding,
self.dilation, self.groups,
......@@ -226,7 +229,7 @@ class ModulatedDeformConv2dPack(ModulatedDeformConv2d):
_version = 2
def __init__(self, *args, **kwargs):
super(ModulatedDeformConv2dPack, self).__init__(*args, **kwargs)
super().__init__(*args, **kwargs)
self.conv_offset = nn.Conv2d(
self.in_channels,
self.deform_groups * 3 * self.kernel_size[0] * self.kernel_size[1],
......@@ -237,13 +240,13 @@ class ModulatedDeformConv2dPack(ModulatedDeformConv2d):
bias=True)
self.init_weights()
def init_weights(self):
super(ModulatedDeformConv2dPack, self).init_weights()
def init_weights(self) -> None:
super().init_weights()
if hasattr(self, 'conv_offset'):
self.conv_offset.weight.data.zero_()
self.conv_offset.bias.data.zero_()
def forward(self, x):
def forward(self, x: torch.Tensor) -> torch.Tensor: # type: ignore
out = self.conv_offset(x)
o1, o2, mask = torch.chunk(out, 3, dim=1)
offset = torch.cat((o1, o2), dim=1)
......
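A usage sketch for the packed module above (illustrative; assumes a CUDA build of mmcv):

import torch
from mmcv.ops import ModulatedDeformConv2dPack

dcn = ModulatedDeformConv2dPack(
    3, 8, kernel_size=3, padding=1, deform_groups=1).cuda()
x = torch.rand(1, 3, 32, 32).cuda()
y = dcn(x)  # offsets and modulation mask are predicted by dcn.conv_offset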
# Copyright (c) OpenMMLab. All rights reserved.
import math
import warnings
from typing import Optional, no_type_check
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd.function import Function, once_differentiable
import mmcv
from mmcv import deprecated_api_warning
from mmcv.cnn import constant_init, xavier_init
from mmcv.cnn.bricks.registry import ATTENTION
......@@ -20,27 +22,30 @@ ext_module = ext_loader.load_ext(
class MultiScaleDeformableAttnFunction(Function):
@staticmethod
def forward(ctx, value, value_spatial_shapes, value_level_start_index,
sampling_locations, attention_weights, im2col_step):
def forward(ctx, value: torch.Tensor, value_spatial_shapes: torch.Tensor,
value_level_start_index: torch.Tensor,
sampling_locations: torch.Tensor,
attention_weights: torch.Tensor,
im2col_step: torch.Tensor) -> torch.Tensor:
"""GPU version of multi-scale deformable attention.
Args:
value (Tensor): The value has shape
value (torch.Tensor): The value has shape
(bs, num_keys, num_heads, embed_dims//num_heads)
value_spatial_shapes (Tensor): Spatial shape of
value_spatial_shapes (torch.Tensor): Spatial shape of
each feature map, has shape (num_levels, 2),
last dimension 2 represents (h, w)
sampling_locations (Tensor): The location of sampling points,
sampling_locations (torch.Tensor): The location of sampling points,
has shape
(bs, num_queries, num_heads, num_levels, num_points, 2),
the last dimension 2 represents (x, y).
attention_weights (Tensor): The weight of sampling points used
when calculate the attention, has shape
attention_weights (torch.Tensor): The weight of sampling points
used when calculating the attention, has shape
(bs, num_queries, num_heads, num_levels, num_points),
im2col_step (Tensor): The step used in image to column.
im2col_step (torch.Tensor): The step used in image to column.
Returns:
Tensor: has shape (bs, num_queries, embed_dims)
torch.Tensor: has shape (bs, num_queries, embed_dims)
"""
ctx.im2col_step = im2col_step
......@@ -58,16 +63,14 @@ class MultiScaleDeformableAttnFunction(Function):
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
def backward(ctx, grad_output: torch.Tensor) -> tuple:
"""GPU version of backward function.
Args:
grad_output (Tensor): Gradient
of output tensor of forward.
grad_output (torch.Tensor): Gradient of output tensor of forward.
Returns:
Tuple[Tensor]: Gradient
of input tensors in forward.
tuple[Tensor]: Gradient of input tensors in forward.
"""
value, value_spatial_shapes, value_level_start_index,\
sampling_locations, attention_weights = ctx.saved_tensors
......@@ -91,26 +94,28 @@ class MultiScaleDeformableAttnFunction(Function):
grad_sampling_loc, grad_attn_weight, None
def multi_scale_deformable_attn_pytorch(value, value_spatial_shapes,
sampling_locations, attention_weights):
def multi_scale_deformable_attn_pytorch(
value: torch.Tensor, value_spatial_shapes: torch.Tensor,
sampling_locations: torch.Tensor,
attention_weights: torch.Tensor) -> torch.Tensor:
"""CPU version of multi-scale deformable attention.
Args:
value (Tensor): The value has shape
(bs, num_keys, mum_heads, embed_dims//num_heads)
value_spatial_shapes (Tensor): Spatial shape of
value (torch.Tensor): The value has shape
(bs, num_keys, num_heads, embed_dims//num_heads)
value_spatial_shapes (torch.Tensor): Spatial shape of
each feature map, has shape (num_levels, 2),
last dimension 2 represents (h, w)
sampling_locations (Tensor): The location of sampling points,
sampling_locations (torch.Tensor): The location of sampling points,
has shape
(bs, num_queries, num_heads, num_levels, num_points, 2),
the last dimension 2 represents (x, y).
attention_weights (Tensor): The weight of sampling points used
attention_weights (torch.Tensor): The weight of sampling points used
when calculating the attention, has shape
(bs, num_queries, num_heads, num_levels, num_points),
Returns:
Tensor: has shape (bs, num_queries, embed_dims)
torch.Tensor: has shape (bs, num_queries, embed_dims)
"""
bs, _, num_heads, embed_dims = value.shape
......@@ -180,15 +185,15 @@ class MultiScaleDeformableAttention(BaseModule):
"""
def __init__(self,
embed_dims=256,
num_heads=8,
num_levels=4,
num_points=4,
im2col_step=64,
dropout=0.1,
batch_first=False,
norm_cfg=None,
init_cfg=None):
embed_dims: int = 256,
num_heads: int = 8,
num_levels: int = 4,
num_points: int = 4,
im2col_step: int = 64,
dropout: float = 0.1,
batch_first: bool = False,
norm_cfg: Optional[dict] = None,
init_cfg: Optional[mmcv.ConfigDict] = None):
super().__init__(init_cfg)
if embed_dims % num_heads != 0:
raise ValueError(f'embed_dims must be divisible by num_heads, '
......@@ -227,7 +232,7 @@ class MultiScaleDeformableAttention(BaseModule):
self.output_proj = nn.Linear(embed_dims, embed_dims)
self.init_weights()
def init_weights(self):
def init_weights(self) -> None:
"""Default initialization for Parameters of Module."""
constant_init(self.sampling_offsets, 0.)
thetas = torch.arange(
......@@ -247,53 +252,53 @@ class MultiScaleDeformableAttention(BaseModule):
xavier_init(self.output_proj, distribution='uniform', bias=0.)
self._is_init = True
@no_type_check
@deprecated_api_warning({'residual': 'identity'},
cls_name='MultiScaleDeformableAttention')
def forward(self,
query,
key=None,
value=None,
identity=None,
query_pos=None,
key_padding_mask=None,
reference_points=None,
spatial_shapes=None,
level_start_index=None,
**kwargs):
query: torch.Tensor,
key: Optional[torch.Tensor] = None,
value: Optional[torch.Tensor] = None,
identity: Optional[torch.Tensor] = None,
query_pos: Optional[torch.Tensor] = None,
key_padding_mask: Optional[torch.Tensor] = None,
reference_points: Optional[torch.Tensor] = None,
spatial_shapes: Optional[torch.Tensor] = None,
level_start_index: Optional[torch.Tensor] = None,
**kwargs) -> torch.Tensor:
"""Forward Function of MultiScaleDeformAttention.
Args:
query (Tensor): Query of Transformer with shape
query (torch.Tensor): Query of Transformer with shape
(num_query, bs, embed_dims).
key (Tensor): The key tensor with shape
key (torch.Tensor): The key tensor with shape
`(num_key, bs, embed_dims)`.
value (Tensor): The value tensor with shape
value (torch.Tensor): The value tensor with shape
`(num_key, bs, embed_dims)`.
identity (Tensor): The tensor used for addition, with the
identity (torch.Tensor): The tensor used for addition, with the
same shape as `query`. Default None. If None,
`query` will be used.
query_pos (Tensor): The positional encoding for `query`.
query_pos (torch.Tensor): The positional encoding for `query`.
Default: None.
key_pos (Tensor): The positional encoding for `key`. Default
None.
reference_points (Tensor): The normalized reference
key_padding_mask (torch.Tensor): ByteTensor for `query`, with
shape [bs, num_key].
reference_points (torch.Tensor): The normalized reference
points with shape (bs, num_query, num_levels, 2),
all elements are in range [0, 1], top-left (0, 0),
bottom-right (1, 1), including padding area;
or (N, Length_{query}, num_levels, 4), with two
additional dimensions (w, h) to form reference boxes.
key_padding_mask (Tensor): ByteTensor for `query`, with
shape [bs, num_key].
spatial_shapes (Tensor): Spatial shape of features in
spatial_shapes (torch.Tensor): Spatial shape of features in
different levels. With shape (num_levels, 2),
last dimension represents (h, w).
level_start_index (Tensor): The start index of each level.
level_start_index (torch.Tensor): The start index of each level.
A tensor has shape ``(num_levels, )`` and can be represented
as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...].
Returns:
Tensor: forwarded results with shape [num_query, bs, embed_dims].
torch.Tensor: forwarded results with shape
[num_query, bs, embed_dims].
"""
if value is None:
......
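A shape-level sketch of the pure-PyTorch path above (illustrative; runs on CPU; the import path assumes mmcv/ops/multi_scale_deform_attn.py):

import torch
from mmcv.ops.multi_scale_deform_attn import \
    multi_scale_deformable_attn_pytorch

bs, num_heads, head_dims, num_queries, num_points = 2, 4, 8, 10, 4
spatial_shapes = torch.tensor([[8, 8], [4, 4]])  # (num_levels, 2)
num_keys = int((spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum())  # 80
value = torch.rand(bs, num_keys, num_heads, head_dims)
sampling_locations = torch.rand(bs, num_queries, num_heads, 2, num_points, 2)
attention_weights = torch.softmax(
    torch.rand(bs, num_queries, num_heads, 2 * num_points), -1).reshape(
        bs, num_queries, num_heads, 2, num_points)
out = multi_scale_deformable_attn_pytorch(
    value, spatial_shapes, sampling_locations, attention_weights)
assert out.shape == (2, 10, num_heads * head_dims)  # (bs, num_queries, 32)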
import os
from typing import Any, Dict, List, Optional, Tuple, Union
import numpy as np
import torch
from torch import Tensor
from mmcv.utils import deprecated_api_warning
from ..utils import ext_loader
......@@ -14,8 +16,8 @@ ext_module = ext_loader.load_ext(
class NMSop(torch.autograd.Function):
@staticmethod
def forward(ctx, bboxes, scores, iou_threshold, offset, score_threshold,
max_num):
def forward(ctx: Any, bboxes: Tensor, scores: Tensor, iou_threshold: float,
offset: int, score_threshold: float, max_num: int) -> Tensor:
is_filtering_by_score = score_threshold > 0
if is_filtering_by_score:
valid_mask = scores > score_threshold
......@@ -48,6 +50,7 @@ class NMSop(torch.autograd.Function):
offset_i=int(offset))
else:
from torch.onnx.symbolic_opset9 import select, squeeze, unsqueeze
from ..onnx.onnx_utils.symbolic_helper import _size_helper
boxes = unsqueeze(g, bboxes, 0)
......@@ -82,8 +85,9 @@ class NMSop(torch.autograd.Function):
class SoftNMSop(torch.autograd.Function):
@staticmethod
def forward(ctx, boxes, scores, iou_threshold, sigma, min_score, method,
offset):
def forward(ctx: Any, boxes: Tensor, scores: Tensor, iou_threshold: float,
sigma: float, min_score: float, method: int,
offset: int) -> Tuple[Tensor, Tensor]:
dets = boxes.new_empty((boxes.size(0), 5), device='cpu')
inds = ext_module.softnms(
boxes.cpu(),
......@@ -114,8 +118,16 @@ class SoftNMSop(torch.autograd.Function):
return nms_out
array_like_type = Union[Tensor, np.ndarray]
@deprecated_api_warning({'iou_thr': 'iou_threshold'})
def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1):
def nms(boxes: array_like_type,
scores: array_like_type,
iou_threshold: float,
offset: int = 0,
score_threshold: float = 0,
max_num: int = -1) -> Tuple[array_like_type, array_like_type]:
"""Dispatch to either CPU or GPU NMS implementations.
The input can be either torch tensor or numpy array. GPU NMS will be used
......@@ -131,8 +143,8 @@ def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1):
max_num (int): maximum number of boxes after NMS.
Returns:
tuple: kept dets(boxes and scores) and indice, which is always the \
same data type as the input.
tuple: kept dets (boxes and scores) and indices, which always have
the same data type as the input.
Example:
>>> boxes = np.array([[49.1, 32.4, 51.0, 35.9],
......@@ -148,8 +160,8 @@ def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1):
>>> dets, inds = nms(boxes, scores, iou_threshold)
>>> assert len(inds) == len(dets) == 3
"""
assert isinstance(boxes, (torch.Tensor, np.ndarray))
assert isinstance(scores, (torch.Tensor, np.ndarray))
assert isinstance(boxes, (Tensor, np.ndarray))
assert isinstance(scores, (Tensor, np.ndarray))
is_numpy = False
if isinstance(boxes, np.ndarray):
is_numpy = True
......@@ -160,16 +172,8 @@ def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1):
assert boxes.size(0) == scores.size(0)
assert offset in (0, 1)
if torch.__version__ == 'parrots':
indata_list = [boxes, scores]
indata_dict = {
'iou_threshold': float(iou_threshold),
'offset': int(offset)
}
inds = ext_module.nms(*indata_list, **indata_dict)
else:
inds = NMSop.apply(boxes, scores, iou_threshold, offset,
score_threshold, max_num)
inds = NMSop.apply(boxes, scores, iou_threshold, offset, score_threshold,
max_num)
dets = torch.cat((boxes[inds], scores[inds].reshape(-1, 1)), dim=1)
if is_numpy:
dets = dets.cpu().numpy()
......@@ -178,19 +182,19 @@ def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1):
@deprecated_api_warning({'iou_thr': 'iou_threshold'})
def soft_nms(boxes,
scores,
iou_threshold=0.3,
sigma=0.5,
min_score=1e-3,
method='linear',
offset=0):
def soft_nms(boxes: array_like_type,
scores: array_like_type,
iou_threshold: float = 0.3,
sigma: float = 0.5,
min_score: float = 1e-3,
method: str = 'linear',
offset: int = 0) -> Tuple[array_like_type, array_like_type]:
"""Dispatch to only CPU Soft NMS implementations.
The input can be either a torch tensor or numpy array.
The returned type will always be the same as inputs.
Arguments:
Args:
boxes (torch.Tensor or np.ndarray): boxes in shape (N, 4).
scores (torch.Tensor or np.ndarray): scores in shape (N, ).
iou_threshold (float): IoU threshold for NMS.
......@@ -200,8 +204,8 @@ def soft_nms(boxes,
offset (int, 0 or 1): boxes' width or height is (x2 - x1 + offset).
Returns:
tuple: kept dets(boxes and scores) and indice, which is always the \
same data type as the input.
tuple: kept dets (boxes and scores) and indices, which always have
the same data type as the input.
Example:
>>> boxes = np.array([[4., 3., 5., 3.],
......@@ -216,8 +220,8 @@ def soft_nms(boxes,
>>> assert len(inds) == len(dets) == 5
"""
assert isinstance(boxes, (torch.Tensor, np.ndarray))
assert isinstance(scores, (torch.Tensor, np.ndarray))
assert isinstance(boxes, (Tensor, np.ndarray))
assert isinstance(scores, (Tensor, np.ndarray))
is_numpy = False
if isinstance(boxes, np.ndarray):
is_numpy = True
......@@ -257,46 +261,85 @@ def soft_nms(boxes,
return dets.to(device=boxes.device), inds.to(device=boxes.device)
def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False):
"""Performs non-maximum suppression in a batched fashion.
def batched_nms(boxes: Tensor,
scores: Tensor,
idxs: Tensor,
nms_cfg: Optional[Dict],
class_agnostic: bool = False) -> Tuple[Tensor, Tensor]:
r"""Performs non-maximum suppression in a batched fashion.
Modified from https://github.com/pytorch/vision/blob
/505cd6957711af790211896d32b40291bea1bc21/torchvision/ops/boxes.py#L39.
Modified from `torchvision/ops/boxes.py#L39
<https://github.com/pytorch/vision/blob/
505cd6957711af790211896d32b40291bea1bc21/torchvision/ops/boxes.py#L39>`_.
In order to perform NMS independently per class, we add an offset to all
the boxes. The offset is dependent only on the class idx, and is large
enough so that boxes from different classes do not overlap.
Arguments:
boxes (torch.Tensor): boxes in shape (N, 4).
Note:
In v1.4.1 and later, ``batched_nms`` supports skipping the NMS and
returns sorted raw results when `nms_cfg` is None.
Args:
boxes (torch.Tensor): boxes in shape (N, 4) or (N, 5).
scores (torch.Tensor): scores in shape (N, ).
idxs (torch.Tensor): each index value corresponds to a bbox cluster,
and NMS will not be applied between elements of different idxs,
shape (N, ).
nms_cfg (dict): specify nms type and other parameters like iou_thr.
Possible keys includes the following.
nms_cfg (dict, optional): Supports skipping the nms when `nms_cfg`
is None, otherwise it should specify nms type and other
parameters like `iou_thr`. Possible keys include the following.
- iou_thr (float): IoU threshold used for NMS.
- iou_threshold (float): IoU threshold used for NMS.
- split_thr (float): threshold number of boxes. In some cases the
number of boxes is large (e.g., 200k). To avoid OOM during
training, the users could set `split_thr` to a small value.
If the number of boxes is greater than the threshold, it will
perform NMS on each group of boxes separately and sequentially.
Defaults to 10000.
number of boxes is large (e.g., 200k). To avoid OOM during
training, the users could set `split_thr` to a small value.
If the number of boxes is greater than the threshold, it will
perform NMS on each group of boxes separately and sequentially.
Defaults to 10000.
class_agnostic (bool): if true, nms is class agnostic,
i.e. IoU thresholding happens over all boxes,
regardless of the predicted class.
regardless of the predicted class. Defaults to False.
Returns:
tuple: kept dets and indice.
- boxes (Tensor): Bboxes with score after nms, has shape
(num_bboxes, 5). last dimension 5 arrange as
(x1, y1, x2, y2, score)
- keep (Tensor): The indices of remaining boxes in input
boxes.
"""
# skip nms when nms_cfg is None
if nms_cfg is None:
scores, inds = scores.sort(descending=True)
boxes = boxes[inds]
return torch.cat([boxes, scores[:, None]], -1), inds
nms_cfg_ = nms_cfg.copy()
class_agnostic = nms_cfg_.pop('class_agnostic', class_agnostic)
if class_agnostic:
boxes_for_nms = boxes
else:
max_coordinate = boxes.max()
offsets = idxs.to(boxes) * (max_coordinate + torch.tensor(1).to(boxes))
boxes_for_nms = boxes + offsets[:, None]
# When using rotated boxes, only apply offsets on center.
if boxes.size(-1) == 5:
# Strictly, the maximum coordinates of the rotating box
# (x,y,w,h,a) should be calculated by polygon coordinates.
# But the conversion from rotated box to polygon will
# slow down the speed.
# So we use max(x,y) + max(w,h) as the max coordinate,
# which is larger than the polygon max coordinate
# max(x1, y1, x2, y2, x3, y3, x4, y4)
max_coordinate = boxes[..., :2].max() + boxes[..., 2:4].max()
offsets = idxs.to(boxes) * (
max_coordinate + torch.tensor(1).to(boxes))
boxes_ctr_for_nms = boxes[..., :2] + offsets[:, None]
boxes_for_nms = torch.cat([boxes_ctr_for_nms, boxes[..., 2:5]],
dim=-1)
else:
max_coordinate = boxes.max()
offsets = idxs.to(boxes) * (
max_coordinate + torch.tensor(1).to(boxes))
boxes_for_nms = boxes + offsets[:, None]
nms_type = nms_cfg_.pop('type', 'nms')
nms_op = eval(nms_type)
......@@ -306,12 +349,13 @@ def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False):
if boxes_for_nms.shape[0] < split_thr or torch.onnx.is_in_onnx_export():
dets, keep = nms_op(boxes_for_nms, scores, **nms_cfg_)
boxes = boxes[keep]
# -1 indexing works abnormally in TensorRT
# This assumes `dets` has 5 dimensions where
# This assumes `dets` has arbitrary dimensions where
# the last dimension is score.
# TODO: more elegant way to handle the dimension issue.
# Some type of nms would reweight the score, such as SoftNMS
scores = dets[:, 4]
# Currently it supports bounding boxes [x1, y1, x2, y2, score] or
# rotated boxes [cx, cy, w, h, angle_radian, score].
scores = dets[:, -1]
else:
max_num = nms_cfg_.pop('max_num', -1)
total_mask = scores.new_zeros(scores.size(), dtype=torch.bool)
......@@ -333,31 +377,33 @@ def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False):
boxes = boxes[:max_num]
scores = scores[:max_num]
return torch.cat([boxes, scores[:, None]], -1), keep
boxes = torch.cat([boxes, scores[:, None]], -1)
return boxes, keep
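A usage sketch of the class-offset trick above (illustrative; the plain `nms` op also runs on CPU):

import torch
from mmcv.ops import batched_nms

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],
                      [0., 0., 10., 10.]])
scores = torch.tensor([0.9, 0.8, 0.7])
idxs = torch.tensor([0, 0, 1])  # class ids; NMS never crosses classes
dets, keep = batched_nms(boxes, scores, idxs,
                         dict(type='nms', iou_threshold=0.5))
# box 1 (IoU ~ 0.68 with box 0, same class) is suppressed; box 2 survives
# because its class offset places it in a disjoint coordinate range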
def nms_match(dets, iou_threshold):
def nms_match(dets: array_like_type,
iou_threshold: float) -> List[array_like_type]:
"""Matched dets into different groups by NMS.
NMS match is Similar to NMS but when a bbox is suppressed, nms match will
record the indice of suppressed bbox and form a group with the indice of
kept bbox. In each group, indice is sorted as score order.
Arguments:
Args:
dets (torch.Tensor | np.ndarray): Det boxes with scores, shape (N, 5).
iou_thr (float): IoU thresh for NMS.
iou_threshold (float): IoU thresh for NMS.
Returns:
List[torch.Tensor | np.ndarray]: The outer list corresponds different
matched group, the inner Tensor corresponds the indices for a group
in score order.
list[torch.Tensor | np.ndarray]: The outer list corresponds to different
matched groups, and the inner Tensor holds the indices for a group
in score order.
"""
if dets.shape[0] == 0:
matched = []
else:
assert dets.shape[-1] == 5, 'inputs dets.shape should be (N, 5), ' \
f'but get {dets.shape}'
if isinstance(dets, torch.Tensor):
if isinstance(dets, Tensor):
dets_t = dets.detach().cpu()
else:
dets_t = torch.from_numpy(dets)
......@@ -365,15 +411,19 @@ def nms_match(dets, iou_threshold):
indata_dict = {'iou_threshold': float(iou_threshold)}
matched = ext_module.nms_match(*indata_list, **indata_dict)
if torch.__version__ == 'parrots':
matched = matched.tolist()
matched = matched.tolist() # type: ignore
if isinstance(dets, torch.Tensor):
if isinstance(dets, Tensor):
return [dets.new_tensor(m, dtype=torch.long) for m in matched]
else:
return [np.array(m, dtype=np.int) for m in matched]
return [np.array(m, dtype=int) for m in matched]
def nms_rotated(dets, scores, iou_threshold, labels=None):
def nms_rotated(dets: Tensor,
scores: Tensor,
iou_threshold: float,
labels: Optional[Tensor] = None,
clockwise: bool = True) -> Tuple[Tensor, Tensor]:
"""Performs non-maximum suppression (NMS) on the rotated boxes according to
their intersection-over-union (IoU).
......@@ -381,23 +431,33 @@ def nms_rotated(dets, scores, iou_threshold, labels=None):
IoU greater than iou_threshold with another (higher scoring) rotated box.
Args:
boxes (Tensor): Rotated boxes in shape (N, 5). They are expected to \
be in (x_ctr, y_ctr, width, height, angle_radian) format.
scores (Tensor): scores in shape (N, ).
dets (torch.Tensor): Rotated boxes in shape (N, 5).
They are expected to be in
(x_ctr, y_ctr, width, height, angle_radian) format.
scores (torch.Tensor): scores in shape (N, ).
iou_threshold (float): IoU thresh for NMS.
labels (Tensor): boxes' label in shape (N,).
labels (torch.Tensor, optional): boxes' label in shape (N,).
clockwise (bool): flag indicating whether the positive angular
orientation is clockwise. Default: True.
`New in version 1.4.3.`
Returns:
tuple: kept dets(boxes and scores) and indice, which is always the \
same data type as the input.
tuple: kept dets (boxes and scores) and indices, which are always the
same data type as the input.
"""
if dets.shape[0] == 0:
return dets, None
if not clockwise:
flip_mat = dets.new_ones(dets.shape[-1])
flip_mat[-1] = -1
dets_cw = dets * flip_mat
else:
dets_cw = dets
multi_label = labels is not None
if multi_label:
dets_wl = torch.cat((dets, labels.unsqueeze(1)), 1)
dets_wl = torch.cat((dets_cw, labels.unsqueeze(1)), 1) # type: ignore
else:
dets_wl = dets
dets_wl = dets_cw
_, order = scores.sort(0, descending=True)
dets_sorted = dets_wl.index_select(0, order)
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Union
import numpy as np
import torch
from torch import Tensor
from ..utils import ext_loader
ext_module = ext_loader.load_ext('_ext', ['pixel_group'])
def pixel_group(score, mask, embedding, kernel_label, kernel_contour,
kernel_region_num, distance_threshold):
def pixel_group(
score: Union[np.ndarray, Tensor],
mask: Union[np.ndarray, Tensor],
embedding: Union[np.ndarray, Tensor],
kernel_label: Union[np.ndarray, Tensor],
kernel_contour: Union[np.ndarray, Tensor],
kernel_region_num: int,
distance_threshold: float,
) -> List[List[float]]:
"""Group pixels into text instances, which is widely used text detection
methods.
Arguments:
score (np.array or Tensor): The foreground score with size hxw.
score (np.array or torch.Tensor): The foreground score with size hxw.
mask (np.array or Tensor): The foreground mask with size hxw.
embedding (np.array or Tensor): The embedding with size hxwxc to
embedding (np.array or torch.Tensor): The embedding with size hxwxc to
distinguish instances.
kernel_label (np.array or Tensor): The instance kernel index with
kernel_label (np.array or torch.Tensor): The instance kernel index with
size hxw.
kernel_contour (np.array or torch.Tensor): The kernel contour with
size hxw.
kernel_contour (np.array or Tensor): The kernel contour with size hxw.
kernel_region_num (int): The instance kernel region number.
distance_threshold (float): The embedding distance threshold between
kernel and pixel in one instance.
Returns:
pixel_assignment (List[List[float]]): The instance coordinate list.
Each element consists of averaged confidence, pixel number, and
coordinates (x_i, y_i for all pixels) in order.
list[list[float]]: The instance coordinates and attributes list. Each
element consists of averaged confidence, pixel number, and coordinates
(x_i, y_i for all pixels) in order.
"""
assert isinstance(score, (torch.Tensor, np.ndarray))
assert isinstance(mask, (torch.Tensor, np.ndarray))
......
# Modified from https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend # noqa
from os import path as osp
from typing import Tuple, Union
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from torch.nn.modules.utils import _pair
from torch.onnx.operators import shape_as_tensor
def bilinear_grid_sample(im, grid, align_corners=False):
def bilinear_grid_sample(im: Tensor,
grid: Tensor,
align_corners: bool = False) -> Tensor:
"""Given an input and a flow-field grid, computes the output using input
values and pixel locations from grid. Supported only bilinear interpolation
method to sample the input pixels.
......@@ -17,11 +21,12 @@ def bilinear_grid_sample(im, grid, align_corners=False):
Args:
im (torch.Tensor): Input feature map, shape (N, C, H, W)
grid (torch.Tensor): Point coordinates, shape (N, Hg, Wg, 2)
align_corners {bool}: If set to True, the extrema (-1 and 1) are
align_corners (bool): If set to True, the extrema (-1 and 1) are
considered as referring to the center points of the input’s
corner pixels. If set to False, they are instead considered as
referring to the corner points of the input’s corner pixels,
making the sampling more resolution agnostic.
Returns:
torch.Tensor: A tensor with sampled points, shape (N, C, Hg, Wg)
"""
......@@ -84,47 +89,52 @@ def bilinear_grid_sample(im, grid, align_corners=False):
return (Ia * wa + Ib * wb + Ic * wc + Id * wd).reshape(n, c, gh, gw)
def is_in_onnx_export_without_custom_ops():
def is_in_onnx_export_without_custom_ops() -> bool:
from mmcv.ops import get_onnxruntime_op_path
ort_custom_op_path = get_onnxruntime_op_path()
return torch.onnx.is_in_onnx_export(
) and not osp.exists(ort_custom_op_path)
def normalize(grid):
def normalize(grid: Tensor) -> Tensor:
"""Normalize input grid from [-1, 1] to [0, 1]
Args:
grid (Tensor): The grid to be normalize, range [-1, 1].
grid (torch.Tensor): The grid to be normalized, range [-1, 1].
Returns:
Tensor: Normalized grid, range [0, 1].
torch.Tensor: Normalized grid, range [0, 1].
"""
return (grid + 1.0) / 2.0
def denormalize(grid):
def denormalize(grid: Tensor) -> Tensor:
"""Denormalize input grid from range [0, 1] to [-1, 1]
Args:
grid (Tensor): The grid to be denormalize, range [0, 1].
grid (torch.Tensor): The grid to be denormalized, range [0, 1].
Returns:
Tensor: Denormalized grid, range [-1, 1].
torch.Tensor: Denormalized grid, range [-1, 1].
"""
return grid * 2.0 - 1.0
def generate_grid(num_grid, size, device):
def generate_grid(num_grid: int, size: Tuple[int, int],
device: torch.device) -> Tensor:
"""Generate regular square grid of points in [0, 1] x [0, 1] coordinate
space.
Args:
num_grid (int): The number of grids to sample, one for each region.
size (tuple(int, int)): The side size of the regular grid.
size (tuple[int, int]): The side size of the regular grid.
device (torch.device): Desired device of returned tensor.
Returns:
(torch.Tensor): A tensor of shape (num_grid, size[0]*size[1], 2) that
contains coordinates for the regular grids.
torch.Tensor: A tensor of shape (num_grid, size[0]*size[1], 2) that
contains coordinates for the regular grids.
"""
affine_trans = torch.tensor([[[1., 0., 0.], [0., 1., 0.]]], device=device)
......@@ -134,16 +144,17 @@ def generate_grid(num_grid, size, device):
return grid.view(1, -1, 2).expand(num_grid, -1, -1)
def rel_roi_point_to_abs_img_point(rois, rel_roi_points):
def rel_roi_point_to_abs_img_point(rois: Tensor,
rel_roi_points: Tensor) -> Tensor:
"""Convert roi based relative point coordinates to image based absolute
point coordinates.
Args:
rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5)
rel_roi_points (Tensor): Point coordinates inside RoI, relative to
RoI, location, range (0, 1), shape (N, P, 2)
rois (torch.Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5)
rel_roi_points (torch.Tensor): Point coordinates inside RoI, relative
to RoI, location, range (0, 1), shape (N, P, 2)
Returns:
Tensor: Image based absolute point coordinates, shape (N, P, 2)
torch.Tensor: Image based absolute point coordinates, shape (N, P, 2)
"""
with torch.no_grad():
......@@ -165,12 +176,13 @@ def rel_roi_point_to_abs_img_point(rois, rel_roi_points):
return abs_img_points
def get_shape_from_feature_map(x):
def get_shape_from_feature_map(x: Tensor) -> Tensor:
"""Get spatial resolution of input feature map considering exporting to
onnx mode.
Args:
x (torch.Tensor): Input tensor, shape (N, C, H, W)
Returns:
torch.Tensor: Spatial resolution (width, height), shape (1, 1, 2)
"""
......@@ -183,19 +195,22 @@ def get_shape_from_feature_map(x):
return img_shape
def abs_img_point_to_rel_img_point(abs_img_points, img, spatial_scale=1.):
def abs_img_point_to_rel_img_point(abs_img_points: Tensor,
img: Union[tuple, Tensor],
spatial_scale: float = 1.) -> Tensor:
"""Convert image based absolute point coordinates to image based relative
coordinates for sampling.
Args:
abs_img_points (Tensor): Image based absolute point coordinates,
abs_img_points (torch.Tensor): Image based absolute point coordinates,
shape (N, P, 2)
img (tuple/Tensor): (height, width) of image or feature map.
spatial_scale (float): Scale points by this factor. Default: 1.
img (tuple or torch.Tensor): (height, width) of image or feature map.
spatial_scale (float, optional): Scale points by this factor.
Default: 1.
Returns:
Tensor: Image based relative point coordinates for sampling,
shape (N, P, 2)
torch.Tensor: Image based relative point coordinates for sampling,
shape (N, P, 2).
"""
assert (isinstance(img, tuple) and len(img) == 2) or \
......@@ -213,23 +228,24 @@ def abs_img_point_to_rel_img_point(abs_img_points, img, spatial_scale=1.):
return abs_img_points / scale * spatial_scale
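# Illustrative sketch: absolute coordinates are divided by (width, height),
# so (20, 30) on an image of (h=100, w=200) maps to (0.1, 0.3);
# spatial_scale would further rescale to feature-map units.
abs_points = torch.tensor([[[20., 30.]]])
rel_points = abs_img_point_to_rel_img_point(abs_points, img=(100, 200))
assert torch.allclose(rel_points, torch.tensor([[[0.1, 0.3]]]))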
def rel_roi_point_to_rel_img_point(rois,
rel_roi_points,
img,
spatial_scale=1.):
def rel_roi_point_to_rel_img_point(rois: Tensor,
rel_roi_points: Tensor,
img: Union[tuple, Tensor],
spatial_scale: float = 1.) -> Tensor:
"""Convert roi based relative point coordinates to image based absolute
point coordinates.
Args:
rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5)
rel_roi_points (Tensor): Point coordinates inside RoI, relative to
RoI, location, range (0, 1), shape (N, P, 2)
img (tuple/Tensor): (height, width) of image or feature map.
spatial_scale (float): Scale points by this factor. Default: 1.
rois (torch.Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5)
rel_roi_points (torch.Tensor): Point coordinates inside RoI, relative
to RoI, location, range (0, 1), shape (N, P, 2)
img (tuple or torch.Tensor): (height, width) of image or feature map.
spatial_scale (float, optional): Scale points by this factor.
Default: 1.
Returns:
Tensor: Image based relative point coordinates for sampling,
shape (N, P, 2)
torch.Tensor: Image based relative point coordinates for sampling,
shape (N, P, 2).
"""
abs_img_point = rel_roi_point_to_abs_img_point(rois, rel_roi_points)
......@@ -239,20 +255,25 @@ def rel_roi_point_to_rel_img_point(rois,
return rel_img_point
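# Illustrative sketch: this is just the composition of the two helpers
# above, producing normalized coordinates ready for point_sample.
rois = torch.tensor([[10., 10., 30., 50.]])
rel_points = torch.tensor([[[0.5, 0.5]]])
pts = rel_roi_point_to_rel_img_point(rois, rel_points, img=(100, 200))
assert torch.allclose(pts, torch.tensor([[[0.1, 0.3]]]))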
def point_sample(input, points, align_corners=False, **kwargs):
def point_sample(input: Tensor,
points: Tensor,
align_corners: bool = False,
**kwargs) -> Tensor:
"""A wrapper around :func:`grid_sample` to support 3D point_coords tensors
Unlike :func:`torch.nn.functional.grid_sample` it assumes point_coords to
lie inside ``[0, 1] x [0, 1]`` square.
Args:
input (Tensor): Feature map, shape (N, C, H, W).
points (Tensor): Image based absolute point coordinates (normalized),
range [0, 1] x [0, 1], shape (N, P, 2) or (N, Hgrid, Wgrid, 2).
align_corners (bool): Whether align_corners. Default: False
input (torch.Tensor): Feature map, shape (N, C, H, W).
points (torch.Tensor): Image based absolute point coordinates
(normalized), range [0, 1] x [0, 1], shape (N, P, 2) or
(N, Hgrid, Wgrid, 2).
align_corners (bool, optional): Whether to align corners in
:func:`grid_sample`. Default: False.
Returns:
Tensor: Features of `point` on `input`, shape (N, C, P) or
(N, C, Hgrid, Wgrid).
torch.Tensor: Features of `point` on `input`, shape (N, C, P) or
(N, C, Hgrid, Wgrid).
"""
add_dim = False
......@@ -275,7 +296,10 @@ def point_sample(input, points, align_corners=False, **kwargs):
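# Illustrative usage sketch: sample a 4x4 feature map at one normalized
# point; an (N, P, 2) coordinate input yields an (N, C, P) output.
feat = torch.arange(16, dtype=torch.float32).view(1, 1, 4, 4)
pts = torch.tensor([[[0.5, 0.5]]])
out = point_sample(feat, pts)
assert out.shape == (1, 1, 1)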
class SimpleRoIAlign(nn.Module):
def __init__(self, output_size, spatial_scale, aligned=True):
def __init__(self,
output_size: Tuple[int],
spatial_scale: float,
aligned: bool = True) -> None:
"""Simple RoI align in PointRend, faster than standard RoIAlign.
Args:
......@@ -286,14 +310,14 @@ class SimpleRoIAlign(nn.Module):
If True, align the results more perfectly.
"""
super(SimpleRoIAlign, self).__init__()
super().__init__()
self.output_size = _pair(output_size)
self.spatial_scale = float(spatial_scale)
# to be consistent with other RoI ops
self.use_torchvision = False
self.aligned = aligned
def forward(self, features, rois):
def forward(self, features: Tensor, rois: Tensor) -> Tensor:
num_imgs = features.size(0)
num_rois = rois.size(0)
rel_roi_points = generate_grid(
......@@ -329,7 +353,7 @@ class SimpleRoIAlign(nn.Module):
return roi_feats
def __repr__(self):
def __repr__(self) -> str:
format_str = self.__class__.__name__
format_str += '(output_size={}, spatial_scale={}'.format(
self.output_size, self.spatial_scale)
......
import torch
from torch import Tensor
from ..utils import ext_loader
......@@ -8,17 +9,18 @@ ext_module = ext_loader.load_ext('_ext', [
])
def points_in_boxes_part(points, boxes):
def points_in_boxes_part(points: Tensor, boxes: Tensor) -> Tensor:
"""Find the box in which each point is (CUDA).
Args:
points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH coordinate
points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH coordinate.
boxes (torch.Tensor): [B, T, 7],
num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz] in
LiDAR/DEPTH coordinate, (x, y, z) is the bottom center
LiDAR/DEPTH coordinate, (x, y, z) is the bottom center.
Returns:
box_idxs_of_pts (torch.Tensor): (B, M), default background = -1
torch.Tensor: Return the box indices of points with the shape of
(B, M). Default background = -1.
"""
assert points.shape[0] == boxes.shape[0], \
'Points and boxes should have the same batch size, ' \
......@@ -55,7 +57,7 @@ def points_in_boxes_part(points, boxes):
return box_idxs_of_pts
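# Illustrative usage sketch, assuming a CUDA build of the _ext module:
# each point gets the index of the box containing it, or -1 for background.
# Box layout: (x, y, z, x_size, y_size, z_size, rz), bottom-centered.
if torch.cuda.is_available():
    pts = torch.rand(1, 128, 3).cuda()
    boxes = torch.tensor([[[0.5, 0.5, 0.5, 1., 1., 1., 0.]]]).cuda()
    idx = points_in_boxes_part(pts, boxes)  # (1, 128)
    assert idx.shape == (1, 128)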
def points_in_boxes_cpu(points, boxes):
def points_in_boxes_cpu(points: Tensor, boxes: Tensor) -> Tensor:
"""Find all boxes in which each point is (CPU). The CPU version of
:meth:`points_in_boxes_all`.
......@@ -67,7 +69,8 @@ def points_in_boxes_cpu(points, boxes):
(x, y, z) is the bottom center.
Returns:
box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0.
torch.Tensor: Return the box indices of points with the shape of
(B, M, T). Default background = 0.
"""
assert points.shape[0] == boxes.shape[0], \
'Points and boxes should have the same batch size, ' \
......@@ -92,7 +95,7 @@ def points_in_boxes_cpu(points, boxes):
return point_indices
def points_in_boxes_all(points, boxes):
def points_in_boxes_all(points: Tensor, boxes: Tensor) -> Tensor:
"""Find all boxes in which each point is (CUDA).
Args:
......@@ -102,7 +105,8 @@ def points_in_boxes_all(points, boxes):
(x, y, z) is the bottom center.
Returns:
box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0.
torch.Tensor: Return the box indices of points with the shape of
(B, M, T). Default background = 0.
"""
assert boxes.shape[0] == points.shape[0], \
'Points and boxes should have the same batch size, ' \
......
import torch
from torch import Tensor
from ..utils import ext_loader
ext_module = ext_loader.load_ext('_ext', ['points_in_polygons_forward'])
def points_in_polygons(points: Tensor, polygons: Tensor) -> Tensor:
"""Judging whether points are inside polygons, which is used in the ATSS
assignment for the rotated boxes.
It should be noted that when the point is just at the polygon boundary, the
judgment will be inaccurate, but the effect on assignment is limited.
Args:
points (torch.Tensor): It has shape (B, 2), indicating (x, y).
B means the number of predicted points.
polygons (torch.Tensor): It has shape (M, 8), indicating
(x1, y1, x2, y2, x3, y3, x4, y4). M means the number of
ground truth polygons.
Returns:
torch.Tensor: Return the result with the shape of (B, M),
1 indicates that the point is inside the polygon,
0 indicates that the point is outside the polygon.
"""
assert points.shape[1] == 2, \
'points dimension should be 2, ' \
f'but got unexpected shape {points.shape[1]}'
assert polygons.shape[1] == 8, \
'polygons dimension should be 8, ' \
f'but got unexpected shape {polygons.shape[1]}'
output = torch.full([points.shape[0], polygons.shape[0]],
0.).cuda().float()
ext_module.points_in_polygons_forward(points.contiguous(),
polygons.contiguous(), output)
return output
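# Illustrative usage sketch (CUDA only, since the output buffer is
# allocated on the GPU): the unit square contains (0.5, 0.5) but not
# (2.0, 2.0).
if torch.cuda.is_available():
    pts = torch.tensor([[0.5, 0.5], [2.0, 2.0]]).cuda()
    polygons = torch.tensor([[0., 0., 1., 0., 1., 1., 0., 1.]]).cuda()
    inside = points_in_polygons(pts, polygons)  # (2, 1) of 1. / 0.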
from typing import List
import torch
from torch import Tensor
from torch import nn as nn
from mmcv.runner import force_fp32
......@@ -8,17 +9,19 @@ from .furthest_point_sample import (furthest_point_sample,
furthest_point_sample_with_dist)
def calc_square_dist(point_feat_a, point_feat_b, norm=True):
def calc_square_dist(point_feat_a: Tensor,
point_feat_b: Tensor,
norm: bool = True) -> Tensor:
"""Calculating square distance between a and b.
Args:
point_feat_a (Tensor): (B, N, C) Feature vector of each point.
point_feat_b (Tensor): (B, M, C) Feature vector of each point.
norm (Bool, optional): Whether to normalize the distance.
point_feat_a (torch.Tensor): (B, N, C) Feature vector of each point.
point_feat_b (torch.Tensor): (B, M, C) Feature vector of each point.
norm (bool, optional): Whether to normalize the distance.
Default: True.
Returns:
Tensor: (B, N, M) Distance between each pair points.
torch.Tensor: (B, N, M) Square distance between each point pair.
"""
num_channel = point_feat_a.shape[-1]
# [bs, n, 1]
......@@ -34,7 +37,7 @@ def calc_square_dist(point_feat_a, point_feat_b, norm=True):
return dist
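# Illustrative sanity sketch: with norm=False the result should match the
# squared pairwise Euclidean distance computed by torch.cdist.
a = torch.rand(2, 5, 3)
b = torch.rand(2, 7, 3)
d = calc_square_dist(a, b, norm=False)
assert torch.allclose(d, torch.cdist(a, b) ** 2, atol=1e-5)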
def get_sampler_cls(sampler_type):
def get_sampler_cls(sampler_type: str) -> nn.Module:
"""Get the type and mode of points sampler.
Args:
......@@ -74,7 +77,7 @@ class PointsSampler(nn.Module):
def __init__(self,
num_point: List[int],
fps_mod_list: List[str] = ['D-FPS'],
fps_sample_range_list: List[int] = [-1]):
fps_sample_range_list: List[int] = [-1]) -> None:
super().__init__()
# FPS would be applied to different fps_mod in the list,
# so the length of the num_point should be equal to
......@@ -89,18 +92,18 @@ class PointsSampler(nn.Module):
self.fp16_enabled = False
@force_fp32()
def forward(self, points_xyz, features):
def forward(self, points_xyz: Tensor, features: Tensor) -> Tensor:
"""
Args:
points_xyz (Tensor): (B, N, 3) xyz coordinates of the features.
features (Tensor): (B, C, N) Descriptors of the features.
points_xyz (torch.Tensor): (B, N, 3) xyz coordinates of
the points.
features (torch.Tensor): (B, C, N) features of the points.
Returns:
Tensor: (B, npoint, sample_num) Indices of sampled points.
torch.Tensor: (B, npoint, sample_num) Indices of sampled points.
"""
indices = []
last_fps_end_index = 0
for fps_sample_range, sampler, npoint in zip(
self.fps_sample_range_list, self.samplers, self.num_point):
assert fps_sample_range < points_xyz.shape[1]
......@@ -112,8 +115,8 @@ class PointsSampler(nn.Module):
else:
sample_features = None
else:
sample_points_xyz = \
points_xyz[:, last_fps_end_index:fps_sample_range]
sample_points_xyz = points_xyz[:, last_fps_end_index:
fps_sample_range]
if features is not None:
sample_features = features[:, :, last_fps_end_index:
fps_sample_range]
......@@ -124,7 +127,7 @@ class PointsSampler(nn.Module):
npoint)
indices.append(fps_idx + last_fps_end_index)
last_fps_end_index += fps_sample_range
last_fps_end_index = fps_sample_range
indices = torch.cat(indices, dim=1)
return indices
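# Illustrative usage sketch, assuming a CUDA build of
# furthest_point_sample: F-FPS on the first 256 points, D-FPS on the rest.
if torch.cuda.is_available():
    sampler = PointsSampler(num_point=[64, 64],
                            fps_mod_list=['F-FPS', 'D-FPS'],
                            fps_sample_range_list=[256, -1])
    xyz = torch.rand(2, 1024, 3).cuda()
    features = torch.rand(2, 16, 1024).cuda()
    indices = sampler(xyz, features)  # (2, 128)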
......@@ -133,10 +136,10 @@ class PointsSampler(nn.Module):
class DFPSSampler(nn.Module):
"""Using Euclidean distances of points for FPS."""
def __init__(self):
def __init__(self) -> None:
super().__init__()
def forward(self, points, features, npoint):
def forward(self, points: Tensor, features: Tensor, npoint: int) -> Tensor:
"""Sampling points with D-FPS."""
fps_idx = furthest_point_sample(points.contiguous(), npoint)
return fps_idx
......@@ -145,10 +148,10 @@ class DFPSSampler(nn.Module):
class FFPSSampler(nn.Module):
"""Using feature distances for FPS."""
def __init__(self):
def __init__(self) -> None:
super().__init__()
def forward(self, points, features, npoint):
def forward(self, points: Tensor, features: Tensor, npoint: int) -> Tensor:
"""Sampling points with F-FPS."""
assert features is not None, \
'feature input to FFPS_Sampler should not be None'
......@@ -162,10 +165,10 @@ class FFPSSampler(nn.Module):
class FSSampler(nn.Module):
"""Using F-FPS and D-FPS simultaneously."""
def __init__(self):
def __init__(self) -> None:
super().__init__()
def forward(self, points, features, npoint):
def forward(self, points: Tensor, features: Tensor, npoint: int) -> Tensor:
"""Sampling points with FS_Sampling."""
assert features is not None, \
'feature input to FS_Sampler should not be None'
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Tuple, Union
import torch
import torch.nn as nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair
from ..utils import ext_loader
ext_module = ext_loader.load_ext(
'_ext',
['prroi_pool_forward', 'prroi_pool_backward', 'prroi_pool_coor_backward'])
class PrRoIPoolFunction(Function):
@staticmethod
def symbolic(g, features, rois, output_size, spatial_scale):
return g.op(
'mmcv::PrRoIPool',
features,
rois,
pooled_height_i=int(output_size[0]),
pooled_width_i=int(output_size[1]),
spatial_scale_f=float(spatial_scale))
@staticmethod
def forward(ctx,
features: torch.Tensor,
rois: torch.Tensor,
output_size: Tuple,
spatial_scale: float = 1.0) -> torch.Tensor:
if 'FloatTensor' not in features.type(
) or 'FloatTensor' not in rois.type():
raise ValueError(
'Precise RoI Pooling only takes float input, got '
f'{features.type()} for features and {rois.type()} for rois.')
pooled_height = int(output_size[0])
pooled_width = int(output_size[1])
spatial_scale = float(spatial_scale)
features = features.contiguous()
rois = rois.contiguous()
output_shape = (rois.size(0), features.size(1), pooled_height,
pooled_width)
output = features.new_zeros(output_shape)
params = (pooled_height, pooled_width, spatial_scale)
ext_module.prroi_pool_forward(features, rois, output, *params)
ctx.params = params
# everything here is contiguous.
ctx.save_for_backward(features, rois, output)
return output
@staticmethod
@once_differentiable
def backward(
ctx, grad_output: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor, None, None, None]:
features, rois, output = ctx.saved_tensors
grad_input = grad_output.new_zeros(*features.shape)
grad_coor = grad_output.new_zeros(*rois.shape)
if features.requires_grad:
grad_output = grad_output.contiguous()
ext_module.prroi_pool_backward(grad_output, rois, grad_input,
*ctx.params)
if rois.requires_grad:
grad_output = grad_output.contiguous()
ext_module.prroi_pool_coor_backward(output, grad_output, features,
rois, grad_coor, *ctx.params)
return grad_input, grad_coor, None, None, None
prroi_pool = PrRoIPoolFunction.apply
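# Illustrative usage sketch, assuming a CUDA build of the _ext module and
# the usual mmcv roi layout (batch_index, x1, y1, x2, y2); rois must be
# float tensors, as checked in forward above.
if torch.cuda.is_available():
    feats = torch.rand(1, 16, 32, 32).cuda()
    rois = torch.tensor([[0., 4., 4., 20., 20.]]).cuda()
    pooled = prroi_pool(feats, rois, (7, 7), 1.0)  # (1, 16, 7, 7)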
class PrRoIPool(nn.Module):
"""The operation of precision RoI pooling. The implementation of PrRoIPool
is modified from https://github.com/vacancy/PreciseRoIPooling/
Precise RoI Pooling (PrRoIPool) is an integration-based (bilinear
interpolation) average pooling method for RoI Pooling. It avoids any
quantization and has a continuous gradient on bounding box coordinates.
It is:
1. different from the original RoI Pooling proposed in Fast R-CNN. PrRoI
Pooling uses average pooling instead of max pooling for each bin and has a
continuous gradient on bounding box coordinates. That is, one can take the
derivatives of some loss function w.r.t the coordinates of each RoI and
optimize the RoI coordinates.
2. different from the RoI Align proposed in Mask R-CNN. PrRoI Pooling uses
a full integration-based average pooling instead of sampling a constant
number of points. This makes the gradient w.r.t. the coordinates
continuous.
Args:
output_size (Union[int, tuple]): h, w.
spatial_scale (float, optional): scale the input boxes by this number.
Defaults to 1.0.
"""
def __init__(self,
output_size: Union[int, tuple],
spatial_scale: float = 1.0):
super().__init__()
self.output_size = _pair(output_size)
self.spatial_scale = float(spatial_scale)
def forward(self, features: torch.Tensor,
rois: torch.Tensor) -> torch.Tensor:
"""Forward function.
Args:
features (torch.Tensor): The feature map.
rois (torch.Tensor): The RoI bboxes in [tl_x, tl_y, br_x, br_y]
format.
Returns:
torch.Tensor: The pooled results.
"""
return prroi_pool(features, rois, self.output_size, self.spatial_scale)
def __repr__(self):
s = self.__class__.__name__
s += f'(output_size={self.output_size}, '
s += f'spatial_scale={self.spatial_scale})'
return s
# Modified from https://github.com/hszhao/semseg/blob/master/lib/psa
from typing import Optional, Tuple
import torch
from torch import nn
from torch.autograd import Function
from torch.nn.modules.utils import _pair
......@@ -20,7 +23,8 @@ class PSAMaskFunction(Function):
mask_size_i=mask_size)
@staticmethod
def forward(ctx, input, psa_type, mask_size):
def forward(ctx, input: torch.Tensor, psa_type: str,
mask_size: int) -> torch.Tensor:
ctx.psa_type = psa_type
ctx.mask_size = _pair(mask_size)
ctx.save_for_backward(input)
......@@ -45,7 +49,9 @@ class PSAMaskFunction(Function):
return output
@staticmethod
def backward(ctx, grad_output):
def backward(
ctx, grad_output: torch.Tensor
) -> Tuple[torch.Tensor, None, None, None]:
input = ctx.saved_tensors[0]
psa_type = ctx.psa_type
h_mask, w_mask = ctx.mask_size
......@@ -71,8 +77,8 @@ psa_mask = PSAMaskFunction.apply
class PSAMask(nn.Module):
def __init__(self, psa_type, mask_size=None):
super(PSAMask, self).__init__()
def __init__(self, psa_type: str, mask_size: Optional[tuple] = None):
super().__init__()
assert psa_type in ['collect', 'distribute']
if psa_type == 'collect':
psa_type_enum = 0
......@@ -82,7 +88,7 @@ class PSAMask(nn.Module):
self.mask_size = mask_size
self.psa_type = psa_type
def forward(self, input):
def forward(self, input: torch.Tensor) -> torch.Tensor:
return psa_mask(input, self.psa_type_enum, self.mask_size)
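# Illustrative usage sketch, assuming a CPU kernel of the psamask op is
# available and that mask_size equals the feature size, so the input has
# H*W channels and the output keeps shape (N, H*W, H, W).
m = PSAMask('collect', mask_size=(16, 16))
x = torch.rand(1, 16 * 16, 16, 16)
y = m(x)  # (1, 256, 16, 16)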
def __repr__(self):
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Any, Optional, Tuple, Union
import torch
import torch.nn as nn
from torch.autograd import Function
from ..utils import ext_loader, is_tuple_of
ext_module = ext_loader.load_ext(
'_ext', ['riroi_align_rotated_forward', 'riroi_align_rotated_backward'])
class RiRoIAlignRotatedFunction(Function):
@staticmethod
def forward(ctx: Any,
features: torch.Tensor,
rois: torch.Tensor,
out_size: Union[int, tuple],
spatial_scale: float,
num_samples: int = 0,
num_orientations: int = 8,
clockwise: bool = False) -> torch.Tensor:
if isinstance(out_size, int):
out_h = out_size
out_w = out_size
elif is_tuple_of(out_size, int):
assert len(out_size) == 2
out_h, out_w = out_size
else:
raise TypeError(
f'"out_size" should be an integer or tuple of integers,'
f' but got {out_size}')
ctx.spatial_scale = spatial_scale
ctx.num_samples = num_samples
ctx.num_orientations = num_orientations
ctx.clockwise = clockwise
ctx.save_for_backward(rois)
ctx.feature_size = features.size()
batch_size, num_channels, _, _ = features.size()
num_rois = rois.size(0)
output = features.new_zeros(num_rois, num_channels, out_h, out_w)
ext_module.riroi_align_rotated_forward(
features,
rois,
output,
pooled_height=out_h,
pooled_width=out_w,
spatial_scale=spatial_scale,
num_samples=num_samples,
num_orientations=num_orientations,
clockwise=clockwise)
return output
@staticmethod
def backward(
ctx: Any, grad_output: torch.Tensor
) -> Optional[Tuple[torch.Tensor, None, None, None, None, None, None]]:
feature_size = ctx.feature_size
spatial_scale = ctx.spatial_scale
num_orientations = ctx.num_orientations
clockwise = ctx.clockwise
num_samples = ctx.num_samples
rois = ctx.saved_tensors[0]
assert feature_size is not None
batch_size, num_channels, feature_h, feature_w = feature_size
out_w = grad_output.size(3)
out_h = grad_output.size(2)
grad_input = None
if ctx.needs_input_grad[0]:
grad_input = rois.new_zeros(batch_size, num_channels, feature_h,
feature_w)
ext_module.riroi_align_rotated_backward(
grad_output.contiguous(),
rois,
grad_input,
pooled_height=out_h,
pooled_width=out_w,
spatial_scale=spatial_scale,
num_samples=num_samples,
num_orientations=num_orientations,
clockwise=clockwise)
return grad_input, None, None, None, None, None, None
return None
riroi_align_rotated = RiRoIAlignRotatedFunction.apply
class RiRoIAlignRotated(nn.Module):
"""Rotation-invariant RoI align pooling layer for rotated proposals.
It accepts a feature map of shape (N, C, H, W) and rois with shape
(n, 6) with each roi decoded as (batch_index, center_x, center_y,
w, h, angle). The angle is in radian.
The details are described in the paper `ReDet: A Rotation-equivariant
Detector for Aerial Object Detection <https://arxiv.org/abs/2103.07733>`_.
Args:
out_size (tuple): fixed dimensional RoI output with shape (h, w).
spatial_scale (float): scale the input boxes by this number.
num_samples (int): number of input samples to take for each
output sample. 0 to take samples densely for current models.
num_orientations (int): number of oriented channels.
clockwise (bool): If True, the angle in each proposal follows a
clockwise fashion in image space, otherwise, the angle is
counterclockwise. Default: False.
"""
def __init__(self,
out_size: tuple,
spatial_scale: float,
num_samples: int = 0,
num_orientations: int = 8,
clockwise: bool = False):
super().__init__()
self.out_size = out_size
self.spatial_scale = float(spatial_scale)
self.num_samples = int(num_samples)
self.num_orientations = int(num_orientations)
self.clockwise = clockwise
def forward(self, features: torch.Tensor,
rois: torch.Tensor) -> torch.Tensor:
return RiRoIAlignRotatedFunction.apply(features, rois, self.out_size,
self.spatial_scale,
self.num_samples,
self.num_orientations,
self.clockwise)
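# Illustrative usage sketch, assuming a CUDA build of the _ext module:
# rois are (batch_index, cx, cy, w, h, angle) with the angle in radian,
# and the channel count is assumed divisible by num_orientations.
if torch.cuda.is_available():
    feats = torch.rand(1, 8, 32, 32).cuda()
    rois = torch.tensor([[0., 16., 16., 8., 8., 0.3]]).cuda()
    layer = RiRoIAlignRotated((7, 7), spatial_scale=1.0, num_orientations=8)
    out = layer(feats, rois)  # (1, 8, 7, 7)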