Commit fdeee889 authored by limm

release v1.6.1 of mmcv

parent df465820
# Copyright (c) OpenMMLab. All rights reserved.
from .active_rotated_filter import active_rotated_filter
from .assign_score_withk import assign_score_withk
from .ball_query import ball_query
from .bbox import bbox_overlaps
@@ -6,7 +7,9 @@ from .border_align import BorderAlign, border_align
from .box_iou_rotated import box_iou_rotated
from .carafe import CARAFE, CARAFENaive, CARAFEPack, carafe, carafe_naive
from .cc_attention import CrissCrossAttention
from .chamfer_distance import chamfer_distance
from .contour_expand import contour_expand
from .convex_iou import convex_giou, convex_iou
from .corner_pool import CornerPool
from .correlation import Correlation
from .deform_conv import DeformConv2d, DeformConv2dPack, deform_conv2d
@@ -16,6 +19,7 @@ from .deprecated_wrappers import Conv2d_deprecated as Conv2d
from .deprecated_wrappers import ConvTranspose2d_deprecated as ConvTranspose2d
from .deprecated_wrappers import Linear_deprecated as Linear
from .deprecated_wrappers import MaxPool2d_deprecated as MaxPool2d
from .diff_iou_rotated import diff_iou_rotated_2d, diff_iou_rotated_3d
from .focal_loss import (SigmoidFocalLoss, SoftmaxFocalLoss,
                         sigmoid_focal_loss, softmax_focal_loss)
from .furthest_point_sample import (furthest_point_sample,
@@ -25,9 +29,11 @@ from .gather_points import gather_points
from .group_points import GroupAll, QueryAndGroup, grouping_operation
from .info import (get_compiler_version, get_compiling_cuda_version,
                   get_onnxruntime_op_path)
from .iou3d import (boxes_iou3d, boxes_iou_bev, boxes_overlap_bev, nms3d,
                    nms3d_normal, nms_bev, nms_normal_bev)
from .knn import knn
from .masked_conv import MaskedConv2d, masked_conv2d
from .min_area_polygons import min_area_polygons
from .modulated_deform_conv import (ModulatedDeformConv2d,
                                    ModulatedDeformConv2dPack,
                                    modulated_deform_conv2d)
@@ -38,15 +44,25 @@ from .point_sample import (SimpleRoIAlign, point_sample,
                           rel_roi_point_to_rel_img_point)
from .points_in_boxes import (points_in_boxes_all, points_in_boxes_cpu,
                              points_in_boxes_part)
from .points_in_polygons import points_in_polygons
from .points_sampler import PointsSampler
from .prroi_pool import PrRoIPool, prroi_pool
from .psa_mask import PSAMask
from .riroi_align_rotated import RiRoIAlignRotated, riroi_align_rotated
from .roi_align import RoIAlign, roi_align
from .roi_align_rotated import RoIAlignRotated, roi_align_rotated
from .roi_pool import RoIPool, roi_pool
from .roiaware_pool3d import RoIAwarePool3d
from .roipoint_pool3d import RoIPointPool3d
from .rotated_feature_align import rotated_feature_align
from .saconv import SAConv2d
from .scatter_points import DynamicScatter, dynamic_scatter
from .sparse_conv import (SparseConv2d, SparseConv3d, SparseConvTranspose2d,
                          SparseConvTranspose3d, SparseInverseConv2d,
                          SparseInverseConv3d, SubMConv2d, SubMConv3d)
from .sparse_modules import SparseModule, SparseSequential
from .sparse_pool import SparseMaxPool2d, SparseMaxPool3d
from .sparse_structure import SparseConvTensor, scatter_nd
from .sync_bn import SyncBatchNorm
from .three_interpolate import three_interpolate
from .three_nn import three_nn
@@ -70,12 +86,21 @@ __all__ = [
    'SAConv2d', 'TINShift', 'tin_shift', 'assign_score_withk',
    'box_iou_rotated', 'RoIPointPool3d', 'nms_rotated', 'knn', 'ball_query',
    'upfirdn2d', 'FusedBiasLeakyReLU', 'fused_bias_leakyrelu',
    'rotated_feature_align', 'RiRoIAlignRotated', 'riroi_align_rotated',
    'RoIAlignRotated', 'roi_align_rotated', 'pixel_group', 'QueryAndGroup',
    'GroupAll', 'grouping_operation', 'contour_expand', 'three_nn',
    'three_interpolate', 'MultiScaleDeformableAttention', 'BorderAlign',
    'border_align', 'gather_points', 'furthest_point_sample',
    'furthest_point_sample_with_dist', 'PointsSampler', 'Correlation',
    'boxes_iou3d', 'boxes_iou_bev', 'boxes_overlap_bev', 'nms_bev',
    'nms_normal_bev', 'nms3d', 'nms3d_normal', 'Voxelization', 'voxelization',
    'dynamic_scatter', 'DynamicScatter', 'RoIAwarePool3d', 'SparseConv2d',
    'SparseConv3d', 'SparseConvTranspose2d', 'SparseConvTranspose3d',
    'SparseInverseConv2d', 'SparseInverseConv3d', 'SubMConv2d', 'SubMConv3d',
    'SparseModule', 'SparseSequential', 'SparseMaxPool2d', 'SparseMaxPool3d',
    'SparseConvTensor', 'scatter_nd', 'points_in_boxes_part',
    'points_in_boxes_cpu', 'points_in_boxes_all', 'points_in_polygons',
    'min_area_polygons', 'active_rotated_filter', 'convex_iou', 'convex_giou',
    'diff_iou_rotated_2d', 'diff_iou_rotated_3d', 'chamfer_distance',
    'PrRoIPool', 'prroi_pool'
]
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Tuple
import torch
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from ..utils import ext_loader
ext_module = ext_loader.load_ext(
'_ext',
['active_rotated_filter_forward', 'active_rotated_filter_backward'])
class ActiveRotatedFilterFunction(Function):
"""Encoding the orientation information and generating orientation-
sensitive features.
The details are described in the paper `Align Deep Features for Oriented
    Object Detection <https://arxiv.org/abs/2008.09397>`_.
"""
@staticmethod
def forward(ctx, input: torch.Tensor,
indices: torch.Tensor) -> torch.Tensor:
"""
Args:
input (torch.Tensor): Input features with shape
[num_output_planes, num_input_planes, num_orientations, H, W].
indices (torch.Tensor): Indices with shape
[num_orientations, H, W, num_rotations].
Returns:
torch.Tensor: Refined features with shape [num_output_planes *
num_rotations, num_input_planes * num_orientations, H, W].
"""
ctx.save_for_backward(input, indices)
op, ip, o, h, w = input.size()
o, h, w, r = indices.size()
output = input.new_zeros((op * r, ip * o, h, w))
ext_module.active_rotated_filter_forward(input, indices, output)
return output
@staticmethod
@once_differentiable
def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, None]:
"""
Args:
            grad_out (torch.Tensor): The gradient of output features
with shape [num_output_planes * num_rotations,
num_input_planes * num_orientations, H, W].
Returns:
            torch.Tensor: The gradient of input features with shape
[num_output_planes, num_input_planes, num_orientations, H, W].
"""
input, indices = ctx.saved_tensors
grad_in = torch.zeros_like(input)
ext_module.active_rotated_filter_backward(grad_out, indices, grad_in)
return grad_in, None
active_rotated_filter = ActiveRotatedFilterFunction.apply
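A minimal shape sketch of how the op documented above is used (illustrative only, with hypothetical sizes; the real ``indices`` tensor must encode a valid orientation-to-rotation mapping and the call itself goes through the compiled extension, so it is only indicated in a comment):

```python
import torch

# Hypothetical filter-bank sizes: 32 output planes, 16 input planes,
# 8 orientations, 3x3 kernels, 4 rotations.
op, ip, o, h, w, r = 32, 16, 8, 3, 3, 4
weight = torch.randn(op, ip, o, h, w)                   # [op, ip, o, H, W]
indices = torch.empty(o, h, w, r, dtype=torch.int32)    # [o, H, W, r], placeholder values
# active_rotated_filter(weight, indices) would return a refined filter bank of
# shape (op * r, ip * o, h, w), i.e. (128, 128, 3, 3) here.
```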
from typing import Tuple
import torch
from torch.autograd import Function

from ..utils import ext_loader
@@ -27,11 +30,11 @@ class AssignScoreWithK(Function):

    @staticmethod
    def forward(ctx,
                scores: torch.Tensor,
                point_features: torch.Tensor,
                center_features: torch.Tensor,
                knn_idx: torch.Tensor,
                aggregate: str = 'sum') -> torch.Tensor:
        """
        Args:
            scores (torch.Tensor): (B, npoint, K, M), predicted scores to
@@ -78,15 +81,20 @@ class AssignScoreWithK(Function):
        return output

    @staticmethod
    def backward(
        ctx, grad_out: torch.Tensor
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, None, None]:
        """
        Args:
            grad_out (torch.Tensor): (B, out_dim, npoint, K)

        Returns:
            tuple[torch.Tensor]: A tuple containing five elements. The first
                one is the gradient of ``scores`` whose shape is
                (B, npoint, K, M). The second is the gradient of
                ``point_features`` whose shape is (B, N, M, out_dim). The third
                is the gradient of ``center_features`` with the shape of
                (B, N, M, out_dim). The last two are ``None``.
        """
        _, point_features, center_features, scores, knn_idx = ctx.saved_tensors
...
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Tuple

import torch
from torch.autograd import Function
@@ -18,12 +20,13 @@ class BallQuery(Function):
            min_radius (float): minimum radius of the balls.
            max_radius (float): maximum radius of the balls.
            sample_num (int): maximum number of features in the balls.
            xyz (torch.Tensor): (B, N, 3) xyz coordinates of the features.
            center_xyz (torch.Tensor): (B, npoint, 3) centers of the ball
                query.

        Returns:
            torch.Tensor: (B, npoint, nsample) tensor with the indices of the
                features that form the query balls.
        """
        assert center_xyz.is_contiguous()
        assert xyz.is_contiguous()
@@ -48,7 +51,7 @@ class BallQuery(Function):
        return idx

    @staticmethod
    def backward(ctx, a=None) -> Tuple[None, None, None, None]:
        return None, None, None, None
...
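A usage sketch for `ball_query` with the argument order documented above; the op is assumed here to require the compiled CUDA extension, so the call is guarded:

```python
import torch
from mmcv.ops import ball_query

if torch.cuda.is_available():
    xyz = torch.rand(2, 256, 3).cuda()         # (B, N, 3) candidate points
    center_xyz = torch.rand(2, 32, 3).cuda()   # (B, npoint, 3) query centers
    # min_radius=0.0, max_radius=0.4, sample_num=16
    idx = ball_query(0.0, 0.4, 16, xyz, center_xyz)
    assert idx.shape == (2, 32, 16)            # (B, npoint, nsample)
```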
# Copyright (c) OpenMMLab. All rights reserved.
import torch

from ..utils import ext_loader

ext_module = ext_loader.load_ext('_ext', ['bbox_overlaps'])


def _bbox_overlaps_cpu(bboxes1: torch.Tensor,
                       bboxes2: torch.Tensor,
                       mode: str = 'iou',
                       aligned: bool = False,
                       offset: int = 0) -> torch.Tensor:
    assert mode in ['iou', 'iof']

    if aligned:
        lt = torch.max(bboxes1[:, :2], bboxes2[:, :2])  # [rows, 2]
        rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:])  # [rows, 2]

        wh = (rb - lt + offset).clamp(min=0)  # [rows, 2]
        overlap = wh[:, 0] * wh[:, 1]
        area1 = (bboxes1[:, 2] - bboxes1[:, 0] + offset) * (
            bboxes1[:, 3] - bboxes1[:, 1] + offset)

        if mode == 'iou':
            area2 = (bboxes2[:, 2] - bboxes2[:, 0] + offset) * (
                bboxes2[:, 3] - bboxes2[:, 1] + offset)
            ious = overlap / (area1 + area2 - overlap)
        else:
            ious = overlap / area1
    else:
        lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2])  # [rows, cols, 2]
        rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:])  # [rows, cols, 2]

        wh = (rb - lt + offset).clamp(min=0)  # [rows, cols, 2]
        overlap = wh[:, :, 0] * wh[:, :, 1]
        area1 = (bboxes1[:, 2] - bboxes1[:, 0] + offset) * (
            bboxes1[:, 3] - bboxes1[:, 1] + offset)

        if mode == 'iou':
            area2 = (bboxes2[:, 2] - bboxes2[:, 0] + offset) * (
                bboxes2[:, 3] - bboxes2[:, 1] + offset)
            ious = overlap / (area1[:, None] + area2 - overlap)
        else:
            ious = overlap / (area1[:, None])

    return ious


def bbox_overlaps(bboxes1: torch.Tensor,
                  bboxes2: torch.Tensor,
                  mode: str = 'iou',
                  aligned: bool = False,
                  offset: int = 0) -> torch.Tensor:
    """Calculate overlap between two sets of bboxes.

    If ``aligned`` is ``False``, then calculate the ious between each bbox
@@ -12,14 +59,16 @@ def bbox_overlaps(bboxes1, bboxes2, mode='iou', aligned=False, offset=0):
    bboxes1 and bboxes2.

    Args:
        bboxes1 (torch.Tensor): shape (m, 4) in <x1, y1, x2, y2> format or
            empty.
        bboxes2 (torch.Tensor): shape (n, 4) in <x1, y1, x2, y2> format or
            empty. If aligned is ``True``, then m and n must be equal.
        mode (str): "iou" (intersection over union) or iof (intersection over
            foreground).

    Returns:
        torch.Tensor: Return the ious between boxes. If ``aligned`` is
            ``False``, the shape of ious is (m, n) else (m, 1).

    Example:
        >>> bboxes1 = torch.FloatTensor([
@@ -63,10 +112,19 @@ def bbox_overlaps(bboxes1, bboxes2, mode='iou', aligned=False, offset=0):
    if rows * cols == 0:
        return bboxes1.new(rows, 1) if aligned else bboxes1.new(rows, cols)

    if bboxes1.device.type == 'cpu':
        return _bbox_overlaps_cpu(
            bboxes1, bboxes2, mode=mode, aligned=aligned, offset=offset)
    else:
        if aligned:
            ious = bboxes1.new_zeros(rows)
        else:
            ious = bboxes1.new_zeros((rows, cols))
        ext_module.bbox_overlaps(
            bboxes1,
            bboxes2,
            ious,
            mode=mode_flag,
            aligned=aligned,
            offset=offset)
        return ious
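A small CPU example that exercises the `_bbox_overlaps_cpu` path above; the expected value follows directly from the formula (overlap 25, each area 100, so IoU = 25 / 175):

```python
import torch
from mmcv.ops import bbox_overlaps

bboxes1 = torch.tensor([[0., 0., 10., 10.]])
bboxes2 = torch.tensor([[5., 5., 15., 15.]])
ious = bbox_overlaps(bboxes1, bboxes2, mode='iou', aligned=False, offset=0)
assert torch.allclose(ious, torch.tensor([[25. / 175.]]))
```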
@@ -2,6 +2,8 @@
# modified from
# https://github.com/Megvii-BaseDetection/cvpods/blob/master/cvpods/layers/border_align.py
from typing import Tuple

import torch
import torch.nn as nn
from torch.autograd import Function
@@ -21,7 +23,8 @@ class BorderAlignFunction(Function):
            'mmcv::MMCVBorderAlign', input, boxes, pool_size_i=pool_size)

    @staticmethod
    def forward(ctx, input: torch.Tensor, boxes: torch.Tensor,
                pool_size: int) -> torch.Tensor:
        ctx.pool_size = pool_size
        ctx.input_shape = input.size()
@@ -45,7 +48,8 @@ class BorderAlignFunction(Function):
    @staticmethod
    @once_differentiable
    def backward(ctx,
                 grad_output: torch.Tensor) -> Tuple[torch.Tensor, None, None]:
        boxes, argmax_idx = ctx.saved_tensors
        grad_input = grad_output.new_zeros(ctx.input_shape)
        # complex head architecture may cause grad_output uncontiguous
@@ -72,24 +76,25 @@ class BorderAlign(nn.Module):
    For each border line (e.g. top, left, bottom or right) of each box,
    border_align does the following:

    1. uniformly samples ``pool_size`` +1 positions on this line, involving
       the start and end points.
    2. the corresponding features on these points are computed by bilinear
       interpolation.
    3. max pooling over all the ``pool_size`` +1 positions is used for
       computing the pooled feature.

    Args:
        pool_size (int): number of positions sampled over the boxes' borders
            (e.g. top, bottom, left, right).
    """

    def __init__(self, pool_size: int):
        super().__init__()
        self.pool_size = pool_size

    def forward(self, input: torch.Tensor,
                boxes: torch.Tensor) -> torch.Tensor:
        """
        Args:
            input: Features with shape [N,4C,H,W]. Channels ranged in [0,C),
@@ -98,8 +103,8 @@ class BorderAlign(nn.Module):
            boxes: Boxes with shape [N,H*W,4]. Coordinate format (x1,y1,x2,y2).

        Returns:
            torch.Tensor: Pooled features with shape [N,C,H*W,4]. The order is
                (top,left,bottom,right) for the last dimension.
        """
        return border_align(input, boxes, self.pool_size)
...
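A usage sketch for `BorderAlign` with the shapes documented above; it assumes a CUDA build of mmcv, so the call is guarded on CUDA availability:

```python
import torch
from mmcv.ops import BorderAlign

if torch.cuda.is_available():
    n, c, h, w = 2, 4, 8, 8
    feats = torch.rand(n, 4 * c, h, w).cuda()           # [N, 4C, H, W]
    xy1 = torch.rand(n, h * w, 2) * 4
    wh = torch.rand(n, h * w, 2) * 3
    boxes = torch.cat([xy1, xy1 + wh], dim=-1).cuda()   # [N, H*W, 4] as (x1, y1, x2, y2)
    pooled = BorderAlign(pool_size=10)(feats, boxes)
    assert pooled.shape == (n, c, h * w, 4)             # (top, left, bottom, right)
```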
# Copyright (c) OpenMMLab. All rights reserved.
import torch

from ..utils import ext_loader

ext_module = ext_loader.load_ext('_ext', ['box_iou_rotated'])


def box_iou_rotated(bboxes1: torch.Tensor,
                    bboxes2: torch.Tensor,
                    mode: str = 'iou',
                    aligned: bool = False,
                    clockwise: bool = True) -> torch.Tensor:
    """Return intersection-over-union (Jaccard index) of boxes.

    Both sets of boxes are expected to be in
@@ -14,18 +20,110 @@ def box_iou_rotated(bboxes1, bboxes2, mode='iou', aligned=False):
    of bboxes1 and bboxes2, otherwise the ious between each aligned pair of
    bboxes1 and bboxes2.

    .. note::
        The operator assumes:

        1) The positive direction along x axis is left -> right.

        2) The positive direction along y axis is top -> down.

        3) The w border is in parallel with x axis when angle = 0.

        However, there are 2 opposite definitions of the positive angular
        direction, clockwise (CW) and counter-clockwise (CCW). MMCV supports
        both definitions and uses CW by default.
        Please set ``clockwise=False`` if you are using the CCW definition.

        The coordinate system when ``clockwise`` is ``True`` (default)

        .. code-block:: none

            0-------------------> x (0 rad)
            |  A-------------B
            |  |             |
            |  |     box     h
            |  |   angle=0   |
            |  D------w------C
            v
            y (pi/2 rad)

        In such a coordinate system the rotation matrix is

        .. math::
            \\begin{pmatrix}
            \\cos\\alpha & -\\sin\\alpha \\\\
            \\sin\\alpha & \\cos\\alpha
            \\end{pmatrix}

        The coordinates of the corner point A can be calculated as:

        .. math::
            P_A=
            \\begin{pmatrix} x_A \\\\ y_A\\end{pmatrix}
            =
            \\begin{pmatrix} x_{center} \\\\ y_{center}\\end{pmatrix} +
            \\begin{pmatrix}\\cos\\alpha & -\\sin\\alpha \\\\
            \\sin\\alpha & \\cos\\alpha\\end{pmatrix}
            \\begin{pmatrix} -0.5w \\\\ -0.5h\\end{pmatrix} \\\\
            =
            \\begin{pmatrix} x_{center}-0.5w\\cos\\alpha+0.5h\\sin\\alpha
            \\\\
            y_{center}-0.5w\\sin\\alpha-0.5h\\cos\\alpha\\end{pmatrix}

        The coordinate system when ``clockwise`` is ``False``

        .. code-block:: none

            0-------------------> x (0 rad)
            |  A-------------B
            |  |             |
            |  |     box     h
            |  |   angle=0   |
            |  D------w------C
            v
            y (-pi/2 rad)

        In such a coordinate system the rotation matrix is

        .. math::
            \\begin{pmatrix}
            \\cos\\alpha & \\sin\\alpha \\\\
            -\\sin\\alpha & \\cos\\alpha
            \\end{pmatrix}

        The coordinates of the corner point A can be calculated as:

        .. math::
            P_A=
            \\begin{pmatrix} x_A \\\\ y_A\\end{pmatrix}
            =
            \\begin{pmatrix} x_{center} \\\\ y_{center}\\end{pmatrix} +
            \\begin{pmatrix}\\cos\\alpha & \\sin\\alpha \\\\
            -\\sin\\alpha & \\cos\\alpha\\end{pmatrix}
            \\begin{pmatrix} -0.5w \\\\ -0.5h\\end{pmatrix} \\\\
            =
            \\begin{pmatrix} x_{center}-0.5w\\cos\\alpha-0.5h\\sin\\alpha
            \\\\
            y_{center}+0.5w\\sin\\alpha-0.5h\\cos\\alpha\\end{pmatrix}

    Args:
        boxes1 (torch.Tensor): rotated bboxes 1. It has shape (N, 5),
            indicating (x, y, w, h, theta) for each row. Note that theta is in
            radian.
        boxes2 (torch.Tensor): rotated bboxes 2. It has shape (M, 5),
            indicating (x, y, w, h, theta) for each row. Note that theta is in
            radian.
        mode (str): "iou" (intersection over union) or iof (intersection over
            foreground).
        clockwise (bool): flag indicating whether the positive angular
            orientation is clockwise. default True.
            `New in version 1.4.3.`

    Returns:
        torch.Tensor: Return the ious between boxes. If ``aligned`` is
            ``False``, the shape of ious is (N, M) else (N,).
    """
    assert mode in ['iou', 'iof']
    mode_dict = {'iou': 0, 'iof': 1}
@@ -35,7 +133,12 @@ def box_iou_rotated(bboxes1, bboxes2, mode='iou', aligned=False):
    if aligned:
        ious = bboxes1.new_zeros(rows)
    else:
        ious = bboxes1.new_zeros(rows * cols)
    if not clockwise:
        flip_mat = bboxes1.new_ones(bboxes1.shape[-1])
        flip_mat[-1] = -1
        bboxes1 = bboxes1 * flip_mat
        bboxes2 = bboxes2 * flip_mat
    bboxes1 = bboxes1.contiguous()
    bboxes2 = bboxes2.contiguous()
    ext_module.box_iou_rotated(
...
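A short sketch of the `clockwise` handling added above: passing `clockwise=False` is equivalent to negating the angle column yourself and using the default CW convention. This assumes a build of mmcv whose `box_iou_rotated` extension supports the tensors' device (CPU tensors here):

```python
import torch
from mmcv.ops import box_iou_rotated

b1 = torch.tensor([[1.0, 1.0, 2.0, 1.0, 0.4]])   # (x, y, w, h, theta), theta in radian
b2 = torch.tensor([[1.2, 1.1, 2.0, 1.0, 0.1]])
flip = torch.tensor([1.0, 1.0, 1.0, 1.0, -1.0])  # negate only the angle column
ious_ccw = box_iou_rotated(b1, b2, clockwise=False)
ious_cw = box_iou_rotated(b1 * flip, b2 * flip)
assert torch.allclose(ious_ccw, ious_cw)
```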
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Tuple

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from torch.autograd import Function
from torch.nn.modules.module import Module
@@ -17,7 +20,8 @@ ext_module = ext_loader.load_ext('_ext', [
class CARAFENaiveFunction(Function):

    @staticmethod
    def symbolic(g, features: Tensor, masks: Tensor, kernel_size: int,
                 group_size: int, scale_factor: int) -> Tensor:
        return g.op(
            'mmcv::MMCVCARAFENaive',
            features,
@@ -27,7 +31,8 @@ class CARAFENaiveFunction(Function):
            scale_factor_f=scale_factor)

    @staticmethod
    def forward(ctx, features: Tensor, masks: Tensor, kernel_size: int,
                group_size: int, scale_factor: int) -> Tensor:
        assert scale_factor >= 1
        assert masks.size(1) == kernel_size * kernel_size * group_size
        assert masks.size(-1) == features.size(-1) * scale_factor
@@ -50,12 +55,15 @@ class CARAFENaiveFunction(Function):
            group_size=group_size,
            scale_factor=scale_factor)

        if features.requires_grad or masks.requires_grad or \
                torch.__version__ == 'parrots':
            ctx.save_for_backward(features, masks)
        return output

    @staticmethod
    def backward(
            ctx,
            grad_output: Tensor) -> Tuple[Tensor, Tensor, None, None, None]:
        assert grad_output.is_cuda

        features, masks = ctx.saved_tensors
@@ -83,8 +91,8 @@ carafe_naive = CARAFENaiveFunction.apply
class CARAFENaive(Module):

    def __init__(self, kernel_size: int, group_size: int, scale_factor: int):
        super().__init__()

        assert isinstance(kernel_size, int) and isinstance(
            group_size, int) and isinstance(scale_factor, int)
@@ -92,7 +100,7 @@ class CARAFENaive(Module):
        self.group_size = group_size
        self.scale_factor = scale_factor

    def forward(self, features: Tensor, masks: Tensor) -> Tensor:
        return carafe_naive(features, masks, self.kernel_size, self.group_size,
                            self.scale_factor)
@@ -100,7 +108,8 @@ class CARAFENaive(Module):
class CARAFEFunction(Function):

    @staticmethod
    def symbolic(g, features: Tensor, masks: Tensor, kernel_size: int,
                 group_size: int, scale_factor: int) -> Tensor:
        return g.op(
            'mmcv::MMCVCARAFE',
            features,
@@ -110,7 +119,8 @@ class CARAFEFunction(Function):
            scale_factor_f=scale_factor)

    @staticmethod
    def forward(ctx, features: Tensor, masks: Tensor, kernel_size: int,
                group_size: int, scale_factor: int) -> Tensor:
        assert scale_factor >= 1
        assert masks.size(1) == kernel_size * kernel_size * group_size
        assert masks.size(-1) == features.size(-1) * scale_factor
@@ -139,12 +149,15 @@ class CARAFEFunction(Function):
            group_size=group_size,
            scale_factor=scale_factor)

        if features.requires_grad or masks.requires_grad or \
                torch.__version__ == 'parrots':
            ctx.save_for_backward(features, masks, rfeatures)
        return output

    @staticmethod
    def backward(
            ctx,
            grad_output: Tensor) -> Tuple[Tensor, Tensor, None, None, None]:
        assert grad_output.is_cuda

        features, masks, rfeatures = ctx.saved_tensors
@@ -180,7 +193,8 @@ carafe = CARAFEFunction.apply
class CARAFE(Module):
    """ CARAFE: Content-Aware ReAssembly of FEatures

    Please refer to `CARAFE: Content-Aware ReAssembly of FEatures
    <https://arxiv.org/abs/1905.02188>`_ for more details.

    Args:
        kernel_size (int): reassemble kernel size
@@ -191,8 +205,8 @@ class CARAFE(Module):
            upsampled feature map
    """

    def __init__(self, kernel_size: int, group_size: int, scale_factor: int):
        super().__init__()

        assert isinstance(kernel_size, int) and isinstance(
            group_size, int) and isinstance(scale_factor, int)
@@ -200,7 +214,7 @@ class CARAFE(Module):
        self.group_size = group_size
        self.scale_factor = scale_factor

    def forward(self, features: Tensor, masks: Tensor) -> Tensor:
        return carafe(features, masks, self.kernel_size, self.group_size,
                      self.scale_factor)
@@ -211,8 +225,8 @@ class CARAFEPack(nn.Module):
    compressor 2) content encoder 3) CARAFE op.

    Official implementation of ICCV 2019 paper
    `CARAFE: Content-Aware ReAssembly of FEatures
    <https://arxiv.org/abs/1905.02188>`_.

    Args:
        channels (int): input feature channels
@@ -228,14 +242,14 @@ class CARAFEPack(nn.Module):
    """

    def __init__(self,
                 channels: int,
                 scale_factor: int,
                 up_kernel: int = 5,
                 up_group: int = 1,
                 encoder_kernel: int = 3,
                 encoder_dilation: int = 1,
                 compressed_channels: int = 64):
        super().__init__()
        self.channels = channels
        self.scale_factor = scale_factor
        self.up_kernel = up_kernel
@@ -261,7 +275,7 @@ class CARAFEPack(nn.Module):
                xavier_init(m, distribution='uniform')
        normal_init(self.content_encoder, std=0.001)

    def kernel_normalizer(self, mask: Tensor) -> Tensor:
        mask = F.pixel_shuffle(mask, self.scale_factor)
        n, mask_c, h, w = mask.size()
        # use float division explicitly,
@@ -274,11 +288,11 @@ class CARAFEPack(nn.Module):
        return mask

    def feature_reassemble(self, x: Tensor, mask: Tensor) -> Tensor:
        x = carafe(x, mask, self.up_kernel, self.up_group, self.scale_factor)
        return x

    def forward(self, x: Tensor) -> Tensor:
        compressed_x = self.channel_compressor(x)
        mask = self.content_encoder(compressed_x)
        mask = self.kernel_normalizer(mask)
...
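A usage sketch for `CARAFEPack`, which learns the reassembly kernels internally and upsamples its input by `scale_factor`; the op is assumed to require a CUDA build here, so the call is guarded:

```python
import torch
from mmcv.ops import CARAFEPack

if torch.cuda.is_available():
    x = torch.rand(2, 64, 16, 16).cuda()
    upsampler = CARAFEPack(channels=64, scale_factor=2).cuda()
    y = upsampler(x)
    assert y.shape == (2, 64, 32, 32)   # spatially upsampled by scale_factor
```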
@@ -6,7 +6,7 @@ import torch.nn.functional as F
from mmcv.cnn import PLUGIN_LAYERS, Scale


def NEG_INF_DIAG(n: int, device: torch.device) -> torch.Tensor:
    """Returns a diagonal matrix of size [n, n].

    The diagonal entries are all "-inf". This is for avoiding calculating the
@@ -41,7 +41,7 @@ class CrissCrossAttention(nn.Module):
        in_channels (int): Channels of the input feature map.
    """

    def __init__(self, in_channels: int) -> None:
        super().__init__()
        self.query_conv = nn.Conv2d(in_channels, in_channels // 8, 1)
        self.key_conv = nn.Conv2d(in_channels, in_channels // 8, 1)
@@ -49,14 +49,15 @@ class CrissCrossAttention(nn.Module):
        self.gamma = Scale(0.)
        self.in_channels = in_channels

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """forward function of Criss-Cross Attention.

        Args:
            x (torch.Tensor): Input feature with the shape of
                (batch_size, in_channels, height, width).

        Returns:
            torch.Tensor: Output of the layer, with the shape of
                (batch_size, in_channels, height, width)
        """
        B, C, H, W = x.size()
@@ -77,7 +78,7 @@ class CrissCrossAttention(nn.Module):
        return out

    def __repr__(self) -> str:
        s = self.__class__.__name__
        s += f'(in_channels={self.in_channels})'
        return s
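A minimal usage example for `CrissCrossAttention`; apart from the mmcv import it is plain PyTorch, and the output keeps the input shape as documented above:

```python
import torch
from mmcv.ops import CrissCrossAttention

attn = CrissCrossAttention(in_channels=64)
x = torch.rand(2, 64, 16, 16)
out = attn(x)
assert out.shape == x.shape
```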
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Sequence, Tuple
import torch
from torch import Tensor
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from ..utils import ext_loader
ext_module = ext_loader.load_ext(
'_ext', ['chamfer_distance_forward', 'chamfer_distance_backward'])
class ChamferDistanceFunction(Function):
"""This is an implementation of the 2D Chamfer Distance.
It has been used in the paper `Oriented RepPoints for Aerial Object
    Detection (CVPR 2022) <https://arxiv.org/abs/2105.11111>`_.
"""
@staticmethod
def forward(ctx, xyz1: Tensor, xyz2: Tensor) -> Sequence[Tensor]:
"""
Args:
xyz1 (Tensor): Point set with shape (B, N, 2).
xyz2 (Tensor): Point set with shape (B, N, 2).
Returns:
Sequence[Tensor]:
- dist1 (Tensor): Chamfer distance (xyz1 to xyz2) with
shape (B, N).
- dist2 (Tensor): Chamfer distance (xyz2 to xyz1) with
shape (B, N).
            - idx1 (Tensor): Index of chamfer distance (xyz1 to xyz2)
                with shape (B, N), which is used to compute the gradient.
            - idx2 (Tensor): Index of chamfer distance (xyz2 to xyz1)
                with shape (B, N), which is used to compute the gradient.
"""
batch_size, n, _ = xyz1.size()
_, m, _ = xyz2.size()
device = xyz1.device
xyz1 = xyz1.contiguous()
xyz2 = xyz2.contiguous()
dist1 = torch.zeros(batch_size, n).to(device)
dist2 = torch.zeros(batch_size, m).to(device)
idx1 = torch.zeros(batch_size, n).type(torch.IntTensor).to(device)
idx2 = torch.zeros(batch_size, m).type(torch.IntTensor).to(device)
ext_module.chamfer_distance_forward(xyz1, xyz2, dist1, dist2, idx1,
idx2)
ctx.save_for_backward(xyz1, xyz2, idx1, idx2)
return dist1, dist2, idx1, idx2
@staticmethod
@once_differentiable
def backward(ctx, grad_dist1: Tensor, grad_dist2: Tensor,
grad_idx1: Tensor,
grad_idx2: Tensor) -> Tuple[Tensor, Tensor]:
"""
Args:
grad_dist1 (Tensor): Gradient of chamfer distance
(xyz1 to xyz2) with shape (B, N).
grad_dist2 (Tensor): Gradient of chamfer distance
(xyz2 to xyz1) with shape (B, N).
            grad_idx1 (Tensor): Index of chamfer distance (xyz1 to xyz2)
                with shape (B, N), which is used to compute the gradient.
            grad_idx2 (Tensor): Index of chamfer distance (xyz2 to xyz1)
                with shape (B, N), which is used to compute the gradient.
Returns:
Tuple[Tensor, Tensor]:
- grad_xyz1 (Tensor): Gradient of the point set with shape \
(B, N, 2).
                - grad_xyz2 (Tensor): Gradient of the point set with shape \
(B, N, 2).
"""
xyz1, xyz2, idx1, idx2 = ctx.saved_tensors
device = grad_dist1.device
grad_dist1 = grad_dist1.contiguous()
grad_dist2 = grad_dist2.contiguous()
grad_xyz1 = torch.zeros(xyz1.size()).to(device)
grad_xyz2 = torch.zeros(xyz2.size()).to(device)
ext_module.chamfer_distance_backward(xyz1, xyz2, grad_xyz1, grad_xyz2,
grad_dist1, grad_dist2, idx1,
idx2)
return grad_xyz1, grad_xyz2
chamfer_distance = ChamferDistanceFunction.apply
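A usage sketch for `chamfer_distance` on 2D point sets of shape (B, N, 2); the kernels are assumed to be CUDA-only here, so the call is guarded:

```python
import torch
from mmcv.ops import chamfer_distance

if torch.cuda.is_available():
    xyz1 = torch.rand(2, 100, 2).cuda()
    xyz2 = torch.rand(2, 100, 2).cuda()
    dist1, dist2, idx1, idx2 = chamfer_distance(xyz1, xyz2)
    assert dist1.shape == (2, 100) and dist2.shape == (2, 100)
```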
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Union

import numpy as np
import torch
@@ -7,21 +9,22 @@ from ..utils import ext_loader
ext_module = ext_loader.load_ext('_ext', ['contour_expand'])


def contour_expand(kernel_mask: Union[np.array, torch.Tensor],
                   internal_kernel_label: Union[np.array, torch.Tensor],
                   min_kernel_area: int, kernel_num: int) -> list:
    """Expand kernel contours so that foreground pixels are assigned into
    instances.

    Args:
        kernel_mask (np.array or torch.Tensor): The instance kernel mask with
            size hxw.
        internal_kernel_label (np.array or torch.Tensor): The instance internal
            kernel label with size hxw.
        min_kernel_area (int): The minimum kernel area.
        kernel_num (int): The instance kernel number.

    Returns:
        list: The instance index map with size hxw.
    """
    assert isinstance(kernel_mask, (torch.Tensor, np.ndarray))
    assert isinstance(internal_kernel_label, (torch.Tensor, np.ndarray))
@@ -42,7 +45,7 @@ def contour_expand(kernel_mask, internal_kernel_label, min_kernel_area,
            internal_kernel_label,
            min_kernel_area=min_kernel_area,
            kernel_num=kernel_num)
        label = label.tolist()  # type: ignore
    else:
        label = ext_module.contour_expand(kernel_mask, internal_kernel_label,
                                          min_kernel_area, kernel_num)
...
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Tuple
import torch
from ..utils import ext_loader
ext_module = ext_loader.load_ext('_ext', ['convex_iou', 'convex_giou'])
def convex_giou(pointsets: torch.Tensor,
polygons: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
"""Return generalized intersection-over-union (Jaccard index) between point
sets and polygons.
Args:
pointsets (torch.Tensor): It has shape (N, 18),
indicating (x1, y1, x2, y2, ..., x9, y9) for each row.
polygons (torch.Tensor): It has shape (N, 8),
indicating (x1, y1, x2, y2, x3, y3, x4, y4) for each row.
Returns:
tuple[torch.Tensor, torch.Tensor]: The first element is the gious
between point sets and polygons with the shape (N,). The second
element is the gradient of point sets with the shape (N, 18).
"""
output = pointsets.new_zeros((pointsets.size(0), 19))
ext_module.convex_giou(pointsets, polygons, output)
convex_giou = output[:, -1]
points_grad = output[:, 0:-1]
return convex_giou, points_grad
def convex_iou(pointsets: torch.Tensor,
polygons: torch.Tensor) -> torch.Tensor:
"""Return intersection-over-union (Jaccard index) between point sets and
polygons.
Args:
pointsets (torch.Tensor): It has shape (N, 18),
indicating (x1, y1, x2, y2, ..., x9, y9) for each row.
polygons (torch.Tensor): It has shape (K, 8),
indicating (x1, y1, x2, y2, x3, y3, x4, y4) for each row.
Returns:
torch.Tensor: Return the ious between point sets and polygons with the
shape (N, K).
"""
N, K = pointsets.size(0), polygons.size(0)
ious = pointsets.new_zeros((N, K))
ext_module.convex_iou(pointsets, polygons, ious)
return ious
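A usage sketch matching the shapes documented above; both ops run through the compiled extension, which is assumed to be CUDA-only here, so the calls are guarded:

```python
import torch
from mmcv.ops import convex_giou, convex_iou

if torch.cuda.is_available():
    pointsets = torch.rand(4, 18).cuda()           # N=4 point sets of 9 (x, y) points
    polygons = torch.rand(3, 8).cuda()             # K=3 quadrilaterals
    ious = convex_iou(pointsets, polygons)         # shape (N, K) = (4, 3)

    polygons_n = torch.rand(4, 8).cuda()           # one polygon per point set
    gious, grad = convex_giou(pointsets, polygons_n)   # shapes (4,) and (4, 18)
```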
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch import Tensor, nn
from torch.autograd import Function

_mode_dict = {'top': 0, 'bottom': 1, 'left': 2, 'right': 3}


def _corner_pool(x: Tensor, dim: int, flip: bool) -> Tensor:
    size = x.size(dim)
    output = x.clone()

    ind = 1
    while ind < size:
        if flip:
            cur_start = 0
            cur_len = size - ind
            next_start = ind
            next_len = size - ind
        else:
            cur_start = ind
            cur_len = size - ind
            next_start = 0
            next_len = size - ind

        # max_temp should be cloned for backward computation
        max_temp = output.narrow(dim, cur_start, cur_len).clone()
        cur_temp = output.narrow(dim, cur_start, cur_len)
        next_temp = output.narrow(dim, next_start, next_len)

        cur_temp[...] = torch.where(max_temp > next_temp, max_temp, next_temp)
        ind = ind << 1

    return output
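The doubling loop above is a cumulative maximum along `dim` (reversed when `flip=True`), so it matches `torch.cummax`; a quick check, assuming this module is importable as `mmcv.ops.corner_pool`:

```python
import torch
from mmcv.ops.corner_pool import _corner_pool

x = torch.randn(1, 3, 5, 7)
assert torch.equal(_corner_pool(x, 2, False), x.cummax(dim=2).values)
assert torch.equal(_corner_pool(x, 2, True),
                   x.flip(2).cummax(dim=2).values.flip(2))
```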
class TopPoolFunction(Function):

    @staticmethod
    def symbolic(g, input: Tensor) -> Tensor:
        output = g.op(
            'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['top']))
        return output

    @staticmethod
    def forward(ctx, input: Tensor) -> Tensor:
        return _corner_pool(input, 2, True)


class BottomPoolFunction(Function):

    @staticmethod
    def symbolic(g, input: Tensor) -> Tensor:
        output = g.op(
            'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['bottom']))
        return output

    @staticmethod
    def forward(ctx, input: Tensor) -> Tensor:
        return _corner_pool(input, 2, False)


class LeftPoolFunction(Function):

    @staticmethod
    def symbolic(g, input: Tensor) -> Tensor:
        output = g.op(
            'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['left']))
        return output

    @staticmethod
    def forward(ctx, input: Tensor) -> Tensor:
        return _corner_pool(input, 3, True)


class RightPoolFunction(Function):

    @staticmethod
    def symbolic(g, input: Tensor) -> Tensor:
        output = g.op(
            'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['right']))
        return output

    @staticmethod
    def forward(ctx, input: Tensor) -> Tensor:
        return _corner_pool(input, 3, False)
class CornerPool(nn.Module):
@@ -104,11 +93,13 @@ class CornerPool(nn.Module):
    Corner Pooling is a new type of pooling layer that helps a
    convolutional network better localize corners of bounding boxes.

    Please refer to `CornerNet: Detecting Objects as Paired Keypoints
    <https://arxiv.org/abs/1808.01244>`_ for more details.

    Code is modified from https://github.com/princeton-vl/CornerNet-Lite.

    Args:
        mode (str): Pooling orientation for the pooling layer

            - 'bottom': Bottom Pooling
            - 'left': Left Pooling
@@ -133,13 +124,13 @@ class CornerPool(nn.Module):
        'top': (2, True),
    }

    def __init__(self, mode: str):
        super().__init__()
        assert mode in self.pool_functions
        self.mode = mode
        self.corner_pool: Function = self.pool_functions[mode]

    def forward(self, x: Tensor) -> Tensor:
        if torch.__version__ != 'parrots' and torch.__version__ >= '1.5.0':
            if torch.onnx.is_in_onnx_export():
                assert torch.__version__ >= '1.7.0', \
@@ -158,4 +149,8 @@ class CornerPool(nn.Module):
                pool_tensor = pool_tensor.flip(dim)
            return pool_tensor
        else:
            if torch.onnx.is_in_onnx_export():
                return self.corner_pool.apply(x)
            else:
                dim, flip = self.cummax_dim_flip[self.mode]
                return _corner_pool(x, dim, flip)
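A minimal usage example: with PyTorch >= 1.5 the module runs as the pure-PyTorch cumulative maximum shown above, so it works on CPU; `'bottom'` corresponds to a plain cumulative maximum along the height dimension:

```python
import torch
from mmcv.ops import CornerPool

x = torch.randn(2, 8, 16, 16)
pooled = CornerPool('bottom')(x)
assert torch.equal(pooled, x.cummax(dim=2).values)
```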
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Tuple

import torch
from torch import Tensor, nn
from torch.autograd import Function
@@ -15,14 +17,14 @@ class CorrelationFunction(Function):

    @staticmethod
    def forward(ctx,
                input1: Tensor,
                input2: Tensor,
                kernel_size: int = 1,
                max_displacement: int = 1,
                stride: int = 1,
                padding: int = 1,
                dilation: int = 1,
                dilation_patch: int = 1) -> Tensor:

        ctx.save_for_backward(input1, input2)
@@ -60,7 +62,9 @@ class CorrelationFunction(Function):

    @staticmethod
    @once_differentiable
    def backward(
        ctx, grad_output: Tensor
    ) -> Tuple[Tensor, Tensor, None, None, None, None, None, None]:
        input1, input2 = ctx.saved_tensors

        kH, kW = ctx.kernel_size
...
@@ -13,11 +13,19 @@ This folder contains all non-python code for MMCV custom ops. Please follow the
│   ├── pytorch_cpp_helper.hpp
│   ├── pytorch_cuda_helper.hpp
│   ├── pytorch_device_registry.hpp
│   ├── cuda
│   │   ├── common_cuda_helper.hpp
│   │   ├── parrots_cudawarpfunction.cuh
│   │   ├── ...
│   │   └── ops_cuda_kernel.cuh
│   ├── mps
│   │   ├── MPSLibrary.h
│   │   ├── ...
│   │   └── MPSUtils.h
│   ├── mlu
│   │   └── ...
│   └── utils
│       └── ...
├── onnxruntime
│   ├── onnxruntime_register.h
│   ├── onnxruntime_session_options_config_keys.h
@@ -41,9 +49,15 @@ This folder contains all non-python code for MMCV custom ops. Please follow the
│   ├── cuda
│   │   ├── ...
│   │   └── ops_cuda.cu
│   ├── cpu
│   │   ├── ...
│   │   └── ops.cpp
│   ├── mps
│   │   ├── ...
│   │   └── op_mps.mm
│   └── mlu
│       ├── ...
│       └── op_mlu.cpp
└── tensorrt
    ├── trt_cuda_helper.cuh
    ├── trt_plugin_helper.hpp
- `common`: This directory contains all tools and shared codes.
  - `cuda`: The cuda kernels which can be shared by all backends. **HIP** kernels are also here since they have similar syntax.
  - `mps`: The tools used to support MPS ops. **NOTE** that MPS support is **experimental**.
  - `mlu`: The MLU kernels used to support [Cambricon](https://www.cambricon.com/) devices.
  - `utils`: The kernels and utils of spconv.
- `onnxruntime`: **ONNX Runtime** support for custom ops. It has been deprecated; please try the latest custom ops in [MMDeploy](https://github.com/open-mmlab/mmdeploy).
  - `cpu`: CPU implementation of supported ops.
- `parrots`: **Parrots** is a deep learning framework for model training and inference. Parrots custom ops are placed in this directory.
- `pytorch`: **PyTorch** custom ops are supported by binding C++ to Python with **pybind11**. The ops implementation and binding codes are placed in this directory.
  - `cuda`: This directory contains cuda kernel launchers, which feed memory pointers of tensors to the cuda kernels in `common/cuda`. The launchers provide the c++ interface to the cuda implementation of the corresponding custom ops.
  - `cpu`: This directory contains cpu implementations of the corresponding custom ops.
  - `mlu`: This directory contains the launchers of the MLU kernels.
  - `mps`: MPS ops implementation and launchers.
- `tensorrt`: **TensorRT** support for custom ops. It has been deprecated; please try the latest custom ops in [MMDeploy](https://github.com/open-mmlab/mmdeploy).
  - `plugins`: This directory contains the implementation of the supported custom ops. Some ops might also use the shared cuda kernels in `common/cuda`.
## How to add new PyTorch ops? ## How to add new PyTorch ops?
1. (Optional) Add shared kernel in `common` to support special hardware platform. 1. (Optional) Add shared kernel in `common` to support special hardware platform.
```c++ ```c++
// src/common/cuda/new_ops_cuda_kernel.cuh // src/common/cuda/new_ops_cuda_kernel.cuh
template <typename T> template <typename T>
__global__ void new_ops_forward_cuda_kernel(const T* input, T* output, ...) { __global__ void new_ops_forward_cuda_kernel(const T* input, T* output, ...) {
// forward here // forward here
} }
``` ```
Add cuda kernel launcher in `pytorch/cuda`. Add cuda kernel launcher in `pytorch/cuda`.
```c++
// src/pytorch/cuda
#include <new_ops_cuda_kernel.cuh>

void NewOpsForwardCUDAKernelLauncher(Tensor input, Tensor output, ...){
  // initialize
  at::cuda::CUDAGuard device_guard(input.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  ...
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      input.scalar_type(), "new_ops_forward_cuda_kernel", ([&] {
        new_ops_forward_cuda_kernel<scalar_t>
            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK, 0, stream>>>(
                input.data_ptr<scalar_t>(), output.data_ptr<scalar_t>(),...);
      }));
  AT_CUDA_CHECK(cudaGetLastError());
}
```
2. Register the implementation for different devices.
```c++
// src/pytorch/cuda/cudabind.cpp
...
Tensor new_ops_forward_cuda(Tensor input, Tensor output, ...){
  // implement cuda forward here
  // use `NewOpsForwardCUDAKernelLauncher` here
}
// declare interface here.
Tensor new_ops_forward_impl(Tensor input, Tensor output, ...);
// register the implementation for given device (CUDA here).
REGISTER_DEVICE_IMPL(new_ops_forward_impl, CUDA, new_ops_forward_cuda);
```
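A CPU implementation can be registered for dispatch in the same way. The snippet below is only a minimal sketch mirroring the CUDA example above; `new_ops_forward_cpu` and the file path are hypothetical.
```c++
// src/pytorch/cpu/new_ops.cpp (hypothetical)
...
Tensor new_ops_forward_cpu(Tensor input, Tensor output, ...){
  // implement cpu forward here
}
// register the implementation for the CPU device.
REGISTER_DEVICE_IMPL(new_ops_forward_impl, CPU, new_ops_forward_cpu);
```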
3. Add the op implementation in the `pytorch` directory. Select the implementation according to the device type.
```c++
// src/pytorch/new_ops.cpp
Tensor new_ops_forward_impl(Tensor input, Tensor output, ...){
  // dispatch the implementation according to the device type of input.
  DISPATCH_DEVICE_IMPL(new_ops_forward_impl, input, output, ...);
}
...
Tensor new_ops_forward(Tensor input, Tensor output, ...){
  return new_ops_forward_impl(input, output, ...);
}
```
4. Bind the implementation in `pytorch/pybind.cpp`.
```c++
// src/pytorch/pybind.cpp
...
Tensor new_ops_forward(Tensor input, Tensor output, ...);
...
// bind with pybind11
m.def("new_ops_forward", &new_ops_forward, "new_ops_forward",
      py::arg("input"), py::arg("output"), ...);
...
```
5. Build MMCV again. Enjoy the new ops in Python.
```python
from ..utils import ext_loader
ext_module = ext_loader.load_ext('_ext', ['new_ops_forward'])
...
ext_module.new_ops_forward(input, output, ...)
```
@@ -220,6 +220,10 @@ HOST_DEVICE_INLINE int convex_hull_graham(const Point<T> (&p)[24],
                return temp > 0;
              }
            });
  // compute distance to origin after sort, since the points are now different.
  for (int i = 0; i < num_in; i++) {
    dist[i] = dot_2d<T>(q[i], q[i]);
  }
#endif
  // Step 4:
...
// Copyright (c) OpenMMLab. All rights reserved.
// Modified from
// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/orn/src/cuda/ActiveRotatingFilter_cuda.cu
#ifndef ACTIVE_ROTATED_FILTER_CUDA_KERNEL_CUH
#define ACTIVE_ROTATED_FILTER_CUDA_KERNEL_CUH
#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif
template <typename scalar_t>
__global__ void active_rotated_filter_forward_cuda_kernel(
const int nthreads, const scalar_t* weight_data, const int* indices_data,
const int num_input_planes, const int num_output_planes,
const int num_orientations, const int num_rotations, const int nEntry,
scalar_t* output_data) {
CUDA_1D_KERNEL_LOOP(index, nthreads) {
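// Decompose the flat index into (filter entry l, input plane j, output plane i)
// and scatter the weight value into each of the num_rotations rotated filters.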
int l = index % nEntry;
int j = (index / nEntry) % num_input_planes;
int i = index / nEntry / num_input_planes;
int k;
scalar_t val = *(weight_data + index);
for (k = 0; k < num_rotations; k++) {
int idx = (int)(*(indices_data + l * num_rotations + k)) - 1;
scalar_t* target = output_data +
i * (num_rotations * num_input_planes * nEntry) +
k * (num_input_planes * nEntry) + j * (nEntry) + idx;
*target = val;
}
}
}
template <typename scalar_t>
__global__ void active_rotated_filter_backward_cuda_kernel(
const int nthreads, const scalar_t* gradWeight_data,
const int* indices_data, const int num_input_planes,
const int num_output_planes, const int num_orientations,
const int num_rotations, const int nEntry, scalar_t* weight_data) {
CUDA_1D_KERNEL_LOOP(index, nthreads) {
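// Inverse of the forward pass: each canonical weight element accumulates the
// gradients from all of its rotated copies.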
int l = index % nEntry;
int j = (index / nEntry) % num_input_planes;
int i = index / nEntry / num_input_planes;
int k;
scalar_t* val = weight_data + index;
*val = 0;
scalar_t tmp = 0;
for (k = 0; k < num_rotations; k++) {
int idx = (int)(*(indices_data + l * num_rotations + k)) - 1;
scalar_t target =
*(gradWeight_data + i * (num_rotations * num_input_planes * nEntry) +
k * (num_input_planes * nEntry) + j * (nEntry) + idx);
tmp = tmp + target;
}
*val = tmp;
}
}
#endif // ACTIVE_ROTATED_FILTER_CUDA_KERNEL_CUH
@@ -22,34 +22,34 @@ __global__ void assign_score_withk_forward_cuda_kernel(
    const int O, const int aggregate, const T* points, const T* centers,
    const T* scores, const int64_t* knn_idx, T* output) {
  // ----- parallel loop for B, N1, K and O ---------
  CUDA_1D_KERNEL_LOOP(i, B * O * N1 * K) {
    // ------- loop for M ----------
    const int b = (int)(i / (O * N1 * K));
    const int o = (int)(i % (O * N1 * K) / (N1 * K));
    const int n = (int)(i % (N1 * K) / K);
    const int k = (int)(i % K);
    const int cn = (int)knn_idx[b * K * N1 + n * K +
                                0];  // The first neighbor is the center point
    const int kn = (int)knn_idx[b * K * N1 + n * K + k];
    if (kn >= N0 ||
        kn < 0) {  // if index overflows, it is out of the neighborhood range
      return;
    }
    assert(b < B);
    assert(kn < N0);
    assert(cn < N0);
    assert(o < O);
    assert(n < N1);
    const int out_idx = b * N1 * O * K + o * N1 * K + n * K + k;
    T val = output[out_idx];
    for (int m = 0; m < M; m++) {
      val += points[b * N0 * M * O + kn * M * O + m * O + o] *
                 scores[b * N1 * K * M + n * K * M + k * M + m] -
             centers[b * N0 * M * O + cn * M * O + m * O + o] *
                 scores[b * N1 * K * M + n * K * M + k * M + m];
    }
    output[out_idx] = val;
  }
}
template <typename T>
@@ -58,27 +58,27 @@ __global__ void assign_score_withk_points_backward_cuda_kernel(
    const int O, const int aggregate, const T* grad_out, const T* scores,
    const int64_t* knn_idx, T* grad_points, T* grad_centers) {
  // ----- parallel loop for B, M, O ---------
  CUDA_1D_KERNEL_LOOP(i, B * M * O) {
    int b = (int)(i / (M * O));
    int m = (int)(i % (M * O) / O);
    int o = (int)(i % O);
    // ----- loop for N,K ---------
    for (int n = 0; n < N; n++) {
      for (int k = 0; k < K; k++) {
        int kn = knn_idx[b * N * K + n * K + k];
        int cn = knn_idx[b * N * K + n * K + 0];
        if (kn >= N0 || kn < 0) {  // if index overflows, it is out of the
                                   // neighborhood range
          continue;
        }
        atomicAdd(grad_points + b * N0 * M * O + kn * M * O + m * O + o,
                  scores[b * N * K * M + n * K * M + k * M + m] *
                      grad_out[b * O * N * K + o * N * K + n * K + k]);
        atomicAdd(grad_centers + b * N0 * M * O + cn * M * O + m * O + o,
                  -scores[b * N * K * M + n * K * M + k * M + m] *
                      grad_out[b * O * N * K + o * N * K + n * K + k]);
      }
    }
  }
}
@@ -89,28 +89,28 @@ __global__ void assign_score_withk_scores_backward_cuda_kernel(
    const int O, const int aggregate, const T* grad_out, const T* points,
    const T* centers, const int64_t* knn_idx, T* grad_scores) {
  // ----- parallel loop for B, N, K, M ---------
  CUDA_1D_KERNEL_LOOP(i, B * N * K * M) {
    const int b = (int)(i / (N * M * K));
    const int n = (int)(i % (N * M * K) / M / K);
    const int k = (int)(i % (M * K) / M);
    const int m = (int)(i % M);
    const int cn = knn_idx[b * N * K + n * K + 0];
    const int kn = knn_idx[b * N * K + n * K + k];
    if (kn >= N0 ||
        kn < 0) {  // if index overflows, it is out of the neighborhood range
      return;
    }
    // -------------- loop for O ------------------------
    const int out_idx = b * N * K * M + n * K * M + k * M + m;
    T val = grad_scores[out_idx];
    for (int o = 0; o < O; o++) {
      val += (points[b * N0 * M * O + kn * M * O + m * O + o] -
              centers[b * N0 * M * O + cn * M * O + m * O + o]) *
             grad_out[b * O * N * K + o * N * K + n * K + k];
    }
    grad_scores[out_idx] = val;
  }
}
#endif  // ASSIGN_SCORE_WITHK_CUDA_KERNEL_CUH
@@ -21,35 +21,36 @@ __global__ void ball_query_forward_cuda_kernel(int b, int n, int m,
  // output:
  //      idx: (B, M, nsample)
  int bs_idx = blockIdx.y;
  CUDA_1D_KERNEL_LOOP(pt_idx, m) {
    if (bs_idx >= b) return;
    new_xyz += bs_idx * m * 3 + pt_idx * 3;
    xyz += bs_idx * n * 3;
    idx += bs_idx * m * nsample + pt_idx * nsample;
    float max_radius2 = max_radius * max_radius;
    float min_radius2 = min_radius * min_radius;
    T new_x = new_xyz[0];
    T new_y = new_xyz[1];
    T new_z = new_xyz[2];
    int cnt = 0;
    for (int k = 0; k < n; ++k) {
      T x = xyz[k * 3 + 0];
      T y = xyz[k * 3 + 1];
      T z = xyz[k * 3 + 2];
      T d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) +
             (new_z - z) * (new_z - z);
      if (d2 == 0 || (d2 >= min_radius2 && d2 < max_radius2)) {
        if (cnt == 0) {
          for (int l = 0; l < nsample; ++l) {
            idx[l] = k;
          }
        }
        idx[cnt] = k;
        ++cnt;
        if (cnt >= nsample) break;
      }
    }
  }
}
...
@@ -8,6 +8,27 @@
#include "pytorch_cuda_helper.hpp"
#endif
template <typename T>
__device__ __forceinline__ void load_bbox(const T* bbox, const int base, T& x1,
T& y1, T& x2, T& y2) {
x1 = bbox[base];
y1 = bbox[base + 1];
x2 = bbox[base + 2];
y2 = bbox[base + 3];
}
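// Specialization for float: fetch all four box coordinates with a single
// vectorized float4 load.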
template <>
__device__ __forceinline__ void load_bbox<float>(const float* bbox,
const int base, float& x1,
float& y1, float& x2,
float& y2) {
const float4 bbox_offset = reinterpret_cast<const float4*>(bbox + base)[0];
x1 = bbox_offset.x;
y1 = bbox_offset.y;
x2 = bbox_offset.z;
y2 = bbox_offset.w;
}
template <typename T>
__global__ void bbox_overlaps_cuda_kernel(const T* bbox1, const T* bbox2,
                                          T* ious, const int num_bbox1,
@@ -16,69 +37,111 @@ __global__ void bbox_overlaps_cuda_kernel(const T* bbox1, const T* bbox2,
                                          const int offset) {
  if (aligned) {
    CUDA_1D_KERNEL_LOOP(index, num_bbox1) {
      const int b1 = index;
      const int b2 = index;

      const int base1 = b1 << 2;  // b1 * 4
      T b1_x1, b1_y1, b1_x2, b1_y2;
      load_bbox<T>(bbox1, base1, b1_x1, b1_y1, b1_x2, b1_y2);
      const T b1_area = (b1_x2 - b1_x1 + offset) * (b1_y2 - b1_y1 + offset);

      const int base2 = b2 << 2;  // b2 * 4
      T b2_x1, b2_y1, b2_x2, b2_y2;
      load_bbox<T>(bbox2, base2, b2_x1, b2_y1, b2_x2, b2_y2);
      const T b2_area = (b2_x2 - b2_x1 + offset) * (b2_y2 - b2_y1 + offset);

      const T left = fmaxf(b1_x1, b2_x1), right = fminf(b1_x2, b2_x2);
      const T top = fmaxf(b1_y1, b2_y1), bottom = fminf(b1_y2, b2_y2);
      const T width = fmaxf(right - left + offset, 0.f);
      const T height = fmaxf(bottom - top + offset, 0.f);
      const T interS = width * height;

      const T baseS =
          fmaxf(mode == 0 ? b1_area + b2_area - interS : b1_area, T(offset));
      ious[index] = interS / baseS;
    }
  } else {
    CUDA_1D_KERNEL_LOOP(index, num_bbox1 * num_bbox2) {
      const int b1 = index / num_bbox2;
      const int b2 = index % num_bbox2;

      const int base1 = b1 << 2;  // b1 * 4
      T b1_x1, b1_y1, b1_x2, b1_y2;
      load_bbox<T>(bbox1, base1, b1_x1, b1_y1, b1_x2, b1_y2);
      const T b1_area = (b1_x2 - b1_x1 + offset) * (b1_y2 - b1_y1 + offset);

      const int base2 = b2 << 2;  // b2 * 4
      T b2_x1, b2_y1, b2_x2, b2_y2;
      load_bbox<T>(bbox2, base2, b2_x1, b2_y1, b2_x2, b2_y2);
      const T b2_area = (b2_x2 - b2_x1 + offset) * (b2_y2 - b2_y1 + offset);

      const T left = fmaxf(b1_x1, b2_x1), right = fminf(b1_x2, b2_x2);
      const T top = fmaxf(b1_y1, b2_y1), bottom = fminf(b1_y2, b2_y2);
      const T width = fmaxf(right - left + offset, 0.f);
      const T height = fmaxf(bottom - top + offset, 0.f);
      const T interS = width * height;

      const T baseS =
          fmaxf(mode == 0 ? b1_area + b2_area - interS : b1_area, T(offset));
      ious[index] = interS / baseS;
    }
  }
}
#if __CUDA_ARCH__ >= 530
__device__ __forceinline__ __half __half_area(const __half x1, const __half y1,
const __half x2, const __half y2,
const __half offset) {
const __half half_w = __hadd(__hsub(x2, x1), offset);
const __half half_h = __hadd(__hsub(y2, y1), offset);
return __hmul(half_w, half_h);
}
__device__ __forceinline__ __half __half_max(const __half a, const __half b) {
return __hge(a, b) ? a : b;
}
__device__ __forceinline__ __half __half_min(const __half a, const __half b) {
return __hle(a, b) ? a : b;
}
// fp16 won't provide much speedup when aligned==true. It is useful when
// aligned==false, where it gives roughly a 40% speedup.
__device__ void bbox_overlaps_cuda_kernel_half(
const __half* bbox1, const __half* bbox2, __half* ious, const int num_bbox1,
const int num_bbox2, const int mode, const bool aligned, const int offset) {
const int num_output = aligned ? num_bbox1 : num_bbox1 * num_bbox2;
const __half h_offset = __int2half_rn(offset);
CUDA_1D_KERNEL_LOOP(index, num_output) {
const int b1 = aligned ? index : index / num_bbox2;
const int b2 = aligned ? index : index % num_bbox2;
const int base1 = b1 << 2;
__half b1_x1, b1_y1, b1_x2, b1_y2;
load_bbox<__half>(bbox1, base1, b1_x1, b1_y1, b1_x2, b1_y2);
const __half b1_area = __half_area(b1_x1, b1_y1, b1_x2, b1_y2, h_offset);
const int base2 = b2 << 2;
__half b2_x1, b2_y1, b2_x2, b2_y2;
load_bbox<__half>(bbox2, base2, b2_x1, b2_y1, b2_x2, b2_y2);
const __half b2_area = __half_area(b2_x1, b2_y1, b2_x2, b2_y2, h_offset);
const __half left = __half_max(b1_x1, b2_x1),
right = __half_min(b1_x2, b2_x2);
const __half top = __half_max(b1_y1, b2_y1),
bottom = __half_min(b1_y2, b2_y2);
const __half width =
__half_max(__hadd(__hsub(right, left), h_offset), __float2half(0.f));
const __half height =
__half_max(__hadd(__hsub(bottom, top), h_offset), __float2half(0.f));
const __half interS = __hmul(width, height);
const __half baseS = __half_max(
mode == 0 ? __hsub(__hadd(b1_area, b2_area), interS) : b1_area,
h_offset);
ious[index] = __hdiv(interS, baseS);
}
}
#endif // __CUDA_ARCH__ >= 530
#endif  // BBOX_OVERLAPS_CUDA_KERNEL_CUH