Commit bce7d0c3 authored by yinchimaoliang's avatar yinchimaoliang
Browse files

Merge branch 'master_temp' into indoor_pipeline

parents 1756485e 868c5fab
...@@ -93,7 +93,8 @@ class_names = ['Car'] ...@@ -93,7 +93,8 @@ class_names = ['Car']
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict( input_modality = dict(
use_lidar=True, use_lidar=False,
use_lidar_reduced=True,
use_depth=False, use_depth=False,
use_lidar_intensity=True, use_lidar_intensity=True,
use_camera=False, use_camera=False,
......
...@@ -113,7 +113,8 @@ class_names = ['Pedestrian', 'Cyclist', 'Car'] ...@@ -113,7 +113,8 @@ class_names = ['Pedestrian', 'Cyclist', 'Car']
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict( input_modality = dict(
use_lidar=True, use_lidar=False,
use_lidar_reduced=True,
use_depth=False, use_depth=False,
use_lidar_intensity=True, use_lidar_intensity=True,
use_camera=True, use_camera=True,
......
...@@ -91,7 +91,8 @@ class_names = ['Car'] ...@@ -91,7 +91,8 @@ class_names = ['Car']
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict( input_modality = dict(
use_lidar=True, use_lidar=False,
use_lidar_reduced=True,
use_depth=False, use_depth=False,
use_lidar_intensity=True, use_lidar_intensity=True,
use_camera=True, use_camera=True,
......
...@@ -90,7 +90,8 @@ class_names = ['Car'] ...@@ -90,7 +90,8 @@ class_names = ['Car']
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict( input_modality = dict(
use_lidar=True, use_lidar=False,
use_lidar_reduced=True,
use_depth=False, use_depth=False,
use_lidar_intensity=True, use_lidar_intensity=True,
use_camera=False, use_camera=False,
......
...@@ -89,7 +89,8 @@ class_names = ['Car'] ...@@ -89,7 +89,8 @@ class_names = ['Car']
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict( input_modality = dict(
use_lidar=True, use_lidar=False,
use_lidar_reduced=True,
use_depth=False, use_depth=False,
use_lidar_intensity=True, use_lidar_intensity=True,
use_camera=False, use_camera=False,
......
import torch import torch
from mmdet3d.ops.iou3d import boxes_iou3d_gpu from mmdet3d.ops.iou3d import boxes_iou3d_gpu_camera, boxes_iou3d_gpu_lidar
from mmdet.core.bbox import bbox_overlaps from mmdet.core.bbox import bbox_overlaps
from mmdet.core.bbox.iou_calculators.builder import IOU_CALCULATORS from mmdet.core.bbox.iou_calculators.builder import IOU_CALCULATORS
from .. import box_torch_ops from .. import box_torch_ops
...@@ -22,10 +22,18 @@ class BboxOverlapsNearest3D(object): ...@@ -22,10 +22,18 @@ class BboxOverlapsNearest3D(object):
@IOU_CALCULATORS.register_module() @IOU_CALCULATORS.register_module()
class BboxOverlaps3D(object): class BboxOverlaps3D(object):
"""3D IoU Calculator""" """3D IoU Calculator
def __call__(self, bboxes1, bboxes2, mode='iou', is_aligned=False): Args:
return bbox_overlaps_3d(bboxes1, bboxes2, mode, is_aligned) coordinate (str): 'camera' or 'lidar' coordinate system
"""
def __init__(self, coordinate):
assert coordinate in ['camera', 'lidar']
self.coordinate = coordinate
def __call__(self, bboxes1, bboxes2, mode='iou'):
return bbox_overlaps_3d(bboxes1, bboxes2, mode, self.coordinate)
def __repr__(self): def __repr__(self):
repr_str = self.__class__.__name__ repr_str = self.__class__.__name__
...@@ -62,7 +70,7 @@ def bbox_overlaps_nearest_3d(bboxes1, bboxes2, mode='iou', is_aligned=False): ...@@ -62,7 +70,7 @@ def bbox_overlaps_nearest_3d(bboxes1, bboxes2, mode='iou', is_aligned=False):
return ret return ret
def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou'): def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou', coordinate='camera'):
"""Calculate 3D IoU using cuda implementation """Calculate 3D IoU using cuda implementation
Args: Args:
...@@ -70,6 +78,7 @@ def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou'): ...@@ -70,6 +78,7 @@ def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou'):
bboxes2: Tensor, shape (M, 7) [x, y, z, h, w, l, ry] bboxes2: Tensor, shape (M, 7) [x, y, z, h, w, l, ry]
mode: mode (str): "iou" (intersection over union) or mode: mode (str): "iou" (intersection over union) or
iof (intersection over foreground). iof (intersection over foreground).
coordinate (str): 'camera' or 'lidar' coordinate system
Return: Return:
iou: (M, N) not support aligned mode currently iou: (M, N) not support aligned mode currently
...@@ -77,4 +86,11 @@ def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou'): ...@@ -77,4 +86,11 @@ def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou'):
# TODO: check the input dimension meanings, # TODO: check the input dimension meanings,
# this is inconsistent with that in bbox_overlaps_nearest_3d # this is inconsistent with that in bbox_overlaps_nearest_3d
assert bboxes1.size(-1) == bboxes2.size(-1) == 7 assert bboxes1.size(-1) == bboxes2.size(-1) == 7
return boxes_iou3d_gpu(bboxes1, bboxes2, mode) assert coordinate in ['camera', 'lidar']
if coordinate == 'camera':
return boxes_iou3d_gpu_camera(bboxes1, bboxes2, mode)
elif coordinate == 'lidar':
return boxes_iou3d_gpu_lidar(bboxes1, bboxes2, mode)
else:
raise NotImplementedError
...@@ -3,9 +3,10 @@ from mmdet.core.bbox.samplers import (BaseSampler, CombinedSampler, ...@@ -3,9 +3,10 @@ from mmdet.core.bbox.samplers import (BaseSampler, CombinedSampler,
IoUBalancedNegSampler, OHEMSampler, IoUBalancedNegSampler, OHEMSampler,
PseudoSampler, RandomSampler, PseudoSampler, RandomSampler,
SamplingResult) SamplingResult)
from .iou_neg_piecewise_sampler import IoUNegPiecewiseSampler
__all__ = [ __all__ = [
'BaseSampler', 'PseudoSampler', 'RandomSampler', 'BaseSampler', 'PseudoSampler', 'RandomSampler',
'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
'OHEMSampler', 'SamplingResult' 'OHEMSampler', 'SamplingResult', 'IoUNegPiecewiseSampler'
] ]
import torch
from mmdet.core.bbox.builder import BBOX_SAMPLERS
from . import RandomSampler, SamplingResult
@BBOX_SAMPLERS.register_module
class IoUNegPiecewiseSampler(RandomSampler):
"""IoU Piece-wise Sampling
Sampling negtive proposals according to a list of IoU thresholds.
The negtive proposals are divided into several pieces according
to `neg_iou_piece_thrs`. And the ratio of each piece is indicated
by `neg_piece_fractions`.
Args:
num (int): number of proposals.
pos_fraction (float): the fraction of positive proposals.
neg_piece_fractions (list): a list contains fractions that indicates
the ratio of each piece of total negtive samplers.
neg_iou_piece_thrs (list): a list contains IoU thresholds that
indicate the upper bound of this piece.
neg_pos_ub (float): the total ratio to limit the upper bound
number of negtive samples
add_gt_as_proposals (bool): whether to add gt as proposals.
"""
def __init__(self,
num,
pos_fraction=None,
neg_piece_fractions=None,
neg_iou_piece_thrs=None,
neg_pos_ub=-1,
add_gt_as_proposals=False,
return_iou=False):
super(IoUNegPiecewiseSampler,
self).__init__(num, pos_fraction, neg_pos_ub,
add_gt_as_proposals)
assert isinstance(neg_piece_fractions, list)
assert len(neg_piece_fractions) == len(neg_iou_piece_thrs)
self.neg_piece_fractions = neg_piece_fractions
self.neg_iou_thr = neg_iou_piece_thrs
self.return_iou = return_iou
self.neg_piece_num = len(self.neg_piece_fractions)
def _sample_pos(self, assign_result, num_expected, **kwargs):
"""Randomly sample some positive samples."""
pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False)
if pos_inds.numel() != 0:
pos_inds = pos_inds.squeeze(1)
if pos_inds.numel() <= num_expected:
return pos_inds
else:
return self.random_choice(pos_inds, num_expected)
def _sample_neg(self, assign_result, num_expected, **kwargs):
neg_inds = torch.nonzero(assign_result.gt_inds == 0)
if neg_inds.numel() != 0:
neg_inds = neg_inds.squeeze(1)
if len(neg_inds) <= num_expected:
return neg_inds
else:
neg_inds_choice = neg_inds.new_zeros([0])
extend_num = 0
max_overlaps = assign_result.max_overlaps[neg_inds]
for piece_inds in range(self.neg_piece_num):
if piece_inds == self.neg_piece_num - 1: # for the last piece
piece_expected_num = num_expected - len(neg_inds_choice)
min_iou_thr = 0
else:
# if the numbers of negative samplers in previous
# pieces are less than the expected number, extend
# the same number in the current piece.
piece_expected_num = int(
num_expected *
self.neg_piece_fractions[piece_inds]) + extend_num
min_iou_thr = self.neg_iou_thr[piece_inds + 1]
max_iou_thr = self.neg_iou_thr[piece_inds]
piece_neg_inds = torch.nonzero(
(max_overlaps >= min_iou_thr)
& (max_overlaps < max_iou_thr)).view(-1)
if len(piece_neg_inds) < piece_expected_num:
neg_inds_choice = torch.cat(
[neg_inds_choice, neg_inds[piece_neg_inds]], dim=0)
extend_num += piece_expected_num - len(piece_neg_inds)
else:
piece_choice = self.random_choice(piece_neg_inds,
piece_expected_num)
neg_inds_choice = torch.cat(
[neg_inds_choice, neg_inds[piece_choice]], dim=0)
extend_num = 0
return neg_inds_choice
def sample(self,
assign_result,
bboxes,
gt_bboxes,
gt_labels=None,
**kwargs):
"""Sample positive and negative bboxes.
This is a simple implementation of bbox sampling given candidates,
assigning results and ground truth bboxes.
Args:
assign_result (:obj:`AssignResult`): Bbox assigning results.
bboxes (Tensor): Boxes to be sampled from.
gt_bboxes (Tensor): Ground truth bboxes.
gt_labels (Tensor, optional): Class labels of ground truth bboxes.
Returns:
:obj:`SamplingResult`: Sampling result.
"""
if len(bboxes.shape) < 2:
bboxes = bboxes[None, :]
gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.bool)
if self.add_gt_as_proposals and len(gt_bboxes) > 0:
if gt_labels is None:
raise ValueError(
'gt_labels must be given when add_gt_as_proposals is True')
bboxes = torch.cat([gt_bboxes, bboxes], dim=0)
assign_result.add_gt_(gt_labels)
gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.bool)
gt_flags = torch.cat([gt_ones, gt_flags])
num_expected_pos = int(self.num * self.pos_fraction)
pos_inds = self.pos_sampler._sample_pos(
assign_result, num_expected_pos, bboxes=bboxes, **kwargs)
# We found that sampled indices have duplicated items occasionally.
# (may be a bug of PyTorch)
pos_inds = pos_inds.unique()
num_sampled_pos = pos_inds.numel()
num_expected_neg = self.num - num_sampled_pos
if self.neg_pos_ub >= 0:
_pos = max(1, num_sampled_pos)
neg_upper_bound = int(self.neg_pos_ub * _pos)
if num_expected_neg > neg_upper_bound:
num_expected_neg = neg_upper_bound
neg_inds = self.neg_sampler._sample_neg(
assign_result, num_expected_neg, bboxes=bboxes, **kwargs)
neg_inds = neg_inds.unique()
sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
assign_result, gt_flags)
if self.return_iou:
# PartA2 needs iou score to regression.
sampling_result.iou = assign_result.max_overlaps[torch.cat(
[pos_inds, neg_inds])]
sampling_result.iou.detach_()
return sampling_result
...@@ -184,6 +184,8 @@ class KittiDataset(torch_data.Dataset): ...@@ -184,6 +184,8 @@ class KittiDataset(torch_data.Dataset):
if self.modality['use_depth'] and self.modality['use_lidar']: if self.modality['use_depth'] and self.modality['use_lidar']:
points = self.get_lidar_depth_reduced(sample_idx) points = self.get_lidar_depth_reduced(sample_idx)
elif self.modality['use_lidar']: elif self.modality['use_lidar']:
points = self.get_lidar(sample_idx)
elif self.modality['use_lidar_reduced']:
points = self.get_lidar_reduced(sample_idx) points = self.get_lidar_reduced(sample_idx)
elif self.modality['use_depth']: elif self.modality['use_depth']:
points = self.get_pure_depth_reduced(sample_idx) points = self.get_pure_depth_reduced(sample_idx)
...@@ -238,8 +240,6 @@ class KittiDataset(torch_data.Dataset): ...@@ -238,8 +240,6 @@ class KittiDataset(torch_data.Dataset):
axis=1).astype(np.float32) axis=1).astype(np.float32)
difficulty = annos['difficulty'] difficulty = annos['difficulty']
# this change gt_bboxes_3d to velodyne coordinates # this change gt_bboxes_3d to velodyne coordinates
import pdb
pdb.set_trace()
gt_bboxes_3d = box_np_ops.box_camera_to_lidar(gt_bboxes_3d, rect, gt_bboxes_3d = box_np_ops.box_camera_to_lidar(gt_bboxes_3d, rect,
Trv2c) Trv2c)
# only center format is allowed. so we need to convert # only center format is allowed. so we need to convert
......
import warnings
import numba import numba
import numpy as np import numpy as np
from numba.errors import NumbaPerformanceWarning
from mmdet3d.core.bbox import box_np_ops from mmdet3d.core.bbox import box_np_ops
warnings.filterwarnings("ignore", category=NumbaPerformanceWarning)
@numba.njit @numba.njit
def _rotation_box2d_jit_(corners, angle, rot_mat_T): def _rotation_box2d_jit_(corners, angle, rot_mat_T):
......
from .iou3d_utils import (boxes_iou3d_gpu, boxes_iou_bev, nms_gpu, from .iou3d_utils import (boxes_iou3d_gpu_camera, boxes_iou3d_gpu_lidar,
nms_normal_gpu) boxes_iou_bev, nms_gpu, nms_normal_gpu)
__all__ = ['boxes_iou_bev', 'boxes_iou3d_gpu', 'nms_gpu', 'nms_normal_gpu'] __all__ = [
'boxes_iou_bev', 'boxes_iou3d_gpu_camera', 'nms_gpu', 'nms_normal_gpu',
'boxes_iou3d_gpu_lidar'
]
...@@ -20,17 +20,22 @@ def boxes_iou_bev(boxes_a, boxes_b): ...@@ -20,17 +20,22 @@ def boxes_iou_bev(boxes_a, boxes_b):
return ans_iou return ans_iou
def boxes_iou3d_gpu(boxes_a, boxes_b, mode='iou'): def boxes_iou3d_gpu_camera(boxes_a, boxes_b, mode='iou'):
""" """Calculate 3d iou of boxes in camera coordinate
:param boxes_a: (N, 7) [x, y, z, h, w, l, ry]
:param boxes_b: (M, 7) [x, y, z, h, w, l, ry] Args:
:param mode "iou" (intersection over union) or iof (intersection over boxes_a (FloatTensor): (N, 7) [x, y, z, h, w, l, ry]
in LiDAR coordinate
boxes_b (FloatTensor): (M, 7) [x, y, z, h, w, l, ry]
mode (str): "iou" (intersection over union) or iof (intersection over
foreground). foreground).
:return:
ans_iou: (M, N) Returns:
FloatTensor: (M, N)
""" """
boxes_a_bev = boxes3d_to_bev_torch(boxes_a)
boxes_b_bev = boxes3d_to_bev_torch(boxes_b) boxes_a_bev = boxes3d_to_bev_torch_camera(boxes_a)
boxes_b_bev = boxes3d_to_bev_torch_camera(boxes_b)
# bev overlap # bev overlap
overlaps_bev = torch.cuda.FloatTensor( overlaps_bev = torch.cuda.FloatTensor(
...@@ -51,15 +56,62 @@ def boxes_iou3d_gpu(boxes_a, boxes_b, mode='iou'): ...@@ -51,15 +56,62 @@ def boxes_iou3d_gpu(boxes_a, boxes_b, mode='iou'):
# 3d iou # 3d iou
overlaps_3d = overlaps_bev * overlaps_h overlaps_3d = overlaps_bev * overlaps_h
vol_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1) volume_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1)
vol_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1) volume_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1)
if mode == 'iou': if mode == 'iou':
# the clamp func is used to avoid division of 0 # the clamp func is used to avoid division of 0
iou3d = overlaps_3d / torch.clamp( iou3d = overlaps_3d / torch.clamp(
vol_a + vol_b - overlaps_3d, min=1e-8) volume_a + volume_b - overlaps_3d, min=1e-8)
else: else:
iou3d = overlaps_3d / torch.clamp(vol_a, min=1e-8) iou3d = overlaps_3d / torch.clamp(volume_a, min=1e-8)
return iou3d
def boxes_iou3d_gpu_lidar(boxes_a, boxes_b, mode='iou'):
"""Calculate 3d iou of boxes in lidar coordinate
Args:
boxes_a (FloatTensor): (N, 7) [x, y, z, w, l, h, ry]
in LiDAR coordinate
boxes_b (FloatTensor): (M, 7) [x, y, z, w, l, h, ry]
mode (str): "iou" (intersection over union) or iof (intersection over
foreground).
:Returns:
FloatTensor: (M, N)
"""
boxes_a_bev = boxes3d_to_bev_torch_lidar(boxes_a)
boxes_b_bev = boxes3d_to_bev_torch_lidar(boxes_b)
# height overlap
boxes_a_height_max = (boxes_a[:, 2] + boxes_a[:, 5]).view(-1, 1)
boxes_a_height_min = boxes_a[:, 2].view(-1, 1)
boxes_b_height_max = (boxes_b[:, 2] + boxes_b[:, 5]).view(1, -1)
boxes_b_height_min = boxes_b[:, 2].view(1, -1)
# bev overlap
overlaps_bev = boxes_a.new_zeros(
torch.Size((boxes_a.shape[0], boxes_b.shape[0]))) # (N, M)
iou3d_cuda.boxes_overlap_bev_gpu(boxes_a_bev.contiguous(),
boxes_b_bev.contiguous(), overlaps_bev)
max_of_min = torch.max(boxes_a_height_min, boxes_b_height_min)
min_of_max = torch.min(boxes_a_height_max, boxes_b_height_max)
overlaps_h = torch.clamp(min_of_max - max_of_min, min=0)
# 3d iou
overlaps_3d = overlaps_bev * overlaps_h
volume_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1)
volume_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1)
if mode == 'iou':
# the clamp func is used to avoid division of 0
iou3d = overlaps_3d / torch.clamp(
volume_a + volume_b - overlaps_3d, min=1e-8)
else:
iou3d = overlaps_3d / torch.clamp(volume_a, min=1e-8)
return iou3d return iou3d
...@@ -98,16 +150,39 @@ def nms_normal_gpu(boxes, scores, thresh): ...@@ -98,16 +150,39 @@ def nms_normal_gpu(boxes, scores, thresh):
return order[keep[:num_out].cuda()].contiguous() return order[keep[:num_out].cuda()].contiguous()
def boxes3d_to_bev_torch(boxes3d): def boxes3d_to_bev_torch_camera(boxes3d):
""" """covert boxes3d to bev in in camera coords
:param boxes3d: (N, 7) [x, y, z, h, w, l, ry] in camera coords
:return: Args:
boxes_bev: (N, 5) [x1, y1, x2, y2, ry] boxes3d (FloartTensor): (N, 7) [x, y, z, h, w, l, ry] in camera coords
Return:
FloartTensor: (N, 5) [x1, y1, x2, y2, ry]
""" """
boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5))) boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5)))
cu, cv = boxes3d[:, 0], boxes3d[:, 2] cu, cv = boxes3d[:, 0], boxes3d[:, 2]
half_l, half_w = boxes3d[:, 5] / 2, boxes3d[:, 4] / 2 half_l, half_w = boxes3d[:, 5] / 2, boxes3d[:, 4] / 2
boxes_bev[:, 0], boxes_bev[:, 1] = cu - half_l, cv - half_w boxes_bev[:, 0], boxes_bev[:, 1] = cu - half_l, cv - half_w
boxes_bev[:, 2], boxes_bev[:, 3] = cu + half_l, cv + half_w boxes_bev[:, 2], boxes_bev[:, 3] = cu + half_l, cv + half_w
boxes_bev[:, 4] = boxes3d[:, 6] boxes_bev[:, 4] = boxes3d[:, 6]
return boxes_bev return boxes_bev
def boxes3d_to_bev_torch_lidar(boxes3d):
"""covert boxes3d to bev in in LiDAR coords
Args:
boxes3d (FloartTensor): (N, 7) [x, y, z, w, l, h, ry] in LiDAR coords
Returns:
FloartTensor: (N, 5) [x1, y1, x2, y2, ry]
"""
boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5)))
x, y = boxes3d[:, 0], boxes3d[:, 1]
half_l, half_w = boxes3d[:, 4] / 2, boxes3d[:, 3] / 2
boxes_bev[:, 0], boxes_bev[:, 1] = x - half_w, y - half_l
boxes_bev[:, 2], boxes_bev[:, 3] = x + half_w, y + half_l
boxes_bev[:, 4] = boxes3d[:, 6]
return boxes_bev
...@@ -7,8 +7,8 @@ ...@@ -7,8 +7,8 @@
#include <assert.h> #include <assert.h>
#include <math.h> #include <math.h>
#include <stdio.h> #include <stdio.h>
#include <torch/extension.h>
#include <torch/serialize/tensor.h> #include <torch/serialize/tensor.h>
#include <torch/types.h>
#define THREADS_PER_BLOCK 256 #define THREADS_PER_BLOCK 256
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
......
//Modified from // Modified from
//https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu // https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
//RoI-aware point cloud feature pooling // RoI-aware point cloud feature pooling
//Written by Shaoshuai Shi // Written by Shaoshuai Shi
//All Rights Reserved 2019. // All Rights Reserved 2019.
#include <torch/serialize/tensor.h>
#include <torch/extension.h>
#include <assert.h> #include <assert.h>
#include <math.h> #include <math.h>
#include <stdio.h> #include <stdio.h>
#include <torch/serialize/tensor.h>
#include <torch/types.h>
#define THREADS_PER_BLOCK 256 #define THREADS_PER_BLOCK 256
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
// #define DEBUG // #define DEBUG
__device__ inline void lidar_to_local_coords(float shift_x, float shift_y,
__device__ inline void lidar_to_local_coords(float shift_x, float shift_y, float rz, float &local_x, float &local_y){ float rz, float &local_x,
// should rotate pi/2 + alpha to translate LiDAR to local float &local_y) {
float rot_angle = rz + M_PI / 2; // should rotate pi/2 + alpha to translate LiDAR to local
float cosa = cos(rot_angle), sina = sin(rot_angle); float rot_angle = rz + M_PI / 2;
local_x = shift_x * cosa + shift_y * (-sina); float cosa = cos(rot_angle), sina = sin(rot_angle);
local_y = shift_x * sina + shift_y * cosa; local_x = shift_x * cosa + shift_y * (-sina);
local_y = shift_x * sina + shift_y * cosa;
} }
__device__ inline int check_pt_in_box3d(const float *pt, const float *box3d,
__device__ inline int check_pt_in_box3d(const float *pt, const float *box3d, float &local_x, float &local_y){ float &local_x, float &local_y) {
// param pt: (x, y, z) // param pt: (x, y, z)
// param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the bottom center // param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the
float x = pt[0], y = pt[1], z = pt[2]; // bottom center
float cx = box3d[0], cy = box3d[1], cz = box3d[2]; float x = pt[0], y = pt[1], z = pt[2];
float w = box3d[3], l = box3d[4], h = box3d[5], rz = box3d[6]; float cx = box3d[0], cy = box3d[1], cz = box3d[2];
cz += h / 2.0; // shift to the center since cz in box3d is the bottom center float w = box3d[3], l = box3d[4], h = box3d[5], rz = box3d[6];
cz += h / 2.0; // shift to the center since cz in box3d is the bottom center
if (fabsf(z - cz) > h / 2.0) return 0;
lidar_to_local_coords(x - cx, y - cy, rz, local_x, local_y); if (fabsf(z - cz) > h / 2.0) return 0;
float in_flag = (local_x > -l / 2.0) & (local_x < l / 2.0) & (local_y > -w / 2.0) & (local_y < w / 2.0); lidar_to_local_coords(x - cx, y - cy, rz, local_x, local_y);
return in_flag; float in_flag = (local_x > -l / 2.0) & (local_x < l / 2.0) &
(local_y > -w / 2.0) & (local_y < w / 2.0);
return in_flag;
} }
__global__ void generate_pts_mask_for_box3d(int boxes_num, int pts_num,
__global__ void generate_pts_mask_for_box3d(int boxes_num, int pts_num, int out_x, int out_y, int out_z, int out_x, int out_y, int out_z,
const float *rois, const float *pts, int *pts_mask){ const float *rois, const float *pts,
// params rois: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate int *pts_mask) {
// params pts: (npoints, 3) [x, y, z] // params rois: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate
// params pts_mask: (N, npoints): -1 means point doesnot in this box, otherwise: encode (x_idxs, y_idxs, z_idxs) by binary bit // params pts: (npoints, 3) [x, y, z]
int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; // params pts_mask: (N, npoints): -1 means point doesnot in this box,
int box_idx = blockIdx.y; // otherwise: encode (x_idxs, y_idxs, z_idxs) by binary bit
if (pt_idx >= pts_num || box_idx >= boxes_num) return; int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
int box_idx = blockIdx.y;
pts += pt_idx * 3; if (pt_idx >= pts_num || box_idx >= boxes_num) return;
rois += box_idx * 7;
pts_mask += box_idx * pts_num + pt_idx; pts += pt_idx * 3;
rois += box_idx * 7;
float local_x = 0, local_y = 0; pts_mask += box_idx * pts_num + pt_idx;
int cur_in_flag = check_pt_in_box3d(pts, rois, local_x, local_y);
float local_x = 0, local_y = 0;
pts_mask[0] = -1; int cur_in_flag = check_pt_in_box3d(pts, rois, local_x, local_y);
if (cur_in_flag > 0){
float local_z = pts[2] - rois[2]; pts_mask[0] = -1;
float w = rois[3], l = rois[4], h = rois[5]; if (cur_in_flag > 0) {
float local_z = pts[2] - rois[2];
float x_res = l / out_x; float w = rois[3], l = rois[4], h = rois[5];
float y_res = w / out_y;
float z_res = h / out_z; float x_res = l / out_x;
float y_res = w / out_y;
unsigned int x_idx = int((local_x + l / 2) / x_res); float z_res = h / out_z;
unsigned int y_idx = int((local_y + w / 2) / y_res);
unsigned int z_idx = int(local_z / z_res); unsigned int x_idx = int((local_x + l / 2) / x_res);
unsigned int y_idx = int((local_y + w / 2) / y_res);
x_idx = min(max(x_idx, 0), out_x - 1); unsigned int z_idx = int(local_z / z_res);
y_idx = min(max(y_idx, 0), out_y - 1);
z_idx = min(max(z_idx, 0), out_z - 1); x_idx = min(max(x_idx, 0), out_x - 1);
y_idx = min(max(y_idx, 0), out_y - 1);
unsigned int idx_encoding = (x_idx << 16) + (y_idx << 8) + z_idx; z_idx = min(max(z_idx, 0), out_z - 1);
unsigned int idx_encoding = (x_idx << 16) + (y_idx << 8) + z_idx;
#ifdef DEBUG #ifdef DEBUG
printf("mask: pts_%d(%.3f, %.3f, %.3f), local(%.3f, %.3f, %.3f), idx(%d, %d, %d), res(%.3f, %.3f, %.3f), idx_encoding=%x\n", printf(
pt_idx, pts[0], pts[1], pts[2], local_x, local_y, local_z, x_idx, y_idx, z_idx, x_res, y_res, z_res, idx_encoding); "mask: pts_%d(%.3f, %.3f, %.3f), local(%.3f, %.3f, %.3f), idx(%d, %d, "
"%d), res(%.3f, %.3f, %.3f), idx_encoding=%x\n",
pt_idx, pts[0], pts[1], pts[2], local_x, local_y, local_z, x_idx, y_idx,
z_idx, x_res, y_res, z_res, idx_encoding);
#endif #endif
pts_mask[0] = idx_encoding; pts_mask[0] = idx_encoding;
} }
} }
__global__ void collect_inside_pts_for_box3d(int boxes_num, int pts_num,
__global__ void collect_inside_pts_for_box3d(int boxes_num, int pts_num, int max_pts_each_voxel, int max_pts_each_voxel, int out_x,
int out_x, int out_y, int out_z, const int *pts_mask, int *pts_idx_of_voxels){ int out_y, int out_z,
// params pts_mask: (N, npoints) 0 or 1 const int *pts_mask,
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel) int *pts_idx_of_voxels) {
// params pts_mask: (N, npoints) 0 or 1
int box_idx = blockIdx.x * blockDim.x + threadIdx.x; // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
if (box_idx >= boxes_num) return;
int box_idx = blockIdx.x * blockDim.x + threadIdx.x;
int max_num_pts = max_pts_each_voxel - 1; // index 0 is the counter if (box_idx >= boxes_num) return;
pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel;
int max_num_pts = max_pts_each_voxel - 1; // index 0 is the counter
for (int k = 0; k < pts_num; k++){ pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel;
if (pts_mask[box_idx * pts_num + k] != -1){
unsigned int idx_encoding = pts_mask[box_idx * pts_num + k]; for (int k = 0; k < pts_num; k++) {
unsigned int x_idx = (idx_encoding >> 16) & 0xFF; if (pts_mask[box_idx * pts_num + k] != -1) {
unsigned int y_idx = (idx_encoding >> 8) & 0xFF; unsigned int idx_encoding = pts_mask[box_idx * pts_num + k];
unsigned int z_idx = idx_encoding & 0xFF; unsigned int x_idx = (idx_encoding >> 16) & 0xFF;
unsigned int base_offset = x_idx * out_y * out_z * max_pts_each_voxel + y_idx * out_z * max_pts_each_voxel + z_idx * max_pts_each_voxel; unsigned int y_idx = (idx_encoding >> 8) & 0xFF;
unsigned int cnt = pts_idx_of_voxels[base_offset]; unsigned int z_idx = idx_encoding & 0xFF;
if (cnt < max_num_pts){ unsigned int base_offset = x_idx * out_y * out_z * max_pts_each_voxel +
pts_idx_of_voxels[base_offset + cnt + 1] = k; y_idx * out_z * max_pts_each_voxel +
pts_idx_of_voxels[base_offset]++; z_idx * max_pts_each_voxel;
} unsigned int cnt = pts_idx_of_voxels[base_offset];
if (cnt < max_num_pts) {
pts_idx_of_voxels[base_offset + cnt + 1] = k;
pts_idx_of_voxels[base_offset]++;
}
#ifdef DEBUG #ifdef DEBUG
printf("collect: pts_%d, idx(%d, %d, %d), idx_encoding=%x\n", printf("collect: pts_%d, idx(%d, %d, %d), idx_encoding=%x\n", k, x_idx,
k, x_idx, y_idx, z_idx, idx_encoding); y_idx, z_idx, idx_encoding);
#endif #endif
}
} }
}
} }
__global__ void roiaware_maxpool3d(int boxes_num, int pts_num, int channels,
__global__ void roiaware_maxpool3d(int boxes_num, int pts_num, int channels, int max_pts_each_voxel, int out_x, int max_pts_each_voxel, int out_x, int out_y,
int out_y, int out_z, const float *pts_feature, const int *pts_idx_of_voxels, float *pooled_features, int *argmax){ int out_z, const float *pts_feature,
// params pts_feature: (npoints, C) const int *pts_idx_of_voxels,
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel), index 0 is the counter float *pooled_features, int *argmax) {
// params pooled_features: (N, out_x, out_y, out_z, C) // params pts_feature: (npoints, C)
// params argmax: (N, out_x, out_y, out_z, C) // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel),
// index 0 is the counter params pooled_features: (N, out_x, out_y, out_z, C)
int box_idx = blockIdx.z; // params argmax: (N, out_x, out_y, out_z, C)
int channel_idx = blockIdx.y;
int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x; int box_idx = blockIdx.z;
int channel_idx = blockIdx.y;
int x_idx = voxel_idx_flat / (out_y * out_z); int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x;
int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
int z_idx = voxel_idx_flat % out_z; int x_idx = voxel_idx_flat / (out_y * out_z);
if (box_idx >= boxes_num || channel_idx >= channels|| x_idx >= out_x || y_idx >= out_y || z_idx >= out_z) return; int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
int z_idx = voxel_idx_flat % out_z;
if (box_idx >= boxes_num || channel_idx >= channels || x_idx >= out_x ||
y_idx >= out_y || z_idx >= out_z)
return;
#ifdef DEBUG #ifdef DEBUG
printf("src pts_idx_of_voxels: (%p, ), argmax: %p\n", pts_idx_of_voxels, argmax); printf("src pts_idx_of_voxels: (%p, ), argmax: %p\n", pts_idx_of_voxels,
argmax);
#endif #endif
int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx; int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel + offset_base * max_pts_each_voxel; pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel +
pooled_features += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx; offset_base * max_pts_each_voxel;
argmax += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx; pooled_features += box_idx * out_x * out_y * out_z * channels +
offset_base * channels + channel_idx;
argmax += box_idx * out_x * out_y * out_z * channels +
offset_base * channels + channel_idx;
int argmax_idx = -1; int argmax_idx = -1;
float max_val = -1e50; float max_val = -1e50;
int total_pts = pts_idx_of_voxels[0]; int total_pts = pts_idx_of_voxels[0];
for (int k = 1; k <= total_pts; k++){ for (int k = 1; k <= total_pts; k++) {
if (pts_feature[pts_idx_of_voxels[k] * channels + channel_idx] > max_val){ if (pts_feature[pts_idx_of_voxels[k] * channels + channel_idx] > max_val) {
max_val = pts_feature[pts_idx_of_voxels[k] * channels + channel_idx]; max_val = pts_feature[pts_idx_of_voxels[k] * channels + channel_idx];
argmax_idx = pts_idx_of_voxels[k]; argmax_idx = pts_idx_of_voxels[k];
}
} }
}
if (argmax_idx != -1){ if (argmax_idx != -1) {
pooled_features[0] = max_val; pooled_features[0] = max_val;
} }
argmax[0] = argmax_idx; argmax[0] = argmax_idx;
#ifdef DEBUG #ifdef DEBUG
printf("channel_%d idx(%d, %d, %d), argmax_idx=(%d, %.3f), total=%d, after pts_idx: %p, argmax: (%p, %d)\n", printf(
channel_idx, x_idx, y_idx, z_idx, argmax_idx, max_val, total_pts, pts_idx_of_voxels, argmax, argmax_idx); "channel_%d idx(%d, %d, %d), argmax_idx=(%d, %.3f), total=%d, after "
"pts_idx: %p, argmax: (%p, %d)\n",
channel_idx, x_idx, y_idx, z_idx, argmax_idx, max_val, total_pts,
pts_idx_of_voxels, argmax, argmax_idx);
#endif #endif
} }
__global__ void roiaware_avgpool3d(int boxes_num, int pts_num, int channels,
__global__ void roiaware_avgpool3d(int boxes_num, int pts_num, int channels, int max_pts_each_voxel, int out_x, int max_pts_each_voxel, int out_x, int out_y,
int out_y, int out_z, const float *pts_feature, const int *pts_idx_of_voxels, float *pooled_features){ int out_z, const float *pts_feature,
// params pts_feature: (npoints, C) const int *pts_idx_of_voxels,
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel), index 0 is the counter float *pooled_features) {
// params pooled_features: (N, out_x, out_y, out_z, C) // params pts_feature: (npoints, C)
// params argmax: (N, out_x, out_y, out_z, C) // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel),
// index 0 is the counter params pooled_features: (N, out_x, out_y, out_z, C)
int box_idx = blockIdx.z; // params argmax: (N, out_x, out_y, out_z, C)
int channel_idx = blockIdx.y;
int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x; int box_idx = blockIdx.z;
int channel_idx = blockIdx.y;
int x_idx = voxel_idx_flat / (out_y * out_z); int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x;
int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
int z_idx = voxel_idx_flat % out_z; int x_idx = voxel_idx_flat / (out_y * out_z);
if (box_idx >= boxes_num || channel_idx >= channels|| x_idx >= out_x || y_idx >= out_y || z_idx >= out_z) return; int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
int z_idx = voxel_idx_flat % out_z;
int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx; if (box_idx >= boxes_num || channel_idx >= channels || x_idx >= out_x ||
pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel + offset_base * max_pts_each_voxel; y_idx >= out_y || z_idx >= out_z)
pooled_features += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx; return;
float sum_val = 0; int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
int total_pts = pts_idx_of_voxels[0]; pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel +
offset_base * max_pts_each_voxel;
for (int k = 1; k <= total_pts; k++){ pooled_features += box_idx * out_x * out_y * out_z * channels +
sum_val += pts_feature[pts_idx_of_voxels[k] * channels + channel_idx]; offset_base * channels + channel_idx;
}
float sum_val = 0;
if (total_pts > 0){ int total_pts = pts_idx_of_voxels[0];
pooled_features[0] = sum_val / total_pts;
} for (int k = 1; k <= total_pts; k++) {
sum_val += pts_feature[pts_idx_of_voxels[k] * channels + channel_idx];
}
if (total_pts > 0) {
pooled_features[0] = sum_val / total_pts;
}
} }
void roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels,
int max_pts_each_voxel, int out_x, int out_y,
void roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels, int max_pts_each_voxel, int out_x, int out_y, int out_z, int out_z, const float *rois, const float *pts,
const float *rois, const float *pts, const float *pts_feature, int *argmax, int *pts_idx_of_voxels, float *pooled_features, int pool_method){ const float *pts_feature, int *argmax,
// params rois: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate int *pts_idx_of_voxels, float *pooled_features,
// params pts: (npoints, 3) [x, y, z] in LiDAR coordinate int pool_method) {
// params pts_feature: (npoints, C) // params rois: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate
// params argmax: (N, out_x, out_y, out_z, C) // params pts: (npoints, 3) [x, y, z] in LiDAR coordinate
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel) // params pts_feature: (npoints, C)
// params pooled_features: (N, out_x, out_y, out_z, C) // params argmax: (N, out_x, out_y, out_z, C)
// params pool_method: 0: max_pool 1: avg_pool // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params pooled_features: (N, out_x, out_y, out_z, C)
int *pts_mask = NULL; // params pool_method: 0: max_pool 1: avg_pool
cudaMalloc(&pts_mask, boxes_num * pts_num * sizeof(int)); // (N, M)
cudaMemset(pts_mask, -1, boxes_num * pts_num * sizeof(int)); int *pts_mask = NULL;
cudaMalloc(&pts_mask, boxes_num * pts_num * sizeof(int)); // (N, M)
dim3 blocks_mask(DIVUP(pts_num, THREADS_PER_BLOCK), boxes_num); cudaMemset(pts_mask, -1, boxes_num * pts_num * sizeof(int));
dim3 threads(THREADS_PER_BLOCK);
generate_pts_mask_for_box3d<<<blocks_mask, threads>>>(boxes_num, pts_num, out_x, out_y, out_z, rois, pts, pts_mask); dim3 blocks_mask(DIVUP(pts_num, THREADS_PER_BLOCK), boxes_num);
dim3 threads(THREADS_PER_BLOCK);
// TODO: Merge the collect and pool functions, SS generate_pts_mask_for_box3d<<<blocks_mask, threads>>>(
boxes_num, pts_num, out_x, out_y, out_z, rois, pts, pts_mask);
dim3 blocks_collect(DIVUP(boxes_num, THREADS_PER_BLOCK));
collect_inside_pts_for_box3d<<<blocks_collect, threads>>>(boxes_num, pts_num, max_pts_each_voxel, // TODO: Merge the collect and pool functions, SS
out_x, out_y, out_z, pts_mask, pts_idx_of_voxels);
dim3 blocks_collect(DIVUP(boxes_num, THREADS_PER_BLOCK));
dim3 blocks_pool(DIVUP(out_x * out_y * out_z, THREADS_PER_BLOCK), channels, boxes_num); collect_inside_pts_for_box3d<<<blocks_collect, threads>>>(
if (pool_method == 0){ boxes_num, pts_num, max_pts_each_voxel, out_x, out_y, out_z, pts_mask,
roiaware_maxpool3d<<<blocks_pool, threads>>>(boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z, pts_idx_of_voxels);
pts_feature, pts_idx_of_voxels, pooled_features, argmax);
} dim3 blocks_pool(DIVUP(out_x * out_y * out_z, THREADS_PER_BLOCK), channels,
else if (pool_method == 1){ boxes_num);
roiaware_avgpool3d<<<blocks_pool, threads>>>(boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z, if (pool_method == 0) {
pts_feature, pts_idx_of_voxels, pooled_features); roiaware_maxpool3d<<<blocks_pool, threads>>>(
} boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z,
pts_feature, pts_idx_of_voxels, pooled_features, argmax);
} else if (pool_method == 1) {
cudaFree(pts_mask); roiaware_avgpool3d<<<blocks_pool, threads>>>(
boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z,
pts_feature, pts_idx_of_voxels, pooled_features);
}
cudaFree(pts_mask);
#ifdef DEBUG #ifdef DEBUG
cudaDeviceSynchronize(); // for using printf in kernel function cudaDeviceSynchronize(); // for using printf in kernel function
#endif #endif
} }
__global__ void roiaware_maxpool3d_backward(int boxes_num, int channels,
__global__ void roiaware_maxpool3d_backward(int boxes_num, int channels, int out_x, int out_y, int out_z, int out_x, int out_y, int out_z,
const int *argmax, const float *grad_out, float *grad_in){ const int *argmax,
// params argmax: (N, out_x, out_y, out_z, C) const float *grad_out,
// params grad_out: (N, out_x, out_y, out_z, C) float *grad_in) {
// params grad_in: (npoints, C), return value // params argmax: (N, out_x, out_y, out_z, C)
// params grad_out: (N, out_x, out_y, out_z, C)
int box_idx = blockIdx.z; // params grad_in: (npoints, C), return value
int channel_idx = blockIdx.y;
int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x; int box_idx = blockIdx.z;
int channel_idx = blockIdx.y;
int x_idx = voxel_idx_flat / (out_y * out_z); int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x;
int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
int z_idx = voxel_idx_flat % out_z; int x_idx = voxel_idx_flat / (out_y * out_z);
if (box_idx >= boxes_num || channel_idx >= channels|| x_idx >= out_x || y_idx >= out_y || z_idx >= out_z) return; int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
int z_idx = voxel_idx_flat % out_z;
int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx; if (box_idx >= boxes_num || channel_idx >= channels || x_idx >= out_x ||
argmax += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx; y_idx >= out_y || z_idx >= out_z)
grad_out += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx; return;
if (argmax[0] == -1) return; int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
argmax += box_idx * out_x * out_y * out_z * channels +
atomicAdd(grad_in + argmax[0] * channels + channel_idx, grad_out[0] * 1); offset_base * channels + channel_idx;
grad_out += box_idx * out_x * out_y * out_z * channels +
offset_base * channels + channel_idx;
if (argmax[0] == -1) return;
atomicAdd(grad_in + argmax[0] * channels + channel_idx, grad_out[0] * 1);
} }
__global__ void roiaware_avgpool3d_backward(int boxes_num, int channels,
__global__ void roiaware_avgpool3d_backward(int boxes_num, int channels, int out_x, int out_y, int out_z, int out_x, int out_y, int out_z,
int max_pts_each_voxel, const int *pts_idx_of_voxels, const float *grad_out, float *grad_in){ int max_pts_each_voxel,
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel) const int *pts_idx_of_voxels,
// params grad_out: (N, out_x, out_y, out_z, C) const float *grad_out,
// params grad_in: (npoints, C), return value float *grad_in) {
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
int box_idx = blockIdx.z; // params grad_out: (N, out_x, out_y, out_z, C)
int channel_idx = blockIdx.y; // params grad_in: (npoints, C), return value
int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x;
int box_idx = blockIdx.z;
int x_idx = voxel_idx_flat / (out_y * out_z); int channel_idx = blockIdx.y;
int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z; int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x;
int z_idx = voxel_idx_flat % out_z;
if (box_idx >= boxes_num || channel_idx >= channels|| x_idx >= out_x || y_idx >= out_y || z_idx >= out_z) return; int x_idx = voxel_idx_flat / (out_y * out_z);
int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx; int z_idx = voxel_idx_flat % out_z;
pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel + offset_base * max_pts_each_voxel; if (box_idx >= boxes_num || channel_idx >= channels || x_idx >= out_x ||
grad_out += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx; y_idx >= out_y || z_idx >= out_z)
return;
int total_pts = pts_idx_of_voxels[0]; int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
float cur_grad = 1 / fmaxf(float(total_pts), 1.0); pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel +
for (int k = 1; k <= total_pts; k++){ offset_base * max_pts_each_voxel;
atomicAdd(grad_in + pts_idx_of_voxels[k] * channels + channel_idx, grad_out[0] * cur_grad); grad_out += box_idx * out_x * out_y * out_z * channels +
} offset_base * channels + channel_idx;
int total_pts = pts_idx_of_voxels[0];
float cur_grad = 1 / fmaxf(float(total_pts), 1.0);
for (int k = 1; k <= total_pts; k++) {
atomicAdd(grad_in + pts_idx_of_voxels[k] * channels + channel_idx,
grad_out[0] * cur_grad);
}
} }
void roiaware_pool3d_backward_launcher(int boxes_num, int out_x, int out_y,
int out_z, int channels,
void roiaware_pool3d_backward_launcher(int boxes_num, int out_x, int out_y, int out_z, int channels, int max_pts_each_voxel, int max_pts_each_voxel,
const int *pts_idx_of_voxels, const int *argmax, const float *grad_out, float *grad_in, int pool_method){ const int *pts_idx_of_voxels,
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel) const int *argmax, const float *grad_out,
// params argmax: (N, out_x, out_y, out_z, C) float *grad_in, int pool_method) {
// params grad_out: (N, out_x, out_y, out_z, C) // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params grad_in: (npoints, C), return value // params argmax: (N, out_x, out_y, out_z, C)
// params pool_method: 0: max_pool, 1: avg_pool // params grad_out: (N, out_x, out_y, out_z, C)
// params grad_in: (npoints, C), return value
dim3 blocks(DIVUP(out_x * out_y * out_z, THREADS_PER_BLOCK), channels, boxes_num); // params pool_method: 0: max_pool, 1: avg_pool
dim3 threads(THREADS_PER_BLOCK);
if (pool_method == 0){ dim3 blocks(DIVUP(out_x * out_y * out_z, THREADS_PER_BLOCK), channels,
roiaware_maxpool3d_backward<<<blocks, threads>>>( boxes_num);
boxes_num, channels, out_x, out_y, out_z, argmax, grad_out, grad_in dim3 threads(THREADS_PER_BLOCK);
); if (pool_method == 0) {
} roiaware_maxpool3d_backward<<<blocks, threads>>>(
else if (pool_method == 1){ boxes_num, channels, out_x, out_y, out_z, argmax, grad_out, grad_in);
roiaware_avgpool3d_backward<<<blocks, threads>>>( } else if (pool_method == 1) {
boxes_num, channels, out_x, out_y, out_z, max_pts_each_voxel, pts_idx_of_voxels, grad_out, grad_in roiaware_avgpool3d_backward<<<blocks, threads>>>(
); boxes_num, channels, out_x, out_y, out_z, max_pts_each_voxel,
} pts_idx_of_voxels, grad_out, grad_in);
}
} }
from mmcv.cnn import build_norm_layer from mmcv.cnn import build_norm_layer
from torch import nn from torch import nn
import mmdet3d.ops.spconv as spconv
from mmdet.models.backbones.resnet import BasicBlock, Bottleneck from mmdet.models.backbones.resnet import BasicBlock, Bottleneck
from . import spconv
def conv3x3(in_planes, out_planes, stride=1, indice_key=None): def conv3x3(in_planes, out_planes, stride=1, indice_key=None):
......
import torch
from mmdet3d.core.bbox.assigners import MaxIoUAssigner
from mmdet3d.core.bbox.samplers import IoUNegPiecewiseSampler
def test_iou_piecewise_sampler():
assigner = MaxIoUAssigner(
pos_iou_thr=0.55,
neg_iou_thr=0.55,
min_pos_iou=0.55,
ignore_iof_thr=-1,
iou_calculator=dict(type='BboxOverlaps3D', coordinate='lidar'))
bboxes = torch.tensor(
[[32, 32, 16, 8, 38, 42, -0.3], [32, 32, 16, 8, 38, 42, -0.3],
[32, 32, 16, 8, 38, 42, -0.3], [32, 32, 16, 8, 38, 42, -0.3],
[0, 0, 0, 10, 10, 10, 0.2], [10, 10, 10, 20, 20, 15, 0.6],
[5, 5, 5, 15, 15, 15, 0.7], [5, 5, 5, 15, 15, 15, 0.7],
[5, 5, 5, 15, 15, 15, 0.7], [32, 32, 16, 8, 38, 42, -0.3],
[32, 32, 16, 8, 38, 42, -0.3], [32, 32, 16, 8, 38, 42, -0.3]],
dtype=torch.float32).cuda()
gt_bboxes = torch.tensor(
[[0, 0, 0, 10, 10, 9, 0.2], [5, 10, 10, 20, 20, 15, 0.6]],
dtype=torch.float32).cuda()
gt_labels = torch.tensor([1, 1], dtype=torch.int64).cuda()
assign_result = assigner.assign(bboxes, gt_bboxes, gt_labels=gt_labels)
sampler = IoUNegPiecewiseSampler(
num=10,
pos_fraction=0.55,
neg_piece_fractions=[0.8, 0.2],
neg_iou_piece_thrs=[0.55, 0.1],
neg_pos_ub=-1,
add_gt_as_proposals=False)
sample_result = sampler.sample(assign_result, bboxes, gt_bboxes, gt_labels)
assert sample_result.pos_inds == 4
assert len(sample_result.pos_bboxes) == len(sample_result.pos_inds)
assert len(sample_result.neg_bboxes) == len(sample_result.neg_inds)
#!/usr/bin/env bash
CONFIG=$1
CHECKPOINT=$2
GPUS=$3
PORT=${PORT:-29500}
PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
$(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4}
#!/usr/bin/env bash #!/usr/bin/env bash
PYTHON=${PYTHON:-"python"}
CONFIG=$1 CONFIG=$1
GPUS=$2 GPUS=$2
PORT=${PORT:-29500}
$PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS \ PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
$(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3}
#!/usr/bin/env bash #!/usr/bin/env bash
set -x set -x
export PYTHONPATH=`pwd`:$PYTHONPATH
PARTITION=$1 PARTITION=$1
JOB_NAME=$2 JOB_NAME=$2
...@@ -9,14 +8,17 @@ CONFIG=$3 ...@@ -9,14 +8,17 @@ CONFIG=$3
CHECKPOINT=$4 CHECKPOINT=$4
GPUS=${GPUS:-8} GPUS=${GPUS:-8}
GPUS_PER_NODE=${GPUS_PER_NODE:-8} GPUS_PER_NODE=${GPUS_PER_NODE:-8}
CPUS_PER_TASK=${CPUS_PER_TASK:-5}
PY_ARGS=${@:5} PY_ARGS=${@:5}
SRUN_ARGS=${SRUN_ARGS:-""} SRUN_ARGS=${SRUN_ARGS:-""}
PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
srun -p ${PARTITION} \ srun -p ${PARTITION} \
--job-name=${JOB_NAME} \ --job-name=${JOB_NAME} \
--gres=gpu:${GPUS_PER_NODE} \ --gres=gpu:${GPUS_PER_NODE} \
--ntasks=${GPUS} \ --ntasks=${GPUS} \
--ntasks-per-node=${GPUS_PER_NODE} \ --ntasks-per-node=${GPUS_PER_NODE} \
--cpus-per-task=${CPUS_PER_TASK} \
--kill-on-bad-exit=1 \ --kill-on-bad-exit=1 \
${SRUN_ARGS} \ ${SRUN_ARGS} \
python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS}
...@@ -8,15 +8,17 @@ CONFIG=$3 ...@@ -8,15 +8,17 @@ CONFIG=$3
WORK_DIR=$4 WORK_DIR=$4
GPUS=${GPUS:-8} GPUS=${GPUS:-8}
GPUS_PER_NODE=${GPUS_PER_NODE:-8} GPUS_PER_NODE=${GPUS_PER_NODE:-8}
CPUS_PER_TASK=${CPUS_PER_TASK:-5}
SRUN_ARGS=${SRUN_ARGS:-""} SRUN_ARGS=${SRUN_ARGS:-""}
PY_ARGS=${PY_ARGS:-"--validate"} PY_ARGS=${@:5}
PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
srun -p ${PARTITION} \ srun -p ${PARTITION} \
--job-name=${JOB_NAME} \ --job-name=${JOB_NAME} \
--gres=gpu:${GPUS_PER_NODE} \ --gres=gpu:${GPUS_PER_NODE} \
--ntasks=${GPUS} \ --ntasks=${GPUS} \
--ntasks-per-node=${GPUS_PER_NODE} \ --ntasks-per-node=${GPUS_PER_NODE} \
--cpus-per-task=${CPUS_PER_TASK} \
--kill-on-bad-exit=1 \ --kill-on-bad-exit=1 \
${SRUN_ARGS} \ ${SRUN_ARGS} \
python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment