Commit bce7d0c3 authored by yinchimaoliang's avatar yinchimaoliang
Browse files

Merge branch 'master_temp' into indoor_pipeline

parents 1756485e 868c5fab
...@@ -93,7 +93,8 @@ class_names = ['Car'] ...@@ -93,7 +93,8 @@ class_names = ['Car']
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict( input_modality = dict(
use_lidar=True, use_lidar=False,
use_lidar_reduced=True,
use_depth=False, use_depth=False,
use_lidar_intensity=True, use_lidar_intensity=True,
use_camera=False, use_camera=False,
......
...@@ -113,7 +113,8 @@ class_names = ['Pedestrian', 'Cyclist', 'Car'] ...@@ -113,7 +113,8 @@ class_names = ['Pedestrian', 'Cyclist', 'Car']
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict( input_modality = dict(
use_lidar=True, use_lidar=False,
use_lidar_reduced=True,
use_depth=False, use_depth=False,
use_lidar_intensity=True, use_lidar_intensity=True,
use_camera=True, use_camera=True,
......
...@@ -91,7 +91,8 @@ class_names = ['Car'] ...@@ -91,7 +91,8 @@ class_names = ['Car']
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict( input_modality = dict(
use_lidar=True, use_lidar=False,
use_lidar_reduced=True,
use_depth=False, use_depth=False,
use_lidar_intensity=True, use_lidar_intensity=True,
use_camera=True, use_camera=True,
......
...@@ -90,7 +90,8 @@ class_names = ['Car'] ...@@ -90,7 +90,8 @@ class_names = ['Car']
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict( input_modality = dict(
use_lidar=True, use_lidar=False,
use_lidar_reduced=True,
use_depth=False, use_depth=False,
use_lidar_intensity=True, use_lidar_intensity=True,
use_camera=False, use_camera=False,
......
...@@ -89,7 +89,8 @@ class_names = ['Car'] ...@@ -89,7 +89,8 @@ class_names = ['Car']
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict( input_modality = dict(
use_lidar=True, use_lidar=False,
use_lidar_reduced=True,
use_depth=False, use_depth=False,
use_lidar_intensity=True, use_lidar_intensity=True,
use_camera=False, use_camera=False,
......
import torch import torch
from mmdet3d.ops.iou3d import boxes_iou3d_gpu from mmdet3d.ops.iou3d import boxes_iou3d_gpu_camera, boxes_iou3d_gpu_lidar
from mmdet.core.bbox import bbox_overlaps from mmdet.core.bbox import bbox_overlaps
from mmdet.core.bbox.iou_calculators.builder import IOU_CALCULATORS from mmdet.core.bbox.iou_calculators.builder import IOU_CALCULATORS
from .. import box_torch_ops from .. import box_torch_ops
...@@ -22,10 +22,18 @@ class BboxOverlapsNearest3D(object): ...@@ -22,10 +22,18 @@ class BboxOverlapsNearest3D(object):
@IOU_CALCULATORS.register_module() @IOU_CALCULATORS.register_module()
class BboxOverlaps3D(object): class BboxOverlaps3D(object):
"""3D IoU Calculator""" """3D IoU Calculator
def __call__(self, bboxes1, bboxes2, mode='iou', is_aligned=False): Args:
return bbox_overlaps_3d(bboxes1, bboxes2, mode, is_aligned) coordinate (str): 'camera' or 'lidar' coordinate system
"""
def __init__(self, coordinate):
assert coordinate in ['camera', 'lidar']
self.coordinate = coordinate
def __call__(self, bboxes1, bboxes2, mode='iou'):
return bbox_overlaps_3d(bboxes1, bboxes2, mode, self.coordinate)
def __repr__(self): def __repr__(self):
repr_str = self.__class__.__name__ repr_str = self.__class__.__name__
...@@ -62,7 +70,7 @@ def bbox_overlaps_nearest_3d(bboxes1, bboxes2, mode='iou', is_aligned=False): ...@@ -62,7 +70,7 @@ def bbox_overlaps_nearest_3d(bboxes1, bboxes2, mode='iou', is_aligned=False):
return ret return ret
def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou'): def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou', coordinate='camera'):
"""Calculate 3D IoU using cuda implementation """Calculate 3D IoU using cuda implementation
Args: Args:
...@@ -70,6 +78,7 @@ def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou'): ...@@ -70,6 +78,7 @@ def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou'):
bboxes2: Tensor, shape (M, 7) [x, y, z, h, w, l, ry] bboxes2: Tensor, shape (M, 7) [x, y, z, h, w, l, ry]
mode: mode (str): "iou" (intersection over union) or mode: mode (str): "iou" (intersection over union) or
iof (intersection over foreground). iof (intersection over foreground).
coordinate (str): 'camera' or 'lidar' coordinate system
Return: Return:
iou: (M, N) not support aligned mode currently iou: (M, N) not support aligned mode currently
...@@ -77,4 +86,11 @@ def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou'): ...@@ -77,4 +86,11 @@ def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou'):
# TODO: check the input dimension meanings, # TODO: check the input dimension meanings,
# this is inconsistent with that in bbox_overlaps_nearest_3d # this is inconsistent with that in bbox_overlaps_nearest_3d
assert bboxes1.size(-1) == bboxes2.size(-1) == 7 assert bboxes1.size(-1) == bboxes2.size(-1) == 7
return boxes_iou3d_gpu(bboxes1, bboxes2, mode) assert coordinate in ['camera', 'lidar']
if coordinate == 'camera':
return boxes_iou3d_gpu_camera(bboxes1, bboxes2, mode)
elif coordinate == 'lidar':
return boxes_iou3d_gpu_lidar(bboxes1, bboxes2, mode)
else:
raise NotImplementedError
...@@ -3,9 +3,10 @@ from mmdet.core.bbox.samplers import (BaseSampler, CombinedSampler, ...@@ -3,9 +3,10 @@ from mmdet.core.bbox.samplers import (BaseSampler, CombinedSampler,
IoUBalancedNegSampler, OHEMSampler, IoUBalancedNegSampler, OHEMSampler,
PseudoSampler, RandomSampler, PseudoSampler, RandomSampler,
SamplingResult) SamplingResult)
from .iou_neg_piecewise_sampler import IoUNegPiecewiseSampler
__all__ = [ __all__ = [
'BaseSampler', 'PseudoSampler', 'RandomSampler', 'BaseSampler', 'PseudoSampler', 'RandomSampler',
'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
'OHEMSampler', 'SamplingResult' 'OHEMSampler', 'SamplingResult', 'IoUNegPiecewiseSampler'
] ]
import torch
from mmdet.core.bbox.builder import BBOX_SAMPLERS
from . import RandomSampler, SamplingResult
@BBOX_SAMPLERS.register_module()
class IoUNegPiecewiseSampler(RandomSampler):
    """IoU piece-wise sampling.

    Sample negative proposals according to a list of IoU thresholds.
    The negative proposals are divided into several pieces according
    to ``neg_iou_piece_thrs``, and the ratio of each piece is indicated
    by ``neg_piece_fractions``.

    Args:
        num (int): Number of proposals.
        pos_fraction (float): The fraction of positive proposals.
        neg_piece_fractions (list): A list of fractions that indicate
            the ratio of each piece of the total negative samples.
        neg_iou_piece_thrs (list): A list of IoU thresholds that
            indicate the upper bound of each piece.
        neg_pos_ub (float): The total ratio used to limit the upper
            bound of the number of negative samples.
        add_gt_as_proposals (bool): Whether to add gt as proposals.
        return_iou (bool): Whether to attach the max IoU of each sampled
            proposal to the sampling result.
    """

    def __init__(self,
                 num,
                 pos_fraction=None,
                 neg_piece_fractions=None,
                 neg_iou_piece_thrs=None,
                 neg_pos_ub=-1,
                 add_gt_as_proposals=False,
                 return_iou=False):
        super(IoUNegPiecewiseSampler,
              self).__init__(num, pos_fraction, neg_pos_ub,
                             add_gt_as_proposals)
        assert isinstance(neg_piece_fractions, list)
        assert len(neg_piece_fractions) == len(neg_iou_piece_thrs)
        self.neg_piece_fractions = neg_piece_fractions
        self.neg_iou_thr = neg_iou_piece_thrs
        self.return_iou = return_iou
        self.neg_piece_num = len(self.neg_piece_fractions)

    def _sample_pos(self, assign_result, num_expected, **kwargs):
        """Randomly sample some positive samples."""
        pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False)
        if pos_inds.numel() != 0:
            pos_inds = pos_inds.squeeze(1)
        if pos_inds.numel() <= num_expected:
            return pos_inds
        else:
            return self.random_choice(pos_inds, num_expected)

    def _sample_neg(self, assign_result, num_expected, **kwargs):
        """Sample negative proposals piece-wise by their max IoU.

        Each IoU interval (piece) contributes at most its configured
        fraction of ``num_expected``; a shortfall in one piece is carried
        over to the next (lower-IoU) piece via ``extend_num``.
        """
        # as_tuple=False keeps the (K, 1) index tensor shape, consistent
        # with _sample_pos and with modern torch.nonzero semantics.
        neg_inds = torch.nonzero(assign_result.gt_inds == 0, as_tuple=False)
        if neg_inds.numel() != 0:
            neg_inds = neg_inds.squeeze(1)
        if len(neg_inds) <= num_expected:
            return neg_inds
        else:
            neg_inds_choice = neg_inds.new_zeros([0])
            extend_num = 0
            max_overlaps = assign_result.max_overlaps[neg_inds]

            for piece_inds in range(self.neg_piece_num):
                if piece_inds == self.neg_piece_num - 1:  # for the last piece
                    piece_expected_num = num_expected - len(neg_inds_choice)
                    min_iou_thr = 0
                else:
                    # if the numbers of negative samplers in previous
                    # pieces are less than the expected number, extend
                    # the same number in the current piece.
                    piece_expected_num = int(
                        num_expected *
                        self.neg_piece_fractions[piece_inds]) + extend_num
                    min_iou_thr = self.neg_iou_thr[piece_inds + 1]
                max_iou_thr = self.neg_iou_thr[piece_inds]
                piece_neg_inds = torch.nonzero(
                    (max_overlaps >= min_iou_thr)
                    & (max_overlaps < max_iou_thr),
                    as_tuple=False).view(-1)

                if len(piece_neg_inds) < piece_expected_num:
                    # NOTE(review): if even the last piece cannot fill the
                    # quota, fewer than num_expected indices are returned.
                    neg_inds_choice = torch.cat(
                        [neg_inds_choice, neg_inds[piece_neg_inds]], dim=0)
                    extend_num += piece_expected_num - len(piece_neg_inds)
                else:
                    piece_choice = self.random_choice(piece_neg_inds,
                                                      piece_expected_num)
                    neg_inds_choice = torch.cat(
                        [neg_inds_choice, neg_inds[piece_choice]], dim=0)
                    extend_num = 0
            return neg_inds_choice

    def sample(self,
               assign_result,
               bboxes,
               gt_bboxes,
               gt_labels=None,
               **kwargs):
        """Sample positive and negative bboxes.

        This is a simple implementation of bbox sampling given candidates,
        assigning results and ground truth bboxes.

        Args:
            assign_result (:obj:`AssignResult`): Bbox assigning results.
            bboxes (Tensor): Boxes to be sampled from.
            gt_bboxes (Tensor): Ground truth bboxes.
            gt_labels (Tensor, optional): Class labels of ground truth bboxes.

        Returns:
            :obj:`SamplingResult`: Sampling result.
        """
        if len(bboxes.shape) < 2:
            bboxes = bboxes[None, :]

        gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.bool)
        if self.add_gt_as_proposals and len(gt_bboxes) > 0:
            if gt_labels is None:
                raise ValueError(
                    'gt_labels must be given when add_gt_as_proposals is True')
            bboxes = torch.cat([gt_bboxes, bboxes], dim=0)
            assign_result.add_gt_(gt_labels)
            gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.bool)
            gt_flags = torch.cat([gt_ones, gt_flags])

        num_expected_pos = int(self.num * self.pos_fraction)
        pos_inds = self.pos_sampler._sample_pos(
            assign_result, num_expected_pos, bboxes=bboxes, **kwargs)
        # We found that sampled indices have duplicated items occasionally.
        # (may be a bug of PyTorch)
        pos_inds = pos_inds.unique()
        num_sampled_pos = pos_inds.numel()
        num_expected_neg = self.num - num_sampled_pos
        if self.neg_pos_ub >= 0:
            _pos = max(1, num_sampled_pos)
            neg_upper_bound = int(self.neg_pos_ub * _pos)
            if num_expected_neg > neg_upper_bound:
                num_expected_neg = neg_upper_bound
        neg_inds = self.neg_sampler._sample_neg(
            assign_result, num_expected_neg, bboxes=bboxes, **kwargs)
        neg_inds = neg_inds.unique()

        sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
                                         assign_result, gt_flags)
        if self.return_iou:
            # PartA2 needs the IoU score for IoU regression.
            sampling_result.iou = assign_result.max_overlaps[torch.cat(
                [pos_inds, neg_inds])]
            sampling_result.iou.detach_()
        return sampling_result
...@@ -184,6 +184,8 @@ class KittiDataset(torch_data.Dataset): ...@@ -184,6 +184,8 @@ class KittiDataset(torch_data.Dataset):
if self.modality['use_depth'] and self.modality['use_lidar']: if self.modality['use_depth'] and self.modality['use_lidar']:
points = self.get_lidar_depth_reduced(sample_idx) points = self.get_lidar_depth_reduced(sample_idx)
elif self.modality['use_lidar']: elif self.modality['use_lidar']:
points = self.get_lidar(sample_idx)
elif self.modality['use_lidar_reduced']:
points = self.get_lidar_reduced(sample_idx) points = self.get_lidar_reduced(sample_idx)
elif self.modality['use_depth']: elif self.modality['use_depth']:
points = self.get_pure_depth_reduced(sample_idx) points = self.get_pure_depth_reduced(sample_idx)
...@@ -238,8 +240,6 @@ class KittiDataset(torch_data.Dataset): ...@@ -238,8 +240,6 @@ class KittiDataset(torch_data.Dataset):
axis=1).astype(np.float32) axis=1).astype(np.float32)
difficulty = annos['difficulty'] difficulty = annos['difficulty']
# this change gt_bboxes_3d to velodyne coordinates # this change gt_bboxes_3d to velodyne coordinates
import pdb
pdb.set_trace()
gt_bboxes_3d = box_np_ops.box_camera_to_lidar(gt_bboxes_3d, rect, gt_bboxes_3d = box_np_ops.box_camera_to_lidar(gt_bboxes_3d, rect,
Trv2c) Trv2c)
# only center format is allowed. so we need to convert # only center format is allowed. so we need to convert
......
import warnings
import numba import numba
import numpy as np import numpy as np
from numba.errors import NumbaPerformanceWarning
from mmdet3d.core.bbox import box_np_ops from mmdet3d.core.bbox import box_np_ops
warnings.filterwarnings("ignore", category=NumbaPerformanceWarning)
@numba.njit @numba.njit
def _rotation_box2d_jit_(corners, angle, rot_mat_T): def _rotation_box2d_jit_(corners, angle, rot_mat_T):
......
from .iou3d_utils import (boxes_iou3d_gpu, boxes_iou_bev, nms_gpu, from .iou3d_utils import (boxes_iou3d_gpu_camera, boxes_iou3d_gpu_lidar,
nms_normal_gpu) boxes_iou_bev, nms_gpu, nms_normal_gpu)
__all__ = ['boxes_iou_bev', 'boxes_iou3d_gpu', 'nms_gpu', 'nms_normal_gpu'] __all__ = [
'boxes_iou_bev', 'boxes_iou3d_gpu_camera', 'nms_gpu', 'nms_normal_gpu',
'boxes_iou3d_gpu_lidar'
]
...@@ -20,17 +20,22 @@ def boxes_iou_bev(boxes_a, boxes_b): ...@@ -20,17 +20,22 @@ def boxes_iou_bev(boxes_a, boxes_b):
return ans_iou return ans_iou
def boxes_iou3d_gpu(boxes_a, boxes_b, mode='iou'): def boxes_iou3d_gpu_camera(boxes_a, boxes_b, mode='iou'):
""" """Calculate 3d iou of boxes in camera coordinate
:param boxes_a: (N, 7) [x, y, z, h, w, l, ry]
:param boxes_b: (M, 7) [x, y, z, h, w, l, ry] Args:
:param mode "iou" (intersection over union) or iof (intersection over boxes_a (FloatTensor): (N, 7) [x, y, z, h, w, l, ry]
in LiDAR coordinate
boxes_b (FloatTensor): (M, 7) [x, y, z, h, w, l, ry]
mode (str): "iou" (intersection over union) or iof (intersection over
foreground). foreground).
:return:
ans_iou: (M, N) Returns:
FloatTensor: (M, N)
""" """
boxes_a_bev = boxes3d_to_bev_torch(boxes_a)
boxes_b_bev = boxes3d_to_bev_torch(boxes_b) boxes_a_bev = boxes3d_to_bev_torch_camera(boxes_a)
boxes_b_bev = boxes3d_to_bev_torch_camera(boxes_b)
# bev overlap # bev overlap
overlaps_bev = torch.cuda.FloatTensor( overlaps_bev = torch.cuda.FloatTensor(
...@@ -51,15 +56,62 @@ def boxes_iou3d_gpu(boxes_a, boxes_b, mode='iou'): ...@@ -51,15 +56,62 @@ def boxes_iou3d_gpu(boxes_a, boxes_b, mode='iou'):
# 3d iou # 3d iou
overlaps_3d = overlaps_bev * overlaps_h overlaps_3d = overlaps_bev * overlaps_h
vol_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1) volume_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1)
vol_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1) volume_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1)
if mode == 'iou': if mode == 'iou':
# the clamp func is used to avoid division of 0 # the clamp func is used to avoid division of 0
iou3d = overlaps_3d / torch.clamp( iou3d = overlaps_3d / torch.clamp(
vol_a + vol_b - overlaps_3d, min=1e-8) volume_a + volume_b - overlaps_3d, min=1e-8)
else: else:
iou3d = overlaps_3d / torch.clamp(vol_a, min=1e-8) iou3d = overlaps_3d / torch.clamp(volume_a, min=1e-8)
return iou3d
def boxes_iou3d_gpu_lidar(boxes_a, boxes_b, mode='iou'):
    """Calculate 3D IoU of boxes in LiDAR coordinate.

    In LiDAR coordinate z is the bottom center of a box, so the vertical
    extent of a box is [z, z + h].

    Args:
        boxes_a (FloatTensor): (N, 7) [x, y, z, w, l, h, ry]
            in LiDAR coordinate.
        boxes_b (FloatTensor): (M, 7) [x, y, z, w, l, h, ry]
            in LiDAR coordinate.
        mode (str): "iou" (intersection over union) or "iof"
            (intersection over foreground).

    Returns:
        FloatTensor: (N, M) pairwise IoU (or IoF) matrix.
    """
    boxes_a_bev = boxes3d_to_bev_torch_lidar(boxes_a)
    boxes_b_bev = boxes3d_to_bev_torch_lidar(boxes_b)

    # height overlap: boxes span [z, z + h] along the z axis
    boxes_a_height_max = (boxes_a[:, 2] + boxes_a[:, 5]).view(-1, 1)
    boxes_a_height_min = boxes_a[:, 2].view(-1, 1)
    boxes_b_height_max = (boxes_b[:, 2] + boxes_b[:, 5]).view(1, -1)
    boxes_b_height_min = boxes_b[:, 2].view(1, -1)

    # bev overlap
    overlaps_bev = boxes_a.new_zeros(
        torch.Size((boxes_a.shape[0], boxes_b.shape[0])))  # (N, M)
    iou3d_cuda.boxes_overlap_bev_gpu(boxes_a_bev.contiguous(),
                                     boxes_b_bev.contiguous(), overlaps_bev)
    max_of_min = torch.max(boxes_a_height_min, boxes_b_height_min)
    min_of_max = torch.min(boxes_a_height_max, boxes_b_height_max)
    overlaps_h = torch.clamp(min_of_max - max_of_min, min=0)

    # 3d iou: BEV intersection area times the height intersection
    overlaps_3d = overlaps_bev * overlaps_h

    volume_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1)
    volume_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1)

    if mode == 'iou':
        # the clamp func is used to avoid division of 0
        iou3d = overlaps_3d / torch.clamp(
            volume_a + volume_b - overlaps_3d, min=1e-8)
    else:
        iou3d = overlaps_3d / torch.clamp(volume_a, min=1e-8)

    return iou3d
...@@ -98,16 +150,39 @@ def nms_normal_gpu(boxes, scores, thresh): ...@@ -98,16 +150,39 @@ def nms_normal_gpu(boxes, scores, thresh):
return order[keep[:num_out].cuda()].contiguous() return order[keep[:num_out].cuda()].contiguous()
def boxes3d_to_bev_torch(boxes3d): def boxes3d_to_bev_torch_camera(boxes3d):
""" """covert boxes3d to bev in in camera coords
:param boxes3d: (N, 7) [x, y, z, h, w, l, ry] in camera coords
:return: Args:
boxes_bev: (N, 5) [x1, y1, x2, y2, ry] boxes3d (FloartTensor): (N, 7) [x, y, z, h, w, l, ry] in camera coords
Return:
FloartTensor: (N, 5) [x1, y1, x2, y2, ry]
""" """
boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5))) boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5)))
cu, cv = boxes3d[:, 0], boxes3d[:, 2] cu, cv = boxes3d[:, 0], boxes3d[:, 2]
half_l, half_w = boxes3d[:, 5] / 2, boxes3d[:, 4] / 2 half_l, half_w = boxes3d[:, 5] / 2, boxes3d[:, 4] / 2
boxes_bev[:, 0], boxes_bev[:, 1] = cu - half_l, cv - half_w boxes_bev[:, 0], boxes_bev[:, 1] = cu - half_l, cv - half_w
boxes_bev[:, 2], boxes_bev[:, 3] = cu + half_l, cv + half_w boxes_bev[:, 2], boxes_bev[:, 3] = cu + half_l, cv + half_w
boxes_bev[:, 4] = boxes3d[:, 6] boxes_bev[:, 4] = boxes3d[:, 6]
return boxes_bev return boxes_bev
def boxes3d_to_bev_torch_lidar(boxes3d):
    """Convert 3D boxes to BEV boxes in LiDAR coordinate.

    Args:
        boxes3d (FloatTensor): (N, 7) [x, y, z, w, l, h, ry]
            in LiDAR coordinate.

    Returns:
        FloatTensor: (N, 5) [x1, y1, x2, y2, ry], axis-aligned BEV
        corners (before rotation) plus the yaw angle.
    """
    # new_zeros keeps the input's dtype/device and avoids the deprecated,
    # uninitialized Tensor.new(...) allocation.
    boxes_bev = boxes3d.new_zeros((boxes3d.shape[0], 5))
    x, y = boxes3d[:, 0], boxes3d[:, 1]
    # w (index 3) spans the x axis, l (index 4) spans the y axis in BEV.
    half_l, half_w = boxes3d[:, 4] / 2, boxes3d[:, 3] / 2
    boxes_bev[:, 0], boxes_bev[:, 1] = x - half_w, y - half_l
    boxes_bev[:, 2], boxes_bev[:, 3] = x + half_w, y + half_l
    boxes_bev[:, 4] = boxes3d[:, 6]
    return boxes_bev
...@@ -7,8 +7,8 @@ ...@@ -7,8 +7,8 @@
#include <assert.h> #include <assert.h>
#include <math.h> #include <math.h>
#include <stdio.h> #include <stdio.h>
#include <torch/extension.h>
#include <torch/serialize/tensor.h> #include <torch/serialize/tensor.h>
#include <torch/types.h>
#define THREADS_PER_BLOCK 256 #define THREADS_PER_BLOCK 256
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
......
//Modified from // Modified from
//https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu // https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
//RoI-aware point cloud feature pooling // RoI-aware point cloud feature pooling
//Written by Shaoshuai Shi // Written by Shaoshuai Shi
//All Rights Reserved 2019. // All Rights Reserved 2019.
#include <torch/serialize/tensor.h>
#include <torch/extension.h>
#include <assert.h> #include <assert.h>
#include <math.h> #include <math.h>
#include <stdio.h> #include <stdio.h>
#include <torch/serialize/tensor.h>
#include <torch/types.h>
#define THREADS_PER_BLOCK 256 #define THREADS_PER_BLOCK 256
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
// #define DEBUG // #define DEBUG
__device__ inline void lidar_to_local_coords(float shift_x, float shift_y,
__device__ inline void lidar_to_local_coords(float shift_x, float shift_y, float rz, float &local_x, float &local_y){ float rz, float &local_x,
float &local_y) {
// should rotate pi/2 + alpha to translate LiDAR to local // should rotate pi/2 + alpha to translate LiDAR to local
float rot_angle = rz + M_PI / 2; float rot_angle = rz + M_PI / 2;
float cosa = cos(rot_angle), sina = sin(rot_angle); float cosa = cos(rot_angle), sina = sin(rot_angle);
...@@ -26,10 +25,11 @@ __device__ inline void lidar_to_local_coords(float shift_x, float shift_y, float ...@@ -26,10 +25,11 @@ __device__ inline void lidar_to_local_coords(float shift_x, float shift_y, float
local_y = shift_x * sina + shift_y * cosa; local_y = shift_x * sina + shift_y * cosa;
} }
__device__ inline int check_pt_in_box3d(const float *pt, const float *box3d,
__device__ inline int check_pt_in_box3d(const float *pt, const float *box3d, float &local_x, float &local_y){ float &local_x, float &local_y) {
// param pt: (x, y, z) // param pt: (x, y, z)
// param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the bottom center // param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the
// bottom center
float x = pt[0], y = pt[1], z = pt[2]; float x = pt[0], y = pt[1], z = pt[2];
float cx = box3d[0], cy = box3d[1], cz = box3d[2]; float cx = box3d[0], cy = box3d[1], cz = box3d[2];
float w = box3d[3], l = box3d[4], h = box3d[5], rz = box3d[6]; float w = box3d[3], l = box3d[4], h = box3d[5], rz = box3d[6];
...@@ -37,16 +37,19 @@ __device__ inline int check_pt_in_box3d(const float *pt, const float *box3d, flo ...@@ -37,16 +37,19 @@ __device__ inline int check_pt_in_box3d(const float *pt, const float *box3d, flo
if (fabsf(z - cz) > h / 2.0) return 0; if (fabsf(z - cz) > h / 2.0) return 0;
lidar_to_local_coords(x - cx, y - cy, rz, local_x, local_y); lidar_to_local_coords(x - cx, y - cy, rz, local_x, local_y);
float in_flag = (local_x > -l / 2.0) & (local_x < l / 2.0) & (local_y > -w / 2.0) & (local_y < w / 2.0); float in_flag = (local_x > -l / 2.0) & (local_x < l / 2.0) &
(local_y > -w / 2.0) & (local_y < w / 2.0);
return in_flag; return in_flag;
} }
__global__ void generate_pts_mask_for_box3d(int boxes_num, int pts_num,
__global__ void generate_pts_mask_for_box3d(int boxes_num, int pts_num, int out_x, int out_y, int out_z, int out_x, int out_y, int out_z,
const float *rois, const float *pts, int *pts_mask){ const float *rois, const float *pts,
int *pts_mask) {
// params rois: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate // params rois: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate
// params pts: (npoints, 3) [x, y, z] // params pts: (npoints, 3) [x, y, z]
// params pts_mask: (N, npoints): -1 means point doesnot in this box, otherwise: encode (x_idxs, y_idxs, z_idxs) by binary bit // params pts_mask: (N, npoints): -1 means point doesnot in this box,
// otherwise: encode (x_idxs, y_idxs, z_idxs) by binary bit
int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
int box_idx = blockIdx.y; int box_idx = blockIdx.y;
if (pt_idx >= pts_num || box_idx >= boxes_num) return; if (pt_idx >= pts_num || box_idx >= boxes_num) return;
...@@ -59,7 +62,7 @@ __global__ void generate_pts_mask_for_box3d(int boxes_num, int pts_num, int out_ ...@@ -59,7 +62,7 @@ __global__ void generate_pts_mask_for_box3d(int boxes_num, int pts_num, int out_
int cur_in_flag = check_pt_in_box3d(pts, rois, local_x, local_y); int cur_in_flag = check_pt_in_box3d(pts, rois, local_x, local_y);
pts_mask[0] = -1; pts_mask[0] = -1;
if (cur_in_flag > 0){ if (cur_in_flag > 0) {
float local_z = pts[2] - rois[2]; float local_z = pts[2] - rois[2];
float w = rois[3], l = rois[4], h = rois[5]; float w = rois[3], l = rois[4], h = rois[5];
...@@ -77,17 +80,22 @@ __global__ void generate_pts_mask_for_box3d(int boxes_num, int pts_num, int out_ ...@@ -77,17 +80,22 @@ __global__ void generate_pts_mask_for_box3d(int boxes_num, int pts_num, int out_
unsigned int idx_encoding = (x_idx << 16) + (y_idx << 8) + z_idx; unsigned int idx_encoding = (x_idx << 16) + (y_idx << 8) + z_idx;
#ifdef DEBUG #ifdef DEBUG
printf("mask: pts_%d(%.3f, %.3f, %.3f), local(%.3f, %.3f, %.3f), idx(%d, %d, %d), res(%.3f, %.3f, %.3f), idx_encoding=%x\n", printf(
pt_idx, pts[0], pts[1], pts[2], local_x, local_y, local_z, x_idx, y_idx, z_idx, x_res, y_res, z_res, idx_encoding); "mask: pts_%d(%.3f, %.3f, %.3f), local(%.3f, %.3f, %.3f), idx(%d, %d, "
"%d), res(%.3f, %.3f, %.3f), idx_encoding=%x\n",
pt_idx, pts[0], pts[1], pts[2], local_x, local_y, local_z, x_idx, y_idx,
z_idx, x_res, y_res, z_res, idx_encoding);
#endif #endif
pts_mask[0] = idx_encoding; pts_mask[0] = idx_encoding;
} }
} }
__global__ void collect_inside_pts_for_box3d(int boxes_num, int pts_num,
__global__ void collect_inside_pts_for_box3d(int boxes_num, int pts_num, int max_pts_each_voxel, int max_pts_each_voxel, int out_x,
int out_x, int out_y, int out_z, const int *pts_mask, int *pts_idx_of_voxels){ int out_y, int out_z,
const int *pts_mask,
int *pts_idx_of_voxels) {
// params pts_mask: (N, npoints) 0 or 1 // params pts_mask: (N, npoints) 0 or 1
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel) // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
...@@ -97,33 +105,36 @@ __global__ void collect_inside_pts_for_box3d(int boxes_num, int pts_num, int max ...@@ -97,33 +105,36 @@ __global__ void collect_inside_pts_for_box3d(int boxes_num, int pts_num, int max
int max_num_pts = max_pts_each_voxel - 1; // index 0 is the counter int max_num_pts = max_pts_each_voxel - 1; // index 0 is the counter
pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel; pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel;
for (int k = 0; k < pts_num; k++){ for (int k = 0; k < pts_num; k++) {
if (pts_mask[box_idx * pts_num + k] != -1){ if (pts_mask[box_idx * pts_num + k] != -1) {
unsigned int idx_encoding = pts_mask[box_idx * pts_num + k]; unsigned int idx_encoding = pts_mask[box_idx * pts_num + k];
unsigned int x_idx = (idx_encoding >> 16) & 0xFF; unsigned int x_idx = (idx_encoding >> 16) & 0xFF;
unsigned int y_idx = (idx_encoding >> 8) & 0xFF; unsigned int y_idx = (idx_encoding >> 8) & 0xFF;
unsigned int z_idx = idx_encoding & 0xFF; unsigned int z_idx = idx_encoding & 0xFF;
unsigned int base_offset = x_idx * out_y * out_z * max_pts_each_voxel + y_idx * out_z * max_pts_each_voxel + z_idx * max_pts_each_voxel; unsigned int base_offset = x_idx * out_y * out_z * max_pts_each_voxel +
y_idx * out_z * max_pts_each_voxel +
z_idx * max_pts_each_voxel;
unsigned int cnt = pts_idx_of_voxels[base_offset]; unsigned int cnt = pts_idx_of_voxels[base_offset];
if (cnt < max_num_pts){ if (cnt < max_num_pts) {
pts_idx_of_voxels[base_offset + cnt + 1] = k; pts_idx_of_voxels[base_offset + cnt + 1] = k;
pts_idx_of_voxels[base_offset]++; pts_idx_of_voxels[base_offset]++;
} }
#ifdef DEBUG #ifdef DEBUG
printf("collect: pts_%d, idx(%d, %d, %d), idx_encoding=%x\n", printf("collect: pts_%d, idx(%d, %d, %d), idx_encoding=%x\n", k, x_idx,
k, x_idx, y_idx, z_idx, idx_encoding); y_idx, z_idx, idx_encoding);
#endif #endif
} }
} }
} }
__global__ void roiaware_maxpool3d(int boxes_num, int pts_num, int channels,
__global__ void roiaware_maxpool3d(int boxes_num, int pts_num, int channels, int max_pts_each_voxel, int out_x, int max_pts_each_voxel, int out_x, int out_y,
int out_y, int out_z, const float *pts_feature, const int *pts_idx_of_voxels, float *pooled_features, int *argmax){ int out_z, const float *pts_feature,
const int *pts_idx_of_voxels,
float *pooled_features, int *argmax) {
// params pts_feature: (npoints, C) // params pts_feature: (npoints, C)
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel), index 0 is the counter // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel),
// params pooled_features: (N, out_x, out_y, out_z, C) // index 0 is the counter params pooled_features: (N, out_x, out_y, out_z, C)
// params argmax: (N, out_x, out_y, out_z, C) // params argmax: (N, out_x, out_y, out_z, C)
int box_idx = blockIdx.z; int box_idx = blockIdx.z;
...@@ -133,46 +144,57 @@ __global__ void roiaware_maxpool3d(int boxes_num, int pts_num, int channels, int ...@@ -133,46 +144,57 @@ __global__ void roiaware_maxpool3d(int boxes_num, int pts_num, int channels, int
int x_idx = voxel_idx_flat / (out_y * out_z); int x_idx = voxel_idx_flat / (out_y * out_z);
int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z; int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
int z_idx = voxel_idx_flat % out_z; int z_idx = voxel_idx_flat % out_z;
if (box_idx >= boxes_num || channel_idx >= channels|| x_idx >= out_x || y_idx >= out_y || z_idx >= out_z) return; if (box_idx >= boxes_num || channel_idx >= channels || x_idx >= out_x ||
y_idx >= out_y || z_idx >= out_z)
return;
#ifdef DEBUG #ifdef DEBUG
printf("src pts_idx_of_voxels: (%p, ), argmax: %p\n", pts_idx_of_voxels, argmax); printf("src pts_idx_of_voxels: (%p, ), argmax: %p\n", pts_idx_of_voxels,
argmax);
#endif #endif
int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx; int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel + offset_base * max_pts_each_voxel; pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel +
pooled_features += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx; offset_base * max_pts_each_voxel;
argmax += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx; pooled_features += box_idx * out_x * out_y * out_z * channels +
offset_base * channels + channel_idx;
argmax += box_idx * out_x * out_y * out_z * channels +
offset_base * channels + channel_idx;
int argmax_idx = -1; int argmax_idx = -1;
float max_val = -1e50; float max_val = -1e50;
int total_pts = pts_idx_of_voxels[0]; int total_pts = pts_idx_of_voxels[0];
for (int k = 1; k <= total_pts; k++){ for (int k = 1; k <= total_pts; k++) {
if (pts_feature[pts_idx_of_voxels[k] * channels + channel_idx] > max_val){ if (pts_feature[pts_idx_of_voxels[k] * channels + channel_idx] > max_val) {
max_val = pts_feature[pts_idx_of_voxels[k] * channels + channel_idx]; max_val = pts_feature[pts_idx_of_voxels[k] * channels + channel_idx];
argmax_idx = pts_idx_of_voxels[k]; argmax_idx = pts_idx_of_voxels[k];
} }
} }
if (argmax_idx != -1){ if (argmax_idx != -1) {
pooled_features[0] = max_val; pooled_features[0] = max_val;
} }
argmax[0] = argmax_idx; argmax[0] = argmax_idx;
#ifdef DEBUG #ifdef DEBUG
printf("channel_%d idx(%d, %d, %d), argmax_idx=(%d, %.3f), total=%d, after pts_idx: %p, argmax: (%p, %d)\n", printf(
channel_idx, x_idx, y_idx, z_idx, argmax_idx, max_val, total_pts, pts_idx_of_voxels, argmax, argmax_idx); "channel_%d idx(%d, %d, %d), argmax_idx=(%d, %.3f), total=%d, after "
"pts_idx: %p, argmax: (%p, %d)\n",
channel_idx, x_idx, y_idx, z_idx, argmax_idx, max_val, total_pts,
pts_idx_of_voxels, argmax, argmax_idx);
#endif #endif
} }
__global__ void roiaware_avgpool3d(int boxes_num, int pts_num, int channels,
__global__ void roiaware_avgpool3d(int boxes_num, int pts_num, int channels, int max_pts_each_voxel, int out_x, int max_pts_each_voxel, int out_x, int out_y,
int out_y, int out_z, const float *pts_feature, const int *pts_idx_of_voxels, float *pooled_features){ int out_z, const float *pts_feature,
const int *pts_idx_of_voxels,
float *pooled_features) {
// params pts_feature: (npoints, C) // params pts_feature: (npoints, C)
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel), index 0 is the counter // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel),
// params pooled_features: (N, out_x, out_y, out_z, C) // index 0 is the counter params pooled_features: (N, out_x, out_y, out_z, C)
// params argmax: (N, out_x, out_y, out_z, C) // params argmax: (N, out_x, out_y, out_z, C)
int box_idx = blockIdx.z; int box_idx = blockIdx.z;
...@@ -182,28 +204,34 @@ __global__ void roiaware_avgpool3d(int boxes_num, int pts_num, int channels, int ...@@ -182,28 +204,34 @@ __global__ void roiaware_avgpool3d(int boxes_num, int pts_num, int channels, int
int x_idx = voxel_idx_flat / (out_y * out_z); int x_idx = voxel_idx_flat / (out_y * out_z);
int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z; int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
int z_idx = voxel_idx_flat % out_z; int z_idx = voxel_idx_flat % out_z;
if (box_idx >= boxes_num || channel_idx >= channels|| x_idx >= out_x || y_idx >= out_y || z_idx >= out_z) return; if (box_idx >= boxes_num || channel_idx >= channels || x_idx >= out_x ||
y_idx >= out_y || z_idx >= out_z)
return;
int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx; int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel + offset_base * max_pts_each_voxel; pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel +
pooled_features += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx; offset_base * max_pts_each_voxel;
pooled_features += box_idx * out_x * out_y * out_z * channels +
offset_base * channels + channel_idx;
float sum_val = 0; float sum_val = 0;
int total_pts = pts_idx_of_voxels[0]; int total_pts = pts_idx_of_voxels[0];
for (int k = 1; k <= total_pts; k++){ for (int k = 1; k <= total_pts; k++) {
sum_val += pts_feature[pts_idx_of_voxels[k] * channels + channel_idx]; sum_val += pts_feature[pts_idx_of_voxels[k] * channels + channel_idx];
} }
if (total_pts > 0){ if (total_pts > 0) {
pooled_features[0] = sum_val / total_pts; pooled_features[0] = sum_val / total_pts;
} }
} }
void roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels,
int max_pts_each_voxel, int out_x, int out_y,
void roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels, int max_pts_each_voxel, int out_x, int out_y, int out_z, int out_z, const float *rois, const float *pts,
const float *rois, const float *pts, const float *pts_feature, int *argmax, int *pts_idx_of_voxels, float *pooled_features, int pool_method){ const float *pts_feature, int *argmax,
int *pts_idx_of_voxels, float *pooled_features,
int pool_method) {
// params rois: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate // params rois: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate
// params pts: (npoints, 3) [x, y, z] in LiDAR coordinate // params pts: (npoints, 3) [x, y, z] in LiDAR coordinate
// params pts_feature: (npoints, C) // params pts_feature: (npoints, C)
...@@ -218,25 +246,28 @@ void roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels, int max_ ...@@ -218,25 +246,28 @@ void roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels, int max_
dim3 blocks_mask(DIVUP(pts_num, THREADS_PER_BLOCK), boxes_num); dim3 blocks_mask(DIVUP(pts_num, THREADS_PER_BLOCK), boxes_num);
dim3 threads(THREADS_PER_BLOCK); dim3 threads(THREADS_PER_BLOCK);
generate_pts_mask_for_box3d<<<blocks_mask, threads>>>(boxes_num, pts_num, out_x, out_y, out_z, rois, pts, pts_mask); generate_pts_mask_for_box3d<<<blocks_mask, threads>>>(
boxes_num, pts_num, out_x, out_y, out_z, rois, pts, pts_mask);
// TODO: Merge the collect and pool functions, SS // TODO: Merge the collect and pool functions, SS
dim3 blocks_collect(DIVUP(boxes_num, THREADS_PER_BLOCK)); dim3 blocks_collect(DIVUP(boxes_num, THREADS_PER_BLOCK));
collect_inside_pts_for_box3d<<<blocks_collect, threads>>>(boxes_num, pts_num, max_pts_each_voxel, collect_inside_pts_for_box3d<<<blocks_collect, threads>>>(
out_x, out_y, out_z, pts_mask, pts_idx_of_voxels); boxes_num, pts_num, max_pts_each_voxel, out_x, out_y, out_z, pts_mask,
pts_idx_of_voxels);
dim3 blocks_pool(DIVUP(out_x * out_y * out_z, THREADS_PER_BLOCK), channels, boxes_num);
if (pool_method == 0){ dim3 blocks_pool(DIVUP(out_x * out_y * out_z, THREADS_PER_BLOCK), channels,
roiaware_maxpool3d<<<blocks_pool, threads>>>(boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z, boxes_num);
if (pool_method == 0) {
roiaware_maxpool3d<<<blocks_pool, threads>>>(
boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z,
pts_feature, pts_idx_of_voxels, pooled_features, argmax); pts_feature, pts_idx_of_voxels, pooled_features, argmax);
} } else if (pool_method == 1) {
else if (pool_method == 1){ roiaware_avgpool3d<<<blocks_pool, threads>>>(
roiaware_avgpool3d<<<blocks_pool, threads>>>(boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z, boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z,
pts_feature, pts_idx_of_voxels, pooled_features); pts_feature, pts_idx_of_voxels, pooled_features);
} }
cudaFree(pts_mask); cudaFree(pts_mask);
#ifdef DEBUG #ifdef DEBUG
...@@ -244,9 +275,11 @@ void roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels, int max_ ...@@ -244,9 +275,11 @@ void roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels, int max_
#endif #endif
} }
__global__ void roiaware_maxpool3d_backward(int boxes_num, int channels,
__global__ void roiaware_maxpool3d_backward(int boxes_num, int channels, int out_x, int out_y, int out_z, int out_x, int out_y, int out_z,
const int *argmax, const float *grad_out, float *grad_in){ const int *argmax,
const float *grad_out,
float *grad_in) {
// params argmax: (N, out_x, out_y, out_z, C) // params argmax: (N, out_x, out_y, out_z, C)
// params grad_out: (N, out_x, out_y, out_z, C) // params grad_out: (N, out_x, out_y, out_z, C)
// params grad_in: (npoints, C), return value // params grad_in: (npoints, C), return value
...@@ -258,20 +291,27 @@ __global__ void roiaware_maxpool3d_backward(int boxes_num, int channels, int out ...@@ -258,20 +291,27 @@ __global__ void roiaware_maxpool3d_backward(int boxes_num, int channels, int out
int x_idx = voxel_idx_flat / (out_y * out_z); int x_idx = voxel_idx_flat / (out_y * out_z);
int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z; int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
int z_idx = voxel_idx_flat % out_z; int z_idx = voxel_idx_flat % out_z;
if (box_idx >= boxes_num || channel_idx >= channels|| x_idx >= out_x || y_idx >= out_y || z_idx >= out_z) return; if (box_idx >= boxes_num || channel_idx >= channels || x_idx >= out_x ||
y_idx >= out_y || z_idx >= out_z)
return;
int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx; int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
argmax += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx; argmax += box_idx * out_x * out_y * out_z * channels +
grad_out += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx; offset_base * channels + channel_idx;
grad_out += box_idx * out_x * out_y * out_z * channels +
offset_base * channels + channel_idx;
if (argmax[0] == -1) return; if (argmax[0] == -1) return;
atomicAdd(grad_in + argmax[0] * channels + channel_idx, grad_out[0] * 1); atomicAdd(grad_in + argmax[0] * channels + channel_idx, grad_out[0] * 1);
} }
__global__ void roiaware_avgpool3d_backward(int boxes_num, int channels,
__global__ void roiaware_avgpool3d_backward(int boxes_num, int channels, int out_x, int out_y, int out_z, int out_x, int out_y, int out_z,
int max_pts_each_voxel, const int *pts_idx_of_voxels, const float *grad_out, float *grad_in){ int max_pts_each_voxel,
const int *pts_idx_of_voxels,
const float *grad_out,
float *grad_in) {
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel) // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params grad_out: (N, out_x, out_y, out_z, C) // params grad_out: (N, out_x, out_y, out_z, C)
// params grad_in: (npoints, C), return value // params grad_in: (npoints, C), return value
...@@ -283,41 +323,45 @@ __global__ void roiaware_avgpool3d_backward(int boxes_num, int channels, int out ...@@ -283,41 +323,45 @@ __global__ void roiaware_avgpool3d_backward(int boxes_num, int channels, int out
int x_idx = voxel_idx_flat / (out_y * out_z); int x_idx = voxel_idx_flat / (out_y * out_z);
int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z; int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
int z_idx = voxel_idx_flat % out_z; int z_idx = voxel_idx_flat % out_z;
if (box_idx >= boxes_num || channel_idx >= channels|| x_idx >= out_x || y_idx >= out_y || z_idx >= out_z) return; if (box_idx >= boxes_num || channel_idx >= channels || x_idx >= out_x ||
y_idx >= out_y || z_idx >= out_z)
return;
int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx; int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel + offset_base * max_pts_each_voxel; pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel +
grad_out += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx; offset_base * max_pts_each_voxel;
grad_out += box_idx * out_x * out_y * out_z * channels +
offset_base * channels + channel_idx;
int total_pts = pts_idx_of_voxels[0]; int total_pts = pts_idx_of_voxels[0];
float cur_grad = 1 / fmaxf(float(total_pts), 1.0); float cur_grad = 1 / fmaxf(float(total_pts), 1.0);
for (int k = 1; k <= total_pts; k++){ for (int k = 1; k <= total_pts; k++) {
atomicAdd(grad_in + pts_idx_of_voxels[k] * channels + channel_idx, grad_out[0] * cur_grad); atomicAdd(grad_in + pts_idx_of_voxels[k] * channels + channel_idx,
grad_out[0] * cur_grad);
} }
} }
void roiaware_pool3d_backward_launcher(int boxes_num, int out_x, int out_y,
int out_z, int channels,
void roiaware_pool3d_backward_launcher(int boxes_num, int out_x, int out_y, int out_z, int channels, int max_pts_each_voxel, int max_pts_each_voxel,
const int *pts_idx_of_voxels, const int *argmax, const float *grad_out, float *grad_in, int pool_method){ const int *pts_idx_of_voxels,
const int *argmax, const float *grad_out,
float *grad_in, int pool_method) {
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel) // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params argmax: (N, out_x, out_y, out_z, C) // params argmax: (N, out_x, out_y, out_z, C)
// params grad_out: (N, out_x, out_y, out_z, C) // params grad_out: (N, out_x, out_y, out_z, C)
// params grad_in: (npoints, C), return value // params grad_in: (npoints, C), return value
// params pool_method: 0: max_pool, 1: avg_pool // params pool_method: 0: max_pool, 1: avg_pool
dim3 blocks(DIVUP(out_x * out_y * out_z, THREADS_PER_BLOCK), channels, boxes_num); dim3 blocks(DIVUP(out_x * out_y * out_z, THREADS_PER_BLOCK), channels,
boxes_num);
dim3 threads(THREADS_PER_BLOCK); dim3 threads(THREADS_PER_BLOCK);
if (pool_method == 0){ if (pool_method == 0) {
roiaware_maxpool3d_backward<<<blocks, threads>>>( roiaware_maxpool3d_backward<<<blocks, threads>>>(
boxes_num, channels, out_x, out_y, out_z, argmax, grad_out, grad_in boxes_num, channels, out_x, out_y, out_z, argmax, grad_out, grad_in);
); } else if (pool_method == 1) {
}
else if (pool_method == 1){
roiaware_avgpool3d_backward<<<blocks, threads>>>( roiaware_avgpool3d_backward<<<blocks, threads>>>(
boxes_num, channels, out_x, out_y, out_z, max_pts_each_voxel, pts_idx_of_voxels, grad_out, grad_in boxes_num, channels, out_x, out_y, out_z, max_pts_each_voxel,
); pts_idx_of_voxels, grad_out, grad_in);
} }
} }
from mmcv.cnn import build_norm_layer from mmcv.cnn import build_norm_layer
from torch import nn from torch import nn
import mmdet3d.ops.spconv as spconv
from mmdet.models.backbones.resnet import BasicBlock, Bottleneck from mmdet.models.backbones.resnet import BasicBlock, Bottleneck
from . import spconv
def conv3x3(in_planes, out_planes, stride=1, indice_key=None): def conv3x3(in_planes, out_planes, stride=1, indice_key=None):
......
import torch
from mmdet3d.core.bbox.assigners import MaxIoUAssigner
from mmdet3d.core.bbox.samplers import IoUNegPiecewiseSampler
def test_iou_piecewise_sampler():
    """Smoke-test IoUNegPiecewiseSampler on a hand-built 3D assignment.

    Builds a MaxIoUAssigner over 12 LiDAR-frame boxes and 2 ground-truth
    boxes, then samples with piecewise negative IoU thresholds and checks
    the sampled positives/negatives are self-consistent.

    Requires CUDA: all tensors are moved to the GPU because the 3D IoU
    calculator ('BboxOverlaps3D') runs there.
    """
    # Assigner: a proposal is positive iff its 3D IoU with some gt >= 0.55.
    assigner = MaxIoUAssigner(
        pos_iou_thr=0.55,
        neg_iou_thr=0.55,
        min_pos_iou=0.55,
        ignore_iof_thr=-1,
        iou_calculator=dict(type='BboxOverlaps3D', coordinate='lidar'))
    # 12 proposals, (x, y, z, w, l, h, rz) in LiDAR coordinates.
    # Rows 4-8 overlap the gt boxes below; the repeated far-away rows
    # ([32, 32, 16, ...]) provide negatives.
    bboxes = torch.tensor(
        [[32, 32, 16, 8, 38, 42, -0.3], [32, 32, 16, 8, 38, 42, -0.3],
         [32, 32, 16, 8, 38, 42, -0.3], [32, 32, 16, 8, 38, 42, -0.3],
         [0, 0, 0, 10, 10, 10, 0.2], [10, 10, 10, 20, 20, 15, 0.6],
         [5, 5, 5, 15, 15, 15, 0.7], [5, 5, 5, 15, 15, 15, 0.7],
         [5, 5, 5, 15, 15, 15, 0.7], [32, 32, 16, 8, 38, 42, -0.3],
         [32, 32, 16, 8, 38, 42, -0.3], [32, 32, 16, 8, 38, 42, -0.3]],
        dtype=torch.float32).cuda()
    gt_bboxes = torch.tensor(
        [[0, 0, 0, 10, 10, 9, 0.2], [5, 10, 10, 20, 20, 15, 0.6]],
        dtype=torch.float32).cuda()
    gt_labels = torch.tensor([1, 1], dtype=torch.int64).cuda()
    assign_result = assigner.assign(bboxes, gt_bboxes, gt_labels=gt_labels)
    # Sample 10 proposals; negatives are drawn piecewise: 80% from the
    # IoU range below 0.55 and 20% from below 0.1.
    sampler = IoUNegPiecewiseSampler(
        num=10,
        pos_fraction=0.55,
        neg_piece_fractions=[0.8, 0.2],
        neg_iou_piece_thrs=[0.55, 0.1],
        neg_pos_ub=-1,
        add_gt_as_proposals=False)
    sample_result = sampler.sample(assign_result, bboxes, gt_bboxes, gt_labels)
    # NOTE(review): `pos_inds == 4` compares a tensor of indices with a
    # scalar; this only asserts cleanly if pos_inds has a single element.
    # Presumably `len(sample_result.pos_inds) == 4` was intended — verify.
    assert sample_result.pos_inds == 4
    assert len(sample_result.pos_bboxes) == len(sample_result.pos_inds)
    assert len(sample_result.neg_bboxes) == len(sample_result.neg_inds)
#!/usr/bin/env bash
# Distributed testing launcher.
# Usage: ./dist_test.sh CONFIG CHECKPOINT GPUS [extra args forwarded to test.py]
# The rendezvous port can be overridden with the PORT environment variable.
CONFIG=$1
CHECKPOINT=$2
GPUS=$3
PORT=${PORT:-29500}

# Quote every expansion so paths containing spaces and multi-word extra
# arguments ("${@:4}") survive word splitting; prepend the repo root to
# PYTHONPATH so the in-tree package is importable without installation.
PYTHONPATH="$(dirname "$0")/..":$PYTHONPATH \
python -m torch.distributed.launch --nproc_per_node="$GPUS" --master_port="$PORT" \
    "$(dirname "$0")/test.py" "$CONFIG" "$CHECKPOINT" --launcher pytorch "${@:4}"
#!/usr/bin/env bash #!/usr/bin/env bash
PYTHON=${PYTHON:-"python"}
CONFIG=$1 CONFIG=$1
GPUS=$2 GPUS=$2
PORT=${PORT:-29500}
$PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS \ PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
$(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3}
#!/usr/bin/env bash #!/usr/bin/env bash
set -x set -x
export PYTHONPATH=`pwd`:$PYTHONPATH
PARTITION=$1 PARTITION=$1
JOB_NAME=$2 JOB_NAME=$2
...@@ -9,14 +8,17 @@ CONFIG=$3 ...@@ -9,14 +8,17 @@ CONFIG=$3
CHECKPOINT=$4 CHECKPOINT=$4
GPUS=${GPUS:-8} GPUS=${GPUS:-8}
GPUS_PER_NODE=${GPUS_PER_NODE:-8} GPUS_PER_NODE=${GPUS_PER_NODE:-8}
CPUS_PER_TASK=${CPUS_PER_TASK:-5}
PY_ARGS=${@:5} PY_ARGS=${@:5}
SRUN_ARGS=${SRUN_ARGS:-""} SRUN_ARGS=${SRUN_ARGS:-""}
PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
srun -p ${PARTITION} \ srun -p ${PARTITION} \
--job-name=${JOB_NAME} \ --job-name=${JOB_NAME} \
--gres=gpu:${GPUS_PER_NODE} \ --gres=gpu:${GPUS_PER_NODE} \
--ntasks=${GPUS} \ --ntasks=${GPUS} \
--ntasks-per-node=${GPUS_PER_NODE} \ --ntasks-per-node=${GPUS_PER_NODE} \
--cpus-per-task=${CPUS_PER_TASK} \
--kill-on-bad-exit=1 \ --kill-on-bad-exit=1 \
${SRUN_ARGS} \ ${SRUN_ARGS} \
python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS}
...@@ -8,15 +8,17 @@ CONFIG=$3 ...@@ -8,15 +8,17 @@ CONFIG=$3
WORK_DIR=$4 WORK_DIR=$4
GPUS=${GPUS:-8} GPUS=${GPUS:-8}
GPUS_PER_NODE=${GPUS_PER_NODE:-8} GPUS_PER_NODE=${GPUS_PER_NODE:-8}
CPUS_PER_TASK=${CPUS_PER_TASK:-5}
SRUN_ARGS=${SRUN_ARGS:-""} SRUN_ARGS=${SRUN_ARGS:-""}
PY_ARGS=${PY_ARGS:-"--validate"} PY_ARGS=${@:5}
PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
srun -p ${PARTITION} \ srun -p ${PARTITION} \
--job-name=${JOB_NAME} \ --job-name=${JOB_NAME} \
--gres=gpu:${GPUS_PER_NODE} \ --gres=gpu:${GPUS_PER_NODE} \
--ntasks=${GPUS} \ --ntasks=${GPUS} \
--ntasks-per-node=${GPUS_PER_NODE} \ --ntasks-per-node=${GPUS_PER_NODE} \
--cpus-per-task=${CPUS_PER_TASK} \
--kill-on-bad-exit=1 \ --kill-on-bad-exit=1 \
${SRUN_ARGS} \ ${SRUN_ARGS} \
python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment