Commit bce7d0c3 authored by yinchimaoliang's avatar yinchimaoliang
Browse files

Merge branch 'master_temp' into indoor_pipeline

parents 1756485e 868c5fab
......@@ -93,7 +93,8 @@ class_names = ['Car']
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict(
use_lidar=True,
use_lidar=False,
use_lidar_reduced=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False,
......
......@@ -113,7 +113,8 @@ class_names = ['Pedestrian', 'Cyclist', 'Car']
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict(
use_lidar=True,
use_lidar=False,
use_lidar_reduced=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=True,
......
......@@ -91,7 +91,8 @@ class_names = ['Car']
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict(
use_lidar=True,
use_lidar=False,
use_lidar_reduced=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=True,
......
......@@ -90,7 +90,8 @@ class_names = ['Car']
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict(
use_lidar=True,
use_lidar=False,
use_lidar_reduced=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False,
......
......@@ -89,7 +89,8 @@ class_names = ['Car']
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict(
use_lidar=True,
use_lidar=False,
use_lidar_reduced=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False,
......
import torch
from mmdet3d.ops.iou3d import boxes_iou3d_gpu
from mmdet3d.ops.iou3d import boxes_iou3d_gpu_camera, boxes_iou3d_gpu_lidar
from mmdet.core.bbox import bbox_overlaps
from mmdet.core.bbox.iou_calculators.builder import IOU_CALCULATORS
from .. import box_torch_ops
......@@ -22,10 +22,18 @@ class BboxOverlapsNearest3D(object):
@IOU_CALCULATORS.register_module()
class BboxOverlaps3D(object):
"""3D IoU Calculator"""
"""3D IoU Calculator
def __call__(self, bboxes1, bboxes2, mode='iou', is_aligned=False):
return bbox_overlaps_3d(bboxes1, bboxes2, mode, is_aligned)
Args:
coordinate (str): 'camera' or 'lidar' coordinate system
"""
def __init__(self, coordinate):
assert coordinate in ['camera', 'lidar']
self.coordinate = coordinate
def __call__(self, bboxes1, bboxes2, mode='iou'):
return bbox_overlaps_3d(bboxes1, bboxes2, mode, self.coordinate)
def __repr__(self):
repr_str = self.__class__.__name__
......@@ -62,7 +70,7 @@ def bbox_overlaps_nearest_3d(bboxes1, bboxes2, mode='iou', is_aligned=False):
return ret
def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou'):
def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou', coordinate='camera'):
"""Calculate 3D IoU using cuda implementation
Args:
......@@ -70,6 +78,7 @@ def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou'):
bboxes2: Tensor, shape (M, 7) [x, y, z, h, w, l, ry]
mode: mode (str): "iou" (intersection over union) or
iof (intersection over foreground).
coordinate (str): 'camera' or 'lidar' coordinate system
Return:
iou: (M, N) not support aligned mode currently
......@@ -77,4 +86,11 @@ def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou'):
# TODO: check the input dimension meanings,
# this is inconsistent with that in bbox_overlaps_nearest_3d
assert bboxes1.size(-1) == bboxes2.size(-1) == 7
return boxes_iou3d_gpu(bboxes1, bboxes2, mode)
assert coordinate in ['camera', 'lidar']
if coordinate == 'camera':
return boxes_iou3d_gpu_camera(bboxes1, bboxes2, mode)
elif coordinate == 'lidar':
return boxes_iou3d_gpu_lidar(bboxes1, bboxes2, mode)
else:
raise NotImplementedError
......@@ -3,9 +3,10 @@ from mmdet.core.bbox.samplers import (BaseSampler, CombinedSampler,
IoUBalancedNegSampler, OHEMSampler,
PseudoSampler, RandomSampler,
SamplingResult)
from .iou_neg_piecewise_sampler import IoUNegPiecewiseSampler
__all__ = [
'BaseSampler', 'PseudoSampler', 'RandomSampler',
'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
'OHEMSampler', 'SamplingResult'
'OHEMSampler', 'SamplingResult', 'IoUNegPiecewiseSampler'
]
import torch
from mmdet.core.bbox.builder import BBOX_SAMPLERS
from . import RandomSampler, SamplingResult
@BBOX_SAMPLERS.register_module
class IoUNegPiecewiseSampler(RandomSampler):
    """IoU piece-wise sampling.

    Sampling negative proposals according to a list of IoU thresholds.
    The negative proposals are divided into several pieces according
    to `neg_iou_piece_thrs`. And the ratio of each piece is indicated
    by `neg_piece_fractions`.

    Args:
        num (int): Number of proposals.
        pos_fraction (float): The fraction of positive proposals.
        neg_piece_fractions (list): A list containing fractions that
            indicate the ratio of each piece of total negative samplers.
        neg_iou_piece_thrs (list): A list containing IoU thresholds that
            indicate the upper bound of this piece.
        neg_pos_ub (float): The total ratio to limit the upper bound
            number of negative samples.
        add_gt_as_proposals (bool): Whether to add gt as proposals.
        return_iou (bool): Whether to attach the IoU of the sampled
            proposals to the sampling result.
    """

    def __init__(self,
                 num,
                 pos_fraction=None,
                 neg_piece_fractions=None,
                 neg_iou_piece_thrs=None,
                 neg_pos_ub=-1,
                 add_gt_as_proposals=False,
                 return_iou=False):
        super(IoUNegPiecewiseSampler,
              self).__init__(num, pos_fraction, neg_pos_ub,
                             add_gt_as_proposals)
        assert isinstance(neg_piece_fractions, list)
        assert len(neg_piece_fractions) == len(neg_iou_piece_thrs)
        self.neg_piece_fractions = neg_piece_fractions
        self.neg_iou_thr = neg_iou_piece_thrs
        self.return_iou = return_iou
        self.neg_piece_num = len(self.neg_piece_fractions)

    def _sample_pos(self, assign_result, num_expected, **kwargs):
        """Randomly sample some positive samples.

        Args:
            assign_result (:obj:`AssignResult`): Bbox assigning results.
            num_expected (int): The expected number of positive samples.

        Returns:
            Tensor: Indices of the sampled positive proposals.
        """
        pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False)
        if pos_inds.numel() != 0:
            pos_inds = pos_inds.squeeze(1)
        if pos_inds.numel() <= num_expected:
            return pos_inds
        else:
            return self.random_choice(pos_inds, num_expected)

    def _sample_neg(self, assign_result, num_expected, **kwargs):
        """Sample negative proposals piece-wise according to their max IoU.

        Args:
            assign_result (:obj:`AssignResult`): Bbox assigning results.
            num_expected (int): The expected number of negative samples.

        Returns:
            Tensor: Indices of the sampled negative proposals.
        """
        # `as_tuple=False` for consistency with `_sample_pos` and to avoid
        # the deprecation warning of the no-argument form.
        neg_inds = torch.nonzero(assign_result.gt_inds == 0, as_tuple=False)
        if neg_inds.numel() != 0:
            neg_inds = neg_inds.squeeze(1)
        if len(neg_inds) <= num_expected:
            return neg_inds
        else:
            neg_inds_choice = neg_inds.new_zeros([0])
            extend_num = 0
            max_overlaps = assign_result.max_overlaps[neg_inds]

            for piece_inds in range(self.neg_piece_num):
                if piece_inds == self.neg_piece_num - 1:  # for the last piece
                    piece_expected_num = num_expected - len(neg_inds_choice)
                    min_iou_thr = 0
                else:
                    # if the numbers of negative samplers in previous
                    # pieces are less than the expected number, extend
                    # the same number in the current piece.
                    piece_expected_num = int(
                        num_expected *
                        self.neg_piece_fractions[piece_inds]) + extend_num
                    min_iou_thr = self.neg_iou_thr[piece_inds + 1]
                max_iou_thr = self.neg_iou_thr[piece_inds]
                # negatives whose max IoU falls in [min_iou_thr, max_iou_thr)
                piece_neg_inds = torch.nonzero(
                    (max_overlaps >= min_iou_thr)
                    & (max_overlaps < max_iou_thr),
                    as_tuple=False).view(-1)

                if len(piece_neg_inds) < piece_expected_num:
                    # not enough candidates in this piece: take them all and
                    # carry the deficit over to the next piece
                    neg_inds_choice = torch.cat(
                        [neg_inds_choice, neg_inds[piece_neg_inds]], dim=0)
                    extend_num += piece_expected_num - len(piece_neg_inds)
                else:
                    piece_choice = self.random_choice(piece_neg_inds,
                                                      piece_expected_num)
                    neg_inds_choice = torch.cat(
                        [neg_inds_choice, neg_inds[piece_choice]], dim=0)
                    extend_num = 0
            return neg_inds_choice

    def sample(self,
               assign_result,
               bboxes,
               gt_bboxes,
               gt_labels=None,
               **kwargs):
        """Sample positive and negative bboxes.

        This is a simple implementation of bbox sampling given candidates,
        assigning results and ground truth bboxes.

        Args:
            assign_result (:obj:`AssignResult`): Bbox assigning results.
            bboxes (Tensor): Boxes to be sampled from.
            gt_bboxes (Tensor): Ground truth bboxes.
            gt_labels (Tensor, optional): Class labels of ground truth bboxes.

        Returns:
            :obj:`SamplingResult`: Sampling result.
        """
        if len(bboxes.shape) < 2:
            bboxes = bboxes[None, :]

        gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.bool)
        if self.add_gt_as_proposals and len(gt_bboxes) > 0:
            if gt_labels is None:
                raise ValueError(
                    'gt_labels must be given when add_gt_as_proposals is True')
            bboxes = torch.cat([gt_bboxes, bboxes], dim=0)
            assign_result.add_gt_(gt_labels)
            gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.bool)
            gt_flags = torch.cat([gt_ones, gt_flags])

        num_expected_pos = int(self.num * self.pos_fraction)
        pos_inds = self.pos_sampler._sample_pos(
            assign_result, num_expected_pos, bboxes=bboxes, **kwargs)
        # We found that sampled indices have duplicated items occasionally.
        # (may be a bug of PyTorch)
        pos_inds = pos_inds.unique()
        num_sampled_pos = pos_inds.numel()
        num_expected_neg = self.num - num_sampled_pos
        if self.neg_pos_ub >= 0:
            _pos = max(1, num_sampled_pos)
            neg_upper_bound = int(self.neg_pos_ub * _pos)
            if num_expected_neg > neg_upper_bound:
                num_expected_neg = neg_upper_bound
        neg_inds = self.neg_sampler._sample_neg(
            assign_result, num_expected_neg, bboxes=bboxes, **kwargs)
        neg_inds = neg_inds.unique()

        sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
                                         assign_result, gt_flags)
        if self.return_iou:
            # PartA2 needs iou score to regression.
            sampling_result.iou = assign_result.max_overlaps[torch.cat(
                [pos_inds, neg_inds])]
            sampling_result.iou.detach_()
        return sampling_result
......@@ -184,6 +184,8 @@ class KittiDataset(torch_data.Dataset):
if self.modality['use_depth'] and self.modality['use_lidar']:
points = self.get_lidar_depth_reduced(sample_idx)
elif self.modality['use_lidar']:
points = self.get_lidar(sample_idx)
elif self.modality['use_lidar_reduced']:
points = self.get_lidar_reduced(sample_idx)
elif self.modality['use_depth']:
points = self.get_pure_depth_reduced(sample_idx)
......@@ -238,8 +240,6 @@ class KittiDataset(torch_data.Dataset):
axis=1).astype(np.float32)
difficulty = annos['difficulty']
# this change gt_bboxes_3d to velodyne coordinates
import pdb
pdb.set_trace()
gt_bboxes_3d = box_np_ops.box_camera_to_lidar(gt_bboxes_3d, rect,
Trv2c)
# only center format is allowed. so we need to convert
......
import warnings
import numba
import numpy as np
from numba.errors import NumbaPerformanceWarning
from mmdet3d.core.bbox import box_np_ops
warnings.filterwarnings("ignore", category=NumbaPerformanceWarning)
@numba.njit
def _rotation_box2d_jit_(corners, angle, rot_mat_T):
......
from .iou3d_utils import (boxes_iou3d_gpu, boxes_iou_bev, nms_gpu,
nms_normal_gpu)
from .iou3d_utils import (boxes_iou3d_gpu_camera, boxes_iou3d_gpu_lidar,
boxes_iou_bev, nms_gpu, nms_normal_gpu)
__all__ = ['boxes_iou_bev', 'boxes_iou3d_gpu', 'nms_gpu', 'nms_normal_gpu']
__all__ = [
'boxes_iou_bev', 'boxes_iou3d_gpu_camera', 'nms_gpu', 'nms_normal_gpu',
'boxes_iou3d_gpu_lidar'
]
......@@ -20,17 +20,22 @@ def boxes_iou_bev(boxes_a, boxes_b):
return ans_iou
def boxes_iou3d_gpu(boxes_a, boxes_b, mode='iou'):
"""
:param boxes_a: (N, 7) [x, y, z, h, w, l, ry]
:param boxes_b: (M, 7) [x, y, z, h, w, l, ry]
:param mode "iou" (intersection over union) or iof (intersection over
def boxes_iou3d_gpu_camera(boxes_a, boxes_b, mode='iou'):
"""Calculate 3d iou of boxes in camera coordinate
Args:
boxes_a (FloatTensor): (N, 7) [x, y, z, h, w, l, ry]
in camera coordinate
boxes_b (FloatTensor): (M, 7) [x, y, z, h, w, l, ry]
mode (str): "iou" (intersection over union) or iof (intersection over
foreground).
:return:
ans_iou: (M, N)
Returns:
FloatTensor: (M, N)
"""
boxes_a_bev = boxes3d_to_bev_torch(boxes_a)
boxes_b_bev = boxes3d_to_bev_torch(boxes_b)
boxes_a_bev = boxes3d_to_bev_torch_camera(boxes_a)
boxes_b_bev = boxes3d_to_bev_torch_camera(boxes_b)
# bev overlap
overlaps_bev = torch.cuda.FloatTensor(
......@@ -51,15 +56,62 @@ def boxes_iou3d_gpu(boxes_a, boxes_b, mode='iou'):
# 3d iou
overlaps_3d = overlaps_bev * overlaps_h
vol_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1)
vol_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1)
volume_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1)
volume_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1)
if mode == 'iou':
# the clamp func is used to avoid division of 0
iou3d = overlaps_3d / torch.clamp(
vol_a + vol_b - overlaps_3d, min=1e-8)
volume_a + volume_b - overlaps_3d, min=1e-8)
else:
iou3d = overlaps_3d / torch.clamp(vol_a, min=1e-8)
iou3d = overlaps_3d / torch.clamp(volume_a, min=1e-8)
return iou3d
def boxes_iou3d_gpu_lidar(boxes_a, boxes_b, mode='iou'):
    """Calculate 3d IoU of boxes in LiDAR coordinate.

    Args:
        boxes_a (FloatTensor): (N, 7) [x, y, z, w, l, h, ry]
            in LiDAR coordinate
        boxes_b (FloatTensor): (M, 7) [x, y, z, w, l, h, ry]
        mode (str): "iou" (intersection over union) or iof (intersection over
            foreground).

    Returns:
        FloatTensor: (N, M), pairwise 3d IoU (or IoF) between the two sets.
    """
    boxes_a_bev = boxes3d_to_bev_torch_lidar(boxes_a)
    boxes_b_bev = boxes3d_to_bev_torch_lidar(boxes_b)

    # height overlap: vertical extent of each box is [z, z + h]
    # (z at column 2 is the box bottom, h at column 5)
    boxes_a_height_max = (boxes_a[:, 2] + boxes_a[:, 5]).view(-1, 1)
    boxes_a_height_min = boxes_a[:, 2].view(-1, 1)
    boxes_b_height_max = (boxes_b[:, 2] + boxes_b[:, 5]).view(1, -1)
    boxes_b_height_min = boxes_b[:, 2].view(1, -1)

    # bev overlap, computed by the custom CUDA op
    overlaps_bev = boxes_a.new_zeros(
        torch.Size((boxes_a.shape[0], boxes_b.shape[0])))  # (N, M)
    iou3d_cuda.boxes_overlap_bev_gpu(boxes_a_bev.contiguous(),
                                     boxes_b_bev.contiguous(), overlaps_bev)
    max_of_min = torch.max(boxes_a_height_min, boxes_b_height_min)
    min_of_max = torch.min(boxes_a_height_max, boxes_b_height_max)
    overlaps_h = torch.clamp(min_of_max - max_of_min, min=0)

    # 3d overlap = bev overlap area * height overlap
    overlaps_3d = overlaps_bev * overlaps_h

    volume_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1)
    volume_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1)

    if mode == 'iou':
        # the clamp func is used to avoid division of 0
        iou3d = overlaps_3d / torch.clamp(
            volume_a + volume_b - overlaps_3d, min=1e-8)
    else:
        iou3d = overlaps_3d / torch.clamp(volume_a, min=1e-8)

    return iou3d
......@@ -98,16 +150,39 @@ def nms_normal_gpu(boxes, scores, thresh):
return order[keep[:num_out].cuda()].contiguous()
def boxes3d_to_bev_torch(boxes3d):
"""
:param boxes3d: (N, 7) [x, y, z, h, w, l, ry] in camera coords
:return:
boxes_bev: (N, 5) [x1, y1, x2, y2, ry]
def boxes3d_to_bev_torch_camera(boxes3d):
    """Convert 3D boxes to BEV boxes in camera coordinates.

    Args:
        boxes3d (FloatTensor): (N, 7) [x, y, z, h, w, l, ry] in camera coords

    Returns:
        FloatTensor: (N, 5) [x1, y1, x2, y2, ry]
    """
    # new_zeros instead of the legacy uninitialized `Tensor.new(Size)`;
    # every element is assigned below so values are identical.
    boxes_bev = boxes3d.new_zeros((boxes3d.shape[0], 5))
    # In camera coordinates the BEV plane is spanned by x (col 0) and z
    # (col 2); l (col 5) extends along x and w (col 4) along z.
    cu, cv = boxes3d[:, 0], boxes3d[:, 2]
    half_l, half_w = boxes3d[:, 5] / 2, boxes3d[:, 4] / 2
    boxes_bev[:, 0], boxes_bev[:, 1] = cu - half_l, cv - half_w
    boxes_bev[:, 2], boxes_bev[:, 3] = cu + half_l, cv + half_w
    boxes_bev[:, 4] = boxes3d[:, 6]
    return boxes_bev
def boxes3d_to_bev_torch_lidar(boxes3d):
    """Convert 3D boxes to BEV boxes in LiDAR coordinates.

    Args:
        boxes3d (FloatTensor): (N, 7) [x, y, z, w, l, h, ry] in LiDAR coords

    Returns:
        FloatTensor: (N, 5) [x1, y1, x2, y2, ry]
    """
    # new_zeros instead of the legacy uninitialized `Tensor.new(Size)`;
    # every element is assigned below so values are identical.
    boxes_bev = boxes3d.new_zeros((boxes3d.shape[0], 5))
    # In LiDAR coordinates the BEV plane is spanned by x (col 0) and y
    # (col 1); w (col 3) extends along x and l (col 4) along y.
    x, y = boxes3d[:, 0], boxes3d[:, 1]
    half_l, half_w = boxes3d[:, 4] / 2, boxes3d[:, 3] / 2
    boxes_bev[:, 0], boxes_bev[:, 1] = x - half_w, y - half_l
    boxes_bev[:, 2], boxes_bev[:, 3] = x + half_w, y + half_l
    boxes_bev[:, 4] = boxes3d[:, 6]
    return boxes_bev
......@@ -7,8 +7,8 @@
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <torch/extension.h>
#include <torch/serialize/tensor.h>
#include <torch/types.h>
#define THREADS_PER_BLOCK 256
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
......
//Modified from
//https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
//RoI-aware point cloud feature pooling
//Written by Shaoshuai Shi
//All Rights Reserved 2019.
// Modified from
// https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
// RoI-aware point cloud feature pooling
// Written by Shaoshuai Shi
// All Rights Reserved 2019.
#include <torch/serialize/tensor.h>
#include <torch/extension.h>
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <torch/serialize/tensor.h>
#include <torch/types.h>
#define THREADS_PER_BLOCK 256
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
// #define DEBUG
__device__ inline void lidar_to_local_coords(float shift_x, float shift_y, float rz, float &local_x, float &local_y){
// should rotate pi/2 + alpha to translate LiDAR to local
float rot_angle = rz + M_PI / 2;
float cosa = cos(rot_angle), sina = sin(rot_angle);
local_x = shift_x * cosa + shift_y * (-sina);
local_y = shift_x * sina + shift_y * cosa;
// Rotate the offset (shift_x, shift_y) from the LiDAR frame into the local
// frame of a box with yaw `rz`; results are written into local_x / local_y.
__device__ inline void lidar_to_local_coords(float shift_x, float shift_y,
                                             float rz, float &local_x,
                                             float &local_y) {
  // should rotate pi/2 + alpha to translate LiDAR to local
  float rot_angle = rz + M_PI / 2;
  float cosa = cos(rot_angle), sina = sin(rot_angle);
  local_x = shift_x * cosa + shift_y * (-sina);
  local_y = shift_x * sina + shift_y * cosa;
}
__device__ inline int check_pt_in_box3d(const float *pt, const float *box3d, float &local_x, float &local_y){
// param pt: (x, y, z)
// param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the bottom center
float x = pt[0], y = pt[1], z = pt[2];
float cx = box3d[0], cy = box3d[1], cz = box3d[2];
float w = box3d[3], l = box3d[4], h = box3d[5], rz = box3d[6];
cz += h / 2.0; // shift to the center since cz in box3d is the bottom center
if (fabsf(z - cz) > h / 2.0) return 0;
lidar_to_local_coords(x - cx, y - cy, rz, local_x, local_y);
float in_flag = (local_x > -l / 2.0) & (local_x < l / 2.0) & (local_y > -w / 2.0) & (local_y < w / 2.0);
return in_flag;
// Return 1 if point `pt` lies inside the 3D box `box3d`, else 0. On any call
// the point's box-local planar coordinates are written to local_x / local_y.
__device__ inline int check_pt_in_box3d(const float *pt, const float *box3d,
                                        float &local_x, float &local_y) {
  // param pt: (x, y, z)
  // param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the
  // bottom center
  float x = pt[0], y = pt[1], z = pt[2];
  float cx = box3d[0], cy = box3d[1], cz = box3d[2];
  float w = box3d[3], l = box3d[4], h = box3d[5], rz = box3d[6];
  cz += h / 2.0;  // shift to the center since cz in box3d is the bottom center
  // early out on the vertical extent before paying for the rotation
  if (fabsf(z - cz) > h / 2.0) return 0;
  lidar_to_local_coords(x - cx, y - cy, rz, local_x, local_y);
  // NOTE: bitwise & of 0/1 comparison results — equivalent to && here,
  // stored into a float and implicitly converted back to int on return.
  float in_flag = (local_x > -l / 2.0) & (local_x < l / 2.0) &
                  (local_y > -w / 2.0) & (local_y < w / 2.0);
  return in_flag;
}
__global__ void generate_pts_mask_for_box3d(int boxes_num, int pts_num, int out_x, int out_y, int out_z,
const float *rois, const float *pts, int *pts_mask){
// params rois: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate
// params pts: (npoints, 3) [x, y, z]
// params pts_mask: (N, npoints): -1 means point doesnot in this box, otherwise: encode (x_idxs, y_idxs, z_idxs) by binary bit
int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
int box_idx = blockIdx.y;
if (pt_idx >= pts_num || box_idx >= boxes_num) return;
pts += pt_idx * 3;
rois += box_idx * 7;
pts_mask += box_idx * pts_num + pt_idx;
float local_x = 0, local_y = 0;
int cur_in_flag = check_pt_in_box3d(pts, rois, local_x, local_y);
pts_mask[0] = -1;
if (cur_in_flag > 0){
float local_z = pts[2] - rois[2];
float w = rois[3], l = rois[4], h = rois[5];
float x_res = l / out_x;
float y_res = w / out_y;
float z_res = h / out_z;
unsigned int x_idx = int((local_x + l / 2) / x_res);
unsigned int y_idx = int((local_y + w / 2) / y_res);
unsigned int z_idx = int(local_z / z_res);
x_idx = min(max(x_idx, 0), out_x - 1);
y_idx = min(max(y_idx, 0), out_y - 1);
z_idx = min(max(z_idx, 0), out_z - 1);
unsigned int idx_encoding = (x_idx << 16) + (y_idx << 8) + z_idx;
__global__ void generate_pts_mask_for_box3d(int boxes_num, int pts_num,
int out_x, int out_y, int out_z,
const float *rois, const float *pts,
int *pts_mask) {
// params rois: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate
// params pts: (npoints, 3) [x, y, z]
// params pts_mask: (N, npoints): -1 means point doesnot in this box,
// otherwise: encode (x_idxs, y_idxs, z_idxs) by binary bit
int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
int box_idx = blockIdx.y;
if (pt_idx >= pts_num || box_idx >= boxes_num) return;
pts += pt_idx * 3;
rois += box_idx * 7;
pts_mask += box_idx * pts_num + pt_idx;
float local_x = 0, local_y = 0;
int cur_in_flag = check_pt_in_box3d(pts, rois, local_x, local_y);
pts_mask[0] = -1;
if (cur_in_flag > 0) {
float local_z = pts[2] - rois[2];
float w = rois[3], l = rois[4], h = rois[5];
float x_res = l / out_x;
float y_res = w / out_y;
float z_res = h / out_z;
unsigned int x_idx = int((local_x + l / 2) / x_res);
unsigned int y_idx = int((local_y + w / 2) / y_res);
unsigned int z_idx = int(local_z / z_res);
x_idx = min(max(x_idx, 0), out_x - 1);
y_idx = min(max(y_idx, 0), out_y - 1);
z_idx = min(max(z_idx, 0), out_z - 1);
unsigned int idx_encoding = (x_idx << 16) + (y_idx << 8) + z_idx;
#ifdef DEBUG
printf("mask: pts_%d(%.3f, %.3f, %.3f), local(%.3f, %.3f, %.3f), idx(%d, %d, %d), res(%.3f, %.3f, %.3f), idx_encoding=%x\n",
pt_idx, pts[0], pts[1], pts[2], local_x, local_y, local_z, x_idx, y_idx, z_idx, x_res, y_res, z_res, idx_encoding);
printf(
"mask: pts_%d(%.3f, %.3f, %.3f), local(%.3f, %.3f, %.3f), idx(%d, %d, "
"%d), res(%.3f, %.3f, %.3f), idx_encoding=%x\n",
pt_idx, pts[0], pts[1], pts[2], local_x, local_y, local_z, x_idx, y_idx,
z_idx, x_res, y_res, z_res, idx_encoding);
#endif
pts_mask[0] = idx_encoding;
}
pts_mask[0] = idx_encoding;
}
}
__global__ void collect_inside_pts_for_box3d(int boxes_num, int pts_num, int max_pts_each_voxel,
int out_x, int out_y, int out_z, const int *pts_mask, int *pts_idx_of_voxels){
// params pts_mask: (N, npoints) 0 or 1
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
int box_idx = blockIdx.x * blockDim.x + threadIdx.x;
if (box_idx >= boxes_num) return;
int max_num_pts = max_pts_each_voxel - 1; // index 0 is the counter
pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel;
for (int k = 0; k < pts_num; k++){
if (pts_mask[box_idx * pts_num + k] != -1){
unsigned int idx_encoding = pts_mask[box_idx * pts_num + k];
unsigned int x_idx = (idx_encoding >> 16) & 0xFF;
unsigned int y_idx = (idx_encoding >> 8) & 0xFF;
unsigned int z_idx = idx_encoding & 0xFF;
unsigned int base_offset = x_idx * out_y * out_z * max_pts_each_voxel + y_idx * out_z * max_pts_each_voxel + z_idx * max_pts_each_voxel;
unsigned int cnt = pts_idx_of_voxels[base_offset];
if (cnt < max_num_pts){
pts_idx_of_voxels[base_offset + cnt + 1] = k;
pts_idx_of_voxels[base_offset]++;
}
__global__ void collect_inside_pts_for_box3d(int boxes_num, int pts_num,
int max_pts_each_voxel, int out_x,
int out_y, int out_z,
const int *pts_mask,
int *pts_idx_of_voxels) {
// params pts_mask: (N, npoints) 0 or 1
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
int box_idx = blockIdx.x * blockDim.x + threadIdx.x;
if (box_idx >= boxes_num) return;
int max_num_pts = max_pts_each_voxel - 1; // index 0 is the counter
pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel;
for (int k = 0; k < pts_num; k++) {
if (pts_mask[box_idx * pts_num + k] != -1) {
unsigned int idx_encoding = pts_mask[box_idx * pts_num + k];
unsigned int x_idx = (idx_encoding >> 16) & 0xFF;
unsigned int y_idx = (idx_encoding >> 8) & 0xFF;
unsigned int z_idx = idx_encoding & 0xFF;
unsigned int base_offset = x_idx * out_y * out_z * max_pts_each_voxel +
y_idx * out_z * max_pts_each_voxel +
z_idx * max_pts_each_voxel;
unsigned int cnt = pts_idx_of_voxels[base_offset];
if (cnt < max_num_pts) {
pts_idx_of_voxels[base_offset + cnt + 1] = k;
pts_idx_of_voxels[base_offset]++;
}
#ifdef DEBUG
printf("collect: pts_%d, idx(%d, %d, %d), idx_encoding=%x\n",
k, x_idx, y_idx, z_idx, idx_encoding);
printf("collect: pts_%d, idx(%d, %d, %d), idx_encoding=%x\n", k, x_idx,
y_idx, z_idx, idx_encoding);
#endif
}
}
}
}
__global__ void roiaware_maxpool3d(int boxes_num, int pts_num, int channels, int max_pts_each_voxel, int out_x,
int out_y, int out_z, const float *pts_feature, const int *pts_idx_of_voxels, float *pooled_features, int *argmax){
// params pts_feature: (npoints, C)
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel), index 0 is the counter
// params pooled_features: (N, out_x, out_y, out_z, C)
// params argmax: (N, out_x, out_y, out_z, C)
int box_idx = blockIdx.z;
int channel_idx = blockIdx.y;
int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x;
int x_idx = voxel_idx_flat / (out_y * out_z);
int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
int z_idx = voxel_idx_flat % out_z;
if (box_idx >= boxes_num || channel_idx >= channels|| x_idx >= out_x || y_idx >= out_y || z_idx >= out_z) return;
__global__ void roiaware_maxpool3d(int boxes_num, int pts_num, int channels,
int max_pts_each_voxel, int out_x, int out_y,
int out_z, const float *pts_feature,
const int *pts_idx_of_voxels,
float *pooled_features, int *argmax) {
// params pts_feature: (npoints, C)
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel),
// index 0 is the counter params pooled_features: (N, out_x, out_y, out_z, C)
// params argmax: (N, out_x, out_y, out_z, C)
int box_idx = blockIdx.z;
int channel_idx = blockIdx.y;
int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x;
int x_idx = voxel_idx_flat / (out_y * out_z);
int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
int z_idx = voxel_idx_flat % out_z;
if (box_idx >= boxes_num || channel_idx >= channels || x_idx >= out_x ||
y_idx >= out_y || z_idx >= out_z)
return;
#ifdef DEBUG
printf("src pts_idx_of_voxels: (%p, ), argmax: %p\n", pts_idx_of_voxels, argmax);
printf("src pts_idx_of_voxels: (%p, ), argmax: %p\n", pts_idx_of_voxels,
argmax);
#endif
int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel + offset_base * max_pts_each_voxel;
pooled_features += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx;
argmax += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx;
int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel +
offset_base * max_pts_each_voxel;
pooled_features += box_idx * out_x * out_y * out_z * channels +
offset_base * channels + channel_idx;
argmax += box_idx * out_x * out_y * out_z * channels +
offset_base * channels + channel_idx;
int argmax_idx = -1;
float max_val = -1e50;
int argmax_idx = -1;
float max_val = -1e50;
int total_pts = pts_idx_of_voxels[0];
int total_pts = pts_idx_of_voxels[0];
for (int k = 1; k <= total_pts; k++){
if (pts_feature[pts_idx_of_voxels[k] * channels + channel_idx] > max_val){
max_val = pts_feature[pts_idx_of_voxels[k] * channels + channel_idx];
argmax_idx = pts_idx_of_voxels[k];
}
for (int k = 1; k <= total_pts; k++) {
if (pts_feature[pts_idx_of_voxels[k] * channels + channel_idx] > max_val) {
max_val = pts_feature[pts_idx_of_voxels[k] * channels + channel_idx];
argmax_idx = pts_idx_of_voxels[k];
}
}
if (argmax_idx != -1){
pooled_features[0] = max_val;
}
argmax[0] = argmax_idx;
if (argmax_idx != -1) {
pooled_features[0] = max_val;
}
argmax[0] = argmax_idx;
#ifdef DEBUG
printf("channel_%d idx(%d, %d, %d), argmax_idx=(%d, %.3f), total=%d, after pts_idx: %p, argmax: (%p, %d)\n",
channel_idx, x_idx, y_idx, z_idx, argmax_idx, max_val, total_pts, pts_idx_of_voxels, argmax, argmax_idx);
printf(
"channel_%d idx(%d, %d, %d), argmax_idx=(%d, %.3f), total=%d, after "
"pts_idx: %p, argmax: (%p, %d)\n",
channel_idx, x_idx, y_idx, z_idx, argmax_idx, max_val, total_pts,
pts_idx_of_voxels, argmax, argmax_idx);
#endif
}
__global__ void roiaware_avgpool3d(int boxes_num, int pts_num, int channels, int max_pts_each_voxel, int out_x,
int out_y, int out_z, const float *pts_feature, const int *pts_idx_of_voxels, float *pooled_features){
// params pts_feature: (npoints, C)
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel), index 0 is the counter
// params pooled_features: (N, out_x, out_y, out_z, C)
// params argmax: (N, out_x, out_y, out_z, C)
int box_idx = blockIdx.z;
int channel_idx = blockIdx.y;
int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x;
int x_idx = voxel_idx_flat / (out_y * out_z);
int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
int z_idx = voxel_idx_flat % out_z;
if (box_idx >= boxes_num || channel_idx >= channels|| x_idx >= out_x || y_idx >= out_y || z_idx >= out_z) return;
int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel + offset_base * max_pts_each_voxel;
pooled_features += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx;
float sum_val = 0;
int total_pts = pts_idx_of_voxels[0];
for (int k = 1; k <= total_pts; k++){
sum_val += pts_feature[pts_idx_of_voxels[k] * channels + channel_idx];
}
if (total_pts > 0){
pooled_features[0] = sum_val / total_pts;
}
// Average-pool the features of the points collected in each voxel of each
// RoI. One thread handles one (box, channel, voxel) triple.
__global__ void roiaware_avgpool3d(int boxes_num, int pts_num, int channels,
                                   int max_pts_each_voxel, int out_x, int out_y,
                                   int out_z, const float *pts_feature,
                                   const int *pts_idx_of_voxels,
                                   float *pooled_features) {
  // params pts_feature: (npoints, C)
  // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel),
  //     index 0 of each voxel slot holds the count of collected points
  // params pooled_features: (N, out_x, out_y, out_z, C), output
  int box_idx = blockIdx.z;
  int channel_idx = blockIdx.y;
  // flat voxel index decomposed into (x_idx, y_idx, z_idx)
  int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x;
  int x_idx = voxel_idx_flat / (out_y * out_z);
  int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
  int z_idx = voxel_idx_flat % out_z;
  if (box_idx >= boxes_num || channel_idx >= channels || x_idx >= out_x ||
      y_idx >= out_y || z_idx >= out_z)
    return;

  // advance the pointers to this thread's voxel slot / output element
  int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
  pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel +
                       offset_base * max_pts_each_voxel;
  pooled_features += box_idx * out_x * out_y * out_z * channels +
                     offset_base * channels + channel_idx;

  float sum_val = 0;
  int total_pts = pts_idx_of_voxels[0];  // slot 0 is the point counter

  for (int k = 1; k <= total_pts; k++) {
    sum_val += pts_feature[pts_idx_of_voxels[k] * channels + channel_idx];
  }

  // empty voxels keep their initial value (output untouched)
  if (total_pts > 0) {
    pooled_features[0] = sum_val / total_pts;
  }
}
void roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels, int max_pts_each_voxel, int out_x, int out_y, int out_z,
const float *rois, const float *pts, const float *pts_feature, int *argmax, int *pts_idx_of_voxels, float *pooled_features, int pool_method){
// params rois: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate
// params pts: (npoints, 3) [x, y, z] in LiDAR coordinate
// params pts_feature: (npoints, C)
// params argmax: (N, out_x, out_y, out_z, C)
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params pooled_features: (N, out_x, out_y, out_z, C)
// params pool_method: 0: max_pool 1: avg_pool
int *pts_mask = NULL;
cudaMalloc(&pts_mask, boxes_num * pts_num * sizeof(int)); // (N, M)
cudaMemset(pts_mask, -1, boxes_num * pts_num * sizeof(int));
dim3 blocks_mask(DIVUP(pts_num, THREADS_PER_BLOCK), boxes_num);
dim3 threads(THREADS_PER_BLOCK);
generate_pts_mask_for_box3d<<<blocks_mask, threads>>>(boxes_num, pts_num, out_x, out_y, out_z, rois, pts, pts_mask);
// TODO: Merge the collect and pool functions, SS
dim3 blocks_collect(DIVUP(boxes_num, THREADS_PER_BLOCK));
collect_inside_pts_for_box3d<<<blocks_collect, threads>>>(boxes_num, pts_num, max_pts_each_voxel,
out_x, out_y, out_z, pts_mask, pts_idx_of_voxels);
dim3 blocks_pool(DIVUP(out_x * out_y * out_z, THREADS_PER_BLOCK), channels, boxes_num);
if (pool_method == 0){
roiaware_maxpool3d<<<blocks_pool, threads>>>(boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z,
pts_feature, pts_idx_of_voxels, pooled_features, argmax);
}
else if (pool_method == 1){
roiaware_avgpool3d<<<blocks_pool, threads>>>(boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z,
pts_feature, pts_idx_of_voxels, pooled_features);
}
cudaFree(pts_mask);
// Launch the RoI-aware pooling pipeline:
//   1) mark, for each (box, point) pair, which voxel the point falls into;
//   2) collect per-voxel point indices;
//   3) max- or average-pool the point features per voxel.
// params rois: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate
// params pts: (npoints, 3) [x, y, z] in LiDAR coordinate
// params pts_feature: (npoints, C)
// params argmax: (N, out_x, out_y, out_z, C), only used for max pooling
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params pooled_features: (N, out_x, out_y, out_z, C)
// params pool_method: 0: max_pool 1: avg_pool
void roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels,
                              int max_pts_each_voxel, int out_x, int out_y,
                              int out_z, const float *rois, const float *pts,
                              const float *pts_feature, int *argmax,
                              int *pts_idx_of_voxels, float *pooled_features,
                              int pool_method) {
  // temporary (N, npoints) mask: -1 = point not in box, otherwise the
  // bit-encoded voxel index of the point
  int *pts_mask = NULL;
  cudaMalloc(&pts_mask, boxes_num * pts_num * sizeof(int));  // (N, M)
  cudaMemset(pts_mask, -1, boxes_num * pts_num * sizeof(int));

  dim3 blocks_mask(DIVUP(pts_num, THREADS_PER_BLOCK), boxes_num);
  dim3 threads(THREADS_PER_BLOCK);
  generate_pts_mask_for_box3d<<<blocks_mask, threads>>>(
      boxes_num, pts_num, out_x, out_y, out_z, rois, pts, pts_mask);

  // TODO: Merge the collect and pool functions, SS
  dim3 blocks_collect(DIVUP(boxes_num, THREADS_PER_BLOCK));
  collect_inside_pts_for_box3d<<<blocks_collect, threads>>>(
      boxes_num, pts_num, max_pts_each_voxel, out_x, out_y, out_z, pts_mask,
      pts_idx_of_voxels);

  dim3 blocks_pool(DIVUP(out_x * out_y * out_z, THREADS_PER_BLOCK), channels,
                   boxes_num);
  if (pool_method == 0) {
    roiaware_maxpool3d<<<blocks_pool, threads>>>(
        boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z,
        pts_feature, pts_idx_of_voxels, pooled_features, argmax);
  } else if (pool_method == 1) {
    roiaware_avgpool3d<<<blocks_pool, threads>>>(
        boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z,
        pts_feature, pts_idx_of_voxels, pooled_features);
  }

  cudaFree(pts_mask);

#ifdef DEBUG
  // merge residue removed: one synchronize is enough to flush kernel printf
  cudaDeviceSynchronize();  // for using printf in kernel function
#endif
}
__global__ void roiaware_maxpool3d_backward(int boxes_num, int channels, int out_x, int out_y, int out_z,
const int *argmax, const float *grad_out, float *grad_in){
// params argmax: (N, out_x, out_y, out_z, C)
// params grad_out: (N, out_x, out_y, out_z, C)
// params grad_in: (npoints, C), return value
int box_idx = blockIdx.z;
int channel_idx = blockIdx.y;
int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x;
int x_idx = voxel_idx_flat / (out_y * out_z);
int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
int z_idx = voxel_idx_flat % out_z;
if (box_idx >= boxes_num || channel_idx >= channels|| x_idx >= out_x || y_idx >= out_y || z_idx >= out_z) return;
int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
argmax += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx;
grad_out += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx;
if (argmax[0] == -1) return;
atomicAdd(grad_in + argmax[0] * channels + channel_idx, grad_out[0] * 1);
// Backward pass of RoI-aware max pooling: each output-voxel gradient is
// routed back to the single input point recorded in `argmax` during forward.
__global__ void roiaware_maxpool3d_backward(int boxes_num, int channels,
                                            int out_x, int out_y, int out_z,
                                            const int *argmax,
                                            const float *grad_out,
                                            float *grad_in) {
  // params argmax: (N, out_x, out_y, out_z, C)
  // params grad_out: (N, out_x, out_y, out_z, C)
  // params grad_in: (npoints, C), return value
  // Thread mapping: blockIdx.z = box, blockIdx.y = channel,
  // blockIdx.x * blockDim.x + threadIdx.x = flattened voxel index.
  int box_idx = blockIdx.z;
  int channel_idx = blockIdx.y;
  int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x;
  // Decompose the flat voxel index into (x, y, z) voxel coordinates.
  int x_idx = voxel_idx_flat / (out_y * out_z);
  int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
  int z_idx = voxel_idx_flat % out_z;
  // Guard against padding threads of the last block.
  if (box_idx >= boxes_num || channel_idx >= channels || x_idx >= out_x ||
      y_idx >= out_y || z_idx >= out_z)
    return;
  int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
  // Advance both pointers to this (box, voxel, channel) element.
  argmax += box_idx * out_x * out_y * out_z * channels +
            offset_base * channels + channel_idx;
  grad_out += box_idx * out_x * out_y * out_z * channels +
              offset_base * channels + channel_idx;
  // argmax == -1 marks an empty voxel: no point produced the forward max.
  if (argmax[0] == -1) return;
  // atomicAdd: distinct voxels (possibly from different boxes) may select the
  // same input point, so accumulation into grad_in must be atomic.
  atomicAdd(grad_in + argmax[0] * channels + channel_idx, grad_out[0] * 1);
}
__global__ void roiaware_avgpool3d_backward(int boxes_num, int channels, int out_x, int out_y, int out_z,
int max_pts_each_voxel, const int *pts_idx_of_voxels, const float *grad_out, float *grad_in){
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params grad_out: (N, out_x, out_y, out_z, C)
// params grad_in: (npoints, C), return value
int box_idx = blockIdx.z;
int channel_idx = blockIdx.y;
int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x;
int x_idx = voxel_idx_flat / (out_y * out_z);
int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
int z_idx = voxel_idx_flat % out_z;
if (box_idx >= boxes_num || channel_idx >= channels|| x_idx >= out_x || y_idx >= out_y || z_idx >= out_z) return;
int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel + offset_base * max_pts_each_voxel;
grad_out += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx;
int total_pts = pts_idx_of_voxels[0];
float cur_grad = 1 / fmaxf(float(total_pts), 1.0);
for (int k = 1; k <= total_pts; k++){
atomicAdd(grad_in + pts_idx_of_voxels[k] * channels + channel_idx, grad_out[0] * cur_grad);
}
// Backward pass of RoI-aware average pooling: the output-voxel gradient is
// split equally among all points that were pooled into the voxel.
__global__ void roiaware_avgpool3d_backward(int boxes_num, int channels,
                                            int out_x, int out_y, int out_z,
                                            int max_pts_each_voxel,
                                            const int *pts_idx_of_voxels,
                                            const float *grad_out,
                                            float *grad_in) {
  // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
  //   slot 0 of the last dim holds the point count; slots 1.. hold indices.
  // params grad_out: (N, out_x, out_y, out_z, C)
  // params grad_in: (npoints, C), return value
  // Thread mapping: blockIdx.z = box, blockIdx.y = channel,
  // blockIdx.x * blockDim.x + threadIdx.x = flattened voxel index.
  int box_idx = blockIdx.z;
  int channel_idx = blockIdx.y;
  int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x;
  // Decompose the flat voxel index into (x, y, z) voxel coordinates.
  int x_idx = voxel_idx_flat / (out_y * out_z);
  int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
  int z_idx = voxel_idx_flat % out_z;
  // Guard against padding threads of the last block.
  if (box_idx >= boxes_num || channel_idx >= channels || x_idx >= out_x ||
      y_idx >= out_y || z_idx >= out_z)
    return;
  int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
  // Advance pointers to this voxel's index list / gradient element.
  pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel +
                       offset_base * max_pts_each_voxel;
  grad_out += box_idx * out_x * out_y * out_z * channels +
              offset_base * channels + channel_idx;
  int total_pts = pts_idx_of_voxels[0];
  // fmaxf(..., 1.0) avoids division by zero for empty voxels (loop is a
  // no-op in that case anyway since total_pts == 0).
  float cur_grad = 1 / fmaxf(float(total_pts), 1.0);
  // k starts at 1: slot 0 is the count, not a point index.
  for (int k = 1; k <= total_pts; k++) {
    // atomicAdd: one point can belong to voxels of several overlapping boxes.
    atomicAdd(grad_in + pts_idx_of_voxels[k] * channels + channel_idx,
              grad_out[0] * cur_grad);
  }
}
void roiaware_pool3d_backward_launcher(int boxes_num, int out_x, int out_y, int out_z, int channels, int max_pts_each_voxel,
const int *pts_idx_of_voxels, const int *argmax, const float *grad_out, float *grad_in, int pool_method){
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params argmax: (N, out_x, out_y, out_z, C)
// params grad_out: (N, out_x, out_y, out_z, C)
// params grad_in: (npoints, C), return value
// params pool_method: 0: max_pool, 1: avg_pool
dim3 blocks(DIVUP(out_x * out_y * out_z, THREADS_PER_BLOCK), channels, boxes_num);
dim3 threads(THREADS_PER_BLOCK);
if (pool_method == 0){
roiaware_maxpool3d_backward<<<blocks, threads>>>(
boxes_num, channels, out_x, out_y, out_z, argmax, grad_out, grad_in
);
}
else if (pool_method == 1){
roiaware_avgpool3d_backward<<<blocks, threads>>>(
boxes_num, channels, out_x, out_y, out_z, max_pts_each_voxel, pts_idx_of_voxels, grad_out, grad_in
);
}
// Host-side launcher for the RoI-aware pooling backward pass; dispatches to
// the max- or avg-pool backward kernel depending on pool_method.
void roiaware_pool3d_backward_launcher(int boxes_num, int out_x, int out_y,
                                       int out_z, int channels,
                                       int max_pts_each_voxel,
                                       const int *pts_idx_of_voxels,
                                       const int *argmax, const float *grad_out,
                                       float *grad_in, int pool_method) {
  // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
  // params argmax: (N, out_x, out_y, out_z, C)
  // params grad_out: (N, out_x, out_y, out_z, C)
  // params grad_in: (npoints, C), return value
  // params pool_method: 0: max_pool, 1: avg_pool
  // One thread per voxel; grid y = channel, grid z = box (mirrors the
  // forward launch configuration).
  dim3 blocks(DIVUP(out_x * out_y * out_z, THREADS_PER_BLOCK), channels,
              boxes_num);
  dim3 threads(THREADS_PER_BLOCK);
  if (pool_method == 0) {
    roiaware_maxpool3d_backward<<<blocks, threads>>>(
        boxes_num, channels, out_x, out_y, out_z, argmax, grad_out, grad_in);
  } else if (pool_method == 1) {
    roiaware_avgpool3d_backward<<<blocks, threads>>>(
        boxes_num, channels, out_x, out_y, out_z, max_pts_each_voxel,
        pts_idx_of_voxels, grad_out, grad_in);
  }
  // NOTE(review): an unrecognized pool_method silently launches nothing —
  // confirm upstream Python wrapper validates pool_method before calling.
}
from mmcv.cnn import build_norm_layer
from torch import nn
import mmdet3d.ops.spconv as spconv
from mmdet.models.backbones.resnet import BasicBlock, Bottleneck
from . import spconv
def conv3x3(in_planes, out_planes, stride=1, indice_key=None):
......
import torch
from mmdet3d.core.bbox.assigners import MaxIoUAssigner
from mmdet3d.core.bbox.samplers import IoUNegPiecewiseSampler
def test_iou_piecewise_sampler() -> None:
    """Smoke-test IoUNegPiecewiseSampler on hand-crafted 3D boxes.

    Assigns 12 candidate boxes to 2 ground-truth boxes with MaxIoUAssigner
    (LiDAR-coordinate 3D IoU), then samples pos/neg proposals with the
    piecewise negative sampler and checks the sampled index/box counts agree.
    Requires a CUDA device (tensors are moved with ``.cuda()``).
    """
    assigner = MaxIoUAssigner(
        pos_iou_thr=0.55,
        neg_iou_thr=0.55,
        min_pos_iou=0.55,
        ignore_iof_thr=-1,
        iou_calculator=dict(type='BboxOverlaps3D', coordinate='lidar'))
    # 12 candidate boxes, each (x, y, z, w, l, h, rz); several duplicates so
    # that a known subset overlaps the ground truth above the 0.55 threshold.
    bboxes = torch.tensor(
        [[32, 32, 16, 8, 38, 42, -0.3], [32, 32, 16, 8, 38, 42, -0.3],
         [32, 32, 16, 8, 38, 42, -0.3], [32, 32, 16, 8, 38, 42, -0.3],
         [0, 0, 0, 10, 10, 10, 0.2], [10, 10, 10, 20, 20, 15, 0.6],
         [5, 5, 5, 15, 15, 15, 0.7], [5, 5, 5, 15, 15, 15, 0.7],
         [5, 5, 5, 15, 15, 15, 0.7], [32, 32, 16, 8, 38, 42, -0.3],
         [32, 32, 16, 8, 38, 42, -0.3], [32, 32, 16, 8, 38, 42, -0.3]],
        dtype=torch.float32).cuda()
    gt_bboxes = torch.tensor(
        [[0, 0, 0, 10, 10, 9, 0.2], [5, 10, 10, 20, 20, 15, 0.6]],
        dtype=torch.float32).cuda()
    gt_labels = torch.tensor([1, 1], dtype=torch.int64).cuda()
    assign_result = assigner.assign(bboxes, gt_bboxes, gt_labels=gt_labels)
    # Sample 10 proposals; negatives are drawn piecewise from two IoU bands
    # ([0.55, ...) and [0.1, 0.55)) with fractions 0.8 / 0.2.
    sampler = IoUNegPiecewiseSampler(
        num=10,
        pos_fraction=0.55,
        neg_piece_fractions=[0.8, 0.2],
        neg_iou_piece_thrs=[0.55, 0.1],
        neg_pos_ub=-1,
        add_gt_as_proposals=False)
    sample_result = sampler.sample(assign_result, bboxes, gt_bboxes, gt_labels)
    # NOTE(review): pos_inds is presumably an index tensor; comparing it to 4
    # only works if it is scalar-like — `len(sample_result.pos_inds) == 4`
    # may be the intended assertion. Confirm against the sampler's return type.
    assert sample_result.pos_inds == 4
    assert len(sample_result.pos_bboxes) == len(sample_result.pos_inds)
    assert len(sample_result.neg_bboxes) == len(sample_result.neg_inds)
#!/usr/bin/env bash
# Single-node distributed testing launcher.
# Usage: ./dist_test.sh <config> <checkpoint> <num_gpus> [extra test.py args...]
CONFIG=$1
CHECKPOINT=$2
GPUS=$3
# Rendezvous port for torch.distributed; override with PORT=<port> env var.
PORT=${PORT:-29500}
# Prepend the repo root so test.py can import the package without installing it.
PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
    $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4}
#!/usr/bin/env bash
# Single-node distributed training launcher.
# Usage: ./dist_train.sh <config> <num_gpus> [extra train.py args...]
# Merge residue left two launch-command prefixes here; the first one dangled
# via '\' into the PYTHONPATH assignment and broke the script. Merged into a
# single command that still honors the PYTHON override.
PYTHON=${PYTHON:-"python"}
CONFIG=$1
GPUS=$2
# Rendezvous port for torch.distributed; override with PORT=<port> env var.
PORT=${PORT:-29500}
# Prepend the repo root so train.py can import the package without installing it.
PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
$PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
    $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3}
#!/usr/bin/env bash
# Slurm testing launcher.
# Usage: GPUS=8 ./slurm_test.sh <partition> <job_name> <config> <checkpoint> [extra args...]
set -x

export PYTHONPATH=`pwd`:$PYTHONPATH

PARTITION=$1
JOB_NAME=$2
# A diff-hunk marker ("@@ -9,14 +8,17 @@ CONFIG=$3") had replaced this
# assignment; restored to plain bash.
CONFIG=$3
CHECKPOINT=$4
GPUS=${GPUS:-8}
GPUS_PER_NODE=${GPUS_PER_NODE:-8}
CPUS_PER_TASK=${CPUS_PER_TASK:-5}
# Everything after the 4th positional arg is forwarded to tools/test.py.
PY_ARGS=${@:5}
SRUN_ARGS=${SRUN_ARGS:-""}

# Prepend the repo root so tools/test.py can import the package.
PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
srun -p ${PARTITION} \
    --job-name=${JOB_NAME} \
    --gres=gpu:${GPUS_PER_NODE} \
    --ntasks=${GPUS} \
    --ntasks-per-node=${GPUS_PER_NODE} \
    --cpus-per-task=${CPUS_PER_TASK} \
    --kill-on-bad-exit=1 \
    ${SRUN_ARGS} \
    python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS}
# (head of slurm_train.sh — shebang and PARTITION/JOB_NAME parsing — sits
# above this chunk; a diff-hunk marker had replaced the CONFIG line and an
# old PY_ARGS default ("--validate") duplicated the new ${@:5} form.)
CONFIG=$3
WORK_DIR=$4
GPUS=${GPUS:-8}
GPUS_PER_NODE=${GPUS_PER_NODE:-8}
CPUS_PER_TASK=${CPUS_PER_TASK:-5}
SRUN_ARGS=${SRUN_ARGS:-""}
# Everything after the 4th positional arg is forwarded to tools/train.py.
PY_ARGS=${@:5}

# Prepend the repo root so tools/train.py can import the package.
PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
srun -p ${PARTITION} \
    --job-name=${JOB_NAME} \
    --gres=gpu:${GPUS_PER_NODE} \
    --ntasks=${GPUS} \
    --ntasks-per-node=${GPUS_PER_NODE} \
    --cpus-per-task=${CPUS_PER_TASK} \
    --kill-on-bad-exit=1 \
    ${SRUN_ARGS} \
    python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment