Commit 49121b64 authored by yinchimaoliang's avatar yinchimaoliang

Merge branch 'master_temp' into scannet_dataset

parents f8f05baf 868c5fab
import numpy as np
from mmdet.datasets.builder import PIPELINES
@PIPELINES.register_module()
class IndoorFlipData(object):
"""Indoor Flip Data.
Flip point cloud and ground truth boxes.
The point cloud will be flipped along the yz plane
and the xz plane, each with a certain probability.
Args:
flip_ratio_yz (float): Probability of being flipped along yz plane.
Default: 0.5.
flip_ratio_xz (float): Probability of being flipped along xz plane.
Default: 0.5.
"""
def __init__(self, flip_ratio_yz=0.5, flip_ratio_xz=0.5):
self.flip_ratio_yz = flip_ratio_yz
self.flip_ratio_xz = flip_ratio_xz
def __call__(self, results):
points = results['points']
gt_bboxes_3d = results['gt_bboxes_3d']
# axis-aligned boxes have 6 values (no yaw channel)
aligned = gt_bboxes_3d.shape[1] == 6
if np.random.random() < self.flip_ratio_yz:
# Flipping along the YZ plane
points[:, 0] = -1 * points[:, 0]
gt_bboxes_3d[:, 0] = -1 * gt_bboxes_3d[:, 0]
if not aligned:
gt_bboxes_3d[:, 6] = np.pi - gt_bboxes_3d[:, 6]
results['flip_yz'] = True
if aligned and np.random.random() < self.flip_ratio_xz:
# Flipping along the XZ plane
points[:, 1] = -1 * points[:, 1]
gt_bboxes_3d[:, 1] = -1 * gt_bboxes_3d[:, 1]
results['flip_xz'] = True
results['points'] = points
results['gt_bboxes_3d'] = gt_bboxes_3d
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(flip_ratio_yz={}, '.format(self.flip_ratio_yz)
repr_str += 'flip_ratio_xz={})'.format(self.flip_ratio_xz)
return repr_str
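As a quick sanity check, a minimal sketch of the flip transform (both ratios set to 1 so the flip always fires; the import path follows the tests added in this commit):

import numpy as np
from mmdet3d.datasets.pipelines import IndoorFlipData

flip = IndoorFlipData(flip_ratio_yz=1, flip_ratio_xz=1)
results = dict(
    points=np.array([[1.0, 2.0, 0.5, 0.9]]),
    # a 7-dim box (x, y, z, dx, dy, dz, yaw) is treated as unaligned
    gt_bboxes_3d=np.array([[0.2, 1.0, -0.9, 0.6, 0.5, 0.8, 0.3]]))
out = flip(results)
# the yz flip negates x and maps yaw to pi - yaw; the xz branch only
# runs for aligned (6-dim) boxes, so it is skipped here
assert out['points'][0, 0] == -1.0
assert np.isclose(out['gt_bboxes_3d'][0, 6], np.pi - 0.3)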
@PIPELINES.register_module()
class IndoorPointsColorJitter(object):
"""Indoor Points Color Jitter.
Randomly change the brightness and color of the point cloud, and
randomly drop the points' colors with a certain probability.
Args:
color_mean (List[float]): Mean color of the point cloud.
Default: [0.5, 0.5, 0.5].
bright_range (List[float]): Range of brightness.
Default: [0.8, 1.2].
color_shift_range (List[float]): Range of color shift.
Default: [0.95, 1.05].
jitter_range (List[float]): Range of jittering.
Default: [-0.025, 0.025].
drop_prob (float): Probability to drop out points' color.
Default: 0.3
"""
def __init__(self,
color_mean=[0.5, 0.5, 0.5],
bright_range=[0.8, 1.2],
color_shift_range=[0.95, 1.05],
jitter_range=[-0.025, 0.025],
drop_prob=0.3):
self.color_mean = color_mean
self.bright_range = bright_range
self.color_shift_range = color_shift_range
self.jitter_range = jitter_range
self.drop_prob = drop_prob
def __call__(self, results):
points = results['points']
assert points.shape[1] >= 6, \
f'Expect points to have at least 6 channels, got {points.shape[1]}.'
rgb_color = points[:, 3:6] + self.color_mean
# brightness change for each channel
rgb_color *= np.random.uniform(self.bright_range[0],
self.bright_range[1], 3)
# per-channel color shift; the range is centered at 1, so it is
# applied multiplicatively (adding ~1.0 would saturate the colors)
rgb_color *= np.random.uniform(self.color_shift_range[0],
self.color_shift_range[1], 3)
# jittering on each point (one random offset per point)
rgb_color += np.expand_dims(
np.random.uniform(self.jitter_range[0], self.jitter_range[1],
points.shape[0]), -1)
rgb_color = np.clip(rgb_color, 0, 1)
# randomly drop out points' colors
rgb_color *= np.expand_dims(
np.random.random(points.shape[0]) > self.drop_prob, -1)
points[:, 3:6] = rgb_color - self.color_mean
results['points'] = points
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(color_mean={}, '.format(self.color_mean)
repr_str += 'bright_range={}, '.format(self.bright_range)
repr_str += 'color_shift_range={}, '.format(self.color_shift_range)
repr_str += 'jitter_range={}, '.format(self.jitter_range)
repr_str += 'drop_prob={})'.format(self.drop_prob)
return repr_str
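For intuition, a minimal numpy sketch of the color drop-out step in isolation (toy values, not the full pipeline): stored colors are centered around color_mean, shifted back into [0, 1], masked, then re-centered.

import numpy as np

color_mean = np.array([0.5, 0.5, 0.5])
stored = np.array([[0.1, -0.2, 0.3], [0.0, 0.4, -0.1]])  # centered colors
rgb = stored + color_mean                     # back to the [0, 1] range
keep = np.random.random(rgb.shape[0]) > 0.3   # drop_prob = 0.3
rgb *= np.expand_dims(keep, -1)               # dropped points turn black
stored = rgb - color_mean                     # re-center before storing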
# TODO: merge outdoor indoor transform.
# TODO: try transform noise.
@PIPELINES.register_module()
class IndoorGlobalRotScale(object):
"""Indoor Global Rotate Scale.
Augment SUN RGB-D and ScanNet data with global rotation and scaling.
Args:
use_height (bool): Whether to use height.
Default: True.
rot_range (List[float]): Range of rotation.
Default: None.
scale_range (List[float]): Range of scale.
Default: None.
"""
def __init__(self, use_height=True, rot_range=None, scale_range=None):
self.use_height = use_height
self.rot_range = rot_range
self.scale_range = scale_range
def _rotz(self, t):
"""Rotate About Z.
Rotation about the z-axis.
Args:
t (float): Angle of rotation.
Returns:
rot_mat (ndarray): Matrix of rotation.
"""
c = np.cos(t)
s = np.sin(t)
rot_mat = np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]])
return rot_mat
def _rotate_aligned_boxes(self, input_boxes, rot_mat):
"""Rotate Aligned Boxes.
Rotate function for the aligned boxes.
Args:
input_boxes (ndarray): 3D boxes.
rot_mat (ndarray): Rotation matrix.
Returns:
rotated_boxes (ndarray): 3D boxes after rotation.
"""
centers, lengths = input_boxes[:, 0:3], input_boxes[:, 3:6]
new_centers = np.dot(centers, rot_mat.T)
dx, dy = lengths[:, 0] / 2.0, lengths[:, 1] / 2.0
new_x = np.zeros((dx.shape[0], 4))
new_y = np.zeros((dx.shape[0], 4))
for i, corner in enumerate([(-1, -1), (1, -1), (1, 1), (-1, 1)]):
corners = np.zeros((dx.shape[0], 3))
corners[:, 0] = corner[0] * dx
corners[:, 1] = corner[1] * dy
corners = np.dot(corners, rot_mat.T)
new_x[:, i] = corners[:, 0]
new_y[:, i] = corners[:, 1]
new_dx = 2.0 * np.max(new_x, 1)
new_dy = 2.0 * np.max(new_y, 1)
new_lengths = np.stack((new_dx, new_dy, lengths[:, 2]), axis=1)
return np.concatenate([new_centers, new_lengths], axis=1)
def __call__(self, results):
points = results['points']
gt_bboxes_3d = results['gt_bboxes_3d']
aligned = gt_bboxes_3d.shape[1] == 6
if self.rot_range is not None:
assert len(self.rot_range) == 2, \
f'Expect rot_range to have 2 elements, ' \
f'got {len(self.rot_range)}.'
rot_angle = np.random.uniform(self.rot_range[0], self.rot_range[1])
rot_mat = self._rotz(rot_angle)
points[:, :3] = np.dot(points[:, :3], rot_mat.T)
if aligned:
gt_bboxes_3d = self._rotate_aligned_boxes(
gt_bboxes_3d, rot_mat)
else:
gt_bboxes_3d[:, :3] = np.dot(gt_bboxes_3d[:, :3], rot_mat.T)
gt_bboxes_3d[:, 6] -= rot_angle
if self.scale_range is not None:
assert len(self.scale_range) == 2, \
f'Expect scale_range to have 2 elements, ' \
f'got {len(self.scale_range)}.'
# Augment point cloud scale
scale_ratio = np.random.uniform(self.scale_range[0],
self.scale_range[1])
points[:, :3] *= scale_ratio
gt_bboxes_3d[:, :3] *= scale_ratio
gt_bboxes_3d[:, 3:6] *= scale_ratio
if self.use_height:
points[:, -1] *= scale_ratio
results['points'] = points
results['gt_bboxes_3d'] = gt_bboxes_3d
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(use_height={}, '.format(self.use_height)
repr_str += 'rot_range={}, '.format(self.rot_range)
repr_str += 'scale_range={})'.format(self.scale_range)
return repr_str
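The rotation helper above is a plain right-handed rotation about the z-axis; a small check (points are row vectors, so the code multiplies by the transposed matrix):

import numpy as np

t = np.pi / 2
c, s = np.cos(t), np.sin(t)
rot_mat = np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]])
pt = np.array([[1.0, 0.0, 0.0]])
assert np.allclose(pt @ rot_mat.T, [[0.0, 1.0, 0.0]])  # x-axis -> y-axis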
import mmcv
import numpy as np
from mmdet.datasets.builder import PIPELINES
@PIPELINES.register_module()
class IndoorPointsColorNormalize(object):
"""Indoor Points Color Normalize
Normalize color of the points.
Args:
color_mean (List[float]): Mean color of the point cloud.
"""
def __init__(self, color_mean):
self.color_mean = color_mean
def __call__(self, results):
points = results['points']
assert points.shape[1] >= 6, \
f'Expect points to have at least 6 channels, got {points.shape[1]}.'
# subtract the mean first, then rescale to [0, 1] (0-255 color space)
points[:, 3:6] = (points[:, 3:6] - np.array(self.color_mean)) / 256.0
results['points'] = points
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(color_mean={})'.format(self.color_mean)
return repr_str
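A worked one-liner for the normalization, assuming raw 0-255 colors and a 0-255 color_mean:

import numpy as np

color_mean = [127.0, 127.0, 127.0]
points = np.array([[0.0, 0.0, 0.0, 130.0, 120.0, 125.0]])
points[:, 3:6] = (points[:, 3:6] - np.array(color_mean)) / 256.0
# colors are now centered near zero: [[0.0117, -0.0273, -0.0078]]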
@PIPELINES.register_module()
class IndoorLoadPointsFromFile(object):
"""Indoor Load Points From File.
Load SUN RGB-D and ScanNet points from file.
Args:
use_height (bool): Whether to use height.
load_dim (int): The dimension of the loaded points.
Default: 6.
use_dim (List[int]): Which dimensions of the points to be used.
Default: [0, 1, 2].
"""
def __init__(self, use_height, load_dim=6, use_dim=[0, 1, 2]):
self.use_height = use_height
assert max(use_dim) < load_dim, \
f'Expect all used dimensions < {load_dim}, got {use_dim}'
self.load_dim = load_dim
self.use_dim = use_dim
def __call__(self, results):
pts_filename = results['pts_filename']
mmcv.check_file_exist(pts_filename)
points = np.load(pts_filename)
points = points.reshape(-1, self.load_dim)
points = points[:, self.use_dim]
if self.use_height:
floor_height = np.percentile(points[:, 2], 0.99)
height = points[:, 2] - floor_height
points = np.concatenate([points, np.expand_dims(height, 1)], 1)
results['points'] = points
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(use_height={}, '.format(self.use_height)
repr_str += 'load_dim={}, '.format(self.load_dim)
repr_str += 'use_dim={})'.format(self.use_dim)
return repr_str
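A short sketch of the height channel computed above. Note that np.percentile takes a percentage, so 0.99 is the 0.99th percentile (very close to the minimum z), not the 99th:

import numpy as np

points = np.array([[0.0, 0.0, 0.02],
                   [0.0, 0.0, 0.05],
                   [0.0, 0.0, 1.50]])
floor_height = np.percentile(points[:, 2], 0.99)  # ~0.02, near the floor
height = points[:, 2] - floor_height
points = np.concatenate([points, np.expand_dims(height, 1)], 1)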
@PIPELINES.register_module()
class IndoorLoadAnnotations3D(object):
"""Indoor Load Annotations3D.
Load instance mask and semantic mask of points.
"""
def __init__(self):
pass
def __call__(self, results):
pts_instance_mask_path = results['pts_instance_mask_path']
pts_semantic_mask_path = results['pts_semantic_mask_path']
mmcv.check_file_exist(pts_instance_mask_path)
mmcv.check_file_exist(pts_semantic_mask_path)
pts_instance_mask = np.load(pts_instance_mask_path)
pts_semantic_mask = np.load(pts_semantic_mask_path)
results['pts_instance_mask'] = pts_instance_mask
results['pts_semantic_mask'] = pts_semantic_mask
return results
def __repr__(self):
repr_str = self.__class__.__name__
return repr_str
@@ -4,7 +4,7 @@ from mmdet.datasets.builder import PIPELINES
@PIPELINES.register_module()
class IndoorPointSample(object):
"""Point Sample.
Sampling data to a certain number.
@@ -46,7 +46,7 @@ class PointSample(object):
return points[choices]
def __call__(self, results):
points = results['points']
points, choices = self.points_random_sampling(
points, self.num_points, return_choices=True)
pts_instance_mask = results.get('pts_instance_mask', None)
......
from mmdet.models.roi_heads.roi_extractors import SingleRoIExtractor
from .single_roiaware_extractor import Single3DRoIAwareExtractor
__all__ = ['SingleRoIExtractor', 'Single3DRoIAwareExtractor']
import torch
import torch.nn as nn
from mmdet3d import ops
from mmdet.models.builder import ROI_EXTRACTORS
@ROI_EXTRACTORS.register_module()
class Single3DRoIAwareExtractor(nn.Module):
"""Point-wise roi-aware Extractor
Extract Point-wise roi features.
Args:
roi_layer (dict): the config of roi layer
"""
def __init__(self, roi_layer=None):
super(Single3DRoIAwareExtractor, self).__init__()
self.roi_layer = self.build_roi_layers(roi_layer)
def build_roi_layers(self, layer_cfg):
cfg = layer_cfg.copy()
layer_type = cfg.pop('type')
assert hasattr(ops, layer_type)
layer_cls = getattr(ops, layer_type)
roi_layers = layer_cls(**cfg)
return roi_layers
def forward(self, feats, coordinate, batch_inds, rois):
"""Extract point-wise roi features
Args:
feats (FloatTensor): point-wise features with
shape (batch, npoints, channels) for pooling
coordinate (FloatTensor): coordinate of each point
batch_inds (longTensor): indicate the batch of each point
rois (FloatTensor): roi boxes with batch indices
Returns:
FloatTensor: pooled features
"""
pooled_roi_feats = []
for batch_idx in range(int(batch_inds.max()) + 1):
roi_inds = (rois[..., 0].int() == batch_idx)
coors_inds = (batch_inds.int() == batch_idx)
pooled_roi_feat = self.roi_layer(rois[..., 1:][roi_inds],
coordinate[coors_inds],
feats[coors_inds])
pooled_roi_feats.append(pooled_roi_feat)
pooled_roi_feats = torch.cat(pooled_roi_feats, 0)
return pooled_roi_feats
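A hypothetical wiring sketch for the extractor; the out_size / max_pts_per_voxel / mode argument names for ops.RoIAwarePool3d are assumptions, not confirmed by this diff:

extractor = Single3DRoIAwareExtractor(
    roi_layer=dict(type='RoIAwarePool3d', out_size=4,
                   max_pts_per_voxel=128, mode='max'))  # assumed args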
@@ -2,28 +2,18 @@ from mmdet.ops import (RoIAlign, SigmoidFocalLoss, get_compiler_version,
get_compiling_cuda_version, nms, roi_align,
sigmoid_focal_loss)
from .norm import NaiveSyncBatchNorm1d, NaiveSyncBatchNorm2d
from .roiaware_pool3d import (RoIAwarePool3d, points_in_boxes_cpu,
points_in_boxes_gpu)
from .sparse_block import (SparseBasicBlock, SparseBasicBlockV0,
SparseBottleneck, SparseBottleneckV0)
from .voxel import DynamicScatter, Voxelization, dynamic_scatter, voxelization
__all__ = [
'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'get_compiler_version',
'get_compiling_cuda_version', 'NaiveSyncBatchNorm1d',
'NaiveSyncBatchNorm2d', 'batched_nms', 'Voxelization', 'voxelization',
'dynamic_scatter', 'DynamicScatter', 'sigmoid_focal_loss',
'SigmoidFocalLoss', 'SparseBasicBlockV0', 'SparseBottleneckV0',
'SparseBasicBlock', 'SparseBottleneck', 'RoIAwarePool3d',
'points_in_boxes_gpu', 'points_in_boxes_cpu'
]
from .iou3d_utils import (boxes_iou3d_gpu_camera, boxes_iou3d_gpu_lidar,
boxes_iou_bev, nms_gpu, nms_normal_gpu)
__all__ = [
'boxes_iou_bev', 'boxes_iou3d_gpu_camera', 'nms_gpu', 'nms_normal_gpu',
'boxes_iou3d_gpu_lidar'
]
@@ -20,17 +20,22 @@ def boxes_iou_bev(boxes_a, boxes_b):
return ans_iou
def boxes_iou3d_gpu_camera(boxes_a, boxes_b, mode='iou'):
"""Calculate 3d iou of boxes in camera coordinate
Args:
boxes_a (FloatTensor): (N, 7) [x, y, z, h, w, l, ry]
in LiDAR coordinate
boxes_b (FloatTensor): (M, 7) [x, y, z, h, w, l, ry]
mode (str): "iou" (intersection over union) or iof (intersection over
foreground).
Returns:
FloatTensor: (M, N)
"""
boxes_a_bev = boxes3d_to_bev_torch_camera(boxes_a)
boxes_b_bev = boxes3d_to_bev_torch_camera(boxes_b)
# bev overlap
overlaps_bev = torch.cuda.FloatTensor(
@@ -51,15 +56,62 @@ def boxes_iou3d_gpu(boxes_a, boxes_b, mode='iou'):
# 3d iou
overlaps_3d = overlaps_bev * overlaps_h
volume_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1)
volume_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1)
if mode == 'iou':
# the clamp func is used to avoid division of 0
iou3d = overlaps_3d / torch.clamp(
volume_a + volume_b - overlaps_3d, min=1e-8)
else:
iou3d = overlaps_3d / torch.clamp(volume_a, min=1e-8)
return iou3d
def boxes_iou3d_gpu_lidar(boxes_a, boxes_b, mode='iou'):
"""Calculate 3d iou of boxes in lidar coordinate
Args:
boxes_a (FloatTensor): (N, 7) [x, y, z, w, l, h, ry]
in LiDAR coordinate
boxes_b (FloatTensor): (M, 7) [x, y, z, w, l, h, ry]
mode (str): "iou" (intersection over union) or iof (intersection over
foreground).
Returns:
FloatTensor: (M, N)
"""
boxes_a_bev = boxes3d_to_bev_torch_lidar(boxes_a)
boxes_b_bev = boxes3d_to_bev_torch_lidar(boxes_b)
# height overlap
boxes_a_height_max = (boxes_a[:, 2] + boxes_a[:, 5]).view(-1, 1)
boxes_a_height_min = boxes_a[:, 2].view(-1, 1)
boxes_b_height_max = (boxes_b[:, 2] + boxes_b[:, 5]).view(1, -1)
boxes_b_height_min = boxes_b[:, 2].view(1, -1)
# bev overlap
overlaps_bev = boxes_a.new_zeros(
torch.Size((boxes_a.shape[0], boxes_b.shape[0]))) # (N, M)
iou3d_cuda.boxes_overlap_bev_gpu(boxes_a_bev.contiguous(),
boxes_b_bev.contiguous(), overlaps_bev)
max_of_min = torch.max(boxes_a_height_min, boxes_b_height_min)
min_of_max = torch.min(boxes_a_height_max, boxes_b_height_max)
overlaps_h = torch.clamp(min_of_max - max_of_min, min=0)
# 3d iou
overlaps_3d = overlaps_bev * overlaps_h
volume_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1)
volume_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1)
if mode == 'iou':
# the clamp func is used to avoid division of 0
iou3d = overlaps_3d / torch.clamp(
volume_a + volume_b - overlaps_3d, min=1e-8)
else:
iou3d = overlaps_3d / torch.clamp(volume_a, min=1e-8)
return iou3d
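The decomposition is worth spelling out: the 3D overlap is the BEV polygon overlap times the height overlap, and the IoU divides by the union volume. A toy check of the final arithmetic (pure torch, no CUDA op needed):

import torch

overlaps_bev = torch.tensor([[0.5]])  # as produced by the BEV-overlap op
overlaps_h = torch.tensor([[1.0]])
overlaps_3d = overlaps_bev * overlaps_h
volume_a = torch.tensor([[1.0]])
volume_b = torch.tensor([[1.0]])
iou3d = overlaps_3d / torch.clamp(volume_a + volume_b - overlaps_3d, min=1e-8)
assert torch.allclose(iou3d, torch.tensor([[1.0 / 3.0]]))  # 0.5 / 1.5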
@@ -98,16 +150,39 @@ def nms_normal_gpu(boxes, scores, thresh):
return order[keep[:num_out].cuda()].contiguous()
def boxes3d_to_bev_torch_camera(boxes3d):
"""covert boxes3d to bev in in camera coords
Args:
boxes3d (FloartTensor): (N, 7) [x, y, z, h, w, l, ry] in camera coords
Return:
FloartTensor: (N, 5) [x1, y1, x2, y2, ry]
"""
boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5)))
cu, cv = boxes3d[:, 0], boxes3d[:, 2]
half_l, half_w = boxes3d[:, 5] / 2, boxes3d[:, 4] / 2
boxes_bev[:, 0], boxes_bev[:, 1] = cu - half_l, cv - half_w
boxes_bev[:, 2], boxes_bev[:, 3] = cu + half_l, cv + half_w
boxes_bev[:, 4] = boxes3d[:, 6]
return boxes_bev
def boxes3d_to_bev_torch_lidar(boxes3d):
"""covert boxes3d to bev in in LiDAR coords
Args:
boxes3d (FloartTensor): (N, 7) [x, y, z, w, l, h, ry] in LiDAR coords
Returns:
FloartTensor: (N, 5) [x1, y1, x2, y2, ry]
"""
boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5)))
x, y = boxes3d[:, 0], boxes3d[:, 1]
half_l, half_w = boxes3d[:, 4] / 2, boxes3d[:, 3] / 2
boxes_bev[:, 0], boxes_bev[:, 1] = x - half_w, y - half_l
boxes_bev[:, 2], boxes_bev[:, 3] = x + half_w, y + half_l
boxes_bev[:, 4] = boxes3d[:, 6]
return boxes_bev
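A quick check of the LiDAR BEV conversion above (half the width spans x, half the length spans y):

import torch

box = torch.tensor([[2.0, 3.0, -1.0, 1.0, 4.0, 1.5, 0.0]])  # x, y, z, w, l, h, ry
bev = boxes3d_to_bev_torch_lidar(box)
# half_w = 0.5 along x, half_l = 2.0 along y
assert torch.allclose(bev, torch.tensor([[1.5, 1.0, 2.5, 5.0, 0.0]]))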
@@ -7,8 +7,8 @@
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <torch/extension.h>
#include <torch/serialize/tensor.h>
#include <torch/types.h>
#define THREADS_PER_BLOCK 256
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
......
// Modified from
// https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
// RoI-aware point cloud feature pooling
// Written by Shaoshuai Shi
// All Rights Reserved 2019.
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <torch/serialize/tensor.h>
#include <torch/types.h>
#define THREADS_PER_BLOCK 256
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
// #define DEBUG
__device__ inline void lidar_to_local_coords(float shift_x, float shift_y,
float rz, float &local_x,
float &local_y) {
// should rotate pi/2 + alpha to translate LiDAR to local
float rot_angle = rz + M_PI / 2;
float cosa = cos(rot_angle), sina = sin(rot_angle);
@@ -26,10 +25,11 @@ __device__ inline void lidar_to_local_coords(float shift_x, float shift_y, float
local_y = shift_x * sina + shift_y * cosa;
}
__device__ inline int check_pt_in_box3d(const float *pt, const float *box3d, float &local_x, float &local_y){
__device__ inline int check_pt_in_box3d(const float *pt, const float *box3d,
float &local_x, float &local_y) {
// param pt: (x, y, z)
// param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the
// bottom center
float x = pt[0], y = pt[1], z = pt[2];
float cx = box3d[0], cy = box3d[1], cz = box3d[2];
float w = box3d[3], l = box3d[4], h = box3d[5], rz = box3d[6];
@@ -37,16 +37,19 @@ __device__ inline int check_pt_in_box3d(const float *pt, const float *box3d, flo
if (fabsf(z - cz) > h / 2.0) return 0;
lidar_to_local_coords(x - cx, y - cy, rz, local_x, local_y);
float in_flag = (local_x > -l / 2.0) & (local_x < l / 2.0) &
(local_y > -w / 2.0) & (local_y < w / 2.0);
return in_flag;
}
__global__ void generate_pts_mask_for_box3d(int boxes_num, int pts_num,
int out_x, int out_y, int out_z,
const float *rois, const float *pts,
int *pts_mask) {
// params rois: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate
// params pts: (npoints, 3) [x, y, z]
// params pts_mask: (N, npoints): -1 means the point is not in this box,
// otherwise: encode (x_idxs, y_idxs, z_idxs) by binary bit
int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
int box_idx = blockIdx.y;
if (pt_idx >= pts_num || box_idx >= boxes_num) return;
@@ -59,7 +62,7 @@ __global__ void generate_pts_mask_for_box3d(int boxes_num, int pts_num, int out_
int cur_in_flag = check_pt_in_box3d(pts, rois, local_x, local_y);
pts_mask[0] = -1;
if (cur_in_flag > 0) {
float local_z = pts[2] - rois[2];
float w = rois[3], l = rois[4], h = rois[5];
@@ -77,17 +80,22 @@ __global__ void generate_pts_mask_for_box3d(int boxes_num, int pts_num, int out_
unsigned int idx_encoding = (x_idx << 16) + (y_idx << 8) + z_idx;
#ifdef DEBUG
printf("mask: pts_%d(%.3f, %.3f, %.3f), local(%.3f, %.3f, %.3f), idx(%d, %d, %d), res(%.3f, %.3f, %.3f), idx_encoding=%x\n",
pt_idx, pts[0], pts[1], pts[2], local_x, local_y, local_z, x_idx, y_idx, z_idx, x_res, y_res, z_res, idx_encoding);
printf(
"mask: pts_%d(%.3f, %.3f, %.3f), local(%.3f, %.3f, %.3f), idx(%d, %d, "
"%d), res(%.3f, %.3f, %.3f), idx_encoding=%x\n",
pt_idx, pts[0], pts[1], pts[2], local_x, local_y, local_z, x_idx, y_idx,
z_idx, x_res, y_res, z_res, idx_encoding);
#endif
pts_mask[0] = idx_encoding;
}
}
__global__ void collect_inside_pts_for_box3d(int boxes_num, int pts_num,
int max_pts_each_voxel, int out_x,
int out_y, int out_z,
const int *pts_mask,
int *pts_idx_of_voxels) {
// params pts_mask: (N, npoints) 0 or 1
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
@@ -97,33 +105,36 @@ __global__ void collect_inside_pts_for_box3d(int boxes_num, int pts_num, int max
int max_num_pts = max_pts_each_voxel - 1; // index 0 is the counter
pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel;
for (int k = 0; k < pts_num; k++) {
if (pts_mask[box_idx * pts_num + k] != -1) {
unsigned int idx_encoding = pts_mask[box_idx * pts_num + k];
unsigned int x_idx = (idx_encoding >> 16) & 0xFF;
unsigned int y_idx = (idx_encoding >> 8) & 0xFF;
unsigned int z_idx = idx_encoding & 0xFF;
unsigned int base_offset = x_idx * out_y * out_z * max_pts_each_voxel +
y_idx * out_z * max_pts_each_voxel +
z_idx * max_pts_each_voxel;
unsigned int cnt = pts_idx_of_voxels[base_offset];
if (cnt < max_num_pts) {
pts_idx_of_voxels[base_offset + cnt + 1] = k;
pts_idx_of_voxels[base_offset]++;
}
#ifdef DEBUG
printf("collect: pts_%d, idx(%d, %d, %d), idx_encoding=%x\n",
k, x_idx, y_idx, z_idx, idx_encoding);
printf("collect: pts_%d, idx(%d, %d, %d), idx_encoding=%x\n", k, x_idx,
y_idx, z_idx, idx_encoding);
#endif
}
}
}
__global__ void roiaware_maxpool3d(int boxes_num, int pts_num, int channels,
int max_pts_each_voxel, int out_x, int out_y,
int out_z, const float *pts_feature,
const int *pts_idx_of_voxels,
float *pooled_features, int *argmax) {
// params pts_feature: (npoints, C)
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel),
// index 0 is the counter
// params pooled_features: (N, out_x, out_y, out_z, C)
// params argmax: (N, out_x, out_y, out_z, C)
int box_idx = blockIdx.z;
@@ -133,46 +144,57 @@ __global__ void roiaware_maxpool3d(int boxes_num, int pts_num, int channels, int
int x_idx = voxel_idx_flat / (out_y * out_z);
int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
int z_idx = voxel_idx_flat % out_z;
if (box_idx >= boxes_num || channel_idx >= channels || x_idx >= out_x ||
y_idx >= out_y || z_idx >= out_z)
return;
#ifdef DEBUG
printf("src pts_idx_of_voxels: (%p, ), argmax: %p\n", pts_idx_of_voxels, argmax);
printf("src pts_idx_of_voxels: (%p, ), argmax: %p\n", pts_idx_of_voxels,
argmax);
#endif
int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel +
offset_base * max_pts_each_voxel;
pooled_features += box_idx * out_x * out_y * out_z * channels +
offset_base * channels + channel_idx;
argmax += box_idx * out_x * out_y * out_z * channels +
offset_base * channels + channel_idx;
int argmax_idx = -1;
float max_val = -1e50;
int total_pts = pts_idx_of_voxels[0];
for (int k = 1; k <= total_pts; k++) {
if (pts_feature[pts_idx_of_voxels[k] * channels + channel_idx] > max_val) {
max_val = pts_feature[pts_idx_of_voxels[k] * channels + channel_idx];
argmax_idx = pts_idx_of_voxels[k];
}
}
if (argmax_idx != -1) {
pooled_features[0] = max_val;
}
argmax[0] = argmax_idx;
#ifdef DEBUG
printf("channel_%d idx(%d, %d, %d), argmax_idx=(%d, %.3f), total=%d, after pts_idx: %p, argmax: (%p, %d)\n",
channel_idx, x_idx, y_idx, z_idx, argmax_idx, max_val, total_pts, pts_idx_of_voxels, argmax, argmax_idx);
printf(
"channel_%d idx(%d, %d, %d), argmax_idx=(%d, %.3f), total=%d, after "
"pts_idx: %p, argmax: (%p, %d)\n",
channel_idx, x_idx, y_idx, z_idx, argmax_idx, max_val, total_pts,
pts_idx_of_voxels, argmax, argmax_idx);
#endif
}
__global__ void roiaware_avgpool3d(int boxes_num, int pts_num, int channels,
int max_pts_each_voxel, int out_x, int out_y,
int out_z, const float *pts_feature,
const int *pts_idx_of_voxels,
float *pooled_features) {
// params pts_feature: (npoints, C)
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel),
// index 0 is the counter
// params pooled_features: (N, out_x, out_y, out_z, C)
// params argmax: (N, out_x, out_y, out_z, C)
int box_idx = blockIdx.z;
@@ -182,28 +204,34 @@ __global__ void roiaware_avgpool3d(int boxes_num, int pts_num, int channels, int
int x_idx = voxel_idx_flat / (out_y * out_z);
int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
int z_idx = voxel_idx_flat % out_z;
if (box_idx >= boxes_num || channel_idx >= channels || x_idx >= out_x ||
y_idx >= out_y || z_idx >= out_z)
return;
int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel +
offset_base * max_pts_each_voxel;
pooled_features += box_idx * out_x * out_y * out_z * channels +
offset_base * channels + channel_idx;
float sum_val = 0;
int total_pts = pts_idx_of_voxels[0];
for (int k = 1; k <= total_pts; k++) {
sum_val += pts_feature[pts_idx_of_voxels[k] * channels + channel_idx];
}
if (total_pts > 0) {
pooled_features[0] = sum_val / total_pts;
}
}
void roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels,
int max_pts_each_voxel, int out_x, int out_y,
int out_z, const float *rois, const float *pts,
const float *pts_feature, int *argmax,
int *pts_idx_of_voxels, float *pooled_features,
int pool_method) {
// params rois: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate
// params pts: (npoints, 3) [x, y, z] in LiDAR coordinate
// params pts_feature: (npoints, C)
@@ -218,25 +246,28 @@ void roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels, int max_
dim3 blocks_mask(DIVUP(pts_num, THREADS_PER_BLOCK), boxes_num);
dim3 threads(THREADS_PER_BLOCK);
generate_pts_mask_for_box3d<<<blocks_mask, threads>>>(
boxes_num, pts_num, out_x, out_y, out_z, rois, pts, pts_mask);
// TODO: Merge the collect and pool functions, SS
dim3 blocks_collect(DIVUP(boxes_num, THREADS_PER_BLOCK));
collect_inside_pts_for_box3d<<<blocks_collect, threads>>>(
boxes_num, pts_num, max_pts_each_voxel, out_x, out_y, out_z, pts_mask,
pts_idx_of_voxels);
dim3 blocks_pool(DIVUP(out_x * out_y * out_z, THREADS_PER_BLOCK), channels,
boxes_num);
if (pool_method == 0) {
roiaware_maxpool3d<<<blocks_pool, threads>>>(
boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z,
pts_feature, pts_idx_of_voxels, pooled_features, argmax);
} else if (pool_method == 1) {
roiaware_avgpool3d<<<blocks_pool, threads>>>(
boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z,
pts_feature, pts_idx_of_voxels, pooled_features);
}
cudaFree(pts_mask);
#ifdef DEBUG
@@ -244,9 +275,11 @@ void roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels, int max_
#endif
}
__global__ void roiaware_maxpool3d_backward(int boxes_num, int channels,
int out_x, int out_y, int out_z,
const int *argmax,
const float *grad_out,
float *grad_in) {
// params argmax: (N, out_x, out_y, out_z, C)
// params grad_out: (N, out_x, out_y, out_z, C)
// params grad_in: (npoints, C), return value
@@ -258,20 +291,27 @@ __global__ void roiaware_maxpool3d_backward(int boxes_num, int channels, int out
int x_idx = voxel_idx_flat / (out_y * out_z);
int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
int z_idx = voxel_idx_flat % out_z;
if (box_idx >= boxes_num || channel_idx >= channels || x_idx >= out_x ||
y_idx >= out_y || z_idx >= out_z)
return;
int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
argmax += box_idx * out_x * out_y * out_z * channels +
offset_base * channels + channel_idx;
grad_out += box_idx * out_x * out_y * out_z * channels +
offset_base * channels + channel_idx;
if (argmax[0] == -1) return;
atomicAdd(grad_in + argmax[0] * channels + channel_idx, grad_out[0] * 1);
}
__global__ void roiaware_avgpool3d_backward(int boxes_num, int channels,
int out_x, int out_y, int out_z,
int max_pts_each_voxel,
const int *pts_idx_of_voxels,
const float *grad_out,
float *grad_in) {
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params grad_out: (N, out_x, out_y, out_z, C)
// params grad_in: (npoints, C), return value
@@ -283,41 +323,45 @@ __global__ void roiaware_avgpool3d_backward(int boxes_num, int channels, int out
int x_idx = voxel_idx_flat / (out_y * out_z);
int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
int z_idx = voxel_idx_flat % out_z;
if (box_idx >= boxes_num || channel_idx >= channels || x_idx >= out_x ||
y_idx >= out_y || z_idx >= out_z)
return;
int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel +
offset_base * max_pts_each_voxel;
grad_out += box_idx * out_x * out_y * out_z * channels +
offset_base * channels + channel_idx;
int total_pts = pts_idx_of_voxels[0];
float cur_grad = 1 / fmaxf(float(total_pts), 1.0);
for (int k = 1; k <= total_pts; k++) {
atomicAdd(grad_in + pts_idx_of_voxels[k] * channels + channel_idx,
grad_out[0] * cur_grad);
}
}
void roiaware_pool3d_backward_launcher(int boxes_num, int out_x, int out_y,
int out_z, int channels,
int max_pts_each_voxel,
const int *pts_idx_of_voxels,
const int *argmax, const float *grad_out,
float *grad_in, int pool_method) {
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params argmax: (N, out_x, out_y, out_z, C)
// params grad_out: (N, out_x, out_y, out_z, C)
// params grad_in: (npoints, C), return value
// params pool_method: 0: max_pool, 1: avg_pool
dim3 blocks(DIVUP(out_x * out_y * out_z, THREADS_PER_BLOCK), channels,
boxes_num);
dim3 threads(THREADS_PER_BLOCK);
if (pool_method == 0) {
roiaware_maxpool3d_backward<<<blocks, threads>>>(
boxes_num, channels, out_x, out_y, out_z, argmax, grad_out, grad_in);
} else if (pool_method == 1) {
roiaware_avgpool3d_backward<<<blocks, threads>>>(
boxes_num, channels, out_x, out_y, out_z, max_pts_each_voxel,
pts_idx_of_voxels, grad_out, grad_in);
}
}
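To make the kernel geometry easier to follow, here is a minimal numpy sketch of the point-in-box test from check_pt_in_box3d; the local_x line elided in the collapsed hunk above is assumed to be the standard counterpart of the shown local_y rotation:

import numpy as np

def check_pt_in_box3d_py(pt, box3d):
    # box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinates,
    # mirroring the CUDA kernel's checks
    x, y, z = pt
    cx, cy, cz, w, l, h, rz = box3d
    if abs(z - cz) > h / 2.0:
        return False
    # rotate by pi/2 + rz to go from LiDAR to local box coordinates
    rot = rz + np.pi / 2
    cosa, sina = np.cos(rot), np.sin(rot)
    local_x = (x - cx) * cosa + (y - cy) * (-sina)  # assumed elided line
    local_y = (x - cx) * sina + (y - cy) * cosa
    return abs(local_x) < l / 2.0 and abs(local_y) < w / 2.0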
from mmcv.cnn import build_norm_layer
from torch import nn
from mmdet.models.backbones.resnet import BasicBlock, Bottleneck
from . import spconv
def conv3x3(in_planes, out_planes, stride=1, indice_key=None):
......
import numpy as np
import pytest
import torch
from mmdet3d.core.bbox import (Box3DMode, CameraInstance3DBoxes,
LiDARInstance3DBoxes)
def test_lidar_boxes3d():
# test empty initialization
empty_boxes = []
boxes = LiDARInstance3DBoxes(empty_boxes)
assert boxes.tensor.shape[0] == 0
assert boxes.tensor.shape[1] == 7
# Test init with numpy array
np_boxes = np.array(
[[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
[8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62]],
dtype=np.float32)
boxes_1 = LiDARInstance3DBoxes(np_boxes)
assert torch.allclose(boxes_1.tensor, torch.from_numpy(np_boxes))
# test properties
assert boxes_1.volume.size(0) == 2
assert (boxes_1.center == boxes_1.bottom_center).all()
assert repr(boxes) == (
'LiDARInstance3DBoxes(\n tensor([], size=(0, 7)))')
# test init with torch.Tensor
th_boxes = torch.tensor(
[[
28.29669987, -0.5557558, -1.30332506, 1.47000003, 2.23000002,
1.48000002, -1.57000005
],
[
26.66901946, 21.82302134, -1.73605708, 1.55999994, 3.48000002,
1.39999998, -1.69000006
],
[
31.31977974, 8.16214412, -1.62177875, 1.74000001, 3.76999998,
1.48000002, 2.78999996
]],
dtype=torch.float32)
boxes_2 = LiDARInstance3DBoxes(th_boxes)
assert torch.allclose(boxes_2.tensor, th_boxes)
# test clone/to/device
boxes_2 = boxes_2.clone()
boxes_1 = boxes_1.to(boxes_2.device)
# test box concatenation
expected_tensor = torch.tensor(
[[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
[8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
[28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
[26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
[31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
boxes = LiDARInstance3DBoxes.cat([boxes_1, boxes_2])
assert torch.allclose(boxes.tensor, expected_tensor)
# concatenate empty list
empty_boxes = LiDARInstance3DBoxes.cat([])
assert empty_boxes.tensor.shape[0] == 0
assert empty_boxes.tensor.shape[-1] == 7
# test box flip
expected_tensor = torch.tensor(
[[1.7802081, -2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.6615927],
[8.959413, -2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.5215927],
[28.2967, 0.5557558, -1.303325, 1.47, 2.23, 1.48, 4.7115927],
[26.66902, -21.82302, -1.736057, 1.56, 3.48, 1.4, 4.8315926],
[31.31978, -8.162144, -1.6217787, 1.74, 3.77, 1.48, 0.35159278]])
boxes.flip()
assert torch.allclose(boxes.tensor, expected_tensor)
# test box rotation
expected_tensor = torch.tensor(
[[1.0385344, -2.9020846, -1.7501148, 1.75, 3.39, 1.65, 1.9336663],
[7.969653, -4.774011, -1.6357126, 1.54, 4.01, 1.57, 1.7936664],
[27.405172, -7.0688415, -1.303325, 1.47, 2.23, 1.48, 4.9836664],
[19.823532, -28.187025, -1.736057, 1.56, 3.48, 1.4, 5.1036663],
[27.974297, -16.27845, -1.6217787, 1.74, 3.77, 1.48, 0.6236664]])
boxes.rotate(0.27207362796436096)
assert torch.allclose(boxes.tensor, expected_tensor)
# test box scaling
expected_tensor = torch.tensor([[
1.0443488, -2.9183323, -1.7599131, 1.7597977, 3.4089797, 1.6592377,
1.9336663
],
[
8.014273, -4.8007393, -1.6448704,
1.5486219, 4.0324507, 1.57879,
1.7936664
],
[
27.558605, -7.1084175, -1.310622,
1.4782301, 2.242485, 1.488286,
4.9836664
],
[
19.934517, -28.344835, -1.7457767,
1.5687338, 3.4994833, 1.4078381,
5.1036663
],
[
28.130915, -16.369587, -1.6308585,
1.7497417, 3.791107, 1.488286,
0.6236664
]])
boxes.scale(1.00559866335275)
assert torch.allclose(boxes.tensor, expected_tensor)
# test box translation
expected_tensor = torch.tensor([[
1.1281544, -3.0507944, -1.9169292, 1.7597977, 3.4089797, 1.6592377,
1.9336663
],
[
8.098079, -4.9332013, -1.8018866,
1.5486219, 4.0324507, 1.57879,
1.7936664
],
[
27.64241, -7.2408795, -1.4676381,
1.4782301, 2.242485, 1.488286,
4.9836664
],
[
20.018322, -28.477297, -1.9027928,
1.5687338, 3.4994833, 1.4078381,
5.1036663
],
[
28.21472, -16.502048, -1.7878747,
1.7497417, 3.791107, 1.488286,
0.6236664
]])
boxes.translate([0.0838056, -0.13246193, -0.15701613])
assert torch.allclose(boxes.tensor, expected_tensor)
# test bbox in_range_bev
expected_tensor = torch.tensor([1, 1, 1, 1, 1], dtype=torch.bool)
mask = boxes.in_range_bev([0., -40., 70.4, 40.])
assert (mask == expected_tensor).all()
mask = boxes.nonempty()
assert (mask == expected_tensor).all()
# test bbox in_range
expected_tensor = torch.tensor([1, 1, 0, 0, 0], dtype=torch.bool)
mask = boxes.in_range_3d([0, -20, -2, 22, 2, 5])
assert (mask == expected_tensor).all()
# test bbox indexing
index_boxes = boxes[2:5]
expected_tensor = torch.tensor([[
27.64241, -7.2408795, -1.4676381, 1.4782301, 2.242485, 1.488286,
4.9836664
],
[
20.018322, -28.477297, -1.9027928,
1.5687338, 3.4994833, 1.4078381,
5.1036663
],
[
28.21472, -16.502048, -1.7878747,
1.7497417, 3.791107, 1.488286,
0.6236664
]])
assert len(index_boxes) == 3
assert torch.allclose(index_boxes.tensor, expected_tensor)
index_boxes = boxes[2]
expected_tensor = torch.tensor([[
27.64241, -7.2408795, -1.4676381, 1.4782301, 2.242485, 1.488286,
4.9836664
]])
assert len(index_boxes) == 1
assert torch.allclose(index_boxes.tensor, expected_tensor)
index_boxes = boxes[[2, 4]]
expected_tensor = torch.tensor([[
27.64241, -7.2408795, -1.4676381, 1.4782301, 2.242485, 1.488286,
4.9836664
],
[
28.21472, -16.502048, -1.7878747,
1.7497417, 3.791107, 1.488286,
0.6236664
]])
assert len(index_boxes) == 2
assert torch.allclose(index_boxes.tensor, expected_tensor)
# test iteration
for i, box in enumerate(index_boxes):
assert torch.allclose(box, expected_tensor[i])
# test properties
assert torch.allclose(boxes.bottom_center, boxes.tensor[:, :3])
expected_tensor = (
boxes.tensor[:, :3] - boxes.tensor[:, 3:6] *
(torch.tensor([0.5, 0.5, 0]) - torch.tensor([0.5, 0.5, 0.5])))
assert torch.allclose(boxes.gravity_center, expected_tensor)
boxes.limit_yaw()
assert (boxes.tensor[:, 6] <= np.pi / 2).all()
assert (boxes.tensor[:, 6] >= -np.pi / 2).all()
Box3DMode.convert(boxes, Box3DMode.LIDAR, Box3DMode.LIDAR)
expected_tensor = boxes.tensor.clone()
assert torch.allclose(expected_tensor, boxes.tensor)
boxes.flip()
boxes.flip()
boxes.limit_yaw()
assert torch.allclose(expected_tensor, boxes.tensor)
# test nearest_bev
expected_tensor = torch.tensor([[-0.5763, -3.9307, 2.8326, -2.1709],
[6.0819, -5.7075, 10.1143, -4.1589],
[26.5212, -7.9800, 28.7637, -6.5018],
[18.2686, -29.2617, 21.7681, -27.6929],
[27.3398, -18.3976, 29.0896, -14.6065]])
# the pytorch print loses some precision
assert torch.allclose(
boxes.nearest_bev, expected_tensor, rtol=1e-4, atol=1e-7)
# obtained by the print of the original implementation
expected_tensor = torch.tensor([[[2.4093e+00, -4.4784e+00, -1.9169e+00],
[2.4093e+00, -4.4784e+00, -2.5769e-01],
[-7.7767e-01, -3.2684e+00, -2.5769e-01],
[-7.7767e-01, -3.2684e+00, -1.9169e+00],
[3.0340e+00, -2.8332e+00, -1.9169e+00],
[3.0340e+00, -2.8332e+00, -2.5769e-01],
[-1.5301e-01, -1.6232e+00, -2.5769e-01],
[-1.5301e-01, -1.6232e+00, -1.9169e+00]],
[[9.8933e+00, -6.1340e+00, -1.8019e+00],
[9.8933e+00, -6.1340e+00, -2.2310e-01],
[5.9606e+00, -5.2427e+00, -2.2310e-01],
[5.9606e+00, -5.2427e+00, -1.8019e+00],
[1.0236e+01, -4.6237e+00, -1.8019e+00],
[1.0236e+01, -4.6237e+00, -2.2310e-01],
[6.3029e+00, -3.7324e+00, -2.2310e-01],
[6.3029e+00, -3.7324e+00, -1.8019e+00]],
[[2.8525e+01, -8.2534e+00, -1.4676e+00],
[2.8525e+01, -8.2534e+00, 2.0648e-02],
[2.6364e+01, -7.6525e+00, 2.0648e-02],
[2.6364e+01, -7.6525e+00, -1.4676e+00],
[2.8921e+01, -6.8292e+00, -1.4676e+00],
[2.8921e+01, -6.8292e+00, 2.0648e-02],
[2.6760e+01, -6.2283e+00, 2.0648e-02],
[2.6760e+01, -6.2283e+00, -1.4676e+00]],
[[2.1337e+01, -2.9870e+01, -1.9028e+00],
[2.1337e+01, -2.9870e+01, -4.9495e-01],
[1.8102e+01, -2.8535e+01, -4.9495e-01],
[1.8102e+01, -2.8535e+01, -1.9028e+00],
[2.1935e+01, -2.8420e+01, -1.9028e+00],
[2.1935e+01, -2.8420e+01, -4.9495e-01],
[1.8700e+01, -2.7085e+01, -4.9495e-01],
[1.8700e+01, -2.7085e+01, -1.9028e+00]],
[[2.6398e+01, -1.7530e+01, -1.7879e+00],
[2.6398e+01, -1.7530e+01, -2.9959e-01],
[2.8612e+01, -1.4452e+01, -2.9959e-01],
[2.8612e+01, -1.4452e+01, -1.7879e+00],
[2.7818e+01, -1.8552e+01, -1.7879e+00],
[2.7818e+01, -1.8552e+01, -2.9959e-01],
[3.0032e+01, -1.5474e+01, -2.9959e-01],
[3.0032e+01, -1.5474e+01, -1.7879e+00]]])
# the pytorch print loses some precision
assert torch.allclose(boxes.corners, expected_tensor, rtol=1e-4, atol=1e-7)
def test_boxes_conversion():
"""Test the conversion of boxes between different modes.
ComandLine:
xdoctest tests/test_box3d.py::test_boxes_conversion zero
"""
lidar_boxes = LiDARInstance3DBoxes(
[[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
[8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
[28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
[26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
[31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
cam_box_tensor = Box3DMode.convert(lidar_boxes.tensor, Box3DMode.LIDAR,
Box3DMode.CAM)
lidar_box_tensor = Box3DMode.convert(cam_box_tensor, Box3DMode.CAM,
Box3DMode.LIDAR)
expected_tensor = torch.tensor(
[[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
[8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
[28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
[26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
[31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
assert torch.allclose(expected_tensor, lidar_box_tensor)
assert torch.allclose(lidar_boxes.tensor, lidar_box_tensor)
depth_box_tensor = Box3DMode.convert(cam_box_tensor, Box3DMode.CAM,
Box3DMode.DEPTH)
depth_to_cam_box_tensor = Box3DMode.convert(depth_box_tensor,
Box3DMode.DEPTH, Box3DMode.CAM)
assert torch.allclose(cam_box_tensor, depth_to_cam_box_tensor)
# test error raise with not supported conversion
with pytest.raises(NotImplementedError):
Box3DMode.convert(lidar_box_tensor, Box3DMode.LIDAR, Box3DMode.DEPTH)
with pytest.raises(NotImplementedError):
Box3DMode.convert(depth_box_tensor, Box3DMode.DEPTH, Box3DMode.LIDAR)
# test similar mode conversion
same_results = Box3DMode.convert(depth_box_tensor, Box3DMode.DEPTH,
Box3DMode.DEPTH)
assert (same_results == depth_box_tensor).all()
# test conversion with a given rt_mat
camera_boxes = CameraInstance3DBoxes(
[[0.06, 1.77, 21.4, 3.2, 1.61, 1.66, -1.54],
[6.59, 1.53, 6.76, 12.78, 3.66, 2.28, 1.55],
[6.71, 1.59, 22.18, 14.73, 3.64, 2.32, 1.59],
[7.11, 1.58, 34.54, 10.04, 3.61, 2.32, 1.61],
[7.78, 1.65, 45.95, 12.83, 3.63, 2.34, 1.64]])
rect = torch.tensor(
[[0.9999239, 0.00983776, -0.00744505, 0.],
[-0.0098698, 0.9999421, -0.00427846, 0.],
[0.00740253, 0.00435161, 0.9999631, 0.], [0., 0., 0., 1.]],
dtype=torch.float32)
Trv2c = torch.tensor(
[[7.533745e-03, -9.999714e-01, -6.166020e-04, -4.069766e-03],
[1.480249e-02, 7.280733e-04, -9.998902e-01, -7.631618e-02],
[9.998621e-01, 7.523790e-03, 1.480755e-02, -2.717806e-01],
[0.000000e+00, 0.000000e+00, 0.000000e+00, 1.000000e+00]],
dtype=torch.float32)
expected_tensor = torch.tensor(
[[
2.16902434e+01, -4.06038554e-02, -1.61906639e+00, 1.65999997e+00,
3.20000005e+00, 1.61000001e+00, -1.53999996e+00
],
[
7.05006905e+00, -6.57459601e+00, -1.60107949e+00, 2.27999997e+00,
1.27799997e+01, 3.66000009e+00, 1.54999995e+00
],
[
2.24698818e+01, -6.69203759e+00, -1.50118145e+00, 2.31999993e+00,
1.47299995e+01, 3.64000010e+00, 1.59000003e+00
],
[
3.48291965e+01, -7.09058388e+00, -1.36622983e+00, 2.31999993e+00,
1.00400000e+01, 3.60999990e+00, 1.61000001e+00
],
[
4.62394617e+01, -7.75838800e+00, -1.32405020e+00, 2.33999991e+00,
1.28299999e+01, 3.63000011e+00, 1.63999999e+00
]],
dtype=torch.float32)
rt_mat = rect @ Trv2c
# test conversion with Box type
cam_to_lidar_box = Box3DMode.convert(camera_boxes, Box3DMode.CAM,
Box3DMode.LIDAR, rt_mat.inverse())
assert torch.allclose(cam_to_lidar_box.tensor, expected_tensor)
lidar_to_cam_box = Box3DMode.convert(cam_to_lidar_box.tensor,
Box3DMode.LIDAR, Box3DMode.CAM,
rt_mat)
assert torch.allclose(lidar_to_cam_box, camera_boxes.tensor)
# test numpy convert
cam_to_lidar_box = Box3DMode.convert(camera_boxes.tensor.numpy(),
Box3DMode.CAM, Box3DMode.LIDAR,
rt_mat.inverse().numpy())
assert np.allclose(cam_to_lidar_box, expected_tensor.numpy())
# test list convert
cam_to_lidar_box = Box3DMode.convert(
camera_boxes.tensor[0].numpy().tolist(), Box3DMode.CAM,
Box3DMode.LIDAR,
rt_mat.inverse().numpy())
assert np.allclose(np.array(cam_to_lidar_box), expected_tensor[0].numpy())
def test_camera_boxes3d():
# Test init with numpy array
np_boxes = np.array(
[[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
[8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62]],
dtype=np.float32)
boxes_1 = Box3DMode.convert(
LiDARInstance3DBoxes(np_boxes), Box3DMode.LIDAR, Box3DMode.CAM)
assert isinstance(boxes_1, CameraInstance3DBoxes)
cam_np_boxes = Box3DMode.convert(np_boxes, Box3DMode.LIDAR, Box3DMode.CAM)
assert torch.allclose(boxes_1.tensor,
boxes_1.tensor.new_tensor(cam_np_boxes))
# test init with torch.Tensor
th_boxes = torch.tensor(
[[
28.29669987, -0.5557558, -1.30332506, 1.47000003, 2.23000002,
1.48000002, -1.57000005
],
[
26.66901946, 21.82302134, -1.73605708, 1.55999994, 3.48000002,
1.39999998, -1.69000006
],
[
31.31977974, 8.16214412, -1.62177875, 1.74000001, 3.76999998,
1.48000002, 2.78999996
]],
dtype=torch.float32)
cam_th_boxes = Box3DMode.convert(th_boxes, Box3DMode.LIDAR, Box3DMode.CAM)
boxes_2 = CameraInstance3DBoxes(cam_th_boxes)
assert torch.allclose(boxes_2.tensor, cam_th_boxes)
# test clone/to/device
boxes_2 = boxes_2.clone()
boxes_1 = boxes_1.to(boxes_2.device)
# test box concatenation
expected_tensor = Box3DMode.convert(
torch.tensor(
[[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
[8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
[28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
[26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
[31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]]),
Box3DMode.LIDAR, Box3DMode.CAM)
boxes = CameraInstance3DBoxes.cat([boxes_1, boxes_2])
assert torch.allclose(boxes.tensor, expected_tensor)
# test box flip
expected_tensor = Box3DMode.convert(
torch.tensor(
[[1.7802081, -2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.6615927],
[8.959413, -2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.5215927],
[28.2967, 0.5557558, -1.303325, 1.47, 2.23, 1.48, 4.7115927],
[26.66902, -21.82302, -1.736057, 1.56, 3.48, 1.4, 4.8315926],
[31.31978, -8.162144, -1.6217787, 1.74, 3.77, 1.48, 0.35159278]]),
Box3DMode.LIDAR, Box3DMode.CAM)
boxes.flip()
assert torch.allclose(boxes.tensor, expected_tensor)
# test box rotation
expected_tensor = Box3DMode.convert(
torch.tensor(
[[1.0385344, -2.9020846, -1.7501148, 1.75, 3.39, 1.65, 1.9336663],
[7.969653, -4.774011, -1.6357126, 1.54, 4.01, 1.57, 1.7936664],
[27.405172, -7.0688415, -1.303325, 1.47, 2.23, 1.48, 4.9836664],
[19.823532, -28.187025, -1.736057, 1.56, 3.48, 1.4, 5.1036663],
[27.974297, -16.27845, -1.6217787, 1.74, 3.77, 1.48, 0.6236664]]),
Box3DMode.LIDAR, Box3DMode.CAM)
boxes.rotate(torch.tensor(0.27207362796436096))
assert torch.allclose(boxes.tensor, expected_tensor)
# test box scaling
expected_tensor = Box3DMode.convert(
torch.tensor([[
1.0443488, -2.9183323, -1.7599131, 1.7597977, 3.4089797, 1.6592377,
1.9336663
],
[
8.014273, -4.8007393, -1.6448704, 1.5486219,
4.0324507, 1.57879, 1.7936664
],
[
27.558605, -7.1084175, -1.310622, 1.4782301,
2.242485, 1.488286, 4.9836664
],
[
19.934517, -28.344835, -1.7457767, 1.5687338,
3.4994833, 1.4078381, 5.1036663
],
[
28.130915, -16.369587, -1.6308585, 1.7497417,
3.791107, 1.488286, 0.6236664
]]), Box3DMode.LIDAR, Box3DMode.CAM)
boxes.scale(1.00559866335275)
assert torch.allclose(boxes.tensor, expected_tensor)
# test box translation
expected_tensor = Box3DMode.convert(
torch.tensor([[
1.1281544, -3.0507944, -1.9169292, 1.7597977, 3.4089797, 1.6592377,
1.9336663
],
[
8.098079, -4.9332013, -1.8018866, 1.5486219,
4.0324507, 1.57879, 1.7936664
],
[
27.64241, -7.2408795, -1.4676381, 1.4782301,
2.242485, 1.488286, 4.9836664
],
[
20.018322, -28.477297, -1.9027928, 1.5687338,
3.4994833, 1.4078381, 5.1036663
],
[
28.21472, -16.502048, -1.7878747, 1.7497417,
3.791107, 1.488286, 0.6236664
]]), Box3DMode.LIDAR, Box3DMode.CAM)
boxes.translate(torch.tensor([0.13246193, 0.15701613, 0.0838056]))
assert torch.allclose(boxes.tensor, expected_tensor)
# test bbox in_range_bev
expected_tensor = torch.tensor([1, 1, 1, 1, 1], dtype=torch.bool)
mask = boxes.in_range_bev([0., -40., 70.4, 40.])
assert (mask == expected_tensor).all()
mask = boxes.nonempty()
assert (mask == expected_tensor).all()
# test bbox in_range
expected_tensor = torch.tensor([1, 1, 0, 0, 0], dtype=torch.bool)
mask = boxes.in_range_3d([-2, -5, 0, 20, 2, 22])
assert (mask == expected_tensor).all()
# test properties
assert torch.allclose(boxes.bottom_center, boxes.tensor[:, :3])
expected_tensor = (
boxes.tensor[:, :3] - boxes.tensor[:, 3:6] *
(torch.tensor([0.5, 1.0, 0.5]) - torch.tensor([0.5, 0.5, 0.5])))
assert torch.allclose(boxes.gravity_center, expected_tensor)
boxes.limit_yaw()
assert (boxes.tensor[:, 6] <= np.pi / 2).all()
assert (boxes.tensor[:, 6] >= -np.pi / 2).all()
Box3DMode.convert(boxes, Box3DMode.LIDAR, Box3DMode.LIDAR)
expected_tensor = boxes.tensor.clone()
assert torch.allclose(expected_tensor, boxes.tensor)
boxes.flip()
boxes.flip()
boxes.limit_yaw()
assert torch.allclose(expected_tensor, boxes.tensor)
# test nearest_bev
# BEV box in lidar coordinates (x, y)
lidar_expected_tensor = torch.tensor(
[[-0.5763, -3.9307, 2.8326, -2.1709],
[6.0819, -5.7075, 10.1143, -4.1589],
[26.5212, -7.9800, 28.7637, -6.5018],
[18.2686, -29.2617, 21.7681, -27.6929],
[27.3398, -18.3976, 29.0896, -14.6065]])
# BEV box in camera coordinate (-y, x)
expected_tensor = lidar_expected_tensor.clone()
expected_tensor[:, 0::2] = -lidar_expected_tensor[:, [3, 1]]
expected_tensor[:, 1::2] = lidar_expected_tensor[:, 0::2]
# the pytorch print loses some precision
assert torch.allclose(
boxes.nearest_bev, expected_tensor, rtol=1e-4, atol=1e-7)
# obtained by the print of the original implementation
expected_tensor = torch.tensor([[[3.2684e+00, 2.5769e-01, -7.7767e-01],
[1.6232e+00, 2.5769e-01, -1.5301e-01],
[1.6232e+00, 1.9169e+00, -1.5301e-01],
[3.2684e+00, 1.9169e+00, -7.7767e-01],
[4.4784e+00, 2.5769e-01, 2.4093e+00],
[2.8332e+00, 2.5769e-01, 3.0340e+00],
[2.8332e+00, 1.9169e+00, 3.0340e+00],
[4.4784e+00, 1.9169e+00, 2.4093e+00]],
[[5.2427e+00, 2.2310e-01, 5.9606e+00],
[3.7324e+00, 2.2310e-01, 6.3029e+00],
[3.7324e+00, 1.8019e+00, 6.3029e+00],
[5.2427e+00, 1.8019e+00, 5.9606e+00],
[6.1340e+00, 2.2310e-01, 9.8933e+00],
[4.6237e+00, 2.2310e-01, 1.0236e+01],
[4.6237e+00, 1.8019e+00, 1.0236e+01],
[6.1340e+00, 1.8019e+00, 9.8933e+00]],
[[7.6525e+00, -2.0648e-02, 2.6364e+01],
[6.2283e+00, -2.0648e-02, 2.6760e+01],
[6.2283e+00, 1.4676e+00, 2.6760e+01],
[7.6525e+00, 1.4676e+00, 2.6364e+01],
[8.2534e+00, -2.0648e-02, 2.8525e+01],
[6.8292e+00, -2.0648e-02, 2.8921e+01],
[6.8292e+00, 1.4676e+00, 2.8921e+01],
[8.2534e+00, 1.4676e+00, 2.8525e+01]],
[[2.8535e+01, 4.9495e-01, 1.8102e+01],
[2.7085e+01, 4.9495e-01, 1.8700e+01],
[2.7085e+01, 1.9028e+00, 1.8700e+01],
[2.8535e+01, 1.9028e+00, 1.8102e+01],
[2.9870e+01, 4.9495e-01, 2.1337e+01],
[2.8420e+01, 4.9495e-01, 2.1935e+01],
[2.8420e+01, 1.9028e+00, 2.1935e+01],
[2.9870e+01, 1.9028e+00, 2.1337e+01]],
[[1.4452e+01, 2.9959e-01, 2.8612e+01],
[1.5474e+01, 2.9959e-01, 3.0032e+01],
[1.5474e+01, 1.7879e+00, 3.0032e+01],
[1.4452e+01, 1.7879e+00, 2.8612e+01],
[1.7530e+01, 2.9959e-01, 2.6398e+01],
[1.8552e+01, 2.9959e-01, 2.7818e+01],
[1.8552e+01, 1.7879e+00, 2.7818e+01],
[1.7530e+01, 1.7879e+00, 2.6398e+01]]])
# the pytorch print loses some precision
assert torch.allclose(boxes.corners, expected_tensor, rtol=1e-4, atol=1e-7)
import numpy as np
from mmdet3d.datasets.pipelines import IndoorFlipData, IndoorGlobalRotScale
def test_indoor_flip_data():
np.random.seed(0)
sunrgbd_indoor_flip_data = IndoorFlipData(1, 1)
sunrgbd_results = dict()
sunrgbd_results['points'] = np.array(
[[1.02828765e+00, 3.65790772e+00, 1.97294697e-01, 1.61959505e+00],
[-3.95979017e-01, 1.05465031e+00, -7.49204338e-01, 6.73096001e-01]])
sunrgbd_results['gt_bboxes_3d'] = np.array(
    [[0.213684, 1.036364, -0.982323, 0.61541, 0.572574, 0.872728,
      3.07028526],
     [-0.449953, 1.395455, -1.027778, 1.500956, 1.637298, 0.636364,
      -1.58242359]])
sunrgbd_results = sunrgbd_indoor_flip_data(sunrgbd_results)
sunrgbd_points = sunrgbd_results['points']
sunrgbd_gt_bboxes_3d = sunrgbd_results['gt_bboxes_3d']
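# the yz-plane flip negates x and maps yaw to pi - yaw; SUNRGBD boxes carry
# 7 values (with yaw), so the xz-plane flip is skipped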
expected_sunrgbd_points = np.array(
[[-1.02828765, 3.65790772, 0.1972947, 1.61959505],
[0.39597902, 1.05465031, -0.74920434, 0.673096]])
expected_sunrgbd_gt_bboxes_3d = np.array([[
-0.213684, 1.036364, -0.982323, 0.61541, 0.572574, 0.872728, 0.07130739
], [
0.449953, 1.395455, -1.027778, 1.500956, 1.637298, 0.636364, 4.72401624
]])
assert np.allclose(sunrgbd_points, expected_sunrgbd_points)
assert np.allclose(sunrgbd_gt_bboxes_3d, expected_sunrgbd_gt_bboxes_3d)
np.random.seed(0)
scannet_indoor_flip_data = IndoorFlipData(1, 1)
scannet_results = dict()
scannet_results['points'] = np.array(
[[1.6110241e+00, -1.6903955e-01, 5.8115810e-01, 5.9897250e-01],
[1.3978075e+00, 4.2035791e-01, 3.8729519e-01, 4.0510958e-01]])
scannet_results['gt_bboxes_3d'] = np.array([[
0.55903838, 0.48201692, 0.65688646, 0.65370704, 0.60029864, 0.5163464
], [
-0.03226406, 1.70392646, 0.60348618, 0.65165804, 0.72084366, 0.64667457
]])
scannet_results = scannet_indoor_flip_data(scannet_results)
scannet_points = scannet_results['points']
scannet_gt_bboxes_3d = scannet_results['gt_bboxes_3d']
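# axis-aligned ScanNet boxes (6 values) get both flips:
# x and y are negated and there is no yaw to adjust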
expected_scannet_points = np.array(
[[-1.6110241, 0.16903955, 0.5811581, 0.5989725],
[-1.3978075, -0.42035791, 0.38729519, 0.40510958]])
expected_scannet_gt_bboxes_3d = np.array([[
-0.55903838, -0.48201692, 0.65688646, 0.65370704, 0.60029864, 0.5163464
], [
0.03226406, -1.70392646, 0.60348618, 0.65165804, 0.72084366, 0.64667457
]])
assert np.allclose(scannet_points, expected_scannet_points)
assert np.allclose(scannet_gt_bboxes_3d, expected_scannet_gt_bboxes_3d)
def test_global_rot_scale():
np.random.seed(0)
sunrgbd_augment = IndoorGlobalRotScale(
True, rot_range=[-np.pi / 6, np.pi / 6], scale_range=[0.85, 1.15])
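# IndoorGlobalRotScale is assumed to rotate the scene about the upright axis
# by an angle drawn from rot_range and scale it by a factor from scale_range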
sunrgbd_results = dict()
sunrgbd_results['points'] = np.array(
[[1.02828765e+00, 3.65790772e+00, 1.97294697e-01, 1.61959505e+00],
[-3.95979017e-01, 1.05465031e+00, -7.49204338e-01, 6.73096001e-01]])
sunrgbd_results['gt_bboxes_3d'] = np.array(
    [[0.213684, 1.036364, -0.982323, 0.61541, 0.572574, 0.872728,
      3.07028526],
     [-0.449953, 1.395455, -1.027778, 1.500956, 1.637298, 0.636364,
      -1.58242359]])
sunrgbd_results = sunrgbd_augment(sunrgbd_results)
sunrgbd_points = sunrgbd_results['points']
sunrgbd_gt_bboxes_3d = sunrgbd_results['gt_bboxes_3d']
expected_sunrgbd_points = np.array(
[[0.89427376, 3.94489646, 0.21003141, 1.72415094],
[-0.47835783, 1.09972989, -0.79757058, 0.71654893]])
expected_sunrgbd_gt_bboxes_3d = np.array(
    [[0.17080999, 1.11345031, -1.04573864, 0.65513891, 0.60953755,
      0.92906854, 3.01916788],
     [-0.55427876, 1.45912611, -1.09412807, 1.59785293, 1.74299674,
      0.67744563, -1.63354097]])
assert np.allclose(sunrgbd_points, expected_sunrgbd_points)
assert np.allclose(sunrgbd_gt_bboxes_3d, expected_sunrgbd_gt_bboxes_3d)
np.random.seed(0)
scannet_augment = IndoorGlobalRotScale(
True, rot_range=[-np.pi * 1 / 36, np.pi * 1 / 36], scale_range=None)
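# scale_range=None is assumed to disable scaling (z stays fixed below);
# rotating the axis-aligned boxes slightly enlarges their re-fitted extents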
scannet_results = dict()
scannet_results['points'] = np.array(
[[1.6110241e+00, -1.6903955e-01, 5.8115810e-01, 5.9897250e-01],
[1.3978075e+00, 4.2035791e-01, 3.8729519e-01, 4.0510958e-01]])
scannet_results['gt_bboxes_3d'] = np.array([[
0.55903838, 0.48201692, 0.65688646, 0.65370704, 0.60029864, 0.5163464
], [
-0.03226406, 1.70392646, 0.60348618, 0.65165804, 0.72084366, 0.64667457
]])
scannet_results = scannet_augment(scannet_results)
scannet_points = scannet_results['points']
scannet_gt_bboxes_3d = scannet_results['gt_bboxes_3d']
expected_scannet_points = np.array(
[[1.61240576, -0.15530836, 0.5811581, 0.5989725],
[1.39417555, 0.43225122, 0.38729519, 0.40510958]])
expected_scannet_gt_bboxes_3d = np.array([[
0.55491157, 0.48676213, 0.65688646, 0.65879754, 0.60584609, 0.5163464
], [
-0.04677942, 1.70358975, 0.60348618, 0.65777559, 0.72636927, 0.64667457
]])
assert np.allclose(scannet_points, expected_scannet_points)
assert np.allclose(scannet_gt_bboxes_3d, expected_scannet_gt_bboxes_3d)
import os.path as osp
import mmcv
import numpy as np
from mmdet3d.datasets.pipelines import (IndoorLoadAnnotations3D,
IndoorLoadPointsFromFile)
def test_indoor_load_points_from_file():
sunrgbd_info = mmcv.load('./tests/data/sunrgbd/sunrgbd_infos.pkl')
sunrgbd_load_points_from_file = IndoorLoadPointsFromFile(True, 6)
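# the positional arguments (True, 6) are assumed to toggle height usage and
# set the number of channels loaded from the .npy file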
sunrgbd_results = dict()
data_path = './tests/data/sunrgbd/sunrgbd_trainval'
sunrgbd_info = sunrgbd_info[0]
scan_name = sunrgbd_info['point_cloud']['lidar_idx']
sunrgbd_results['pts_filename'] = osp.join(data_path, 'lidar',
f'{scan_name:06d}.npy')
sunrgbd_results = sunrgbd_load_points_from_file(sunrgbd_results)
sunrgbd_point_cloud = sunrgbd_results['points']
assert sunrgbd_point_cloud.shape == (100, 4)
scannet_info = mmcv.load('./tests/data/scannet/scannet_infos.pkl')
scannet_load_data = IndoorLoadPointsFromFile(True)
scannet_results = dict()
data_path = './tests/data/scannet/scannet_train_instance_data'
scannet_results['data_path'] = data_path
scannet_info = scannet_info[0]
scan_name = scannet_info['point_cloud']['lidar_idx']
scannet_results['pts_filename'] = osp.join(data_path,
f'{scan_name}_vert.npy')
scannet_results = scannet_load_data(scannet_results)
scannet_point_cloud = scannet_results['points']
assert scannet_point_cloud.shape == (100, 4)
def test_load_annotations3D():
sunrgbd_info = mmcv.load('./tests/data/sunrgbd/sunrgbd_infos.pkl')[0]
if sunrgbd_info['annos']['gt_num'] != 0:
sunrgbd_gt_bboxes_3d = sunrgbd_info['annos']['gt_boxes_upright_depth']
sunrgbd_gt_labels = sunrgbd_info['annos']['class'].reshape(-1, 1)
sunrgbd_gt_bboxes_3d_mask = np.ones_like(sunrgbd_gt_labels)
else:
sunrgbd_gt_bboxes_3d = np.zeros((1, 6), dtype=np.float32)
sunrgbd_gt_labels = np.zeros((1, 1))
sunrgbd_gt_bboxes_3d_mask = np.zeros((1, 1))
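# the placeholder boxes, labels and masks above keep downstream shapes valid
# when a scene has no annotations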
assert sunrgbd_gt_bboxes_3d.shape == (3, 7)
assert sunrgbd_gt_labels.shape == (3, 1)
assert sunrgbd_gt_bboxes_3d_mask.shape == (3, 1)
scannet_info = mmcv.load('./tests/data/scannet/scannet_infos.pkl')[0]
scannet_load_annotations3D = IndoorLoadAnnotations3D()
scannet_results = dict()
data_path = './tests/data/scannet/scannet_train_instance_data'
if scannet_info['annos']['gt_num'] != 0:
scannet_gt_bboxes_3d = scannet_info['annos']['gt_boxes_upright_depth']
scannet_gt_labels = scannet_info['annos']['class'].reshape(-1, 1)
scannet_gt_bboxes_3d_mask = np.ones_like(scannet_gt_labels)
else:
scannet_gt_bboxes_3d = np.zeros((1, 6), dtype=np.float32)
scannet_gt_labels = np.zeros((1, 1))
scannet_gt_bboxes_3d_mask = np.zeros((1, 1))
scan_name = scannet_info['point_cloud']['lidar_idx']
scannet_results['pts_instance_mask_path'] = osp.join(
data_path, f'{scan_name}_ins_label.npy')
scannet_results['pts_semantic_mask_path'] = osp.join(
data_path, f'{scan_name}_sem_label.npy')
scannet_results['info'] = scannet_info
scannet_results['gt_bboxes_3d'] = scannet_gt_bboxes_3d
scannet_results['gt_labels'] = scannet_gt_labels
scannet_results['gt_bboxes_3d_mask'] = scannet_gt_bboxes_3d_mask
scannet_results = scannet_load_annotations3D(scannet_results)
scannet_gt_boxes = scannet_results['gt_bboxes_3d']
scannet_gt_labels = scannet_results['gt_labels']
scannet_gt_boxes_mask = scannet_results['gt_bboxes_3d_mask']
scannet_pts_instance_mask = scannet_results['pts_instance_mask']
scannet_pts_semantic_mask = scannet_results['pts_semantic_mask']
assert scannet_gt_boxes.shape == (27, 6)
assert scannet_gt_labels.shape == (27, 1)
assert scannet_gt_boxes_mask.shape == (27, 1)
assert scannet_pts_instance_mask.shape == (100, )
assert scannet_pts_semantic_mask.shape == (100, )
import numpy as np
from mmdet3d.datasets.pipelines import IndoorPointSample
def test_indoor_sample():
np.random.seed(0)
scannet_sample_points = IndoorPointSample(5)
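# with seed 0 the 5 sampled indices are deterministic; the same choices
# must be applied to the points and to both mask arrays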
scannet_results = dict()
scannet_points = np.array([[1.0719866, -0.7870435, 0.8408122, 0.9196809],
[1.103661, 0.81065744, 2.6616862, 2.7405548],
@@ -24,11 +24,9 @@ def test_indoor_sample():
scannet_pts_semantic_mask = np.array([38, 1, 1, 40, 0, 40, 1, 1, 1, 0])
scannet_results['pts_semantic_mask'] = scannet_pts_semantic_mask
scannet_results = scannet_sample_points(scannet_results)
scannet_points_result = scannet_results['points']
scannet_instance_labels_result = scannet_results['pts_instance_mask']
scannet_semantic_labels_result = scannet_results['pts_semantic_mask']
scannet_choices = np.array([2, 8, 4, 9, 1])
assert np.allclose(scannet_points[scannet_choices], scannet_points_result)
assert np.all(scannet_pts_instance_mask[scannet_choices] ==
              scannet_instance_labels_result)
assert np.all(scannet_pts_semantic_mask[scannet_choices] ==
              scannet_semantic_labels_result)
np.random.seed(0)
sunrgbd_sample_points = IndoorPointSample(5)
sunrgbd_results = dict()
sunrgbd_point_cloud = np.array(
[[-1.8135729e-01, 1.4695230e+00, -1.2780589e+00, 7.8938007e-03],
@@ -53,6 +51,6 @@ def test_indoor_sample():
sunrgbd_results['points'] = sunrgbd_point_cloud
sunrgbd_results = sunrgbd_sample_points(sunrgbd_results)
sunrgbd_choices = np.array([2, 8, 4, 9, 1])
sunrgbd_points_result = sunrgbd_results['points']
assert np.allclose(sunrgbd_point_cloud[sunrgbd_choices],
sunrgbd_points_result)