Commit e9029c0e authored by zhangwenwei

Merge branch 'merge_rot_scale' into 'master'

Merge rot scale

See merge request open-mmlab/mmdet.3d!82
parents 5a1575a0 92ae69fb
from mmdet.datasets.pipelines import Compose
from .dbsampler import DataBaseSampler
from .formating import Collect3D, DefaultFormatBundle, DefaultFormatBundle3D
from .loading import (LoadAnnotations3D, LoadMultiViewImageFromFiles,
                      LoadPointsFromFile, NormalizePointsColor,
                      PointSegClassMapping)
from .test_time_aug import MultiScaleFlipAug3D
from .transforms_3d import (GlobalRotScaleTrans, IndoorPointSample,
                            ObjectNoise, ObjectRangeFilter, ObjectSample,
                            PointShuffle, PointsRangeFilter, RandomFlip3D)

__all__ = [
    'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans',
    'PointShuffle', 'ObjectRangeFilter', 'PointsRangeFilter', 'Collect3D',
    'Compose', 'LoadMultiViewImageFromFiles', 'LoadPointsFromFile',
    'DefaultFormatBundle', 'DefaultFormatBundle3D', 'DataBaseSampler',
    'NormalizePointsColor', 'LoadAnnotations3D', 'IndoorPointSample',
    'PointSegClassMapping', 'MultiScaleFlipAug3D'
]
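To show how these exports fit together, here is a hedged sketch of an indoor training pipeline in the config style used by the tests later in this diff (class names, point counts and ranges are illustrative, not prescribed):

train_pipeline = [
    dict(type='LoadPointsFromFile', load_dim=6, use_dim=[0, 1, 2]),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(type='IndoorPointSample', num_points=40000),
    dict(
        type='RandomFlip3D',
        sync_2d=False,
        flip_ratio_bev_horizontal=0.5),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.523599, 0.523599],
        scale_ratio_range=[0.85, 1.15],
        shift_height=True),
    dict(type='DefaultFormatBundle3D', class_names=['bed', 'chair']),
    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
# Compose resolves each dict through the PIPELINES registry and chains the
# transforms: results = Compose(train_pipeline)(results)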
......@@ -58,7 +58,6 @@ class DataBaseSampler(object):
data_root,
rate,
prepare,
sample_groups,
classes=None):
super().__init__()
......@@ -66,7 +65,6 @@ class DataBaseSampler(object):
self.info_path = info_path
self.rate = rate
self.prepare = prepare
self.classes = classes
self.cat2label = {name: i for i, name in enumerate(classes)}
self.label2cat = {i: name for i, name in enumerate(classes)}
......@@ -103,11 +101,6 @@ class DataBaseSampler(object):
self.sampler_dict = {}
for k, v in self.group_db_infos.items():
self.sampler_dict[k] = BatchSampler(v, k, shuffle=True)
# TODO: No group_sampling currently
@staticmethod
......@@ -183,11 +176,6 @@ class DataBaseSampler(object):
info['path']) if self.data_root else info['path']
s_points = np.fromfile(
file_path, dtype=np.float32).reshape([-1, 4])
s_points[:, :3] += info['box3d_lidar'][:3]
count += 1
......@@ -219,24 +207,7 @@ class DataBaseSampler(object):
gt_bboxes[:, 0:2], gt_bboxes[:, 3:5], gt_bboxes[:, 6])
sp_boxes = np.stack([i['box3d_lidar'] for i in sampled], axis=0)
boxes = np.concatenate([gt_bboxes, sp_boxes], axis=0).copy()
sp_boxes_new = boxes[gt_bboxes.shape[0]:]
sp_boxes_bv = box_np_ops.center_to_corner_box2d(
......@@ -253,11 +224,5 @@ class DataBaseSampler(object):
coll_mat[i] = False
coll_mat[:, i] = False
else:
valid_samples.append(sampled[i - num_gt])
return valid_samples
......@@ -74,7 +74,8 @@ class Collect3D(object):
def __init__(self,
keys,
meta_keys=('filename', 'ori_shape', 'img_shape', 'lidar2img',
'pad_shape', 'scale_factor', 'flip',
'pcd_horizontal_flip', 'pcd_vertical_flip',
'box_mode_3d', 'box_type_3d', 'img_norm_cfg',
'rect', 'Trv2c', 'P2', 'pcd_trans', 'sample_idx',
'pcd_scale_factor', 'pcd_rotation')):
......
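The single 'pcd_flip' meta key is split into 'pcd_horizontal_flip' and 'pcd_vertical_flip' above; a minimal Collect3D config that keeps only a subset of meta keys, as the updated tests below do, might look like this (the key selection is illustrative):

collect = dict(
    type='Collect3D',
    keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'],
    meta_keys=['file_name', 'sample_idx', 'pcd_horizontal_flip',
               'pcd_vertical_flip', 'pcd_rotation', 'pcd_scale_factor'])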
import numpy as np
from mmdet.datasets.builder import PIPELINES
@PIPELINES.register_module()
class IndoorFlipData(object):
"""Indoor flip data.
Flip point cloud and ground truth boxes.
The point cloud will be flipped along the yz plane
and the xz plane with a certain probability.
Args:
flip_ratio_yz (float): Probability of being flipped along yz plane.
Default: 0.5.
flip_ratio_xz (float): Probability of being flipped along xz plane.
Default: 0.5.
"""
def __init__(self, flip_ratio_yz=0.5, flip_ratio_xz=0.5):
self.flip_ratio_yz = flip_ratio_yz
self.flip_ratio_xz = flip_ratio_xz
def __call__(self, results):
points = results['points']
gt_bboxes_3d = results['gt_bboxes_3d']
results['flip_yz'] = False
results['flip_xz'] = False
if np.random.random() < self.flip_ratio_yz:
# Flipping along the YZ plane
points[:, 0] = -1 * points[:, 0]
gt_bboxes_3d.flip('horizontal')
results['flip_yz'] = True
if not gt_bboxes_3d.with_yaw and np.random.random(
) < self.flip_ratio_xz:
# Flipping along the XZ plane
points[:, 1] = -1 * points[:, 1]
gt_bboxes_3d.flip('vertical')
results['flip_xz'] = True
results['points'] = points
results['gt_bboxes_3d'] = gt_bboxes_3d
return results
def __repr__(self):
repr_str = self.__class__.__name__
        repr_str += '(flip_ratio_yz={}, flip_ratio_xz={})'.format(
            self.flip_ratio_yz, self.flip_ratio_xz)
return repr_str
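For intuition, flipping along the yz plane only negates the x coordinate of each point; a tiny standalone check of that convention (not part of the diff):

import numpy as np

pts = np.array([[1.0, 2.0, 0.5], [-0.4, 1.1, -0.7]])
pts[:, 0] = -1 * pts[:, 0]  # the same operation as the yz-plane branch above
assert np.allclose(pts, [[-1.0, 2.0, 0.5], [0.4, 1.1, -0.7]])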
@PIPELINES.register_module()
class IndoorPointsColorJitter(object):
"""Indoor points color jitter.
Randomly change the brightness and color of the point cloud, and
drop out the points' colors with a certain range and probability.
Args:
color_mean (list[float]): Mean color of the point cloud.
Default: [0.5, 0.5, 0.5].
bright_range (list[float]): Range of brightness.
Default: [0.8, 1.2].
color_shift_range (list[float]): Range of color shift.
Default: [0.95, 1.05].
jitter_range (list[float]): Range of jittering.
Default: [-0.025, 0.025].
drop_prob (float): Probability to drop out points' color.
Default: 0.3
"""
def __init__(self,
color_mean=[0.5, 0.5, 0.5],
bright_range=[0.8, 1.2],
color_shift_range=[0.95, 1.05],
jitter_range=[-0.025, 0.025],
drop_prob=0.3):
self.color_mean = color_mean
self.bright_range = bright_range
self.color_shift_range = color_shift_range
self.jitter_range = jitter_range
self.drop_prob = drop_prob
def __call__(self, results):
points = results['points']
assert points.shape[1] >= 6, \
f'Expect points have channel >=6, got {points.shape[1]}.'
rgb_color = points[:, 3:6] + self.color_mean
# brightness change for each channel
rgb_color *= np.random.uniform(self.bright_range[0],
self.bright_range[1], 3)
        # color shift for each channel (multiplicative, since the documented
        # default range [0.95, 1.05] is centered around 1)
        rgb_color *= np.random.uniform(self.color_shift_range[0],
                                       self.color_shift_range[1], 3)
        # jittering on each point
        rgb_color += np.expand_dims(
            np.random.uniform(self.jitter_range[0], self.jitter_range[1],
                              points.shape[0]), -1)
rgb_color = np.clip(rgb_color, 0, 1)
# randomly drop out points' colors
rgb_color *= np.expand_dims(
np.random.random(points.shape[0]) > self.drop_prob, -1)
points[:, 3:6] = rgb_color - self.color_mean
results['points'] = points
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(color_mean={})'.format(self.color_mean)
repr_str += '(bright_range={})'.format(self.bright_range)
repr_str += '(color_shift_range={})'.format(self.color_shift_range)
repr_str += '(jitter_range={})'.format(self.jitter_range)
        repr_str += '(drop_prob={})'.format(self.drop_prob)
        return repr_str
# TODO: merge outdoor indoor transform.
# TODO: try transform noise.
@PIPELINES.register_module()
class IndoorGlobalRotScaleTrans(object):
"""Indoor global rotate and scale.
Augment sunrgbd and scannet data with global rotating and scaling.
Args:
shift_height (bool): Whether to use height.
Default: True.
rot_range (list[float]): Range of rotation.
Default: None.
scale_range (list[float]): Range of scale.
Default: None.
"""
def __init__(self, shift_height=True, rot_range=None, scale_range=None):
self.shift_height = shift_height
        self.rot_range = \
            np.pi * np.array(rot_range) if rot_range is not None else None
self.scale_range = scale_range
def _rotz(self, t):
"""Rotate About Z.
Rotation about the z-axis.
Args:
t (float): Angle of rotation.
Returns:
rot_mat (ndarray): Matrix of rotation.
"""
c = np.cos(t)
s = np.sin(t)
rot_mat = np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]])
return rot_mat
def __call__(self, results):
points = results['points']
gt_bboxes_3d = results['gt_bboxes_3d']
if self.rot_range is not None:
assert len(self.rot_range) == 2, \
f'Expect length of rot range =2, ' \
f'got {len(self.rot_range)}.'
rot_angle = np.random.uniform(self.rot_range[0], self.rot_range[1])
if gt_bboxes_3d.tensor.shape[0] != 0:
gt_bboxes_3d.rotate(rot_angle)
points[:, :3] = np.dot(points[:, :3], self._rotz(rot_angle).T)
results['rot_angle'] = rot_angle
if self.scale_range is not None:
assert len(self.scale_range) == 2, \
f'Expect length of scale range =2, ' \
f'got {len(self.scale_range)}.'
# Augment point cloud scale
scale_ratio = np.random.uniform(self.scale_range[0],
self.scale_range[1])
points[:, :3] *= scale_ratio
gt_bboxes_3d.scale(scale_ratio)
if self.shift_height:
points[:, -1] *= scale_ratio
results['scale_ratio'] = scale_ratio
results['points'] = points
results['gt_bboxes_3d'] = gt_bboxes_3d
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(shift_height={})'.format(self.shift_height)
repr_str += '(rot_range={})'.format(self.rot_range)
repr_str += '(scale_range={})'.format(self.scale_range)
return repr_str
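As a sanity check of the _rotz convention, rotating the x unit vector by 90 degrees about z should yield the y unit vector; a standalone sketch (not part of the diff):

import numpy as np


def rotz(t):
    # Counter-clockwise rotation about the z-axis, built exactly as in
    # IndoorGlobalRotScaleTrans._rotz above.
    c, s = np.cos(t), np.sin(t)
    return np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]])


p = np.array([[1.0, 0.0, 0.0]])
# points are rotated as p @ R.T, matching the __call__ implementation
assert np.allclose(p @ rotz(np.pi / 2).T, [[0.0, 1.0, 0.0]])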
......@@ -2,6 +2,7 @@ import mmcv
import numpy as np
from mmdet.datasets.builder import PIPELINES
from mmdet.datasets.pipelines import LoadAnnotations
@PIPELINES.register_module()
......@@ -103,3 +104,263 @@ class LoadPointsFromMultiSweeps(object):
def __repr__(self):
return f'{self.__class__.__name__}(sweeps_num={self.sweeps_num})'
@PIPELINES.register_module()
class PointSegClassMapping(object):
"""Map original semantic class to valid category ids.
Map valid classes as 0~len(valid_cat_ids)-1 and
others as len(valid_cat_ids).
Args:
        valid_cat_ids (tuple[int]): A tuple of valid category ids.
"""
def __init__(self, valid_cat_ids):
self.valid_cat_ids = valid_cat_ids
def __call__(self, results):
assert 'pts_semantic_mask' in results
pts_semantic_mask = results['pts_semantic_mask']
neg_cls = len(self.valid_cat_ids)
for i in range(pts_semantic_mask.shape[0]):
if pts_semantic_mask[i] in self.valid_cat_ids:
converted_id = self.valid_cat_ids.index(pts_semantic_mask[i])
pts_semantic_mask[i] = converted_id
else:
pts_semantic_mask[i] = neg_cls
results['pts_semantic_mask'] = pts_semantic_mask
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(valid_cat_ids={})'.format(self.valid_cat_ids)
return repr_str
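A quick illustration of the mapping rule with hypothetical category ids (valid ids map to their index, everything else to len(valid_cat_ids)):

import numpy as np

valid_cat_ids = (3, 5, 7)
mask = np.array([3, 4, 5, 7, 9])
neg_cls = len(valid_cat_ids)
mapped = np.array([
    valid_cat_ids.index(c) if c in valid_cat_ids else neg_cls for c in mask
])
assert (mapped == [0, 3, 1, 2, 3]).all()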
@PIPELINES.register_module()
class NormalizePointsColor(object):
"""Normalize color of points
Normalize color of the points.
Args:
color_mean (list[float]): Mean color of the point cloud.
"""
def __init__(self, color_mean):
self.color_mean = color_mean
def __call__(self, results):
points = results['points']
assert points.shape[1] >= 6,\
f'Expect points have channel >=6, got {points.shape[1]}'
points[:, 3:6] = points[:, 3:6] - np.array(self.color_mean) / 256.0
results['points'] = points
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(color_mean={})'.format(self.color_mean)
return repr_str
@PIPELINES.register_module()
class LoadPointsFromFile(object):
"""Load Points From File.
Load sunrgbd and scannet points from file.
Args:
shift_height (bool): Whether to use shifted height.
load_dim (int): The dimension of the loaded points.
Default: 6.
use_dim (list[int]): Which dimensions of the points to be used.
Default: [0, 1, 2]. For KITTI dataset, set use_dim=4
            or use_dim=[0, 1, 2, 3] to use the intensity dimension.
file_client_args (dict): Config dict of file clients, refer to
https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
for more details.
"""
def __init__(self,
load_dim=6,
use_dim=[0, 1, 2],
shift_height=False,
file_client_args=dict(backend='disk')):
self.shift_height = shift_height
if isinstance(use_dim, int):
use_dim = list(range(use_dim))
assert max(use_dim) < load_dim, \
f'Expect all used dimensions < {load_dim}, got {use_dim}'
self.load_dim = load_dim
self.use_dim = use_dim
self.file_client_args = file_client_args.copy()
self.file_client = None
def _load_points(self, pts_filename):
if self.file_client is None:
self.file_client = mmcv.FileClient(**self.file_client_args)
try:
pts_bytes = self.file_client.get(pts_filename)
points = np.frombuffer(pts_bytes, dtype=np.float32)
except ConnectionError:
mmcv.check_file_exist(pts_filename)
if pts_filename.endswith('.npy'):
points = np.load(pts_filename)
else:
points = np.fromfile(pts_filename, dtype=np.float32)
return points
def __call__(self, results):
pts_filename = results['pts_filename']
points = self._load_points(pts_filename)
points = points.reshape(-1, self.load_dim)
points = points[:, self.use_dim]
if self.shift_height:
floor_height = np.percentile(points[:, 2], 0.99)
height = points[:, 2] - floor_height
points = np.concatenate([points, np.expand_dims(height, 1)], 1)
results['points'] = points
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(shift_height={})'.format(self.shift_height)
repr_str += '(load_dim={})'.format(self.load_dim)
repr_str += '(use_dim={})'.format(self.use_dim)
return repr_str
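Two hedged configuration sketches for the loader above, following the use_dim note in the docstring (values are typical, not mandated):

# Indoor (SUN RGB-D / ScanNet): keep x, y, z and append a height channel
# relative to the estimated floor height.
load_indoor = dict(
    type='LoadPointsFromFile', load_dim=6, use_dim=[0, 1, 2],
    shift_height=True)

# Outdoor (KITTI): x, y, z, intensity packed as float32; use_dim=4 expands
# to [0, 1, 2, 3] in __init__.
load_kitti = dict(type='LoadPointsFromFile', load_dim=4, use_dim=4)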
@PIPELINES.register_module()
class LoadAnnotations3D(LoadAnnotations):
"""Load Annotations3D.
Load instance mask and semantic mask of points and
encapsulate the items into related fields.
Args:
with_bbox_3d (bool, optional): Whether to load 3D boxes.
Defaults to True.
with_label_3d (bool, optional): Whether to load 3D labels.
Defaults to True.
        with_mask_3d (bool, optional): Whether to load 3D instance masks
            for points. Defaults to False.
        with_seg_3d (bool, optional): Whether to load 3D semantic masks
            for points. Defaults to False.
with_bbox (bool, optional): Whether to load 2D boxes.
Defaults to False.
with_label (bool, optional): Whether to load 2D labels.
Defaults to False.
with_mask (bool, optional): Whether to load 2D instance masks.
Defaults to False.
with_seg (bool, optional): Whether to load 2D semantic masks.
Defaults to False.
poly2mask (bool, optional): Whether to convert polygon annotations
to bitmasks. Defaults to True.
file_client_args (dict): Config dict of file clients, refer to
https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
for more details.
"""
def __init__(self,
with_bbox_3d=True,
with_label_3d=True,
with_mask_3d=False,
with_seg_3d=False,
with_bbox=False,
with_label=False,
with_mask=False,
with_seg=False,
poly2mask=True,
file_client_args=dict(backend='disk')):
super().__init__(
with_bbox,
with_label,
with_mask,
with_seg,
poly2mask,
file_client_args=file_client_args)
self.with_bbox_3d = with_bbox_3d
self.with_label_3d = with_label_3d
self.with_mask_3d = with_mask_3d
self.with_seg_3d = with_seg_3d
def _load_bboxes_3d(self, results):
results['gt_bboxes_3d'] = results['ann_info']['gt_bboxes_3d']
results['bbox3d_fields'].append('gt_bboxes_3d')
return results
def _load_labels_3d(self, results):
results['gt_labels_3d'] = results['ann_info']['gt_labels_3d']
return results
def _load_masks_3d(self, results):
pts_instance_mask_path = results['ann_info']['pts_instance_mask_path']
if self.file_client is None:
self.file_client = mmcv.FileClient(**self.file_client_args)
try:
mask_bytes = self.file_client.get(pts_instance_mask_path)
pts_instance_mask = np.frombuffer(mask_bytes, dtype=np.int)
except ConnectionError:
mmcv.check_file_exist(pts_instance_mask_path)
pts_instance_mask = np.fromfile(
pts_instance_mask_path, dtype=np.long)
results['pts_instance_mask'] = pts_instance_mask
results['pts_mask_fields'].append('pts_instance_mask')
return results
def _load_semantic_seg_3d(self, results):
pts_semantic_mask_path = results['ann_info']['pts_semantic_mask_path']
if self.file_client is None:
self.file_client = mmcv.FileClient(**self.file_client_args)
try:
mask_bytes = self.file_client.get(pts_semantic_mask_path)
# add .copy() to fix read-only bug
pts_semantic_mask = np.frombuffer(mask_bytes, dtype=np.int).copy()
except ConnectionError:
mmcv.check_file_exist(pts_semantic_mask_path)
pts_semantic_mask = np.fromfile(
pts_semantic_mask_path, dtype=np.long)
results['pts_semantic_mask'] = pts_semantic_mask
results['pts_seg_fields'].append('pts_semantic_mask')
return results
def __call__(self, results):
results = super().__call__(results)
if self.with_bbox_3d:
results = self._load_bboxes_3d(results)
if results is None:
return None
if self.with_label_3d:
results = self._load_labels_3d(results)
if self.with_mask_3d:
results = self._load_masks_3d(results)
if self.with_seg_3d:
results = self._load_semantic_seg_3d(results)
return results
def __repr__(self):
indent_str = ' '
repr_str = self.__class__.__name__ + '(\n'
repr_str += f'{indent_str}with_bbox_3d={self.with_bbox_3d},\n'
repr_str += f'{indent_str}with_label_3d={self.with_label_3d},\n'
repr_str += f'{indent_str}with_mask_3d={self.with_mask_3d},\n'
repr_str += f'{indent_str}with_seg_3d={self.with_seg_3d},\n'
repr_str += f'{indent_str}with_bbox={self.with_bbox},\n'
repr_str += f'{indent_str}with_label={self.with_label},\n'
repr_str += f'{indent_str}with_mask={self.with_mask},\n'
repr_str += f'{indent_str}with_seg={self.with_seg},\n'
repr_str += f'{indent_str}poly2mask={self.poly2mask})'
return repr_str
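A ScanNet-style configuration that enables the per-point masks handled above, mirroring the test pipelines later in this diff:

load_anns = dict(
    type='LoadAnnotations3D',
    with_bbox_3d=True,
    with_label_3d=True,
    with_mask_3d=True,   # per-point instance ids
    with_seg_3d=True)    # per-point semantic labels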
......@@ -21,35 +21,67 @@ class RandomFlip3D(RandomFlip):
        images. If True, it will apply the same flip as that applied to the
        2D images. If False, it will decide whether to flip randomly and
        independently of the 2D images.
        flip_ratio_bev_horizontal (float, optional): The flipping probability
            in horizontal direction.
        flip_ratio_bev_vertical (float, optional): The flipping probability
            in vertical direction.
"""
    def __init__(self,
                 sync_2d=True,
                 flip_ratio_bev_horizontal=0.0,
                 flip_ratio_bev_vertical=0.0,
                 **kwargs):
        super(RandomFlip3D, self).__init__(
            flip_ratio=flip_ratio_bev_horizontal, **kwargs)
        self.sync_2d = sync_2d
        self.flip_ratio_bev_vertical = flip_ratio_bev_vertical
        if flip_ratio_bev_horizontal is not None:
            assert isinstance(
                flip_ratio_bev_horizontal,
                (int, float)) and 0 <= flip_ratio_bev_horizontal <= 1
        if flip_ratio_bev_vertical is not None:
            assert isinstance(
                flip_ratio_bev_vertical,
                (int, float)) and 0 <= flip_ratio_bev_vertical <= 1

    def random_flip_data_3d(self, input_dict, direction='horizontal'):
        assert direction in ['horizontal', 'vertical']
        for key in input_dict['bbox3d_fields']:
            input_dict['points'] = input_dict[key].flip(
                direction, points=input_dict['points'])
def __call__(self, input_dict):
        # flip 2D image and its annotations
        super(RandomFlip3D, self).__call__(input_dict)

        if self.sync_2d:
            input_dict['pcd_horizontal_flip'] = input_dict['flip']
            input_dict['pcd_vertical_flip'] = False
        else:
            if 'pcd_horizontal_flip' not in input_dict:
                flip_horizontal = True if np.random.rand(
                ) < self.flip_ratio else False
                input_dict['pcd_horizontal_flip'] = flip_horizontal
            if 'pcd_vertical_flip' not in input_dict:
                flip_vertical = True if np.random.rand(
                ) < self.flip_ratio_bev_vertical else False
                input_dict['pcd_vertical_flip'] = flip_vertical

        if input_dict['pcd_horizontal_flip']:
            self.random_flip_data_3d(input_dict, 'horizontal')
        if input_dict['pcd_vertical_flip']:
            self.random_flip_data_3d(input_dict, 'vertical')
return input_dict
    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += '(sync_2d={},'.format(self.sync_2d)
        repr_str += ' flip_ratio_bev_horizontal={},'.format(self.flip_ratio)
        repr_str += ' flip_ratio_bev_vertical={})'.format(
            self.flip_ratio_bev_vertical)
        return repr_str
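The two usage modes of RandomFlip3D, as exercised by the test configs further down (probabilities are illustrative):

# Outdoor, synced with the 2D image flip: only horizontal BEV flipping.
flip_outdoor = dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5)

# Indoor, decided independently of the 2D flip: both BEV directions.
flip_indoor = dict(
    type='RandomFlip3D',
    sync_2d=False,
    flip_ratio_bev_horizontal=0.5,
    flip_ratio_bev_vertical=0.5)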
@PIPELINES.register_module()
......@@ -195,15 +227,19 @@ class GlobalRotScaleTrans(object):
        noise. This applies a random translation to the scene, where the
            noise is sampled from a Gaussian distribution whose standard
            deviation is set by ``translation_std``. Defaults to [0, 0, 0].
        shift_height (bool): Whether to shift height (the fourth dimension
            of indoor points) when scaling. Defaults to False.
"""
def __init__(self,
rot_range=[-0.78539816, 0.78539816],
scale_ratio_range=[0.95, 1.05],
translation_std=[0, 0, 0]):
translation_std=[0, 0, 0],
shift_height=False):
self.rot_range = rot_range
self.scale_ratio_range = scale_ratio_range
self.translation_std = translation_std
self.shift_height = shift_height
def _trans_bbox_points(self, input_dict):
if not isinstance(self.translation_std, (list, tuple, np.ndarray)):
......@@ -227,18 +263,19 @@ class GlobalRotScaleTrans(object):
rotation = [-rotation, rotation]
noise_rotation = np.random.uniform(rotation[0], rotation[1])
        for key in input_dict['bbox3d_fields']:
            if len(input_dict[key].tensor) != 0:
                points, rot_mat_T = input_dict[key].rotate(
                    noise_rotation, input_dict['points'])
                input_dict['points'] = points
                input_dict['pcd_rotation'] = rot_mat_T
def _scale_bbox_points(self, input_dict):
scale = input_dict['pcd_scale_factor']
input_dict['points'][:, :3] *= scale
if self.shift_height:
input_dict['points'][:, -1] *= scale
for key in input_dict['bbox3d_fields']:
input_dict[key].scale(scale)
......@@ -262,6 +299,7 @@ class GlobalRotScaleTrans(object):
repr_str += '(rot_range={},'.format(self.rot_range)
repr_str += ' scale_ratio_range={},'.format(self.scale_ratio_range)
        repr_str += ' translation_std={},'.format(self.translation_std)
repr_str += ' shift_height={})'.format(self.shift_height)
return repr_str
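With the new shift_height flag, the old indoor-specific configs translate into plain GlobalRotScaleTrans dicts; a sketch based on the updated ScanNet test below:

rot_scale_indoor = dict(
    type='GlobalRotScaleTrans',
    rot_range=[-0.087266, 0.087266],  # roughly +/- pi/36
    scale_ratio_range=[1.0, 1.0],     # no rescaling for ScanNet
    shift_height=True)                # also scale the appended height channel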
......@@ -283,23 +321,6 @@ class ObjectRangeFilter(object):
self.pcd_range = np.array(point_cloud_range, dtype=np.float32)
self.bev_range = self.pcd_range[[0, 1, 3, 4]]
@staticmethod
def filter_gt_box_outside_range(gt_bboxes_3d, limit_range):
"""remove gtbox outside training range.
this function should be applied after other prep functions
Args:
gt_bboxes_3d ([type]): [description]
limit_range ([type]): [description]
"""
gt_bboxes_3d_bv = box_np_ops.center_to_corner_box2d(
gt_bboxes_3d[:, [0, 1]], gt_bboxes_3d[:, [3, 3 + 1]],
gt_bboxes_3d[:, 6])
bounding_box = box_np_ops.minmax_to_corner_2d(
np.asarray(limit_range)[np.newaxis, ...])
ret = box_np_ops.points_in_convex_polygon_jit(
gt_bboxes_3d_bv.reshape(-1, 2), bounding_box)
return np.any(ret.reshape(-1, 4), axis=1)
def __call__(self, input_dict):
gt_bboxes_3d = input_dict['gt_bboxes_3d']
gt_labels_3d = input_dict['gt_labels_3d']
......@@ -371,3 +392,67 @@ class ObjectNameFilter(object):
repr_str = self.__class__.__name__
repr_str += f'(classes={self.classes})'
return repr_str
@PIPELINES.register_module()
class IndoorPointSample(object):
"""Indoor point sample.
Sampling data to a certain number.
Args:
num_points (int): Number of points to be sampled.
"""
def __init__(self, num_points):
self.num_points = num_points
def points_random_sampling(self,
points,
num_samples,
replace=None,
return_choices=False):
"""Points random sampling.
Sample points to a certain number.
Args:
points (ndarray): 3D Points.
num_samples (int): Number of samples to be sampled.
            replace (bool): Whether the sampling is with or without
                replacement.
            return_choices (bool): Whether to return the chosen indices.
Returns:
points (ndarray): 3D Points.
choices (ndarray): The generated random samples.
"""
if replace is None:
replace = (points.shape[0] < num_samples)
choices = np.random.choice(
points.shape[0], num_samples, replace=replace)
if return_choices:
return points[choices], choices
else:
return points[choices]
def __call__(self, results):
points = results['points']
points, choices = self.points_random_sampling(
points, self.num_points, return_choices=True)
pts_instance_mask = results.get('pts_instance_mask', None)
pts_semantic_mask = results.get('pts_semantic_mask', None)
results['points'] = points
if pts_instance_mask is not None and pts_semantic_mask is not None:
pts_instance_mask = pts_instance_mask[choices]
pts_semantic_mask = pts_semantic_mask[choices]
results['pts_instance_mask'] = pts_instance_mask
results['pts_semantic_mask'] = pts_semantic_mask
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(num_points={})'.format(self.num_points)
return repr_str
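A minimal standalone sketch of the sampling behaviour; note that replace flips to True automatically when the cloud has fewer points than num_points:

import numpy as np

points = np.random.rand(100, 4)
num_samples = 5
replace = points.shape[0] < num_samples  # False here: sample w/o replacement
choices = np.random.choice(points.shape[0], num_samples, replace=replace)
assert points[choices].shape == (num_samples, 4)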
......@@ -56,9 +56,22 @@ class Base3DRoIHead(nn.Module, metaclass=ABCMeta):
gt_bboxes,
gt_labels,
gt_bboxes_ignore=None,
gt_masks=None,
**kwargs):
"""Forward function during training"""
"""Forward function during training
Args:
x (dict): Contains features from the first stage.
img_metas (list[dict]): Meta info of each image.
proposal_list (list[dict]): Proposal information from rpn.
            gt_bboxes (list[:obj:`BaseInstance3DBoxes`]):
GT bboxes of each sample. The bboxes are encapsulated
by 3D box structures.
gt_labels (list[LongTensor]): GT labels of each sample.
            gt_bboxes_ignore (list[Tensor], optional): Specify which
                bounding boxes can be ignored when computing the loss.
Returns:
dict: losses from each head.
"""
pass
def simple_test(self,
......
......@@ -231,6 +231,15 @@ class PartA2BboxHead(nn.Module):
normal_init(self.conv_reg[-1].conv, mean=0, std=0.001)
def forward(self, seg_feats, part_feats):
"""Forward pass.
Args:
seg_feats (torch.Tensor): Point-wise semantic features.
part_feats (torch.Tensor): Point-wise part prediction features.
Returns:
tuple[torch.Tensor]: Score of class and bbox predictions.
"""
# (B * N, out_x, out_y, out_z, 4)
rcnn_batch_size = part_feats.shape[0]
......@@ -273,6 +282,22 @@ class PartA2BboxHead(nn.Module):
def loss(self, cls_score, bbox_pred, rois, labels, bbox_targets,
pos_gt_bboxes, reg_mask, label_weights, bbox_weights):
"""Coumputing losses.
Args:
cls_score (Torch.tensor): Scores of each roi.
bbox_pred (Torch.tensor): Predictions of bboxes.
rois (Torch.tensor): Roi bboxes.
labels (Torch.tensor): Labels of class.
bbox_targets (Torch.tensor): Target of positive bboxes.
pos_gt_bboxes (Torch.tensor): Gt of positive bboxes.
reg_mask (Torch.tensor): Mask for positive bboxes.
label_weights (Torch.tensor): Weights of class loss.
bbox_weights (Torch.tensor): Weights of bbox loss.
Returns:
dict: Computed losses.
"""
losses = dict()
rcnn_batch_size = cls_score.shape[0]
......@@ -325,6 +350,17 @@ class PartA2BboxHead(nn.Module):
return losses
def get_targets(self, sampling_results, rcnn_train_cfg, concat=True):
"""Generate targets.
Args:
            sampling_results (list[:obj:`SamplingResult`]):
Sampled results from rois.
rcnn_train_cfg (ConfigDict): Training config of rcnn.
concat (bool): Whether to concatenate targets between batches.
Returns:
tuple: Targets of boxes and class prediction.
"""
pos_bboxes_list = [res.pos_bboxes for res in sampling_results]
pos_gt_bboxes_list = [res.pos_gt_bboxes for res in sampling_results]
iou_list = [res.iou for res in sampling_results]
......@@ -444,6 +480,20 @@ class PartA2BboxHead(nn.Module):
class_pred,
img_metas,
cfg=None):
"""Generate bboxes from bbox head predictions.
Args:
rois (torch.Tensor): Roi bboxes.
cls_score (torch.Tensor): Scores of bboxes.
            bbox_pred (torch.Tensor): Bbox predictions.
            class_labels (torch.Tensor): Labels of classes.
            class_pred (torch.Tensor): Scores used for NMS.
            img_metas (list[dict]): Meta info of the point cloud and image.
cfg (ConfigDict): Testing config.
Returns:
list[tuple]: Decoded bbox, scores and labels after nms.
"""
roi_batch_id = rois[..., 0]
roi_boxes = rois[..., 1:] # boxes without batch id
batch_size = int(roi_batch_id.max().item() + 1)
......
......@@ -50,6 +50,15 @@ class PointwiseSemanticHead(nn.Module):
self.loss_part = build_loss(loss_part)
def forward(self, x):
"""Forward pass.
Args:
x (torch.Tensor): Features from the first stage.
Returns:
dict: part features, segmentation and part predictions.
"""
seg_preds = self.seg_cls_layer(x) # (N, 1)
part_preds = self.seg_reg_layer(x) # (N, 3)
......
......@@ -10,7 +10,17 @@ from .base_3droi_head import Base3DRoIHead
@HEADS.register_module()
class PartAggregationROIHead(Base3DRoIHead):
"""Part aggregation roi head for PartA2"""
"""Part aggregation roi head for PartA2
Args:
semantic_head (ConfigDict): Config of semantic head.
num_classes (int): The number of classes.
seg_roi_extractor (ConfigDict): Config of seg_roi_extractor.
part_roi_extractor (ConfigDict): Config of part_roi_extractor.
bbox_head (ConfigDict): Config of bbox_head.
train_cfg (ConfigDict): Training config.
test_cfg (ConfigDict): Testing config.
"""
def __init__(self,
semantic_head,
......@@ -156,6 +166,18 @@ class PartAggregationROIHead(Base3DRoIHead):
return bbox_results
def _bbox_forward(self, seg_feats, part_feats, voxels_dict, rois):
"""Forward function of roi_extractor and bbox_head.
Args:
seg_feats (torch.Tensor): Point-wise semantic features.
part_feats (torch.Tensor): Point-wise part prediction features.
voxels_dict (dict): Contains information of voxels.
rois (Tensor): Roi boxes.
Returns:
dict: Contains predictions of bbox_head and
features of roi_extractor.
"""
pooled_seg_feats = self.seg_roi_extractor(seg_feats,
voxels_dict['voxel_centers'],
voxels_dict['coors'][..., 0],
......
......@@ -25,12 +25,16 @@ def test_getitem():
with_mask_3d=True,
with_seg_3d=True),
dict(type='IndoorPointSample', num_points=5),
        dict(
            type='RandomFlip3D',
            sync_2d=False,
            flip_ratio_bev_horizontal=1.0,
            flip_ratio_bev_vertical=1.0),
        dict(
            type='GlobalRotScaleTrans',
            rot_range=[-0.087266, 0.087266],
            scale_ratio_range=[1.0, 1.0],
            shift_height=True),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
......@@ -38,9 +42,7 @@ def test_getitem():
'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
'pts_instance_mask'
],
meta_keys=['file_name', 'sample_idx', 'pcd_rotation']),
]
scannet_dataset = ScanNetDataset(root_path, ann_file, pipelines)
......@@ -51,28 +53,24 @@ def test_getitem():
pts_semantic_mask = data['pts_semantic_mask']._data
pts_instance_mask = data['pts_instance_mask']._data
file_name = data['img_metas']._data['file_name']
pcd_rotation = data['img_metas']._data['pcd_rotation']
sample_idx = data['img_metas']._data['sample_idx']
expected_rotation = np.array([[0.99654, 0.08311407, 0.],
[-0.08311407, 0.99654, 0.], [0., 0., 1.]])
assert file_name == './tests/data/scannet/points/scene0000_00.bin'
assert np.allclose(pcd_rotation, expected_rotation, 1e-3)
assert sample_idx == 'scene0000_00'
expected_points = torch.tensor([[-2.7231, -2.2068, 2.3543, 2.3895],
[-0.4065, -3.4857, 2.1330, 2.1682],
[-1.4578, 1.3510, -0.0441, -0.0089],
[2.2428, -1.1323, -0.0288, 0.0064],
[0.7052, -2.9752, 1.5560, 1.5912]])
expected_gt_bboxes_3d = torch.tensor(
[[-1.1835, -3.6317, 1.5704, 1.7577, 0.3761, 0.5724, 0.0000],
[-3.1832, 3.2269, 1.1911, 0.6727, 0.2251, 0.6715, 0.0000],
[-0.9598, -2.2864, 0.0093, 0.7506, 2.5709, 1.2145, 0.0000],
[-2.6988, -2.7354, 0.8288, 0.7680, 1.8877, 0.2870, 0.0000],
[3.2989, 0.2885, -0.0090, 0.7600, 3.8814, 2.1603, 0.0000]])
expected_gt_labels = np.array([
6, 6, 4, 9, 11, 11, 10, 0, 15, 17, 17, 17, 3, 12, 4, 4, 14, 1, 0, 0, 0,
0, 0, 0, 5, 5, 5
......@@ -82,7 +80,7 @@ def test_getitem():
original_classes = scannet_dataset.CLASSES
assert scannet_dataset.CLASSES == class_names
    assert torch.allclose(points, expected_points, 1e-2)
assert gt_bboxes_3d.tensor[:5].shape == (5, 7)
assert torch.allclose(gt_bboxes_3d.tensor[:5], expected_gt_bboxes_3d, 1e-2)
assert np.all(gt_labels.numpy() == expected_gt_labels)
......
......@@ -17,20 +17,24 @@ def test_getitem():
load_dim=6,
use_dim=[0, 1, 2]),
dict(type='LoadAnnotations3D'),
        dict(
            type='RandomFlip3D',
            sync_2d=False,
            flip_ratio_bev_horizontal=0.5,
        ),
        dict(
            type='GlobalRotScaleTrans',
            rot_range=[-0.523599, 0.523599],
            scale_ratio_range=[0.85, 1.15],
            shift_height=True),
dict(type='IndoorPointSample', num_points=5),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'],
meta_keys=[
'file_name', 'pcd_horizontal_flip', 'sample_idx',
'pcd_scale_factor', 'pcd_rotation'
]),
]
......@@ -40,32 +44,32 @@ def test_getitem():
gt_bboxes_3d = data['gt_bboxes_3d']._data
gt_labels_3d = data['gt_labels_3d']._data
file_name = data['img_metas']._data['file_name']
pcd_horizontal_flip = data['img_metas']._data['pcd_horizontal_flip']
pcd_scale_factor = data['img_metas']._data['pcd_scale_factor']
pcd_rotation = data['img_metas']._data['pcd_rotation']
sample_idx = data['img_metas']._data['sample_idx']
pcd_rotation_expected = np.array([[0.99889565, 0.04698427, 0.],
[-0.04698427, 0.99889565, 0.],
[0., 0., 1.]])
assert file_name == './tests/data/sunrgbd/points/000001.bin'
assert pcd_horizontal_flip is False
assert abs(pcd_scale_factor - 0.9770964398016714) < 1e-5
assert np.allclose(pcd_rotation, pcd_rotation_expected, 1e-3)
assert sample_idx == 1
expected_points = torch.tensor([[-0.9904, 1.2596, 0.1105, 0.0905],
[-0.9948, 1.2758, 0.0437, 0.0238],
[-0.9866, 1.2641, 0.0504, 0.0304],
[-0.9915, 1.2586, 0.1265, 0.1065],
[-0.9890, 1.2561, 0.1216, 0.1017]])
expected_gt_bboxes_3d = torch.tensor(
[[0.8308, 4.1168, -1.2035, 2.2493, 1.8444, 1.9245, 1.6486],
[2.3002, 4.8149, -1.2442, 0.5718, 0.8629, 0.9510, 1.6030],
[-1.1477, 1.8090, -1.1725, 0.6965, 1.5273, 2.0563, 0.0552]])
expected_gt_labels = np.array([0, 7, 6])
original_classes = sunrgbd_dataset.CLASSES
assert torch.allclose(points, expected_points, 1e-2)
assert torch.allclose(gt_bboxes_3d.tensor, expected_gt_bboxes_3d, 1e-3)
assert np.all(gt_labels_3d.numpy() == expected_gt_labels)
assert original_classes == class_names
......
import numpy as np
import torch
from mmdet3d.core.bbox import DepthInstance3DBoxes
from mmdet3d.datasets.pipelines import (IndoorFlipData,
IndoorGlobalRotScaleTrans)
def test_indoor_flip_data():
np.random.seed(0)
sunrgbd_indoor_flip_data = IndoorFlipData(1, 1)
sunrgbd_results = dict()
sunrgbd_results['points'] = np.array(
[[1.02828765e+00, 3.65790772e+00, 1.97294697e-01, 1.61959505e+00],
[-3.95979017e-01, 1.05465031e+00, -7.49204338e-01, 6.73096001e-01]])
sunrgbd_results['gt_bboxes_3d'] = DepthInstance3DBoxes(
np.array([[
0.213684, 1.036364, -0.982323, 0.61541, 0.572574, 0.872728,
3.07028526
],
[
-0.449953, 1.395455, -1.027778, 1.500956, 1.637298,
0.636364, -1.58242359
]]))
sunrgbd_results = sunrgbd_indoor_flip_data(sunrgbd_results)
sunrgbd_points = sunrgbd_results['points']
sunrgbd_gt_bboxes_3d = sunrgbd_results['gt_bboxes_3d']
expected_sunrgbd_points = np.array(
[[-1.02828765, 3.65790772, 0.1972947, 1.61959505],
[0.39597902, 1.05465031, -0.74920434, 0.673096]])
expected_sunrgbd_gt_bboxes_3d = torch.tensor(
[[-0.2137, 1.0364, -0.9823, 0.6154, 0.5726, 0.8727, 0.0713],
[0.4500, 1.3955, -1.0278, 1.5010, 1.6373, 0.6364, 4.7240]])
assert np.allclose(sunrgbd_points, expected_sunrgbd_points)
assert torch.allclose(sunrgbd_gt_bboxes_3d.tensor,
expected_sunrgbd_gt_bboxes_3d, 1e-3)
np.random.seed(0)
scannet_indoor_flip_data = IndoorFlipData(1, 1)
scannet_results = dict()
scannet_results['points'] = np.array(
[[1.6110241e+00, -1.6903955e-01, 5.8115810e-01, 5.9897250e-01],
[1.3978075e+00, 4.2035791e-01, 3.8729519e-01, 4.0510958e-01]])
scannet_results['gt_bboxes_3d'] = DepthInstance3DBoxes(
np.array([[
0.55903838, 0.48201692, 0.65688646, 0.65370704, 0.60029864,
0.5163464
],
[
-0.03226406, 1.70392646, 0.60348618, 0.65165804,
0.72084366, 0.64667457
]]),
box_dim=6,
with_yaw=False)
scannet_results = scannet_indoor_flip_data(scannet_results)
scannet_points = scannet_results['points']
scannet_gt_bboxes_3d = scannet_results['gt_bboxes_3d']
expected_scannet_points = np.array(
[[-1.6110241, 0.16903955, 0.5811581, 0.5989725],
[-1.3978075, -0.42035791, 0.38729519, 0.40510958]])
expected_scannet_gt_bboxes_3d = torch.tensor(
[[-0.5590, -0.4820, 0.6569, 0.6537, 0.6003, 0.5163, 0.0000],
[0.0323, -1.7039, 0.6035, 0.6517, 0.7208, 0.6467, 0.0000]])
assert np.allclose(scannet_points, expected_scannet_points)
assert torch.allclose(scannet_gt_bboxes_3d.tensor,
expected_scannet_gt_bboxes_3d, 1e-2)
def test_global_rot_scale():
np.random.seed(0)
sunrgbd_augment = IndoorGlobalRotScaleTrans(
True, rot_range=[-1 / 6, 1 / 6], scale_range=[0.85, 1.15])
sunrgbd_results = dict()
sunrgbd_results['points'] = np.array(
[[1.02828765e+00, 3.65790772e+00, 1.97294697e-01, 1.61959505e+00],
[-3.95979017e-01, 1.05465031e+00, -7.49204338e-01, 6.73096001e-01]])
sunrgbd_results['gt_bboxes_3d'] = DepthInstance3DBoxes(
np.array([[
0.213684, 1.036364, -0.982323, 0.61541, 0.572574, 0.872728,
3.07028526
],
[
-0.449953, 1.395455, -1.027778, 1.500956, 1.637298,
0.636364, -1.58242359
]]))
sunrgbd_results = sunrgbd_augment(sunrgbd_results)
sunrgbd_points = sunrgbd_results['points']
sunrgbd_gt_bboxes_3d = sunrgbd_results['gt_bboxes_3d']
expected_sunrgbd_points = np.array(
[[0.89427376, 3.94489646, 0.21003141, 1.72415094],
[-0.47835783, 1.09972989, -0.79757058, 0.71654893]])
expected_sunrgbd_gt_bboxes_3d = torch.tensor(
[[0.1708, 1.1135, -1.0457, 0.6551, 0.6095, 0.9291, 3.0192],
[-0.5543, 1.4591, -1.0941, 1.5979, 1.7430, 0.6774, -1.6335]])
assert np.allclose(sunrgbd_points, expected_sunrgbd_points)
assert torch.allclose(sunrgbd_gt_bboxes_3d.tensor,
expected_sunrgbd_gt_bboxes_3d, 1e-3)
np.random.seed(0)
scannet_augment = IndoorGlobalRotScaleTrans(
True, rot_range=[-1 * 1 / 36, 1 / 36], scale_range=None)
scannet_results = dict()
scannet_results['points'] = np.array(
[[1.6110241e+00, -1.6903955e-01, 5.8115810e-01, 5.9897250e-01],
[1.3978075e+00, 4.2035791e-01, 3.8729519e-01, 4.0510958e-01]])
scannet_results['gt_bboxes_3d'] = DepthInstance3DBoxes(
np.array([[
0.55903838, 0.48201692, 0.65688646, 0.65370704, 0.60029864,
0.5163464
],
[
-0.03226406, 1.70392646, 0.60348618, 0.65165804,
0.72084366, 0.64667457
]]),
box_dim=6,
with_yaw=False)
scannet_results = scannet_augment(scannet_results)
scannet_points = scannet_results['points']
scannet_gt_bboxes_3d = scannet_results['gt_bboxes_3d']
expected_scannet_points = np.array(
[[1.61240576, -0.15530836, 0.5811581, 0.5989725],
[1.39417555, 0.43225122, 0.38729519, 0.40510958]])
expected_scannet_gt_bboxes_3d = torch.tensor(
[[0.5549, 0.4868, 0.6569, 0.6588, 0.6058, 0.5163, 0.0000],
[-0.0468, 1.7036, 0.6035, 0.6578, 0.7264, 0.6467, 0.0000]])
assert np.allclose(scannet_points, expected_scannet_points)
assert torch.allclose(scannet_gt_bboxes_3d.tensor,
expected_scannet_gt_bboxes_3d, 1e-3)
......@@ -28,12 +28,16 @@ def test_scannet_pipeline():
with_mask_3d=True,
with_seg_3d=True),
dict(type='IndoorPointSample', num_points=5),
        dict(
            type='RandomFlip3D',
            sync_2d=False,
            flip_ratio_bev_horizontal=1.0,
            flip_ratio_bev_vertical=1.0),
        dict(
            type='GlobalRotScaleTrans',
            rot_range=[-0.087266, 0.087266],
            scale_ratio_range=[1.0, 1.0],
            shift_height=True),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
......@@ -63,6 +67,7 @@ def test_scannet_pipeline():
scannet_gt_bboxes_3d, box_dim=6, with_yaw=False)
results['ann_info']['gt_labels_3d'] = scannet_gt_labels_3d
results['img_fields'] = []
results['bbox3d_fields'] = []
results['pts_mask_fields'] = []
results['pts_seg_fields'] = []
......@@ -74,25 +79,24 @@ def test_scannet_pipeline():
gt_labels_3d = results['gt_labels_3d']._data
pts_semantic_mask = results['pts_semantic_mask']._data
pts_instance_mask = results['pts_instance_mask']._data
expected_points = torch.tensor([[-2.7231, -2.2068, 2.3543, 2.3895],
[-0.4065, -3.4857, 2.1330, 2.1682],
[-1.4578, 1.3510, -0.0441, -0.0089],
[2.2428, -1.1323, -0.0288, 0.0064],
[0.7052, -2.9752, 1.5560, 1.5912]])
expected_gt_bboxes_3d = torch.tensor(
[[-1.1835, -3.6317, 1.8565, 1.7577, 0.3761, 0.5724, 0.0000],
[-3.1832, 3.2269, 1.5268, 0.6727, 0.2251, 0.6715, 0.0000],
[-0.9598, -2.2864, 0.6165, 0.7506, 2.5709, 1.2145, 0.0000],
[-2.6988, -2.7354, 0.9722, 0.7680, 1.8877, 0.2870, 0.0000],
[3.2989, 0.2885, 1.0712, 0.7600, 3.8814, 2.1603, 0.0000]])
expected_gt_labels_3d = np.array([
6, 6, 4, 9, 11, 11, 10, 0, 15, 17, 17, 17, 3, 12, 4, 4, 14, 1, 0, 0, 0,
0, 0, 0, 5, 5, 5
])
expected_pts_semantic_mask = np.array([3, 1, 2, 2, 15])
expected_pts_instance_mask = np.array([44, 22, 10, 10, 57])
assert torch.allclose(points, expected_points, 1e-2)
assert torch.allclose(gt_bboxes_3d.tensor[:5, :], expected_gt_bboxes_3d,
1e-2)
assert np.all(gt_labels_3d.numpy() == expected_gt_labels_3d)
......@@ -111,12 +115,16 @@ def test_sunrgbd_pipeline():
load_dim=6,
use_dim=[0, 1, 2]),
dict(type='LoadAnnotations3D'),
        dict(
            type='RandomFlip3D',
            sync_2d=False,
            flip_ratio_bev_horizontal=1.0,
        ),
        dict(
            type='GlobalRotScaleTrans',
            rot_range=[-0.523599, 0.523599],
            scale_ratio_range=[0.85, 1.15],
            shift_height=True),
dict(type='IndoorPointSample', num_points=5),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
......@@ -140,6 +148,7 @@ def test_sunrgbd_pipeline():
results['ann_info'] = dict()
results['ann_info']['gt_bboxes_3d'] = DepthInstance3DBoxes(gt_bboxes_3d)
results['ann_info']['gt_labels_3d'] = gt_labels_3d
results['img_fields'] = []
results['bbox3d_fields'] = []
results['pts_mask_fields'] = []
results['pts_seg_fields'] = []
......@@ -148,16 +157,16 @@ def test_sunrgbd_pipeline():
points = results['points']._data
gt_bboxes_3d = results['gt_bboxes_3d']._data
gt_labels_3d = results['gt_labels_3d']._data
expected_points = torch.tensor([[0.8678, 1.3470, 0.1105, 0.0905],
[0.8707, 1.3635, 0.0437, 0.0238],
[0.8636, 1.3511, 0.0504, 0.0304],
[0.8690, 1.3461, 0.1265, 0.1065],
[0.8668, 1.3434, 0.1216, 0.1017]])
expected_gt_bboxes_3d = torch.tensor(
[[-1.2136, 4.0206, -0.2412, 2.2493, 1.8444, 1.9245, 1.3989],
[-2.7420, 4.5777, -0.7686, 0.5718, 0.8629, 0.9510, 1.4446],
[0.9729, 1.9087, -0.1443, 0.6965, 1.5273, 2.0563, 2.9924]])
expected_gt_labels_3d = np.array([0, 7, 6])
assert torch.allclose(gt_bboxes_3d.tensor, expected_gt_bboxes_3d, 1e-3)
assert np.allclose(gt_labels_3d.flatten(), expected_gt_labels_3d)
assert torch.allclose(points, expected_points, 1e-2)
......@@ -19,7 +19,7 @@ def test_outdoor_aug_pipeline():
translation_std=[1.0, 1.0, 0.5],
global_rot_range=[0.0, 0.0],
rot_range=[-0.78539816, 0.78539816]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.78539816, 0.78539816],
......@@ -137,7 +137,7 @@ def test_outdoor_velocity_aug_pipeline():
rot_range=[-0.3925, 0.3925],
scale_ratio_range=[0.95, 1.05],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
......