Commit 92ae69fb authored by wuyuefeng, committed by zhangwenwei

Merge rot scale

parent 5a1575a0
from mmdet.datasets.pipelines import Compose
from .dbsampler import DataBaseSampler
from .formating import Collect3D, DefaultFormatBundle, DefaultFormatBundle3D
from .loading import (LoadAnnotations3D, LoadMultiViewImageFromFiles,
LoadPointsFromFile, NormalizePointsColor,
PointSegClassMapping)
from .test_time_aug import MultiScaleFlipAug3D
from .transforms_3d import (GlobalRotScaleTrans, IndoorPointSample,
ObjectNoise, ObjectRangeFilter, ObjectSample,
PointShuffle, PointsRangeFilter, RandomFlip3D)
__all__ = [
'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans',
'PointShuffle', 'ObjectRangeFilter', 'PointsRangeFilter', 'Collect3D',
'Compose', 'LoadMultiViewImageFromFiles', 'LoadPointsFromFile',
'DefaultFormatBundle', 'DefaultFormatBundle3D', 'DataBaseSampler',
'NormalizePointsColor', 'LoadAnnotations3D', 'IndoorPointSample',
'PointSegClassMapping', 'MultiScaleFlipAug3D'
]
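For reference, a before/after sketch of an indoor training-pipeline fragment (values mirror the test configs further down; they are illustrative, not normative) showing how the removed indoor-specific transforms map onto the merged ones:

# Before this commit: indoor-only augmentations.
old_pipeline = [
    dict(type='IndoorFlipData', flip_ratio_yz=0.5, flip_ratio_xz=0.5),
    dict(
        type='IndoorGlobalRotScaleTrans',
        shift_height=True,
        rot_range=[-1 / 36, 1 / 36],  # fractions of pi
        scale_range=None),
]

# After this commit: the shared indoor/outdoor transforms.
new_pipeline = [
    dict(
        type='RandomFlip3D',
        sync_2d=False,
        flip_ratio_bev_horizontal=0.5,
        flip_ratio_bev_vertical=0.5),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.087266, 0.087266],  # radians, roughly +/- pi/36
        scale_ratio_range=[1.0, 1.0],
        shift_height=True),
]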
@@ -58,7 +58,6 @@ class DataBaseSampler(object):
data_root,
rate,
prepare,
object_rot_range,
sample_groups,
classes=None):
super().__init__()
@@ -66,7 +65,6 @@ class DataBaseSampler(object):
self.info_path = info_path
self.rate = rate
self.prepare = prepare
self.object_rot_range = object_rot_range
self.classes = classes
self.cat2label = {name: i for i, name in enumerate(classes)}
self.label2cat = {i: name for i, name in enumerate(classes)}
@@ -103,11 +101,6 @@ class DataBaseSampler(object):
self.sampler_dict = {}
for k, v in self.group_db_infos.items():
self.sampler_dict[k] = BatchSampler(v, k, shuffle=True)
self.object_rot_range = object_rot_range
self.object_rot_enable = np.abs(self.object_rot_range[0] -
self.object_rot_range[1]) >= 1e-3
# TODO: No group_sampling currently
@staticmethod
@@ -183,11 +176,6 @@ class DataBaseSampler(object):
info['path']) if self.data_root else info['path']
s_points = np.fromfile(
file_path, dtype=np.float32).reshape([-1, 4])
if 'rot_transform' in info:
rot = info['rot_transform']
s_points[:, :3] = box_np_ops.rotation_points_single_angle(
s_points[:, :3], rot, axis=2)
s_points[:, :3] += info['box3d_lidar'][:3]
count += 1
@@ -219,24 +207,7 @@ class DataBaseSampler(object):
gt_bboxes[:, 0:2], gt_bboxes[:, 3:5], gt_bboxes[:, 6])
sp_boxes = np.stack([i['box3d_lidar'] for i in sampled], axis=0)
valid_mask = np.zeros([gt_bboxes.shape[0]], dtype=np.bool_)
valid_mask = np.concatenate(
[valid_mask,
np.ones([sp_boxes.shape[0]], dtype=np.bool_)], axis=0)
boxes = np.concatenate([gt_bboxes, sp_boxes], axis=0).copy()
if self.object_rot_enable:
assert False, 'This part needs to be checked'
# place samples to any place in a circle.
# TODO: rm it if not needed
data_augment_utils.noise_per_object_v3_(
boxes,
None,
valid_mask,
0,
0,
self._global_rot_range,
num_try=100)
sp_boxes_new = boxes[gt_bboxes.shape[0]:]
sp_boxes_bv = box_np_ops.center_to_corner_box2d(
@@ -253,11 +224,5 @@ class DataBaseSampler(object):
coll_mat[i] = False
coll_mat[:, i] = False
else:
if self.object_rot_enable:
assert False, 'This part needs to be checked'
sampled[i - num_gt]['box3d_lidar'][:2] = boxes[i, :2]
sampled[i - num_gt]['box3d_lidar'][-1] = boxes[i, -1]
sampled[i - num_gt]['rot_transform'] = (
boxes[i, -1] - sp_boxes[i - num_gt, -1])
valid_samples.append(sampled[i - num_gt])
return valid_samples
@@ -74,7 +74,8 @@ class Collect3D(object):
def __init__(self,
keys,
meta_keys=('filename', 'ori_shape', 'img_shape', 'lidar2img',
'pad_shape', 'scale_factor', 'flip',
'pcd_horizontal_flip', 'pcd_vertical_flip',
'box_mode_3d', 'box_type_3d', 'img_norm_cfg',
'rect', 'Trv2c', 'P2', 'pcd_trans', 'sample_idx',
'pcd_scale_factor', 'pcd_rotation')):
......
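A sketch of a Collect3D step after this change, with the split pcd_horizontal_flip/pcd_vertical_flip flags replacing the old single pcd_flip meta key (the key selection here is illustrative):

collect = dict(
    type='Collect3D',
    keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'],
    meta_keys=[
        'file_name', 'sample_idx', 'pcd_horizontal_flip',
        'pcd_vertical_flip', 'pcd_scale_factor', 'pcd_rotation'
    ])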
import numpy as np
from mmdet.datasets.builder import PIPELINES
@PIPELINES.register_module()
class IndoorFlipData(object):
"""Indoor flip data.
Flip point cloud and ground truth boxes.
The point cloud will be flipped along the yz plane
and the xz plane with a certain probability.
Args:
flip_ratio_yz (float): Probability of being flipped along yz plane.
Default: 0.5.
flip_ratio_xz (float): Probability of being flipped along xz plane.
Default: 0.5.
"""
def __init__(self, flip_ratio_yz=0.5, flip_ratio_xz=0.5):
self.flip_ratio_yz = flip_ratio_yz
self.flip_ratio_xz = flip_ratio_xz
def __call__(self, results):
points = results['points']
gt_bboxes_3d = results['gt_bboxes_3d']
results['flip_yz'] = False
results['flip_xz'] = False
if np.random.random() < self.flip_ratio_yz:
# Flipping along the YZ plane
points[:, 0] = -1 * points[:, 0]
gt_bboxes_3d.flip('horizontal')
results['flip_yz'] = True
if not gt_bboxes_3d.with_yaw and np.random.random(
) < self.flip_ratio_xz:
# Flipping along the XZ plane
points[:, 1] = -1 * points[:, 1]
gt_bboxes_3d.flip('vertical')
results['flip_xz'] = True
results['points'] = points
results['gt_bboxes_3d'] = gt_bboxes_3d
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(flip_ratio_yz={},'.format(self.flip_ratio_yz)
repr_str += ' flip_ratio_xz={})'.format(self.flip_ratio_xz)
return repr_str
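A standalone numpy sketch of the flip geometry used above: mirroring across the YZ plane negates x, mirroring across the XZ plane negates y.

import numpy as np

points = np.array([[1.0, 2.0, 0.5],
                   [-0.3, 0.7, 1.2]])

yz_flipped = points.copy()
yz_flipped[:, 0] = -yz_flipped[:, 0]  # YZ-plane ('horizontal') flip
xz_flipped = points.copy()
xz_flipped[:, 1] = -xz_flipped[:, 1]  # XZ-plane ('vertical') flip

assert np.allclose(yz_flipped[:, 0], -points[:, 0])
assert np.allclose(xz_flipped[:, 1], -points[:, 1])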
@PIPELINES.register_module()
class IndoorPointsColorJitter(object):
"""Indoor points color jitter.
Randomly change the brightness and color of the point cloud, and
randomly drop the points' colors with probability ``drop_prob``.
Args:
color_mean (list[float]): Mean color of the point cloud.
Default: [0.5, 0.5, 0.5].
bright_range (list[float]): Range of brightness.
Default: [0.8, 1.2].
color_shift_range (list[float]): Range of color shift.
Default: [0.95, 1.05].
jitter_range (list[float]): Range of jittering.
Default: [-0.025, 0.025].
drop_prob (float): Probability to drop out points' color.
Default: 0.3
"""
def __init__(self,
color_mean=[0.5, 0.5, 0.5],
bright_range=[0.8, 1.2],
color_shift_range=[0.95, 1.05],
jitter_range=[-0.025, 0.025],
drop_prob=0.3):
self.color_mean = color_mean
self.bright_range = bright_range
self.color_shift_range = color_shift_range
self.jitter_range = jitter_range
self.drop_prob = drop_prob
def __call__(self, results):
points = results['points']
assert points.shape[1] >= 6, \
f'Expected points to have >= 6 channels, got {points.shape[1]}.'
rgb_color = points[:, 3:6] + self.color_mean
# brightness change for each channel
rgb_color *= np.random.uniform(self.bright_range[0],
self.bright_range[1], 3)
# color shift for each channel (multiplicative; the range is centered
# around 1, and an additive shift of ~1 would saturate after clipping)
rgb_color *= np.random.uniform(self.color_shift_range[0],
self.color_shift_range[1], 3)
# jittering on each point
rgb_color += np.expand_dims(
np.random.uniform(self.jitter_range[0], self.jitter_range[1],
points.shape[0]), -1)
rgb_color = np.clip(rgb_color, 0, 1)
# randomly drop out points' colors
rgb_color *= np.expand_dims(
np.random.random(points.shape[0]) > self.drop_prob, -1)
points[:, 3:6] = rgb_color - self.color_mean
results['points'] = points
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(color_mean={})'.format(self.color_mean)
repr_str += '(bright_range={})'.format(self.bright_range)
repr_str += '(color_shift_range={})'.format(self.color_shift_range)
repr_str += '(jitter_range={})'.format(self.jitter_range)
repr_str += '(drop_prob={})'.format(self.drop_prob)
return repr_str
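A toy numpy sketch of the brightness/jitter/drop steps above on an (N, 6) cloud whose colors are stored zero-centered around color_mean; values are illustrative and the color-shift step is omitted.

import numpy as np

rng = np.random.default_rng(0)
points = np.zeros((4, 6))
points[:, 3:6] = rng.uniform(-0.5, 0.5, (4, 3))
color_mean = np.array([0.5, 0.5, 0.5])

rgb = points[:, 3:6] + color_mean          # recover colors in [0, 1]
rgb *= rng.uniform(0.8, 1.2, 3)            # per-channel brightness change
rgb += rng.uniform(-0.025, 0.025, (4, 1))  # per-point jitter
rgb = np.clip(rgb, 0, 1)
rgb *= (rng.random(4) > 0.3)[:, None]      # drop colors with prob. 0.3
points[:, 3:6] = rgb - color_mean          # store zero-centered again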
# TODO: merge outdoor indoor transform.
# TODO: try transform noise.
@PIPELINES.register_module()
class IndoorGlobalRotScaleTrans(object):
"""Indoor global rotate and scale.
Augment sunrgbd and scannet data with global rotating and scaling.
Args:
shift_height (bool): Whether to use height.
Default: True.
rot_range (list[float]): Range of rotation.
Default: None.
scale_range (list[float]): Range of scale.
Default: None.
"""
def __init__(self, shift_height=True, rot_range=None, scale_range=None):
self.shift_height = shift_height
self.rot_range = np.pi * np.array(rot_range) \
if rot_range is not None else None
self.scale_range = scale_range
def _rotz(self, t):
"""Rotate About Z.
Rotation about the z-axis.
Args:
t (float): Angle of rotation.
Returns:
rot_mat (ndarray): Matrix of rotation.
"""
c = np.cos(t)
s = np.sin(t)
rot_mat = np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]])
return rot_mat
def __call__(self, results):
points = results['points']
gt_bboxes_3d = results['gt_bboxes_3d']
if self.rot_range is not None:
assert len(self.rot_range) == 2, \
f'Expected rot_range to have 2 elements, ' \
f'got {len(self.rot_range)}.'
rot_angle = np.random.uniform(self.rot_range[0], self.rot_range[1])
if gt_bboxes_3d.tensor.shape[0] != 0:
gt_bboxes_3d.rotate(rot_angle)
points[:, :3] = np.dot(points[:, :3], self._rotz(rot_angle).T)
results['rot_angle'] = rot_angle
if self.scale_range is not None:
assert len(self.scale_range) == 2, \
f'Expected scale_range to have 2 elements, ' \
f'got {len(self.scale_range)}.'
# Augment point cloud scale
scale_ratio = np.random.uniform(self.scale_range[0],
self.scale_range[1])
points[:, :3] *= scale_ratio
gt_bboxes_3d.scale(scale_ratio)
if self.shift_height:
points[:, -1] *= scale_ratio
results['scale_ratio'] = scale_ratio
results['points'] = points
results['gt_bboxes_3d'] = gt_bboxes_3d
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(shift_height={})'.format(self.shift_height)
repr_str += '(rot_range={})'.format(self.rot_range)
repr_str += '(scale_range={})'.format(self.scale_range)
return repr_str
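A quick numpy check of the yaw-rotation convention used by _rotz and __call__ above: a quarter turn about z maps the x axis onto the y axis, and applying the matrix to row-vector points via points @ R.T matches (R @ points.T).T.

import numpy as np

t = np.pi / 2
c, s = np.cos(t), np.sin(t)
rot_mat = np.array([[c, -s, 0.], [s, c, 0.], [0., 0., 1.]])
assert np.allclose(rot_mat @ np.array([1., 0., 0.]), [0., 1., 0.], atol=1e-12)

pts = np.array([[1., 0., 0.], [0., 2., 0.]])
assert np.allclose(pts @ rot_mat.T, (rot_mat @ pts.T).T)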
import mmcv
import numpy as np
from mmdet.datasets.builder import PIPELINES
from mmdet.datasets.pipelines import LoadAnnotations
@PIPELINES.register_module()
class NormalizePointsColor(object):
"""Normalize color of points
Normalize color of the points.
Args:
color_mean (list[float]): Mean color of the point cloud.
"""
def __init__(self, color_mean):
self.color_mean = color_mean
def __call__(self, results):
points = results['points']
assert points.shape[1] >= 6,\
f'Expected points to have >= 6 channels, got {points.shape[1]}.'
points[:, 3:6] = points[:, 3:6] - np.array(self.color_mean) / 256.0
results['points'] = points
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(color_mean={})'.format(self.color_mean)
return repr_str
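A numeric sketch of the normalization above. Note the operator precedence: only color_mean is divided by 256, i.e. the result is points - (color_mean / 256.0), which assumes the stored colors are already in [0, 1].

import numpy as np

color_mean = np.array([127.5, 127.5, 127.5])
colors = np.array([[0.6, 0.5, 0.4]])
normalized = colors - color_mean / 256.0
assert np.allclose(normalized, colors - 0.498, atol=1e-3)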
@PIPELINES.register_module()
class LoadPointsFromFile(object):
"""Load Points From File.
Load sunrgbd and scannet points from file.
Args:
shift_height (bool): Whether to use shifted height.
load_dim (int): The dimension of the loaded points.
Default: 6.
use_dim (list[int]): Which dimensions of the points to be used.
Default: [0, 1, 2]. For KITTI dataset, set use_dim=4
or use_dim=[0, 1, 2, 3] to use the intensity dimension.
file_client_args (dict): Config dict of file clients, refer to
https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
for more details.
"""
def __init__(self,
load_dim=6,
use_dim=[0, 1, 2],
shift_height=False,
file_client_args=dict(backend='disk')):
self.shift_height = shift_height
if isinstance(use_dim, int):
use_dim = list(range(use_dim))
assert max(use_dim) < load_dim, \
f'Expect all used dimensions < {load_dim}, got {use_dim}'
self.load_dim = load_dim
self.use_dim = use_dim
self.file_client_args = file_client_args.copy()
self.file_client = None
def _load_points(self, pts_filename):
if self.file_client is None:
self.file_client = mmcv.FileClient(**self.file_client_args)
try:
pts_bytes = self.file_client.get(pts_filename)
points = np.frombuffer(pts_bytes, dtype=np.float32)
except ConnectionError:
mmcv.check_file_exist(pts_filename)
if pts_filename.endswith('.npy'):
points = np.load(pts_filename)
else:
points = np.fromfile(pts_filename, dtype=np.float32)
return points
def __call__(self, results):
pts_filename = results['pts_filename']
points = self._load_points(pts_filename)
points = points.reshape(-1, self.load_dim)
points = points[:, self.use_dim]
if self.shift_height:
floor_height = np.percentile(points[:, 2], 0.99)
height = points[:, 2] - floor_height
points = np.concatenate([points, np.expand_dims(height, 1)], 1)
results['points'] = points
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(shift_height={})'.format(self.shift_height)
repr_str += '(load_dim={})'.format(self.load_dim)
repr_str += '(use_dim={})'.format(self.use_dim)
return repr_str
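A sketch of the shift_height branch above: estimate a floor height from a low percentile of z (0.99, i.e. the 0.99th percentile, as in the code) and append z minus the floor as an extra channel.

import numpy as np

rng = np.random.default_rng(0)
points = rng.uniform(0.0, 3.0, (1000, 3))
floor_height = np.percentile(points[:, 2], 0.99)
height = points[:, 2] - floor_height
points = np.concatenate([points, height[:, None]], axis=1)
assert points.shape == (1000, 4)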
@PIPELINES.register_module()
class LoadAnnotations3D(LoadAnnotations):
"""Load Annotations3D.
Load instance mask and semantic mask of points and
encapsulate the items into related fields.
Args:
with_bbox_3d (bool, optional): Whether to load 3D boxes.
Defaults to True.
with_label_3d (bool, optional): Whether to load 3D labels.
Defaults to True.
with_mask_3d (bool, optional): Whether to load 3D instance masks
for points. Defaults to False.
with_seg_3d (bool, optional): Whether to load 3D semantic masks
for points. Defaults to False.
with_bbox (bool, optional): Whether to load 2D boxes.
Defaults to False.
with_label (bool, optional): Whether to load 2D labels.
Defaults to False.
with_mask (bool, optional): Whether to load 2D instance masks.
Defaults to False.
with_seg (bool, optional): Whether to load 2D semantic masks.
Defaults to False.
poly2mask (bool, optional): Whether to convert polygon annotations
to bitmasks. Defaults to True.
file_client_args (dict): Config dict of file clients, refer to
https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
for more details.
"""
def __init__(self,
with_bbox_3d=True,
with_label_3d=True,
with_mask_3d=False,
with_seg_3d=False,
with_bbox=False,
with_label=False,
with_mask=False,
with_seg=False,
poly2mask=True,
file_client_args=dict(backend='disk')):
super().__init__(
with_bbox,
with_label,
with_mask,
with_seg,
poly2mask,
file_client_args=file_client_args)
self.with_bbox_3d = with_bbox_3d
self.with_label_3d = with_label_3d
self.with_mask_3d = with_mask_3d
self.with_seg_3d = with_seg_3d
def _load_bboxes_3d(self, results):
results['gt_bboxes_3d'] = results['ann_info']['gt_bboxes_3d']
results['bbox3d_fields'].append('gt_bboxes_3d')
return results
def _load_labels_3d(self, results):
results['gt_labels_3d'] = results['ann_info']['gt_labels_3d']
return results
def _load_masks_3d(self, results):
pts_instance_mask_path = results['ann_info']['pts_instance_mask_path']
if self.file_client is None:
self.file_client = mmcv.FileClient(**self.file_client_args)
try:
mask_bytes = self.file_client.get(pts_instance_mask_path)
pts_instance_mask = np.frombuffer(mask_bytes, dtype=np.int)
except ConnectionError:
mmcv.check_file_exist(pts_instance_mask_path)
pts_instance_mask = np.fromfile(
pts_instance_mask_path, dtype=np.long)
results['pts_instance_mask'] = pts_instance_mask
results['pts_mask_fields'].append('pts_instance_mask')
return results
def _load_semantic_seg_3d(self, results):
pts_semantic_mask_path = results['ann_info']['pts_semantic_mask_path']
if self.file_client is None:
self.file_client = mmcv.FileClient(**self.file_client_args)
try:
mask_bytes = self.file_client.get(pts_semantic_mask_path)
# add .copy() to fix read-only bug
pts_semantic_mask = np.frombuffer(mask_bytes, dtype=np.int).copy()
except ConnectionError:
mmcv.check_file_exist(pts_semantic_mask_path)
pts_semantic_mask = np.fromfile(
pts_semantic_mask_path, dtype=np.long)
results['pts_semantic_mask'] = pts_semantic_mask
results['pts_seg_fields'].append('pts_semantic_mask')
return results
def __call__(self, results):
results = super().__call__(results)
if self.with_bbox_3d:
results = self._load_bboxes_3d(results)
if results is None:
return None
if self.with_label_3d:
results = self._load_labels_3d(results)
if self.with_mask_3d:
results = self._load_masks_3d(results)
if self.with_seg_3d:
results = self._load_semantic_seg_3d(results)
return results
def __repr__(self):
indent_str = ' '
repr_str = self.__class__.__name__ + '(\n'
repr_str += f'{indent_str}with_bbox_3d={self.with_bbox_3d},\n'
repr_str += f'{indent_str}with_label_3d={self.with_label_3d},\n'
repr_str += f'{indent_str}with_mask_3d={self.with_mask_3d},\n'
repr_str += f'{indent_str}with_seg_3d={self.with_seg_3d},\n'
repr_str += f'{indent_str}with_bbox={self.with_bbox},\n'
repr_str += f'{indent_str}with_label={self.with_label},\n'
repr_str += f'{indent_str}with_mask={self.with_mask},\n'
repr_str += f'{indent_str}with_seg={self.with_seg},\n'
repr_str += f'{indent_str}poly2mask={self.poly2mask})'
return repr_str
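A minimal sketch of the results dict this transform consumes; the field names follow the code above, the values here are placeholders only.

results = dict(
    ann_info=dict(
        gt_bboxes_3d=None,             # e.g. a 3D box structure instance
        gt_labels_3d=None,             # e.g. an array of class ids
        pts_instance_mask_path='...',  # read only when with_mask_3d=True
        pts_semantic_mask_path='...',  # read only when with_seg_3d=True
    ),
    bbox3d_fields=[],
    pts_mask_fields=[],
    pts_seg_fields=[],
)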
import numpy as np
from mmdet.datasets.builder import PIPELINES
@PIPELINES.register_module()
class IndoorPointSample(object):
"""Indoor point sample.
Sampling data to a certain number.
Args:
num_points (int): Number of points to be sampled.
"""
def __init__(self, num_points):
self.num_points = num_points
def points_random_sampling(self,
points,
num_samples,
replace=None,
return_choices=False):
"""Points random sampling.
Sample points to a certain number.
Args:
points (ndarray): 3D Points.
num_samples (int): Number of samples to be sampled.
replace (bool): Whether the sample is with or without replacement.
return_choices (bool): Whether to return the chosen indices.
Returns:
points (ndarray): 3D Points.
choices (ndarray): The generated random samples.
"""
if replace is None:
replace = (points.shape[0] < num_samples)
choices = np.random.choice(
points.shape[0], num_samples, replace=replace)
if return_choices:
return points[choices], choices
else:
return points[choices]
def __call__(self, results):
points = results['points']
points, choices = self.points_random_sampling(
points, self.num_points, return_choices=True)
pts_instance_mask = results.get('pts_instance_mask', None)
pts_semantic_mask = results.get('pts_semantic_mask', None)
results['points'] = points
if pts_instance_mask is not None and pts_semantic_mask is not None:
pts_instance_mask = pts_instance_mask[choices]
pts_semantic_mask = pts_semantic_mask[choices]
results['pts_instance_mask'] = pts_instance_mask
results['pts_semantic_mask'] = pts_semantic_mask
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(num_points={})'.format(self.num_points)
return repr_str
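A behavior sketch of points_random_sampling: when the cloud holds fewer points than requested, replace defaults to True so sampling with replacement still yields num_samples points.

import numpy as np

points = np.arange(12, dtype=np.float32).reshape(4, 3)  # only 4 points
num_samples = 6
replace = points.shape[0] < num_samples                 # True here
choices = np.random.choice(points.shape[0], num_samples, replace=replace)
assert points[choices].shape == (6, 3)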
@@ -2,6 +2,7 @@ import mmcv
import numpy as np
from mmdet.datasets.builder import PIPELINES
from mmdet.datasets.pipelines import LoadAnnotations
@PIPELINES.register_module()
@@ -103,3 +104,263 @@ class LoadPointsFromMultiSweeps(object):
def __repr__(self):
return f'{self.__class__.__name__}(sweeps_num={self.sweeps_num})'
@PIPELINES.register_module()
class PointSegClassMapping(object):
"""Map original semantic class to valid category ids.
Map valid classes as 0~len(valid_cat_ids)-1 and
others as len(valid_cat_ids).
Args:
valid_cat_ids (tuple[int]): A tuple of valid category ids.
"""
def __init__(self, valid_cat_ids):
self.valid_cat_ids = valid_cat_ids
def __call__(self, results):
assert 'pts_semantic_mask' in results
pts_semantic_mask = results['pts_semantic_mask']
neg_cls = len(self.valid_cat_ids)
for i in range(pts_semantic_mask.shape[0]):
if pts_semantic_mask[i] in self.valid_cat_ids:
converted_id = self.valid_cat_ids.index(pts_semantic_mask[i])
pts_semantic_mask[i] = converted_id
else:
pts_semantic_mask[i] = neg_cls
results['pts_semantic_mask'] = pts_semantic_mask
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(valid_cat_ids={})'.format(self.valid_cat_ids)
return repr_str
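A mapping sketch: valid ids are renumbered 0..len(valid_cat_ids)-1 in tuple order and everything else becomes the extra ignore class len(valid_cat_ids). A precomputed lookup table would avoid the per-point Python loop on large clouds.

import numpy as np

valid_cat_ids = (3, 7, 9)
mask = np.array([3, 9, 4, 7, 100])
mapped = np.array([
    valid_cat_ids.index(c) if c in valid_cat_ids else len(valid_cat_ids)
    for c in mask
])
assert (mapped == np.array([0, 2, 3, 1, 3])).all()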
@PIPELINES.register_module()
class NormalizePointsColor(object):
"""Normalize color of points
Normalize color of the points.
Args:
color_mean (list[float]): Mean color of the point cloud.
"""
def __init__(self, color_mean):
self.color_mean = color_mean
def __call__(self, results):
points = results['points']
assert points.shape[1] >= 6,\
f'Expected points to have >= 6 channels, got {points.shape[1]}.'
points[:, 3:6] = points[:, 3:6] - np.array(self.color_mean) / 256.0
results['points'] = points
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(color_mean={})'.format(self.color_mean)
return repr_str
@PIPELINES.register_module()
class LoadPointsFromFile(object):
"""Load Points From File.
Load sunrgbd and scannet points from file.
Args:
shift_height (bool): Whether to use shifted height.
load_dim (int): The dimension of the loaded points.
Default: 6.
use_dim (list[int]): Which dimensions of the points to be used.
Default: [0, 1, 2]. For KITTI dataset, set use_dim=4
or use_dim=[0, 1, 2, 3] to use the intensity dimension.
file_client_args (dict): Config dict of file clients, refer to
https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
for more details.
"""
def __init__(self,
load_dim=6,
use_dim=[0, 1, 2],
shift_height=False,
file_client_args=dict(backend='disk')):
self.shift_height = shift_height
if isinstance(use_dim, int):
use_dim = list(range(use_dim))
assert max(use_dim) < load_dim, \
f'Expect all used dimensions < {load_dim}, got {use_dim}'
self.load_dim = load_dim
self.use_dim = use_dim
self.file_client_args = file_client_args.copy()
self.file_client = None
def _load_points(self, pts_filename):
if self.file_client is None:
self.file_client = mmcv.FileClient(**self.file_client_args)
try:
pts_bytes = self.file_client.get(pts_filename)
points = np.frombuffer(pts_bytes, dtype=np.float32)
except ConnectionError:
mmcv.check_file_exist(pts_filename)
if pts_filename.endswith('.npy'):
points = np.load(pts_filename)
else:
points = np.fromfile(pts_filename, dtype=np.float32)
return points
def __call__(self, results):
pts_filename = results['pts_filename']
points = self._load_points(pts_filename)
points = points.reshape(-1, self.load_dim)
points = points[:, self.use_dim]
if self.shift_height:
floor_height = np.percentile(points[:, 2], 0.99)
height = points[:, 2] - floor_height
points = np.concatenate([points, np.expand_dims(height, 1)], 1)
results['points'] = points
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(shift_height={})'.format(self.shift_height)
repr_str += '(load_dim={})'.format(self.load_dim)
repr_str += '(use_dim={})'.format(self.use_dim)
return repr_str
@PIPELINES.register_module()
class LoadAnnotations3D(LoadAnnotations):
"""Load Annotations3D.
Load instance mask and semantic mask of points and
encapsulate the items into related fields.
Args:
with_bbox_3d (bool, optional): Whether to load 3D boxes.
Defaults to True.
with_label_3d (bool, optional): Whether to load 3D labels.
Defaults to True.
with_mask_3d (bool, optional): Whether to load 3D instance masks
for points. Defaults to False.
with_seg_3d (bool, optional): Whether to load 3D semantic masks
for points. Defaults to False.
with_bbox (bool, optional): Whether to load 2D boxes.
Defaults to False.
with_label (bool, optional): Whether to load 2D labels.
Defaults to False.
with_mask (bool, optional): Whether to load 2D instance masks.
Defaults to False.
with_seg (bool, optional): Whether to load 2D semantic masks.
Defaults to False.
poly2mask (bool, optional): Whether to convert polygon annotations
to bitmasks. Defaults to True.
file_client_args (dict): Config dict of file clients, refer to
https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
for more details.
"""
def __init__(self,
with_bbox_3d=True,
with_label_3d=True,
with_mask_3d=False,
with_seg_3d=False,
with_bbox=False,
with_label=False,
with_mask=False,
with_seg=False,
poly2mask=True,
file_client_args=dict(backend='disk')):
super().__init__(
with_bbox,
with_label,
with_mask,
with_seg,
poly2mask,
file_client_args=file_client_args)
self.with_bbox_3d = with_bbox_3d
self.with_label_3d = with_label_3d
self.with_mask_3d = with_mask_3d
self.with_seg_3d = with_seg_3d
def _load_bboxes_3d(self, results):
results['gt_bboxes_3d'] = results['ann_info']['gt_bboxes_3d']
results['bbox3d_fields'].append('gt_bboxes_3d')
return results
def _load_labels_3d(self, results):
results['gt_labels_3d'] = results['ann_info']['gt_labels_3d']
return results
def _load_masks_3d(self, results):
pts_instance_mask_path = results['ann_info']['pts_instance_mask_path']
if self.file_client is None:
self.file_client = mmcv.FileClient(**self.file_client_args)
try:
mask_bytes = self.file_client.get(pts_instance_mask_path)
pts_instance_mask = np.frombuffer(mask_bytes, dtype=np.int)
except ConnectionError:
mmcv.check_file_exist(pts_instance_mask_path)
pts_instance_mask = np.fromfile(
pts_instance_mask_path, dtype=np.long)
results['pts_instance_mask'] = pts_instance_mask
results['pts_mask_fields'].append('pts_instance_mask')
return results
def _load_semantic_seg_3d(self, results):
pts_semantic_mask_path = results['ann_info']['pts_semantic_mask_path']
if self.file_client is None:
self.file_client = mmcv.FileClient(**self.file_client_args)
try:
mask_bytes = self.file_client.get(pts_semantic_mask_path)
# add .copy() to fix read-only bug
pts_semantic_mask = np.frombuffer(mask_bytes, dtype=np.int).copy()
except ConnectionError:
mmcv.check_file_exist(pts_semantic_mask_path)
pts_semantic_mask = np.fromfile(
pts_semantic_mask_path, dtype=np.long)
results['pts_semantic_mask'] = pts_semantic_mask
results['pts_seg_fields'].append('pts_semantic_mask')
return results
def __call__(self, results):
results = super().__call__(results)
if self.with_bbox_3d:
results = self._load_bboxes_3d(results)
if results is None:
return None
if self.with_label_3d:
results = self._load_labels_3d(results)
if self.with_mask_3d:
results = self._load_masks_3d(results)
if self.with_seg_3d:
results = self._load_semantic_seg_3d(results)
return results
def __repr__(self):
indent_str = ' '
repr_str = self.__class__.__name__ + '(\n'
repr_str += f'{indent_str}with_bbox_3d={self.with_bbox_3d},\n'
repr_str += f'{indent_str}with_label_3d={self.with_label_3d},\n'
repr_str += f'{indent_str}with_mask_3d={self.with_mask_3d},\n'
repr_str += f'{indent_str}with_seg_3d={self.with_seg_3d},\n'
repr_str += f'{indent_str}with_bbox={self.with_bbox},\n'
repr_str += f'{indent_str}with_label={self.with_label},\n'
repr_str += f'{indent_str}with_mask={self.with_mask},\n'
repr_str += f'{indent_str}with_seg={self.with_seg},\n'
repr_str += f'{indent_str}poly2mask={self.poly2mask})'
return repr_str
from mmdet.datasets.builder import PIPELINES
@PIPELINES.register_module()
class PointSegClassMapping(object):
"""Map original semantic class to valid category ids.
Map valid classes as 0~len(valid_cat_ids)-1 and
others as len(valid_cat_ids).
Args:
valid_cat_ids (tuple[int]): A tuple of valid category ids.
"""
def __init__(self, valid_cat_ids):
self.valid_cat_ids = valid_cat_ids
def __call__(self, results):
assert 'pts_semantic_mask' in results
pts_semantic_mask = results['pts_semantic_mask']
neg_cls = len(self.valid_cat_ids)
for i in range(pts_semantic_mask.shape[0]):
if pts_semantic_mask[i] in self.valid_cat_ids:
converted_id = self.valid_cat_ids.index(pts_semantic_mask[i])
pts_semantic_mask[i] = converted_id
else:
pts_semantic_mask[i] = neg_cls
results['pts_semantic_mask'] = pts_semantic_mask
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(valid_cat_ids={})'.format(self.valid_cat_ids)
return repr_str
@@ -21,35 +21,67 @@ class RandomFlip3D(RandomFlip):
images. If True, it will apply the same flip as that applied to the
2D images. If False, it will decide randomly and independently of the
2D images whether to flip the point cloud.
flip_ratio_bev_horizontal (float, optional): The flipping probability
in horizontal direction.
flip_ratio_bev_vertical (float, optional): The flipping probability
in vertical direction.
"""
def __init__(self,
sync_2d=True,
flip_ratio_bev_horizontal=0.0,
flip_ratio_bev_vertical=0.0,
**kwargs):
super(RandomFlip3D, self).__init__(
flip_ratio=flip_ratio_bev_horizontal, **kwargs)
self.sync_2d = sync_2d
self.flip_ratio_bev_vertical = flip_ratio_bev_vertical
if flip_ratio_bev_horizontal is not None:
assert isinstance(
flip_ratio_bev_horizontal,
(int, float)) and 0 <= flip_ratio_bev_horizontal <= 1
if flip_ratio_bev_vertical is not None:
assert isinstance(
flip_ratio_bev_vertical,
(int, float)) and 0 <= flip_ratio_bev_vertical <= 1
def random_flip_data_3d(self, input_dict, direction='horizontal'):
assert direction in ['horizontal', 'vertical']
for key in input_dict['bbox3d_fields']:
input_dict['points'] = input_dict[key].flip(
direction, points=input_dict['points'])
def __call__(self, input_dict):
# flip 2D image and its annotations
super(RandomFlip3D, self).__call__(input_dict)
if self.sync_2d:
input_dict['pcd_horizontal_flip'] = input_dict['flip']
input_dict['pcd_vertical_flip'] = False
else:
if 'pcd_horizontal_flip' not in input_dict:
flip_horizontal = True if np.random.rand(
) < self.flip_ratio else False
input_dict['pcd_horizontal_flip'] = flip_horizontal
if 'pcd_vertical_flip' not in input_dict:
flip_vertical = True if np.random.rand(
) < self.flip_ratio_bev_vertical else False
input_dict['pcd_vertical_flip'] = flip_vertical
if input_dict['pcd_horizontal_flip']:
self.random_flip_data_3d(input_dict, 'horizontal')
if input_dict['pcd_vertical_flip']:
self.random_flip_data_3d(input_dict, 'vertical')
return input_dict
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(sync_2d={},'.format(self.sync_2d)
repr_str += ' flip_ratio_bev_horizontal={},'.format(self.flip_ratio)
repr_str += ' flip_ratio_bev_vertical={})'.format(
self.flip_ratio_bev_vertical)
return repr_str
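Usage sketch (config dicts in the style of the tests below): with sync_2d=False the BEV flips are drawn independently; with sync_2d=True the horizontal point-cloud flip simply mirrors the 2D image flip decision and no vertical flip is applied.

flip_independent = dict(
    type='RandomFlip3D',
    sync_2d=False,
    flip_ratio_bev_horizontal=0.5,
    flip_ratio_bev_vertical=0.5)

flip_synced = dict(
    type='RandomFlip3D', sync_2d=True, flip_ratio_bev_horizontal=0.5)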
@PIPELINES.register_module()
@@ -195,15 +227,19 @@ class GlobalRotScaleTrans(object):
noise. This applies a random translation to the scene, sampled
from a Gaussian distribution whose standard deviation is set by
``translation_std``. Defaults to [0, 0, 0].
shift_height (bool): Whether to shift the height channel
(the fourth dimension of indoor points) when scaling.
"""
def __init__(self,
rot_range=[-0.78539816, 0.78539816],
scale_ratio_range=[0.95, 1.05],
translation_std=[0, 0, 0],
shift_height=False):
self.rot_range = rot_range
self.scale_ratio_range = scale_ratio_range
self.translation_std = translation_std
self.shift_height = shift_height
def _trans_bbox_points(self, input_dict):
if not isinstance(self.translation_std, (list, tuple, np.ndarray)):
@@ -227,18 +263,19 @@ class GlobalRotScaleTrans(object):
rotation = [-rotation, rotation]
noise_rotation = np.random.uniform(rotation[0], rotation[1])
for key in input_dict['bbox3d_fields']:
if len(input_dict[key].tensor) != 0:
points, rot_mat_T = input_dict[key].rotate(
noise_rotation, input_dict['points'])
input_dict['points'] = points
input_dict['pcd_rotation'] = rot_mat_T
def _scale_bbox_points(self, input_dict):
scale = input_dict['pcd_scale_factor']
input_dict['points'][:, :3] *= scale
if self.shift_height:
input_dict['points'][:, -1] *= scale
for key in input_dict['bbox3d_fields']:
input_dict[key].scale(scale)
@@ -262,6 +299,7 @@ class GlobalRotScaleTrans(object):
repr_str += '(rot_range={},'.format(self.rot_range)
repr_str += ' scale_ratio_range={},'.format(self.scale_ratio_range)
repr_str += ' translation_std={},'.format(self.translation_std)
repr_str += ' shift_height={})'.format(self.shift_height)
return repr_str
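A numpy sketch of the new shift_height branch in _scale_bbox_points: indoor points that carry a height channel in the last column must have that channel rescaled along with x, y, z.

import numpy as np

scale = 1.05
points = np.array([[1.0, 2.0, 0.5, 0.4]])  # x, y, z, height
points[:, :3] *= scale
points[:, -1] *= scale                     # only when shift_height=True
assert np.allclose(points, [[1.05, 2.1, 0.525, 0.42]])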
@@ -283,23 +321,6 @@ class ObjectRangeFilter(object):
self.pcd_range = np.array(point_cloud_range, dtype=np.float32)
self.bev_range = self.pcd_range[[0, 1, 3, 4]]
@staticmethod
def filter_gt_box_outside_range(gt_bboxes_3d, limit_range):
"""remove gtbox outside training range.
this function should be applied after other prep functions
Args:
gt_bboxes_3d ([type]): [description]
limit_range ([type]): [description]
"""
gt_bboxes_3d_bv = box_np_ops.center_to_corner_box2d(
gt_bboxes_3d[:, [0, 1]], gt_bboxes_3d[:, [3, 3 + 1]],
gt_bboxes_3d[:, 6])
bounding_box = box_np_ops.minmax_to_corner_2d(
np.asarray(limit_range)[np.newaxis, ...])
ret = box_np_ops.points_in_convex_polygon_jit(
gt_bboxes_3d_bv.reshape(-1, 2), bounding_box)
return np.any(ret.reshape(-1, 4), axis=1)
def __call__(self, input_dict):
gt_bboxes_3d = input_dict['gt_bboxes_3d']
gt_labels_3d = input_dict['gt_labels_3d']
@@ -371,3 +392,67 @@ class ObjectNameFilter(object):
repr_str = self.__class__.__name__
repr_str += f'(classes={self.classes})'
return repr_str
@PIPELINES.register_module()
class IndoorPointSample(object):
"""Indoor point sample.
Sampling data to a certain number.
Args:
num_points (int): Number of points to be sampled.
"""
def __init__(self, num_points):
self.num_points = num_points
def points_random_sampling(self,
points,
num_samples,
replace=None,
return_choices=False):
"""Points random sampling.
Sample points to a certain number.
Args:
points (ndarray): 3D Points.
num_samples (int): Number of samples to be sampled.
replace (bool): Whether the sample is with or without replacement.
return_choices (bool): Whether to return the chosen indices.
Returns:
points (ndarray): 3D Points.
choices (ndarray): The generated random samples.
"""
if replace is None:
replace = (points.shape[0] < num_samples)
choices = np.random.choice(
points.shape[0], num_samples, replace=replace)
if return_choices:
return points[choices], choices
else:
return points[choices]
def __call__(self, results):
points = results['points']
points, choices = self.points_random_sampling(
points, self.num_points, return_choices=True)
pts_instance_mask = results.get('pts_instance_mask', None)
pts_semantic_mask = results.get('pts_semantic_mask', None)
results['points'] = points
if pts_instance_mask is not None and pts_semantic_mask is not None:
pts_instance_mask = pts_instance_mask[choices]
pts_semantic_mask = pts_semantic_mask[choices]
results['pts_instance_mask'] = pts_instance_mask
results['pts_semantic_mask'] = pts_semantic_mask
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(num_points={})'.format(self.num_points)
return repr_str
@@ -56,9 +56,22 @@ class Base3DRoIHead(nn.Module, metaclass=ABCMeta):
gt_bboxes,
gt_labels,
gt_bboxes_ignore=None,
gt_masks=None,
**kwargs):
"""Forward function during training"""
"""Forward function during training
Args:
x (dict): Contains features from the first stage.
img_metas (list[dict]): Meta info of each image.
proposal_list (list[dict]): Proposal information from rpn.
gt_bboxes (list[:obj:BaseInstance3DBoxes]):
GT bboxes of each sample. The bboxes are encapsulated
by 3D box structures.
gt_labels (list[LongTensor]): GT labels of each sample.
gt_bboxes_ignore (list[Tensor], optional): Specify which bounding.
Returns:
dict: losses from each head.
"""
pass
def simple_test(self,
......
@@ -231,6 +231,15 @@ class PartA2BboxHead(nn.Module):
normal_init(self.conv_reg[-1].conv, mean=0, std=0.001)
def forward(self, seg_feats, part_feats):
"""Forward pass.
Args:
seg_feats (torch.Tensor): Point-wise semantic features.
part_feats (torch.Tensor): Point-wise part prediction features.
Returns:
tuple[torch.Tensor]: Score of class and bbox predictions.
"""
# (B * N, out_x, out_y, out_z, 4)
rcnn_batch_size = part_feats.shape[0]
@@ -273,6 +282,22 @@ class PartA2BboxHead(nn.Module):
def loss(self, cls_score, bbox_pred, rois, labels, bbox_targets,
pos_gt_bboxes, reg_mask, label_weights, bbox_weights):
"""Coumputing losses.
Args:
cls_score (Torch.tensor): Scores of each roi.
bbox_pred (Torch.tensor): Predictions of bboxes.
rois (Torch.tensor): Roi bboxes.
labels (Torch.tensor): Labels of class.
bbox_targets (Torch.tensor): Target of positive bboxes.
pos_gt_bboxes (Torch.tensor): Gt of positive bboxes.
reg_mask (Torch.tensor): Mask for positive bboxes.
label_weights (Torch.tensor): Weights of class loss.
bbox_weights (Torch.tensor): Weights of bbox loss.
Returns:
dict: Computed losses.
"""
losses = dict()
rcnn_batch_size = cls_score.shape[0]
@@ -325,6 +350,17 @@ class PartA2BboxHead(nn.Module):
return losses
def get_targets(self, sampling_results, rcnn_train_cfg, concat=True):
"""Generate targets.
Args:
sampling_results (list[:obj:`SamplingResult`]):
Sampled results from rois.
rcnn_train_cfg (ConfigDict): Training config of rcnn.
concat (bool): Whether to concatenate targets between batches.
Returns:
tuple: Targets of boxes and class prediction.
"""
pos_bboxes_list = [res.pos_bboxes for res in sampling_results]
pos_gt_bboxes_list = [res.pos_gt_bboxes for res in sampling_results]
iou_list = [res.iou for res in sampling_results]
@@ -444,6 +480,20 @@ class PartA2BboxHead(nn.Module):
class_pred,
img_metas,
cfg=None):
"""Generate bboxes from bbox head predictions.
Args:
rois (torch.Tensor): Roi bboxes.
cls_score (torch.Tensor): Scores of bboxes.
bbox_pred (torch.Tensor): Bbox predictions.
class_labels (torch.Tensor): Label of classes.
class_pred (torch.Tensor): Score for nms.
img_metas (list[dict]): Contains pcd and img's meta info.
cfg (ConfigDict): Testing config.
Returns:
list[tuple]: Decoded bbox, scores and labels after nms.
"""
roi_batch_id = rois[..., 0]
roi_boxes = rois[..., 1:] # boxes without batch id
batch_size = int(roi_batch_id.max().item() + 1)
......
@@ -50,6 +50,15 @@ class PointwiseSemanticHead(nn.Module):
self.loss_part = build_loss(loss_part)
def forward(self, x):
"""Forward pass.
Args:
x (torch.Tensor): Features from the first stage.
Returns:
dict: Part features, segmentation and part predictions.
"""
seg_preds = self.seg_cls_layer(x) # (N, 1)
part_preds = self.seg_reg_layer(x) # (N, 3)
......
@@ -10,7 +10,17 @@ from .base_3droi_head import Base3DRoIHead
@HEADS.register_module()
class PartAggregationROIHead(Base3DRoIHead):
"""Part aggregation roi head for PartA2"""
"""Part aggregation roi head for PartA2
Args:
semantic_head (ConfigDict): Config of semantic head.
num_classes (int): The number of classes.
seg_roi_extractor (ConfigDict): Config of seg_roi_extractor.
part_roi_extractor (ConfigDict): Config of part_roi_extractor.
bbox_head (ConfigDict): Config of bbox_head.
train_cfg (ConfigDict): Training config.
test_cfg (ConfigDict): Testing config.
"""
def __init__(self,
semantic_head,
@@ -156,6 +166,18 @@ class PartAggregationROIHead(Base3DRoIHead):
return bbox_results
def _bbox_forward(self, seg_feats, part_feats, voxels_dict, rois):
"""Forward function of roi_extractor and bbox_head.
Args:
seg_feats (torch.Tensor): Point-wise semantic features.
part_feats (torch.Tensor): Point-wise part prediction features.
voxels_dict (dict): Contains information of voxels.
rois (Tensor): Roi boxes.
Returns:
dict: Contains predictions of bbox_head and
features of roi_extractor.
"""
pooled_seg_feats = self.seg_roi_extractor(seg_feats,
voxels_dict['voxel_centers'],
voxels_dict['coors'][..., 0],
......
@@ -25,12 +25,16 @@ def test_getitem():
with_mask_3d=True,
with_seg_3d=True),
dict(type='IndoorPointSample', num_points=5),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=1.0,
flip_ratio_bev_vertical=1.0),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.087266, 0.087266],
scale_ratio_range=[1.0, 1.0],
shift_height=True),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
@@ -38,9 +42,7 @@ def test_getitem():
'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
'pts_instance_mask'
],
meta_keys=['file_name', 'sample_idx', 'pcd_rotation']),
]
scannet_dataset = ScanNetDataset(root_path, ann_file, pipelines)
@@ -51,28 +53,24 @@ def test_getitem():
pts_semantic_mask = data['pts_semantic_mask']._data
pts_instance_mask = data['pts_instance_mask']._data
file_name = data['img_metas']._data['file_name']
pcd_rotation = data['img_metas']._data['pcd_rotation']
sample_idx = data['img_metas']._data['sample_idx']
expected_rotation = np.array([[0.99654, 0.08311407, 0.],
[-0.08311407, 0.99654, 0.], [0., 0., 1.]])
assert file_name == './tests/data/scannet/points/scene0000_00.bin'
assert np.allclose(pcd_rotation, expected_rotation, 1e-3)
assert sample_idx == 'scene0000_00'
expected_points = torch.tensor([[-2.7231, -2.2068, 2.3543, 2.3895],
[-0.4065, -3.4857, 2.1330, 2.1682],
[-1.4578, 1.3510, -0.0441, -0.0089],
[2.2428, -1.1323, -0.0288, 0.0064],
[0.7052, -2.9752, 1.5560, 1.5912]])
expected_gt_bboxes_3d = torch.tensor(
[[-1.1835, -3.6317, 1.5704, 1.7577, 0.3761, 0.5724, 0.0000],
[-3.1832, 3.2269, 1.1911, 0.6727, 0.2251, 0.6715, 0.0000],
[-0.9598, -2.2864, 0.0093, 0.7506, 2.5709, 1.2145, 0.0000],
[-2.6988, -2.7354, 0.8288, 0.7680, 1.8877, 0.2870, 0.0000],
[3.2989, 0.2885, -0.0090, 0.7600, 3.8814, 2.1603, 0.0000]])
expected_gt_labels = np.array([
6, 6, 4, 9, 11, 11, 10, 0, 15, 17, 17, 17, 3, 12, 4, 4, 14, 1, 0, 0, 0,
0, 0, 0, 5, 5, 5
@@ -82,7 +80,7 @@ def test_getitem():
original_classes = scannet_dataset.CLASSES
assert scannet_dataset.CLASSES == class_names
assert torch.allclose(points, expected_points, 1e-2)
assert gt_bboxes_3d.tensor[:5].shape == (5, 7)
assert torch.allclose(gt_bboxes_3d.tensor[:5], expected_gt_bboxes_3d, 1e-2)
assert np.all(gt_labels.numpy() == expected_gt_labels)
......
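A sanity check relating the expected_rotation matrix in the test above to a single yaw angle: it equals the transpose R(t).T of a z-rotation with sin(t) = 0.08311407, i.e. roughly 4.77 degrees, which lies inside the configured rot_range of +/-0.087266 rad.

import numpy as np

t = np.arcsin(0.08311407)
c, s = np.cos(t), np.sin(t)
rot_mat_T = np.array([[c, s, 0.], [-s, c, 0.], [0., 0., 1.]])
assert np.allclose(rot_mat_T,
                   [[0.99654, 0.08311407, 0.],
                    [-0.08311407, 0.99654, 0.],
                    [0., 0., 1.]], atol=1e-5)
assert abs(t) <= 0.087266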
@@ -17,20 +17,24 @@ def test_getitem():
load_dim=6,
use_dim=[0, 1, 2]),
dict(type='LoadAnnotations3D'),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.523599, 0.523599],
scale_ratio_range=[0.85, 1.15],
shift_height=True),
dict(type='IndoorPointSample', num_points=5),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'],
meta_keys=[
'file_name', 'pcd_horizontal_flip', 'sample_idx',
'pcd_scale_factor', 'pcd_rotation'
]),
]
@@ -40,32 +44,32 @@ def test_getitem():
gt_bboxes_3d = data['gt_bboxes_3d']._data
gt_labels_3d = data['gt_labels_3d']._data
file_name = data['img_metas']._data['file_name']
pcd_horizontal_flip = data['img_metas']._data['pcd_horizontal_flip']
pcd_scale_factor = data['img_metas']._data['pcd_scale_factor']
pcd_rotation = data['img_metas']._data['pcd_rotation']
sample_idx = data['img_metas']._data['sample_idx']
pcd_rotation_expected = np.array([[0.99889565, 0.04698427, 0.],
[-0.04698427, 0.99889565, 0.],
[0., 0., 1.]])
assert file_name == './tests/data/sunrgbd/points/000001.bin'
assert pcd_horizontal_flip is False
assert abs(pcd_scale_factor - 0.9770964398016714) < 1e-5
assert np.allclose(pcd_rotation, pcd_rotation_expected, 1e-3)
assert sample_idx == 1
expected_points = torch.tensor([[-0.9904, 1.2596, 0.1105, 0.0905],
[-0.9948, 1.2758, 0.0437, 0.0238],
[-0.9866, 1.2641, 0.0504, 0.0304],
[-0.9915, 1.2586, 0.1265, 0.1065],
[-0.9890, 1.2561, 0.1216, 0.1017]])
expected_gt_bboxes_3d = torch.tensor(
[[0.8308, 4.1168, -1.2035, 2.2493, 1.8444, 1.9245, 1.6486],
[2.3002, 4.8149, -1.2442, 0.5718, 0.8629, 0.9510, 1.6030],
[-1.1477, 1.8090, -1.1725, 0.6965, 1.5273, 2.0563, 0.0552]])
expected_gt_labels = np.array([0, 7, 6])
original_classes = sunrgbd_dataset.CLASSES
assert torch.allclose(points, expected_points, 1e-2)
assert torch.allclose(gt_bboxes_3d.tensor, expected_gt_bboxes_3d, 1e-3)
assert np.all(gt_labels_3d.numpy() == expected_gt_labels)
assert original_classes == class_names
......
import numpy as np
import torch
from mmdet3d.core.bbox import DepthInstance3DBoxes
from mmdet3d.datasets.pipelines import (IndoorFlipData,
IndoorGlobalRotScaleTrans)
def test_indoor_flip_data():
np.random.seed(0)
sunrgbd_indoor_flip_data = IndoorFlipData(1, 1)
sunrgbd_results = dict()
sunrgbd_results['points'] = np.array(
[[1.02828765e+00, 3.65790772e+00, 1.97294697e-01, 1.61959505e+00],
[-3.95979017e-01, 1.05465031e+00, -7.49204338e-01, 6.73096001e-01]])
sunrgbd_results['gt_bboxes_3d'] = DepthInstance3DBoxes(
np.array([[
0.213684, 1.036364, -0.982323, 0.61541, 0.572574, 0.872728,
3.07028526
],
[
-0.449953, 1.395455, -1.027778, 1.500956, 1.637298,
0.636364, -1.58242359
]]))
sunrgbd_results = sunrgbd_indoor_flip_data(sunrgbd_results)
sunrgbd_points = sunrgbd_results['points']
sunrgbd_gt_bboxes_3d = sunrgbd_results['gt_bboxes_3d']
expected_sunrgbd_points = np.array(
[[-1.02828765, 3.65790772, 0.1972947, 1.61959505],
[0.39597902, 1.05465031, -0.74920434, 0.673096]])
expected_sunrgbd_gt_bboxes_3d = torch.tensor(
[[-0.2137, 1.0364, -0.9823, 0.6154, 0.5726, 0.8727, 0.0713],
[0.4500, 1.3955, -1.0278, 1.5010, 1.6373, 0.6364, 4.7240]])
assert np.allclose(sunrgbd_points, expected_sunrgbd_points)
assert torch.allclose(sunrgbd_gt_bboxes_3d.tensor,
expected_sunrgbd_gt_bboxes_3d, 1e-3)
np.random.seed(0)
scannet_indoor_flip_data = IndoorFlipData(1, 1)
scannet_results = dict()
scannet_results['points'] = np.array(
[[1.6110241e+00, -1.6903955e-01, 5.8115810e-01, 5.9897250e-01],
[1.3978075e+00, 4.2035791e-01, 3.8729519e-01, 4.0510958e-01]])
scannet_results['gt_bboxes_3d'] = DepthInstance3DBoxes(
np.array([[
0.55903838, 0.48201692, 0.65688646, 0.65370704, 0.60029864,
0.5163464
],
[
-0.03226406, 1.70392646, 0.60348618, 0.65165804,
0.72084366, 0.64667457
]]),
box_dim=6,
with_yaw=False)
scannet_results = scannet_indoor_flip_data(scannet_results)
scannet_points = scannet_results['points']
scannet_gt_bboxes_3d = scannet_results['gt_bboxes_3d']
expected_scannet_points = np.array(
[[-1.6110241, 0.16903955, 0.5811581, 0.5989725],
[-1.3978075, -0.42035791, 0.38729519, 0.40510958]])
expected_scannet_gt_bboxes_3d = torch.tensor(
[[-0.5590, -0.4820, 0.6569, 0.6537, 0.6003, 0.5163, 0.0000],
[0.0323, -1.7039, 0.6035, 0.6517, 0.7208, 0.6467, 0.0000]])
assert np.allclose(scannet_points, expected_scannet_points)
assert torch.allclose(scannet_gt_bboxes_3d.tensor,
expected_scannet_gt_bboxes_3d, 1e-2)
def test_global_rot_scale():
np.random.seed(0)
sunrgbd_augment = IndoorGlobalRotScaleTrans(
True, rot_range=[-1 / 6, 1 / 6], scale_range=[0.85, 1.15])
sunrgbd_results = dict()
sunrgbd_results['points'] = np.array(
[[1.02828765e+00, 3.65790772e+00, 1.97294697e-01, 1.61959505e+00],
[-3.95979017e-01, 1.05465031e+00, -7.49204338e-01, 6.73096001e-01]])
sunrgbd_results['gt_bboxes_3d'] = DepthInstance3DBoxes(
np.array([[
0.213684, 1.036364, -0.982323, 0.61541, 0.572574, 0.872728,
3.07028526
],
[
-0.449953, 1.395455, -1.027778, 1.500956, 1.637298,
0.636364, -1.58242359
]]))
sunrgbd_results = sunrgbd_augment(sunrgbd_results)
sunrgbd_points = sunrgbd_results['points']
sunrgbd_gt_bboxes_3d = sunrgbd_results['gt_bboxes_3d']
expected_sunrgbd_points = np.array(
[[0.89427376, 3.94489646, 0.21003141, 1.72415094],
[-0.47835783, 1.09972989, -0.79757058, 0.71654893]])
expected_sunrgbd_gt_bboxes_3d = torch.tensor(
[[0.1708, 1.1135, -1.0457, 0.6551, 0.6095, 0.9291, 3.0192],
[-0.5543, 1.4591, -1.0941, 1.5979, 1.7430, 0.6774, -1.6335]])
assert np.allclose(sunrgbd_points, expected_sunrgbd_points)
assert torch.allclose(sunrgbd_gt_bboxes_3d.tensor,
expected_sunrgbd_gt_bboxes_3d, 1e-3)
np.random.seed(0)
scannet_augment = IndoorGlobalRotScaleTrans(
True, rot_range=[-1 * 1 / 36, 1 / 36], scale_range=None)
scannet_results = dict()
scannet_results['points'] = np.array(
[[1.6110241e+00, -1.6903955e-01, 5.8115810e-01, 5.9897250e-01],
[1.3978075e+00, 4.2035791e-01, 3.8729519e-01, 4.0510958e-01]])
scannet_results['gt_bboxes_3d'] = DepthInstance3DBoxes(
np.array([[
0.55903838, 0.48201692, 0.65688646, 0.65370704, 0.60029864,
0.5163464
],
[
-0.03226406, 1.70392646, 0.60348618, 0.65165804,
0.72084366, 0.64667457
]]),
box_dim=6,
with_yaw=False)
scannet_results = scannet_augment(scannet_results)
scannet_points = scannet_results['points']
scannet_gt_bboxes_3d = scannet_results['gt_bboxes_3d']
expected_scannet_points = np.array(
[[1.61240576, -0.15530836, 0.5811581, 0.5989725],
[1.39417555, 0.43225122, 0.38729519, 0.40510958]])
expected_scannet_gt_bboxes_3d = torch.tensor(
[[0.5549, 0.4868, 0.6569, 0.6588, 0.6058, 0.5163, 0.0000],
[-0.0468, 1.7036, 0.6035, 0.6578, 0.7264, 0.6467, 0.0000]])
assert np.allclose(scannet_points, expected_scannet_points)
assert torch.allclose(scannet_gt_bboxes_3d.tensor,
expected_scannet_gt_bboxes_3d, 1e-3)
@@ -28,12 +28,16 @@ def test_scannet_pipeline():
with_mask_3d=True,
with_seg_3d=True),
dict(type='IndoorPointSample', num_points=5),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=1.0,
flip_ratio_bev_vertical=1.0),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.087266, 0.087266],
scale_ratio_range=[1.0, 1.0],
shift_height=True),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
@@ -63,6 +67,7 @@ def test_scannet_pipeline():
scannet_gt_bboxes_3d, box_dim=6, with_yaw=False)
results['ann_info']['gt_labels_3d'] = scannet_gt_labels_3d
results['img_fields'] = []
results['bbox3d_fields'] = []
results['pts_mask_fields'] = []
results['pts_seg_fields'] = []
@@ -74,25 +79,24 @@ def test_scannet_pipeline():
gt_labels_3d = results['gt_labels_3d']._data
pts_semantic_mask = results['pts_semantic_mask']._data
pts_instance_mask = results['pts_instance_mask']._data
expected_points = torch.tensor([[-2.7231, -2.2068, 2.3543, 2.3895],
[-0.4065, -3.4857, 2.1330, 2.1682],
[-1.4578, 1.3510, -0.0441, -0.0089],
[2.2428, -1.1323, -0.0288, 0.0064],
[0.7052, -2.9752, 1.5560, 1.5912]])
expected_gt_bboxes_3d = torch.tensor(
[[-1.1835, -3.6317, 1.8565, 1.7577, 0.3761, 0.5724, 0.0000],
[-3.1832, 3.2269, 1.5268, 0.6727, 0.2251, 0.6715, 0.0000],
[-0.9598, -2.2864, 0.6165, 0.7506, 2.5709, 1.2145, 0.0000],
[-2.6988, -2.7354, 0.9722, 0.7680, 1.8877, 0.2870, 0.0000],
[3.2989, 0.2885, 1.0712, 0.7600, 3.8814, 2.1603, 0.0000]])
expected_gt_labels_3d = np.array([
6, 6, 4, 9, 11, 11, 10, 0, 15, 17, 17, 17, 3, 12, 4, 4, 14, 1, 0, 0, 0,
0, 0, 0, 5, 5, 5
])
expected_pts_semantic_mask = np.array([3, 1, 2, 2, 15])
expected_pts_instance_mask = np.array([44, 22, 10, 10, 57])
assert torch.allclose(points, expected_points, 1e-2)
assert torch.allclose(gt_bboxes_3d.tensor[:5, :], expected_gt_bboxes_3d,
1e-2)
assert np.all(gt_labels_3d.numpy() == expected_gt_labels_3d)
@@ -111,12 +115,16 @@ def test_sunrgbd_pipeline():
load_dim=6,
use_dim=[0, 1, 2]),
dict(type='LoadAnnotations3D'),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=1.0,
),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.523599, 0.523599],
scale_ratio_range=[0.85, 1.15],
shift_height=True),
dict(type='IndoorPointSample', num_points=5),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
@@ -140,6 +148,7 @@ def test_sunrgbd_pipeline():
results['ann_info'] = dict()
results['ann_info']['gt_bboxes_3d'] = DepthInstance3DBoxes(gt_bboxes_3d)
results['ann_info']['gt_labels_3d'] = gt_labels_3d
results['img_fields'] = []
results['bbox3d_fields'] = []
results['pts_mask_fields'] = []
results['pts_seg_fields'] = []
@@ -148,16 +157,16 @@ def test_sunrgbd_pipeline():
points = results['points']._data
gt_bboxes_3d = results['gt_bboxes_3d']._data
gt_labels_3d = results['gt_labels_3d']._data
expected_points = torch.tensor([[0.8678, 1.3470, 0.1105, 0.0905],
[0.8707, 1.3635, 0.0437, 0.0238],
[0.8636, 1.3511, 0.0504, 0.0304],
[0.8690, 1.3461, 0.1265, 0.1065],
[0.8668, 1.3434, 0.1216, 0.1017]])
expected_gt_bboxes_3d = torch.tensor(
[[-1.2136, 4.0206, -0.2412, 2.2493, 1.8444, 1.9245, 1.3989],
[-2.7420, 4.5777, -0.7686, 0.5718, 0.8629, 0.9510, 1.4446],
[0.9729, 1.9087, -0.1443, 0.6965, 1.5273, 2.0563, 2.9924]])
expected_gt_labels_3d = np.array([0, 7, 6])
assert torch.allclose(gt_bboxes_3d.tensor, expected_gt_bboxes_3d, 1e-3)
assert np.allclose(gt_labels_3d.flatten(), expected_gt_labels_3d)
assert torch.allclose(points, expected_points, 1e-2)
@@ -19,7 +19,7 @@ def test_outdoor_aug_pipeline():
translation_std=[1.0, 1.0, 0.5],
global_rot_range=[0.0, 0.0],
rot_range=[-0.78539816, 0.78539816]),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.78539816, 0.78539816],
@@ -137,7 +137,7 @@ def test_outdoor_velocity_aug_pipeline():
rot_range=[-0.3925, 0.3925],
scale_ratio_range=[0.95, 1.05],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
......