Commit 39b294f5 authored by jshilong, committed by ChaimZhu

Refactor part of transforms

parent 7bb011af
# Copyright (c) OpenMMLab. All rights reserved.
from .compose import Compose
from .dbsampler import DataBaseSampler
from .formating import Pack3DDetInputs
from .loading import (LoadAnnotations3D, LoadImageFromFileMono3D,
                      LoadMultiViewImageFromFiles, LoadPointsFromDict,
                      LoadPointsFromFile, LoadPointsFromMultiSweeps,
@@ -19,9 +19,10 @@ from .transforms_3d import (AffineResize, BackgroundPointsFilter,
__all__ = [
    'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans',
    'PointShuffle', 'ObjectRangeFilter', 'PointsRangeFilter',
    'Pack3DDetInputs',
    'Compose', 'LoadMultiViewImageFromFiles', 'LoadPointsFromFile',
    'DataBaseSampler',
    'NormalizePointsColor', 'LoadAnnotations3D', 'IndoorPointSample',
    'PointSample', 'PointSegClassMapping', 'MultiScaleFlipAug3D',
    'LoadPointsFromMultiSweeps', 'BackgroundPointsFilter',
...
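With this refactor, configs drop the old `DefaultFormatBundle3D` + `Collect3D` pair in favour of a single `Pack3DDetInputs` step. A minimal sketch of a LiDAR training pipeline under the new API (illustrative only; dataset-specific keys, ranges and probabilities may differ):

train_pipeline = [
    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.78539816, 0.78539816],
        scale_ratio_range=[0.95, 1.05]),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    # Packing replaces the old formatting + collecting steps.
    dict(
        type='Pack3DDetInputs',
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]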
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
from mmcv import BaseTransform
from mmcv.transforms import to_tensor
from mmengine import InstanceData

from mmdet3d.core import Det3DDataSample
from mmdet3d.core.bbox import BaseInstance3DBoxes
from mmdet3d.core.points import BasePoints
from mmdet3d.registry import TRANSFORMS


@TRANSFORMS.register_module()
class Pack3DDetInputs(BaseTransform):
    INPUTS_KEYS = ['points', 'img']
    INSTANCEDATA_3D_KEYS = [
        'gt_bboxes_3d', 'gt_labels_3d', 'attr_labels', 'depths', 'centers_2d'
    ]
    INSTANCEDATA_2D_KEYS = [
        'gt_bboxes',
        'gt_labels',
    ]
    SEG_KEYS = [
        'gt_seg_map', 'pts_instance_mask', 'pts_semantic_mask',
        'gt_semantic_seg'
    ]

    def __init__(
        self,
        keys: dict,
        meta_keys: dict = ('filename', 'ori_shape', 'img_shape', 'lidar2img',
                           'depth2img', 'cam2img', 'pad_shape', 'scale_factor',
                           'flip', 'pcd_horizontal_flip', 'pcd_vertical_flip',
                           'box_mode_3d', 'box_type_3d', 'img_norm_cfg',
                           'pcd_trans', 'sample_idx', 'pcd_scale_factor',
                           'pcd_rotation', 'pcd_rotation_angle',
                           'pts_filename', 'transformation_3d_flow',
                           'trans_mat', 'affine_aug')):
        self.keys = keys
        self.meta_keys = meta_keys

    def _remove_prefix(self, key: str) -> str:
        if key.startswith('gt_'):
            key = key[3:]
        return key

    def transform(self, results: dict) -> dict:
        """Method to pack the input data.

        Args:
            results (dict): Result dict from the data pipeline.

        Returns:
            dict:

            - 'inputs' (dict): The forward data of models. It usually
              contains the following keys:

                - points
                - img

            - 'data_sample' (:obj:`Det3DDataSample`): The annotation info
              of the sample.
        """
        packed_results = dict()
        # Format 3D data
        if 'points' in results:
            assert isinstance(results['points'], BasePoints)
            results['points'] = results['points'].tensor

        if 'img' in results:
            if isinstance(results['img'], list):
                # process multiple imgs in single frame
                imgs = [img.transpose(2, 0, 1) for img in results['img']]
                imgs = np.ascontiguousarray(np.stack(imgs, axis=0))
                results['img'] = to_tensor(imgs)
            else:
                img = results['img']
                if len(img.shape) < 3:
                    img = np.expand_dims(img, -1)
                results['img'] = np.ascontiguousarray(img.transpose(2, 0, 1))

        for key in [
                'proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels',
                'gt_labels_3d', 'attr_labels', 'pts_instance_mask',
@@ -56,211 +90,57 @@ class DefaultFormatBundle(object):
            if key not in results:
                continue
            if isinstance(results[key], list):
                results[key] = [to_tensor(res) for res in results[key]]
            else:
                results[key] = to_tensor(results[key])
        if 'gt_bboxes_3d' in results:
            if not isinstance(results['gt_bboxes_3d'], BaseInstance3DBoxes):
                results['gt_bboxes_3d'] = to_tensor(results['gt_bboxes_3d'])

        if 'gt_semantic_seg' in results:
            results['gt_semantic_seg'] = to_tensor(
                results['gt_semantic_seg'][None])
        if 'gt_seg_map' in results:
            results['gt_seg_map'] = results['gt_seg_map'][None, ...]

        data_sample = Det3DDataSample()
        gt_instances_3d = InstanceData()
        gt_instances = InstanceData()
        seg_data = dict()

        img_metas = {}
        for key in self.meta_keys:
            if key in results:
                img_metas[key] = results[key]
        data_sample.set_metainfo(img_metas)

        inputs = {}
        for key in self.keys:
            if key in results:
                if key in self.INPUTS_KEYS:
                    inputs[key] = results[key]
                elif key in self.INSTANCEDATA_3D_KEYS:
                    gt_instances_3d[self._remove_prefix(key)] = results[key]
                elif key in self.INSTANCEDATA_2D_KEYS:
                    gt_instances[self._remove_prefix(key)] = results[key]
                elif key in self.SEG_KEYS:
                    seg_data[self._remove_prefix(key)] = results[key]
                else:
                    raise NotImplementedError(
                        f'Please modify `Pack3DDetInputs` to put {key} '
                        'into a corresponding field')

        data_sample.gt_instances_3d = gt_instances_3d
        data_sample.gt_instances = gt_instances
        data_sample.seg_data = seg_data
        packed_results['data_sample'] = data_sample
        packed_results['inputs'] = inputs

        return packed_results

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f'(keys={self.keys})'
        repr_str += f'(meta_keys={self.meta_keys})'
        return repr_str
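For reference, a minimal usage sketch of the new packer (it mirrors the unit test added later in this commit; the tensors are hand-built stand-ins for data normally produced by the loading transforms):

import numpy as np

from mmdet3d.core import LiDARInstance3DBoxes, LiDARPoints
from mmdet3d.datasets.pipelines.formating import Pack3DDetInputs

# Hand-built stand-in for a `results` dict produced by the loading pipeline.
results = dict(
    points=LiDARPoints(np.random.rand(100, 4), points_dim=4),
    gt_bboxes_3d=LiDARInstance3DBoxes(
        np.array([[8.73, -1.86, -1.60, 1.20, 0.48, 1.89, -1.58]])),
    gt_labels_3d=np.array([1]),
    sample_idx=0)

packer = Pack3DDetInputs(keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
packed = packer(results)
# 'inputs' carries the forward data; 'data_sample' carries annotations + meta.
print(packed['inputs']['points'].shape)        # torch.Size([100, 4])
print(packed['data_sample'].gt_instances_3d)   # bboxes_3d, labels_3d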
# Copyright (c) OpenMMLab. All rights reserved.
import mmcv
import numpy as np
from mmcv import BaseTransform
from mmcv.transforms import LoadImageFromFile

from mmdet3d.core.points import BasePoints, get_points_type
@@ -336,10 +337,18 @@ class NormalizePointsColor(object):
@TRANSFORMS.register_module()
class LoadPointsFromFile(BaseTransform):
    """Load Points From File.

    Required Keys:

    - lidar_points (dict)

        - lidar_path (str)

    Added Keys:

    - points (np.float32)

    Args:
        coord_type (str): The type of coordinates of points cloud.
@@ -362,13 +371,15 @@ class LoadPointsFromFile(object):
            for more details. Defaults to dict(backend='disk').
    """

    def __init__(
        self,
        coord_type: str,
        load_dim: int = 6,
        use_dim: list = [0, 1, 2],
        shift_height: bool = False,
        use_color: bool = False,
        file_client_args: dict = dict(backend='disk')
    ) -> None:
        self.shift_height = shift_height
        self.use_color = use_color
        if isinstance(use_dim, int):
@@ -383,7 +394,7 @@ class LoadPointsFromFile(object):
        self.file_client_args = file_client_args.copy()
        self.file_client = None

    def _load_points(self, pts_filename: str) -> np.ndarray:
        """Private function to load point clouds data.

        Args:
@@ -406,8 +417,8 @@ class LoadPointsFromFile(object):
        return points

    def transform(self, results: dict) -> dict:
        """Method to load points data from file.

        Args:
            results (dict): Result dict containing point clouds data.
@@ -418,8 +429,8 @@ class LoadPointsFromFile(object):
            - points (:obj:`BasePoints`): Point clouds data.
        """
        pts_file_path = results['lidar_points']['lidar_path']
        points = self._load_points(pts_file_path)
        points = points.reshape(-1, self.load_dim)
        points = points[:, self.use_dim]
        attribute_dims = None
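The core of the loader is plain reshaping: a KITTI-style `.bin` file is a flat float32 buffer that is viewed as `(N, load_dim)` and column-selected with `use_dim`. A small sketch of just that step (the path is the sample file used by the tests added in this commit):

import numpy as np

# Flat float32 buffer -> (N, load_dim) -> keep only the requested columns.
raw = np.fromfile(
    'tests/data/kitti/training/velodyne_reduced/000000.bin', dtype=np.float32)
load_dim, use_dim = 4, [0, 1, 2]   # stored as x, y, z, intensity; keep x, y, z
points = raw.reshape(-1, load_dim)[:, use_dim]
print(points.shape)                # (N, 3)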
@@ -477,6 +488,52 @@ class LoadAnnotations3D(LoadAnnotations):
    Load instance mask and semantic mask of points and
    encapsulate the items into related fields.

    Required Keys:

    - ann_info (dict)

        - gt_bboxes_3d (:obj:`LiDARInstance3DBoxes` |
          :obj:`DepthInstance3DBoxes` | :obj:`CameraInstance3DBoxes`):
          3D ground truth bboxes. Only when `with_bbox_3d` is True.
        - gt_labels_3d (np.int64): Labels of ground truths.
          Only when `with_label_3d` is True.
        - gt_bboxes (np.float32): 2D ground truth bboxes.
          Only when `with_bbox` is True.
        - gt_labels (np.ndarray): Labels of ground truths.
          Only when `with_label` is True.
        - depths (np.ndarray): Only when `with_bbox_depth` is True.
        - centers_2d (np.ndarray): Only when `with_bbox_depth` is True.
        - attr_labels (np.ndarray): Attribute labels of instances.
          Only when `with_attr_label` is True.

    - pts_instance_mask_path (str): Path of instance mask file.
      Only when `with_mask_3d` is True.
    - pts_semantic_mask_path (str): Path of semantic mask file.
      Only when `with_seg_3d` is True.

    Added Keys:

    - gt_bboxes_3d (:obj:`LiDARInstance3DBoxes` |
      :obj:`DepthInstance3DBoxes` | :obj:`CameraInstance3DBoxes`):
      3D ground truth bboxes. Only when `with_bbox_3d` is True.
    - gt_labels_3d (np.int64): Labels of ground truths.
      Only when `with_label_3d` is True.
    - gt_bboxes (np.float32): 2D ground truth bboxes.
      Only when `with_bbox` is True.
    - gt_labels (np.int64): Labels of ground truths.
      Only when `with_label` is True.
    - depths (np.float32): Only when `with_bbox_depth` is True.
    - centers_2d (np.ndarray): Only when `with_bbox_depth` is True.
    - attr_labels (np.int64): Attribute labels of instances.
      Only when `with_attr_label` is True.
    - pts_instance_mask (np.int64): Instance mask of each point.
      Only when `with_mask_3d` is True.
    - pts_semantic_mask (np.int64): Semantic mask of each point.
      Only when `with_seg_3d` is True.

    Args:
        with_bbox_3d (bool, optional): Whether to load 3D boxes.
            Defaults to True.
@@ -501,32 +558,34 @@ class LoadAnnotations3D(LoadAnnotations):
        poly2mask (bool, optional): Whether to convert polygon annotations
            to bitmasks. Defaults to True.
        seg_3d_dtype (dtype, optional): Dtype of 3D semantic masks.
            Defaults to int64.
        file_client_args (dict): Config dict of file clients, refer to
            https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
            for more details.
    """

    def __init__(
        self,
        with_bbox_3d: bool = True,
        with_label_3d: bool = True,
        with_attr_label: bool = False,
        with_mask_3d: bool = False,
        with_seg_3d: bool = False,
        with_bbox: bool = False,
        with_label: bool = False,
        with_mask: bool = False,
        with_seg: bool = False,
        with_bbox_depth: bool = False,
        poly2mask: bool = True,
        seg_3d_dtype: np.dtype = np.int64,
        file_client_args: dict = dict(backend='disk')
    ) -> None:
        super().__init__(
            with_bbox=with_bbox,
            with_label=with_label,
            with_mask=with_mask,
            with_seg=with_seg,
            poly2mask=poly2mask,
            file_client_args=file_client_args)
        self.with_bbox_3d = with_bbox_3d
        self.with_bbox_depth = with_bbox_depth
@@ -536,8 +595,9 @@ class LoadAnnotations3D(LoadAnnotations):
        self.with_seg_3d = with_seg_3d
        self.seg_3d_dtype = seg_3d_dtype

    def _load_bboxes_3d(self, results: dict) -> dict:
        """Private function to move the 3D bounding box annotation from
        the `ann_info` field to the root of `results`.

        Args:
            results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.
@@ -545,11 +605,11 @@ class LoadAnnotations3D(LoadAnnotations):
        Returns:
            dict: The dict containing loaded 3D bounding box annotations.
        """
        results['gt_bboxes_3d'] = results['ann_info']['gt_bboxes_3d']
        return results

    def _load_bboxes_depth(self, results: dict) -> dict:
        """Private function to load 2.5D bounding box annotations.

        Args:
@@ -558,11 +618,12 @@ class LoadAnnotations3D(LoadAnnotations):
        Returns:
            dict: The dict containing loaded 2.5D bounding box annotations.
        """
        results['depths'] = results['ann_info']['depths']
        results['centers_2d'] = results['ann_info']['centers_2d']
        return results

    def _load_labels_3d(self, results: dict) -> dict:
        """Private function to load label annotations.

        Args:
@@ -571,10 +632,11 @@ class LoadAnnotations3D(LoadAnnotations):
        Returns:
            dict: The dict containing loaded label annotations.
        """
        results['gt_labels_3d'] = results['ann_info']['gt_labels_3d']
        return results

    def _load_attr_labels(self, results: dict) -> dict:
        """Private function to load attribute label annotations.

        Args:
@@ -586,7 +648,7 @@ class LoadAnnotations3D(LoadAnnotations):
        results['attr_labels'] = results['ann_info']['attr_labels']
        return results

    def _load_masks_3d(self, results: dict) -> dict:
        """Private function to load 3D mask annotations.

        Args:
@@ -595,7 +657,7 @@ class LoadAnnotations3D(LoadAnnotations):
        Returns:
            dict: The dict containing loaded 3D mask annotations.
        """
        pts_instance_mask_path = results['pts_instance_mask_path']

        if self.file_client is None:
            self.file_client = mmcv.FileClient(**self.file_client_args)
@@ -608,10 +670,9 @@ class LoadAnnotations3D(LoadAnnotations):
                pts_instance_mask_path, dtype=np.int64)

        results['pts_instance_mask'] = pts_instance_mask
        return results

    def _load_semantic_seg_3d(self, results: dict) -> dict:
        """Private function to load 3D semantic segmentation annotations.

        Args:
@@ -620,7 +681,7 @@ class LoadAnnotations3D(LoadAnnotations):
        Returns:
            dict: The dict containing the semantic segmentation annotations.
        """
        pts_semantic_mask_path = results['pts_semantic_mask_path']

        if self.file_client is None:
            self.file_client = mmcv.FileClient(**self.file_client_args)
@@ -635,28 +696,23 @@ class LoadAnnotations3D(LoadAnnotations):
                pts_semantic_mask_path, dtype=np.int64)

        results['pts_semantic_mask'] = pts_semantic_mask
        return results

    def transform(self, results: dict) -> dict:
        """Function to load multiple types of annotations.

        Args:
            results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.

        Returns:
            dict: The dict containing loaded 3D bounding box, label, mask and
                semantic segmentation annotations.
        """
        results = super().transform(results)
        if self.with_bbox_3d:
            results = self._load_bboxes_3d(results)
        if self.with_bbox_depth:
            results = self._load_bboxes_depth(results)
        if self.with_label_3d:
            results = self._load_labels_3d(results)
        if self.with_attr_label:
...
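As the new Required Keys section notes, annotations now arrive under `results['ann_info']` and the loader lifts them to the top level of `results`. A minimal sketch of that contract (hand-built dict; it assumes the 2D-related flags keep their False defaults so the parent 2D loader is a no-op, and real dicts also carry image and lidar metadata):

import numpy as np

from mmdet3d.core import LiDARInstance3DBoxes
from mmdet3d.datasets.pipelines.loading import LoadAnnotations3D

results = dict(
    ann_info=dict(
        gt_bboxes_3d=LiDARInstance3DBoxes(
            np.array([[8.73, -1.86, -1.60, 1.20, 0.48, 1.89, -1.58]])),
        gt_labels_3d=np.array([1])))

loader = LoadAnnotations3D(with_bbox_3d=True, with_label_3d=True)
results = loader(results)
# The annotations were moved from `ann_info` to the root of `results`.
assert 'gt_bboxes_3d' in results and 'gt_labels_3d' in results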
# Copyright (c) OpenMMLab. All rights reserved.
import random
import warnings
from typing import List

import cv2
import numpy as np
@@ -75,6 +76,25 @@ class RandomFlip3D(RandomFlip):
    otherwise it will be randomly decided by a ratio specified in the init
    method.

    Required Keys:

    - points (np.float32)
    - gt_bboxes_3d (np.float32)

    Modified Keys:

    - points (np.float32)
    - gt_bboxes_3d (np.float32)

    Added Keys:

    - flip (bool)
    - pcd_horizontal_flip (bool)
    - pcd_vertical_flip (bool)

    Args:
        sync_2d (bool, optional): Whether to apply flip according to the 2D
            images. If True, it will apply the same flip as that to 2D images.
@@ -87,13 +107,17 @@ class RandomFlip3D(RandomFlip):
    """

    def __init__(self,
                 sync_2d: bool = True,
                 flip_ratio_bev_horizontal: float = 0.0,
                 flip_ratio_bev_vertical: float = 0.0,
                 **kwargs) -> None:
        # `flip_ratio_bev_horizontal` also serves as the flip prob of the
        # 2D image when `sync_2d` is True
        super(RandomFlip3D, self).__init__(
            prob=flip_ratio_bev_horizontal, direction='horizontal', **kwargs)
        self.sync_2d = sync_2d
        self.flip_ratio_bev_horizontal = flip_ratio_bev_horizontal
        self.flip_ratio_bev_vertical = flip_ratio_bev_vertical
        if flip_ratio_bev_horizontal is not None:
            assert isinstance(
@@ -104,9 +128,18 @@ class RandomFlip3D(RandomFlip):
                flip_ratio_bev_vertical,
                (int, float)) and 0 <= flip_ratio_bev_vertical <= 1

    def random_flip_data_3d(self,
                            input_dict: dict,
                            direction: str = 'horizontal') -> None:
        """Flip 3D data randomly.

        `random_flip_data_3d` should take these situations into
        consideration:

        1. LiDAR-based 3D detection
        2. LiDAR-based 3D segmentation
        3. vision-only detection
        4. multi-modality 3D detection

        Args:
            input_dict (dict): Result dict from loading pipeline.
            direction (str, optional): Flip direction.
@@ -117,27 +150,25 @@ class RandomFlip3D(RandomFlip):
                updated in the result dict.
        """
        assert direction in ['horizontal', 'vertical']
        if 'gt_bboxes_3d' in input_dict:
            if 'points' in input_dict:
                input_dict['points'] = input_dict['gt_bboxes_3d'].flip(
                    direction, points=input_dict['points'])
            else:
                # vision-only detection
                input_dict['gt_bboxes_3d'].flip(direction)
        else:
            # e.g. semantic segmentation, where only points are flipped
            input_dict['points'].flip(direction)

        if 'centers_2d' in input_dict:
            assert self.sync_2d is True and direction == 'horizontal', \
                'Only support sync_2d=True and horizontal flip with images'
            # TODO fix this ori_shape and other keys in vision based model
            # TODO ori_shape to img_shape
            w = input_dict['ori_shape'][1]
            input_dict['centers_2d'][..., 0] = \
                w - input_dict['centers_2d'][..., 0]
            # need to modify the horizontal position of camera center
            # along u-axis in the image (flip like centers2d)
            # ['cam2img'][0][2] = c_u
@@ -145,7 +176,7 @@ class RandomFlip3D(RandomFlip):
            # https://github.com/open-mmlab/mmdetection3d/pull/744
            input_dict['cam2img'][0][2] = w - input_dict['cam2img'][0][2]

    def transform(self, input_dict: dict) -> dict:
        """Flip points and 3D boxes, and also flip the 2D image and its
        annotations.
@@ -158,15 +189,16 @@ class RandomFlip3D(RandomFlip):
            into result dict.
        """
        # flip 2D image and its annotations
        if 'img' in input_dict:
            super(RandomFlip3D, self).transform(input_dict)

        if self.sync_2d and 'img' in input_dict:
            input_dict['pcd_horizontal_flip'] = input_dict['flip']
            input_dict['pcd_vertical_flip'] = False
        else:
            if 'pcd_horizontal_flip' not in input_dict:
                flip_horizontal = True if np.random.rand(
                ) < self.flip_ratio_bev_horizontal else False
                input_dict['pcd_horizontal_flip'] = flip_horizontal
            if 'pcd_vertical_flip' not in input_dict:
                flip_vertical = True if np.random.rand(
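The flip itself is delegated to the box/point structures: in LiDAR coordinates a horizontal (BEV) flip negates y (and the box yaw). A small sketch of that primitive, which `random_flip_data_3d` builds on:

import numpy as np

from mmdet3d.core import LiDARInstance3DBoxes, LiDARPoints

points = LiDARPoints(np.array([[8.7, -1.9, -1.6, 0.5]]), points_dim=4)
boxes = LiDARInstance3DBoxes(
    np.array([[8.73, -1.86, -1.60, 1.20, 0.48, 1.89, -1.58]]))

# Boxes are flipped in place; passing `points` flips and returns them too.
points = boxes.flip('horizontal', points=points)
print(points.tensor[0, 1])   # y negated: -1.9 -> 1.9
print(boxes.tensor[0, 1])    # box centre y negated: -1.86 -> 1.86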
@@ -563,9 +595,27 @@ class GlobalAlignment(object):


@TRANSFORMS.register_module()
class GlobalRotScaleTrans(BaseTransform):
    """Apply global rotation, scaling and translation to a 3D scene.

    Required Keys:

    - points (np.float32)
    - gt_bboxes_3d (np.float32)

    Modified Keys:

    - points (np.float32)
    - gt_bboxes_3d (np.float32)

    Added Keys:

    - pcd_trans (np.float32)
    - pcd_rotation (np.float32)
    - pcd_rotation_angle (np.float32)
    - pcd_scale_factor (np.float32)

    Args:
        rot_range (list[float], optional): Range of rotation angle.
            Defaults to [-0.78539816, 0.78539816] (close to [-pi/4, pi/4]).
@@ -581,10 +631,10 @@ class GlobalRotScaleTrans(object):
    """

    def __init__(self,
                 rot_range: List[float] = [-0.78539816, 0.78539816],
                 scale_ratio_range: List[float] = [0.95, 1.05],
                 translation_std: List[int] = [0, 0, 0],
                 shift_height: bool = False) -> None:
        seq_types = (list, tuple, np.ndarray)
        if not isinstance(rot_range, seq_types):
            assert isinstance(rot_range, (int, float)), \
@@ -594,6 +644,7 @@ class GlobalRotScaleTrans(object):
        assert isinstance(scale_ratio_range, seq_types), \
            f'unsupported scale_ratio_range type {type(scale_ratio_range)}'
        self.scale_ratio_range = scale_ratio_range

        if not isinstance(translation_std, seq_types):
@@ -607,7 +658,7 @@ class GlobalRotScaleTrans(object):
        self.translation_std = translation_std
        self.shift_height = shift_height

    def _trans_bbox_points(self, input_dict: dict) -> None:
        """Private function to translate bounding boxes and points.

        Args:
@@ -615,18 +666,18 @@ class GlobalRotScaleTrans(object):
        Returns:
            dict: Results after translation, 'points', 'pcd_trans'
                and `gt_bboxes_3d` are updated in the result dict.
        """
        translation_std = np.array(self.translation_std, dtype=np.float32)
        trans_factor = np.random.normal(scale=translation_std, size=3).T

        input_dict['points'].translate(trans_factor)
        input_dict['pcd_trans'] = trans_factor
        if 'gt_bboxes_3d' in input_dict:
            input_dict['gt_bboxes_3d'].translate(trans_factor)

    def _rot_bbox_points(self, input_dict: dict) -> None:
        """Private function to rotate bounding boxes and points.

        Args:
@@ -634,37 +685,35 @@ class GlobalRotScaleTrans(object):
        Returns:
            dict: Results after rotation, 'points', 'pcd_rotation'
                and `gt_bboxes_3d` are updated in the result dict.
        """
        rotation = self.rot_range
        noise_rotation = np.random.uniform(rotation[0], rotation[1])

        if 'gt_bboxes_3d' in input_dict and \
                len(input_dict['gt_bboxes_3d'].tensor) != 0:
            # rotate points with bboxes
            points, rot_mat_T = input_dict['gt_bboxes_3d'].rotate(
                noise_rotation, input_dict['points'])
            input_dict['points'] = points
        else:
            # if no bbox in input_dict, only rotate points
            rot_mat_T = input_dict['points'].rotate(noise_rotation)

        input_dict['pcd_rotation'] = rot_mat_T
        input_dict['pcd_rotation_angle'] = noise_rotation

    def _scale_bbox_points(self, input_dict: dict) -> None:
        """Private function to scale bounding boxes and points.

        Args:
            input_dict (dict): Result dict from loading pipeline.

        Returns:
            dict: Results after scaling, 'points' and `gt_bboxes_3d`
                are updated in the result dict.
        """
        scale = input_dict['pcd_scale_factor']
        points = input_dict['points']
@@ -675,24 +724,25 @@ class GlobalRotScaleTrans(object):
            points.tensor[:, points.attribute_dims['height']] *= scale
        input_dict['points'] = points

        if 'gt_bboxes_3d' in input_dict and \
                len(input_dict['gt_bboxes_3d'].tensor) != 0:
            input_dict['gt_bboxes_3d'].scale(scale)

    def _random_scale(self, input_dict: dict) -> None:
        """Private function to randomly set the scale factor.

        Args:
            input_dict (dict): Result dict from loading pipeline.

        Returns:
            dict: Results after scaling, 'pcd_scale_factor' is updated
                in the result dict.
        """
        scale_factor = np.random.uniform(self.scale_ratio_range[0],
                                         self.scale_ratio_range[1])
        input_dict['pcd_scale_factor'] = scale_factor

    def transform(self, input_dict: dict) -> dict:
        """Rotate, scale and translate bounding boxes and points.
@@ -701,8 +751,8 @@ class GlobalRotScaleTrans(object):
        Returns:
            dict: Results after scaling, 'points', 'pcd_rotation',
                'pcd_scale_factor', 'pcd_trans' and `gt_bboxes_3d` are
                updated in the result dict.
        """
        if 'transformation_3d_flow' not in input_dict:
            input_dict['transformation_3d_flow'] = []
...
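The recorded keys make the augmentation replayable: the flow is rotate, then scale, then translate, so the net effect on a point x is roughly (x @ pcd_rotation) * pcd_scale_factor + pcd_trans. A numpy sketch of replaying stored parameters (illustrative only; exact sign conventions follow the point/box `.rotate()` implementations):

import numpy as np

# Replay a recorded global transform on raw xyz coordinates.
xyz = np.random.rand(5, 3).astype(np.float32)
angle = 0.3                                # would come from pcd_rotation_angle
rot_mat_T = np.array([[np.cos(angle), np.sin(angle), 0.],
                      [-np.sin(angle), np.cos(angle), 0.],
                      [0., 0., 1.]], dtype=np.float32)
scale = 1.02                               # pcd_scale_factor
trans = np.array([0.1, -0.2, 0.0], dtype=np.float32)  # pcd_trans

replayed = xyz @ rot_mat_T * scale + trans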
@@ -3,13 +3,12 @@ import mmcv
from mmcv.transforms import LoadImageFromFile

# yapf: disable
from mmdet3d.datasets.pipelines import (LoadAnnotations3D,
                                        LoadImageFromFileMono3D,
                                        LoadMultiViewImageFromFiles,
                                        LoadPointsFromFile,
                                        LoadPointsFromMultiSweeps,
                                        MultiScaleFlipAug3D, Pack3DDetInputs,
                                        PointSegClassMapping)
# yapf: enable
from mmdet3d.registry import TRANSFORMS
@@ -32,9 +31,8 @@ def is_loading_function(transform):
    # TODO: use more elegant way to distinguish loading modules
    loading_functions = (LoadImageFromFile, LoadPointsFromFile,
                         LoadAnnotations3D, LoadMultiViewImageFromFiles,
                         LoadPointsFromMultiSweeps, Pack3DDetInputs,
                         LoadImageFromFileMono3D, PointSegClassMapping)
    if isinstance(transform, dict):
        obj_cls = TRANSFORMS.get(transform['type'])
        if obj_cls is None:
...
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import unittest
import torch
from mmengine.testing import assert_allclose
from utils import create_data_info_after_loading
from mmdet3d.datasets import RandomFlip3D
from mmdet3d.datasets.pipelines import GlobalRotScaleTrans
class TestGlobalRotScaleTrans(unittest.TestCase):
def test_global_rotation_scale_trans(self):
rot_trans = GlobalRotScaleTrans(
rot_range=[-0.78, 0.78], scale_ratio_range=[1, 1])
scale_trans = GlobalRotScaleTrans(
rot_range=[0, 0], scale_ratio_range=[0.95, 1.05])
ori_data_info = create_data_info_after_loading()
data_info = copy.deepcopy(ori_data_info)
rot_data_info = rot_trans(data_info)
self.assertIn('pcd_rotation', rot_data_info)
self.assertIn('pcd_rotation_angle', rot_data_info)
self.assertIn('pcd_scale_factor', rot_data_info)
self.assertEqual(rot_data_info['pcd_scale_factor'], 1)
self.assertTrue(-0.79 < rot_data_info['pcd_rotation_angle'] < 0.79)
# the change in box yaw should stay within rot_range
before_rot_gt_bbox_3d = ori_data_info['gt_bboxes_3d']
after_rot_gt_bbox_3d = rot_data_info['gt_bboxes_3d']
assert (after_rot_gt_bbox_3d.tensor[:, -1] -
before_rot_gt_bbox_3d.tensor[:, -1]).abs().max() < 0.79
data_info = copy.deepcopy(ori_data_info)
scale_data_info = scale_trans(data_info)
# capture boxes and points before and after scaling
before_scale_gt_bbox_3d = ori_data_info['gt_bboxes_3d'].tensor
after_scale_gt_bbox_3d = scale_data_info['gt_bboxes_3d'].tensor
before_scale_points = ori_data_info['points'].tensor
after_scale_points = scale_data_info['points'].tensor
self.assertEqual(scale_data_info['pcd_rotation_angle'], 0)
# assert scale_factor range
assert (0.94 < (after_scale_points / before_scale_points)).all()
assert (1.06 >
(after_scale_gt_bbox_3d / before_scale_gt_bbox_3d)).all()
class TestRandomFlip3D(unittest.TestCase):
def test_random_flip3d(self):
ori_data_info = create_data_info_after_loading()
no_flip_transform = RandomFlip3D(flip_ratio_bev_horizontal=0.)
always_flip_transform = RandomFlip3D(flip_ratio_bev_horizontal=1.)
data_info = copy.deepcopy(ori_data_info)
data_info = no_flip_transform(data_info)
self.assertIn('pcd_horizontal_flip', data_info)
assert_allclose(data_info['points'].tensor,
ori_data_info['points'].tensor)
assert_allclose(data_info['gt_bboxes_3d'].tensor,
ori_data_info['gt_bboxes_3d'].tensor)
data_info = copy.deepcopy(ori_data_info)
data_info = always_flip_transform(data_info)
assert_allclose(data_info['points'].tensor[:, 0],
ori_data_info['points'].tensor[:, 0])
assert_allclose(data_info['points'].tensor[:, 1],
-ori_data_info['points'].tensor[:, 1])
assert_allclose(data_info['points'].tensor[:, 2],
ori_data_info['points'].tensor[:, 2])
assert_allclose(data_info['gt_bboxes_3d'].tensor[:, 0],
ori_data_info['gt_bboxes_3d'].tensor[:, 0])
assert_allclose(data_info['gt_bboxes_3d'].tensor[:, 1],
-ori_data_info['gt_bboxes_3d'].tensor[:, 1])
assert_allclose(data_info['gt_bboxes_3d'].tensor[:, 2],
ori_data_info['gt_bboxes_3d'].tensor[:, 2])
# Copyright (c) OpenMMLab. All rights reserved.
import unittest
import torch
from mmengine.testing import assert_allclose
from utils import create_dummy_data_info
from mmdet3d.core import DepthPoints, LiDARPoints
from mmdet3d.datasets.pipelines.loading import (LoadAnnotations3D,
LoadPointsFromFile)
class TestLoadPointsFromFile(unittest.TestCase):
def test_load_points_from_file(self):
use_dim = 3
file_client_args = dict(backend='disk')
load_points_transform = LoadPointsFromFile(
coord_type='LIDAR',
load_dim=4,
use_dim=use_dim,
file_client_args=file_client_args)
data_info = create_dummy_data_info()
info = load_points_transform(data_info)
self.assertIn('points', info)
self.assertIsInstance(info['points'], LiDARPoints)
load_points_transform = LoadPointsFromFile(
coord_type='DEPTH',
load_dim=4,
use_dim=use_dim,
file_client_args=file_client_args)
info = load_points_transform(data_info)
self.assertIsInstance(info['points'], DepthPoints)
self.assertEqual(info['points'].shape[-1], use_dim)
load_points_transform = LoadPointsFromFile(
coord_type='DEPTH',
load_dim=4,
use_dim=use_dim,
shift_height=True,
file_client_args=file_client_args)
info = load_points_transform(data_info)
# extra height dim
self.assertEqual(info['points'].shape[-1], use_dim + 1)
repr_str = repr(load_points_transform)
self.assertIn('shift_height=True', repr_str)
self.assertIn('use_color=False', repr_str)
self.assertIn('load_dim=4', repr_str)
class TestLoadAnnotations3D(unittest.TestCase):
def test_load_annotations_3d(self):
file_client_args = dict(backend='disk')
load_anns_transform = LoadAnnotations3D(
with_bbox_3d=True,
with_label_3d=True,
file_client_args=file_client_args)
self.assertIs(load_anns_transform.with_seg, False)
self.assertIs(load_anns_transform.with_bbox_3d, True)
self.assertIs(load_anns_transform.with_label_3d, True)
data_info = create_dummy_data_info()
info = load_anns_transform(data_info)
self.assertIn('gt_bboxes_3d', info)
assert_allclose(info['gt_bboxes_3d'].tensor.sum(),
torch.tensor(7.2650))
self.assertIn('gt_labels_3d', info)
assert_allclose(info['gt_labels_3d'], torch.tensor([1]))
repr_str = repr(load_anns_transform)
self.assertIn('with_bbox_3d=True', repr_str)
self.assertIn('with_label_3d=True', repr_str)
self.assertIn('with_bbox_depth=False', repr_str)
# Copyright (c) OpenMMLab. All rights reserved.
import unittest
import torch
from mmengine.testing import assert_allclose
from utils import create_data_info_after_loading
from mmdet3d.core import LiDARInstance3DBoxes
from mmdet3d.datasets.pipelines.formating import Pack3DDetInputs
class TestPack3DDetInputs(unittest.TestCase):
def test_packinputs(self):
ori_data_info = create_data_info_after_loading()
pack_input = Pack3DDetInputs(
keys=['points', 'gt_labels_3d', 'gt_bboxes_3d'])
packed_results = pack_input(ori_data_info)
inputs = packed_results['inputs']
# annotations
gt_instances = packed_results['data_sample'].gt_instances_3d
self.assertIn('points', inputs)
# `to_tensor` should have converted the points to a torch.Tensor
self.assertIsInstance(inputs['points'], torch.Tensor)
assert_allclose(inputs['points'].sum(), torch.tensor(13062.6436))
self.assertIn('labels_3d', gt_instances)
assert_allclose(gt_instances.labels_3d, torch.tensor([1]))
# assert to_tensor
self.assertIsInstance(gt_instances.labels_3d, torch.Tensor)
self.assertIn('bboxes_3d', gt_instances)
self.assertIsInstance(gt_instances.bboxes_3d, LiDARInstance3DBoxes)
assert_allclose(gt_instances.bboxes_3d.tensor.sum(),
torch.tensor(7.2650))
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np

from mmdet3d.core import LiDARInstance3DBoxes
from mmdet3d.datasets import LoadAnnotations3D, LoadPointsFromFile


# create a dummy `results` dict to test the pipeline
def create_dummy_data_info(with_ann=True):
ann_info = {
'gt_bboxes':
np.array([[712.4, 143., 810.73, 307.92]]),
'gt_labels':
np.array([1]),
'gt_bboxes_3d':
LiDARInstance3DBoxes(
np.array(
[[8.7314, -1.8559, -1.5997, 1.2000, 0.4800, 1.8900,
-1.5808]])),
'gt_labels_3d':
np.array([1]),
'num_lidar_pts':
np.array([377]),
'difficulty':
np.array([0]),
'truncated':
np.array([0]),
'occluded':
np.array([0]),
'alpha':
np.array([-0.2]),
'score':
np.array([0.]),
'index':
np.array([0]),
'group_id':
np.array([0])
}
data_info = {
'sample_id':
0,
'images': {
'CAM0': {
'cam2img': [[707.0493, 0.0, 604.0814, 0.0],
[0.0, 707.0493, 180.5066, 0.0],
[0.0, 0.0, 1.0, 0.0], [0.0, 0.0, 0.0, 1.0]]
},
'CAM1': {
'cam2img': [[707.0493, 0.0, 604.0814, -379.7842],
[0.0, 707.0493, 180.5066, 0.0],
[0.0, 0.0, 1.0, 0.0], [0.0, 0.0, 0.0, 1.0]]
},
'CAM2': {
'img_path':
'tests/data/kitti/training/image_2/000000.png',
'height':
370,
'width':
1224,
'cam2img': [[707.0493, 0.0, 604.0814, 45.75831],
[0.0, 707.0493, 180.5066, -0.3454157],
[0.0, 0.0, 1.0, 0.004981016], [0.0, 0.0, 0.0, 1.0]]
},
'CAM3': {
'cam2img': [[707.0493, 0.0, 604.0814, -334.1081],
[0.0, 707.0493, 180.5066, 2.33066],
[0.0, 0.0, 1.0, 0.003201153], [0.0, 0.0, 0.0, 1.0]]
},
'R0_rect': [[
0.9999127984046936, 0.010092630051076412,
-0.008511931635439396, 0.0
],
[
-0.010127290152013302, 0.9999405741691589,
-0.004037670791149139, 0.0
],
[
0.008470674976706505, 0.0041235219687223434,
0.9999555945396423, 0.0
], [0.0, 0.0, 0.0, 1.0]]
},
'lidar_points': {
'num_pts_feats':
4,
'lidar_path':
'tests/data/kitti/training/velodyne_reduced/000000.bin',
'lidar2cam': [[
-0.0015960992313921452, -0.9999162554740906,
-0.012840436771512032, -0.022366708144545555
],
[
-0.00527064548805356, 0.012848696671426296,
-0.9999035596847534, -0.05967890843749046
],
[
0.9999848008155823, -0.0015282672829926014,
-0.005290712229907513, -0.33254900574684143
], [0.0, 0.0, 0.0, 1.0]],
'Tr_velo_to_cam': [[
0.006927963811904192, -0.9999722242355347, -0.0027578289154917,
-0.024577289819717407
],
[
-0.0011629819637164474,
0.0027498360723257065, -0.9999955296516418,
-0.06127237156033516
],
[
0.999975323677063, 0.006931141018867493,
-0.0011438990477472544, -0.33210289478302
], [0.0, 0.0, 0.0, 1.0]],
'Tr_imu_to_velo': [[
0.999997615814209, 0.0007553070900030434,
-0.002035825978964567, -0.8086758852005005
],
[
-0.0007854027207940817, 0.9998897910118103,
-0.014822980388998985, 0.3195559084415436
],
[
0.002024406101554632, 0.014824540354311466,
0.9998881220817566, -0.7997230887413025
], [0.0, 0.0, 0.0, 1.0]]
},
'instances': [{
'bbox': [712.4, 143.0, 810.73, 307.92],
'bbox_label':
-1,
'bbox_3d': [
1.840000033378601, 1.4700000286102295, 8.40999984741211,
1.2000000476837158, 1.8899999856948853, 0.47999998927116394,
0.009999999776482582
],
'bbox_label_3d':
-1,
'num_lidar_pts':
377,
'difficulty':
0,
'truncated':
0,
'occluded':
0,
'alpha':
-0.2,
'score':
0.0,
'index':
0,
'group_id':
0
}],
'plane':
None
}
if with_ann:
data_info['ann_info'] = ann_info
return data_info
def create_data_info_after_loading():
load_anns_transform = LoadAnnotations3D(
with_bbox_3d=True, with_label_3d=True)
load_points_transform = LoadPointsFromFile(
coord_type='LIDAR', load_dim=4, use_dim=3)
data_info = create_dummy_data_info()
data_info = load_points_transform(data_info)
data_info_after_loading = load_anns_transform(data_info)
return data_info_after_loading
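Chaining the helpers above gives a quick end-to-end smoke test of the refactored pipeline (a sketch; the parameters are arbitrary):

from mmdet3d.datasets import RandomFlip3D
from mmdet3d.datasets.pipelines import GlobalRotScaleTrans, Pack3DDetInputs


def run_pipeline_smoke_test():
    results = create_data_info_after_loading()
    results = GlobalRotScaleTrans(rot_range=[-0.78, 0.78])(results)
    results = RandomFlip3D(flip_ratio_bev_horizontal=0.5)(results)
    packed = Pack3DDetInputs(
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])(results)
    assert 'points' in packed['inputs']
    assert 'bboxes_3d' in packed['data_sample'].gt_instances_3d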