Unverified Commit 6c03a971 authored by Tai-Wang and committed by GitHub

Release v1.1.0rc1

parents 9611c2d0 ca42c312
......@@ -102,10 +102,10 @@ python tools/misc/browse_dataset.py configs/_base_/datasets/kitti-3d-3class.py -
python tools/misc/browse_dataset.py configs/_base_/datasets/kitti-3d-3class.py --task det --aug --output-dir ${OUTPUT_DIR} --online
```
If you also want to display 2D images with the projected 3D bounding boxes, you need to find a config file that supports multi-modality data loading, and then change the `--task` argument to `multi_modality-det`. An example is shown below:
If you also want to display 2D images with the projected 3D bounding boxes, you need to find a config file that supports multi-modality data loading, and then change the `--task` argument to `multi-modality_det`. An example is shown below:
```shell
python tools/misc/browse_dataset.py configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py --task multi_modality-det --output-dir ${OUTPUT_DIR} --online
python tools/misc/browse_dataset.py configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py --task multi-modality_det --output-dir ${OUTPUT_DIR} --online
```
![](../../resources/browse_dataset_multi_modality.png)
......@@ -121,7 +121,7 @@ python tools/misc/browse_dataset.py configs/_base_/datasets/scannet-seg.py --tas
Browse the nuScenes dataset in the monocular 3D detection task:
```shell
python tools/misc/browse_dataset.py configs/_base_/datasets/nus-mono3d.py --task mono-det --output-dir ${OUTPUT_DIR} --online
python tools/misc/browse_dataset.py configs/_base_/datasets/nus-mono3d.py --task mono_det --output-dir ${OUTPUT_DIR} --online
```
![](../../resources/browse_dataset_mono.png)
......
......@@ -143,6 +143,7 @@ def inference_detector(model: nn.Module,
# load from point cloud file
data_ = dict(
lidar_points=dict(lidar_path=pcd),
timestamp=1,
# for ScanNet demo we need axis_align_matrix
axis_align_matrix=np.eye(4),
box_type_3d=box_type_3d,
......@@ -151,6 +152,7 @@ def inference_detector(model: nn.Module,
# directly use loaded point cloud
data_ = dict(
points=pcd,
timestamp=1,
# for ScanNet demo we need axis_align_matrix
axis_align_matrix=np.eye(4),
box_type_3d=box_type_3d,
......
# Copyright (c) OpenMMLab. All rights reserved.
from .builder import DATASETS, PIPELINES, build_dataset
from .convert_utils import get_2d_boxes
from .dataset_wrappers import CBGSDataset
from .det3d_dataset import Det3DDataset
from .kitti_dataset import KittiDataset
......@@ -22,8 +21,8 @@ from .transforms import (AffineResize, BackgroundPointsFilter, GlobalAlignment,
ObjectNameFilter, ObjectNoise, ObjectRangeFilter,
ObjectSample, PointSample, PointShuffle,
PointsRangeFilter, RandomDropPointsColor,
RandomFlip3D, RandomJitterPoints, RandomShiftScale,
VoxelBasedPointSampler)
RandomFlip3D, RandomJitterPoints, RandomResize3D,
RandomShiftScale, Resize3D, VoxelBasedPointSampler)
from .utils import get_loading_pipeline
from .waymo_dataset import WaymoDataset
......@@ -40,5 +39,6 @@ __all__ = [
'LoadPointsFromMultiSweeps', 'WaymoDataset', 'BackgroundPointsFilter',
'VoxelBasedPointSampler', 'get_loading_pipeline', 'RandomDropPointsColor',
'RandomJitterPoints', 'ObjectNameFilter', 'AffineResize',
'RandomShiftScale', 'LoadPointsFromDict', 'PIPELINES', 'get_2d_boxes'
'RandomShiftScale', 'LoadPointsFromDict', 'PIPELINES',
'Resize3D', 'RandomResize3D',
]
# Copyright (c) OpenMMLab. All rights reserved.
import copy
from collections import OrderedDict
from typing import List, Tuple, Union
from typing import List, Optional, Tuple, Union
import numpy as np
from nuscenes.utils.geometry_utils import view_points
......@@ -11,6 +11,11 @@ from shapely.geometry import MultiPoint, box
from mmdet3d.structures import Box3DMode, CameraInstance3DBoxes, points_cam2img
from mmdet3d.structures.ops import box_np_ops
kitti_categories = ('Pedestrian', 'Cyclist', 'Car', 'Van', 'Truck',
'Person_sitting', 'Tram', 'Misc')
waymo_categories = ('Car', 'Pedestrian', 'Cyclist')
nus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',
'barrier')
......@@ -48,8 +53,10 @@ LyftNameMapping = {
}
def get_2d_boxes(nusc, sample_data_token: str, visibilities: List[str]):
"""Get the 2D annotation records for a given `sample_data_token`.
def get_nuscenes_2d_boxes(nusc, sample_data_token: str,
visibilities: List[str]):
"""Get the 2d / mono3d annotation records for a given `sample_data_token of
nuscenes dataset.
Args:
sample_data_token (str): Sample data token belonging to a camera
......@@ -57,7 +64,7 @@ def get_2d_boxes(nusc, sample_data_token: str, visibilities: List[str]):
visibilities (list[str]): Visibility filter.
Return:
list[dict]: List of 2D annotation record that belongs to the input
list[dict]: List of 2d annotation records that belong to the input
`sample_data_token`.
"""
......@@ -128,7 +135,7 @@ def get_2d_boxes(nusc, sample_data_token: str, visibilities: List[str]):
# Generate dictionary record to be included in the .json file.
repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y,
sample_data_token, sd_rec['filename'])
'nuscenes')
# if repro_rec is None, we do not append it into repro_recs
if repro_rec is not None:
......@@ -178,23 +185,36 @@ def get_2d_boxes(nusc, sample_data_token: str, visibilities: List[str]):
return repro_recs
def get_waymo_2d_boxes(info, cam_idx, occluded, annos=None, mono3d=True):
"""Get the 2D annotation records for a given info.
def get_kitti_style_2d_boxes(info: dict,
cam_idx: int = 2,
occluded: Tuple[int] = (0, 1, 2, 3),
annos: Optional[dict] = None,
mono3d: bool = True,
dataset: str = 'kitti'):
"""Get the 2d / mono3d annotation records for a given info.
This function is used to get 2D annotations when loading annotations from
a dataset class. The original version in the data converter will be
deprecated in the future.
This function is used to get 2D / mono3d annotations when loading
annotations from a KITTI-style dataset class, such as the KITTI and
Waymo datasets.
Args:
info: Information of the given sample data.
occluded: Integer (0, 1, 2, 3) indicating occlusion state:
info (dict): Information of the given sample data.
cam_idx (int): Camera id to which the obtained 2d / mono3d
    annotations belong. In KITTI, typically only CAM2 is used,
    while in Waymo multiple cameras can be used.
    Defaults to 2.
occluded (tuple[int]): Integer (0, 1, 2, 3) indicating occlusion state:
0 = fully visible, 1 = partly occluded, 2 = largely occluded,
3 = unknown, -1 = DontCare
3 = unknown, -1 = DontCare.
Defaults to (0, 1, 2, 3).
annos (dict, optional): Original annotations.
mono3d (bool): Whether to get boxes with mono3d annotation.
Defaults to True.
dataset (str): Name of the dataset from which the 2d bboxes
    are obtained. Defaults to 'kitti'.
Return:
list[dict]: List of 2D annotation record that belongs to the input
`sample_data_token`.
list[dict]: List of 2d / mono3d annotation records that
    belong to the input camera id.
"""
# Get calibration information
camera_intrinsic = info['calib'][f'P{cam_idx}']
......@@ -224,7 +244,6 @@ def get_waymo_2d_boxes(info, cam_idx, occluded, annos=None, mono3d=True):
ann_rec['sample_annotation_token'] = \
f"{info['image']['image_idx']}.{ann_idx}"
ann_rec['sample_data_token'] = info['image']['image_idx']
sample_data_token = info['image']['image_idx']
loc = ann_rec['location'][np.newaxis, :]
dim = ann_rec['dimensions'][np.newaxis, :]
......@@ -266,9 +285,8 @@ def get_waymo_2d_boxes(info, cam_idx, occluded, annos=None, mono3d=True):
min_x, min_y, max_x, max_y = final_coords
# Generate dictionary record to be included in the .json file.
repro_rec = generate_waymo_mono3d_record(ann_rec, min_x, min_y, max_x,
max_y, sample_data_token,
info['image']['image_path'])
repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y,
dataset)
# If mono3d=True, add 3D annotations in camera coordinates
if mono3d and (repro_rec is not None):
......@@ -288,11 +306,7 @@ def get_waymo_2d_boxes(info, cam_idx, occluded, annos=None, mono3d=True):
# samples with depth < 0 will be removed
if repro_rec['depth'] <= 0:
continue
repro_rec['attribute_name'] = -1 # no attribute in KITTI
repro_rec['attribute_id'] = -1
repro_recs.append(repro_rec)
repro_recs.append(repro_rec)
return repro_recs
......@@ -355,7 +369,7 @@ def post_process_coords(
def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,
sample_data_token: str, filename: str) -> OrderedDict:
dataset: str) -> OrderedDict:
"""Generate one 2D annotation record given various information on top of
the 2D bounding box coordinates.
......@@ -365,112 +379,40 @@ def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,
y1 (float): Minimum value of the y coordinate.
x2 (float): Maximum value of the x coordinate.
y2 (float): Maximum value of the y coordinate.
sample_data_token (str): Sample data token.
filename (str):The corresponding image file where the annotation
is present.
dataset (str): Name of dataset.
Returns:
dict: A sample mono3D annotation record.
- bbox_label (int): 2d box label id
- bbox_label_3d (int): 3d box label id
- bbox (list[float]): left x, top y, right x, bottom y
of 2d box
- bbox_3d_isvalid (bool): whether the box is valid
dict: A sample 2d annotation record.
- bbox_label (int): 2d box label id
- bbox_label_3d (int): 3d box label id
- bbox (list[float]): left x, top y, right x, bottom y
of 2d box
- bbox_3d_isvalid (bool): whether the box is valid
"""
repro_rec = OrderedDict()
repro_rec['sample_data_token'] = sample_data_token
coco_rec = dict()
relevant_keys = [
'attribute_tokens',
'category_name',
'instance_token',
'next',
'num_lidar_pts',
'num_radar_pts',
'prev',
'sample_annotation_token',
'sample_data_token',
'visibility_token',
]
for key, value in ann_rec.items():
if key in relevant_keys:
repro_rec[key] = value
repro_rec['bbox_corners'] = [x1, y1, x2, y2]
repro_rec['filename'] = filename
if repro_rec['category_name'] not in NuScenesNameMapping:
return None
cat_name = NuScenesNameMapping[repro_rec['category_name']]
coco_rec['bbox_label'] = nus_categories.index(cat_name)
coco_rec['bbox_label_3d'] = nus_categories.index(cat_name)
coco_rec['bbox'] = [x1, y1, x2, y2]
coco_rec['bbox_3d_isvalid'] = True
return coco_rec
def generate_waymo_mono3d_record(ann_rec, x1, y1, x2, y2, sample_data_token,
filename):
"""Generate one 2D annotation record given various information on top of
the 2D bounding box coordinates.
The original version in the data converter will be deprecated in the
future.
if dataset == 'nuscenes':
cat_name = ann_rec['category_name']
if cat_name not in NuScenesNameMapping:
return None
else:
cat_name = NuScenesNameMapping[cat_name]
categories = nus_categories
else:
    if dataset == 'kitti':
        categories = kitti_categories
    elif dataset == 'waymo':
        categories = waymo_categories
    else:
        raise NotImplementedError('Unsupported dataset!')
    cat_name = ann_rec['name']
    if cat_name not in categories:
        return None
Args:
ann_rec (dict): Original 3d annotation record.
x1 (float): Minimum value of the x coordinate.
y1 (float): Minimum value of the y coordinate.
x2 (float): Maximum value of the x coordinate.
y2 (float): Maximum value of the y coordinate.
sample_data_token (str): Sample data token.
filename (str):The corresponding image file where the annotation
is present.
rec = dict()
rec['bbox_label'] = categories.index(cat_name)
rec['bbox_label_3d'] = rec['bbox_label']
rec['bbox'] = [x1, y1, x2, y2]
rec['bbox_3d_isvalid'] = True
Returns:
dict: A sample 2D annotation record.
- file_name (str): file name
- image_id (str): sample data token
- area (float): 2d box area
- category_name (str): category name
- category_id (int): category id
- bbox (list[float]): left x, top y, x_size, y_size of 2d box
- iscrowd (int): whether the area is crowd
"""
kitti_categories = ('Car', 'Pedestrian', 'Cyclist')
repro_rec = OrderedDict()
repro_rec['sample_data_token'] = sample_data_token
coco_rec = dict()
key_mapping = {
'name': 'category_name',
'num_points_in_gt': 'num_lidar_pts',
'sample_annotation_token': 'sample_annotation_token',
'sample_data_token': 'sample_data_token',
}
for key, value in ann_rec.items():
if key in key_mapping.keys():
repro_rec[key_mapping[key]] = value
repro_rec['bbox_corners'] = [x1, y1, x2, y2]
repro_rec['filename'] = filename
coco_rec['file_name'] = filename
coco_rec['image_id'] = sample_data_token
coco_rec['area'] = (y2 - y1) * (x2 - x1)
if repro_rec['category_name'] not in kitti_categories:
return None
cat_name = repro_rec['category_name']
coco_rec['category_name'] = cat_name
coco_rec['category_id'] = kitti_categories.index(cat_name)
coco_rec['bbox_label'] = coco_rec['category_id']
coco_rec['bbox_label_3d'] = coco_rec['bbox_label']
coco_rec['bbox'] = [x1, y1, x2 - x1, y2 - y1]
coco_rec['iscrowd'] = 0
return coco_rec
return rec
......@@ -26,11 +26,11 @@ class Det3DDataset(BaseDataset):
metainfo (dict, optional): Meta information for dataset, such as class
information. Defaults to None.
data_prefix (dict, optional): Prefix for training data. Defaults to
dict(pts='velodyne', img="").
dict(pts='velodyne', img='').
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
modality (dict, optional): Modality to specify the sensor data used
as input, it usually has following keys.
as input. It usually has the following keys:
- use_camera: bool
- use_lidar: bool
......@@ -40,7 +40,7 @@ class Det3DDataset(BaseDataset):
box_type_3d (str, optional): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`.
Defaults to 'LiDAR'. Available options includes
Defaults to 'LiDAR'. Available options include:
- 'LiDAR': Box in LiDAR coordinates, usually for
outdoor point cloud 3d detection.
......@@ -49,15 +49,15 @@ class Det3DDataset(BaseDataset):
- 'Camera': Box in camera coordinates, usually
for vision-based 3d detection.
filter_empty_gt (bool): Whether to filter the data with
filter_empty_gt (bool, optional): Whether to filter the data with
empty GT. Defaults to True.
test_mode (bool): Whether the dataset is in test mode.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
load_eval_anns (bool): Whether to load annotations
in test_mode, the annotation will be save in
`eval_ann_infos`, which can be use in Evaluator.
file_client_args (dict): Configuration of file client.
Defaults to `dict(backend='disk')`.
load_eval_anns (bool, optional): Whether to load annotations in
    test_mode; the annotations will be saved in `eval_ann_infos`,
    which can be used by the Evaluator. Defaults to True.
file_client_args (dict, optional): Configuration of file client.
Defaults to dict(backend='disk').
"""
def __init__(self,
......@@ -73,7 +73,7 @@ class Det3DDataset(BaseDataset):
test_mode: bool = False,
load_eval_anns=True,
file_client_args: dict = dict(backend='disk'),
**kwargs):
**kwargs) -> None:
# init file client
self.file_client = mmengine.FileClient(**file_client_args)
self.filter_empty_gt = filter_empty_gt
......@@ -125,7 +125,7 @@ class Det3DDataset(BaseDataset):
self.metainfo['box_type_3d'] = box_type_3d
self.metainfo['label_mapping'] = self.label_mapping
def _remove_dontcare(self, ann_info):
def _remove_dontcare(self, ann_info: dict) -> dict:
"""Remove annotations that do not need to be cared.
-1 indicate dontcare in MMDet3d.
......@@ -192,7 +192,8 @@ class Det3DDataset(BaseDataset):
'bbox_3d': 'gt_bboxes_3d',
'depth': 'depths',
'center_2d': 'centers_2d',
'attr_label': 'attr_labels'
'attr_label': 'attr_labels',
'velocity': 'velocities',
}
instances = info['instances']
# empty gt
......@@ -209,14 +210,18 @@ class Det3DDataset(BaseDataset):
self.label_mapping[item] for item in temp_anns
]
if ann_name in name_mapping:
ann_name = name_mapping[ann_name]
mapped_ann_name = name_mapping[ann_name]
else:
mapped_ann_name = ann_name
if 'label' in ann_name:
temp_anns = np.array(temp_anns).astype(np.int64)
else:
elif ann_name in name_mapping:
temp_anns = np.array(temp_anns).astype(np.float32)
else:
temp_anns = np.array(temp_anns)
ann_info[ann_name] = temp_anns
ann_info[mapped_ann_name] = temp_anns
ann_info['instances'] = info['instances']
return ann_info
......@@ -241,6 +246,7 @@ class Det3DDataset(BaseDataset):
self.data_prefix.get('pts', ''),
info['lidar_points']['lidar_path'])
info['num_pts_feats'] = info['lidar_points']['num_pts_feats']
info['lidar_path'] = info['lidar_points']['lidar_path']
if 'lidar_sweeps' in info:
for sweep in info['lidar_sweeps']:
......@@ -285,7 +291,7 @@ class Det3DDataset(BaseDataset):
return info
def prepare_data(self, index):
def prepare_data(self, index: int) -> Optional[dict]:
"""Data preparation for both training and testing stage.
Called by `__getitem__` of dataset.
......@@ -294,7 +300,7 @@ class Det3DDataset(BaseDataset):
index (int): Index for accessing the target data.
Returns:
dict: Data dict of the corresponding index.
dict | None: Data dict of the corresponding index.
"""
input_dict = self.get_data_info(index)
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Callable, List, Optional, Union
from typing import Callable, List, Union
import numpy as np
......@@ -22,11 +22,12 @@ class KittiDataset(Det3DDataset):
Defaults to None.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to `dict(use_lidar=True)`.
default_cam_key (str, optional): The default camera name adopted.
Defaults to 'CAM2'.
box_type_3d (str, optional): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`.
Defaults to 'LiDAR' in this dataset. Available options includes
Defaults to 'LiDAR' in this dataset. Available options include:
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
......@@ -35,9 +36,9 @@ class KittiDataset(Det3DDataset):
Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
pcd_limit_range (list, optional): The range of point cloud used to
filter invalid predicted boxes.
Default: [0, -40, -3, 70.4, 40, 0.0].
pcd_limit_range (list[float], optional): The range of point cloud
used to filter invalid predicted boxes.
Defaults to [0, -40, -3, 70.4, 40, 0.0].
"""
# TODO: use full classes of kitti
METAINFO = {
......@@ -49,15 +50,18 @@ class KittiDataset(Det3DDataset):
data_root: str,
ann_file: str,
pipeline: List[Union[dict, Callable]] = [],
modality: Optional[dict] = dict(use_lidar=True),
modality: dict = dict(use_lidar=True),
default_cam_key: str = 'CAM2',
task: str = 'lidar_det',
box_type_3d: str = 'LiDAR',
filter_empty_gt: bool = True,
test_mode: bool = False,
pcd_limit_range: List[float] = [0, -40, -3, 70.4, 40, 0.0],
**kwargs):
**kwargs) -> None:
self.pcd_limit_range = pcd_limit_range
assert task in ('lidar_det', 'mono_det')
self.task = task
super().__init__(
data_root=data_root,
ann_file=ann_file,
......@@ -107,11 +111,14 @@ class KittiDataset(Det3DDataset):
info['plane'] = plane_lidar
if self.task == 'mono_det':
info['instances'] = info['cam_instances'][self.default_cam_key]
info = super().parse_data_info(info)
return info
def parse_ann_info(self, info):
def parse_ann_info(self, info: dict) -> dict:
"""Get annotation info according to the given index.
Args:
......@@ -135,6 +142,12 @@ class KittiDataset(Det3DDataset):
ann_info['gt_bboxes_3d'] = np.zeros((0, 7), dtype=np.float32)
ann_info['gt_labels_3d'] = np.zeros(0, dtype=np.int64)
if self.task == 'mono_det':
ann_info['gt_bboxes'] = np.zeros((0, 4), dtype=np.float32)
ann_info['gt_bboxes_labels'] = np.zeros(0, dtype=np.int64)
ann_info['centers_2d'] = np.zeros((0, 2), dtype=np.float32)
ann_info['depths'] = np.zeros((0), dtype=np.float32)
ann_info = self._remove_dontcare(ann_info)
# in kitti, lidar2cam = R0_rect @ Tr_velo_to_cam
lidar2cam = np.array(info['images']['CAM2']['lidar2cam'])
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List
from typing import Callable, List, Union
import numpy as np
......@@ -24,18 +24,18 @@ class LyftDataset(Det3DDataset):
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to None.
as input. Defaults to dict(use_camera=False, use_lidar=True).
box_type_3d (str): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`.
Defaults to 'LiDAR' in this dataset. Available options includes
Defaults to 'LiDAR' in this dataset. Available options include:
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
filter_empty_gt (bool): Whether to filter empty GT.
filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True.
test_mode (bool): Whether the dataset is in test mode.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
"""
......@@ -48,8 +48,8 @@ class LyftDataset(Det3DDataset):
def __init__(self,
data_root: str,
ann_file: str,
pipeline: List[dict] = None,
modality: Dict = dict(use_camera=False, use_lidar=True),
pipeline: List[Union[dict, Callable]] = [],
modality: dict = dict(use_camera=False, use_lidar=True),
box_type_3d: str = 'LiDAR',
filter_empty_gt: bool = True,
test_mode: bool = False,
......
# Copyright (c) OpenMMLab. All rights reserved.
from os import path as osp
from typing import Dict, List
from typing import Callable, List, Union
import numpy as np
......@@ -22,25 +22,26 @@ class NuScenesDataset(Det3DDataset):
Args:
data_root (str): Path of dataset root.
ann_file (str): Path of annotation file.
task (str, optional): Detection task. Defaults to 'lidar_det'.
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
box_type_3d (str): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`.
Defaults to 'LiDAR' in this dataset. Available options includes.
Defaults to 'LiDAR' in this dataset. Available options include:
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to dict(use_camera=False,use_lidar=True).
filter_empty_gt (bool): Whether to filter empty GT.
as input. Defaults to dict(use_camera=False, use_lidar=True).
filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True.
test_mode (bool): Whether the dataset is in test mode.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
with_velocity (bool): Whether include velocity prediction
with_velocity (bool, optional): Whether to include velocity prediction
into the experiments. Defaults to True.
use_valid_flag (bool): Whether to use `use_valid_flag` key
use_valid_flag (bool, optional): Whether to use `use_valid_flag` key
in the info file as mask to filter gt_boxes and gt_names.
Defaults to False.
"""
......@@ -55,10 +56,10 @@ class NuScenesDataset(Det3DDataset):
def __init__(self,
data_root: str,
ann_file: str,
task: str = '3d',
pipeline: List[dict] = None,
task: str = 'lidar_det',
pipeline: List[Union[dict, Callable]] = [],
box_type_3d: str = 'LiDAR',
modality: Dict = dict(
modality: dict = dict(
use_camera=False,
use_lidar=True,
),
......@@ -66,12 +67,12 @@ class NuScenesDataset(Det3DDataset):
test_mode: bool = False,
with_velocity: bool = True,
use_valid_flag: bool = False,
**kwargs):
**kwargs) -> None:
self.use_valid_flag = use_valid_flag
self.with_velocity = with_velocity
# TODO: Redesign multi-view data process in the future
assert task in ('3d', 'mono3d', 'multi-view')
assert task in ('lidar_det', 'mono_det', 'multi-view_det')
self.task = task
assert box_type_3d.lower() in ('lidar', 'camera')
......@@ -85,6 +86,27 @@ class NuScenesDataset(Det3DDataset):
test_mode=test_mode,
**kwargs)
def _filter_with_mask(self, ann_info: dict) -> dict:
"""Remove annotations that do not need to be cared.
Args:
ann_info (dict): Dict of annotation infos.
Returns:
dict: Annotations after filtering.
"""
filtered_annotations = {}
if self.use_valid_flag:
filter_mask = ann_info['bbox_3d_isvalid']
else:
filter_mask = ann_info['num_lidar_pts'] > 0
for key in ann_info.keys():
if key != 'instances':
filtered_annotations[key] = ann_info[key][filter_mask]
else:
filtered_annotations[key] = ann_info[key]
return filtered_annotations
def parse_ann_info(self, info: dict) -> dict:
"""Get annotation info according to the given index.
......@@ -99,66 +121,51 @@ class NuScenesDataset(Det3DDataset):
- gt_labels_3d (np.ndarray): Labels of ground truths.
"""
ann_info = super().parse_ann_info(info)
if ann_info is None:
# empty instance
anns_results = dict()
anns_results['gt_bboxes_3d'] = np.zeros((0, 7), dtype=np.float32)
anns_results['gt_labels_3d'] = np.zeros(0, dtype=np.int64)
return anns_results
if self.use_valid_flag:
mask = ann_info['bbox_3d_isvalid']
else:
mask = ann_info['num_lidar_pts'] > 0
gt_bboxes_3d = ann_info['gt_bboxes_3d'][mask]
gt_labels_3d = ann_info['gt_labels_3d'][mask]
if 'gt_bboxes' in ann_info:
gt_bboxes = ann_info['gt_bboxes'][mask]
gt_labels = ann_info['gt_labels'][mask]
attr_labels = ann_info['attr_labels'][mask]
if ann_info is not None:
ann_info = self._filter_with_mask(ann_info)
if self.with_velocity:
gt_bboxes_3d = ann_info['gt_bboxes_3d']
gt_velocities = ann_info['velocities']
nan_mask = np.isnan(gt_velocities[:, 0])
gt_velocities[nan_mask] = [0.0, 0.0]
gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_velocities],
axis=-1)
ann_info['gt_bboxes_3d'] = gt_bboxes_3d
else:
gt_bboxes = np.zeros((0, 4), dtype=np.float32)
gt_labels = np.array([], dtype=np.int64)
attr_labels = np.array([], dtype=np.int64)
if 'centers_2d' in ann_info:
centers_2d = ann_info['centers_2d'][mask]
depths = ann_info['depths'][mask]
else:
centers_2d = np.zeros((0, 2), dtype=np.float32)
depths = np.zeros((0), dtype=np.float32)
if self.with_velocity:
gt_velocity = ann_info['velocity'][mask]
nan_mask = np.isnan(gt_velocity[:, 0])
gt_velocity[nan_mask] = [0.0, 0.0]
gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_velocity], axis=-1)
# empty instance
ann_info = dict()
if self.with_velocity:
ann_info['gt_bboxes_3d'] = np.zeros((0, 9), dtype=np.float32)
else:
ann_info['gt_bboxes_3d'] = np.zeros((0, 7), dtype=np.float32)
ann_info['gt_labels_3d'] = np.zeros(0, dtype=np.int64)
if self.task == 'mono_det':
    ann_info['gt_bboxes'] = np.zeros((0, 4), dtype=np.float32)
    ann_info['gt_bboxes_labels'] = np.zeros(0, dtype=np.int64)
    ann_info['attr_labels'] = np.zeros(0, dtype=np.int64)
ann_info['centers_2d'] = np.zeros((0, 2), dtype=np.float32)
ann_info['depths'] = np.zeros((0), dtype=np.float32)
# the nuscenes box center is [0.5, 0.5, 0.5], we change it to be
# the same as KITTI (0.5, 0.5, 0)
# TODO: Unify the coordinates
if self.task == 'mono3d':
if self.task == 'mono_det':
gt_bboxes_3d = CameraInstance3DBoxes(
gt_bboxes_3d,
box_dim=gt_bboxes_3d.shape[-1],
ann_info['gt_bboxes_3d'],
box_dim=ann_info['gt_bboxes_3d'].shape[-1],
origin=(0.5, 0.5, 0.5))
else:
gt_bboxes_3d = LiDARInstance3DBoxes(
gt_bboxes_3d,
box_dim=gt_bboxes_3d.shape[-1],
ann_info['gt_bboxes_3d'],
box_dim=ann_info['gt_bboxes_3d'].shape[-1],
origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)
anns_results = dict(
gt_bboxes_3d=gt_bboxes_3d,
gt_labels_3d=gt_labels_3d,
gt_bboxes=gt_bboxes,
gt_labels=gt_labels,
attr_labels=attr_labels,
centers_2d=centers_2d,
depths=depths)
ann_info['gt_bboxes_3d'] = gt_bboxes_3d
return anns_results
return ann_info
def parse_data_info(self, info: dict) -> dict:
"""Process the raw data info.
......@@ -173,7 +180,7 @@ class NuScenesDataset(Det3DDataset):
dict: Has `ann_info` in training stage. And
    all paths have been converted to absolute paths.
"""
if self.task == 'mono3d':
if self.task == 'mono_det':
data_list = []
if self.modality['use_lidar']:
info['lidar_points']['lidar_path'] = \
......
......@@ -36,7 +36,7 @@ class ScanNetDataset(Det3DDataset):
box_type_3d (str): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`.
Defaults to 'Depth' in this dataset. Available options includes
Defaults to 'Depth' in this dataset. Available options include:
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
......@@ -61,13 +61,13 @@ class ScanNetDataset(Det3DDataset):
def __init__(self,
data_root: str,
ann_file: str,
metainfo: dict = None,
metainfo: Optional[dict] = None,
data_prefix: dict = dict(
pts='points',
pts_instance_mask='instance_mask',
pts_semantic_mask='semantic_mask'),
pipeline: List[Union[dict, Callable]] = [],
modality=dict(use_camera=False, use_lidar=True),
modality: dict = dict(use_camera=False, use_lidar=True),
box_type_3d: str = 'Depth',
filter_empty_gt: bool = True,
test_mode: bool = False,
......@@ -101,7 +101,7 @@ class ScanNetDataset(Det3DDataset):
assert self.modality['use_camera'] or self.modality['use_lidar']
@staticmethod
def _get_axis_align_matrix(info: dict) -> dict:
def _get_axis_align_matrix(info: dict) -> np.ndarray:
"""Get axis_align_matrix from info. If not exist, return identity mat.
Args:
......
......@@ -24,25 +24,25 @@ class SUNRGBDDataset(Det3DDataset):
ann_file (str): Path of annotation file.
metainfo (dict, optional): Meta information for dataset, such as class
information. Defaults to None.
data_prefix (dict): Prefix for data. Defaults to
`dict(pts='points',img='sunrgbd_trainval')`.
data_prefix (dict, optional): Prefix for data. Defaults to
    dict(pts='points', img='sunrgbd_trainval').
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to `dict(use_camera=True, use_lidar=True)`.
default_cam_key (str): The default camera name adopted.
Defaults to "CAM0".
as input. Defaults to dict(use_camera=True, use_lidar=True).
default_cam_key (str, optional): The default camera name adopted.
Defaults to 'CAM0'.
box_type_3d (str): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`.
Defaults to 'Depth' in this dataset. Available options includes
Defaults to 'Depth' in this dataset. Available options include:
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
filter_empty_gt (bool): Whether to filter empty GT.
filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True.
test_mode (bool): Whether the dataset is in test mode.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
"""
METAINFO = {
......
......@@ -11,11 +11,12 @@ from .test_time_aug import MultiScaleFlipAug3D
from .transforms_3d import (AffineResize, BackgroundPointsFilter,
GlobalAlignment, GlobalRotScaleTrans,
IndoorPatchPointSample, IndoorPointSample,
ObjectNameFilter, ObjectNoise, ObjectRangeFilter,
ObjectSample, PointSample, PointShuffle,
MultiViewWrapper, ObjectNameFilter, ObjectNoise,
ObjectRangeFilter, ObjectSample,
PhotoMetricDistortion3D, PointSample, PointShuffle,
PointsRangeFilter, RandomDropPointsColor,
RandomFlip3D, RandomJitterPoints, RandomShiftScale,
VoxelBasedPointSampler)
RandomFlip3D, RandomJitterPoints, RandomResize3D,
RandomShiftScale, Resize3D, VoxelBasedPointSampler)
__all__ = [
'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans',
......@@ -29,5 +30,6 @@ __all__ = [
'VoxelBasedPointSampler', 'GlobalAlignment', 'IndoorPatchPointSample',
'LoadImageFromFileMono3D', 'ObjectNameFilter', 'RandomDropPointsColor',
'RandomJitterPoints', 'AffineResize', 'RandomShiftScale',
'LoadPointsFromDict'
'LoadPointsFromDict', 'Resize3D', 'RandomResize3D',
'MultiViewWrapper', 'PhotoMetricDistortion3D'
]
......@@ -32,7 +32,7 @@ class Compose:
data (dict): A result dict contains the data to transform.
Returns:
dict: Transformed data.
dict: Transformed data.
"""
for t in self.transforms:
......
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import os
import warnings
from typing import List, Optional
import mmengine
import numpy as np
......@@ -16,18 +16,19 @@ class BatchSampler:
Args:
sampled_list (list[dict]): List of samples.
name (str, optional): The category of samples. Default: None.
epoch (int, optional): Sampling epoch. Default: None.
shuffle (bool, optional): Whether to shuffle indices. Default: False.
drop_reminder (bool, optional): Drop reminder. Default: False.
name (str, optional): The category of samples. Defaults to None.
epoch (int, optional): Sampling epoch. Defaults to None.
shuffle (bool, optional): Whether to shuffle indices.
Defaults to False.
drop_reminder (bool, optional): Whether to drop the remainder.
    Defaults to False.
"""
def __init__(self,
sampled_list,
name=None,
epoch=None,
shuffle=True,
drop_reminder=False):
sampled_list: List[dict],
name: Optional[str] = None,
epoch: Optional[int] = None,
shuffle: bool = True,
drop_reminder: bool = False) -> None:
self._sampled_list = sampled_list
self._indices = np.arange(len(sampled_list))
if shuffle:
......@@ -40,7 +41,7 @@ class BatchSampler:
self._epoch_counter = 0
self._drop_reminder = drop_reminder
def _sample(self, num):
def _sample(self, num: int) -> List[int]:
"""Sample specific number of ground truths and return indices.
Args:
......@@ -57,7 +58,7 @@ class BatchSampler:
self._idx += num
return ret
def _reset(self):
def _reset(self) -> None:
"""Reset the index of batchsampler to zero."""
assert self._name is not None
# print("reset", self._name)
......@@ -65,7 +66,7 @@ class BatchSampler:
np.random.shuffle(self._indices)
self._idx = 0
def sample(self, num):
def sample(self, num: int) -> List[dict]:
"""Sample specific number of ground truths.
Args:
......@@ -88,24 +89,30 @@ class DataBaseSampler(object):
rate (float): Rate of actual sampled over maximum sampled number.
prepare (dict): Name of preparation functions and the input value.
sample_groups (dict): Sampled classes and numbers.
classes (list[str], optional): List of classes. Default: None.
points_loader(dict, optional): Config of points loader. Default:
dict(type='LoadPointsFromFile', load_dim=4, use_dim=[0,1,2,3])
classes (list[str], optional): List of classes. Defaults to None.
points_loader (dict, optional): Config of points loader. Defaults to
dict(type='LoadPointsFromFile', load_dim=4, use_dim=[0, 1, 2, 3]).
file_client_args (dict, optional): Config dict of file clients,
refer to
https://github.com/open-mmlab/mmengine/blob/main/mmengine/fileio/file_client.py
for more details. Defaults to dict(backend='disk').
"""
def __init__(self,
info_path,
data_root,
rate,
prepare,
sample_groups,
classes=None,
points_loader=dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=[0, 1, 2, 3]),
file_client_args=dict(backend='disk')):
def __init__(
self,
info_path: str,
data_root: str,
rate: float,
prepare: dict,
sample_groups: dict,
classes: Optional[List[str]] = None,
points_loader: dict = dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=[0, 1, 2, 3]),
file_client_args: dict = dict(backend='disk')
) -> None:
super().__init__()
self.data_root = data_root
self.info_path = info_path
......@@ -118,18 +125,9 @@ class DataBaseSampler(object):
self.file_client = mmengine.FileClient(**file_client_args)
# load data base infos
if hasattr(self.file_client, 'get_local_path'):
with self.file_client.get_local_path(info_path) as local_path:
# loading data from a file-like object needs file format
db_infos = mmengine.load(
open(local_path, 'rb'), file_format='pkl')
else:
warnings.warn(
'The used MMCV version does not have get_local_path. '
f'We treat the {info_path} as local paths and it '
'might cause errors if the path is not a local path. '
'Please use MMCV>= 1.3.16 if you meet errors.')
db_infos = mmengine.load(info_path)
with self.file_client.get_local_path(info_path) as local_path:
# loading data from a file-like object needs file format
db_infos = mmengine.load(open(local_path, 'rb'), file_format='pkl')
# filter database infos
from mmengine.logging import MMLogger
......@@ -163,7 +161,7 @@ class DataBaseSampler(object):
# TODO: No group_sampling currently
@staticmethod
def filter_by_difficulty(db_infos, removed_difficulty):
def filter_by_difficulty(db_infos: dict, removed_difficulty: list) -> dict:
"""Filter ground truths by difficulties.
Args:
......@@ -182,7 +180,7 @@ class DataBaseSampler(object):
return new_db_infos
@staticmethod
def filter_by_min_points(db_infos, min_gt_points_dict):
def filter_by_min_points(db_infos: dict, min_gt_points_dict: dict) -> dict:
"""Filter ground truths by number of points in the bbox.
Args:
......@@ -203,12 +201,19 @@ class DataBaseSampler(object):
db_infos[name] = filtered_infos
return db_infos
def sample_all(self, gt_bboxes, gt_labels, img=None, ground_plane=None):
def sample_all(self,
gt_bboxes: np.ndarray,
gt_labels: np.ndarray,
img: Optional[np.ndarray] = None,
ground_plane: Optional[np.ndarray] = None) -> dict:
"""Sampling all categories of bboxes.
Args:
gt_bboxes (np.ndarray): Ground truth bounding boxes.
gt_labels (np.ndarray): Ground truth labels of boxes.
img (np.ndarray, optional): Image array. Defaults to None.
ground_plane (np.ndarray, optional): Ground plane information.
Defaults to None.
Returns:
dict: Dict of sampled 'pseudo ground truths'.
......@@ -301,7 +306,8 @@ class DataBaseSampler(object):
return ret
def sample_class_v2(self, name, num, gt_bboxes):
def sample_class_v2(self, name: str, num: int,
gt_bboxes: np.ndarray) -> List[dict]:
"""Sampling specific categories of bounding boxes.
Args:
......
......@@ -63,15 +63,20 @@ class Pack3DDetInputs(BaseTransform):
def __init__(
self,
keys: dict,
meta_keys: dict = ('img_path', 'ori_shape', 'img_shape', 'lidar2img',
'depth2img', 'cam2img', 'pad_shape', 'scale_factor',
'flip', 'pcd_horizontal_flip', 'pcd_vertical_flip',
'box_mode_3d', 'box_type_3d', 'img_norm_cfg',
'pcd_trans', 'sample_idx', 'pcd_scale_factor',
'pcd_rotation', 'pcd_rotation_angle', 'lidar_path',
'transformation_3d_flow', 'trans_mat',
'affine_aug')):
keys: tuple,
meta_keys: tuple = ('img_path', 'ori_shape', 'img_shape', 'lidar2img',
'depth2img', 'cam2img', 'pad_shape',
'scale_factor', 'flip', 'pcd_horizontal_flip',
'pcd_vertical_flip', 'box_mode_3d', 'box_type_3d',
'img_norm_cfg', 'num_pts_feats', 'pcd_trans',
'sample_idx', 'pcd_scale_factor', 'pcd_rotation',
'pcd_rotation_angle', 'lidar_path',
'transformation_3d_flow', 'trans_mat',
'affine_aug', 'sweep_img_metas', 'ori_cam2img',
'cam2global', 'crop_offset', 'img_crop_offset',
'resize_img_shape', 'lidar2cam', 'ori_lidar2img',
'num_ref_frames', 'num_views', 'ego2global')
) -> None:
self.keys = keys
self.meta_keys = meta_keys
......@@ -98,7 +103,7 @@ class Pack3DDetInputs(BaseTransform):
- img
- 'data_samples' (obj:`Det3DDataSample`): The annotation info of
the sample.
the sample.
"""
# augtest
if isinstance(results, list):
......@@ -115,7 +120,7 @@ class Pack3DDetInputs(BaseTransform):
else:
raise NotImplementedError
def pack_single_results(self, results):
def pack_single_results(self, results: dict) -> dict:
"""Method to pack the single input data. when the value in this dict is
a list, it usually is in Augmentations Testing.
......@@ -131,7 +136,7 @@ class Pack3DDetInputs(BaseTransform):
- points
- img
- 'data_samples' (obj:`Det3DDataSample`): The annotation info
- 'data_samples' (:obj:`Det3DDataSample`): The annotation info
of the sample.
"""
# Format 3D data
......@@ -219,6 +224,7 @@ class Pack3DDetInputs(BaseTransform):
return packed_results
def __repr__(self) -> str:
"""str: Return a string that describes the module."""
repr_str = self.__class__.__name__
repr_str += f'(keys={self.keys})'
repr_str += f'(meta_keys={self.meta_keys})'
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List
import copy
from typing import List, Optional, Union
import mmcv
import mmengine
......@@ -13,7 +14,7 @@ from mmdet.datasets.transforms import LoadAnnotations
@TRANSFORMS.register_module()
class LoadMultiViewImageFromFiles(object):
class LoadMultiViewImageFromFiles(BaseTransform):
"""Load multi channel images from a list of separate channel files.
Expects results['img_filename'] to be a list of filenames.
......@@ -23,13 +24,38 @@ class LoadMultiViewImageFromFiles(object):
Defaults to False.
color_type (str, optional): Color type of the file.
Defaults to 'unchanged'.
file_client_args (dict): Config dict of file clients,
    refer to
    https://github.com/open-mmlab/mmengine/blob/main/mmengine/fileio/file_client.py
    for more details. Defaults to dict(backend='disk').
num_views (int): Number of views in a frame. Defaults to 5.
num_ref_frames (int): Number of reference frames to load. Defaults to -1.
test_mode (bool): Whether it is test mode during loading.
    Defaults to False.
set_default_scale (bool): Whether to set the default scale.
    Defaults to True.
"""
def __init__(self, to_float32=False, color_type='unchanged'):
def __init__(self,
to_float32: bool = False,
color_type: str = 'unchanged',
file_client_args: dict = dict(backend='disk'),
num_views: int = 5,
num_ref_frames: int = -1,
test_mode: bool = False,
set_default_scale: bool = True) -> None:
self.to_float32 = to_float32
self.color_type = color_type
self.file_client_args = file_client_args.copy()
self.file_client = None
self.num_views = num_views
# num_ref_frames is used for multi-sweep loading
self.num_ref_frames = num_ref_frames
# when test_mode=False, we randomly select previous frames
# otherwise, select the earliest one
self.test_mode = test_mode
self.set_default_scale = set_default_scale
def __call__(self, results):
def transform(self, results: dict) -> Optional[dict]:
"""Call function to load multi-view image from files.
Args:
......@@ -47,33 +73,151 @@ class LoadMultiViewImageFromFiles(object):
- scale_factor (float): Scale factor.
- img_norm_cfg (dict): Normalization configuration of images.
"""
filename = results['img_filename']
# TODO: consider split the multi-sweep part out of this pipeline
# Derive the mask and transform for loading of multi-sweep data
if self.num_ref_frames > 0:
# init choice with the current frame
init_choice = np.array([0], dtype=np.int64)
num_frames = len(results['img_filename']) // self.num_views - 1
if num_frames == 0: # no previous frame, then copy cur frames
choices = np.random.choice(
1, self.num_ref_frames, replace=True)
elif num_frames >= self.num_ref_frames:
# NOTE: suppose the info is saved following the order
# from latest to earlier frames
if self.test_mode:
choices = np.arange(num_frames - self.num_ref_frames,
num_frames) + 1
# NOTE: +1 is for selecting previous frames
else:
choices = np.random.choice(
num_frames, self.num_ref_frames, replace=False) + 1
elif num_frames > 0 and num_frames < self.num_ref_frames:
if self.test_mode:
base_choices = np.arange(num_frames) + 1
random_choices = np.random.choice(
num_frames,
self.num_ref_frames - num_frames,
replace=True) + 1
choices = np.concatenate([base_choices, random_choices])
else:
choices = np.random.choice(
num_frames, self.num_ref_frames, replace=True) + 1
else:
raise NotImplementedError
choices = np.concatenate([init_choice, choices])
select_filename = []
for choice in choices:
select_filename += results['img_filename'][choice *
self.num_views:
(choice + 1) *
self.num_views]
results['img_filename'] = select_filename
for key in ['cam2img', 'lidar2cam']:
if key in results:
select_results = []
for choice in choices:
select_results += results[key][choice *
self.num_views:(choice +
1) *
self.num_views]
results[key] = select_results
for key in ['ego2global']:
if key in results:
select_results = []
for choice in choices:
select_results += [results[key][choice]]
results[key] = select_results
# Transform lidar2cam to
# [cur_lidar]2[prev_img] and [cur_lidar]2[prev_cam]
for key in ['lidar2cam']:
if key in results:
# only change matrices of previous frames
for choice_idx in range(1, len(choices)):
pad_prev_ego2global = np.eye(4)
prev_ego2global = results['ego2global'][choice_idx]
pad_prev_ego2global[:prev_ego2global.
shape[0], :prev_ego2global.
shape[1]] = prev_ego2global
pad_cur_ego2global = np.eye(4)
cur_ego2global = results['ego2global'][0]
pad_cur_ego2global[:cur_ego2global.
shape[0], :cur_ego2global.
shape[1]] = cur_ego2global
cur2prev = np.linalg.inv(pad_prev_ego2global).dot(
pad_cur_ego2global)
for result_idx in range(choice_idx * self.num_views,
(choice_idx + 1) *
self.num_views):
results[key][result_idx] = \
results[key][result_idx].dot(cur2prev)
# Support multi-view images with different shapes
# TODO: record the origin shape and padded shape
filename, cam2img, lidar2cam = [], [], []
for _, cam_item in results['images'].items():
filename.append(cam_item['img_path'])
cam2img.append(cam_item['cam2img'])
lidar2cam.append(cam_item['lidar2cam'])
results['filename'] = filename
results['cam2img'] = cam2img
results['lidar2cam'] = lidar2cam
results['ori_cam2img'] = copy.deepcopy(results['cam2img'])
if self.file_client is None:
self.file_client = mmengine.FileClient(**self.file_client_args)
# img is of shape (h, w, c, num_views)
img = np.stack(
[mmcv.imread(name, self.color_type) for name in filename], axis=-1)
# h and w can be different for different views
img_bytes = [self.file_client.get(name) for name in filename]
imgs = [
mmcv.imfrombytes(img_byte, flag=self.color_type)
for img_byte in img_bytes
]
# handle the image with different shape
img_shapes = np.stack([img.shape for img in imgs], axis=0)
img_shape_max = np.max(img_shapes, axis=0)
img_shape_min = np.min(img_shapes, axis=0)
assert img_shape_min[-1] == img_shape_max[-1]
if not np.all(img_shape_max == img_shape_min):
pad_shape = img_shape_max[:2]
else:
pad_shape = None
if pad_shape is not None:
imgs = [
mmcv.impad(img, shape=pad_shape, pad_val=0) for img in imgs
]
img = np.stack(imgs, axis=-1)
if self.to_float32:
img = img.astype(np.float32)
results['filename'] = filename
# unravel to list, see `DefaultFormatBundle` in formatting.py
# unravel to list, see `DefaultFormatBundle` in formating.py
# which will transpose each image separately and then stack into array
results['img'] = [img[..., i] for i in range(img.shape[-1])]
results['img_shape'] = img.shape
results['ori_shape'] = img.shape
# Set initial values for default meta_keys
results['pad_shape'] = img.shape
results['scale_factor'] = 1.0
if self.set_default_scale:
results['scale_factor'] = 1.0
num_channels = 1 if len(img.shape) < 3 else img.shape[2]
results['img_norm_cfg'] = dict(
mean=np.zeros(num_channels, dtype=np.float32),
std=np.ones(num_channels, dtype=np.float32),
to_rgb=False)
results['num_views'] = self.num_views
results['num_ref_frames'] = self.num_ref_frames
return results
def __repr__(self):
"""str: Return a string that describes the module."""
repr_str = self.__class__.__name__
repr_str += f'(to_float32={self.to_float32}, '
repr_str += f"color_type='{self.color_type}')"
repr_str += f"color_type='{self.color_type}', "
repr_str += f'num_views={self.num_views}, '
repr_str += f'num_ref_frames={self.num_ref_frames}, '
repr_str += f'test_mode={self.test_mode})'
return repr_str
......@@ -139,7 +283,7 @@ class LoadPointsFromMultiSweeps(BaseTransform):
Defaults to [0, 1, 2, 4].
file_client_args (dict, optional): Config dict of file clients,
refer to
https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
https://github.com/open-mmlab/mmengine/blob/main/mmengine/fileio/file_client.py
for more details. Defaults to dict(backend='disk').
pad_empty_sweeps (bool, optional): Whether to repeat keyframe when
sweeps is empty. Defaults to False.
......@@ -151,13 +295,13 @@ class LoadPointsFromMultiSweeps(BaseTransform):
"""
def __init__(self,
sweeps_num=10,
load_dim=5,
use_dim=[0, 1, 2, 4],
file_client_args=dict(backend='disk'),
pad_empty_sweeps=False,
remove_close=False,
test_mode=False):
sweeps_num: int = 10,
load_dim: int = 5,
use_dim: List[int] = [0, 1, 2, 4],
file_client_args: dict = dict(backend='disk'),
pad_empty_sweeps: bool = False,
remove_close: bool = False,
test_mode: bool = False) -> None:
self.load_dim = load_dim
self.sweeps_num = sweeps_num
self.use_dim = use_dim
......@@ -167,7 +311,7 @@ class LoadPointsFromMultiSweeps(BaseTransform):
self.remove_close = remove_close
self.test_mode = test_mode
def _load_points(self, pts_filename):
def _load_points(self, pts_filename: str) -> np.ndarray:
"""Private function to load point clouds data.
Args:
......@@ -189,7 +333,9 @@ class LoadPointsFromMultiSweeps(BaseTransform):
points = np.fromfile(pts_filename, dtype=np.float32)
return points
def _remove_close(self, points, radius=1.0):
def _remove_close(self,
points: Union[np.ndarray, BasePoints],
radius: float = 1.0) -> Union[np.ndarray, BasePoints]:
"""Removes point too close within a certain radius from origin.
Args:
......@@ -198,7 +344,7 @@ class LoadPointsFromMultiSweeps(BaseTransform):
Defaults to 1.0.
Returns:
np.ndarray: Points after removing.
np.ndarray | :obj:`BasePoints`: Points after removing.
"""
if isinstance(points, np.ndarray):
points_numpy = points
......@@ -211,7 +357,7 @@ class LoadPointsFromMultiSweeps(BaseTransform):
not_close = np.logical_not(np.logical_and(x_filt, y_filt))
return points[not_close]
def transform(self, results):
def transform(self, results: dict) -> dict:
"""Call function to load multi-sweep point clouds from files.
Args:
......@@ -220,7 +366,7 @@ class LoadPointsFromMultiSweeps(BaseTransform):
Returns:
dict: The result dict containing the multi-sweep points data.
Added key and value are described below.
Updated key and value are described below.
- points (np.ndarray | :obj:`BasePoints`): Multi-sweep point
cloud arrays.
......@@ -290,7 +436,7 @@ class PointSegClassMapping(BaseTransform):
others as len(valid_cat_ids).
"""
def transform(self, results: dict) -> None:
def transform(self, results: dict) -> dict:
"""Call function to map original semantic class to valid category ids.
Args:
......@@ -322,8 +468,6 @@ class PointSegClassMapping(BaseTransform):
def __repr__(self):
"""str: Return a string that describes the module."""
repr_str = self.__class__.__name__
repr_str += f'(valid_cat_ids={self.valid_cat_ids}, '
repr_str += f'max_cat_id={self.max_cat_id})'
return repr_str
......@@ -385,13 +529,14 @@ class LoadPointsFromFile(BaseTransform):
Args:
coord_type (str): The type of coordinates of the point cloud.
    Available options include:
- 'LIDAR': Points in LiDAR coordinates.
- 'DEPTH': Points in depth coordinates, usually for indoor dataset.
- 'CAMERA': Points in camera coordinates.
load_dim (int, optional): The dimension of the loaded points.
Defaults to 6.
use_dim (list[int], optional): Which dimensions of the points to use.
Defaults to [0, 1, 2]. For KITTI dataset, set use_dim=4
use_dim (list[int] | int, optional): Which dimensions of the points
to use. Defaults to [0, 1, 2]. For KITTI dataset, set use_dim=4
or use_dim=[0, 1, 2, 3] to use the intensity dimension.
shift_height (bool, optional): Whether to use shifted height.
Defaults to False.
......@@ -399,7 +544,7 @@ class LoadPointsFromFile(BaseTransform):
Defaults to False.
file_client_args (dict, optional): Config dict of file clients,
refer to
https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
https://github.com/open-mmlab/mmengine/blob/main/mmengine/fileio/file_client.py
for more details. Defaults to dict(backend='disk').
"""
......@@ -407,7 +552,7 @@ class LoadPointsFromFile(BaseTransform):
self,
coord_type: str,
load_dim: int = 6,
use_dim: list = [0, 1, 2],
use_dim: Union[int, List[int]] = [0, 1, 2],
shift_height: bool = False,
use_color: bool = False,
file_client_args: dict = dict(backend='disk')
......@@ -523,6 +668,7 @@ class LoadAnnotations3D(LoadAnnotations):
Required Keys:
- ann_info (dict)
- gt_bboxes_3d (:obj:`LiDARInstance3DBoxes` |
:obj:`DepthInstance3DBoxes` | :obj:`CameraInstance3DBoxes`):
3D ground truth bboxes. Only when `with_bbox_3d` is True
......@@ -592,7 +738,7 @@ class LoadAnnotations3D(LoadAnnotations):
seg_3d_dtype (dtype, optional): Dtype of 3D semantic masks.
Defaults to int64.
file_client_args (dict): Config dict of file clients, refer to
https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
https://github.com/open-mmlab/mmengine/blob/main/mmengine/fileio/file_client.py
for more details.
"""
......
......@@ -16,7 +16,7 @@ class MultiScaleFlipAug3D(BaseTransform):
Args:
transforms (list[dict]): Transforms to apply in each augmentation.
img_scale (tuple | list[tuple]: Images scales for resizing.
img_scale (tuple | list[tuple]): Image scales for resizing.
pts_scale_ratio (float | list[float]): Points scale ratios for
resizing.
flip (bool, optional): Whether apply flip augmentation.
......@@ -25,11 +25,11 @@ class MultiScaleFlipAug3D(BaseTransform):
directions for images, options are "horizontal" and "vertical".
If flip_direction is list, multiple flip augmentations will
be applied. It has no effect when ``flip == False``.
Defaults to "horizontal".
pcd_horizontal_flip (bool, optional): Whether apply horizontal
Defaults to 'horizontal'.
pcd_horizontal_flip (bool, optional): Whether to apply horizontal
flip augmentation to point cloud. Defaults to True.
Note that it works only when 'flip' is turned on.
pcd_vertical_flip (bool, optional): Whether apply vertical flip
pcd_vertical_flip (bool, optional): Whether to apply vertical flip
augmentation to point cloud. Defaults to True.
Note that it works only when 'flip' is turned on.
"""
......@@ -46,7 +46,7 @@ class MultiScaleFlipAug3D(BaseTransform):
self.img_scale = img_scale if isinstance(img_scale,
list) else [img_scale]
self.pts_scale_ratio = pts_scale_ratio \
if isinstance(pts_scale_ratio, list) else[float(pts_scale_ratio)]
if isinstance(pts_scale_ratio, list) else [float(pts_scale_ratio)]
assert mmengine.is_list_of(self.img_scale, tuple)
assert mmengine.is_list_of(self.pts_scale_ratio, float)
......
# Copyright (c) OpenMMLab. All rights reserved.
import random
import warnings
from typing import Dict, List, Optional, Tuple, Union
from typing import List, Optional, Tuple, Union
import cv2
import mmcv
import numpy as np
from mmcv.transforms import BaseTransform
from mmcv.transforms import BaseTransform, RandomResize, Resize
from mmengine import is_tuple_of
from mmdet3d.models.task_modules import VoxelGenerator
......@@ -14,7 +15,9 @@ from mmdet3d.structures import (CameraInstance3DBoxes, DepthInstance3DBoxes,
LiDARInstance3DBoxes)
from mmdet3d.structures.ops import box_np_ops
from mmdet3d.structures.points import BasePoints
from mmdet.datasets.transforms import RandomFlip
from mmdet.datasets.transforms import (PhotoMetricDistortion, RandomCrop,
RandomFlip)
from .compose import Compose
from .data_augment_utils import noise_per_object_v3_
......@@ -76,7 +79,6 @@ class RandomFlip3D(RandomFlip):
otherwise it will be randomly decided by a ratio specified in the init
method.
Required Keys:
- points (np.float32)
......@@ -96,20 +98,25 @@ class RandomFlip3D(RandomFlip):
- pcd_scale_factor (np.float32)
Args:
sync_2d (bool, optional): Whether to apply flip according to the 2D
sync_2d (bool): Whether to apply flip according to the 2D
images. If True, it will apply the same flip as that to 2D images.
If False, it will decide whether to flip randomly and independently
to that of 2D images. Defaults to True.
flip_ratio_bev_horizontal (float, optional): The flipping probability
flip_ratio_bev_horizontal (float): The flipping probability
in horizontal direction. Defaults to 0.0.
flip_ratio_bev_vertical (float, optional): The flipping probability
flip_ratio_bev_vertical (float): The flipping probability
in vertical direction. Defaults to 0.0.
flip_box3d (bool): Whether to flip the bounding boxes. In most cases
    the boxes should be flipped. In camera-based BEV detection this is
    set to False, since flipping the 2D images does not influence the
    3D boxes. Defaults to True.
"""
def __init__(self,
sync_2d: bool = True,
flip_ratio_bev_horizontal: float = 0.0,
flip_ratio_bev_vertical: float = 0.0,
flip_box3d: bool = True,
**kwargs) -> None:
# `flip_ratio_bev_horizontal` is equal to the flip
# prob of the 2d image when `sync_2d` is True
......@@ -119,6 +126,7 @@ class RandomFlip3D(RandomFlip):
self.sync_2d = sync_2d
self.flip_ratio_bev_horizontal = flip_ratio_bev_horizontal
self.flip_ratio_bev_vertical = flip_ratio_bev_vertical
self.flip_box3d = flip_box3d
if flip_ratio_bev_horizontal is not None:
assert isinstance(
flip_ratio_bev_horizontal,
......@@ -150,23 +158,21 @@ class RandomFlip3D(RandomFlip):
updated in the result dict.
"""
assert direction in ['horizontal', 'vertical']
if 'gt_bboxes_3d' in input_dict:
if 'points' in input_dict:
input_dict['points'] = input_dict['gt_bboxes_3d'].flip(
direction, points=input_dict['points'])
if self.flip_box3d:
if 'gt_bboxes_3d' in input_dict:
if 'points' in input_dict:
input_dict['points'] = input_dict['gt_bboxes_3d'].flip(
direction, points=input_dict['points'])
else:
# vision-only detection
input_dict['gt_bboxes_3d'].flip(direction)
else:
# vision-only detection
input_dict['gt_bboxes_3d'].flip(direction)
else:
input_dict['points'].flip(direction)
input_dict['points'].flip(direction)
if 'centers_2d' in input_dict:
assert self.sync_2d is True and direction == 'horizontal', \
'Only support sync_2d=True and horizontal flip with images'
# TODO fix this ori_shape and other keys in vision based model
# TODO ori_shape to img_shape
w = input_dict['ori_shape'][1]
w = input_dict['img_shape'][1]
input_dict['centers_2d'][..., 0] = \
w - input_dict['centers_2d'][..., 0]
# need to modify the horizontal position of camera center
......@@ -176,6 +182,25 @@ class RandomFlip3D(RandomFlip):
# https://github.com/open-mmlab/mmdetection3d/pull/744
input_dict['cam2img'][0][2] = w - input_dict['cam2img'][0][2]
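A quick numeric check of the principal-point update above (a minimal sketch; the image width and intrinsics are hypothetical):

```python
import numpy as np

# After horizontally flipping an image of width w, the principal point cx
# is mirrored: cx_new = w - cx.
w = 1242
cam2img = np.array([[721.5, 0.0, 609.6],
                    [0.0, 721.5, 172.9],
                    [0.0, 0.0, 1.0]])
cam2img[0][2] = w - cam2img[0][2]
assert np.isclose(cam2img[0][2], 632.4)
```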
def _flip_on_direction(self, results: dict) -> None:
"""Function to flip images, bounding boxes, semantic segmentation map
and keypoints.
Add the override feature that if 'flip' is already in results, use it
to do the augmentation.
"""
if 'flip' not in results:
cur_dir = self._choose_direction()
else:
cur_dir = results['flip_direction']
if cur_dir is None:
results['flip'] = False
results['flip_direction'] = None
else:
results['flip'] = True
results['flip_direction'] = cur_dir
self._flip(results)
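A standalone sketch of the override contract in `_flip_on_direction`: a pre-set 'flip' key is honored instead of re-sampling. `decide_flip` is a hypothetical helper written only for illustration:

```python
import random

def decide_flip(results, directions=('horizontal', None)):
    # Mirror of the override above: keep a pre-set 'flip' key if present.
    if 'flip' not in results:
        cur_dir = random.choice(directions)
        results['flip'] = cur_dir is not None
        results['flip_direction'] = cur_dir
    return results

print(decide_flip({}))  # direction freshly sampled
print(decide_flip({'flip': True, 'flip_direction': 'horizontal'}))  # kept
```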
def transform(self, input_dict: dict) -> dict:
"""Call function to flip points, values in the ``bbox3d_fields`` and
also flip 2D image and its annotations.
......@@ -329,7 +354,7 @@ class ObjectSample(BaseTransform):
def __init__(self,
db_sampler: dict,
sample_2d: bool = False,
use_ground_plane: bool = False):
use_ground_plane: bool = False) -> None:
self.sampler_cfg = db_sampler
self.sample_2d = sample_2d
if 'type' not in db_sampler.keys():
......@@ -367,11 +392,10 @@ class ObjectSample(BaseTransform):
gt_bboxes_3d = input_dict['gt_bboxes_3d']
gt_labels_3d = input_dict['gt_labels_3d']
if self.use_ground_plane and 'plane' in input_dict['ann_info']:
ground_plane = input_dict['plane']
if self.use_ground_plane:
ground_plane = input_dict.get('plane', None)
            assert ground_plane is not None, '`use_ground_plane` is True ' \
                'but no plane is found in the input dict'
input_dict['plane'] = ground_plane
else:
ground_plane = None
# change to float for blending operation
......@@ -424,13 +448,9 @@ class ObjectSample(BaseTransform):
def __repr__(self):
"""str: Return a string that describes the module."""
repr_str = self.__class__.__name__
repr_str += f'db_sampler={self.db_sampler},'
repr_str += f' sample_2d={self.sample_2d},'
repr_str += f' data_root={self.sampler_cfg.data_root},'
repr_str += f' info_path={self.sampler_cfg.info_path},'
repr_str += f' rate={self.sampler_cfg.rate},'
repr_str += f' prepare={self.sampler_cfg.prepare},'
repr_str += f' classes={self.sampler_cfg.classes},'
repr_str += f' sample_groups={self.sampler_cfg.sample_groups}'
repr_str += f' use_ground_plane={self.use_ground_plane}'
return repr_str
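A hedged configuration sketch for ObjectSample under the reworked ground-plane handling; the paths and sampling numbers below are hypothetical, and when `use_ground_plane=True` the loading pipeline must put a 'plane' key into the results:

```python
db_sampler = dict(
    data_root='data/kitti/',
    info_path='data/kitti/kitti_dbinfos_train.pkl',
    rate=1.0,
    prepare=dict(filter_by_min_points=dict(Car=5)),
    classes=['Car'],
    sample_groups=dict(Car=15))

train_pipeline = [
    dict(type='ObjectSample', db_sampler=db_sampler, use_ground_plane=True),
]
```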
......@@ -461,10 +481,10 @@ class ObjectNoise(BaseTransform):
"""
def __init__(self,
translation_std: list = [0.25, 0.25, 0.25],
global_rot_range: list = [0.0, 0.0],
rot_range: list = [-0.15707963267, 0.15707963267],
num_try: int = 100):
translation_std: List[float] = [0.25, 0.25, 0.25],
global_rot_range: List[float] = [0.0, 0.0],
rot_range: List[float] = [-0.15707963267, 0.15707963267],
num_try: int = 100) -> None:
self.translation_std = translation_std
self.global_rot_range = global_rot_range
self.rot_range = rot_range
......@@ -527,7 +547,7 @@ class GlobalAlignment(BaseTransform):
def __init__(self, rotation_axis: int) -> None:
self.rotation_axis = rotation_axis
def _trans_points(self, results: Dict, trans_factor: np.ndarray) -> None:
def _trans_points(self, results: dict, trans_factor: np.ndarray) -> None:
"""Private function to translate points.
Args:
......@@ -539,7 +559,7 @@ class GlobalAlignment(BaseTransform):
"""
results['points'].translate(trans_factor)
def _rot_points(self, results: Dict, rot_mat: np.ndarray) -> None:
def _rot_points(self, results: dict, rot_mat: np.ndarray) -> None:
"""Private function to rotate bounding boxes and points.
Args:
......@@ -565,7 +585,7 @@ class GlobalAlignment(BaseTransform):
is_valid &= (rot_mat[:, self.rotation_axis] == valid_array).all()
assert is_valid, f'invalid rotation matrix {rot_mat}'
def transform(self, results: Dict) -> Dict:
def transform(self, results: dict) -> dict:
"""Call function to shuffle points.
Args:
......@@ -591,6 +611,7 @@ class GlobalAlignment(BaseTransform):
return results
def __repr__(self):
"""str: Return a string that describes the module."""
repr_str = self.__class__.__name__
repr_str += f'(rotation_axis={self.rotation_axis})'
return repr_str
......@@ -809,6 +830,7 @@ class PointShuffle(BaseTransform):
return input_dict
def __repr__(self):
"""str: Return a string that describes the module."""
return self.__class__.__name__
......@@ -828,7 +850,7 @@ class ObjectRangeFilter(BaseTransform):
point_cloud_range (list[float]): Point cloud range.
"""
def __init__(self, point_cloud_range: list):
def __init__(self, point_cloud_range: List[float]):
self.pcd_range = np.array(point_cloud_range, dtype=np.float32)
def transform(self, input_dict: dict) -> dict:
......@@ -890,7 +912,7 @@ class PointsRangeFilter(BaseTransform):
point_cloud_range (list[float]): Point cloud range.
"""
def __init__(self, point_cloud_range: list):
def __init__(self, point_cloud_range: List[float]) -> None:
self.pcd_range = np.array(point_cloud_range, dtype=np.float32)
def transform(self, input_dict: dict) -> dict:
......@@ -943,7 +965,7 @@ class ObjectNameFilter(BaseTransform):
classes (list[str]): List of class names to be kept for training.
"""
def __init__(self, classes: list):
def __init__(self, classes: List[str]) -> None:
self.classes = classes
self.labels = list(range(len(self.classes)))
......@@ -1001,34 +1023,38 @@ class PointSample(BaseTransform):
def __init__(self,
num_points: int,
sample_range: float = None,
replace: bool = False):
sample_range: Optional[float] = None,
replace: bool = False) -> None:
self.num_points = num_points
self.sample_range = sample_range
self.replace = replace
def _points_random_sampling(self,
points,
num_samples,
sample_range=None,
replace=False,
return_choices=False):
def _points_random_sampling(
self,
points: BasePoints,
num_samples: int,
sample_range: Optional[float] = None,
replace: bool = False,
return_choices: bool = False
) -> Union[Tuple[BasePoints, np.ndarray], BasePoints]:
"""Points random sampling.
Sample points to a certain number.
Args:
points (np.ndarray | :obj:`BasePoints`): 3D Points.
points (:obj:`BasePoints`): 3D Points.
num_samples (int): Number of samples to be sampled.
sample_range (float, optional): Indicating the range where the
points will be sampled. Defaults to None.
replace (bool, optional): Sampling with or without replacement.
Defaults to None.
Defaults to False.
return_choices (bool, optional): Whether return choice.
Defaults to False.
Returns:
tuple[np.ndarray] | np.ndarray:
- points (np.ndarray | :obj:`BasePoints`): 3D Points.
tuple[:obj:`BasePoints`, np.ndarray] | :obj:`BasePoints`:
- points (:obj:`BasePoints`): 3D Points.
- choices (np.ndarray, optional): The generated random samples.
"""
if not replace:
......@@ -1036,7 +1062,7 @@ class PointSample(BaseTransform):
point_range = range(len(points))
if sample_range is not None and not replace:
# Only sampling the near points when len(points) >= num_samples
dist = np.linalg.norm(points.tensor, axis=1)
dist = np.linalg.norm(points.coord.numpy(), axis=1)
far_inds = np.where(dist >= sample_range)[0]
near_inds = np.where(dist < sample_range)[0]
# in case there are too many far points
......@@ -1060,6 +1086,7 @@ class PointSample(BaseTransform):
Args:
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after sampling, 'points', 'pts_instance_mask'
and 'pts_semantic_mask' keys are updated in the result dict.
......@@ -1219,8 +1246,9 @@ class IndoorPatchPointSample(BaseTransform):
return points
def _patch_points_sampling(self, points: BasePoints,
sem_mask: np.ndarray) -> BasePoints:
def _patch_points_sampling(
self, points: BasePoints,
sem_mask: np.ndarray) -> Tuple[BasePoints, np.ndarray]:
"""Patch points sampling.
First sample a valid patch.
......@@ -1231,7 +1259,7 @@ class IndoorPatchPointSample(BaseTransform):
sem_mask (np.ndarray): semantic segmentation mask for input points.
Returns:
tuple[:obj:`BasePoints`, np.ndarray] | :obj:`BasePoints`:
tuple[:obj:`BasePoints`, np.ndarray]:
- points (:obj:`BasePoints`): 3D Points.
- choices (np.ndarray): The generated random samples.
......@@ -1438,7 +1466,7 @@ class BackgroundPointsFilter(BaseTransform):
@TRANSFORMS.register_module()
class VoxelBasedPointSampler(object):
class VoxelBasedPointSampler(BaseTransform):
"""Voxel based point sampler.
Apply voxel sampling to multiple sweep points.
......@@ -1450,7 +1478,10 @@ class VoxelBasedPointSampler(object):
for input points.
"""
def __init__(self, cur_sweep_cfg, prev_sweep_cfg=None, time_dim=3):
def __init__(self,
cur_sweep_cfg: dict,
prev_sweep_cfg: Optional[dict] = None,
time_dim: int = 3) -> None:
self.cur_voxel_generator = VoxelGenerator(**cur_sweep_cfg)
self.cur_voxel_num = self.cur_voxel_generator._max_voxels
self.time_dim = time_dim
......@@ -1463,7 +1494,8 @@ class VoxelBasedPointSampler(object):
self.prev_voxel_generator = None
self.prev_voxel_num = 0
def _sample_points(self, points, sampler, point_dim):
def _sample_points(self, points: np.ndarray, sampler: VoxelGenerator,
point_dim: int) -> np.ndarray:
"""Sample points for each points subset.
Args:
......@@ -1489,7 +1521,7 @@ class VoxelBasedPointSampler(object):
return sample_points
def __call__(self, results):
def transform(self, results: dict) -> dict:
"""Call function to sample points from multiple sweeps.
Args:
......@@ -1665,8 +1697,9 @@ class AffineResize(BaseTransform):
if 'gt_bboxes' in results:
results['gt_bboxes'] = results['gt_bboxes'][valid_index]
if 'gt_labels' in results:
results['gt_labels'] = results['gt_labels'][valid_index]
if 'gt_bboxes_labels' in results:
results['gt_bboxes_labels'] = results['gt_bboxes_labels'][
valid_index]
if 'gt_masks' in results:
raise NotImplementedError(
'AffineResize only supports bbox.')
......@@ -1771,6 +1804,7 @@ class AffineResize(BaseTransform):
return ref_point3
def __repr__(self):
"""str: Return a string that describes the module."""
repr_str = self.__class__.__name__
repr_str += f'(img_scale={self.img_scale}, '
repr_str += f'down_ratio={self.down_ratio}) '
......@@ -1791,7 +1825,7 @@ class RandomShiftScale(BaseTransform):
aug_prob (float): The shifting and scaling probability.
"""
def __init__(self, shift_scale: Tuple[float], aug_prob: float):
def __init__(self, shift_scale: Tuple[float], aug_prob: float) -> None:
self.shift_scale = shift_scale
self.aug_prob = aug_prob
......@@ -1830,7 +1864,484 @@ class RandomShiftScale(BaseTransform):
return results
def __repr__(self):
"""str: Return a string that describes the module."""
repr_str = self.__class__.__name__
repr_str += f'(shift_scale={self.shift_scale}, '
repr_str += f'aug_prob={self.aug_prob}) '
return repr_str
@TRANSFORMS.register_module()
class Resize3D(Resize):
def _resize_3d(self, results):
"""Resize centers_2d and modify camera intrinisc with
``results['scale']``."""
if 'centers_2d' in results:
results['centers_2d'] *= results['scale_factor'][:2]
results['cam2img'][0] *= np.array(results['scale_factor'][0])
results['cam2img'][1] *= np.array(results['scale_factor'][1])
def transform(self, results: dict) -> dict:
"""Transform function to resize images, bounding boxes, semantic
segmentation map and keypoints.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Resized results, 'img', 'gt_bboxes', 'gt_seg_map',
'gt_keypoints', 'scale', 'scale_factor', 'img_shape',
and 'keep_ratio' keys are updated in result dict.
"""
super(Resize3D, self).transform(results)
self._resize_3d(results)
return results
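A self-contained sketch of the intrinsic update performed by `_resize_3d`; the matrix values are hypothetical:

```python
import numpy as np

cam2img = np.array([[721.5, 0.0, 609.6, 0.0],
                    [0.0, 721.5, 172.9, 0.0],
                    [0.0, 0.0, 1.0, 0.0]])
scale_factor = (0.5, 0.75)       # (w_scale, h_scale)
cam2img[0] *= scale_factor[0]    # fx and cx scale with image width
cam2img[1] *= scale_factor[1]    # fy and cy scale with image height
```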
@TRANSFORMS.register_module()
class RandomResize3D(RandomResize):
"""The difference between RandomResize3D and RandomResize:
    1. Compared to RandomResize, this class further checks whether ``scale``
       is already set in ``results`` before sampling a new one.
    2. During resizing, this class also modifies ``centers_2d`` and
       ``cam2img`` according to the resize scale.
"""
def _resize_3d(self, results):
"""Resize centers_2d and modify camera intrinisc with
``results['scale']``."""
if 'centers_2d' in results:
results['centers_2d'] *= results['scale_factor'][:2]
results['cam2img'][0] *= np.array(results['scale_factor'][0])
results['cam2img'][1] *= np.array(results['scale_factor'][1])
def transform(self, results):
"""Transform function to resize images, bounding boxes, masks, semantic
segmentation map. Compared to RandomResize, this function would further
check if scale is already set in results.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Resized results, 'img_shape', 'pad_shape', 'scale_factor', \
'keep_ratio' keys are added into result dict.
"""
if 'scale' not in results:
results['scale'] = self._random_scale()
self.resize.scale = results['scale']
results = self.resize(results)
self._resize_3d(results)
return results
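A standalone sketch of the scale-override check in `transform`; `pick_scale` is a hypothetical helper, and only the 'scale' key mirrors the code above. Pinning 'scale' is how MultiViewWrapper (below) keeps one resize across camera views:

```python
import random

def pick_scale(results, scales=((1173, 352), (1280, 384))):
    # Mirror of RandomResize3D.transform: keep a pre-set scale if present.
    if 'scale' not in results:
        results['scale'] = random.choice(scales)
    return results['scale']

print(pick_scale({}))                      # freshly sampled
print(pick_scale({'scale': (1600, 900)}))  # pre-set scale is kept
```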
@TRANSFORMS.register_module()
class RandomCrop3D(RandomCrop):
"""3D version of RandomCrop. RamdomCrop3D supports the modifications of
camera intrinsic matrix and using predefined randomness variable to do the
augmentation.
The absolute ``crop_size`` is sampled based on ``crop_type`` and
``image_size``, then the cropped results are generated.
Required Keys:
- img
- gt_bboxes (np.float32) (optional)
- gt_bboxes_labels (np.int64) (optional)
- gt_masks (BitmapMasks | PolygonMasks) (optional)
- gt_ignore_flags (np.bool) (optional)
- gt_seg_map (np.uint8) (optional)
Modified Keys:
- img
- img_shape
- gt_bboxes (optional)
- gt_bboxes_labels (optional)
- gt_masks (optional)
- gt_ignore_flags (optional)
- gt_seg_map (optional)
Added Keys:
- homography_matrix
Args:
crop_size (tuple): The relative ratio or absolute pixels of
height and width.
crop_type (str): One of "relative_range", "relative",
"absolute", "absolute_range". "relative" randomly crops
(h * crop_size[0], w * crop_size[1]) part from an input of size
(h, w). "relative_range" uniformly samples relative crop size from
range [crop_size[0], 1] and [crop_size[1], 1] for height and width
respectively. "absolute" crops from an input with absolute size
(crop_size[0], crop_size[1]). "absolute_range" uniformly samples
crop_h in range [crop_size[0], min(h, crop_size[1])] and crop_w
in range [crop_size[0], min(w, crop_size[1])].
Defaults to "absolute".
allow_negative_crop (bool): Whether to allow a crop that does
not contain any bbox area. Defaults to False.
recompute_bbox (bool): Whether to re-compute the boxes based
on cropped instance masks. Defaults to False.
bbox_clip_border (bool): Whether clip the objects outside
the border of the image. Defaults to True.
        rel_offset_h (tuple): The cropping interval of image height.
            Defaults to (0., 1.).
        rel_offset_w (tuple): The cropping interval of image width.
            Defaults to (0., 1.).
Note:
- If the image is smaller than the absolute crop size, return the
original image.
- The keys for bboxes, labels and masks must be aligned. That is,
``gt_bboxes`` corresponds to ``gt_labels`` and ``gt_masks``, and
``gt_bboxes_ignore`` corresponds to ``gt_labels_ignore`` and
``gt_masks_ignore``.
- If the crop does not contain any gt-bbox region and
``allow_negative_crop`` is set to False, skip this image.
"""
def __init__(self,
crop_size,
crop_type='absolute',
allow_negative_crop=False,
recompute_bbox=False,
bbox_clip_border=True,
rel_offset_h=(0., 1.),
rel_offset_w=(0., 1.)):
super().__init__(
crop_size=crop_size,
crop_type=crop_type,
allow_negative_crop=allow_negative_crop,
recompute_bbox=recompute_bbox,
bbox_clip_border=bbox_clip_border)
# rel_offset specifies the relative offset range of cropping origin
# [0., 1.] means starting from 0*margin to 1*margin + 1
self.rel_offset_h = rel_offset_h
self.rel_offset_w = rel_offset_w
def _crop_data(self, results, crop_size, allow_negative_crop):
"""Function to randomly crop images, bounding boxes, masks, semantic
segmentation maps.
Args:
results (dict): Result dict from loading pipeline.
crop_size (tuple): Expected absolute size after cropping, (h, w).
allow_negative_crop (bool): Whether to allow a crop that does not
contain any bbox area. Default to False.
Returns:
dict: Randomly cropped results, 'img_shape' key in result dict is
updated according to crop size.
"""
assert crop_size[0] > 0 and crop_size[1] > 0
for key in results.get('img_fields', ['img']):
img = results[key]
if 'img_crop_offset' not in results:
margin_h = max(img.shape[0] - crop_size[0], 0)
margin_w = max(img.shape[1] - crop_size[1], 0)
# TOCHECK: a little different from LIGA implementation
offset_h = np.random.randint(
self.rel_offset_h[0] * margin_h,
self.rel_offset_h[1] * margin_h + 1)
offset_w = np.random.randint(
self.rel_offset_w[0] * margin_w,
self.rel_offset_w[1] * margin_w + 1)
else:
offset_w, offset_h = results['img_crop_offset']
crop_h = min(crop_size[0], img.shape[0])
crop_w = min(crop_size[1], img.shape[1])
crop_y1, crop_y2 = offset_h, offset_h + crop_h
crop_x1, crop_x2 = offset_w, offset_w + crop_w
# crop the image
img = img[crop_y1:crop_y2, crop_x1:crop_x2, ...]
img_shape = img.shape
results[key] = img
results['img_shape'] = img_shape
# crop bboxes accordingly and clip to the image boundary
for key in results.get('bbox_fields', []):
# e.g. gt_bboxes and gt_bboxes_ignore
bbox_offset = np.array([offset_w, offset_h, offset_w, offset_h],
dtype=np.float32)
bboxes = results[key] - bbox_offset
if self.bbox_clip_border:
bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, img_shape[1])
bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, img_shape[0])
valid_inds = (bboxes[:, 2] > bboxes[:, 0]) & (
bboxes[:, 3] > bboxes[:, 1])
# If the crop does not contain any gt-bbox area and
# allow_negative_crop is False, skip this image.
if (key == 'gt_bboxes' and not valid_inds.any()
and not allow_negative_crop):
return None
results[key] = bboxes[valid_inds, :]
# label fields. e.g. gt_labels and gt_labels_ignore
label_key = self.bbox2label.get(key)
if label_key in results:
results[label_key] = results[label_key][valid_inds]
# mask fields, e.g. gt_masks and gt_masks_ignore
mask_key = self.bbox2mask.get(key)
if mask_key in results:
results[mask_key] = results[mask_key][
valid_inds.nonzero()[0]].crop(
np.asarray([crop_x1, crop_y1, crop_x2, crop_y2]))
if self.recompute_bbox:
results[key] = results[mask_key].get_bboxes()
# crop semantic seg
for key in results.get('seg_fields', []):
results[key] = results[key][crop_y1:crop_y2, crop_x1:crop_x2]
# manipulate camera intrinsic matrix
# needs to apply offset to K instead of P2 (on KITTI)
if isinstance(results['cam2img'], list):
# TODO ignore this, but should handle it in the future
pass
else:
K = results['cam2img'][:3, :3].copy()
inv_K = np.linalg.inv(K)
T = np.matmul(inv_K, results['cam2img'][:3])
K[0, 2] -= crop_x1
K[1, 2] -= crop_y1
offset_cam2img = np.matmul(K, T)
results['cam2img'][:offset_cam2img.shape[0], :offset_cam2img.
shape[1]] = offset_cam2img
results['img_crop_offset'] = [offset_w, offset_h]
return results
def transform(self, results):
"""Transform function to randomly crop images, bounding boxes, masks,
semantic segmentation maps.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Randomly cropped results, 'img_shape' key in result dict is
updated according to crop size.
"""
image_size = results['img'].shape[:2]
if 'crop_size' not in results:
crop_size = self._get_crop_size(image_size)
results['crop_size'] = crop_size
else:
crop_size = results['crop_size']
results = self._crop_data(results, crop_size, self.allow_negative_crop)
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += f'(crop_size={self.crop_size}, '
repr_str += f'crop_type={self.crop_type}, '
repr_str += f'allow_negative_crop={self.allow_negative_crop}, '
        repr_str += f'bbox_clip_border={self.bbox_clip_border}, '
        repr_str += f'rel_offset_h={self.rel_offset_h}, '
        repr_str += f'rel_offset_w={self.rel_offset_w})'
return repr_str
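A self-contained sketch of the intrinsic adjustment in `_crop_data`, which applies the crop offset to K rather than to the full projection matrix; the KITTI-style values are hypothetical:

```python
import numpy as np

cam2img = np.array([[721.5, 0.0, 609.6, 44.9],
                    [0.0, 721.5, 172.9, 0.2],
                    [0.0, 0.0, 1.0, 0.003]])
crop_x1, crop_y1 = 100, 50
K = cam2img[:3, :3].copy()
T = np.linalg.inv(K) @ cam2img[:3]  # factor translation out of the intrinsics
K[0, 2] -= crop_x1                  # shift the principal point by the offset
K[1, 2] -= crop_y1
cam2img[:3] = K @ T                 # rebuild the projection matrix
```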
@TRANSFORMS.register_module()
class PhotoMetricDistortion3D(PhotoMetricDistortion):
"""Apply photometric distortion to image sequentially, every transformation
is applied with a probability of 0.5. The position of random contrast is in
second or second to last.
PhotoMetricDistortion3D further support using predefined randomness
variable to do the augmentation.
1. random brightness
2. random contrast (mode 0)
3. convert color from BGR to HSV
4. random saturation
5. random hue
6. convert color from HSV to BGR
7. random contrast (mode 1)
8. randomly swap channels
Required Keys:
- img (np.uint8)
Modified Keys:
- img (np.float32)
Args:
brightness_delta (int): delta of brightness.
contrast_range (sequence): range of contrast.
saturation_range (sequence): range of saturation.
hue_delta (int): delta of hue.
"""
def transform(self, results: dict) -> dict:
"""Transform function to perform photometric distortion on images.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Result dict with images distorted.
"""
assert 'img' in results, '`img` is not found in results'
img = results['img']
img = img.astype(np.float32)
if 'photometric_param' not in results:
photometric_param = self._random_flags()
results['photometric_param'] = photometric_param
else:
photometric_param = results['photometric_param']
(mode, brightness_flag, contrast_flag, saturation_flag, hue_flag,
swap_flag, delta_value, alpha_value, saturation_value, hue_value,
swap_value) = photometric_param
# random brightness
if brightness_flag:
img += delta_value
# mode == 0 --> do random contrast first
# mode == 1 --> do random contrast last
if mode == 1:
if contrast_flag:
img *= alpha_value
# convert color from BGR to HSV
img = mmcv.bgr2hsv(img)
# random saturation
if saturation_flag:
img[..., 1] *= saturation_value
# random hue
if hue_flag:
img[..., 0] += hue_value
img[..., 0][img[..., 0] > 360] -= 360
img[..., 0][img[..., 0] < 0] += 360
# convert color from HSV to BGR
img = mmcv.hsv2bgr(img)
# random contrast
if mode == 0:
if contrast_flag:
img *= alpha_value
# randomly swap channels
if swap_flag:
img = img[..., swap_value]
results['img'] = img
return results
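A minimal sketch of the shared-randomness contract: the sampled parameters are stored under 'photometric_param' on the first call and reused afterwards, so several views receive identical distortion. The `distort` stand-in below is hypothetical; only the 'photometric_param' key mirrors the code:

```python
import numpy as np

def distort(results):
    # Sample once, then reuse whatever randomness is already present.
    if 'photometric_param' not in results:
        results['photometric_param'] = np.random.uniform(-32, 32)
    results['img'] = results['img'].astype(np.float32) + results['photometric_param']
    return results

shared = {}
for img in [np.zeros((2, 2, 3), np.uint8)] * 2:
    out = distort(dict(img=img, **shared))
    shared['photometric_param'] = out['photometric_param']  # reuse next view
```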
@TRANSFORMS.register_module()
class MultiViewWrapper(BaseTransform):
"""Wrap transformation from single-view into multi-view.
The wrapper processes the images from multi-view one by one. For each
image, it constructs a pseudo dict according to the keys specified by the
'process_fields' parameter. After the transformation is finished, desired
information can be collected by specifying the keys in the 'collected_keys'
    parameter. Multi-view images share the same transformation configuration;
    whether they also share the sampled random magnitudes is controlled by
    ``override_aug_config`` (shared when it is True, the default).
Args:
transforms (list[dict]): A list of dict specifying the transformations
for the monocular situation.
        override_aug_config (bool): Whether to use the same augmentation
            randomness for every view. Defaults to True.
        process_fields (list): Desired keys that the transformations should
            be conducted on. Defaults to ['img', 'cam2img', 'lidar2cam'].
        collected_keys (list): Keys of the information collected during the
            transformations, such as scales, crop offsets and flip states.
            Defaults to ['scale', 'scale_factor', 'crop',
            'img_crop_offset', 'ori_shape', 'pad_shape', 'img_shape',
            'pad_fixed_size', 'pad_size_divisor', 'flip',
            'flip_direction', 'rotate'].
        randomness_keys (list): The keys related to the randomness in the
            transformations. Defaults to ['scale', 'scale_factor',
            'crop_size', 'img_crop_offset', 'flip', 'flip_direction',
            'photometric_param'].
"""
def __init__(self,
transforms: dict,
override_aug_config: bool = True,
process_fields: list = ['img', 'cam2img', 'lidar2cam'],
collected_keys: list = [
'scale', 'scale_factor', 'crop', 'img_crop_offset',
'ori_shape', 'pad_shape', 'img_shape', 'pad_fixed_size',
'pad_size_divisor', 'flip', 'flip_direction', 'rotate'
],
randomness_keys: list = [
'scale', 'scale_factor', 'crop_size', 'img_crop_offset',
'flip', 'flip_direction', 'photometric_param'
]):
self.transforms = Compose(transforms)
self.override_aug_config = override_aug_config
self.collected_keys = collected_keys
self.process_fields = process_fields
self.randomness_keys = randomness_keys
def transform(self, input_dict):
"""Transform function to do the transform for multiview image.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: output dict after transformtaion
"""
# store the augmentation related keys for each image.
for key in self.collected_keys:
if key not in input_dict or \
not isinstance(input_dict[key], list):
input_dict[key] = []
prev_process_dict = {}
for img_id in range(len(input_dict['img'])):
process_dict = {}
# override the process dict (e.g. scale in random scale,
# crop_size in random crop, flip, flip_direction in
# random flip)
if img_id != 0 and self.override_aug_config:
for key in self.randomness_keys:
if key in prev_process_dict:
process_dict[key] = prev_process_dict[key]
for key in self.process_fields:
if key in input_dict:
process_dict[key] = input_dict[key][img_id]
process_dict = self.transforms(process_dict)
# store the randomness variable in transformation.
prev_process_dict = process_dict
# store the related results to results_dict
for key in self.process_fields:
if key in process_dict:
input_dict[key][img_id] = process_dict[key]
# update the keys
for key in self.collected_keys:
if key in process_dict:
if len(input_dict[key]) == img_id + 1:
input_dict[key][img_id] = process_dict[key]
else:
input_dict[key].append(process_dict[key])
for key in self.collected_keys:
if len(input_dict[key]) == 0:
input_dict.pop(key)
return input_dict
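A hedged pipeline sketch wiring the pieces above together; the inner transforms and their arguments are hypothetical choices, not taken from a shipped config:

```python
train_pipeline = [
    dict(
        type='MultiViewWrapper',
        transforms=[
            dict(type='RandomResize3D', scale=(1280, 384),
                 ratio_range=(0.95, 1.05), keep_ratio=True),
            dict(type='PhotoMetricDistortion3D'),
        ]),
]
```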
......@@ -23,8 +23,8 @@ class WaymoDataset(KittiDataset):
Args:
data_root (str): Path of dataset root.
ann_file (str): Path of annotation file.
data_prefix (list[dict]): data prefix for point cloud and
camera data dict, default to dict(
        data_prefix (dict): Data prefix for point cloud and
            camera data. Defaults to dict(
pts='velodyne',
CAM_FRONT='image_0',
CAM_FRONT_RIGHT='image_1',
......@@ -34,13 +34,14 @@ class WaymoDataset(KittiDataset):
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to `dict(use_lidar=True)`.
as input. Defaults to dict(use_lidar=True).
default_cam_key (str, optional): Default camera key for lidar2img
association.
association. Defaults to 'CAM_FRONT'.
box_type_3d (str, optional): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`.
Defaults to 'LiDAR' in this dataset. Available options includes
Defaults to 'LiDAR' in this dataset. Available options includes:
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
......@@ -48,16 +49,18 @@ class WaymoDataset(KittiDataset):
Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
pcd_limit_range (list, optional): The range of point cloud used to
filter invalid predicted boxes.
Default: [-85, -85, -5, 85, 85, 5].
pcd_limit_range (list[float], optional): The range of point cloud
used to filter invalid predicted boxes.
Defaults to [-85, -85, -5, 85, 85, 5].
        cam_sync_instances (bool, optional): Whether to use the camera sync labels
supported from waymo version 1.3.1.
supported from waymo version 1.3.1. Defaults to False.
load_interval (int, optional): load frame interval.
Defaults to 1.
        task (str, optional): Task for 3D detection ('lidar_det' or
            'mono_det'). 'lidar_det': take all the ground truth in the frame.
            'mono_det': take only the ground truth visible in the camera.
max_sweeps (int, optional): max sweep for each frame.
            Defaults to 'lidar_det'.
max_sweeps (int, optional): max sweep for each frame. Defaults to 0.
"""
METAINFO = {'CLASSES': ('Car', 'Pedestrian', 'Cyclist')}
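A hedged dataset configuration sketch using the task values renamed in this commit; paths, prefixes, and the load interval are hypothetical:

```python
train_dataset = dict(
    type='WaymoDataset',
    data_root='data/waymo/kitti_format/',
    ann_file='waymo_infos_train.pkl',
    data_prefix=dict(pts='training/velodyne', CAM_FRONT='training/image_0'),
    task='lidar_det',  # or 'mono_det' for camera-only training
    cam_sync_instances=False,
    load_interval=5,
    max_sweeps=0)
```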
......@@ -80,7 +83,7 @@ class WaymoDataset(KittiDataset):
pcd_limit_range: List[float] = [0, -40, -3, 70.4, 40, 0.0],
cam_sync_instances=False,
load_interval=1,
task='lidar',
task='lidar_det',
max_sweeps=0,
**kwargs):
self.load_interval = load_interval
......@@ -127,20 +130,19 @@ class WaymoDataset(KittiDataset):
ann_info = Det3DDataset.parse_ann_info(self, info)
if ann_info is None:
# empty instance
anns_results = {}
anns_results['gt_bboxes_3d'] = np.zeros((0, 7), dtype=np.float32)
anns_results['gt_labels_3d'] = np.zeros(0, dtype=np.int64)
return anns_results
ann_info = {}
ann_info['gt_bboxes_3d'] = np.zeros((0, 7), dtype=np.float32)
ann_info['gt_labels_3d'] = np.zeros(0, dtype=np.int64)
ann_info = self._remove_dontcare(ann_info)
# in kitti, lidar2cam = R0_rect @ Tr_velo_to_cam
# convert gt_bboxes_3d to velodyne coordinates with `lidar2cam`
if 'gt_bboxes' in ann_info:
gt_bboxes = ann_info['gt_bboxes']
gt_labels = ann_info['gt_labels']
gt_bboxes_labels = ann_info['gt_bboxes_labels']
else:
gt_bboxes = np.zeros((0, 4), dtype=np.float32)
gt_labels = np.array([], dtype=np.int64)
gt_bboxes_labels = np.zeros(0, dtype=np.int64)
if 'centers_2d' in ann_info:
centers_2d = ann_info['centers_2d']
depths = ann_info['depths']
......@@ -148,25 +150,27 @@ class WaymoDataset(KittiDataset):
centers_2d = np.zeros((0, 2), dtype=np.float32)
depths = np.zeros((0), dtype=np.float32)
if self.task == 'mono3d':
if self.task == 'mono_det':
gt_bboxes_3d = CameraInstance3DBoxes(
ann_info['gt_bboxes_3d'],
box_dim=ann_info['gt_bboxes_3d'].shape[-1],
origin=(0.5, 0.5, 0.5))
else:
# in waymo, lidar2cam = R0_rect @ Tr_velo_to_cam
# convert gt_bboxes_3d to velodyne coordinates with `lidar2cam`
lidar2cam = np.array(
info['images'][self.default_cam_key]['lidar2cam'])
gt_bboxes_3d = CameraInstance3DBoxes(
ann_info['gt_bboxes_3d']).convert_to(self.box_mode_3d,
np.linalg.inv(lidar2cam))
ann_info['gt_bboxes_3d'] = gt_bboxes_3d
anns_results = dict(
gt_bboxes_3d=gt_bboxes_3d,
gt_labels_3d=ann_info['gt_labels_3d'],
gt_bboxes=gt_bboxes,
gt_labels=gt_labels,
gt_bboxes_labels=gt_bboxes_labels,
centers_2d=centers_2d,
depths=depths)
......@@ -181,7 +185,7 @@ class WaymoDataset(KittiDataset):
def parse_data_info(self, info: dict) -> dict:
"""if task is lidar or multiview det, use super() method elif task is
mono3d, split the info from frame-wise to img-wise."""
if self.task != 'mono3d':
if self.task != 'mono_det':
if self.cam_sync_instances:
# use the cam sync labels
info['instances'] = info['cam_sync_instances']
......@@ -217,7 +221,7 @@ class WaymoDataset(KittiDataset):
# TODO check if need to modify the sample id
# TODO check when will use it except for evaluation.
camera_info['sample_id'] = info['sample_id']
camera_info['sample_idx'] = info['sample_idx']
if not self.test_mode:
# used in training
......
# Copyright (c) OpenMMLab. All rights reserved.
from .hooks import Det3DVisualizationHook
from .hooks import BenchmarkHook, Det3DVisualizationHook
__all__ = ['Det3DVisualizationHook']
__all__ = ['Det3DVisualizationHook', 'BenchmarkHook']
# Copyright (c) OpenMMLab. All rights reserved.
from .benchmark_hook import BenchmarkHook
from .visualization_hook import Det3DVisualizationHook
__all__ = ['Det3DVisualizationHook']
__all__ = ['Det3DVisualizationHook', 'BenchmarkHook']
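A hedged usage sketch for the newly exported BenchmarkHook; the diff does not show its constructor, so no arguments are assumed:

```python
# Register the hook through the config system.
custom_hooks = [dict(type='BenchmarkHook')]
```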