Unverified Commit 6c03a971 authored by Tai-Wang, committed by GitHub

Release v1.1.0rc1

parents 9611c2d0 ca42c312
......@@ -102,10 +102,10 @@ python tools/misc/browse_dataset.py configs/_base_/datasets/kitti-3d-3class.py -
python tools/misc/browse_dataset.py configs/_base_/datasets/kitti-3d-3class.py --task det --aug --output-dir ${OUTPUT_DIR} --online
```
If you also want to display 2D images with the projected 3D bounding boxes, you need to find a config file that supports multi-modality data loading, and then change the `--task` argument to `multi_modality-det`. An example is shown below:
If you also want to display 2D images with the projected 3D bounding boxes, you need to find a config file that supports multi-modality data loading, and then change the `--task` argument to `multi-modality_det`. An example is shown below:
```shell
python tools/misc/browse_dataset.py configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py --task multi_modality-det --output-dir ${OUTPUT_DIR} --online
python tools/misc/browse_dataset.py configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py --task multi-modality_det --output-dir ${OUTPUT_DIR} --online
```
![](../../resources/browse_dataset_multi_modality.png)
......@@ -121,7 +121,7 @@ python tools/misc/browse_dataset.py configs/_base_/datasets/scannet-seg.py --tas
Browse the nuScenes dataset in the monocular 3D detection task
```shell
python tools/misc/browse_dataset.py configs/_base_/datasets/nus-mono3d.py --task mono-det --output-dir ${OUTPUT_DIR} --online
python tools/misc/browse_dataset.py configs/_base_/datasets/nus-mono3d.py --task mono_det --output-dir ${OUTPUT_DIR} --online
```
![](../../resources/browse_dataset_mono.png)
......
......@@ -143,6 +143,7 @@ def inference_detector(model: nn.Module,
# load from point cloud file
data_ = dict(
lidar_points=dict(lidar_path=pcd),
timestamp=1,
# for ScanNet demo we need axis_align_matrix
axis_align_matrix=np.eye(4),
box_type_3d=box_type_3d,
......@@ -151,6 +152,7 @@ def inference_detector(model: nn.Module,
# directly use loaded point cloud
data_ = dict(
points=pcd,
timestamp=1,
# for ScanNet demo we need axis_align_matrix
axis_align_matrix=np.eye(4),
box_type_3d=box_type_3d,
......
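For context, a hedged usage sketch of the inference helper modified above; the config and checkpoint paths are placeholders, and the `(results, data)` return shape is assumed from the surrounding 1.x API:

```python
# A minimal sketch; paths are hypothetical and any mmdet3d 1.x
# config/checkpoint pair is handled the same way.
from mmdet3d.apis import inference_detector, init_model

model = init_model('configs/some_config.py', 'some_checkpoint.pth',
                   device='cuda:0')
# `pcd` may be a point cloud file path (the lidar_path branch above)
# or an already-loaded point array (the points branch above).
results, data = inference_detector(model, 'demo/data/kitti/000008.bin')
```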
# Copyright (c) OpenMMLab. All rights reserved.
from .builder import DATASETS, PIPELINES, build_dataset
from .convert_utils import get_2d_boxes
from .dataset_wrappers import CBGSDataset
from .det3d_dataset import Det3DDataset
from .kitti_dataset import KittiDataset
......@@ -22,8 +21,8 @@ from .transforms import (AffineResize, BackgroundPointsFilter, GlobalAlignment,
ObjectNameFilter, ObjectNoise, ObjectRangeFilter,
ObjectSample, PointSample, PointShuffle,
PointsRangeFilter, RandomDropPointsColor,
RandomFlip3D, RandomJitterPoints, RandomShiftScale,
VoxelBasedPointSampler)
RandomFlip3D, RandomJitterPoints, RandomResize3D,
RandomShiftScale, Resize3D, VoxelBasedPointSampler)
from .utils import get_loading_pipeline
from .waymo_dataset import WaymoDataset
......@@ -40,5 +39,6 @@ __all__ = [
'LoadPointsFromMultiSweeps', 'WaymoDataset', 'BackgroundPointsFilter',
'VoxelBasedPointSampler', 'get_loading_pipeline', 'RandomDropPointsColor',
'RandomJitterPoints', 'ObjectNameFilter', 'AffineResize',
'RandomShiftScale', 'LoadPointsFromDict', 'PIPELINES', 'get_2d_boxes'
'RandomShiftScale', 'LoadPointsFromDict', 'PIPELINES',
'Resize3D', 'RandomResize3D',
]
# Copyright (c) OpenMMLab. All rights reserved.
import copy
from collections import OrderedDict
from typing import List, Tuple, Union
from typing import List, Optional, Tuple, Union
import numpy as np
from nuscenes.utils.geometry_utils import view_points
......@@ -11,6 +11,11 @@ from shapely.geometry import MultiPoint, box
from mmdet3d.structures import Box3DMode, CameraInstance3DBoxes, points_cam2img
from mmdet3d.structures.ops import box_np_ops
kitti_categories = ('Pedestrian', 'Cyclist', 'Car', 'Van', 'Truck',
'Person_sitting', 'Tram', 'Misc')
waymo_categories = ('Car', 'Pedestrian', 'Cyclist')
nus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',
'barrier')
......@@ -48,8 +53,10 @@ LyftNameMapping = {
}
def get_2d_boxes(nusc, sample_data_token: str, visibilities: List[str]):
"""Get the 2D annotation records for a given `sample_data_token`.
def get_nuscenes_2d_boxes(nusc, sample_data_token: str,
visibilities: List[str]):
"""Get the 2d / mono3d annotation records for a given `sample_data_token of
nuscenes dataset.
Args:
sample_data_token (str): Sample data token belonging to a camera
......@@ -57,7 +64,7 @@ def get_2d_boxes(nusc, sample_data_token: str, visibilities: List[str]):
visibilities (list[str]): Visibility filter.
Returns:
list[dict]: List of 2D annotation record that belongs to the input
list[dict]: List of 2d annotation records that belong to the input
`sample_data_token`.
"""
......@@ -128,7 +135,7 @@ def get_2d_boxes(nusc, sample_data_token: str, visibilities: List[str]):
# Generate dictionary record to be included in the .json file.
repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y,
sample_data_token, sd_rec['filename'])
'nuscenes')
# if repro_rec is None, we do not append it into repro_recs
if repro_rec is not None:
......@@ -178,23 +185,36 @@ def get_2d_boxes(nusc, sample_data_token: str, visibilities: List[str]):
return repro_recs
def get_waymo_2d_boxes(info, cam_idx, occluded, annos=None, mono3d=True):
"""Get the 2D annotation records for a given info.
def get_kitti_style_2d_boxes(info: dict,
cam_idx: int = 2,
occluded: Tuple[int] = (0, 1, 2, 3),
annos: Optional[dict] = None,
mono3d: bool = True,
dataset: str = 'kitti'):
"""Get the 2d / mono3d annotation records for a given info.
This function is used to get 2D annotations when loading annotations from
a dataset class. The original version in the data converter will be
deprecated in the future.
This function is used to get 2D/Mono3D annotations when loading annotations
from a KITTI-style dataset class, such as the KITTI and Waymo datasets.
Args:
info: Information of the given sample data.
occluded: Integer (0, 1, 2, 3) indicating occlusion state:
info (dict): Information of the given sample data.
cam_idx (int): Camera id to which the 2d / mono3d annotations to
obtain belong. In KITTI, typically only CAM 2 is used,
while in Waymo, multiple cameras can be used.
Defaults to 2.
occluded (tuple[int]): Integer (0, 1, 2, 3) indicating occlusion state:
0 = fully visible, 1 = partly occluded, 2 = largely occluded,
3 = unknown, -1 = DontCare
3 = unknown, -1 = DontCare.
Defaults to (0, 1, 2, 3).
annos (dict, optional): Original annotations.
mono3d (bool): Whether to get boxes with mono3d annotation.
Defaults to True.
dataset (str): Name of the dataset from which the 2d bboxes are
obtained. Defaults to `kitti`.
Returns:
list[dict]: List of 2D annotation record that belongs to the input
`sample_data_token`.
list[dict]: List of 2d / mono3d annotation records that
belong to the input camera id.
"""
# Get calibration information
camera_intrinsic = info['calib'][f'P{cam_idx}']
......@@ -224,7 +244,6 @@ def get_waymo_2d_boxes(info, cam_idx, occluded, annos=None, mono3d=True):
ann_rec['sample_annotation_token'] = \
f"{info['image']['image_idx']}.{ann_idx}"
ann_rec['sample_data_token'] = info['image']['image_idx']
sample_data_token = info['image']['image_idx']
loc = ann_rec['location'][np.newaxis, :]
dim = ann_rec['dimensions'][np.newaxis, :]
......@@ -266,9 +285,8 @@ def get_waymo_2d_boxes(info, cam_idx, occluded, annos=None, mono3d=True):
min_x, min_y, max_x, max_y = final_coords
# Generate dictionary record to be included in the .json file.
repro_rec = generate_waymo_mono3d_record(ann_rec, min_x, min_y, max_x,
max_y, sample_data_token,
info['image']['image_path'])
repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y,
dataset)
# If mono3d=True, add 3D annotations in camera coordinates
if mono3d and (repro_rec is not None):
......@@ -288,11 +306,7 @@ def get_waymo_2d_boxes(info, cam_idx, occluded, annos=None, mono3d=True):
# samples with depth < 0 will be removed
if repro_rec['depth'] <= 0:
continue
repro_rec['attribute_name'] = -1 # no attribute in KITTI
repro_rec['attribute_id'] = -1
repro_recs.append(repro_rec)
repro_recs.append(repro_rec)
return repro_recs
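A hedged call sketch for `get_kitti_style_2d_boxes`; here `info` stands for one KITTI-style sample info dict (with 'calib', 'image' and 'annos' entries) as produced by the data converters:

```python
# `info` is assumed to be one entry of a kitti_infos_*.pkl file.
repro_recs = get_kitti_style_2d_boxes(
    info,
    cam_idx=2,        # KITTI's commonly used camera
    occluded=(0, 1),  # keep only fully visible / partly occluded boxes
    annos=None,       # presumably falls back to the annotations in `info`
    mono3d=True,      # also attach 3d fields such as depth and center_2d
    dataset='kitti')
```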
......@@ -355,7 +369,7 @@ def post_process_coords(
def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,
sample_data_token: str, filename: str) -> OrderedDict:
dataset: str) -> OrderedDict:
"""Generate one 2D annotation record given various information on top of
the 2D bounding box coordinates.
......@@ -365,112 +379,40 @@ def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,
y1 (float): Minimum value of the y coordinate.
x2 (float): Maximum value of the x coordinate.
y2 (float): Maximum value of the y coordinate.
sample_data_token (str): Sample data token.
filename (str):The corresponding image file where the annotation
is present.
dataset (str): Name of dataset.
Returns:
dict: A sample mono3D annotation record.
- bbox_label (int): 2d box label id
- bbox_label_3d (int): 3d box label id
- bbox (list[float]): left x, top y, right x, bottom y
of 2d box
- bbox_3d_isvalid (bool): whether the box is valid
dict: A sample 2d annotation record.
- bbox_label (int): 2d box label id
- bbox_label_3d (int): 3d box label id
- bbox (list[float]): left x, top y, right x, bottom y
of 2d box
- bbox_3d_isvalid (bool): whether the box is valid
"""
repro_rec = OrderedDict()
repro_rec['sample_data_token'] = sample_data_token
coco_rec = dict()
relevant_keys = [
'attribute_tokens',
'category_name',
'instance_token',
'next',
'num_lidar_pts',
'num_radar_pts',
'prev',
'sample_annotation_token',
'sample_data_token',
'visibility_token',
]
for key, value in ann_rec.items():
if key in relevant_keys:
repro_rec[key] = value
repro_rec['bbox_corners'] = [x1, y1, x2, y2]
repro_rec['filename'] = filename
if repro_rec['category_name'] not in NuScenesNameMapping:
return None
cat_name = NuScenesNameMapping[repro_rec['category_name']]
coco_rec['bbox_label'] = nus_categories.index(cat_name)
coco_rec['bbox_label_3d'] = nus_categories.index(cat_name)
coco_rec['bbox'] = [x1, y1, x2, y2]
coco_rec['bbox_3d_isvalid'] = True
return coco_rec
def generate_waymo_mono3d_record(ann_rec, x1, y1, x2, y2, sample_data_token,
filename):
"""Generate one 2D annotation record given various information on top of
the 2D bounding box coordinates.
The original version in the data converter will be deprecated in the
future.
if dataset == 'nuscenes':
cat_name = ann_rec['category_name']
if cat_name not in NuScenesNameMapping:
return None
else:
cat_name = NuScenesNameMapping[cat_name]
categories = nus_categories
else:
    if dataset == 'kitti':
        categories = kitti_categories
    elif dataset == 'waymo':
        categories = waymo_categories
    else:
        raise NotImplementedError('Unsupported dataset!')
    cat_name = ann_rec['name']
    # the category tuple must be resolved before the membership check
    if cat_name not in categories:
        return None
Args:
ann_rec (dict): Original 3d annotation record.
x1 (float): Minimum value of the x coordinate.
y1 (float): Minimum value of the y coordinate.
x2 (float): Maximum value of the x coordinate.
y2 (float): Maximum value of the y coordinate.
sample_data_token (str): Sample data token.
filename (str):The corresponding image file where the annotation
is present.
rec = dict()
rec['bbox_label'] = categories.index(cat_name)
rec['bbox_label_3d'] = rec['bbox_label']
rec['bbox'] = [x1, y1, x2, y2]
rec['bbox_3d_isvalid'] = True
Returns:
dict: A sample 2D annotation record.
- file_name (str): file name
- image_id (str): sample data token
- area (float): 2d box area
- category_name (str): category name
- category_id (int): category id
- bbox (list[float]): left x, top y, x_size, y_size of 2d box
- iscrowd (int): whether the area is crowd
"""
kitti_categories = ('Car', 'Pedestrian', 'Cyclist')
repro_rec = OrderedDict()
repro_rec['sample_data_token'] = sample_data_token
coco_rec = dict()
key_mapping = {
'name': 'category_name',
'num_points_in_gt': 'num_lidar_pts',
'sample_annotation_token': 'sample_annotation_token',
'sample_data_token': 'sample_data_token',
}
for key, value in ann_rec.items():
if key in key_mapping.keys():
repro_rec[key_mapping[key]] = value
repro_rec['bbox_corners'] = [x1, y1, x2, y2]
repro_rec['filename'] = filename
coco_rec['file_name'] = filename
coco_rec['image_id'] = sample_data_token
coco_rec['area'] = (y2 - y1) * (x2 - x1)
if repro_rec['category_name'] not in kitti_categories:
return None
cat_name = repro_rec['category_name']
coco_rec['category_name'] = cat_name
coco_rec['category_id'] = kitti_categories.index(cat_name)
coco_rec['bbox_label'] = coco_rec['category_id']
coco_rec['bbox_label_3d'] = coco_rec['bbox_label']
coco_rec['bbox'] = [x1, y1, x2 - x1, y2 - y1]
coco_rec['iscrowd'] = 0
return coco_rec
return rec
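A minimal sketch of what the rewritten `generate_record` returns, using a hypothetical KITTI annotation ('Car' sits at index 2 of `kitti_categories`):

```python
ann_rec = {'name': 'Car'}  # hypothetical KITTI-style annotation record
rec = generate_record(ann_rec, 10.0, 20.0, 110.0, 80.0, dataset='kitti')
# rec == {'bbox_label': 2, 'bbox_label_3d': 2,
#         'bbox': [10.0, 20.0, 110.0, 80.0], 'bbox_3d_isvalid': True}
```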
......@@ -26,11 +26,11 @@ class Det3DDataset(BaseDataset):
metainfo (dict, optional): Meta information for dataset, such as class
information. Defaults to None.
data_prefix (dict, optional): Prefix for training data. Defaults to
dict(pts='velodyne', img="").
dict(pts='velodyne', img='').
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
modality (dict, optional): Modality to specify the sensor data used
as input, it usually has following keys.
as input, it usually has the following keys:
- use_camera: bool
- use_lidar: bool
......@@ -40,7 +40,7 @@ class Det3DDataset(BaseDataset):
box_type_3d (str, optional): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format and then convert it to `box_type_3d`.
Defaults to 'LiDAR'. Available options includes
Defaults to 'LiDAR'. Available options include:
- 'LiDAR': Box in LiDAR coordinates, usually for
outdoor point cloud 3d detection.
......@@ -49,15 +49,15 @@ class Det3DDataset(BaseDataset):
- 'Camera': Box in camera coordinates, usually
for vision-based 3d detection.
filter_empty_gt (bool): Whether to filter the data with
filter_empty_gt (bool, optional): Whether to filter the data with
empty GT. Defaults to True.
test_mode (bool): Whether the dataset is in test mode.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
load_eval_anns (bool): Whether to load annotations
in test_mode, the annotation will be save in
`eval_ann_infos`, which can be use in Evaluator.
file_client_args (dict): Configuration of file client.
Defaults to `dict(backend='disk')`.
load_eval_anns (bool, optional): Whether to load annotations
in test_mode; the annotations will be saved in `eval_ann_infos`,
which can be used in the Evaluator. Defaults to True.
file_client_args (dict, optional): Configuration of file client.
Defaults to dict(backend='disk').
"""
def __init__(self,
......@@ -73,7 +73,7 @@ class Det3DDataset(BaseDataset):
test_mode: bool = False,
load_eval_anns=True,
file_client_args: dict = dict(backend='disk'),
**kwargs):
**kwargs) -> None:
# init file client
self.file_client = mmengine.FileClient(**file_client_args)
self.filter_empty_gt = filter_empty_gt
......@@ -125,7 +125,7 @@ class Det3DDataset(BaseDataset):
self.metainfo['box_type_3d'] = box_type_3d
self.metainfo['label_mapping'] = self.label_mapping
def _remove_dontcare(self, ann_info):
def _remove_dontcare(self, ann_info: dict) -> dict:
"""Remove annotations that do not need to be cared.
-1 indicate dontcare in MMDet3d.
......@@ -192,7 +192,8 @@ class Det3DDataset(BaseDataset):
'bbox_3d': 'gt_bboxes_3d',
'depth': 'depths',
'center_2d': 'centers_2d',
'attr_label': 'attr_labels'
'attr_label': 'attr_labels',
'velocity': 'velocities',
}
instances = info['instances']
# empty gt
......@@ -209,14 +210,18 @@ class Det3DDataset(BaseDataset):
self.label_mapping[item] for item in temp_anns
]
if ann_name in name_mapping:
ann_name = name_mapping[ann_name]
mapped_ann_name = name_mapping[ann_name]
else:
mapped_ann_name = ann_name
if 'label' in ann_name:
temp_anns = np.array(temp_anns).astype(np.int64)
else:
elif ann_name in name_mapping:
temp_anns = np.array(temp_anns).astype(np.float32)
else:
temp_anns = np.array(temp_anns)
ann_info[ann_name] = temp_anns
ann_info[mapped_ann_name] = temp_anns
ann_info['instances'] = info['instances']
return ann_info
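A small standalone sketch of the mapping loop above: instance fields are renamed to mmdet3d's conventions, label-like fields are cast to int64, and mapped numeric fields to float32 (values are hypothetical):

```python
import numpy as np

name_mapping = {'bbox_3d': 'gt_bboxes_3d', 'bbox_label_3d': 'gt_labels_3d'}
instances = [{'bbox_3d': [0, 0, 0, 1, 1, 1, 0], 'bbox_label_3d': 1}]

ann_info = {}
for ann_name in instances[0]:
    temp_anns = [inst[ann_name] for inst in instances]
    mapped_ann_name = name_mapping.get(ann_name, ann_name)
    if 'label' in ann_name:
        temp_anns = np.array(temp_anns).astype(np.int64)
    elif ann_name in name_mapping:
        temp_anns = np.array(temp_anns).astype(np.float32)
    else:
        temp_anns = np.array(temp_anns)
    ann_info[mapped_ann_name] = temp_anns
# ann_info == {'gt_bboxes_3d': float32 array of shape (1, 7),
#              'gt_labels_3d': int64 array of shape (1,)}
```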
......@@ -241,6 +246,7 @@ class Det3DDataset(BaseDataset):
self.data_prefix.get('pts', ''),
info['lidar_points']['lidar_path'])
info['num_pts_feats'] = info['lidar_points']['num_pts_feats']
info['lidar_path'] = info['lidar_points']['lidar_path']
if 'lidar_sweeps' in info:
for sweep in info['lidar_sweeps']:
......@@ -285,7 +291,7 @@ class Det3DDataset(BaseDataset):
return info
def prepare_data(self, index):
def prepare_data(self, index: int) -> Optional[dict]:
"""Data preparation for both training and testing stage.
Called by `__getitem__` of dataset.
......@@ -294,7 +300,7 @@ class Det3DDataset(BaseDataset):
index (int): Index for accessing the target data.
Returns:
dict: Data dict of the corresponding index.
dict | None: Data dict of the corresponding index.
"""
input_dict = self.get_data_info(index)
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Callable, List, Optional, Union
from typing import Callable, List, Union
import numpy as np
......@@ -22,11 +22,12 @@ class KittiDataset(Det3DDataset):
Defaults to None.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to `dict(use_lidar=True)`.
default_cam_key (str, optional): The default camera name adopted.
Defaults to 'CAM2'.
box_type_3d (str, optional): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format and then convert it to `box_type_3d`.
Defaults to 'LiDAR' in this dataset. Available options includes
Defaults to 'LiDAR' in this dataset. Available options include:
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
......@@ -35,9 +36,9 @@ class KittiDataset(Det3DDataset):
Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
pcd_limit_range (list, optional): The range of point cloud used to
filter invalid predicted boxes.
Default: [0, -40, -3, 70.4, 40, 0.0].
pcd_limit_range (list[float], optional): The range of point cloud
used to filter invalid predicted boxes.
Defaults to [0, -40, -3, 70.4, 40, 0.0].
"""
# TODO: use full classes of kitti
METAINFO = {
......@@ -49,15 +50,18 @@ class KittiDataset(Det3DDataset):
data_root: str,
ann_file: str,
pipeline: List[Union[dict, Callable]] = [],
modality: Optional[dict] = dict(use_lidar=True),
modality: dict = dict(use_lidar=True),
default_cam_key: str = 'CAM2',
task: str = 'lidar_det',
box_type_3d: str = 'LiDAR',
filter_empty_gt: bool = True,
test_mode: bool = False,
pcd_limit_range: List[float] = [0, -40, -3, 70.4, 40, 0.0],
**kwargs):
**kwargs) -> None:
self.pcd_limit_range = pcd_limit_range
assert task in ('lidar_det', 'mono_det')
self.task = task
super().__init__(
data_root=data_root,
ann_file=ann_file,
......@@ -107,11 +111,14 @@ class KittiDataset(Det3DDataset):
info['plane'] = plane_lidar
if self.task == 'mono_det':
info['instances'] = info['cam_instances'][self.default_cam_key]
info = super().parse_data_info(info)
return info
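A hedged construction sketch for the new `task` switch (paths are hypothetical); with `task='mono_det'` the per-camera `cam_instances` replace the lidar instances as shown above:

```python
from mmdet3d.datasets import KittiDataset

dataset = KittiDataset(
    data_root='data/kitti/',           # hypothetical data root
    ann_file='kitti_infos_train.pkl',  # hypothetical info file
    task='mono_det',         # must be 'lidar_det' or 'mono_det'
    default_cam_key='CAM2')  # KITTI typically uses CAM2
```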
def parse_ann_info(self, info):
def parse_ann_info(self, info: dict) -> dict:
"""Get annotation info according to the given index.
Args:
......@@ -135,6 +142,12 @@ class KittiDataset(Det3DDataset):
ann_info['gt_bboxes_3d'] = np.zeros((0, 7), dtype=np.float32)
ann_info['gt_labels_3d'] = np.zeros(0, dtype=np.int64)
if self.task == 'mono_det':
ann_info['gt_bboxes'] = np.zeros((0, 4), dtype=np.float32)
ann_info['gt_bboxes_labels'] = np.zeros(0, dtype=np.int64)
ann_info['centers_2d'] = np.zeros((0, 2), dtype=np.float32)
ann_info['depths'] = np.zeros((0), dtype=np.float32)
ann_info = self._remove_dontcare(ann_info)
# in kitti, lidar2cam = R0_rect @ Tr_velo_to_cam
lidar2cam = np.array(info['images']['CAM2']['lidar2cam'])
......
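To make the calibration comment above concrete, a small numpy sketch (identity matrices stand in for real KITTI calibration):

```python
import numpy as np

R0_rect = np.eye(4)         # rectifying rotation, padded to 4x4
Tr_velo_to_cam = np.eye(4)  # LiDAR-to-camera extrinsic, 4x4
lidar2cam = R0_rect @ Tr_velo_to_cam

# A homogeneous LiDAR point maps to the rectified camera frame in one step.
pt_lidar = np.array([10.0, 0.0, -1.0, 1.0])
pt_cam = lidar2cam @ pt_lidar
```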
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List
from typing import Callable, List, Union
import numpy as np
......@@ -24,18 +24,18 @@ class LyftDataset(Det3DDataset):
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to None.
as input. Defaults to dict(use_camera=False, use_lidar=True).
box_type_3d (str): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format and then convert it to `box_type_3d`.
Defaults to 'LiDAR' in this dataset. Available options includes
Defaults to 'LiDAR' in this dataset. Available options include:
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
filter_empty_gt (bool): Whether to filter empty GT.
filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True.
test_mode (bool): Whether the dataset is in test mode.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
"""
......@@ -48,8 +48,8 @@ class LyftDataset(Det3DDataset):
def __init__(self,
data_root: str,
ann_file: str,
pipeline: List[dict] = None,
modality: Dict = dict(use_camera=False, use_lidar=True),
pipeline: List[Union[dict, Callable]] = [],
modality: dict = dict(use_camera=False, use_lidar=True),
box_type_3d: str = 'LiDAR',
filter_empty_gt: bool = True,
test_mode: bool = False,
......
# Copyright (c) OpenMMLab. All rights reserved.
from os import path as osp
from typing import Dict, List
from typing import Callable, List, Union
import numpy as np
......@@ -22,25 +22,26 @@ class NuScenesDataset(Det3DDataset):
Args:
data_root (str): Path of dataset root.
ann_file (str): Path of annotation file.
task (str, optional): Detection task. Defaults to 'lidar_det'.
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
box_type_3d (str): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format and then convert it to `box_type_3d`.
Defaults to 'LiDAR' in this dataset. Available options includes.
Defaults to 'LiDAR' in this dataset. Available options include:
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to dict(use_camera=False,use_lidar=True).
filter_empty_gt (bool): Whether to filter empty GT.
as input. Defaults to dict(use_camera=False, use_lidar=True).
filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True.
test_mode (bool): Whether the dataset is in test mode.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
with_velocity (bool): Whether include velocity prediction
with_velocity (bool, optional): Whether to include velocity prediction
into the experiments. Defaults to True.
use_valid_flag (bool): Whether to use `use_valid_flag` key
use_valid_flag (bool, optional): Whether to use `use_valid_flag` key
in the info file as a mask to filter gt_boxes and gt_names.
Defaults to False.
"""
......@@ -55,10 +56,10 @@ class NuScenesDataset(Det3DDataset):
def __init__(self,
data_root: str,
ann_file: str,
task: str = '3d',
pipeline: List[dict] = None,
task: str = 'lidar_det',
pipeline: List[Union[dict, Callable]] = [],
box_type_3d: str = 'LiDAR',
modality: Dict = dict(
modality: dict = dict(
use_camera=False,
use_lidar=True,
),
......@@ -66,12 +67,12 @@ class NuScenesDataset(Det3DDataset):
test_mode: bool = False,
with_velocity: bool = True,
use_valid_flag: bool = False,
**kwargs):
**kwargs) -> None:
self.use_valid_flag = use_valid_flag
self.with_velocity = with_velocity
# TODO: Redesign multi-view data process in the future
assert task in ('3d', 'mono3d', 'multi-view')
assert task in ('lidar_det', 'mono_det', 'multi-view_det')
self.task = task
assert box_type_3d.lower() in ('lidar', 'camera')
......@@ -85,6 +86,27 @@ class NuScenesDataset(Det3DDataset):
test_mode=test_mode,
**kwargs)
def _filter_with_mask(self, ann_info: dict) -> dict:
"""Remove annotations that do not need to be cared.
Args:
ann_info (dict): Dict of annotation infos.
Returns:
dict: Annotations after filtering.
"""
filtered_annotations = {}
if self.use_valid_flag:
filter_mask = ann_info['bbox_3d_isvalid']
else:
filter_mask = ann_info['num_lidar_pts'] > 0
for key in ann_info.keys():
if key != 'instances':
filtered_annotations[key] = ann_info[key][filter_mask]
else:
filtered_annotations[key] = ann_info[key]
return filtered_annotations
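A minimal sketch of `_filter_with_mask` with hypothetical annotations; with `use_valid_flag=False`, boxes without lidar points are dropped from every array field while the raw 'instances' list passes through untouched:

```python
import numpy as np

ann_info = {
    'gt_bboxes_3d': np.zeros((3, 7), dtype=np.float32),
    'gt_labels_3d': np.array([0, 1, 2]),
    'num_lidar_pts': np.array([5, 0, 12]),
    'instances': [{'token': 'a'}, {'token': 'b'}, {'token': 'c'}],
}

filter_mask = ann_info['num_lidar_pts'] > 0  # keeps instances 0 and 2
filtered = {
    key: (value[filter_mask] if key != 'instances' else value)
    for key, value in ann_info.items()
}
```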
def parse_ann_info(self, info: dict) -> dict:
"""Get annotation info according to the given index.
......@@ -99,66 +121,51 @@ class NuScenesDataset(Det3DDataset):
- gt_labels_3d (np.ndarray): Labels of ground truths.
"""
ann_info = super().parse_ann_info(info)
if ann_info is None:
# empty instance
anns_results = dict()
anns_results['gt_bboxes_3d'] = np.zeros((0, 7), dtype=np.float32)
anns_results['gt_labels_3d'] = np.zeros(0, dtype=np.int64)
return anns_results
if self.use_valid_flag:
mask = ann_info['bbox_3d_isvalid']
else:
mask = ann_info['num_lidar_pts'] > 0
gt_bboxes_3d = ann_info['gt_bboxes_3d'][mask]
gt_labels_3d = ann_info['gt_labels_3d'][mask]
if 'gt_bboxes' in ann_info:
gt_bboxes = ann_info['gt_bboxes'][mask]
gt_labels = ann_info['gt_labels'][mask]
attr_labels = ann_info['attr_labels'][mask]
if ann_info is not None:
ann_info = self._filter_with_mask(ann_info)
if self.with_velocity:
gt_bboxes_3d = ann_info['gt_bboxes_3d']
gt_velocities = ann_info['velocities']
nan_mask = np.isnan(gt_velocities[:, 0])
gt_velocities[nan_mask] = [0.0, 0.0]
gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_velocities],
axis=-1)
ann_info['gt_bboxes_3d'] = gt_bboxes_3d
else:
gt_bboxes = np.zeros((0, 4), dtype=np.float32)
gt_labels = np.array([], dtype=np.int64)
attr_labels = np.array([], dtype=np.int64)
if 'centers_2d' in ann_info:
centers_2d = ann_info['centers_2d'][mask]
depths = ann_info['depths'][mask]
else:
centers_2d = np.zeros((0, 2), dtype=np.float32)
depths = np.zeros((0), dtype=np.float32)
if self.with_velocity:
gt_velocity = ann_info['velocity'][mask]
nan_mask = np.isnan(gt_velocity[:, 0])
gt_velocity[nan_mask] = [0.0, 0.0]
gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_velocity], axis=-1)
# empty instance
ann_info = dict()
if self.with_velocity:
ann_info['gt_bboxes_3d'] = np.zeros((0, 9), dtype=np.float32)
else:
ann_info['gt_bboxes_3d'] = np.zeros((0, 7), dtype=np.float32)
ann_info['gt_labels_3d'] = np.zeros(0, dtype=np.int64)
if self.task == 'mono_det':
ann_info['gt_bboxes'] = np.zeros((0, 4), dtype=np.float32)
ann_info['gt_bboxes_labels'] = np.zeros(0, dtype=np.int64)
ann_info['attr_labels'] = np.zeros(0, dtype=np.int64)
ann_info['centers_2d'] = np.zeros((0, 2), dtype=np.float32)
ann_info['depths'] = np.zeros((0), dtype=np.float32)
# the nuscenes box center is [0.5, 0.5, 0.5], we change it to be
# the same as KITTI (0.5, 0.5, 0)
# TODO: Unify the coordinates
if self.task == 'mono3d':
if self.task == 'mono_det':
gt_bboxes_3d = CameraInstance3DBoxes(
gt_bboxes_3d,
box_dim=gt_bboxes_3d.shape[-1],
ann_info['gt_bboxes_3d'],
box_dim=ann_info['gt_bboxes_3d'].shape[-1],
origin=(0.5, 0.5, 0.5))
else:
gt_bboxes_3d = LiDARInstance3DBoxes(
gt_bboxes_3d,
box_dim=gt_bboxes_3d.shape[-1],
ann_info['gt_bboxes_3d'],
box_dim=ann_info['gt_bboxes_3d'].shape[-1],
origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)
anns_results = dict(
gt_bboxes_3d=gt_bboxes_3d,
gt_labels_3d=gt_labels_3d,
gt_bboxes=gt_bboxes,
gt_labels=gt_labels,
attr_labels=attr_labels,
centers_2d=centers_2d,
depths=depths)
ann_info['gt_bboxes_3d'] = gt_bboxes_3d
return anns_results
return ann_info
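A small numpy sketch of the `with_velocity` branch above: NaN velocities are zeroed and concatenated onto the 7-dim boxes, yielding the 9-dim boxes that the empty-instance branch also allocates:

```python
import numpy as np

gt_bboxes_3d = np.zeros((2, 7), dtype=np.float32)  # hypothetical boxes
gt_velocities = np.array([[0.5, 0.1],
                          [np.nan, np.nan]], dtype=np.float32)

nan_mask = np.isnan(gt_velocities[:, 0])
gt_velocities[nan_mask] = [0.0, 0.0]
gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_velocities], axis=-1)
assert gt_bboxes_3d.shape == (2, 9)
```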
def parse_data_info(self, info: dict) -> dict:
"""Process the raw data info.
......@@ -173,7 +180,7 @@ class NuScenesDataset(Det3DDataset):
dict: Has `ann_info` in training stage. And
all path has been converted to absolute path.
"""
if self.task == 'mono3d':
if self.task == 'mono_det':
data_list = []
if self.modality['use_lidar']:
info['lidar_points']['lidar_path'] = \
......
......@@ -36,7 +36,7 @@ class ScanNetDataset(Det3DDataset):
box_type_3d (str): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format and then convert it to `box_type_3d`.
Defaults to 'Depth' in this dataset. Available options includes
Defaults to 'Depth' in this dataset. Available options include:
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
......@@ -61,13 +61,13 @@ class ScanNetDataset(Det3DDataset):
def __init__(self,
data_root: str,
ann_file: str,
metainfo: dict = None,
metainfo: Optional[dict] = None,
data_prefix: dict = dict(
pts='points',
pts_instance_mask='instance_mask',
pts_semantic_mask='semantic_mask'),
pipeline: List[Union[dict, Callable]] = [],
modality=dict(use_camera=False, use_lidar=True),
modality: dict = dict(use_camera=False, use_lidar=True),
box_type_3d: str = 'Depth',
filter_empty_gt: bool = True,
test_mode: bool = False,
......@@ -101,7 +101,7 @@ class ScanNetDataset(Det3DDataset):
assert self.modality['use_camera'] or self.modality['use_lidar']
@staticmethod
def _get_axis_align_matrix(info: dict) -> dict:
def _get_axis_align_matrix(info: dict) -> np.ndarray:
"""Get axis_align_matrix from info. If not exist, return identity mat.
Args:
......
......@@ -24,25 +24,25 @@ class SUNRGBDDataset(Det3DDataset):
ann_file (str): Path of annotation file.
metainfo (dict, optional): Meta information for dataset, such as class
information. Defaults to None.
data_prefix (dict): Prefix for data. Defaults to
`dict(pts='points',img='sunrgbd_trainval')`.
data_prefix (dict, optional): Prefix for data. Defaults to
dict(pts='points', img='sunrgbd_trainval').
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to `dict(use_camera=True, use_lidar=True)`.
default_cam_key (str): The default camera name adopted.
Defaults to "CAM0".
as input. Defaults to dict(use_camera=True, use_lidar=True).
default_cam_key (str, optional): The default camera name adopted.
Defaults to 'CAM0'.
box_type_3d (str): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format and then convert it to `box_type_3d`.
Defaults to 'Depth' in this dataset. Available options includes
Defaults to 'Depth' in this dataset. Available options include:
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
filter_empty_gt (bool): Whether to filter empty GT.
filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True.
test_mode (bool): Whether the dataset is in test mode.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
"""
METAINFO = {
......
......@@ -11,11 +11,12 @@ from .test_time_aug import MultiScaleFlipAug3D
from .transforms_3d import (AffineResize, BackgroundPointsFilter,
GlobalAlignment, GlobalRotScaleTrans,
IndoorPatchPointSample, IndoorPointSample,
ObjectNameFilter, ObjectNoise, ObjectRangeFilter,
ObjectSample, PointSample, PointShuffle,
MultiViewWrapper, ObjectNameFilter, ObjectNoise,
ObjectRangeFilter, ObjectSample,
PhotoMetricDistortion3D, PointSample, PointShuffle,
PointsRangeFilter, RandomDropPointsColor,
RandomFlip3D, RandomJitterPoints, RandomShiftScale,
VoxelBasedPointSampler)
RandomFlip3D, RandomJitterPoints, RandomResize3D,
RandomShiftScale, Resize3D, VoxelBasedPointSampler)
__all__ = [
'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans',
......@@ -29,5 +30,6 @@ __all__ = [
'VoxelBasedPointSampler', 'GlobalAlignment', 'IndoorPatchPointSample',
'LoadImageFromFileMono3D', 'ObjectNameFilter', 'RandomDropPointsColor',
'RandomJitterPoints', 'AffineResize', 'RandomShiftScale',
'LoadPointsFromDict'
'LoadPointsFromDict', 'Resize3D', 'RandomResize3D',
'MultiViewWrapper', 'PhotoMetricDistortion3D'
]
......@@ -32,7 +32,7 @@ class Compose:
data (dict): A result dict contains the data to transform.
Returns:
dict: Transformed data.
dict: Transformed data.
"""
for t in self.transforms:
......
......@@ -63,15 +63,20 @@ class Pack3DDetInputs(BaseTransform):
def __init__(
self,
keys: dict,
meta_keys: dict = ('img_path', 'ori_shape', 'img_shape', 'lidar2img',
'depth2img', 'cam2img', 'pad_shape', 'scale_factor',
'flip', 'pcd_horizontal_flip', 'pcd_vertical_flip',
'box_mode_3d', 'box_type_3d', 'img_norm_cfg',
'pcd_trans', 'sample_idx', 'pcd_scale_factor',
'pcd_rotation', 'pcd_rotation_angle', 'lidar_path',
'transformation_3d_flow', 'trans_mat',
'affine_aug')):
keys: tuple,
meta_keys: tuple = ('img_path', 'ori_shape', 'img_shape', 'lidar2img',
'depth2img', 'cam2img', 'pad_shape',
'scale_factor', 'flip', 'pcd_horizontal_flip',
'pcd_vertical_flip', 'box_mode_3d', 'box_type_3d',
'img_norm_cfg', 'num_pts_feats', 'pcd_trans',
'sample_idx', 'pcd_scale_factor', 'pcd_rotation',
'pcd_rotation_angle', 'lidar_path',
'transformation_3d_flow', 'trans_mat',
'affine_aug', 'sweep_img_metas', 'ori_cam2img',
'cam2global', 'crop_offset', 'img_crop_offset',
'resize_img_shape', 'lidar2cam', 'ori_lidar2img',
'num_ref_frames', 'num_views', 'ego2global')
) -> None:
self.keys = keys
self.meta_keys = meta_keys
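For reference, a typical (assumed) config-style pipeline entry for this transform; the exact `keys` depend on the task:

```python
# Commonly the last step of a lidar-based training pipeline.
pack = dict(
    type='Pack3DDetInputs',
    keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
```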
......@@ -98,7 +103,7 @@ class Pack3DDetInputs(BaseTransform):
- img
- 'data_samples' (obj:`Det3DDataSample`): The annotation info of
the sample.
the sample.
"""
# augtest
if isinstance(results, list):
......@@ -115,7 +120,7 @@ class Pack3DDetInputs(BaseTransform):
else:
raise NotImplementedError
def pack_single_results(self, results):
def pack_single_results(self, results: dict) -> dict:
"""Method to pack the single input data. when the value in this dict is
a list, it usually is in Augmentations Testing.
......@@ -131,7 +136,7 @@ class Pack3DDetInputs(BaseTransform):
- points
- img
- 'data_samples' (obj:`Det3DDataSample`): The annotation info
- 'data_samples' (:obj:`Det3DDataSample`): The annotation info
of the sample.
"""
# Format 3D data
......@@ -219,6 +224,7 @@ class Pack3DDetInputs(BaseTransform):
return packed_results
def __repr__(self) -> str:
"""str: Return a string that describes the module."""
repr_str = self.__class__.__name__
repr_str += f'(keys={self.keys})'
repr_str += f'(meta_keys={self.meta_keys})'
......
......@@ -16,7 +16,7 @@ class MultiScaleFlipAug3D(BaseTransform):
Args:
transforms (list[dict]): Transforms to apply in each augmentation.
img_scale (tuple | list[tuple]: Images scales for resizing.
img_scale (tuple | list[tuple]): Image scales for resizing.
pts_scale_ratio (float | list[float]): Points scale ratios for
resizing.
flip (bool, optional): Whether apply flip augmentation.
......@@ -25,11 +25,11 @@ class MultiScaleFlipAug3D(BaseTransform):
directions for images, options are "horizontal" and "vertical".
If flip_direction is list, multiple flip augmentations will
be applied. It has no effect when ``flip == False``.
Defaults to "horizontal".
pcd_horizontal_flip (bool, optional): Whether apply horizontal
Defaults to 'horizontal'.
pcd_horizontal_flip (bool, optional): Whether to apply horizontal
flip augmentation to point cloud. Defaults to True.
Note that it works only when 'flip' is turned on.
pcd_vertical_flip (bool, optional): Whether apply vertical flip
pcd_vertical_flip (bool, optional): Whether to apply vertical flip
augmentation to point cloud. Defaults to True.
Note that it works only when 'flip' is turned on.
"""
......@@ -46,7 +46,7 @@ class MultiScaleFlipAug3D(BaseTransform):
self.img_scale = img_scale if isinstance(img_scale,
list) else [img_scale]
self.pts_scale_ratio = pts_scale_ratio \
if isinstance(pts_scale_ratio, list) else[float(pts_scale_ratio)]
if isinstance(pts_scale_ratio, list) else [float(pts_scale_ratio)]
assert mmengine.is_list_of(self.img_scale, tuple)
assert mmengine.is_list_of(self.pts_scale_ratio, float)
......
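A hedged test-time-augmentation config sketch for this transform; the inner `transforms` list is deliberately left abstract:

```python
tta = dict(
    type='MultiScaleFlipAug3D',
    img_scale=(1333, 800),  # a single scale; a list of tuples also works
    pts_scale_ratio=1.0,    # scalars are promoted to [float(...)] as above
    flip=False,
    transforms=[
        # e.g. GlobalRotScaleTrans, RandomFlip3D, PointsRangeFilter, ...
    ])
```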