Unverified Commit 6c03a971 authored by Tai-Wang's avatar Tai-Wang Committed by GitHub
Browse files

Release v1.1.0rc1

Release v1.1.0rc1
parents 9611c2d0 ca42c312
...@@ -102,10 +102,10 @@ python tools/misc/browse_dataset.py configs/_base_/datasets/kitti-3d-3class.py - ...@@ -102,10 +102,10 @@ python tools/misc/browse_dataset.py configs/_base_/datasets/kitti-3d-3class.py -
python tools/misc/browse_dataset.py configs/_base_/datasets/kitti-3d-3class.py --task det --aug --output-dir ${OUTPUT_DIR} --online python tools/misc/browse_dataset.py configs/_base_/datasets/kitti-3d-3class.py --task det --aug --output-dir ${OUTPUT_DIR} --online
``` ```
如果您还想显示 2D 图像以及投影的 3D 边界框,则需要找到支持多模态数据加载的配置文件,然后将 `--task` 参数更改为 `multi_modality-det`。一个例子如下所示 如果您还想显示 2D 图像以及投影的 3D 边界框,则需要找到支持多模态数据加载的配置文件,然后将 `--task` 参数更改为 `multi-modality_det`。一个例子如下所示
```shell ```shell
python tools/misc/browse_dataset.py configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py --task multi_modality-det --output-dir ${OUTPUT_DIR} --online python tools/misc/browse_dataset.py configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py --task multi-modality_det --output-dir ${OUTPUT_DIR} --online
``` ```
![](../../resources/browse_dataset_multi_modality.png) ![](../../resources/browse_dataset_multi_modality.png)
...@@ -121,7 +121,7 @@ python tools/misc/browse_dataset.py configs/_base_/datasets/scannet-seg.py --tas ...@@ -121,7 +121,7 @@ python tools/misc/browse_dataset.py configs/_base_/datasets/scannet-seg.py --tas
在单目 3D 检测任务中浏览 nuScenes 数据集 在单目 3D 检测任务中浏览 nuScenes 数据集
```shell ```shell
python tools/misc/browse_dataset.py configs/_base_/datasets/nus-mono3d.py --task mono-det --output-dir ${OUTPUT_DIR} --online python tools/misc/browse_dataset.py configs/_base_/datasets/nus-mono3d.py --task mono_det --output-dir ${OUTPUT_DIR} --online
``` ```
![](../../resources/browse_dataset_mono.png) ![](../../resources/browse_dataset_mono.png)
......
...@@ -143,6 +143,7 @@ def inference_detector(model: nn.Module, ...@@ -143,6 +143,7 @@ def inference_detector(model: nn.Module,
# load from point cloud file # load from point cloud file
data_ = dict( data_ = dict(
lidar_points=dict(lidar_path=pcd), lidar_points=dict(lidar_path=pcd),
timestamp=1,
# for ScanNet demo we need axis_align_matrix # for ScanNet demo we need axis_align_matrix
axis_align_matrix=np.eye(4), axis_align_matrix=np.eye(4),
box_type_3d=box_type_3d, box_type_3d=box_type_3d,
...@@ -151,6 +152,7 @@ def inference_detector(model: nn.Module, ...@@ -151,6 +152,7 @@ def inference_detector(model: nn.Module,
# directly use loaded point cloud # directly use loaded point cloud
data_ = dict( data_ = dict(
points=pcd, points=pcd,
timestamp=1,
# for ScanNet demo we need axis_align_matrix # for ScanNet demo we need axis_align_matrix
axis_align_matrix=np.eye(4), axis_align_matrix=np.eye(4),
box_type_3d=box_type_3d, box_type_3d=box_type_3d,
......
# Copyright (c) OpenMMLab. All rights reserved. # Copyright (c) OpenMMLab. All rights reserved.
from .builder import DATASETS, PIPELINES, build_dataset from .builder import DATASETS, PIPELINES, build_dataset
from .convert_utils import get_2d_boxes
from .dataset_wrappers import CBGSDataset from .dataset_wrappers import CBGSDataset
from .det3d_dataset import Det3DDataset from .det3d_dataset import Det3DDataset
from .kitti_dataset import KittiDataset from .kitti_dataset import KittiDataset
...@@ -22,8 +21,8 @@ from .transforms import (AffineResize, BackgroundPointsFilter, GlobalAlignment, ...@@ -22,8 +21,8 @@ from .transforms import (AffineResize, BackgroundPointsFilter, GlobalAlignment,
ObjectNameFilter, ObjectNoise, ObjectRangeFilter, ObjectNameFilter, ObjectNoise, ObjectRangeFilter,
ObjectSample, PointSample, PointShuffle, ObjectSample, PointSample, PointShuffle,
PointsRangeFilter, RandomDropPointsColor, PointsRangeFilter, RandomDropPointsColor,
RandomFlip3D, RandomJitterPoints, RandomShiftScale, RandomFlip3D, RandomJitterPoints, RandomResize3D,
VoxelBasedPointSampler) RandomShiftScale, Resize3D, VoxelBasedPointSampler)
from .utils import get_loading_pipeline from .utils import get_loading_pipeline
from .waymo_dataset import WaymoDataset from .waymo_dataset import WaymoDataset
...@@ -40,5 +39,6 @@ __all__ = [ ...@@ -40,5 +39,6 @@ __all__ = [
'LoadPointsFromMultiSweeps', 'WaymoDataset', 'BackgroundPointsFilter', 'LoadPointsFromMultiSweeps', 'WaymoDataset', 'BackgroundPointsFilter',
'VoxelBasedPointSampler', 'get_loading_pipeline', 'RandomDropPointsColor', 'VoxelBasedPointSampler', 'get_loading_pipeline', 'RandomDropPointsColor',
'RandomJitterPoints', 'ObjectNameFilter', 'AffineResize', 'RandomJitterPoints', 'ObjectNameFilter', 'AffineResize',
'RandomShiftScale', 'LoadPointsFromDict', 'PIPELINES', 'get_2d_boxes' 'RandomShiftScale', 'LoadPointsFromDict', 'PIPELINES',
'Resize3D', 'RandomResize3D',
] ]
# Copyright (c) OpenMMLab. All rights reserved. # Copyright (c) OpenMMLab. All rights reserved.
import copy import copy
from collections import OrderedDict from collections import OrderedDict
from typing import List, Tuple, Union from typing import List, Optional, Tuple, Union
import numpy as np import numpy as np
from nuscenes.utils.geometry_utils import view_points from nuscenes.utils.geometry_utils import view_points
...@@ -11,6 +11,11 @@ from shapely.geometry import MultiPoint, box ...@@ -11,6 +11,11 @@ from shapely.geometry import MultiPoint, box
from mmdet3d.structures import Box3DMode, CameraInstance3DBoxes, points_cam2img from mmdet3d.structures import Box3DMode, CameraInstance3DBoxes, points_cam2img
from mmdet3d.structures.ops import box_np_ops from mmdet3d.structures.ops import box_np_ops
kitti_categories = ('Pedestrian', 'Cyclist', 'Car', 'Van', 'Truck',
'Person_sitting', 'Tram', 'Misc')
waymo_categories = ('Car', 'Pedestrian', 'Cyclist')
nus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle', nus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone', 'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',
'barrier') 'barrier')
...@@ -48,8 +53,10 @@ LyftNameMapping = { ...@@ -48,8 +53,10 @@ LyftNameMapping = {
} }
def get_2d_boxes(nusc, sample_data_token: str, visibilities: List[str]): def get_nuscenes_2d_boxes(nusc, sample_data_token: str,
"""Get the 2D annotation records for a given `sample_data_token`. visibilities: List[str]):
"""Get the 2d / mono3d annotation records for a given `sample_data_token` of
nuscenes dataset.
Args: Args:
sample_data_token (str): Sample data token belonging to a camera sample_data_token (str): Sample data token belonging to a camera
...@@ -57,7 +64,7 @@ def get_2d_boxes(nusc, sample_data_token: str, visibilities: List[str]): ...@@ -57,7 +64,7 @@ def get_2d_boxes(nusc, sample_data_token: str, visibilities: List[str]):
visibilities (list[str]): Visibility filter. visibilities (list[str]): Visibility filter.
Return: Return:
list[dict]: List of 2D annotation record that belongs to the input list[dict]: List of 2d annotation record that belongs to the input
`sample_data_token`. `sample_data_token`.
""" """
...@@ -128,7 +135,7 @@ def get_2d_boxes(nusc, sample_data_token: str, visibilities: List[str]): ...@@ -128,7 +135,7 @@ def get_2d_boxes(nusc, sample_data_token: str, visibilities: List[str]):
# Generate dictionary record to be included in the .json file. # Generate dictionary record to be included in the .json file.
repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y, repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y,
sample_data_token, sd_rec['filename']) 'nuscenes')
# if repro_rec is None, we do not append it into repro_recs # if repro_rec is None, we do not append it into repro_recs
if repro_rec is not None: if repro_rec is not None:
...@@ -178,23 +185,36 @@ def get_2d_boxes(nusc, sample_data_token: str, visibilities: List[str]): ...@@ -178,23 +185,36 @@ def get_2d_boxes(nusc, sample_data_token: str, visibilities: List[str]):
return repro_recs return repro_recs
def get_waymo_2d_boxes(info, cam_idx, occluded, annos=None, mono3d=True): def get_kitti_style_2d_boxes(info: dict,
"""Get the 2D annotation records for a given info. cam_idx: int = 2,
occluded: Tuple[int] = (0, 1, 2, 3),
annos: Optional[dict] = None,
mono3d: bool = True,
dataset: str = 'kitti'):
"""Get the 2d / mono3d annotation records for a given info.
This function is used to get 2D annotations when loading annotations from This function is used to get 2D/Mono3D annotations when loading annotations
a dataset class. The original version in the data converter will be from a kitti-style dataset class, such as KITTI and Waymo dataset.
deprecated in the future.
Args: Args:
info: Information of the given sample data. info (dict): Information of the given sample data.
occluded: Integer (0, 1, 2, 3) indicating occlusion state: cam_idx (int): Id of the camera whose 2d / mono3d annotations are to be
obtained. In KITTI, typically only CAM 2 will be used,
and in Waymo, multi cameras could be used.
Defaults to 2.
occluded (tuple[int]): Integer (0, 1, 2, 3) indicating occlusion state:
0 = fully visible, 1 = partly occluded, 2 = largely occluded, 0 = fully visible, 1 = partly occluded, 2 = largely occluded,
3 = unknown, -1 = DontCare 3 = unknown, -1 = DontCare.
Defaults to (0, 1, 2, 3).
annos (dict, optional): Original annotations.
mono3d (bool): Whether to get boxes with mono3d annotation. mono3d (bool): Whether to get boxes with mono3d annotation.
Defaults to True.
dataset (str): Name of the dataset the 2d bboxes are obtained from.
Defaults to `kitti`.
Return: Return:
list[dict]: List of 2D annotation record that belongs to the input list[dict]: List of 2d / mono3d annotation record that
`sample_data_token`. belongs to the input camera id.
""" """
# Get calibration information # Get calibration information
camera_intrinsic = info['calib'][f'P{cam_idx}'] camera_intrinsic = info['calib'][f'P{cam_idx}']
...@@ -224,7 +244,6 @@ def get_waymo_2d_boxes(info, cam_idx, occluded, annos=None, mono3d=True): ...@@ -224,7 +244,6 @@ def get_waymo_2d_boxes(info, cam_idx, occluded, annos=None, mono3d=True):
ann_rec['sample_annotation_token'] = \ ann_rec['sample_annotation_token'] = \
f"{info['image']['image_idx']}.{ann_idx}" f"{info['image']['image_idx']}.{ann_idx}"
ann_rec['sample_data_token'] = info['image']['image_idx'] ann_rec['sample_data_token'] = info['image']['image_idx']
sample_data_token = info['image']['image_idx']
loc = ann_rec['location'][np.newaxis, :] loc = ann_rec['location'][np.newaxis, :]
dim = ann_rec['dimensions'][np.newaxis, :] dim = ann_rec['dimensions'][np.newaxis, :]
...@@ -266,9 +285,8 @@ def get_waymo_2d_boxes(info, cam_idx, occluded, annos=None, mono3d=True): ...@@ -266,9 +285,8 @@ def get_waymo_2d_boxes(info, cam_idx, occluded, annos=None, mono3d=True):
min_x, min_y, max_x, max_y = final_coords min_x, min_y, max_x, max_y = final_coords
# Generate dictionary record to be included in the .json file. # Generate dictionary record to be included in the .json file.
repro_rec = generate_waymo_mono3d_record(ann_rec, min_x, min_y, max_x, repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y,
max_y, sample_data_token, dataset)
info['image']['image_path'])
# If mono3d=True, add 3D annotations in camera coordinates # If mono3d=True, add 3D annotations in camera coordinates
if mono3d and (repro_rec is not None): if mono3d and (repro_rec is not None):
...@@ -288,11 +306,7 @@ def get_waymo_2d_boxes(info, cam_idx, occluded, annos=None, mono3d=True): ...@@ -288,11 +306,7 @@ def get_waymo_2d_boxes(info, cam_idx, occluded, annos=None, mono3d=True):
# samples with depth < 0 will be removed # samples with depth < 0 will be removed
if repro_rec['depth'] <= 0: if repro_rec['depth'] <= 0:
continue continue
repro_recs.append(repro_rec)
repro_rec['attribute_name'] = -1 # no attribute in KITTI
repro_rec['attribute_id'] = -1
repro_recs.append(repro_rec)
return repro_recs return repro_recs
...@@ -355,7 +369,7 @@ def post_process_coords( ...@@ -355,7 +369,7 @@ def post_process_coords(
def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float, def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,
sample_data_token: str, filename: str) -> OrderedDict: dataset: str) -> OrderedDict:
"""Generate one 2D annotation record given various information on top of """Generate one 2D annotation record given various information on top of
the 2D bounding box coordinates. the 2D bounding box coordinates.
...@@ -365,112 +379,40 @@ def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float, ...@@ -365,112 +379,40 @@ def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,
y1 (float): Minimum value of the y coordinate. y1 (float): Minimum value of the y coordinate.
x2 (float): Maximum value of the x coordinate. x2 (float): Maximum value of the x coordinate.
y2 (float): Maximum value of the y coordinate. y2 (float): Maximum value of the y coordinate.
sample_data_token (str): Sample data token. dataset (str): Name of dataset.
filename (str):The corresponding image file where the annotation
is present.
Returns: Returns:
dict: A sample mono3D annotation record. dict: A sample 2d annotation record.
- bbox_label (int): 2d box label id - bbox_label (int): 2d box label id
- bbox_label_3d (int): 3d box label id - bbox_label_3d (int): 3d box label id
- bbox (list[float]): left x, top y, right x, bottom y - bbox (list[float]): left x, top y, right x, bottom y
of 2d box of 2d box
- bbox_3d_isvalid (bool): whether the box is valid - bbox_3d_isvalid (bool): whether the box is valid
""" """
repro_rec = OrderedDict()
repro_rec['sample_data_token'] = sample_data_token
coco_rec = dict()
relevant_keys = [
'attribute_tokens',
'category_name',
'instance_token',
'next',
'num_lidar_pts',
'num_radar_pts',
'prev',
'sample_annotation_token',
'sample_data_token',
'visibility_token',
]
for key, value in ann_rec.items(): if dataset == 'nuscenes':
if key in relevant_keys: cat_name = ann_rec['category_name']
repro_rec[key] = value if cat_name not in NuScenesNameMapping:
return None
repro_rec['bbox_corners'] = [x1, y1, x2, y2] else:
repro_rec['filename'] = filename cat_name = NuScenesNameMapping[cat_name]
categories = nus_categories
if repro_rec['category_name'] not in NuScenesNameMapping: else:
return None cat_name = ann_rec['name']
cat_name = NuScenesNameMapping[repro_rec['category_name']] if cat_name not in categories:
coco_rec['bbox_label'] = nus_categories.index(cat_name) return None
coco_rec['bbox_label_3d'] = nus_categories.index(cat_name)
coco_rec['bbox'] = [x1, y1, x2, y2] if dataset == 'kitti':
coco_rec['bbox_3d_isvalid'] = True categories = kitti_categories
elif dataset == 'waymo':
return coco_rec categories = waymo_categories
else:
raise NotImplementedError('Unsupported dataset!')
def generate_waymo_mono3d_record(ann_rec, x1, y1, x2, y2, sample_data_token,
filename):
"""Generate one 2D annotation record given various information on top of
the 2D bounding box coordinates.
The original version in the data converter will be deprecated in the
future.
Args: rec = dict()
ann_rec (dict): Original 3d annotation record. rec['bbox_label'] = categories.index(cat_name)
x1 (float): Minimum value of the x coordinate. rec['bbox_label_3d'] = rec['bbox_label']
y1 (float): Minimum value of the y coordinate. rec['bbox'] = [x1, y1, x2, y2]
x2 (float): Maximum value of the x coordinate. rec['bbox_3d_isvalid'] = True
y2 (float): Maximum value of the y coordinate.
sample_data_token (str): Sample data token.
filename (str):The corresponding image file where the annotation
is present.
Returns: return rec
dict: A sample 2D annotation record.
- file_name (str): file name
- image_id (str): sample data token
- area (float): 2d box area
- category_name (str): category name
- category_id (int): category id
- bbox (list[float]): left x, top y, x_size, y_size of 2d box
- iscrowd (int): whether the area is crowd
"""
kitti_categories = ('Car', 'Pedestrian', 'Cyclist')
repro_rec = OrderedDict()
repro_rec['sample_data_token'] = sample_data_token
coco_rec = dict()
key_mapping = {
'name': 'category_name',
'num_points_in_gt': 'num_lidar_pts',
'sample_annotation_token': 'sample_annotation_token',
'sample_data_token': 'sample_data_token',
}
for key, value in ann_rec.items():
if key in key_mapping.keys():
repro_rec[key_mapping[key]] = value
repro_rec['bbox_corners'] = [x1, y1, x2, y2]
repro_rec['filename'] = filename
coco_rec['file_name'] = filename
coco_rec['image_id'] = sample_data_token
coco_rec['area'] = (y2 - y1) * (x2 - x1)
if repro_rec['category_name'] not in kitti_categories:
return None
cat_name = repro_rec['category_name']
coco_rec['category_name'] = cat_name
coco_rec['category_id'] = kitti_categories.index(cat_name)
coco_rec['bbox_label'] = coco_rec['category_id']
coco_rec['bbox_label_3d'] = coco_rec['bbox_label']
coco_rec['bbox'] = [x1, y1, x2 - x1, y2 - y1]
coco_rec['iscrowd'] = 0
return coco_rec
...@@ -26,11 +26,11 @@ class Det3DDataset(BaseDataset): ...@@ -26,11 +26,11 @@ class Det3DDataset(BaseDataset):
metainfo (dict, optional): Meta information for dataset, such as class metainfo (dict, optional): Meta information for dataset, such as class
information. Defaults to None. information. Defaults to None.
data_prefix (dict, optional): Prefix for training data. Defaults to data_prefix (dict, optional): Prefix for training data. Defaults to
dict(pts='velodyne', img=""). dict(pts='velodyne', img='').
pipeline (list[dict], optional): Pipeline used for data processing. pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None. Defaults to None.
modality (dict, optional): Modality to specify the sensor data used modality (dict, optional): Modality to specify the sensor data used
as input, it usually has following keys. as input, it usually has following keys:
- use_camera: bool - use_camera: bool
- use_lidar: bool - use_lidar: bool
...@@ -40,7 +40,7 @@ class Det3DDataset(BaseDataset): ...@@ -40,7 +40,7 @@ class Det3DDataset(BaseDataset):
box_type_3d (str, optional): Type of 3D box of this dataset. box_type_3d (str, optional): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`. to its original format then converted them to `box_type_3d`.
Defaults to 'LiDAR'. Available options includes Defaults to 'LiDAR'. Available options includes:
- 'LiDAR': Box in LiDAR coordinates, usually for - 'LiDAR': Box in LiDAR coordinates, usually for
outdoor point cloud 3d detection. outdoor point cloud 3d detection.
...@@ -49,15 +49,15 @@ class Det3DDataset(BaseDataset): ...@@ -49,15 +49,15 @@ class Det3DDataset(BaseDataset):
- 'Camera': Box in camera coordinates, usually - 'Camera': Box in camera coordinates, usually
for vision-based 3d detection. for vision-based 3d detection.
filter_empty_gt (bool): Whether to filter the data with filter_empty_gt (bool, optional): Whether to filter the data with
empty GT. Defaults to True. empty GT. Defaults to True.
test_mode (bool): Whether the dataset is in test mode. test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False. Defaults to False.
load_eval_anns (bool): Whether to load annotations load_eval_anns (bool, optional): Whether to load annotations
in test_mode, the annotation will be saved in in test_mode, the annotation will be saved in `eval_ann_infos`,
`eval_ann_infos`, which can be use in Evaluator. which can be used in Evaluator. Defaults to True.
file_client_args (dict): Configuration of file client. file_client_args (dict, optional): Configuration of file client.
Defaults to `dict(backend='disk')`. Defaults to dict(backend='disk').
""" """
def __init__(self, def __init__(self,
...@@ -73,7 +73,7 @@ class Det3DDataset(BaseDataset): ...@@ -73,7 +73,7 @@ class Det3DDataset(BaseDataset):
test_mode: bool = False, test_mode: bool = False,
load_eval_anns=True, load_eval_anns=True,
file_client_args: dict = dict(backend='disk'), file_client_args: dict = dict(backend='disk'),
**kwargs): **kwargs) -> None:
# init file client # init file client
self.file_client = mmengine.FileClient(**file_client_args) self.file_client = mmengine.FileClient(**file_client_args)
self.filter_empty_gt = filter_empty_gt self.filter_empty_gt = filter_empty_gt
...@@ -125,7 +125,7 @@ class Det3DDataset(BaseDataset): ...@@ -125,7 +125,7 @@ class Det3DDataset(BaseDataset):
self.metainfo['box_type_3d'] = box_type_3d self.metainfo['box_type_3d'] = box_type_3d
self.metainfo['label_mapping'] = self.label_mapping self.metainfo['label_mapping'] = self.label_mapping
def _remove_dontcare(self, ann_info): def _remove_dontcare(self, ann_info: dict) -> dict:
"""Remove annotations that do not need to be cared about. """Remove annotations that do not need to be cared about.
-1 indicates dontcare in MMDet3d. -1 indicates dontcare in MMDet3d.
...@@ -192,7 +192,8 @@ class Det3DDataset(BaseDataset): ...@@ -192,7 +192,8 @@ class Det3DDataset(BaseDataset):
'bbox_3d': 'gt_bboxes_3d', 'bbox_3d': 'gt_bboxes_3d',
'depth': 'depths', 'depth': 'depths',
'center_2d': 'centers_2d', 'center_2d': 'centers_2d',
'attr_label': 'attr_labels' 'attr_label': 'attr_labels',
'velocity': 'velocities',
} }
instances = info['instances'] instances = info['instances']
# empty gt # empty gt
...@@ -209,14 +210,18 @@ class Det3DDataset(BaseDataset): ...@@ -209,14 +210,18 @@ class Det3DDataset(BaseDataset):
self.label_mapping[item] for item in temp_anns self.label_mapping[item] for item in temp_anns
] ]
if ann_name in name_mapping: if ann_name in name_mapping:
ann_name = name_mapping[ann_name] mapped_ann_name = name_mapping[ann_name]
else:
mapped_ann_name = ann_name
if 'label' in ann_name: if 'label' in ann_name:
temp_anns = np.array(temp_anns).astype(np.int64) temp_anns = np.array(temp_anns).astype(np.int64)
else: elif ann_name in name_mapping:
temp_anns = np.array(temp_anns).astype(np.float32) temp_anns = np.array(temp_anns).astype(np.float32)
else:
temp_anns = np.array(temp_anns)
ann_info[ann_name] = temp_anns ann_info[mapped_ann_name] = temp_anns
ann_info['instances'] = info['instances'] ann_info['instances'] = info['instances']
return ann_info return ann_info
...@@ -241,6 +246,7 @@ class Det3DDataset(BaseDataset): ...@@ -241,6 +246,7 @@ class Det3DDataset(BaseDataset):
self.data_prefix.get('pts', ''), self.data_prefix.get('pts', ''),
info['lidar_points']['lidar_path']) info['lidar_points']['lidar_path'])
info['num_pts_feats'] = info['lidar_points']['num_pts_feats']
info['lidar_path'] = info['lidar_points']['lidar_path'] info['lidar_path'] = info['lidar_points']['lidar_path']
if 'lidar_sweeps' in info: if 'lidar_sweeps' in info:
for sweep in info['lidar_sweeps']: for sweep in info['lidar_sweeps']:
...@@ -285,7 +291,7 @@ class Det3DDataset(BaseDataset): ...@@ -285,7 +291,7 @@ class Det3DDataset(BaseDataset):
return info return info
def prepare_data(self, index): def prepare_data(self, index: int) -> Optional[dict]:
"""Data preparation for both training and testing stage. """Data preparation for both training and testing stage.
Called by `__getitem__` of dataset. Called by `__getitem__` of dataset.
...@@ -294,7 +300,7 @@ class Det3DDataset(BaseDataset): ...@@ -294,7 +300,7 @@ class Det3DDataset(BaseDataset):
index (int): Index for accessing the target data. index (int): Index for accessing the target data.
Returns: Returns:
dict: Data dict of the corresponding index. dict | None: Data dict of the corresponding index.
""" """
input_dict = self.get_data_info(index) input_dict = self.get_data_info(index)
......
# Copyright (c) OpenMMLab. All rights reserved. # Copyright (c) OpenMMLab. All rights reserved.
from typing import Callable, List, Optional, Union from typing import Callable, List, Union
import numpy as np import numpy as np
...@@ -22,11 +22,12 @@ class KittiDataset(Det3DDataset): ...@@ -22,11 +22,12 @@ class KittiDataset(Det3DDataset):
Defaults to None. Defaults to None.
modality (dict, optional): Modality to specify the sensor data used modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to `dict(use_lidar=True)`. as input. Defaults to `dict(use_lidar=True)`.
default_cam_key (str, optional): The default camera name adopted.
Defaults to 'CAM2'.
box_type_3d (str, optional): Type of 3D box of this dataset. box_type_3d (str, optional): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`. to its original format then converted them to `box_type_3d`.
Defaults to 'LiDAR' in this dataset. Available options includes Defaults to 'LiDAR' in this dataset. Available options includes:
- 'LiDAR': Box in LiDAR coordinates. - 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset. - 'Depth': Box in depth coordinates, usually for indoor dataset.
...@@ -35,9 +36,9 @@ class KittiDataset(Det3DDataset): ...@@ -35,9 +36,9 @@ class KittiDataset(Det3DDataset):
Defaults to True. Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode. test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False. Defaults to False.
pcd_limit_range (list, optional): The range of point cloud used to pcd_limit_range (list[float], optional): The range of point cloud
filter invalid predicted boxes. used to filter invalid predicted boxes.
Default: [0, -40, -3, 70.4, 40, 0.0]. Defaults to [0, -40, -3, 70.4, 40, 0.0].
""" """
# TODO: use full classes of kitti # TODO: use full classes of kitti
METAINFO = { METAINFO = {
...@@ -49,15 +50,18 @@ class KittiDataset(Det3DDataset): ...@@ -49,15 +50,18 @@ class KittiDataset(Det3DDataset):
data_root: str, data_root: str,
ann_file: str, ann_file: str,
pipeline: List[Union[dict, Callable]] = [], pipeline: List[Union[dict, Callable]] = [],
modality: Optional[dict] = dict(use_lidar=True), modality: dict = dict(use_lidar=True),
default_cam_key: str = 'CAM2', default_cam_key: str = 'CAM2',
task: str = 'lidar_det',
box_type_3d: str = 'LiDAR', box_type_3d: str = 'LiDAR',
filter_empty_gt: bool = True, filter_empty_gt: bool = True,
test_mode: bool = False, test_mode: bool = False,
pcd_limit_range: List[float] = [0, -40, -3, 70.4, 40, 0.0], pcd_limit_range: List[float] = [0, -40, -3, 70.4, 40, 0.0],
**kwargs): **kwargs) -> None:
self.pcd_limit_range = pcd_limit_range self.pcd_limit_range = pcd_limit_range
assert task in ('lidar_det', 'mono_det')
self.task = task
super().__init__( super().__init__(
data_root=data_root, data_root=data_root,
ann_file=ann_file, ann_file=ann_file,
...@@ -107,11 +111,14 @@ class KittiDataset(Det3DDataset): ...@@ -107,11 +111,14 @@ class KittiDataset(Det3DDataset):
info['plane'] = plane_lidar info['plane'] = plane_lidar
if self.task == 'mono_det':
info['instances'] = info['cam_instances'][self.default_cam_key]
info = super().parse_data_info(info) info = super().parse_data_info(info)
return info return info
def parse_ann_info(self, info): def parse_ann_info(self, info: dict) -> dict:
"""Get annotation info according to the given index. """Get annotation info according to the given index.
Args: Args:
...@@ -135,6 +142,12 @@ class KittiDataset(Det3DDataset): ...@@ -135,6 +142,12 @@ class KittiDataset(Det3DDataset):
ann_info['gt_bboxes_3d'] = np.zeros((0, 7), dtype=np.float32) ann_info['gt_bboxes_3d'] = np.zeros((0, 7), dtype=np.float32)
ann_info['gt_labels_3d'] = np.zeros(0, dtype=np.int64) ann_info['gt_labels_3d'] = np.zeros(0, dtype=np.int64)
if self.task == 'mono_det':
ann_info['gt_bboxes'] = np.zeros((0, 4), dtype=np.float32)
ann_info['gt_bboxes_labels'] = np.array(0, dtype=np.int64)
ann_info['centers_2d'] = np.zeros((0, 2), dtype=np.float32)
ann_info['depths'] = np.zeros((0), dtype=np.float32)
ann_info = self._remove_dontcare(ann_info) ann_info = self._remove_dontcare(ann_info)
# in kitti, lidar2cam = R0_rect @ Tr_velo_to_cam # in kitti, lidar2cam = R0_rect @ Tr_velo_to_cam
lidar2cam = np.array(info['images']['CAM2']['lidar2cam']) lidar2cam = np.array(info['images']['CAM2']['lidar2cam'])
......
# Copyright (c) OpenMMLab. All rights reserved. # Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List from typing import Callable, List, Union
import numpy as np import numpy as np
...@@ -24,18 +24,18 @@ class LyftDataset(Det3DDataset): ...@@ -24,18 +24,18 @@ class LyftDataset(Det3DDataset):
pipeline (list[dict], optional): Pipeline used for data processing. pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None. Defaults to None.
modality (dict, optional): Modality to specify the sensor data used modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to None. as input. Defaults to dict(use_camera=False, use_lidar=True).
box_type_3d (str): Type of 3D box of this dataset. box_type_3d (str): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`. to its original format then converted them to `box_type_3d`.
Defaults to 'LiDAR' in this dataset. Available options includes Defaults to 'LiDAR' in this dataset. Available options includes:
- 'LiDAR': Box in LiDAR coordinates. - 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset. - 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates. - 'Camera': Box in camera coordinates.
filter_empty_gt (bool): Whether to filter empty GT. filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True. Defaults to True.
test_mode (bool): Whether the dataset is in test mode. test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False. Defaults to False.
""" """
...@@ -48,8 +48,8 @@ class LyftDataset(Det3DDataset): ...@@ -48,8 +48,8 @@ class LyftDataset(Det3DDataset):
def __init__(self, def __init__(self,
data_root: str, data_root: str,
ann_file: str, ann_file: str,
pipeline: List[dict] = None, pipeline: List[Union[dict, Callable]] = [],
modality: Dict = dict(use_camera=False, use_lidar=True), modality: dict = dict(use_camera=False, use_lidar=True),
box_type_3d: str = 'LiDAR', box_type_3d: str = 'LiDAR',
filter_empty_gt: bool = True, filter_empty_gt: bool = True,
test_mode: bool = False, test_mode: bool = False,
......
# Copyright (c) OpenMMLab. All rights reserved. # Copyright (c) OpenMMLab. All rights reserved.
from os import path as osp from os import path as osp
from typing import Dict, List from typing import Callable, List, Union
import numpy as np import numpy as np
...@@ -22,25 +22,26 @@ class NuScenesDataset(Det3DDataset): ...@@ -22,25 +22,26 @@ class NuScenesDataset(Det3DDataset):
Args: Args:
data_root (str): Path of dataset root. data_root (str): Path of dataset root.
ann_file (str): Path of annotation file. ann_file (str): Path of annotation file.
task (str, optional): Detection task. Defaults to 'lidar_det'.
pipeline (list[dict], optional): Pipeline used for data processing. pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None. Defaults to None.
box_type_3d (str): Type of 3D box of this dataset. box_type_3d (str): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`. to its original format then converted them to `box_type_3d`.
Defaults to 'LiDAR' in this dataset. Available options includes. Defaults to 'LiDAR' in this dataset. Available options includes:
- 'LiDAR': Box in LiDAR coordinates. - 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset. - 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates. - 'Camera': Box in camera coordinates.
modality (dict, optional): Modality to specify the sensor data used modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to dict(use_camera=False,use_lidar=True). as input. Defaults to dict(use_camera=False, use_lidar=True).
filter_empty_gt (bool): Whether to filter empty GT. filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True. Defaults to True.
test_mode (bool): Whether the dataset is in test mode. test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False. Defaults to False.
with_velocity (bool): Whether include velocity prediction with_velocity (bool, optional): Whether to include velocity prediction
into the experiments. Defaults to True. into the experiments. Defaults to True.
use_valid_flag (bool): Whether to use `use_valid_flag` key use_valid_flag (bool, optional): Whether to use `use_valid_flag` key
in the info file as mask to filter gt_boxes and gt_names. in the info file as mask to filter gt_boxes and gt_names.
Defaults to False. Defaults to False.
""" """
...@@ -55,10 +56,10 @@ class NuScenesDataset(Det3DDataset): ...@@ -55,10 +56,10 @@ class NuScenesDataset(Det3DDataset):
def __init__(self, def __init__(self,
data_root: str, data_root: str,
ann_file: str, ann_file: str,
task: str = '3d', task: str = 'lidar_det',
pipeline: List[dict] = None, pipeline: List[Union[dict, Callable]] = [],
box_type_3d: str = 'LiDAR', box_type_3d: str = 'LiDAR',
modality: Dict = dict( modality: dict = dict(
use_camera=False, use_camera=False,
use_lidar=True, use_lidar=True,
), ),
...@@ -66,12 +67,12 @@ class NuScenesDataset(Det3DDataset): ...@@ -66,12 +67,12 @@ class NuScenesDataset(Det3DDataset):
test_mode: bool = False, test_mode: bool = False,
with_velocity: bool = True, with_velocity: bool = True,
use_valid_flag: bool = False, use_valid_flag: bool = False,
**kwargs): **kwargs) -> None:
self.use_valid_flag = use_valid_flag self.use_valid_flag = use_valid_flag
self.with_velocity = with_velocity self.with_velocity = with_velocity
# TODO: Redesign multi-view data process in the future # TODO: Redesign multi-view data process in the future
assert task in ('3d', 'mono3d', 'multi-view') assert task in ('lidar_det', 'mono_det', 'multi-view_det')
self.task = task self.task = task
assert box_type_3d.lower() in ('lidar', 'camera') assert box_type_3d.lower() in ('lidar', 'camera')
...@@ -85,6 +86,27 @@ class NuScenesDataset(Det3DDataset): ...@@ -85,6 +86,27 @@ class NuScenesDataset(Det3DDataset):
test_mode=test_mode, test_mode=test_mode,
**kwargs) **kwargs)
def _filter_with_mask(self, ann_info: dict) -> dict:
"""Remove annotations that do not need to be cared.
Args:
ann_info (dict): Dict of annotation infos.
Returns:
dict: Annotations after filtering.
"""
filtered_annotations = {}
if self.use_valid_flag:
filter_mask = ann_info['bbox_3d_isvalid']
else:
filter_mask = ann_info['num_lidar_pts'] > 0
for key in ann_info.keys():
if key != 'instances':
filtered_annotations[key] = (ann_info[key][filter_mask])
else:
filtered_annotations[key] = ann_info[key]
return filtered_annotations
def parse_ann_info(self, info: dict) -> dict: def parse_ann_info(self, info: dict) -> dict:
"""Get annotation info according to the given index. """Get annotation info according to the given index.
...@@ -99,66 +121,51 @@ class NuScenesDataset(Det3DDataset): ...@@ -99,66 +121,51 @@ class NuScenesDataset(Det3DDataset):
- gt_labels_3d (np.ndarray): Labels of ground truths. - gt_labels_3d (np.ndarray): Labels of ground truths.
""" """
ann_info = super().parse_ann_info(info) ann_info = super().parse_ann_info(info)
if ann_info is None: if ann_info is not None:
# empty instance
anns_results = dict() ann_info = self._filter_with_mask(ann_info)
anns_results['gt_bboxes_3d'] = np.zeros((0, 7), dtype=np.float32)
anns_results['gt_labels_3d'] = np.zeros(0, dtype=np.int64) if self.with_velocity:
return anns_results gt_bboxes_3d = ann_info['gt_bboxes_3d']
gt_velocities = ann_info['velocities']
if self.use_valid_flag: nan_mask = np.isnan(gt_velocities[:, 0])
mask = ann_info['bbox_3d_isvalid'] gt_velocities[nan_mask] = [0.0, 0.0]
else: gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_velocities],
mask = ann_info['num_lidar_pts'] > 0 axis=-1)
gt_bboxes_3d = ann_info['gt_bboxes_3d'][mask] ann_info['gt_bboxes_3d'] = gt_bboxes_3d
gt_labels_3d = ann_info['gt_labels_3d'][mask]
if 'gt_bboxes' in ann_info:
gt_bboxes = ann_info['gt_bboxes'][mask]
gt_labels = ann_info['gt_labels'][mask]
attr_labels = ann_info['attr_labels'][mask]
else: else:
gt_bboxes = np.zeros((0, 4), dtype=np.float32) # empty instance
gt_labels = np.array([], dtype=np.int64) ann_info = dict()
attr_labels = np.array([], dtype=np.int64) if self.with_velocity:
ann_info['gt_bboxes_3d'] = np.zeros((0, 9), dtype=np.float32)
if 'centers_2d' in ann_info: else:
centers_2d = ann_info['centers_2d'][mask] ann_info['gt_bboxes_3d'] = np.zeros((0, 7), dtype=np.float32)
depths = ann_info['depths'][mask] ann_info['gt_labels_3d'] = np.zeros(0, dtype=np.int64)
else:
centers_2d = np.zeros((0, 2), dtype=np.float32) if self.task == 'mono3d':
depths = np.zeros((0), dtype=np.float32) ann_info['gt_bboxes'] = np.zeros((0, 4), dtype=np.float32)
ann_info['gt_bboxes_labels'] = np.array(0, dtype=np.int64)
if self.with_velocity: ann_info['attr_labels'] = np.array(0, dtype=np.int64)
gt_velocity = ann_info['velocity'][mask] ann_info['centers_2d'] = np.zeros((0, 2), dtype=np.float32)
nan_mask = np.isnan(gt_velocity[:, 0]) ann_info['depths'] = np.zeros((0), dtype=np.float32)
gt_velocity[nan_mask] = [0.0, 0.0]
gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_velocity], axis=-1)
# the nuscenes box center is [0.5, 0.5, 0.5], we change it to be # the nuscenes box center is [0.5, 0.5, 0.5], we change it to be
# the same as KITTI (0.5, 0.5, 0) # the same as KITTI (0.5, 0.5, 0)
# TODO: Unify the coordinates # TODO: Unify the coordinates
if self.task == 'mono3d': if self.task == 'mono_det':
gt_bboxes_3d = CameraInstance3DBoxes( gt_bboxes_3d = CameraInstance3DBoxes(
gt_bboxes_3d, ann_info['gt_bboxes_3d'],
box_dim=gt_bboxes_3d.shape[-1], box_dim=ann_info['gt_bboxes_3d'].shape[-1],
origin=(0.5, 0.5, 0.5)) origin=(0.5, 0.5, 0.5))
else: else:
gt_bboxes_3d = LiDARInstance3DBoxes( gt_bboxes_3d = LiDARInstance3DBoxes(
gt_bboxes_3d, ann_info['gt_bboxes_3d'],
box_dim=gt_bboxes_3d.shape[-1], box_dim=ann_info['gt_bboxes_3d'].shape[-1],
origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d) origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)
anns_results = dict( ann_info['gt_bboxes_3d'] = gt_bboxes_3d
gt_bboxes_3d=gt_bboxes_3d,
gt_labels_3d=gt_labels_3d,
gt_bboxes=gt_bboxes,
gt_labels=gt_labels,
attr_labels=attr_labels,
centers_2d=centers_2d,
depths=depths)
return anns_results return ann_info
def parse_data_info(self, info: dict) -> dict: def parse_data_info(self, info: dict) -> dict:
"""Process the raw data info. """Process the raw data info.
...@@ -173,7 +180,7 @@ class NuScenesDataset(Det3DDataset): ...@@ -173,7 +180,7 @@ class NuScenesDataset(Det3DDataset):
dict: Has `ann_info` in training stage. And dict: Has `ann_info` in training stage. And
all path has been converted to absolute path. all path has been converted to absolute path.
""" """
if self.task == 'mono3d': if self.task == 'mono_det':
data_list = [] data_list = []
if self.modality['use_lidar']: if self.modality['use_lidar']:
info['lidar_points']['lidar_path'] = \ info['lidar_points']['lidar_path'] = \
......
...@@ -36,7 +36,7 @@ class ScanNetDataset(Det3DDataset): ...@@ -36,7 +36,7 @@ class ScanNetDataset(Det3DDataset):
box_type_3d (str): Type of 3D box of this dataset. box_type_3d (str): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`. to its original format then converted them to `box_type_3d`.
Defaults to 'Depth' in this dataset. Available options includes Defaults to 'Depth' in this dataset. Available options includes:
- 'LiDAR': Box in LiDAR coordinates. - 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset. - 'Depth': Box in depth coordinates, usually for indoor dataset.
...@@ -61,13 +61,13 @@ class ScanNetDataset(Det3DDataset): ...@@ -61,13 +61,13 @@ class ScanNetDataset(Det3DDataset):
def __init__(self, def __init__(self,
data_root: str, data_root: str,
ann_file: str, ann_file: str,
metainfo: dict = None, metainfo: Optional[dict] = None,
data_prefix: dict = dict( data_prefix: dict = dict(
pts='points', pts='points',
pts_instance_mask='instance_mask', pts_instance_mask='instance_mask',
pts_semantic_mask='semantic_mask'), pts_semantic_mask='semantic_mask'),
pipeline: List[Union[dict, Callable]] = [], pipeline: List[Union[dict, Callable]] = [],
modality=dict(use_camera=False, use_lidar=True), modality: dict = dict(use_camera=False, use_lidar=True),
box_type_3d: str = 'Depth', box_type_3d: str = 'Depth',
filter_empty_gt: bool = True, filter_empty_gt: bool = True,
test_mode: bool = False, test_mode: bool = False,
...@@ -101,7 +101,7 @@ class ScanNetDataset(Det3DDataset): ...@@ -101,7 +101,7 @@ class ScanNetDataset(Det3DDataset):
assert self.modality['use_camera'] or self.modality['use_lidar'] assert self.modality['use_camera'] or self.modality['use_lidar']
@staticmethod @staticmethod
def _get_axis_align_matrix(info: dict) -> dict: def _get_axis_align_matrix(info: dict) -> np.ndarray:
"""Get axis_align_matrix from info. If not exist, return identity mat. """Get axis_align_matrix from info. If not exist, return identity mat.
Args: Args:
......
...@@ -24,25 +24,25 @@ class SUNRGBDDataset(Det3DDataset): ...@@ -24,25 +24,25 @@ class SUNRGBDDataset(Det3DDataset):
ann_file (str): Path of annotation file. ann_file (str): Path of annotation file.
metainfo (dict, optional): Meta information for dataset, such as class metainfo (dict, optional): Meta information for dataset, such as class
information. Defaults to None. information. Defaults to None.
data_prefix (dict): Prefix for data. Defaults to data_prefix (dict, optional): Prefix for data. Defaults to
`dict(pts='points',img='sunrgbd_trainval')`. dict(pts='points',img='sunrgbd_trainval').
pipeline (list[dict], optional): Pipeline used for data processing. pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None. Defaults to None.
modality (dict, optional): Modality to specify the sensor data used modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to `dict(use_camera=True, use_lidar=True)`. as input. Defaults to dict(use_camera=True, use_lidar=True).
default_cam_key (str): The default camera name adopted. default_cam_key (str, optional): The default camera name adopted.
Defaults to "CAM0". Defaults to 'CAM0'.
box_type_3d (str): Type of 3D box of this dataset. box_type_3d (str): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`. to its original format then converted them to `box_type_3d`.
Defaults to 'Depth' in this dataset. Available options includes Defaults to 'Depth' in this dataset. Available options includes:
- 'LiDAR': Box in LiDAR coordinates. - 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset. - 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates. - 'Camera': Box in camera coordinates.
filter_empty_gt (bool): Whether to filter empty GT. filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True. Defaults to True.
test_mode (bool): Whether the dataset is in test mode. test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False. Defaults to False.
""" """
METAINFO = { METAINFO = {
......
...@@ -11,11 +11,12 @@ from .test_time_aug import MultiScaleFlipAug3D ...@@ -11,11 +11,12 @@ from .test_time_aug import MultiScaleFlipAug3D
from .transforms_3d import (AffineResize, BackgroundPointsFilter, from .transforms_3d import (AffineResize, BackgroundPointsFilter,
GlobalAlignment, GlobalRotScaleTrans, GlobalAlignment, GlobalRotScaleTrans,
IndoorPatchPointSample, IndoorPointSample, IndoorPatchPointSample, IndoorPointSample,
ObjectNameFilter, ObjectNoise, ObjectRangeFilter, MultiViewWrapper, ObjectNameFilter, ObjectNoise,
ObjectSample, PointSample, PointShuffle, ObjectRangeFilter, ObjectSample,
PhotoMetricDistortion3D, PointSample, PointShuffle,
PointsRangeFilter, RandomDropPointsColor, PointsRangeFilter, RandomDropPointsColor,
RandomFlip3D, RandomJitterPoints, RandomShiftScale, RandomFlip3D, RandomJitterPoints, RandomResize3D,
VoxelBasedPointSampler) RandomShiftScale, Resize3D, VoxelBasedPointSampler)
__all__ = [ __all__ = [
'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans', 'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans',
...@@ -29,5 +30,6 @@ __all__ = [ ...@@ -29,5 +30,6 @@ __all__ = [
'VoxelBasedPointSampler', 'GlobalAlignment', 'IndoorPatchPointSample', 'VoxelBasedPointSampler', 'GlobalAlignment', 'IndoorPatchPointSample',
'LoadImageFromFileMono3D', 'ObjectNameFilter', 'RandomDropPointsColor', 'LoadImageFromFileMono3D', 'ObjectNameFilter', 'RandomDropPointsColor',
'RandomJitterPoints', 'AffineResize', 'RandomShiftScale', 'RandomJitterPoints', 'AffineResize', 'RandomShiftScale',
'LoadPointsFromDict' 'LoadPointsFromDict', 'Resize3D', 'RandomResize3D',
'MultiViewWrapper', 'PhotoMetricDistortion3D'
] ]
...@@ -32,7 +32,7 @@ class Compose: ...@@ -32,7 +32,7 @@ class Compose:
data (dict): A result dict contains the data to transform. data (dict): A result dict contains the data to transform.
Returns: Returns:
dict: Transformed data. dict: Transformed data.
""" """
for t in self.transforms: for t in self.transforms:
......
This diff is collapsed.
...@@ -63,15 +63,20 @@ class Pack3DDetInputs(BaseTransform): ...@@ -63,15 +63,20 @@ class Pack3DDetInputs(BaseTransform):
def __init__( def __init__(
self, self,
keys: dict, keys: tuple,
meta_keys: dict = ('img_path', 'ori_shape', 'img_shape', 'lidar2img', meta_keys: tuple = ('img_path', 'ori_shape', 'img_shape', 'lidar2img',
'depth2img', 'cam2img', 'pad_shape', 'scale_factor', 'depth2img', 'cam2img', 'pad_shape',
'flip', 'pcd_horizontal_flip', 'pcd_vertical_flip', 'scale_factor', 'flip', 'pcd_horizontal_flip',
'box_mode_3d', 'box_type_3d', 'img_norm_cfg', 'pcd_vertical_flip', 'box_mode_3d', 'box_type_3d',
'pcd_trans', 'sample_idx', 'pcd_scale_factor', 'img_norm_cfg', 'num_pts_feats', 'pcd_trans',
'pcd_rotation', 'pcd_rotation_angle', 'lidar_path', 'sample_idx', 'pcd_scale_factor', 'pcd_rotation',
'transformation_3d_flow', 'trans_mat', 'pcd_rotation_angle', 'lidar_path',
'affine_aug')): 'transformation_3d_flow', 'trans_mat',
'affine_aug', 'sweep_img_metas', 'ori_cam2img',
'cam2global', 'crop_offset', 'img_crop_offset',
'resize_img_shape', 'lidar2cam', 'ori_lidar2img',
'num_ref_frames', 'num_views', 'ego2global')
) -> None:
self.keys = keys self.keys = keys
self.meta_keys = meta_keys self.meta_keys = meta_keys
...@@ -98,7 +103,7 @@ class Pack3DDetInputs(BaseTransform): ...@@ -98,7 +103,7 @@ class Pack3DDetInputs(BaseTransform):
- img - img
- 'data_samples' (:obj:`Det3DDataSample`): The annotation info of - 'data_samples' (:obj:`Det3DDataSample`): The annotation info of
the sample. the sample.
""" """
# augtest # augtest
if isinstance(results, list): if isinstance(results, list):
...@@ -115,7 +120,7 @@ class Pack3DDetInputs(BaseTransform): ...@@ -115,7 +120,7 @@ class Pack3DDetInputs(BaseTransform):
else: else:
raise NotImplementedError raise NotImplementedError
def pack_single_results(self, results): def pack_single_results(self, results: dict) -> dict:
"""Method to pack the single input data. when the value in this dict is """Method to pack the single input data. when the value in this dict is
a list, it usually is in Augmentations Testing. a list, it usually is in Augmentations Testing.
...@@ -131,7 +136,7 @@ class Pack3DDetInputs(BaseTransform): ...@@ -131,7 +136,7 @@ class Pack3DDetInputs(BaseTransform):
- points - points
- img - img
- 'data_samples' (obj:`Det3DDataSample`): The annotation info - 'data_samples' (:obj:`Det3DDataSample`): The annotation info
of the sample. of the sample.
""" """
# Format 3D data # Format 3D data
...@@ -219,6 +224,7 @@ class Pack3DDetInputs(BaseTransform): ...@@ -219,6 +224,7 @@ class Pack3DDetInputs(BaseTransform):
return packed_results return packed_results
def __repr__(self) -> str: def __repr__(self) -> str:
"""str: Return a string that describes the module."""
repr_str = self.__class__.__name__ repr_str = self.__class__.__name__
repr_str += f'(keys={self.keys})' repr_str += f'(keys={self.keys})'
repr_str += f'(meta_keys={self.meta_keys})' repr_str += f'(meta_keys={self.meta_keys})'
......
This diff is collapsed.
...@@ -16,7 +16,7 @@ class MultiScaleFlipAug3D(BaseTransform): ...@@ -16,7 +16,7 @@ class MultiScaleFlipAug3D(BaseTransform):
Args: Args:
transforms (list[dict]): Transforms to apply in each augmentation. transforms (list[dict]): Transforms to apply in each augmentation.
img_scale (tuple | list[tuple]: Images scales for resizing. img_scale (tuple | list[tuple]): Images scales for resizing.
pts_scale_ratio (float | list[float]): Points scale ratios for pts_scale_ratio (float | list[float]): Points scale ratios for
resizing. resizing.
flip (bool, optional): Whether apply flip augmentation. flip (bool, optional): Whether apply flip augmentation.
...@@ -25,11 +25,11 @@ class MultiScaleFlipAug3D(BaseTransform): ...@@ -25,11 +25,11 @@ class MultiScaleFlipAug3D(BaseTransform):
directions for images, options are "horizontal" and "vertical". directions for images, options are "horizontal" and "vertical".
If flip_direction is list, multiple flip augmentations will If flip_direction is list, multiple flip augmentations will
be applied. It has no effect when ``flip == False``. be applied. It has no effect when ``flip == False``.
Defaults to "horizontal". Defaults to 'horizontal'.
pcd_horizontal_flip (bool, optional): Whether apply horizontal pcd_horizontal_flip (bool, optional): Whether to apply horizontal
flip augmentation to point cloud. Defaults to True. flip augmentation to point cloud. Defaults to True.
Note that it works only when 'flip' is turned on. Note that it works only when 'flip' is turned on.
pcd_vertical_flip (bool, optional): Whether apply vertical flip pcd_vertical_flip (bool, optional): Whether to apply vertical flip
augmentation to point cloud. Defaults to True. augmentation to point cloud. Defaults to True.
Note that it works only when 'flip' is turned on. Note that it works only when 'flip' is turned on.
""" """
...@@ -46,7 +46,7 @@ class MultiScaleFlipAug3D(BaseTransform): ...@@ -46,7 +46,7 @@ class MultiScaleFlipAug3D(BaseTransform):
self.img_scale = img_scale if isinstance(img_scale, self.img_scale = img_scale if isinstance(img_scale,
list) else [img_scale] list) else [img_scale]
self.pts_scale_ratio = pts_scale_ratio \ self.pts_scale_ratio = pts_scale_ratio \
if isinstance(pts_scale_ratio, list) else[float(pts_scale_ratio)] if isinstance(pts_scale_ratio, list) else [float(pts_scale_ratio)]
assert mmengine.is_list_of(self.img_scale, tuple) assert mmengine.is_list_of(self.img_scale, tuple)
assert mmengine.is_list_of(self.pts_scale_ratio, float) assert mmengine.is_list_of(self.pts_scale_ratio, float)
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment