Commit 7f9874aa authored by Xiangxu-0103, committed by ZwwWayne

[Feature] Support FCAF3D on S3DIS dataset in `dev-1.x` branch (#1984)



* support fcaf3d for s3dis dataset

* Update convert_utils.py

* Update seg3d_dataset.py

* Delete compose.py

* fix import error

* use `mmengine.Compose`

* Update s3dis-3d.py

* Update fcaf3d_2xb8_s3dis-3d-5class.py

* Update s3dis_dataset.py

* update unittest for s3dis

* update docs

* use `mmcv.Compose` instead of `mmengine.Compose`

* update docstring

* fix s3dis preprocessing bug

* Add typehint

* Update config and fix s3dis dataset

* update typehit

* Update convert_utils.py

* Update README and metafile
Co-authored-by: Tai-Wang <tab_wang@outlook.com>

fix __all__ keyword
parent e9149550
# dataset settings
dataset_type = 'S3DISDataset'
data_root = 'data/s3dis/'
metainfo = dict(classes=('table', 'chair', 'sofa', 'bookcase', 'board'))
train_area = [1, 2, 3, 4, 6]
test_area = 5
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
use_color=True,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(type='PointSample', num_points=100000),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.087266, 0.087266],
scale_ratio_range=[0.9, 1.1],
translation_std=[.1, .1, .1],
shift_height=False),
dict(type='NormalizePointsColor', color_mean=None),
dict(
type='Pack3DDetInputs',
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
use_color=True,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(type='PointSample', num_points=100000),
dict(type='NormalizePointsColor', color_mean=None),
]),
dict(type='Pack3DDetInputs', keys=['points'])
]
train_dataloader = dict(
batch_size=8,
num_workers=4,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='RepeatDataset',
times=13,
dataset=dict(
type='ConcatDataset',
datasets=[
dict(
type=dataset_type,
data_root=data_root,
ann_file=f's3dis_infos_Area_{i}.pkl',
pipeline=train_pipeline,
filter_empty_gt=True,
metainfo=metainfo,
box_type_3d='Depth') for i in train_area
])))
val_dataloader = dict(
batch_size=1,
num_workers=1,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=f's3dis_infos_Area_{test_area}.pkl',
pipeline=test_pipeline,
metainfo=metainfo,
test_mode=True,
box_type_3d='Depth'))
test_dataloader = dict(
batch_size=1,
num_workers=1,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=f's3dis_infos_Area_{test_area}.pkl',
pipeline=test_pipeline,
metainfo=metainfo,
test_mode=True,
box_type_3d='Depth'))
val_evaluator = dict(type='IndoorMetric')
test_evaluator = val_evaluator
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
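
As a reference, a minimal sketch (not part of the commit) of building this dataset config programmatically, assuming mmdet3d dev-1.x is installed, the config sits at the usual `configs/_base_/datasets/s3dis-3d.py` path, and the S3DIS info pickles have been generated under `data/s3dis/`:

```python
from mmengine.config import Config

from mmdet3d.registry import DATASETS
from mmdet3d.utils import register_all_modules

register_all_modules()  # register mmdet3d datasets/transforms (dev-1.x API)

cfg = Config.fromfile('configs/_base_/datasets/s3dis-3d.py')

# Areas 1-4 and 6, each an S3DISDataset, concatenated and repeated 13 times;
# requires the s3dis_infos_Area_*.pkl files under data/s3dis/.
train_set = DATASETS.build(cfg.train_dataloader['dataset'])
print(len(train_set))
```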
......@@ -30,6 +30,12 @@ We implement FCAF3D and provide the result and checkpoints on the ScanNet and SU
| :------------------------------------------------: | :------: | :------------: | :----------: | :----------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| [MinkResNet34](./fcaf3d_8x2_sunrgbd-3d-10class.py) | 6.3 | 15.6 | 63.8(63.8\*) | 47.3(48.2\*) | [model](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/fcaf3d/fcaf3d_8x2_sunrgbd-3d-10class/fcaf3d_8x2_sunrgbd-3d-10class_20220805_165017.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/fcaf3d/fcaf3d_8x2_sunrgbd-3d-10class/fcaf3d_8x2_sunrgbd-3d-10class_20220805_165017.log.json) |
### S3DIS
| Backbone | Mem (GB) | Inf time (fps) | AP@0.25 | AP@0.5 | Download |
| :----------------------------------------------: | :------: | :------------: | :----------: | :----------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| [MinkResNet34](./fcaf3d_2xb8_s3dis-3d-5class.py) | 23.5 | 4.2 | 67.4(64.9\*) | 45.7(43.8\*) | [model](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/fcaf3d/fcaf3d_8x2_s3dis-3d-5class/fcaf3d_8x2_s3dis-3d-5class_20220805_121957.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/fcaf3d/fcaf3d_8x2_s3dis-3d-5class/fcaf3d_8x2_s3dis-3d-5class_20220805_121957.log.json) |
**Note**
- We report the results across 5 train runs followed by 5 test runs. * means the results reported in the paper.
......
_base_ = [
'../_base_/models/fcaf3d.py', '../_base_/default_runtime.py',
'../_base_/datasets/s3dis-3d.py'
]
model = dict(bbox_head=dict(num_classes=5))
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='AdamW', lr=0.001, weight_decay=0.0001),
clip_grad=dict(max_norm=10, norm_type=2))
# learning rate
param_scheduler = dict(
type='MultiStepLR',
begin=0,
end=12,
by_epoch=True,
milestones=[8, 11],
gamma=0.1)
custom_hooks = [dict(type='EmptyCacheHook', after_iter=True)]
# training schedule for 1x
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=12)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
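
As a usage sketch (assumptions: mmdet3d dev-1.x with mmengine installed and the S3DIS data prepared), the following is roughly what `tools/train.py` does with this config:

```python
from mmengine.config import Config
from mmengine.runner import Runner

from mmdet3d.utils import register_all_modules

register_all_modules()

cfg = Config.fromfile('configs/fcaf3d/fcaf3d_2xb8_s3dis-3d-5class.py')
cfg.work_dir = './work_dirs/fcaf3d_2xb8_s3dis-3d-5class'  # Runner requires it
runner = Runner.from_cfg(cfg)
runner.train()  # 12 epochs; lr decays by 0.1 at epochs 8 and 11
```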
......@@ -42,3 +42,17 @@ Models:
AP@0.25: 63.76
AP@0.5: 47.31
Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/fcaf3d/fcaf3d_8x2_sunrgbd-3d-10class/fcaf3d_8x2_sunrgbd-3d-10class_20220805_165017.pth
- Name: fcaf3d_2xb8_s3dis-3d-5class
In Collection: FCAF3D
Config: configs/fcaf3d/fcaf3d_2xb8_s3dis-3d-5class.py
Metadata:
Training Data: S3DIS
Training Memory (GB): 23.5
Results:
- Task: 3D Object Detection
Dataset: S3DIS
Metrics:
AP@0.25: 67.36
AP@0.5: 45.74
Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/fcaf3d/fcaf3d_8x2_s3dis-3d-5class/fcaf3d_8x2_s3dis-3d-5class_20220805_121957.pth
......@@ -104,6 +104,10 @@ Please refer to [MonoFlex](https://github.com/open-mmlab/mmdetection3d/tree/v1.0
Please refer to [SA-SSD](https://github.com/open-mmlab/mmdetection3d/blob/master/configs/sassd) for details. We provide SA-SSD baselines on the KITTI dataset.
### FCAF3D
Please refer to [FCAF3D](https://github.com/open-mmlab/mmdetection3d/blob/master/configs/fcaf3d) for details. We provide FCAF3D baselines on the ScanNet, S3DIS, and SUN RGB-D datasets.
### Mixed Precision (FP16) Training
Please refer to [Mixed Precision (FP16) Training on PointPillars](https://github.com/open-mmlab/mmdetection3d/tree/v1.0.0.dev0/configs/pointpillars/hv_pointpillars_fpn_sbn-all_fp16_2x8_2x_nus-3d.py) for details.
# Copyright (c) OpenMMLab. All rights reserved.
import copy
from collections import OrderedDict
from typing import List, Optional, Tuple, Union
import numpy as np
from nuscenes import NuScenes
from nuscenes.utils.geometry_utils import view_points
from pyquaternion import Quaternion
from shapely.geometry import MultiPoint, box
......@@ -53,18 +53,19 @@ LyftNameMapping = {
}
def get_nuscenes_2d_boxes(nusc, sample_data_token: str,
visibilities: List[str]):
"""Get the 2d / mono3d annotation records for a given `sample_data_token of
nuscenes dataset.
def get_nuscenes_2d_boxes(nusc: NuScenes, sample_data_token: str,
visibilities: List[str]) -> List[dict]:
"""Get the 2d / mono3d annotation records for a given `sample_data_token`
of the nuScenes dataset.
Args:
nusc (:obj:`NuScenes`): NuScenes class.
sample_data_token (str): Sample data token belonging to a camera
keyframe.
visibilities (list[str]): Visibility filter.
visibilities (List[str]): Visibility filter.
Returns:
list[dict]: List of 2d annotation record that belongs to the input
List[dict]: List of 2d annotation records that belong to the input
`sample_data_token`.
"""
......@@ -190,7 +191,7 @@ def get_kitti_style_2d_boxes(info: dict,
occluded: Tuple[int] = (0, 1, 2, 3),
annos: Optional[dict] = None,
mono3d: bool = True,
dataset: str = 'kitti'):
dataset: str = 'kitti') -> List[dict]:
"""Get the 2d / mono3d annotation records for a given info.
This function is used to get 2D/Mono3D annotations when loading annotations
......@@ -202,7 +203,7 @@ def get_kitti_style_2d_boxes(info: dict,
belong to. In KITTI, typically only CAM 2 will be used,
and in Waymo, multi cameras could be used.
Defaults to 2.
occluded (tuple[int]): Integer (0, 1, 2, 3) indicating occlusion state:
occluded (Tuple[int]): Integer (0, 1, 2, 3) indicating occlusion state:
0 = fully visible, 1 = partly occluded, 2 = largely occluded,
3 = unknown, -1 = DontCare.
Defaults to (0, 1, 2, 3).
......@@ -213,7 +214,7 @@ def get_kitti_style_2d_boxes(info: dict,
Defaults to 'kitti'.
Returns:
list[dict]: List of 2d / mono3d annotation record that
List[dict]: List of 2d / mono3d annotation records that
belong to the input camera id.
"""
# Get calibration information
......@@ -336,19 +337,19 @@ def convert_annos(info: dict, cam_idx: int) -> dict:
def post_process_coords(
corner_coords: List, imsize: Tuple[int, int] = (1600, 900)
) -> Union[Tuple[float, float, float, float], None]:
corner_coords: List[int], imsize: Tuple[int] = (1600, 900)
) -> Union[Tuple[float], None]:
"""Get the intersection of the convex hull of the reprojected bbox corners
and the image canvas, return None if no intersection.
Args:
corner_coords (list[int]): Corner coordinates of reprojected
corner_coords (List[int]): Corner coordinates of reprojected
bounding box.
imsize (tuple[int]): Size of the image canvas.
imsize (Tuple[int]): Size of the image canvas.
Defaults to (1600, 900).
Returns:
tuple[float]: Intersection of the convex hull of the 2D box
Tuple[float] or None: Intersection of the convex hull of the 2D box
corners and the image canvas.
"""
polygon_from_2d_box = MultiPoint(corner_coords).convex_hull
......@@ -370,7 +371,7 @@ def post_process_coords(
def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,
dataset: str) -> OrderedDict:
dataset: str) -> Union[dict, None]:
"""Generate one 2D annotation record given various information on top of
the 2D bounding box coordinates.
......@@ -383,11 +384,11 @@ def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,
dataset (str): Name of dataset.
Returns:
dict: A sample 2d annotation record.
dict or None: A sample 2d annotation record.
- bbox_label (int): 2d box label id
- bbox_label_3d (int): 3d box label id
- bbox (list[float]): left x, top y, right x, bottom y of 2d box
- bbox (List[float]): left x, top y, right x, bottom y of 2d box
- bbox_3d_isvalid (bool): whether the box is valid
"""
......
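
For reference, a standalone sketch of the clipping logic that `post_process_coords` implements, intersecting the convex hull of the reprojected corners with the image canvas. `clip_corners_to_canvas` is a hypothetical name, and the sketch assumes the intersection is a polygon:

```python
from shapely.geometry import MultiPoint, box


def clip_corners_to_canvas(corner_coords, imsize=(1600, 900)):
    """Sketch of post_process_coords: intersect the convex hull of the
    reprojected box corners with the image canvas, None if disjoint."""
    hull = MultiPoint(corner_coords).convex_hull
    canvas = box(0, 0, imsize[0], imsize[1])
    if not hull.intersects(canvas):
        return None
    inter = hull.intersection(canvas)
    # Assumption: the intersection is a Polygon (true when the hull
    # genuinely overlaps the canvas interior).
    xs, ys = inter.exterior.coords.xy
    return min(xs), min(ys), max(xs), max(ys)


print(clip_corners_to_canvas([(-100, 50), (200, 40), (150, 1000), (-50, 980)]))
```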
......@@ -30,14 +30,14 @@ class Det3DDataset(BaseDataset):
information. Defaults to None.
data_prefix (dict): Prefix for training data. Defaults to
dict(pts='velodyne', img='').
pipeline (list[dict]): Pipeline used for data processing.
pipeline (List[dict]): Pipeline used for data processing.
Defaults to [].
modality (dict): Modality to specify the sensor data used as input,
it usually has the following keys:
- use_camera: bool
- use_lidar: bool
Defaults to `dict(use_lidar=True, use_camera=False)`
Defaults to dict(use_lidar=True, use_camera=False).
default_cam_key (str, optional): The default camera name adopted.
Defaults to None.
box_type_3d (str): Type of 3D box of this dataset.
......@@ -78,7 +78,7 @@ class Det3DDataset(BaseDataset):
box_type_3d: dict = 'LiDAR',
filter_empty_gt: bool = True,
test_mode: bool = False,
load_eval_anns=True,
load_eval_anns: bool = True,
file_client_args: dict = dict(backend='disk'),
show_ins_var: bool = False,
**kwargs) -> None:
......@@ -158,7 +158,7 @@ class Det3DDataset(BaseDataset):
def _remove_dontcare(self, ann_info: dict) -> dict:
"""Remove annotations that do not need to be cared.
-1 indicate dontcare in MMDet3d.
-1 indicates dontcare in MMDet3d.
Args:
ann_info (dict): Dict of annotation infos. The
......@@ -186,7 +186,7 @@ class Det3DDataset(BaseDataset):
index (int): Index of the annotation data to get.
Returns:
dict: annotation information.
dict: Annotation information.
"""
data_info = self.get_data_info(index)
# test mode
......@@ -197,7 +197,7 @@ class Det3DDataset(BaseDataset):
return ann_info
def parse_ann_info(self, info: dict) -> Optional[dict]:
def parse_ann_info(self, info: dict) -> Union[dict, None]:
"""Process the `instances` in data info to `ann_info`.
In `Custom3DDataset`, we simply concatenate all the field
......@@ -209,7 +209,7 @@ class Det3DDataset(BaseDataset):
info (dict): Info dict.
Returns:
dict | None: Processed `ann_info`
dict or None: Processed `ann_info`.
"""
# add s or gt prefix for most keys after concat
# we only process 3d annotations here, the corresponding
......@@ -327,7 +327,8 @@ class Det3DDataset(BaseDataset):
return info
def _show_ins_var(self, old_labels: np.ndarray, new_labels: torch.Tensor):
def _show_ins_var(self, old_labels: np.ndarray,
new_labels: torch.Tensor) -> None:
"""Show variation of the number of instances before and after through
the pipeline.
......@@ -356,7 +357,7 @@ class Det3DDataset(BaseDataset):
'The number of instances per category after and before '
f'through pipeline:\n{table.table}', 'current')
def prepare_data(self, index: int) -> Optional[dict]:
def prepare_data(self, index: int) -> Union[dict, None]:
"""Data preparation for both training and testing stage.
Called by `__getitem__` of dataset.
......@@ -365,7 +366,7 @@ class Det3DDataset(BaseDataset):
index (int): Index for accessing the target data.
Returns:
dict | None: Data dict of the corresponding index.
dict or None: Data dict of the corresponding index.
"""
ori_input_dict = self.get_data_info(index)
......
......@@ -18,10 +18,10 @@ class KittiDataset(Det3DDataset):
Args:
data_root (str): Path of dataset root.
ann_file (str): Path of annotation file.
pipeline (list[dict]): Pipeline used for data processing.
pipeline (List[dict]): Pipeline used for data processing.
Defaults to [].
modality (dict): Modality to specify the sensor data used as input.
Defaults to `dict(use_lidar=True)`.
Defaults to dict(use_lidar=True).
default_cam_key (str): The default camera name adopted.
Defaults to 'CAM2'.
box_type_3d (str): Type of 3D box of this dataset.
......@@ -47,7 +47,7 @@ class KittiDataset(Det3DDataset):
in `__getitem__`. Defaults to True.
test_mode (bool): Whether the dataset is in test mode.
Defaults to False.
pcd_limit_range (list[float]): The range of point cloud used to filter
pcd_limit_range (List[float]): The range of point cloud used to filter
invalid predicted boxes.
Defaults to [0, -40, -3, 70.4, 40, 0.0].
"""
......
......@@ -21,7 +21,7 @@ class LyftDataset(Det3DDataset):
Args:
data_root (str): Path of dataset root.
ann_file (str): Path of annotation file.
pipeline (list[dict]): Pipeline used for data processing.
pipeline (List[dict]): Pipeline used for data processing.
Defaults to [].
modality (dict): Modality to specify the sensor data used as input.
Defaults to dict(use_camera=False, use_lidar=True).
......
......@@ -178,7 +178,7 @@ class NuScenesDataset(Det3DDataset):
return ann_info
def parse_data_info(self, info: dict) -> dict:
def parse_data_info(self, info: dict) -> Union[List[dict], dict]:
"""Process the raw data info.
The only difference with it in `Det3DDataset`
......@@ -188,7 +188,7 @@ class NuScenesDataset(Det3DDataset):
info (dict): Raw info dict.
Returns:
dict: Has `ann_info` in training stage. And
List[dict] or dict: Has `ann_info` in training stage, and
all paths have been converted to absolute paths.
"""
if self.load_type == 'mv_image_based':
......
# Copyright (c) OpenMMLab. All rights reserved.
from os import path as osp
from typing import Callable, List, Optional, Union
from typing import Any, Callable, List, Optional, Tuple, Union
import numpy as np
......@@ -8,7 +8,6 @@ from mmdet3d.registry import DATASETS
from mmdet3d.structures import DepthInstance3DBoxes
from .det3d_dataset import Det3DDataset
from .seg3d_dataset import Seg3DDataset
from .transforms import Compose
@DATASETS.register_module()
......@@ -19,138 +18,132 @@ class S3DISDataset(Det3DDataset):
often train on 5 of them and test on the remaining one. The one for
test is Area_5 as suggested in `GSDN <https://arxiv.org/abs/2006.12356>`_.
To concatenate 5 areas during training,
`mmdet.datasets.dataset_wrappers.ConcatDataset` should be used.
`mmengine.datasets.dataset_wrappers.ConcatDataset` should be used.
Args:
data_root (str): Path of dataset root.
ann_file (str): Path of annotation file.
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
classes (tuple[str], optional): Classes used in the dataset.
Defaults to None.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to None.
box_type_3d (str, optional): Type of 3D box of this dataset.
metainfo (dict, optional): Meta information for dataset, such as class
information. Defaults to None.
data_prefix (dict): Prefix for data. Defaults to
dict(pts='points',
pts_instance_mask='instance_mask',
pts_semantic_mask='semantic_mask').
pipeline (List[dict]): Pipeline used for data processing.
Defaults to [].
modality (dict): Modality to specify the sensor data used as input.
Defaults to dict(use_camera=False, use_lidar=True).
box_type_3d (str): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format and then convert it to `box_type_3d`.
Defaults to 'Depth' in this dataset. Available options includes
Defaults to 'Depth' in this dataset. Available options include:
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode.
filter_empty_gt (bool): Whether to filter the data with empty GT.
If it's set to be True, the example with empty annotations after
data pipeline will be dropped and a random example will be chosen
in `__getitem__`. Defaults to True.
test_mode (bool): Whether the dataset is in test mode.
Defaults to False.
"""
classes = ('table', 'chair', 'sofa', 'bookcase', 'board')
METAINFO = {
'classes': ('table', 'chair', 'sofa', 'bookcase', 'board'),
# the valid ids of segmentation annotations
'seg_valid_class_ids': (7, 8, 9, 10, 11),
'seg_all_class_ids': tuple(range(1, 14)) # possibly with 'stair' class
}
def __init__(self,
data_root,
ann_file,
pipeline=None,
classes=None,
modality=None,
box_type_3d='Depth',
filter_empty_gt=True,
test_mode=False,
*kwargs):
data_root: str,
ann_file: str,
metainfo: Optional[dict] = None,
data_prefix: dict = dict(
pts='points',
pts_instance_mask='instance_mask',
pts_semantic_mask='semantic_mask'),
pipeline: List[Union[dict, Callable]] = [],
modality: dict = dict(use_camera=False, use_lidar=True),
box_type_3d: str = 'Depth',
filter_empty_gt: bool = True,
test_mode: bool = False,
**kwargs) -> None:
# construct seg_label_mapping for semantic mask
seg_max_cat_id = len(self.METAINFO['seg_all_class_ids'])
seg_valid_cat_ids = self.METAINFO['seg_valid_class_ids']
neg_label = len(seg_valid_cat_ids)
seg_label_mapping = np.ones(
seg_max_cat_id + 1, dtype=np.int64) * neg_label
for cls_idx, cat_id in enumerate(seg_valid_cat_ids):
seg_label_mapping[cat_id] = cls_idx
self.seg_label_mapping = seg_label_mapping
super().__init__(
data_root=data_root,
ann_file=ann_file,
metainfo=metainfo,
data_prefix=data_prefix,
pipeline=pipeline,
classes=classes,
modality=modality,
box_type_3d=box_type_3d,
filter_empty_gt=filter_empty_gt,
test_mode=test_mode,
*kwargs)
**kwargs)
def get_ann_info(self, index):
"""Get annotation info according to the given index.
self.metainfo['seg_label_mapping'] = self.seg_label_mapping
assert 'use_camera' in self.modality and \
'use_lidar' in self.modality
assert self.modality['use_camera'] or self.modality['use_lidar']
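
For clarity, a standalone sketch of the `seg_label_mapping` constructed in `__init__` above, using the S3DIS `METAINFO` values from this diff (valid segmentation ids 7-11 out of ids 1-13):

```python
import numpy as np

seg_valid_class_ids = (7, 8, 9, 10, 11)   # METAINFO['seg_valid_class_ids']
seg_max_cat_id = 13                       # len(METAINFO['seg_all_class_ids'])
neg_label = len(seg_valid_class_ids)      # 5: the "ignored" bucket
mapping = np.ones(seg_max_cat_id + 1, dtype=np.int64) * neg_label
for cls_idx, cat_id in enumerate(seg_valid_class_ids):
    mapping[cat_id] = cls_idx
print(mapping)  # [5 5 5 5 5 5 5 0 1 2 3 4 5 5]
```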
def parse_data_info(self, info: dict) -> dict:
"""Process the raw data info.
Args:
index (int): Index of the annotation data to get.
info (dict): Raw info dict.
Returns:
dict: annotation information consists of the following keys:
- gt_bboxes_3d (:obj:`DepthInstance3DBoxes`):
3D ground truth bboxes
- gt_labels_3d (np.ndarray): Labels of ground truths.
- pts_instance_mask_path (str): Path of instance masks.
- pts_semantic_mask_path (str): Path of semantic masks.
dict: Has `ann_info` in training stage, and
all paths have been converted to absolute paths.
"""
# Use index to get the annos, thus the evalhook could also use this api
info = self.data_infos[index]
if info['annos']['gt_num'] != 0:
gt_bboxes_3d = info['annos']['gt_boxes_upright_depth'].astype(
np.float32) # k, 6
gt_labels_3d = info['annos']['class'].astype(np.int64)
else:
gt_bboxes_3d = np.zeros((0, 6), dtype=np.float32)
gt_labels_3d = np.zeros((0, ), dtype=np.int64)
# to target box structure
gt_bboxes_3d = DepthInstance3DBoxes(
gt_bboxes_3d,
box_dim=gt_bboxes_3d.shape[-1],
with_yaw=False,
origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)
pts_instance_mask_path = osp.join(self.data_root,
info['pts_instance_mask_path'] = osp.join(
self.data_prefix.get('pts_instance_mask', ''),
info['pts_instance_mask_path'])
pts_semantic_mask_path = osp.join(self.data_root,
info['pts_semantic_mask_path'] = osp.join(
self.data_prefix.get('pts_semantic_mask', ''),
info['pts_semantic_mask_path'])
anns_results = dict(
gt_bboxes_3d=gt_bboxes_3d,
gt_labels_3d=gt_labels_3d,
pts_instance_mask_path=pts_instance_mask_path,
pts_semantic_mask_path=pts_semantic_mask_path)
return anns_results
info = super().parse_data_info(info)
# only used in `PointSegClassMapping` in the pipeline
# to map original semantic classes to valid category ids.
info['seg_label_mapping'] = self.seg_label_mapping
return info
def get_data_info(self, index):
"""Get data info according to the given index.
def parse_ann_info(self, info: dict) -> dict:
"""Process the `instances` in data info to `ann_info`.
Args:
index (int): Index of the sample data to get.
info (dict): Info dict.
Returns:
dict: Data information that will be passed to the data
preprocessing transforms. It includes the following keys:
- pts_filename (str): Filename of point clouds.
- file_name (str): Filename of point clouds.
- ann_info (dict): Annotation info.
dict: Processed `ann_info`.
"""
info = self.data_infos[index]
pts_filename = osp.join(self.data_root, info['pts_path'])
input_dict = dict(pts_filename=pts_filename)
ann_info = super().parse_ann_info(info)
# empty gt
if ann_info is None:
ann_info = dict()
ann_info['gt_bboxes_3d'] = np.zeros((0, 6), dtype=np.float32)
ann_info['gt_labels_3d'] = np.zeros((0, ), dtype=np.int64)
# to target box structure
if not self.test_mode:
annos = self.get_ann_info(index)
input_dict['ann_info'] = annos
if self.filter_empty_gt and ~(annos['gt_labels_3d'] != -1).any():
return None
return input_dict
def _build_default_pipeline(self):
"""Build the default pipeline for this dataset."""
pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(
type='DefaultFormatBundle3D',
class_names=self.classes,
with_label=False),
dict(type='Collect3D', keys=['points'])
]
return Compose(pipeline)
ann_info['gt_bboxes_3d'] = DepthInstance3DBoxes(
ann_info['gt_bboxes_3d'],
box_dim=ann_info['gt_bboxes_3d'].shape[-1],
with_yaw=False,
origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)
return ann_info
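
A minimal sketch of the box wrapping performed above: S3DIS boxes are axis-aligned `(x, y, z, dx, dy, dz)` with a gravity-centered origin, hence `with_yaw=False`. It assumes mmdet3d is installed and omits the final `convert_to(self.box_mode_3d)` step; the box values are hypothetical:

```python
import numpy as np

from mmdet3d.structures import DepthInstance3DBoxes

# One made-up box: gravity center (1.0, 2.0, 0.5), sizes (0.8, 0.6, 1.0).
raw = np.array([[1.0, 2.0, 0.5, 0.8, 0.6, 1.0]], dtype=np.float32)
boxes = DepthInstance3DBoxes(
    raw, box_dim=raw.shape[-1], with_yaw=False, origin=(0.5, 0.5, 0.5))
print(boxes.tensor.shape)  # torch.Size([1, 7]): a zero yaw column is appended
```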
class _S3DISSegDataset(Seg3DDataset):
......@@ -171,16 +164,16 @@ class _S3DISSegDataset(Seg3DDataset):
metainfo (dict, optional): Meta information for dataset, such as class
information. Defaults to None.
data_prefix (dict): Prefix for training data. Defaults to
dict(pts='points', instance_mask='', semantic_mask='').
pipeline (list[dict]): Pipeline used for data processing.
dict(pts='points', pts_instance_mask='', pts_semantic_mask='').
pipeline (List[dict]): Pipeline used for data processing.
Defaults to [].
modality (dict): Modality to specify the sensor data used as input.
Defaults to dict(use_lidar=True, use_camera=False).
ignore_index (int, optional): The label index to be ignored, e.g.
unannotated points. If None is given, set to len(self.CLASSES) to
unannotated points. If None is given, set to len(self.classes) to
be consistent with PointSegClassMapping function in pipeline.
Defaults to None.
scene_idxs (np.ndarray | str, optional): Precomputed index to load
scene_idxs (np.ndarray or str, optional): Precomputed index to load
data. For scenes with many points, we may sample it several times.
Defaults to None.
test_mode (bool): Whether the dataset is in test mode.
......@@ -205,7 +198,7 @@ class _S3DISSegDataset(Seg3DDataset):
ann_file: str = '',
metainfo: Optional[dict] = None,
data_prefix: dict = dict(
pts='points', img='', instance_mask='', semantic_mask=''),
pts='points', pts_instance_mask='', pts_semantic_mask=''),
pipeline: List[Union[dict, Callable]] = [],
modality: dict = dict(use_lidar=True, use_camera=False),
ignore_index: Optional[int] = None,
......@@ -224,7 +217,8 @@ class _S3DISSegDataset(Seg3DDataset):
test_mode=test_mode,
**kwargs)
def get_scene_idxs(self, scene_idxs):
def get_scene_idxs(self, scene_idxs: Union[np.ndarray, str,
None]) -> np.ndarray:
"""Compute scene_idxs for data sampling.
We sample more times for scenes with more points.
......@@ -252,21 +246,21 @@ class S3DISSegDataset(_S3DISSegDataset):
Args:
data_root (str, optional): Path of dataset root. Defaults to None.
ann_files (list[str]): Path of several annotation files.
ann_files (List[str]): Path of several annotation files.
Defaults to ''.
metainfo (dict, optional): Meta information for dataset, such as class
information. Defaults to None.
data_prefix (dict): Prefix for training data. Defaults to
dict(pts='points', instance_mask='', semantic_mask='').
pipeline (list[dict]): Pipeline used for data processing.
dict(pts='points', pts_instance_mask='', pts_semantic_mask='').
pipeline (List[dict]): Pipeline used for data processing.
Defaults to [].
modality (dict): Modality to specify the sensor data used as input.
Defaults to dict(use_lidar=True, use_camera=False).
ignore_index (int, optional): The label index to be ignored, e.g.
unannotated points. If None is given, set to len(self.CLASSES) to
unannotated points. If None is given, set to len(self.classes) to
be consistent with PointSegClassMapping function in pipeline.
Defaults to None.
scene_idxs (list[np.ndarray] | list[str], optional): Precomputed index
scene_idxs (List[np.ndarray] | List[str], optional): Precomputed index
to load data. For scenes with many points, we may sample it
several times. Defaults to None.
test_mode (bool): Whether the dataset is in test mode.
......@@ -278,7 +272,7 @@ class S3DISSegDataset(_S3DISSegDataset):
ann_files: List[str] = '',
metainfo: Optional[dict] = None,
data_prefix: dict = dict(
pts='points', img='', instance_mask='', semantic_mask=''),
pts='points', pts_instance_mask='', pts_semantic_mask=''),
pipeline: List[Union[dict, Callable]] = [],
modality: dict = dict(use_lidar=True, use_camera=False),
ignore_index: Optional[int] = None,
......@@ -302,7 +296,6 @@ class S3DISSegDataset(_S3DISSegDataset):
ignore_index=ignore_index,
scene_idxs=scene_idxs[0],
test_mode=test_mode,
serialize_data=False,
**kwargs)
datasets = [
......@@ -326,29 +319,34 @@ class S3DISSegDataset(_S3DISSegDataset):
if not self.test_mode:
self._set_group_flag()
def concat_data_list(self, data_lists: List[List[dict]]) -> List[dict]:
def concat_data_list(self, data_lists: List[List[dict]]) -> None:
"""Concat data_list from several datasets to form self.data_list.
Args:
data_lists (list[list[dict]])
data_lists (List[List[dict]]): List of data lists, each
containing annotation dicts.
"""
self.data_list = [
data for data_list in data_lists for data in data_list
]
@staticmethod
def _duplicate_to_list(x, num):
def _duplicate_to_list(x: Any, num: int) -> list:
"""Repeat x `num` times to form a list."""
return [x for _ in range(num)]
def _check_ann_files(self, ann_file):
def _check_ann_files(
self, ann_file: Union[List[str], Tuple[str], str]) -> List[str]:
"""Make ann_files as list/tuple."""
# ann_file could be str
if not isinstance(ann_file, (list, tuple)):
ann_file = self._duplicate_to_list(ann_file, 1)
return ann_file
def _check_scene_idxs(self, scene_idx, num):
def _check_scene_idxs(self, scene_idx: Union[str, List[Union[list, tuple,
np.ndarray]],
List[str], None],
num: int) -> List[np.ndarray]:
"""Make scene_idxs as list/tuple."""
if scene_idx is None:
return self._duplicate_to_list(scene_idx, num)
......
......@@ -27,9 +27,9 @@ class ScanNetDataset(Det3DDataset):
information. Defaults to None.
data_prefix (dict): Prefix for data. Defaults to
dict(pts='points',
pts_isntance_mask='instance_mask',
pts_instance_mask='instance_mask',
pts_semantic_mask='semantic_mask').
pipeline (list[dict]): Pipeline used for data processing.
pipeline (List[dict]): Pipeline used for data processing.
Defaults to [].
modality (dict): Modality to specify the sensor data used as input.
Defaults to dict(use_camera=False, use_lidar=True).
......@@ -185,19 +185,22 @@ class ScanNetSegDataset(Seg3DDataset):
Args:
data_root (str, optional): Path of dataset root. Defaults to None.
ann_file (str): Path of annotation file. Defaults to ''.
pipeline (list[dict]): Pipeline used for data processing.
pipeline (List[dict]): Pipeline used for data processing.
Defaults to [].
metainfo (dict, optional): Meta information for dataset, such as class
information. Defaults to None.
data_prefix (dict): Prefix for training data. Defaults to
dict(pts='velodyne', img='', instance_mask='', semantic_mask='').
dict(pts='points',
img='',
pts_instance_mask='',
pts_semantic_mask='').
modality (dict): Modality to specify the sensor data used as input.
Defaults to dict(use_lidar=True, use_camera=False).
ignore_index (int, optional): The label index to be ignored, e.g.
unannotated points. If None is given, set to len(self.CLASSES) to
unannotated points. If None is given, set to len(self.classes) to
be consistent with PointSegClassMapping function in pipeline.
Defaults to None.
scene_idxs (np.ndarray | str, optional): Precomputed index to load
scene_idxs (np.ndarray or str, optional): Precomputed index to load
data. For scenes with many points, we may sample it several times.
Defaults to None.
test_mode (bool): Whether the dataset is in test mode.
......@@ -242,7 +245,10 @@ class ScanNetSegDataset(Seg3DDataset):
ann_file: str = '',
metainfo: Optional[dict] = None,
data_prefix: dict = dict(
pts='points', img='', instance_mask='', semantic_mask=''),
pts='points',
img='',
pts_instance_mask='',
pts_semantic_mask=''),
pipeline: List[Union[dict, Callable]] = [],
modality: dict = dict(use_lidar=True, use_camera=False),
ignore_index: Optional[int] = None,
......@@ -261,7 +267,8 @@ class ScanNetSegDataset(Seg3DDataset):
test_mode=test_mode,
**kwargs)
def get_scene_idxs(self, scene_idxs):
def get_scene_idxs(self, scene_idxs: Union[np.ndarray, str,
None]) -> np.ndarray:
"""Compute scene_idxs for data sampling.
We sample more times for scenes with more points.
......@@ -315,7 +322,10 @@ class ScanNetInstanceSegDataset(Seg3DDataset):
ann_file: str = '',
metainfo: Optional[dict] = None,
data_prefix: dict = dict(
pts='points', img='', instance_mask='', semantic_mask=''),
pts='points',
img='',
pts_instance_mask='',
pts_semantic_mask=''),
pipeline: List[Union[dict, Callable]] = [],
modality: dict = dict(use_lidar=True, use_camera=False),
test_mode: bool = False,
......
# Copyright (c) OpenMMLab. All rights reserved.
from os import path as osp
from typing import Callable, Dict, List, Optional, Sequence, Union
from typing import Callable, List, Optional, Sequence, Union
import mmengine
import numpy as np
......@@ -21,8 +21,11 @@ class Seg3DDataset(BaseDataset):
metainfo (dict, optional): Meta information for dataset, such as class
information. Defaults to None.
data_prefix (dict): Prefix for training data. Defaults to
dict(pts='velodyne', img='', instance_mask='', semantic_mask='').
pipeline (list[dict]): Pipeline used for data processing.
dict(pts='points',
img='',
pts_instance_mask='',
pts_semantic_mask='').
pipeline (List[dict]): Pipeline used for data processing.
Defaults to [].
modality (dict): Modality to specify the sensor data used
as input, it usually has the following keys:
......@@ -34,15 +37,15 @@ class Seg3DDataset(BaseDataset):
unannotated points. If None is given, set to len(self.classes) to
be consistent with PointSegClassMapping function in pipeline.
Defaults to None.
scene_idxs (np.ndarray | str, optional): Precomputed index to load
scene_idxs (np.ndarray or str, optional): Precomputed index to load
data. For scenes with many points, we may sample it several times.
Defaults to None.
test_mode (bool): Whether the dataset is in test mode.
Defaults to False.
serialize_data (bool, optional): Whether to hold memory using
serialized objects, when enabled, data loader workers can use
shared RAM from master process instead of making a copy. Defaults
to False for 3D Segmentation datasets.
serialize_data (bool): Whether to hold memory using serialized objects,
when enabled, data loader workers can use shared RAM from master
process instead of making a copy.
Defaults to False for 3D Segmentation datasets.
load_eval_anns (bool): Whether to load annotations in test_mode,
the annotations will be saved in `eval_ann_infos`, which can be used
in the Evaluator. Defaults to True.
......@@ -64,13 +67,13 @@ class Seg3DDataset(BaseDataset):
pts='points',
img='',
pts_instance_mask='',
pts_emantic_mask=''),
pts_semantic_mask=''),
pipeline: List[Union[dict, Callable]] = [],
modality: dict = dict(use_lidar=True, use_camera=False),
ignore_index: Optional[int] = None,
scene_idxs: Optional[Union[str, np.ndarray]] = None,
test_mode: bool = False,
serialize_data=False,
serialize_data: bool = False,
load_eval_anns: bool = True,
file_client_args: dict = dict(backend='disk'),
**kwargs) -> None:
......@@ -132,8 +135,7 @@ class Seg3DDataset(BaseDataset):
self._set_group_flag()
def get_label_mapping(self,
new_classes: Optional[Sequence] = None
) -> Union[Dict, None]:
new_classes: Optional[Sequence] = None) -> tuple:
"""Get label mapping.
The ``label_mapping`` is a dictionary, its keys are the old label ids
......@@ -143,8 +145,8 @@ class Seg3DDataset(BaseDataset):
None, `label_mapping` is not None.
Args:
new_classes (list, tuple, optional): The new classes name from
metainfo. Default to None.
new_classes (list or tuple, optional): The new classes name from
metainfo. Defaults to None.
Returns:
tuple: The mapping from old classes in cls.METAINFO to
......@@ -190,7 +192,8 @@ class Seg3DDataset(BaseDataset):
return label_mapping, label2cat, valid_class_ids
def _update_palette(self, new_classes, palette) -> list:
def _update_palette(self, new_classes: list, palette: Union[None,
list]) -> list:
"""Update palette according to metainfo.
If length of palette is equal to classes, just return the palette.
......@@ -264,7 +267,8 @@ class Seg3DDataset(BaseDataset):
return info
def get_scene_idxs(self, scene_idxs):
def get_scene_idxs(self, scene_idxs: Union[None, str,
np.ndarray]) -> np.ndarray:
"""Compute scene_idxs for data sampling.
We sample more times for scenes with more points.
......@@ -286,7 +290,7 @@ class Seg3DDataset(BaseDataset):
return scene_idxs.astype(np.int32)
def _set_group_flag(self):
def _set_group_flag(self) -> None:
"""Set flag according to image aspect ratio.
Images with aspect ratio greater than 1 will be set as group 1,
......
......@@ -21,8 +21,11 @@ class SemanticKITTIDataset(Seg3DDataset):
metainfo (dict, optional): Meta information for dataset, such as class
information. Defaults to None.
data_prefix (dict): Prefix for training data. Defaults to
dict(pts='points', img='', instance_mask='', semantic_mask='').
pipeline (list[dict]): Pipeline used for data processing.
dict(pts='points',
img='',
pts_instance_mask='',
pts_semantic_mask='').
pipeline (List[dict]): Pipeline used for data processing.
Defaults to [].
modality (dict): Modality to specify the sensor data used as input,
it usually has the following keys:
......@@ -31,10 +34,10 @@ class SemanticKITTIDataset(Seg3DDataset):
- use_lidar: bool
Defaults to dict(use_lidar=True, use_camera=False).
ignore_index (int, optional): The label index to be ignored, e.g.
unannotated points. If None is given, set to len(self.CLASSES) to
unannotated points. If None is given, set to len(self.classes) to
be consistent with PointSegClassMapping function in pipeline.
Defaults to None.
scene_idxs (np.ndarray | str, optional): Precomputed index to load
scene_idxs (np.ndarray or str, optional): Precomputed index to load
data. For scenes with many points, we may sample it several times.
Defaults to None.
test_mode (bool): Whether the dataset is in test mode.
......@@ -56,7 +59,10 @@ class SemanticKITTIDataset(Seg3DDataset):
ann_file: str = '',
metainfo: Optional[dict] = None,
data_prefix: dict = dict(
pts='points', img='', instance_mask='', semantic_mask=''),
pts='points',
img='',
pts_instance_mask='',
pts_semantic_mask=''),
pipeline: List[Union[dict, Callable]] = [],
modality: dict = dict(use_lidar=True, use_camera=False),
ignore_index: Optional[int] = None,
......
......@@ -26,7 +26,7 @@ class SUNRGBDDataset(Det3DDataset):
information. Defaults to None.
data_prefix (dict): Prefix for data. Defaults to
dict(pts='points',img='sunrgbd_trainval').
pipeline (list[dict]): Pipeline used for data processing.
pipeline (List[dict]): Pipeline used for data processing.
Defaults to [].
modality (dict): Modality to specify the sensor data used as input.
Defaults to dict(use_camera=True, use_lidar=True).
......
# Copyright (c) OpenMMLab. All rights reserved.
from .compose import Compose
from .dbsampler import DataBaseSampler
from .formating import Pack3DDetInputs
from .loading import (LoadAnnotations3D, LoadImageFromFileMono3D,
......@@ -21,15 +20,13 @@ from .transforms_3d import (AffineResize, BackgroundPointsFilter,
__all__ = [
'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans',
'PointShuffle', 'ObjectRangeFilter', 'PointsRangeFilter',
'Pack3DDetInputs',
'Compose', 'LoadMultiViewImageFromFiles', 'LoadPointsFromFile',
'DataBaseSampler',
'NormalizePointsColor', 'LoadAnnotations3D', 'IndoorPointSample',
'PointSample', 'PointSegClassMapping', 'MultiScaleFlipAug3D',
'LoadPointsFromMultiSweeps', 'BackgroundPointsFilter',
'VoxelBasedPointSampler', 'GlobalAlignment', 'IndoorPatchPointSample',
'LoadImageFromFileMono3D', 'ObjectNameFilter', 'RandomDropPointsColor',
'RandomJitterPoints', 'AffineResize', 'RandomShiftScale',
'LoadPointsFromDict', 'Resize3D', 'RandomResize3D',
'Pack3DDetInputs', 'LoadMultiViewImageFromFiles', 'LoadPointsFromFile',
'DataBaseSampler', 'NormalizePointsColor', 'LoadAnnotations3D',
'IndoorPointSample', 'PointSample', 'PointSegClassMapping',
'MultiScaleFlipAug3D', 'LoadPointsFromMultiSweeps',
'BackgroundPointsFilter', 'VoxelBasedPointSampler', 'GlobalAlignment',
'IndoorPatchPointSample', 'LoadImageFromFileMono3D', 'ObjectNameFilter',
'RandomDropPointsColor', 'RandomJitterPoints', 'AffineResize',
'RandomShiftScale', 'LoadPointsFromDict', 'Resize3D', 'RandomResize3D',
'MultiViewWrapper', 'PhotoMetricDistortion3D'
]
# Copyright (c) OpenMMLab. All rights reserved.
import collections
from mmdet3d.registry import TRANSFORMS
@TRANSFORMS.register_module()
class Compose:
"""Compose multiple transforms sequentially.
Args:
transforms (Sequence[dict | callable]): Sequence of transform object or
config dict to be composed.
"""
def __init__(self, transforms):
assert isinstance(transforms, collections.abc.Sequence)
self.transforms = []
for transform in transforms:
if isinstance(transform, dict):
transform = TRANSFORMS.build(transform)
self.transforms.append(transform)
elif callable(transform):
self.transforms.append(transform)
else:
raise TypeError('transform must be callable or a dict')
def __call__(self, data):
"""Call function to apply transforms sequentially.
Args:
data (dict): A result dict contains the data to transform.
Returns:
dict: Transformed data.
"""
for t in self.transforms:
data = t(data)
if data is None:
return None
return data
def __repr__(self):
format_string = self.__class__.__name__ + '('
for t in self.transforms:
str_ = t.__repr__()
if 'Compose(' in str_:
str_ = str_.replace('\n', '\n ')
format_string += '\n'
format_string += f' {str_}'
format_string += '\n)'
return format_string
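
The deleted local `Compose` above is superseded by `mmcv.transforms.Compose` (see the import change below). A toy sketch, assuming mmcv's implementation likewise accepts callables as well as config dicts:

```python
from mmcv.transforms import Compose  # same import the diff switches to


def add_flag(results: dict) -> dict:
    """Toy transform: tag the results dict."""
    results['flag'] = True
    return results


pipeline = Compose([add_flag])
print(pipeline({'points': None}))  # {'points': None, 'flag': True}
```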
......@@ -6,7 +6,7 @@ from typing import List, Optional, Tuple, Union
import cv2
import mmcv
import numpy as np
from mmcv.transforms import BaseTransform, RandomResize, Resize
from mmcv.transforms import BaseTransform, Compose, RandomResize, Resize
from mmdet.datasets.transforms import (PhotoMetricDistortion, RandomCrop,
RandomFlip)
from mmengine import is_tuple_of
......@@ -17,7 +17,6 @@ from mmdet3d.structures import (CameraInstance3DBoxes, DepthInstance3DBoxes,
LiDARInstance3DBoxes)
from mmdet3d.structures.ops import box_np_ops
from mmdet3d.structures.points import BasePoints
from .compose import Compose
from .data_augment_utils import noise_per_object_v3_
......
......@@ -31,7 +31,7 @@ class WaymoDataset(KittiDataset):
CAM_FRONT_LEFT='image_2',
CAM_SIDE_RIGHT='image_3',
CAM_SIDE_LEFT='image_4')
pipeline (list[dict]): Pipeline used for data processing.
pipeline (List[dict]): Pipeline used for data processing.
Defaults to [].
modality (dict): Modality to specify the sensor data used
as input. Defaults to dict(use_lidar=True).
......@@ -60,7 +60,7 @@ class WaymoDataset(KittiDataset):
in `__getitem__`. Defaults to True.
test_mode (bool): Whether the dataset is in test mode.
Defaults to False.
pcd_limit_range (list[float]): The range of point cloud
pcd_limit_range (List[float]): The range of point cloud
used to filter invalid predicted boxes.
Defaults to [-85, -85, -5, 85, 85, 5].
cam_sync_instances (bool): If use the camera sync label
......@@ -123,7 +123,7 @@ class WaymoDataset(KittiDataset):
info (dict): Data information of single data sample.
Returns:
dict: annotation information consists of the following keys:
dict: Annotation information consisting of the following keys:
- bboxes_3d (:obj:`LiDARInstance3DBoxes`):
3D ground truth bboxes.
......@@ -188,7 +188,7 @@ class WaymoDataset(KittiDataset):
data_list = data_list[::self.load_interval]
return data_list
def parse_data_info(self, info: dict) -> dict:
def parse_data_info(self, info: dict) -> Union[dict, List[dict]]:
"""if task is lidar or multiview det, use super() method elif task is
mono3d, split the info from frame-wise to img-wise."""
......
......@@ -7,7 +7,7 @@ from .nuscenes_metric import NuScenesMetric # noqa: F401,F403
from .seg_metric import SegMetric # noqa: F401,F403
from .waymo_metric import WaymoMetric # noqa: F401,F403
__all_ = [
__all__ = [
'KittiMetric', 'NuScenesMetric', 'IndoorMetric', 'LyftMetric', 'SegMetric',
'InstanceSegMetric', 'WaymoMetric'
]