Unverified Commit 6d899956 authored by Danila Rukhovich, committed by GitHub

[Feature] Add S3DIS dataset for 3D object detection (#835)

* add s3dis dataset

* fix comments; remove _S3DISDataset in favour of ConcatDataset

* fix comments

* fix comments

* fix link in __init__.py

* remove unused import

* add dataset tests

* add pytest.skip
parent a2e7387e
# dataset settings
dataset_type = 'S3DISDataset'
data_root = './data/s3dis/'
class_names = ('table', 'chair', 'sofa', 'bookcase', 'board')
train_area = [1, 2, 3, 4, 6]
test_area = 5

train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=True,
        load_dim=6,
        use_dim=[0, 1, 2, 3, 4, 5]),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(type='PointSample', num_points=40000),
    dict(
        type='RandomFlip3D',
        sync_2d=False,
        flip_ratio_bev_horizontal=0.5,
        flip_ratio_bev_vertical=0.5),
    dict(
        type='GlobalRotScaleTrans',
        # following ScanNet dataset, the rotation range is 5 degrees
        rot_range=[-0.087266, 0.087266],
        scale_ratio_range=[1.0, 1.0],
        shift_height=True),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=True,
        load_dim=6,
        use_dim=[0, 1, 2, 3, 4, 5]),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(
                type='RandomFlip3D',
                sync_2d=False,
                flip_ratio_bev_horizontal=0.5,
                flip_ratio_bev_vertical=0.5),
            dict(type='PointSample', num_points=40000),
            dict(
                type='DefaultFormatBundle3D',
                class_names=class_names,
                with_label=False),
            dict(type='Collect3D', keys=['points'])
        ])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=False,
        load_dim=6,
        use_dim=[0, 1, 2, 3, 4, 5]),
    dict(
        type='DefaultFormatBundle3D',
        class_names=class_names,
        with_label=False),
    dict(type='Collect3D', keys=['points'])
]

data = dict(
    samples_per_gpu=8,
    workers_per_gpu=4,
    train=dict(
        type='RepeatDataset',
        times=5,
        dataset=dict(
            type='ConcatDataset',
            datasets=[
                dict(
                    type=dataset_type,
                    data_root=data_root,
                    ann_file=data_root + f's3dis_infos_Area_{i}.pkl',
                    pipeline=train_pipeline,
                    filter_empty_gt=False,
                    classes=class_names,
                    box_type_3d='Depth') for i in train_area
            ],
            separate_eval=False)),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl',
        pipeline=test_pipeline,
        classes=class_names,
        test_mode=True,
        box_type_3d='Depth'),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl',
        pipeline=test_pipeline,
        classes=class_names,
        test_mode=True,
        box_type_3d='Depth'))

evaluation = dict(pipeline=eval_pipeline)
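As a quick sanity check, this config can be fed straight to the dataset builder. A minimal sketch, assuming an mmdet3d install with the S3DIS infos prepared under ./data/s3dis; the config path below is an assumption about where this file lives:

# Minimal sketch: build the train split from this config and fetch one sample.
from mmcv import Config
from mmdet3d.datasets import build_dataset

cfg = Config.fromfile('configs/_base_/datasets/s3dis-3d-5class.py')  # assumed path
train_set = build_dataset(cfg.data.train)  # RepeatDataset over ConcatDataset
sample = train_set[0]
print(sample['points']._data.shape)  # 40000 sampled points per scene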
@@ -18,7 +18,7 @@ from .pipelines import (BackgroundPointsFilter, GlobalAlignment,
                        RandomFlip3D, RandomJitterPoints,
                        VoxelBasedPointSampler)
# yapf: enable
from .s3dis_dataset import S3DISDataset, S3DISSegDataset
from .scannet_dataset import ScanNetDataset, ScanNetSegDataset
from .semantickitti_dataset import SemanticKITTIDataset
from .sunrgbd_dataset import SUNRGBDDataset
@@ -26,12 +26,11 @@ from .utils import get_loading_pipeline
from .waymo_dataset import WaymoDataset

__all__ = [
    'KittiDataset', 'KittiMonoDataset', 'build_dataloader', 'DATASETS',
    'build_dataset', 'NuScenesDataset', 'NuScenesMonoDataset', 'LyftDataset',
    'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans',
    'PointShuffle', 'ObjectRangeFilter', 'PointsRangeFilter',
    'LoadPointsFromFile', 'S3DISSegDataset', 'S3DISDataset',
    'NormalizePointsColor', 'IndoorPatchPointSample', 'IndoorPointSample',
    'PointSample', 'LoadAnnotations3D', 'GlobalAlignment', 'SUNRGBDDataset',
    'ScanNetDataset', 'ScanNetSegDataset', 'SemanticKITTIDataset',
...
@@ -2,14 +2,154 @@ import numpy as np
from os import path as osp

from mmdet3d.core import show_seg_result
from mmdet3d.core.bbox import DepthInstance3DBoxes
from mmdet.datasets import DATASETS
from mmseg.datasets import DATASETS as SEG_DATASETS
from .custom_3d import Custom3DDataset
from .custom_3d_seg import Custom3DSegDataset
from .pipelines import Compose


@DATASETS.register_module()
class S3DISDataset(Custom3DDataset):
    r"""S3DIS Dataset for Detection Task.

    This class is the inner dataset for S3DIS. Since S3DIS has 6 areas, we
    often train on 5 of them and test on the remaining one. The one used for
    test is Area_5 as suggested in `GSDN <https://arxiv.org/abs/2006.12356>`_.
    To concatenate 5 areas during training,
    `mmdet.datasets.dataset_wrappers.ConcatDataset` should be used.

    Args:
        data_root (str): Path of dataset root.
        ann_file (str): Path of annotation file.
        pipeline (list[dict], optional): Pipeline used for data processing.
            Defaults to None.
        classes (tuple[str], optional): Classes used in the dataset.
            Defaults to None.
        modality (dict, optional): Modality to specify the sensor data used
            as input. Defaults to None.
        box_type_3d (str, optional): Type of 3D box of this dataset.
            Based on the `box_type_3d`, the dataset will encapsulate the box
            to its original format then convert it to `box_type_3d`.
            Defaults to 'Depth' in this dataset. Available options include:

            - 'LiDAR': Box in LiDAR coordinates.
            - 'Depth': Box in depth coordinates, usually for indoor datasets.
            - 'Camera': Box in camera coordinates.
        filter_empty_gt (bool, optional): Whether to filter empty GT.
            Defaults to True.
        test_mode (bool, optional): Whether the dataset is in test mode.
            Defaults to False.
    """
    CLASSES = ('table', 'chair', 'sofa', 'bookcase', 'board')

    def __init__(self,
                 data_root,
                 ann_file,
                 pipeline=None,
                 classes=None,
                 modality=None,
                 box_type_3d='Depth',
                 filter_empty_gt=True,
                 test_mode=False):
        super().__init__(
            data_root=data_root,
            ann_file=ann_file,
            pipeline=pipeline,
            classes=classes,
            modality=modality,
            box_type_3d=box_type_3d,
            filter_empty_gt=filter_empty_gt,
            test_mode=test_mode)

    def get_ann_info(self, index):
        """Get annotation info according to the given index.

        Args:
            index (int): Index of the annotation data to get.

        Returns:
            dict: Annotation information consisting of the following keys:

                - gt_bboxes_3d (:obj:`DepthInstance3DBoxes`):
                    3D ground truth bboxes.
                - gt_labels_3d (np.ndarray): Labels of ground truths.
                - pts_instance_mask_path (str): Path of instance masks.
                - pts_semantic_mask_path (str): Path of semantic masks.
        """
        # Use index to get the annos, thus the evalhook could also use this api
        info = self.data_infos[index]
        if info['annos']['gt_num'] != 0:
            gt_bboxes_3d = info['annos']['gt_boxes_upright_depth'].astype(
                np.float32)  # k, 6
            gt_labels_3d = info['annos']['class'].astype(np.long)
        else:
            gt_bboxes_3d = np.zeros((0, 6), dtype=np.float32)
            gt_labels_3d = np.zeros((0, ), dtype=np.long)

        # to target box structure
        gt_bboxes_3d = DepthInstance3DBoxes(
            gt_bboxes_3d,
            box_dim=gt_bboxes_3d.shape[-1],
            with_yaw=False,
            origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)

        pts_instance_mask_path = osp.join(self.data_root,
                                          info['pts_instance_mask_path'])
        pts_semantic_mask_path = osp.join(self.data_root,
                                          info['pts_semantic_mask_path'])

        anns_results = dict(
            gt_bboxes_3d=gt_bboxes_3d,
            gt_labels_3d=gt_labels_3d,
            pts_instance_mask_path=pts_instance_mask_path,
            pts_semantic_mask_path=pts_semantic_mask_path)
        return anns_results

    def get_data_info(self, index):
        """Get data info according to the given index.

        Args:
            index (int): Index of the sample data to get.

        Returns:
            dict: Data information that will be passed to the data
                preprocessing pipelines. It includes the following keys:

                - pts_filename (str): Filename of point clouds.
                - file_name (str): Filename of point clouds.
                - ann_info (dict): Annotation info.
        """
        info = self.data_infos[index]
        pts_filename = osp.join(self.data_root, info['pts_path'])
        input_dict = dict(pts_filename=pts_filename)

        if not self.test_mode:
            annos = self.get_ann_info(index)
            input_dict['ann_info'] = annos
            if self.filter_empty_gt and ~(annos['gt_labels_3d'] != -1).any():
                return None
        return input_dict

    def _build_default_pipeline(self):
        """Build the default pipeline for this dataset."""
        pipeline = [
            dict(
                type='LoadPointsFromFile',
                coord_type='DEPTH',
                shift_height=False,
                load_dim=6,
                use_dim=[0, 1, 2, 3, 4, 5]),
            dict(
                type='DefaultFormatBundle3D',
                class_names=self.CLASSES,
                with_label=False),
            dict(type='Collect3D', keys=['points'])
        ]
        return Compose(pipeline)


@DATASETS.register_module()
@SEG_DATASETS.register_module()
class _S3DISSegDataset(Custom3DSegDataset):
    r"""S3DIS Dataset for Semantic Segmentation Task.
@@ -35,7 +175,7 @@ class _S3DISSegDataset(Custom3DSegDataset):
            as input. Defaults to None.
        test_mode (bool, optional): Whether the dataset is in test mode.
            Defaults to False.
        ignore_index (int, optional): The label index to be ignored, e.g.
            unannotated points. If None is given, set to len(self.CLASSES).
            Defaults to None.
        scene_idxs (np.ndarray | str, optional): Precomputed index to load
@@ -188,7 +328,7 @@ class S3DISSegDataset(_S3DISSegDataset):
            as input. Defaults to None.
        test_mode (bool, optional): Whether the dataset is in test mode.
            Defaults to False.
        ignore_index (int, optional): The label index to be ignored, e.g.
            unannotated points. If None is given, set to len(self.CLASSES).
            Defaults to None.
        scene_idxs (list[np.ndarray] | list[str], optional): Precomputed index
...
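The detection dataset can also be used standalone, without the config machinery. A minimal sketch; the info file path is a placeholder for data produced by the S3DIS converter:

from mmdet3d.datasets import S3DISDataset

# Placeholder paths; any per-area info file generated by the converter works.
dataset = S3DISDataset(
    data_root='./data/s3dis/',
    ann_file='./data/s3dis/s3dis_infos_Area_1.pkl')
ann = dataset.get_ann_info(0)
print(ann['gt_bboxes_3d'])  # DepthInstance3DBoxes, with_yaw=False
print(ann['gt_labels_3d'])  # indices into S3DISDataset.CLASSES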
@@ -2,7 +2,67 @@ import numpy as np
import pytest
import torch

from mmdet3d.datasets import S3DISDataset, S3DISSegDataset


def test_getitem():
    np.random.seed(0)
    root_path = './tests/data/s3dis/'
    ann_file = './tests/data/s3dis/s3dis_infos.pkl'
    class_names = ('table', 'chair', 'sofa', 'bookcase', 'board')
    pipeline = [
        dict(
            type='LoadPointsFromFile',
            coord_type='DEPTH',
            shift_height=False,
            load_dim=6,
            use_dim=[0, 1, 2, 3, 4, 5]),
        dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
        dict(type='PointSample', num_points=40000),
        dict(type='DefaultFormatBundle3D', class_names=class_names),
        dict(
            type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
    ]

    s3dis_dataset = S3DISDataset(
        data_root=root_path, ann_file=ann_file, pipeline=pipeline)

    data = s3dis_dataset[0]
    points = data['points']._data
    gt_bboxes_3d = data['gt_bboxes_3d']._data
    gt_labels_3d = data['gt_labels_3d']._data

    expected_gt_bboxes_3d = torch.tensor(
        [[2.3080, 2.4175, 0.2010, 0.8820, 0.8690, 0.6970, 0.0000],
         [2.4730, 0.7090, 0.2010, 0.9080, 0.9620, 0.7030, 0.0000],
         [5.3235, 0.4910, 0.0740, 0.8410, 0.9020, 0.8790, 0.0000]])
    expected_gt_labels = np.array([1, 1, 3, 1, 2, 0, 0, 0, 3])

    assert tuple(points.shape) == (40000, 6)
    assert torch.allclose(gt_bboxes_3d[:3].tensor, expected_gt_bboxes_3d, 1e-2)
    assert np.all(gt_labels_3d.numpy() == expected_gt_labels)


def test_evaluate():
    if not torch.cuda.is_available():
        pytest.skip()
    from mmdet3d.core.bbox.structures import DepthInstance3DBoxes
    root_path = './tests/data/s3dis'
    ann_file = './tests/data/s3dis/s3dis_infos.pkl'
    s3dis_dataset = S3DISDataset(root_path, ann_file)
    results = []
    pred_boxes = dict()
    pred_boxes['boxes_3d'] = DepthInstance3DBoxes(
        torch.tensor([[2.3080, 2.4175, 0.2010, 0.8820, 0.8690, 0.6970, 0.0000],
                      [2.4730, 0.7090, 0.2010, 0.9080, 0.9620, 0.7030, 0.0000],
                      [5.3235, 0.4910, 0.0740, 0.8410, 0.9020, 0.8790,
                       0.0000]]))
    pred_boxes['labels_3d'] = torch.tensor([1, 1, 3])
    pred_boxes['scores_3d'] = torch.tensor([0.5, 1.0, 1.0])
    results.append(pred_boxes)
    ret_dict = s3dis_dataset.evaluate(results)
    assert abs(ret_dict['chair_AP_0.25'] - 0.666) < 0.01
    assert abs(ret_dict['chair_AP_0.50'] - 0.666) < 0.01
    assert abs(ret_dict['bookcase_AP_0.25'] - 0.5) < 0.01
    assert abs(ret_dict['bookcase_AP_0.50'] - 0.5) < 0.01


def test_seg_getitem():
...
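The asserted AP values in test_evaluate can be checked by hand: the fixture holds three chairs (label 1) and two bookcases (label 3) in the ground truth, and the fake predictions reproduce two chair boxes and one bookcase box exactly, so at both IoU thresholds AP reduces to recall. A back-of-the-envelope sketch, no mmdet3d needed:

# Predictions match 2 of 3 chairs and 1 of 2 bookcases with perfect precision.
chair_ap = 2 / 3
bookcase_ap = 1 / 2
print(round(chair_ap, 3), bookcase_ap)  # 0.667 0.5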
@@ -20,17 +20,15 @@ class S3DISData(object):
        self.split = split
        self.data_dir = osp.join(root_path,
                                 'Stanford3dDataset_v1.2_Aligned_Version')

        # Following `GSDN <https://arxiv.org/abs/2006.12356>`_, use 5 furniture
        # classes for detection: table, chair, sofa, bookcase, board.
        self.cat_ids = np.array([7, 8, 9, 10, 11])
        self.cat_ids2class = {
            cat_id: i
            for i, cat_id in enumerate(list(self.cat_ids))
        }

        assert split in [
            'Area_1', 'Area_2', 'Area_3', 'Area_4', 'Area_5', 'Area_6'
        ]
@@ -99,6 +97,8 @@ class S3DISData(object):
                'instance_mask', f'{self.split}_{sample_idx}.bin')
            info['pts_semantic_mask_path'] = osp.join(
                'semantic_mask', f'{self.split}_{sample_idx}.bin')
            info['annos'] = self.get_bboxes(points, pts_instance_mask,
                                            pts_semantic_mask)
            return info
@@ -108,6 +108,44 @@ class S3DISData(object):
            infos = executor.map(process_single_scene, sample_id_list)
        return list(infos)

    def get_bboxes(self, points, pts_instance_mask, pts_semantic_mask):
        """Convert instance masks to axis-aligned bounding boxes.

        Args:
            points (np.ndarray): Scene points of shape (n, 6).
            pts_instance_mask (np.ndarray): Instance labels of shape (n,).
            pts_semantic_mask (np.ndarray): Semantic labels of shape (n,).

        Returns:
            dict: A dict containing detection infos with the following keys:

                - gt_boxes_upright_depth (np.ndarray): Bounding boxes
                    of shape (n, 6).
                - class (np.ndarray): Box labels of shape (n,).
                - gt_num (int): Number of boxes.
        """
        bboxes, labels = [], []
        for i in range(1, pts_instance_mask.max() + 1):  # instance ids are 1-based
            ids = pts_instance_mask == i
            # check that all instance points have the same semantic label
            assert pts_semantic_mask[ids].min() == pts_semantic_mask[ids].max()
            label = pts_semantic_mask[ids][0]
            # keep only furniture objects
            if label in self.cat_ids2class:
                labels.append(self.cat_ids2class[label])
                pts = points[:, :3][ids]
                min_pts = pts.min(axis=0)
                max_pts = pts.max(axis=0)
                locations = (min_pts + max_pts) / 2
                dimensions = max_pts - min_pts
                bboxes.append(np.concatenate((locations, dimensions)))
        annotation = dict()
        # follow ScanNet and SUN RGB-D keys
        annotation['gt_boxes_upright_depth'] = np.array(bboxes)
        annotation['class'] = np.array(labels)
        annotation['gt_num'] = len(labels)
        return annotation


class S3DISSegData(object):
    """S3DIS dataset used to generate infos for semantic segmentation task.
@@ -191,7 +229,7 @@ class S3DISSegData(object):
        num_iter = int(np.sum(num_point_all) / float(self.num_points))
        scene_idxs = []
        for idx in range(len(self.data_infos)):
            scene_idxs.extend([idx] * int(round(sample_prob[idx] * num_iter)))
        scene_idxs = np.array(scene_idxs).astype(np.int32)
        # calculate label weight, adopted from PointNet++
...
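The min/max box construction in get_bboxes is easy to verify on toy input. A self-contained illustration with fabricated arrays (not real S3DIS data): a single instance with semantic id 8 (chair, class index 1 after remapping) spanning the unit cube yields a box centred at (0.5, 0.5, 0.5) with size (1, 1, 1):

import numpy as np

# Toy scene: two points of one instance (id 1) labelled chair (semantic id 8).
points = np.array([[0., 0., 0., 0., 0., 0.],
                   [1., 1., 1., 0., 0., 0.]])
pts_instance_mask = np.array([1, 1])
pts_semantic_mask = np.array([8, 8])

ids = pts_instance_mask == 1
pts = points[:, :3][ids]
location = (pts.min(axis=0) + pts.max(axis=0)) / 2  # box centre
dimension = pts.max(axis=0) - pts.min(axis=0)       # box size
print(np.concatenate((location, dimension)))  # [0.5 0.5 0.5 1.  1.  1. ]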
@@ -62,6 +62,9 @@ def build_data_cfg(config_path, skip_type, cfg_options):
    # so we don't need to worry about it later
    if cfg.data.train['type'] == 'RepeatDataset':
        cfg.data.train = cfg.data.train.dataset
    # use only first dataset for `ConcatDataset`
    if cfg.data.train['type'] == 'ConcatDataset':
        cfg.data.train = cfg.data.train.datasets[0]
    train_data_cfg = cfg.data.train
    # eval_pipeline purely consists of loading functions
    # use eval_pipeline for data loading
@@ -200,7 +203,7 @@ def main():
        data_path = data_info['point_cloud']['velodyne_path']
    elif dataset_type in [
            'ScanNetDataset', 'SUNRGBDDataset', 'ScanNetSegDataset',
            'S3DISSegDataset', 'S3DISDataset'
    ]:
        data_path = data_info['pts_path']
    elif dataset_type in ['NuScenesDataset', 'LyftDataset']:
...
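For the S3DIS detection config, the two unwrapping branches above compose as follows. A sketch (same assumed config path as before): the RepeatDataset wrapper is stripped first, then only the first of the five per-area datasets (Area_1) is kept for browsing.

from mmcv import Config

cfg = Config.fromfile('configs/_base_/datasets/s3dis-3d-5class.py')  # assumed path
train = cfg.data.train
if train['type'] == 'RepeatDataset':  # strip the repeat wrapper
    train = train.dataset
if train['type'] == 'ConcatDataset':  # browse only the first area
    train = train.datasets[0]
print(train.ann_file)  # ./data/s3dis/s3dis_infos_Area_1.pkl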