Unverified commit 6d899956 authored by Danila Rukhovich, committed by GitHub

[Feature] Add S3DIS dataset for 3D object detection (#835)

* add s3dis dataset

* fix comments; remove _S3DISDataset in favour of ConcatDataset

* fix comments

* fix comments

* fix link in __init__.py

* remove unused import

* add dataset tests

* add pytest.skip
parent a2e7387e
# dataset settings
dataset_type = 'S3DISDataset'
data_root = './data/s3dis/'
class_names = ('table', 'chair', 'sofa', 'bookcase', 'board')
train_area = [1, 2, 3, 4, 6]
test_area = 5
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=True,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(type='PointSample', num_points=40000),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(
type='GlobalRotScaleTrans',
# following the ScanNet dataset, the rotation range is 5 degrees (~0.087 rad)
rot_range=[-0.087266, 0.087266],
scale_ratio_range=[1.0, 1.0],
shift_height=True),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=True,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),  # unused for point-cloud input, but required by MultiScaleFlipAug3D
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(type='PointSample', num_points=40000),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
# construct a pipeline for data and gt loading in the show function
# please keep its loading steps consistent with test_pipeline (e.g. file client)
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
]
data = dict(
samples_per_gpu=8,
workers_per_gpu=4,
train=dict(
type='RepeatDataset',
times=5,
dataset=dict(
type='ConcatDataset',
datasets=[
dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + f's3dis_infos_Area_{i}.pkl',
pipeline=train_pipeline,
filter_empty_gt=False,
classes=class_names,
box_type_3d='Depth') for i in train_area
],
separate_eval=False)),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl',
pipeline=test_pipeline,
classes=class_names,
test_mode=True,
box_type_3d='Depth'),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl',
pipeline=test_pipeline,
classes=class_names,
test_mode=True,
box_type_3d='Depth'))
evaluation = dict(pipeline=eval_pipeline)
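As a quick sanity check, this config can be consumed through the standard dataset builder. A minimal sketch, assuming the info files have been generated under `data_root`; the config file path below is an assumption:

from mmcv import Config
from mmdet3d.datasets import build_dataset

# hypothetical location of the config above
cfg = Config.fromfile('configs/_base_/datasets/s3dis-3d-5class.py')
# unwraps to RepeatDataset -> ConcatDataset -> five per-area S3DISDataset
train_set = build_dataset(cfg.data.train)
print(len(train_set))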
......@@ -18,7 +18,7 @@ from .pipelines import (BackgroundPointsFilter, GlobalAlignment,
RandomFlip3D, RandomJitterPoints,
VoxelBasedPointSampler)
# yapf: enable
from .s3dis_dataset import S3DISSegDataset
from .s3dis_dataset import S3DISDataset, S3DISSegDataset
from .scannet_dataset import ScanNetDataset, ScanNetSegDataset
from .semantickitti_dataset import SemanticKITTIDataset
from .sunrgbd_dataset import SUNRGBDDataset
......@@ -26,12 +26,11 @@ from .utils import get_loading_pipeline
from .waymo_dataset import WaymoDataset
__all__ = [
'KittiDataset', 'KittiMonoDataset', 'GroupSampler',
'DistributedGroupSampler', 'build_dataloader', 'RepeatFactorDataset',
'DATASETS', 'build_dataset', 'CocoDataset', 'NuScenesDataset',
'NuScenesMonoDataset', 'LyftDataset', 'ObjectSample', 'RandomFlip3D',
'ObjectNoise', 'GlobalRotScaleTrans', 'PointShuffle', 'ObjectRangeFilter',
'PointsRangeFilter', 'Collect3D', 'LoadPointsFromFile', 'S3DISSegDataset',
'KittiDataset', 'KittiMonoDataset', 'build_dataloader', 'DATASETS',
'build_dataset', 'NuScenesDataset', 'NuScenesMonoDataset', 'LyftDataset',
'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans',
'PointShuffle', 'ObjectRangeFilter', 'PointsRangeFilter',
'LoadPointsFromFile', 'S3DISSegDataset', 'S3DISDataset',
'NormalizePointsColor', 'IndoorPatchPointSample', 'IndoorPointSample',
'PointSample', 'LoadAnnotations3D', 'GlobalAlignment', 'SUNRGBDDataset',
'ScanNetDataset', 'ScanNetSegDataset', 'SemanticKITTIDataset',
......
......@@ -2,14 +2,154 @@ import numpy as np
from os import path as osp
from mmdet3d.core import show_seg_result
from mmdet3d.core.bbox import DepthInstance3DBoxes
from mmdet.datasets import DATASETS
from mmseg.datasets import DATASETS as SEG_DATASETS
from .custom_3d import Custom3DDataset
from .custom_3d_seg import Custom3DSegDataset
from .pipelines import Compose
@DATASETS.register_module()
@SEG_DATASETS.register_module()
class S3DISDataset(Custom3DDataset):
r"""S3DIS Dataset for Detection Task.
This class handles a single S3DIS area. Since S3DIS has 6 areas, we
usually train on 5 of them and test on the remaining one; the test
area is Area_5, as suggested in `GSDN <https://arxiv.org/abs/2006.12356>`_.
To concatenate the 5 training areas,
`mmdet.datasets.dataset_wrappers.ConcatDataset` should be used.
Args:
data_root (str): Path of dataset root.
ann_file (str): Path of annotation file.
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
classes (tuple[str], optional): Classes used in the dataset.
Defaults to None.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to None.
box_type_3d (str, optional): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
in its original format and then convert it to `box_type_3d`.
Defaults to 'Depth' in this dataset. Available options include:
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
"""
CLASSES = ('table', 'chair', 'sofa', 'bookcase', 'board')
def __init__(self,
data_root,
ann_file,
pipeline=None,
classes=None,
modality=None,
box_type_3d='Depth',
filter_empty_gt=True,
test_mode=False):
super().__init__(
data_root=data_root,
ann_file=ann_file,
pipeline=pipeline,
classes=classes,
modality=modality,
box_type_3d=box_type_3d,
filter_empty_gt=filter_empty_gt,
test_mode=test_mode)
def get_ann_info(self, index):
"""Get annotation info according to the given index.
Args:
index (int): Index of the annotation data to get.
Returns:
dict: Annotation information, consisting of the following keys:
- gt_bboxes_3d (:obj:`DepthInstance3DBoxes`):
3D ground truth bboxes
- gt_labels_3d (np.ndarray): Labels of ground truths.
- pts_instance_mask_path (str): Path of instance masks.
- pts_semantic_mask_path (str): Path of semantic masks.
"""
# Use index to get the annos, so that the eval hook can also use this API
info = self.data_infos[index]
if info['annos']['gt_num'] != 0:
gt_bboxes_3d = info['annos']['gt_boxes_upright_depth'].astype(
np.float32) # k, 6
gt_labels_3d = info['annos']['class'].astype(np.int64)
else:
gt_bboxes_3d = np.zeros((0, 6), dtype=np.float32)
gt_labels_3d = np.zeros((0, ), dtype=np.int64)
# to target box structure
gt_bboxes_3d = DepthInstance3DBoxes(
gt_bboxes_3d,
box_dim=gt_bboxes_3d.shape[-1],
with_yaw=False,
origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)
pts_instance_mask_path = osp.join(self.data_root,
info['pts_instance_mask_path'])
pts_semantic_mask_path = osp.join(self.data_root,
info['pts_semantic_mask_path'])
anns_results = dict(
gt_bboxes_3d=gt_bboxes_3d,
gt_labels_3d=gt_labels_3d,
pts_instance_mask_path=pts_instance_mask_path,
pts_semantic_mask_path=pts_semantic_mask_path)
return anns_results
def get_data_info(self, index):
"""Get data info according to the given index.
Args:
index (int): Index of the sample data to get.
Returns:
dict: Data information that will be passed to the data
preprocessing pipelines. It includes the following keys:
- pts_filename (str): Filename of point clouds.
- ann_info (dict): Annotation info (only present when not in test mode).
"""
info = self.data_infos[index]
pts_filename = osp.join(self.data_root, info['pts_path'])
input_dict = dict(pts_filename=pts_filename)
if not self.test_mode:
annos = self.get_ann_info(index)
input_dict['ann_info'] = annos
# skip samples without any valid ground truth when filter_empty_gt is set
if self.filter_empty_gt and not (annos['gt_labels_3d'] != -1).any():
return None
return input_dict
def _build_default_pipeline(self):
"""Build the default pipeline for this dataset."""
pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(
type='DefaultFormatBundle3D',
class_names=self.CLASSES,
with_label=False),
dict(type='Collect3D', keys=['points'])
]
return Compose(pipeline)
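For reference, the detection dataset can also be instantiated directly, without a config file. A minimal sketch, assuming `s3dis_infos_Area_5.pkl` was produced by the data converter beforehand:

from mmdet3d.datasets import S3DISDataset

dataset = S3DISDataset(
    data_root='./data/s3dis/',
    ann_file='./data/s3dis/s3dis_infos_Area_5.pkl',
    pipeline=None,  # show() falls back to _build_default_pipeline
    test_mode=True)
input_dict = dataset.get_data_info(0)  # {'pts_filename': ...}
ann = dataset.get_ann_info(0)          # gt_bboxes_3d, gt_labels_3d, mask paths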
class _S3DISSegDataset(Custom3DSegDataset):
r"""S3DIS Dataset for Semantic Segmentation Task.
......@@ -35,7 +175,7 @@ class _S3DISSegDataset(Custom3DSegDataset):
as input. Defaults to None.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
ignore_index (int, optional): The label index to be ignored, e.g. \
ignore_index (int, optional): The label index to be ignored, e.g.
unannotated points. If None is given, set to len(self.CLASSES).
Defaults to None.
scene_idxs (np.ndarray | str, optional): Precomputed index to load
......@@ -188,7 +328,7 @@ class S3DISSegDataset(_S3DISSegDataset):
as input. Defaults to None.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
ignore_index (int, optional): The label index to be ignored, e.g. \
ignore_index (int, optional): The label index to be ignored, e.g.
unannotated points. If None is given, set to len(self.CLASSES).
Defaults to None.
scene_idxs (list[np.ndarray] | list[str], optional): Precomputed index
......
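On the segmentation side, `S3DISSegDataset` concatenates several areas itself instead of relying on `ConcatDataset`. A hedged sketch of the intended usage; the `ann_files`/`scene_idxs` parameter names and the `seg_info` file layout are assumptions taken from the existing segmentation configs and should be verified against them:

from mmdet3d.datasets import S3DISSegDataset

train_areas = [1, 2, 3, 4, 6]
seg_dataset = S3DISSegDataset(
    data_root='./data/s3dis/',
    ann_files=[f'./data/s3dis/s3dis_infos_Area_{i}.pkl' for i in train_areas],
    # precomputed per-scene resampling indices (assumed file layout)
    scene_idxs=[
        f'./data/s3dis/seg_info/Area_{i}_resampled_scene_idxs.npy'
        for i in train_areas
    ])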
......@@ -2,7 +2,67 @@ import numpy as np
import pytest
import torch
from mmdet3d.datasets import S3DISSegDataset
from mmdet3d.datasets import S3DISDataset, S3DISSegDataset
def test_getitem():
np.random.seed(0)
root_path = './tests/data/s3dis/'
ann_file = './tests/data/s3dis/s3dis_infos.pkl'
class_names = ('table', 'chair', 'sofa', 'bookcase', 'board')
pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(type='PointSample', num_points=40000),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
s3dis_dataset = S3DISDataset(
data_root=root_path, ann_file=ann_file, pipeline=pipeline)
data = s3dis_dataset[0]
points = data['points']._data
gt_bboxes_3d = data['gt_bboxes_3d']._data
gt_labels_3d = data['gt_labels_3d']._data
expected_gt_bboxes_3d = torch.tensor(
[[2.3080, 2.4175, 0.2010, 0.8820, 0.8690, 0.6970, 0.0000],
[2.4730, 0.7090, 0.2010, 0.9080, 0.9620, 0.7030, 0.0000],
[5.3235, 0.4910, 0.0740, 0.8410, 0.9020, 0.8790, 0.0000]])
expected_gt_labels = np.array([1, 1, 3, 1, 2, 0, 0, 0, 3])
assert tuple(points.shape) == (40000, 6)
assert torch.allclose(gt_bboxes_3d[:3].tensor, expected_gt_bboxes_3d, 1e-2)
assert np.all(gt_labels_3d.numpy() == expected_gt_labels)
def test_evaluate():
if not torch.cuda.is_available():
pytest.skip()
from mmdet3d.core.bbox.structures import DepthInstance3DBoxes
root_path = './tests/data/s3dis'
ann_file = './tests/data/s3dis/s3dis_infos.pkl'
s3dis_dataset = S3DISDataset(root_path, ann_file)
results = []
pred_boxes = dict()
pred_boxes['boxes_3d'] = DepthInstance3DBoxes(
torch.tensor([[2.3080, 2.4175, 0.2010, 0.8820, 0.8690, 0.6970, 0.0000],
[2.4730, 0.7090, 0.2010, 0.9080, 0.9620, 0.7030, 0.0000],
[5.3235, 0.4910, 0.0740, 0.8410, 0.9020, 0.8790,
0.0000]]))
pred_boxes['labels_3d'] = torch.tensor([1, 1, 3])
pred_boxes['scores_3d'] = torch.tensor([0.5, 1.0, 1.0])
results.append(pred_boxes)
ret_dict = s3dis_dataset.evaluate(results)
assert abs(ret_dict['chair_AP_0.25'] - 0.666) < 0.01
assert abs(ret_dict['chair_AP_0.50'] - 0.666) < 0.01
assert abs(ret_dict['bookcase_AP_0.25'] - 0.5) < 0.01
assert abs(ret_dict['bookcase_AP_0.50'] - 0.5) < 0.01
def test_seg_getitem():
......
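To run just the new dataset tests, a small sketch (the test file path is assumed from the repository's test layout):

import pytest

# run only the S3DIS dataset tests, quietly
pytest.main(['tests/test_data/test_datasets/test_s3dis_dataset.py', '-q'])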
......@@ -20,17 +20,15 @@ class S3DISData(object):
self.split = split
self.data_dir = osp.join(root_path,
'Stanford3dDataset_v1.2_Aligned_Version')
self.classes = [
'ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door',
'table', 'chair', 'sofa', 'bookcase', 'board', 'clutter'
]
self.cat2label = {cat: self.classes.index(cat) for cat in self.classes}
self.label2cat = {self.cat2label[t]: t for t in self.cat2label}
self.cat_ids = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
# Following `GSDN <https://arxiv.org/abs/2006.12356>`_, use 5 furniture
# classes for detection: table, chair, sofa, bookcase, board.
self.cat_ids = np.array([7, 8, 9, 10, 11])
self.cat_ids2class = {
cat_id: i
for i, cat_id in enumerate(list(self.cat_ids))
}
assert split in [
'Area_1', 'Area_2', 'Area_3', 'Area_4', 'Area_5', 'Area_6'
]
......@@ -99,6 +97,8 @@ class S3DISData(object):
'instance_mask', f'{self.split}_{sample_idx}.bin')
info['pts_semantic_mask_path'] = osp.join(
'semantic_mask', f'{self.split}_{sample_idx}.bin')
info['annos'] = self.get_bboxes(points, pts_instance_mask,
pts_semantic_mask)
return info
......@@ -108,6 +108,44 @@ class S3DISData(object):
infos = executor.map(process_single_scene, sample_id_list)
return list(infos)
def get_bboxes(self, points, pts_instance_mask, pts_semantic_mask):
"""Convert instance masks to axis-aligned bounding boxes.
Args:
points (np.ndarray): Scene points of shape (n, 6).
pts_instance_mask (np.ndarray): Instance labels of shape (n,).
pts_semantic_mask (np.ndarray): Semantic labels of shape (n,).
Returns:
dict: A dict containing detection infos with the following keys:
- gt_boxes_upright_depth (np.ndarray): Bounding boxes
of shape (k, 6).
- class (np.ndarray): Box labels of shape (k,).
- gt_num (int): Number of boxes.
"""
bboxes, labels = [], []
for i in range(1, pts_instance_mask.max()):
ids = pts_instance_mask == i
# check that all points of this instance share one semantic label
assert pts_semantic_mask[ids].min() == pts_semantic_mask[ids].max()
label = pts_semantic_mask[ids][0]
# keep only the 5 furniture classes used for detection
if label in self.cat_ids2class:
labels.append(self.cat_ids2class[label])
pts = points[:, :3][ids]
min_pts = pts.min(axis=0)
max_pts = pts.max(axis=0)
locations = (min_pts + max_pts) / 2
dimensions = max_pts - min_pts
bboxes.append(np.concatenate((locations, dimensions)))
annotation = dict()
# follow ScanNet and SUN RGB-D keys
annotation['gt_boxes_upright_depth'] = np.array(bboxes)
annotation['class'] = np.array(labels)
annotation['gt_num'] = len(labels)
return annotation
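The box construction above is plain min/max arithmetic: the center is the midpoint of each instance's points and the size is their extent. A tiny self-contained check with made-up points:

import numpy as np

pts = np.array([[0.0, 0.0, 0.0],
                [1.0, 2.0, 0.5],
                [0.5, 1.0, 0.25]])
min_pts, max_pts = pts.min(axis=0), pts.max(axis=0)
center = (min_pts + max_pts) / 2      # [0.5, 1.0, 0.25]
dims = max_pts - min_pts              # [1.0, 2.0, 0.5]
box = np.concatenate((center, dims))  # (x, y, z, dx, dy, dz); yaw is fixed at 0
print(box)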
class S3DISSegData(object):
"""S3DIS dataset used to generate infos for semantic segmentation task.
......@@ -191,7 +229,7 @@ class S3DISSegData(object):
num_iter = int(np.sum(num_point_all) / float(self.num_points))
scene_idxs = []
for idx in range(len(self.data_infos)):
scene_idxs.extend([idx] * round(sample_prob[idx] * num_iter))
scene_idxs.extend([idx] * int(round(sample_prob[idx] * num_iter)))
scene_idxs = np.array(scene_idxs).astype(np.int32)
# calculate label weight, adopted from PointNet++
......
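For intuition, the `scene_idxs` computation above repeats each scene index in proportion to its share of points, so larger scenes are sampled more often. A worked sketch with invented point counts:

import numpy as np

num_point_all = np.array([1_000_000, 500_000, 500_000])     # points per scene
num_points = 40_000                                         # points per sample
sample_prob = num_point_all / float(np.sum(num_point_all))  # [0.5, 0.25, 0.25]
num_iter = int(np.sum(num_point_all) / float(num_points))   # 50
scene_idxs = []
for idx in range(len(num_point_all)):
    scene_idxs.extend([idx] * int(round(sample_prob[idx] * num_iter)))
# scene 0 appears 25 times; scenes 1 and 2 about 12 times each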
......@@ -62,6 +62,9 @@ def build_data_cfg(config_path, skip_type, cfg_options):
# so we don't need to worry about it later
if cfg.data.train['type'] == 'RepeatDataset':
cfg.data.train = cfg.data.train.dataset
# use only first dataset for `ConcatDataset`
if cfg.data.train['type'] == 'ConcatDataset':
cfg.data.train = cfg.data.train.datasets[0]
train_data_cfg = cfg.data.train
# eval_pipeline purely consists of loading functions
# use eval_pipeline for data loading
......@@ -200,7 +203,7 @@ def main():
data_path = data_info['point_cloud']['velodyne_path']
elif dataset_type in [
'ScanNetDataset', 'SUNRGBDDataset', 'ScanNetSegDataset',
'S3DISSegDataset'
'S3DISSegDataset', 'S3DISDataset'
]:
data_path = data_info['pts_path']
elif dataset_type in ['NuScenesDataset', 'LyftDataset']:
......
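The `ConcatDataset` branch above lets the browse tool pick a single area when visualizing the new config. A condensed sketch of that unwrapping, with an assumed config path:

from mmcv import Config

cfg = Config.fromfile('configs/_base_/datasets/s3dis-3d-5class.py')  # assumed path
train = cfg.data.train
if train['type'] == 'RepeatDataset':
    train = train.dataset
if train['type'] == 'ConcatDataset':  # new: keep only the first area
    train = train.datasets[0]
print(train.ann_file)  # ./data/s3dis/s3dis_infos_Area_1.pkl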