# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from os import path as osp
from typing import Callable, List, Optional, Union

import numpy as np

from mmdet3d.core import show_result
from mmdet3d.core.bbox import DepthInstance3DBoxes
from mmdet3d.registry import DATASETS
from .det3d_dataset import Det3DDataset
from .pipelines import Compose
from .seg3d_dataset import Seg3DDataset


@DATASETS.register_module()
class ScanNetDataset(Det3DDataset):
    r"""ScanNet Dataset for Detection Task.

    This class serves as the API for experiments on the ScanNet Dataset.

    Please refer to the `github repo <https://github.com/ScanNet/ScanNet>`_
    for data downloading.

    Args:
        data_root (str): Path of dataset root.
        ann_file (str): Path of annotation file.
        metainfo (dict, optional): Meta information for dataset, such as
            class information. Defaults to None.
        data_prefix (dict): Prefix for data. Defaults to
            `dict(pts='points', pts_instance_mask='instance_mask',
            pts_semantic_mask='semantic_mask')`.
        pipeline (list[dict]): Pipeline used for data processing.
            Defaults to [].
        modality (dict): Modality to specify the sensor data used as input.
            Defaults to `dict(use_camera=False, use_lidar=True)`.
        box_type_3d (str): Type of 3D box of this dataset. Based on the
            `box_type_3d`, the dataset will encapsulate the box to its
            original format and then convert it to `box_type_3d`.
            Defaults to 'Depth' in this dataset. Available options include:

            - 'LiDAR': Box in LiDAR coordinates.
            - 'Depth': Box in depth coordinates, usually for indoor datasets.
            - 'Camera': Box in camera coordinates.
        filter_empty_gt (bool): Whether to filter empty GT.
            Defaults to True.
        test_mode (bool): Whether the dataset is in test mode.
            Defaults to False.
    """
    METAINFO = {
        'CLASSES': ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door',
                    'window', 'bookshelf', 'picture', 'counter', 'desk',
                    'curtain', 'refrigerator', 'showercurtrain', 'toilet',
                    'sink', 'bathtub', 'garbagebin')
    }

    def __init__(self,
                 data_root: str,
                 ann_file: str,
                 metainfo: dict = None,
                 data_prefix: dict = dict(
                     pts='points',
                     pts_instance_mask='instance_mask',
                     pts_semantic_mask='semantic_mask'),
                 pipeline: List[Union[dict, Callable]] = [],
                 modality: dict = dict(use_camera=False, use_lidar=True),
                 box_type_3d: str = 'Depth',
                 filter_empty_gt: bool = True,
                 test_mode: bool = False,
                 **kwargs) -> None:
        super().__init__(
            data_root=data_root,
            ann_file=ann_file,
            metainfo=metainfo,
            data_prefix=data_prefix,
            pipeline=pipeline,
            modality=modality,
            box_type_3d=box_type_3d,
            filter_empty_gt=filter_empty_gt,
            test_mode=test_mode,
            **kwargs)

        assert 'use_camera' in self.modality and \
            'use_lidar' in self.modality
        assert self.modality['use_camera'] or self.modality['use_lidar']

    @staticmethod
    def _get_axis_align_matrix(info: dict) -> np.ndarray:
        """Get axis_align_matrix from info. If it does not exist, return the
        identity matrix.

        Args:
            info (dict): Info of a single sample data.

        Returns:
            np.ndarray: 4x4 transformation matrix.
        """
        if 'axis_align_matrix' in info:
            return np.array(info['axis_align_matrix'])
        else:
            warnings.warn(
                'axis_align_matrix is not found in ScanNet data info, please '
                'use new pre-process scripts to re-generate ScanNet data')
            return np.eye(4).astype(np.float32)
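    # A minimal, hypothetical usage sketch of ``_get_axis_align_matrix``
    # (the ``info`` dict below is a placeholder, not real ScanNet data):
    #
    #   info = dict(axis_align_matrix=np.eye(4).tolist())
    #   ScanNetDataset._get_axis_align_matrix(info)  # -> 4x4 np.ndarray
    #   ScanNetDataset._get_axis_align_matrix({})    # warns, returns np.eye(4)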
    def parse_data_info(self, info: dict) -> dict:
        """Process the raw data info.

        The only difference from `Det3DDataset` is the specific processing
        for `axis_align_matrix`.

        Args:
            info (dict): Raw info dict.

        Returns:
            dict: Data information that will be passed to the data
            preprocessing pipelines.
        """
        info['axis_align_matrix'] = self._get_axis_align_matrix(info)
        info['pts_instance_mask_path'] = osp.join(
            self.data_prefix.get('pts_instance_mask', ''),
            info['pts_instance_mask_path'])
        info['pts_semantic_mask_path'] = osp.join(
            self.data_prefix.get('pts_semantic_mask', ''),
            info['pts_semantic_mask_path'])
        info = super().parse_data_info(info)
        return info

    def parse_ann_info(self, info: dict) -> dict:
        """Process the `instances` in data info to `ann_info`.

        Args:
            info (dict): Info dict.

        Returns:
            dict: Processed `ann_info`.
        """
        ann_info = super().parse_ann_info(info)
        if ann_info is None:
            # empty gt
            ann_info = dict()
            ann_info['gt_bboxes_3d'] = np.zeros((0, 6), dtype=np.float32)
            ann_info['gt_labels_3d'] = np.zeros((0, ), dtype=np.int64)
        # to target box structure
        ann_info['gt_bboxes_3d'] = DepthInstance3DBoxes(
            ann_info['gt_bboxes_3d'],
            box_dim=ann_info['gt_bboxes_3d'].shape[-1],
            with_yaw=False,
            origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)
        return ann_info

    def _build_default_pipeline(self):
        """Build the default pipeline for this dataset."""
        pipeline = [
            dict(
                type='LoadPointsFromFile',
                coord_type='DEPTH',
                shift_height=False,
                load_dim=6,
                use_dim=[0, 1, 2]),
            dict(type='GlobalAlignment', rotation_axis=2),
            dict(
                type='DefaultFormatBundle3D',
                class_names=self.CLASSES,
                with_label=False),
            dict(type='Collect3D', keys=['points'])
        ]
        return Compose(pipeline)

    def show(self, results, out_dir, show=True, pipeline=None):
        """Results visualization.

        Args:
            results (list[dict]): List of bounding box results.
            out_dir (str): Output directory of visualization results.
            show (bool): Whether to visualize the results online.
                Defaults to True.
            pipeline (list[dict], optional): Raw data loading pipeline used
                for showing. Defaults to None.
        """
        assert out_dir is not None, 'Expect out_dir, got none.'
        pipeline = self._get_pipeline(pipeline)
        for i, result in enumerate(results):
            data_info = self.get_data_info(i)
            pts_path = data_info['lidar_points']['lidar_path']
            file_name = osp.split(pts_path)[-1].split('.')[0]
            points = self._extract_data(i, pipeline, 'points').numpy()
            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()
            pred_bboxes = result['boxes_3d'].tensor.numpy()
            show_result(points, gt_bboxes, pred_bboxes, out_dir, file_name,
                        show)
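
# A minimal, hypothetical config sketch showing how ``ScanNetDataset`` is
# usually built through the registry; the paths and the truncated pipeline
# below are placeholders, not values defined in this module:
#
#   from mmdet3d.registry import DATASETS
#   dataset = DATASETS.build(
#       dict(
#           type='ScanNetDataset',
#           data_root='data/scannet/',
#           ann_file='scannet_infos_train.pkl',
#           pipeline=[...]))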
""" METAINFO = { 'CLASSES': ('wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window', 'bookshelf', 'picture', 'counter', 'desk', 'curtain', 'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub', 'otherfurniture'), 'PALETTE': [ [174, 199, 232], [152, 223, 138], [31, 119, 180], [255, 187, 120], [188, 189, 34], [140, 86, 75], [255, 152, 150], [214, 39, 40], [197, 176, 213], [148, 103, 189], [196, 156, 148], [23, 190, 207], [247, 182, 210], [219, 219, 141], [255, 127, 14], [158, 218, 229], [44, 160, 44], [112, 128, 144], [227, 119, 194], [82, 84, 163], ], 'valid_class_ids': (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39), 'all_class_ids': tuple(range(41)), } def __init__(self, data_root: Optional[str] = None, ann_file: str = '', metainfo: Optional[dict] = None, data_prefix: dict = dict( pts='points', img='', instance_mask='', semantic_mask=''), pipeline: List[Union[dict, Callable]] = [], modality: dict = dict(use_lidar=True, use_camera=False), ignore_index=None, scene_idxs=None, test_mode=False, **kwargs) -> None: super().__init__( data_root=data_root, ann_file=ann_file, metainfo=metainfo, data_prefix=data_prefix, pipeline=pipeline, modality=modality, ignore_index=ignore_index, scene_idxs=scene_idxs, test_mode=test_mode, **kwargs) def get_scene_idxs(self, scene_idxs): """Compute scene_idxs for data sampling. We sample more times for scenes with more points. """ # when testing, we load one whole scene every time if not self.test_mode and scene_idxs is None: raise NotImplementedError( 'please provide re-sampled scene indexes for training') return super().get_scene_idxs(scene_idxs) @DATASETS.register_module() class ScanNetInstanceSegDataset(Seg3DDataset): METAINFO = { 'CLASSES': ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window', 'bookshelf', 'picture', 'counter', 'desk', 'curtain', 'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub', 'garbagebin'), 'PLATTE': [ [174, 199, 232], [152, 223, 138], [31, 119, 180], [255, 187, 120], [188, 189, 34], [140, 86, 75], [255, 152, 150], [214, 39, 40], [197, 176, 213], [148, 103, 189], [196, 156, 148], [23, 190, 207], [247, 182, 210], [219, 219, 141], [255, 127, 14], [158, 218, 229], [44, 160, 44], [112, 128, 144], [227, 119, 194], [82, 84, 163], ], 'valid_class_ids': (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39), 'all_class_ids': tuple(range(41)) } def __init__(self, data_root: Optional[str] = None, ann_file: str = '', metainfo: Optional[dict] = None, data_prefix: dict = dict( pts='points', img='', instance_mask='', semantic_mask=''), pipeline: List[Union[dict, Callable]] = [], modality: dict = dict(use_lidar=True, use_camera=False), test_mode=False, ignore_index=None, scene_idxs=None, file_client_args=dict(backend='disk'), **kwargs) -> None: super().__init__( data_root=data_root, ann_file=ann_file, metainfo=metainfo, pipeline=pipeline, data_prefix=data_prefix, modality=modality, test_mode=test_mode, ignore_index=ignore_index, scene_idxs=scene_idxs, file_client_args=file_client_args, **kwargs)