# Copyright (c) OpenMMLab. All rights reserved.
from collections import OrderedDict
from os import path as osp
from typing import Callable, List, Optional, Union

from mmdet3d.core import show_multi_modality_result, show_result
from mmdet3d.core.bbox import DepthInstance3DBoxes
from mmdet3d.registry import DATASETS
from mmdet.core import eval_map
from .det3d_dataset import Det3DDataset
from .pipelines import Compose


@DATASETS.register_module()
class SUNRGBDDataset(Det3DDataset):
    r"""SUNRGBD Dataset.

    This class serves as the API for experiments on the SUNRGBD Dataset.

    See the `download page `_ for data downloading.

    Args:
        data_root (str): Path of dataset root.
        ann_file (str): Path of annotation file.
        metainfo (dict, optional): Meta information for dataset, such as
            class information. Defaults to None.
        data_prefix (dict, optional): Prefix for data. Defaults to
            `dict(pts='points', img='sunrgbd_trainval')`.
        pipeline (list[dict], optional): Pipeline used for data processing.
            Defaults to None.
        modality (dict, optional): Modality to specify the sensor data used
            as input. Defaults to `dict(use_camera=True, use_lidar=True)`.
        box_type_3d (str, optional): Type of 3D box of this dataset.
            Based on the `box_type_3d`, the dataset will encapsulate the box
            to its original format then convert them to `box_type_3d`.
            Defaults to 'Depth' in this dataset. Available options include:

            - 'LiDAR': Box in LiDAR coordinates.
            - 'Depth': Box in depth coordinates, usually for indoor datasets.
            - 'Camera': Box in camera coordinates.
        filter_empty_gt (bool, optional): Whether to filter empty GT.
            Defaults to True.
        test_mode (bool, optional): Whether the dataset is in test mode.
            Defaults to False.
    """
    METAINFO = {
        'CLASSES': ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk',
                    'dresser', 'night_stand', 'bookshelf', 'bathtub')
    }

    def __init__(self,
                 data_root: str,
                 ann_file: str,
                 metainfo: Optional[dict] = None,
                 data_prefix: dict = dict(
                     pts='points', img='sunrgbd_trainval'),
                 pipeline: List[Union[dict, Callable]] = [],
                 modality: dict = dict(use_camera=True, use_lidar=True),
                 box_type_3d: str = 'Depth',
                 filter_empty_gt: bool = True,
                 test_mode: bool = False,
                 **kwargs):
        super().__init__(
            data_root=data_root,
            ann_file=ann_file,
            metainfo=metainfo,
            data_prefix=data_prefix,
            pipeline=pipeline,
            modality=modality,
            box_type_3d=box_type_3d,
            filter_empty_gt=filter_empty_gt,
            test_mode=test_mode,
            **kwargs)
        assert 'use_camera' in self.modality and \
            'use_lidar' in self.modality
        assert self.modality['use_camera'] or self.modality['use_lidar']

    def parse_ann_info(self, info: dict) -> dict:
        """Process the `instances` in data info to `ann_info`.

        Args:
            info (dict): Info dict.

        Returns:
            dict: Processed `ann_info`.
        """
        ann_info = super().parse_ann_info(info)
        # to target box structure
        ann_info['gt_bboxes_3d'] = DepthInstance3DBoxes(
            ann_info['gt_bboxes_3d'],
            origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)

        return ann_info

    def _build_default_pipeline(self):
        """Build the default pipeline for this dataset."""
        pipeline = [
            dict(
                type='LoadPointsFromFile',
                coord_type='DEPTH',
                shift_height=False,
                load_dim=6,
                use_dim=[0, 1, 2]),
            dict(
                type='DefaultFormatBundle3D',
                class_names=self.CLASSES,
                with_label=False),
            dict(type='Collect3D', keys=['points'])
        ]
        if self.modality['use_camera']:
            pipeline.insert(0, dict(type='LoadImageFromFile'))
        return Compose(pipeline)

    # TODO fix this
    def show(self, results, out_dir, show=True, pipeline=None):
        """Results visualization.

        Args:
            results (list[dict]): List of bounding boxes results.
            out_dir (str): Output directory of visualization result.
            show (bool): Whether to visualize the results online.
                Default: True.
            pipeline (list[dict], optional): Raw data loading pipeline used
                for showing. Default: None.
        """
        assert out_dir is not None, 'Expect out_dir, got none.'
        pipeline = self._get_pipeline(pipeline)
        for i, result in enumerate(results):
            data_info = self.data_infos[i]
            pts_path = data_info['pts_path']
            file_name = osp.split(pts_path)[-1].split('.')[0]
            points, img_metas, img = self._extract_data(
                i, pipeline, ['points', 'img_metas', 'img'])
            # scale colors to [0, 255]
            points = points.numpy()
            points[:, 3:] *= 255

            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()
            pred_bboxes = result['boxes_3d'].tensor.numpy()
            show_result(points, gt_bboxes.copy(), pred_bboxes.copy(), out_dir,
                        file_name, show)

            # multi-modality visualization
            if self.modality['use_camera']:
                img = img.numpy()
                # need to transpose channel to first dim
                img = img.transpose(1, 2, 0)
                pred_bboxes = DepthInstance3DBoxes(
                    pred_bboxes, origin=(0.5, 0.5, 0))
                gt_bboxes = DepthInstance3DBoxes(
                    gt_bboxes, origin=(0.5, 0.5, 0))
                show_multi_modality_result(
                    img,
                    gt_bboxes,
                    pred_bboxes,
                    None,
                    out_dir,
                    file_name,
                    box_mode='depth',
                    img_metas=img_metas,
                    show=show)

    def evaluate(self,
                 results,
                 metric=None,
                 iou_thr=(0.25, 0.5),
                 iou_thr_2d=(0.5, ),
                 logger=None,
                 show=False,
                 out_dir=None,
                 pipeline=None):
        """Evaluate.

        Evaluation in indoor protocol.

        Args:
            results (list[dict]): List of results.
            metric (str | list[str], optional): Metrics to be evaluated.
                Default: None.
            iou_thr (list[float], optional): AP IoU thresholds for 3D
                evaluation. Default: (0.25, 0.5).
            iou_thr_2d (list[float], optional): AP IoU thresholds for 2D
                evaluation. Default: (0.5, ).
            logger (logging.Logger | str, optional): Logger used for printing
                related information during evaluation. Default: None.
            show (bool, optional): Whether to visualize. Default: False.
            out_dir (str, optional): Path to save the visualization results.
                Default: None.
            pipeline (list[dict], optional): Raw data loading pipeline used
                for showing. Default: None.

        Returns:
            dict: Evaluation results.
        """
        # evaluate 3D detection performance
        if isinstance(results[0], dict):
            return super().evaluate(results, metric, iou_thr, logger, show,
                                    out_dir, pipeline)
        # evaluate 2D detection performance
        else:
            eval_results = OrderedDict()
            annotations = [self.get_ann_info(i) for i in range(len(self))]
            iou_thr_2d = (iou_thr_2d, ) if isinstance(iou_thr_2d,
                                                      float) else iou_thr_2d
            for iou_thr_2d_single in iou_thr_2d:
                mean_ap, _ = eval_map(
                    results,
                    annotations,
                    scale_ranges=None,
                    iou_thr=iou_thr_2d_single,
                    dataset=self.CLASSES,
                    logger=logger)
                eval_results['mAP_' + str(iou_thr_2d_single)] = mean_ap
            return eval_results
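

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the dataset API): shows how this dataset
# can be instantiated directly or built through the registry. The
# `data_root`/`ann_file` paths below are assumptions following the common
# mmdet3d SUNRGBD data layout and must be adapted to the actual output of the
# data preparation scripts.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    # Direct instantiation; assumes `sunrgbd_infos_train.pkl` exists under
    # the (hypothetical) `data/sunrgbd` root.
    dataset = SUNRGBDDataset(
        data_root='data/sunrgbd',
        ann_file='sunrgbd_infos_train.pkl',
        pipeline=[],
        modality=dict(use_camera=True, use_lidar=True),
        box_type_3d='Depth')
    print(f'number of SUNRGBD samples: {len(dataset)}')

    # Equivalent registry-based construction from a config dict, as used when
    # the dataset is specified in a config file.
    cfg = dict(
        type='SUNRGBDDataset',
        data_root='data/sunrgbd',
        ann_file='sunrgbd_infos_train.pkl',
        pipeline=[],
        box_type_3d='Depth')
    dataset = DATASETS.build(cfg)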