sunrgbd_dataset.py
# Copyright (c) OpenMMLab. All rights reserved.
from collections import OrderedDict
from os import path as osp
from typing import Callable, List, Optional, Union

from mmdet3d.core import show_multi_modality_result, show_result
from mmdet3d.core.bbox import DepthInstance3DBoxes
from mmdet3d.registry import DATASETS
from mmdet.core import eval_map
from .det3d_dataset import Det3DDataset
from .pipelines import Compose


@DATASETS.register_module()
class SUNRGBDDataset(Det3DDataset):
    r"""SUNRGBD Dataset.

    This class serves as the API for experiments on the SUNRGBD Dataset.

    See the `download page <http://rgbd.cs.princeton.edu/challenge.html>`_
    for data downloading.

    Args:
        data_root (str): Path of dataset root.
        ann_file (str): Path of annotation file.
        metainfo (dict, optional): Meta information for dataset, such as class
            information. Defaults to None.
        data_prefix (dict, optional): Prefix for data. Defaults to
            `dict(pts='points', img='sunrgbd_trainval')`.
        pipeline (list[dict], optional): Pipeline used for data processing.
            Defaults to [].
        modality (dict, optional): Modality to specify the sensor data used
            as input. Defaults to `dict(use_camera=True, use_lidar=True)`.
        box_type_3d (str, optional): Type of 3D box of this dataset.
            Based on the `box_type_3d`, the dataset will wrap the boxes in
            their original format and then convert them to `box_type_3d`.
            Defaults to 'Depth' in this dataset. Available options include:

            - 'LiDAR': Box in LiDAR coordinates.
            - 'Depth': Box in depth coordinates, usually for indoor dataset.
            - 'Camera': Box in camera coordinates.
        filter_empty_gt (bool, optional): Whether to filter empty GT.
            Defaults to True.
        test_mode (bool, optional): Whether the dataset is in test mode.
            Defaults to False.
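
    Example:
        A minimal construction sketch. The paths below are placeholders for
        the converted SUNRGBD info files, so the constructor call only runs
        once those files exist; the class constant is always available.

        >>> dataset = SUNRGBDDataset(
        ...     data_root='data/sunrgbd',
        ...     ann_file='sunrgbd_infos_train.pkl',
        ...     pipeline=[])
        >>> SUNRGBDDataset.METAINFO['CLASSES'][:3]
        ('bed', 'table', 'sofa')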
    """
    METAINFO = {
        'CLASSES': ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk',
                    'dresser', 'night_stand', 'bookshelf', 'bathtub')
    }

    def __init__(self,
                 data_root: str,
                 ann_file: str,
                 metainfo: Optional[dict] = None,
                 data_prefix: dict = dict(
                     pts='points', img='sunrgbd_trainval'),
                 pipeline: List[Union[dict, Callable]] = [],
                 modality: dict = dict(use_camera=True, use_lidar=True),
                 box_type_3d: str = 'Depth',
                 filter_empty_gt: bool = True,
                 test_mode: bool = False,
                 **kwargs):
        super().__init__(
            data_root=data_root,
            ann_file=ann_file,
            metainfo=metainfo,
            data_prefix=data_prefix,
            pipeline=pipeline,
            modality=modality,
            box_type_3d=box_type_3d,
            filter_empty_gt=filter_empty_gt,
            test_mode=test_mode,
            **kwargs)
        assert 'use_camera' in self.modality and \
            'use_lidar' in self.modality
        assert self.modality['use_camera'] or self.modality['use_lidar']

    def parse_ann_info(self, info: dict) -> dict:
        """Process the `instances` in data info to `ann_info`

        Args:
            info (dict): Data info of a single sample, containing the raw
                `instances` annotations.

        Returns:
            dict: Processed `ann_info`, in which `gt_bboxes_3d` is converted
                to `DepthInstance3DBoxes`.
        """
        ann_info = super().parse_ann_info(info)
        # convert raw boxes to the target box structure
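        # SUNRGBD annotations give the gravity (geometric) center of each
        # box, hence origin=(0.5, 0.5, 0.5) when wrapping the raw tensor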
        ann_info['gt_bboxes_3d'] = DepthInstance3DBoxes(
            ann_info['gt_bboxes_3d'],
            origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)

        return ann_info

    def _build_default_pipeline(self):
        """Build the default pipeline for this dataset."""
        pipeline = [
            dict(
                type='LoadPointsFromFile',
                coord_type='DEPTH',
                shift_height=False,
                load_dim=6,
                use_dim=[0, 1, 2]),
            dict(
                type='DefaultFormatBundle3D',
                class_names=self.CLASSES,
                with_label=False),
            dict(type='Collect3D', keys=['points'])
        ]
        if self.modality['use_camera']:
            pipeline.insert(0, dict(type='LoadImageFromFile'))
        return Compose(pipeline)
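
    # Usage sketch (index 0 is an arbitrary example): `show()` falls back to
    # this pipeline via `_get_pipeline(None)` when no pipeline is given, and
    # raw data can be pulled manually, e.g.
    #   points = self._extract_data(0, self._build_default_pipeline(),
    #                               'points')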

    # TODO fix this
    def show(self, results, out_dir, show=True, pipeline=None):
        """Results visualization.

        Args:
            results (list[dict]): List of bounding boxes results.
            out_dir (str): Output directory of visualization result.
            show (bool): Whether to visualize the results online.
                Default: True.
            pipeline (list[dict], optional): Raw data loading pipeline used
                for visualization. Default: None.
        """
        assert out_dir is not None, 'Expect out_dir, got none.'
        pipeline = self._get_pipeline(pipeline)
        for i, result in enumerate(results):
            data_info = self.data_infos[i]
            pts_path = data_info['pts_path']
            file_name = osp.split(pts_path)[-1].split('.')[0]
            points, img_metas, img = self._extract_data(
                i, pipeline, ['points', 'img_metas', 'img'])
            points = points.numpy()
            # scale point colors from [0, 1] up to [0, 255] for visualization
            points[:, 3:] *= 255

            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()
            pred_bboxes = result['boxes_3d'].tensor.numpy()
            show_result(points, gt_bboxes.copy(), pred_bboxes.copy(), out_dir,
                        file_name, show)

            # multi-modality visualization
            if self.modality['use_camera']:
                img = img.numpy()
                # transpose channels to the last dim: (C, H, W) -> (H, W, C)
                img = img.transpose(1, 2, 0)
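                # rebuild box objects from the raw arrays; depth boxes keep
                # their bottom center, hence origin=(0.5, 0.5, 0) here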
                pred_bboxes = DepthInstance3DBoxes(
                    pred_bboxes, origin=(0.5, 0.5, 0))
                gt_bboxes = DepthInstance3DBoxes(
                    gt_bboxes, origin=(0.5, 0.5, 0))
                show_multi_modality_result(
                    img,
                    gt_bboxes,
                    pred_bboxes,
                    None,
                    out_dir,
                    file_name,
                    box_mode='depth',
                    img_metas=img_metas,
                    show=show)

    def evaluate(self,
                 results,
                 metric=None,
                 iou_thr=(0.25, 0.5),
                 iou_thr_2d=(0.5, ),
                 logger=None,
                 show=False,
                 out_dir=None,
                 pipeline=None):
        """Evaluate.

        Evaluation in indoor protocol.

        Args:
            results (list[dict]): List of results.
            metric (str | list[str], optional): Metrics to be evaluated.
                Default: None.
            iou_thr (list[float], optional): AP IoU thresholds for 3D
                evaluation. Default: (0.25, 0.5).
            iou_thr_2d (list[float], optional): AP IoU thresholds for 2D
                evaluation. Default: (0.5, ).
            logger (logging.Logger | str, optional): Logger used for printing
                related information during evaluation. Default: None.
            show (bool, optional): Whether to visualize.
                Default: False.
            out_dir (str, optional): Path to save the visualization results.
                Default: None.
            pipeline (list[dict], optional): Raw data loading pipeline used
                for visualization. Default: None.

        Returns:
            dict: Evaluation results.
        """
        # evaluate 3D detection performance
        if isinstance(results[0], dict):
            return super().evaluate(results, metric, iou_thr, logger, show,
                                    out_dir, pipeline)
        # evaluate 2D detection performance
        else:
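            # in this branch `results` holds 2D detections: a list over
            # images of per-class (n, 5) arrays, the layout expected by
            # `mmdet.core.eval_map`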
            eval_results = OrderedDict()
            annotations = [self.get_ann_info(i) for i in range(len(self))]
            iou_thr_2d = (iou_thr_2d, ) if isinstance(
                iou_thr_2d, float) else iou_thr_2d
            for iou_thr_2d_single in iou_thr_2d:
                mean_ap, _ = eval_map(
                    results,
                    annotations,
                    scale_ranges=None,
                    iou_thr=iou_thr_2d_single,
                    dataset=self.CLASSES,
                    logger=logger)
                eval_results['mAP_' + str(iou_thr_2d_single)] = mean_ap
            return eval_results
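

# Usage sketch (hypothetical paths; `results` comes from a detector's test
# loop). Dict-style 3D results go through the indoor protocol above, while
# plain 2D results fall back to `eval_map`:
#
#   dataset = SUNRGBDDataset(
#       data_root='data/sunrgbd',
#       ann_file='sunrgbd_infos_val.pkl',
#       test_mode=True)
#   metrics = dataset.evaluate(results, iou_thr=(0.25, 0.5))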