# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from typing import Callable, List, Optional, Union

import numpy as np

from mmdet3d.registry import DATASETS
from mmdet3d.structures import DepthInstance3DBoxes
from .det3d_dataset import Det3DDataset


@DATASETS.register_module()
class SUNRGBDDataset(Det3DDataset):
    r"""SUNRGBD Dataset.

    This class serves as the API for experiments on the SUNRGBD Dataset.

    See the `download page <http://rgbd.cs.princeton.edu/challenge.html>`_
    for data downloading.

    Args:
        data_root (str): Path of dataset root.
        ann_file (str): Path of annotation file.
        metainfo (dict, optional): Meta information for dataset, such as class
            information. Defaults to None.
        data_prefix (dict): Prefix for data. Defaults to
            dict(pts='points', img='sunrgbd_trainval/image').
        pipeline (List[dict]): Pipeline used for data processing.
            Defaults to [].
        default_cam_key (str): The default camera name adopted.
            Defaults to 'CAM0'.
        modality (dict): Modality to specify the sensor data used as input.
            Defaults to dict(use_camera=True, use_lidar=True).
        box_type_3d (str): Type of 3D box of this dataset.
            Based on the `box_type_3d`, the dataset will encapsulate the box
            to its original format and then convert it to `box_type_3d`.
            Defaults to 'Depth' in this dataset. Available options include:

            - 'LiDAR': Box in LiDAR coordinates.
            - 'Depth': Box in depth coordinates, usually for indoor dataset.
            - 'Camera': Box in camera coordinates.
        filter_empty_gt (bool): Whether to filter empty GT.
            Defaults to True.
        test_mode (bool): Whether the dataset is in test mode.
            Defaults to False.
    """
    # 10 SUNRGBD benchmark classes with one RGB palette color per class.
    METAINFO = {
        'classes': ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk',
                    'dresser', 'night_stand', 'bookshelf', 'bathtub'),
        'palette': [(255, 187, 120), (255, 152, 150), (140, 86, 75),
                    (188, 189, 34), (44, 160, 44), (247, 182, 210),
                    (196, 156, 148), (23, 190, 207), (148, 103, 189),
                    (227, 119, 194)]
    }

    def __init__(self,
                 data_root: str,
                 ann_file: str,
                 metainfo: Optional[dict] = None,
                 data_prefix: dict = dict(
                     pts='points', img='sunrgbd_trainval/image'),
                 pipeline: List[Union[dict, Callable]] = [],
                 default_cam_key: str = 'CAM0',
                 modality: dict = dict(use_camera=True, use_lidar=True),
                 box_type_3d: str = 'Depth',
                 filter_empty_gt: bool = True,
                 test_mode: bool = False,
                 **kwargs) -> None:
        super().__init__(
            data_root=data_root,
            ann_file=ann_file,
            metainfo=metainfo,
            data_prefix=data_prefix,
            pipeline=pipeline,
            default_cam_key=default_cam_key,
            modality=modality,
            box_type_3d=box_type_3d,
            filter_empty_gt=filter_empty_gt,
            test_mode=test_mode,
            **kwargs)
        # Both modality flags must be present, and at least one sensor
        # (camera or lidar) must be enabled for the dataset to be usable.
        assert 'use_camera' in self.modality and \
            'use_lidar' in self.modality
        assert self.modality['use_camera'] or self.modality['use_lidar']

    def parse_data_info(self, info: dict) -> dict:
        """Process the raw data info.

        Convert all relative path of needed modality data file to
        the absolute path. And process
        the `instances` field to `ann_info` in training stage.

        Args:
            info (dict): Raw info dict.

        Returns:
            dict: Has `ann_info` in training stage. And
            all path has been converted to absolute path.
        """
        if self.modality['use_lidar']:
            # Prepend the point-cloud prefix to the stored relative path.
            info['lidar_points']['lidar_path'] = \
                osp.join(
                    self.data_prefix.get('pts', ''),
                    info['lidar_points']['lidar_path'])

        if self.modality['use_camera']:
            # Prepend the image prefix to every camera's relative path.
            for cam_id, img_info in info['images'].items():
                if 'img_path' in img_info:
                    img_info['img_path'] = osp.join(
                        self.data_prefix.get('img', ''), img_info['img_path'])
            if self.default_cam_key is not None:
                # Lift the default camera's path and depth-to-image
                # projection to the top level for downstream transforms.
                info['img_path'] = info['images'][
                    self.default_cam_key]['img_path']
                info['depth2img'] = np.array(
                    info['images'][self.default_cam_key]['depth2img'],
                    dtype=np.float32)

        if not self.test_mode:
            # used in training
            info['ann_info'] = self.parse_ann_info(info)
        if self.test_mode and self.load_eval_anns:
            # kept separately so evaluation can access GT in test mode
            info['eval_ann_info'] = self.parse_ann_info(info)

        return info

    def parse_ann_info(self, info: dict) -> dict:
        """Process the `instances` in data info to `ann_info`.

        Args:
            info (dict): Info dict.

        Returns:
            dict: Processed `ann_info`.
        """
        ann_info = super().parse_ann_info(info)
        # process data without any annotations: substitute empty arrays so
        # downstream code can rely on the keys being present.
        if ann_info is None:
            ann_info = dict()
            ann_info['gt_bboxes_3d'] = np.zeros((0, 6), dtype=np.float32)
            ann_info['gt_labels_3d'] = np.zeros((0, ), dtype=np.int64)
        # to target box structure: wrap raw boxes (gravity-center origin
        # (0.5, 0.5, 0.5)) as depth boxes, then convert to this dataset's
        # configured box mode.
        ann_info['gt_bboxes_3d'] = DepthInstance3DBoxes(
            ann_info['gt_bboxes_3d'],
            origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)

        return ann_info