sunrgbd_dataset.py 5.34 KB
Newer Older
dingchang's avatar
dingchang committed
1
# Copyright (c) OpenMMLab. All rights reserved.
zhangshilong's avatar
zhangshilong committed
2
3

import os.path as osp
jshilong's avatar
jshilong committed
4
from typing import Callable, List, Optional, Union
5

jshilong's avatar
jshilong committed
6
7
import numpy as np

8
from mmdet3d.registry import DATASETS
zhangshilong's avatar
zhangshilong committed
9
from mmdet3d.structures import DepthInstance3DBoxes
jshilong's avatar
jshilong committed
10
from .det3d_dataset import Det3DDataset
liyinhao's avatar
liyinhao committed
11
12
13


@DATASETS.register_module()
class SUNRGBDDataset(Det3DDataset):
    r"""SUNRGBD Dataset.

    This class serves as the API for experiments on the SUNRGBD Dataset.

    See the `download page <http://rgbd.cs.princeton.edu/challenge.html>`_
    for data downloading.

    Args:
        data_root (str): Path of dataset root.
        ann_file (str): Path of annotation file.
        metainfo (dict, optional): Meta information for dataset, such as class
            information. Defaults to None.
        data_prefix (dict): Prefix for data. Defaults to
            dict(pts='points', img='sunrgbd_trainval/image').
        pipeline (List[dict or Callable]): Pipeline used for data processing.
            Defaults to [].
        default_cam_key (str): The default camera name adopted.
            Defaults to 'CAM0'.
        modality (dict): Modality to specify the sensor data used as input.
            Must contain both the 'use_camera' and 'use_lidar' keys, and at
            least one of them must be truthy.
            Defaults to dict(use_camera=True, use_lidar=True).
        box_type_3d (str): Type of 3D box of this dataset.
            Based on the `box_type_3d`, the dataset will encapsulate the boxes
            in their original format and then convert them to `box_type_3d`.
            Defaults to 'Depth' in this dataset. Available options include:

            - 'LiDAR': Box in LiDAR coordinates.
            - 'Depth': Box in depth coordinates, usually for indoor dataset.
            - 'Camera': Box in camera coordinates.
        filter_empty_gt (bool): Whether to filter empty GT.
            Defaults to True.
        test_mode (bool): Whether the dataset is in test mode.
            Defaults to False.
        **kwargs: Extra keyword arguments, forwarded unchanged to
            ``Det3DDataset.__init__``.
    """
    METAINFO = {
        'classes': ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk',
                    'dresser', 'night_stand', 'bookshelf', 'bathtub')
    }

    # NOTE: the dict/list defaults below are evaluated once, at definition
    # time, and are only forwarded to the parent constructor here. They are
    # kept as-is so the public signature stays unchanged.
    def __init__(self,
                 data_root: str,
                 ann_file: str,
                 metainfo: Optional[dict] = None,
                 data_prefix: dict = dict(
                     pts='points', img='sunrgbd_trainval/image'),
                 pipeline: List[Union[dict, Callable]] = [],
                 default_cam_key: str = 'CAM0',
                 modality: dict = dict(use_camera=True, use_lidar=True),
                 box_type_3d: str = 'Depth',
                 filter_empty_gt: bool = True,
                 test_mode: bool = False,
                 **kwargs) -> None:
        super().__init__(
            data_root=data_root,
            ann_file=ann_file,
            metainfo=metainfo,
            data_prefix=data_prefix,
            pipeline=pipeline,
            default_cam_key=default_cam_key,
            modality=modality,
            box_type_3d=box_type_3d,
            filter_empty_gt=filter_empty_gt,
            test_mode=test_mode,
            **kwargs)
        # Validate the modality only after the parent constructor has run,
        # because the checks read ``self.modality``.
        assert 'use_camera' in self.modality and \
            'use_lidar' in self.modality, \
            "modality must contain both 'use_camera' and 'use_lidar'"
        assert self.modality['use_camera'] or self.modality['use_lidar'], \
            "at least one of 'use_camera' and 'use_lidar' must be enabled"

    def parse_data_info(self, info: dict) -> dict:
        """Process the raw data info.

        Prepend the configured data prefixes to the point-cloud and image
        paths of the enabled modalities, expose the default camera's image
        path and ``depth2img`` matrix at the top level of ``info``, and
        attach the parsed annotations.

        Args:
            info (dict): Raw info dict. It is modified in place.

        Returns:
            dict: The same ``info`` dict. It has ``ann_info`` when not in
            test mode, or ``eval_ann_info`` when in test mode with
            ``self.load_eval_anns`` set.
        """
        if self.modality['use_lidar']:
            lidar_points = info['lidar_points']
            lidar_points['lidar_path'] = osp.join(
                self.data_prefix.get('pts', ''), lidar_points['lidar_path'])

        if self.modality['use_camera']:
            img_prefix = self.data_prefix.get('img', '')
            # Only the per-camera dicts are needed, not the camera ids.
            for img_info in info['images'].values():
                if 'img_path' in img_info:
                    img_info['img_path'] = osp.join(img_prefix,
                                                    img_info['img_path'])
            if self.default_cam_key is not None:
                # Read after the loop above, so the prefixed path is the one
                # copied to the top level.
                default_cam = info['images'][self.default_cam_key]
                info['img_path'] = default_cam['img_path']
                info['depth2img'] = np.array(
                    default_cam['depth2img'], dtype=np.float32)

        if not self.test_mode:
            # used in training
            info['ann_info'] = self.parse_ann_info(info)
        elif self.load_eval_anns:
            # used in evaluation
            info['eval_ann_info'] = self.parse_ann_info(info)

        return info

    def parse_ann_info(self, info: dict) -> dict:
        """Process the `instances` in data info to `ann_info`.

        Args:
            info (dict): Info dict.

        Returns:
            dict: Processed `ann_info`. Its ``gt_bboxes_3d`` entry is a
            ``DepthInstance3DBoxes`` converted to ``self.box_mode_3d``.
        """
        ann_info = super().parse_ann_info(info)
        # process data without any annotations: fall back to empty arrays
        if ann_info is None:
            ann_info = dict()
            ann_info['gt_bboxes_3d'] = np.zeros((0, 6), dtype=np.float32)
            ann_info['gt_labels_3d'] = np.zeros((0, ), dtype=np.int64)
        # to target box structure
        # NOTE(review): origin=(0.5, 0.5, 0.5) presumably marks the stored
        # centers as geometric box centers - confirm against the
        # DepthInstance3DBoxes documentation.
        ann_info['gt_bboxes_3d'] = DepthInstance3DBoxes(
            ann_info['gt_bboxes_3d'],
            origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)

        return ann_info