"test/srt/test_tool_choice.py" did not exist on "f127355a3066ff3d4c8dea7899a2e25335fd9890"
waymo_metric.py 15.7 KB
Newer Older
1
2
3
# Copyright (c) OpenMMLab. All rights reserved.
import tempfile
from os import path as osp
from typing import Dict, List, Optional, Sequence, Union

import numpy as np
import torch
from mmengine import Config
from mmengine.device import get_device
from mmengine.evaluator import BaseMetric
from mmengine.logging import MMLogger, print_log

from mmdet3d.models.layers import box3d_multiclass_nms
from mmdet3d.registry import METRICS
from mmdet3d.structures import (Box3DMode, CameraInstance3DBoxes,
                                LiDARInstance3DBoxes, points_cam2img,
                                xywhr2xyxyr)


@METRICS.register_module()
class WaymoMetric(BaseMetric):
    """Waymo evaluation metric.

    Args:
        waymo_bin_file (str): The path of the annotation file in waymo format.
        metric (str or List[str]): Metrics to be evaluated. Defaults to 'mAP'.
        load_type (str): Type of loading mode during training.

            - 'frame_based': Load all of the instances in the frame.
            - 'mv_image_based': Load all of the instances in the frame and need
              to convert to the FOV-based data type to support image-based
              detector.
            - 'fov_image_based': Only load the instances inside the default cam
              and need to convert to the FOV-based data type to support image-
              based detector.
        result_prefix (str, optional): The prefix of result '*.bin' file,
            including the file path and the prefix of filename, e.g.,
            "a/b/prefix". If not specified, a temp file will be created.
            Defaults to None.
        format_only (bool): Format the output results without performing
            evaluation. It is useful when you want to format the result to a
            specific format and submit it to the test server.
            Defaults to False.
        nms_cfg (dict): The configuration of non-maximum suppression used to
            merge the multi-view predicted bboxes; only used when
            load_type == 'mv_image_based'. Defaults to None.
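
    Examples:
        A minimal construction sketch; the paths and NMS values below are
        illustrative assumptions rather than defaults from a released
        config:

        >>> # frame-based (LiDAR) evaluation
        >>> metric = WaymoMetric(
        ...     waymo_bin_file='data/waymo/waymo_format/gt.bin',
        ...     metric='mAP')
        >>> # the multi-view image-based variant needs an nms_cfg to merge
        >>> # per-camera boxes; `score_thr` and `max_per_frame` are read in
        >>> # this class, the remaining fields by `box3d_multiclass_nms`
        >>> mv_metric = WaymoMetric(
        ...     waymo_bin_file='data/waymo/waymo_format/gt.bin',
        ...     load_type='mv_image_based',
        ...     nms_cfg=dict(
        ...         use_rotate_nms=True,
        ...         nms_thr=0.05,
        ...         score_thr=0.001,
        ...         max_per_frame=500))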
    """
    num_cams = 5
    default_prefix = 'Waymo metric'

    def __init__(self,
                 waymo_bin_file: str,
                 metric: Union[str, List[str]] = 'mAP',
                 load_type: str = 'frame_based',
                 result_prefix: Optional[str] = None,
                 format_only: bool = False,
                 nms_cfg=None,
                 **kwargs) -> None:
        super().__init__(**kwargs)
        self.waymo_bin_file = waymo_bin_file
        self.metrics = metric if isinstance(metric, list) else [metric]
        self.load_type = load_type
        self.result_prefix = result_prefix
        self.format_only = format_only
        if self.format_only:
            assert result_prefix is not None, (
                'result_prefix must not be None when format_only is True, '
                'otherwise the result files will be saved to a temp '
                'directory which will be cleaned up at the end.')

        if nms_cfg is not None:
            assert load_type == 'mv_image_based', (
                'nms_cfg in WaymoMetric is only used when '
                "load_type == 'mv_image_based'.")
            self.nms_cfg = Config(nms_cfg)

    def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None:
        """Process one batch of data samples and predictions.
        The processed results should be stored in ``self.results``, which will
        be used to compute the metrics when all batches have been processed.

        Args:
            data_batch (dict): A batch of data from the dataloader.
            data_samples (Sequence[dict]): A batch of outputs from the model.
        """

        for data_sample in data_samples:
            result = dict()
            bboxes_3d = data_sample['pred_instances_3d']['bboxes_3d']
            bboxes_3d.limit_yaw(offset=0.5, period=np.pi * 2)
            scores_3d = data_sample['pred_instances_3d']['scores_3d']
            labels_3d = data_sample['pred_instances_3d']['labels_3d']
            # TODO: check lidar post-processing
            if isinstance(bboxes_3d, CameraInstance3DBoxes):
                box_corners = bboxes_3d.corners
                cam2img = box_corners.new_tensor(
                    np.array(data_sample['cam2img']))
                box_corners_in_image = points_cam2img(box_corners, cam2img)
                # box_corners_in_image: [N, 8, 2]
                minxy = torch.min(box_corners_in_image, dim=1)[0]
                maxxy = torch.max(box_corners_in_image, dim=1)[0]
                # check minxy & maxxy
                # if the projected 2d bbox has intersection
                # with the image, we keep it, otherwise, we omit it.
                img_shape = data_sample['img_shape']
                valid_inds = ((minxy[:, 0] < img_shape[1]) &
                              (minxy[:, 1] < img_shape[0]) & (maxxy[:, 0] > 0)
                              & (maxxy[:, 1] > 0))

                if valid_inds.sum() > 0:
                    lidar2cam = data_sample['lidar2cam']
                    bboxes_3d = bboxes_3d.convert_to(
                        Box3DMode.LIDAR,
                        np.linalg.inv(lidar2cam),
                        correct_yaw=True)
                    bboxes_3d = bboxes_3d[valid_inds]
                    scores_3d = scores_3d[valid_inds]
                    labels_3d = labels_3d[valid_inds]
                else:
                    # no box survives the FOV check; keep an empty LiDAR box
                    # container so the `.tensor` access below still works
                    bboxes_3d = LiDARInstance3DBoxes(torch.zeros([0, 7]))
                    scores_3d = torch.zeros([0])
                    labels_3d = torch.zeros([0])
            result['bboxes_3d'] = bboxes_3d.tensor.cpu().numpy()
            result['scores_3d'] = scores_3d.cpu().numpy()
            result['labels_3d'] = labels_3d.cpu().numpy()
            result['sample_idx'] = data_sample['sample_idx']
            result['context_name'] = data_sample['context_name']
            result['timestamp'] = data_sample['timestamp']
            self.results.append(result)

    def compute_metrics(self, results: List[dict]) -> Dict[str, float]:
        """Compute the metrics from processed results.

        Args:
            results (List[dict]): The processed results of the whole dataset.

        Returns:
            Dict[str, float]: The computed metrics. The keys are the names of
            the metrics, and the values are corresponding results.
        """
        logger: MMLogger = MMLogger.get_current_instance()
        self.classes = self.dataset_meta['classes']

        # unlike KITTI, Waymo does not need to convert the annotation file

        # handle the mv_image_based load_type
        if self.load_type == 'mv_image_based':
            assert len(results) % 5 == 0, (
                'The multi-view image-based results must be 5 times as '
                'large as the original frame-based results.')
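            # chunk the flat per-image results into consecutive groups of
            # five, one group per frame (one result per camera image)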
            frame_results = [
                results[i:i + 5] for i in range(0, len(results), 5)
            ]
            results = self.merge_multi_view_boxes(frame_results)

        if self.result_prefix is None:
            eval_tmp_dir = tempfile.TemporaryDirectory()
            result_prefix = osp.join(eval_tmp_dir.name, 'results')
        else:
            eval_tmp_dir = None
            result_prefix = self.result_prefix

        self.format_results(results, result_prefix=result_prefix)

        metric_dict = {}

        if self.format_only:
            logger.info('results are saved in '
                        f'{osp.dirname(self.result_prefix)}')
            return metric_dict

        for metric in self.metrics:
            ap_dict = self.waymo_evaluate(
                result_prefix, metric=metric, logger=logger)
            metric_dict.update(ap_dict)
        if eval_tmp_dir is not None:
            eval_tmp_dir.cleanup()

        return metric_dict

    def waymo_evaluate(self,
                       result_prefix: str,
                       metric: Optional[str] = None,
                       logger: Optional[MMLogger] = None) -> Dict[str, float]:
        """Evaluation in Waymo protocol.

        Args:
            result_prefix (str): The location where the prediction results
                are stored.
            metric (str, optional): Metric to be evaluated. Defaults to None.
            logger (MMLogger, optional): Logger used for printing related
                information during evaluation. Defaults to None.

        Returns:
            Dict[str, float]: Results of each evaluation metric.
        """

        import subprocess
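        # the compute_detection(_let)_metrics_main executables invoked below
        # are the compiled Waymo open dataset evaluation binaries; they are
        # assumed to be built and available under
        # mmdet3d/evaluation/functional/waymo_utils/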

        if metric == 'mAP':
            eval_str = 'mmdet3d/evaluation/functional/waymo_utils/' + \
                f'compute_detection_metrics_main {result_prefix}.bin ' + \
                f'{self.waymo_bin_file}'
            print(eval_str)
            ret_bytes = subprocess.check_output(eval_str, shell=True)
            ret_texts = ret_bytes.decode('utf-8')
            print_log(ret_texts, logger=logger)

            ap_dict = {
                'Vehicle/L1 mAP': 0,
                'Vehicle/L1 mAPH': 0,
                'Vehicle/L2 mAP': 0,
                'Vehicle/L2 mAPH': 0,
                'Pedestrian/L1 mAP': 0,
                'Pedestrian/L1 mAPH': 0,
                'Pedestrian/L2 mAP': 0,
                'Pedestrian/L2 mAPH': 0,
                'Sign/L1 mAP': 0,
                'Sign/L1 mAPH': 0,
                'Sign/L2 mAP': 0,
                'Sign/L2 mAPH': 0,
                'Cyclist/L1 mAP': 0,
                'Cyclist/L1 mAPH': 0,
                'Cyclist/L2 mAP': 0,
                'Cyclist/L2 mAPH': 0,
                'Overall/L1 mAP': 0,
                'Overall/L1 mAPH': 0,
                'Overall/L2 mAP': 0,
                'Overall/L2 mAPH': 0
            }
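            # the evaluation binary reports one line per (class, level),
            # e.g. '... [mAP 0.8] [mAPH 0.79]'; split the whole text on the
            # metric tokens and read each value up to the closing ']'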
            mAP_splits = ret_texts.split('mAP ')
            mAPH_splits = ret_texts.split('mAPH ')
            for idx, key in enumerate(ap_dict.keys()):
                split_idx = int(idx / 2) + 1
                if idx % 2 == 0:  # mAP
                    ap_dict[key] = float(mAP_splits[split_idx].split(']')[0])
                else:  # mAPH
                    ap_dict[key] = float(mAPH_splits[split_idx].split(']')[0])
            ap_dict['Overall/L1 mAP'] = \
                (ap_dict['Vehicle/L1 mAP'] + ap_dict['Pedestrian/L1 mAP'] +
                    ap_dict['Cyclist/L1 mAP']) / 3
            ap_dict['Overall/L1 mAPH'] = \
                (ap_dict['Vehicle/L1 mAPH'] + ap_dict['Pedestrian/L1 mAPH'] +
                    ap_dict['Cyclist/L1 mAPH']) / 3
            ap_dict['Overall/L2 mAP'] = \
                (ap_dict['Vehicle/L2 mAP'] + ap_dict['Pedestrian/L2 mAP'] +
                    ap_dict['Cyclist/L2 mAP']) / 3
            ap_dict['Overall/L2 mAPH'] = \
                (ap_dict['Vehicle/L2 mAPH'] + ap_dict['Pedestrian/L2 mAPH'] +
                    ap_dict['Cyclist/L2 mAPH']) / 3
        elif metric == 'LET_mAP':
            eval_str = 'mmdet3d/evaluation/functional/waymo_utils/' + \
                f'compute_detection_let_metrics_main {result_prefix}.bin ' + \
                f'{self.waymo_bin_file}'

            print(eval_str)
            ret_bytes = subprocess.check_output(eval_str, shell=True)
            ret_texts = ret_bytes.decode('utf-8')

            print_log(ret_texts, logger=logger)
            ap_dict = {
                'Vehicle mAPL': 0,
                'Vehicle mAP': 0,
                'Vehicle mAPH': 0,
                'Pedestrian mAPL': 0,
                'Pedestrian mAP': 0,
                'Pedestrian mAPH': 0,
                'Sign mAPL': 0,
                'Sign mAP': 0,
                'Sign mAPH': 0,
                'Cyclist mAPL': 0,
                'Cyclist mAP': 0,
                'Cyclist mAPH': 0,
                'Overall mAPL': 0,
                'Overall mAP': 0,
                'Overall mAPH': 0
            }
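            # same token-splitting scheme as the mAP branch above, with an
            # additional mAPL column per class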
            mAPL_splits = ret_texts.split('mAPL ')
            mAP_splits = ret_texts.split('mAP ')
            mAPH_splits = ret_texts.split('mAPH ')
            for idx, key in enumerate(ap_dict.keys()):
                split_idx = int(idx / 3) + 1
                if idx % 3 == 0:  # mAPL
                    ap_dict[key] = float(mAPL_splits[split_idx].split(']')[0])
                elif idx % 3 == 1:  # mAP
                    ap_dict[key] = float(mAP_splits[split_idx].split(']')[0])
                else:  # mAPH
                    ap_dict[key] = float(mAPH_splits[split_idx].split(']')[0])
            ap_dict['Overall mAPL'] = \
                (ap_dict['Vehicle mAPL'] + ap_dict['Pedestrian mAPL'] +
                    ap_dict['Cyclist mAPL']) / 3
            ap_dict['Overall mAP'] = \
                (ap_dict['Vehicle mAP'] + ap_dict['Pedestrian mAP'] +
                    ap_dict['Cyclist mAP']) / 3
            ap_dict['Overall mAPH'] = \
                (ap_dict['Vehicle mAPH'] + ap_dict['Pedestrian mAPH'] +
                    ap_dict['Cyclist mAPH']) / 3
        return ap_dict

    def format_results(
        self,
        results: List[dict],
        result_prefix: Optional[str] = None
    ) -> None:
        """Format the results to bin file.

        Args:
            results (List[dict]): Testing results of the dataset.
            result_prefix (str, optional): The prefix of result file. It
                includes the file path and the prefix of filename, e.g.,
                "a/b/prefix". If not specified, a temp file will be created.
                Defaults to None.
        """
        waymo_results_final_path = f'{result_prefix}.bin'

        from ..functional.waymo_utils.prediction_to_waymo import \
            Prediction2Waymo
        converter = Prediction2Waymo(results, waymo_results_final_path,
                                     self.classes)
        converter.convert()

    def merge_multi_view_boxes(
            self, frame_results: List[List[dict]]) -> List[dict]:
        """Merge bounding boxes predicted from multi-view images.

        Args:
            frame_results (List[List[dict]]): The results of all frames,
                grouped as one list of per-camera results per frame.

        Returns:
            List[dict]: Merged results, one dict per frame.
        """
        merged_results = []
        for frame_result in frame_results:
            merged_result = dict()
            merged_result['sample_idx'] = frame_result[0]['sample_idx'] // 5
            merged_result['context_name'] = frame_result[0]['context_name']
            merged_result['timestamp'] = frame_result[0]['timestamp']
            bboxes_3d, scores_3d, labels_3d = [], [], []
            for result in frame_result:
                assert result['timestamp'] == merged_result['timestamp']
                bboxes_3d.append(result['bboxes_3d'])
                scores_3d.append(result['scores_3d'])
                labels_3d.append(result['labels_3d'])

            bboxes_3d = np.concatenate(bboxes_3d)
            scores_3d = np.concatenate(scores_3d)
            labels_3d = np.concatenate(labels_3d)

            device = get_device()
            lidar_boxes3d = LiDARInstance3DBoxes(
                torch.from_numpy(bboxes_3d).to(device))
            scores = torch.from_numpy(scores_3d).to(device)
            labels = torch.from_numpy(labels_3d).long().to(device)
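            # scatter each box's score into a (num_boxes, num_classes + 1)
            # matrix: box3d_multiclass_nms expects per-class scores with an
            # extra trailing background column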
            nms_scores = scores.new_zeros(scores.shape[0],
                                          len(self.classes) + 1)
            indices = labels.new_tensor(list(range(scores.shape[0])))
            nms_scores[indices, labels] = scores
            lidar_boxes3d_for_nms = xywhr2xyxyr(lidar_boxes3d.bev)
            boxes3d = lidar_boxes3d.tensor
            bboxes_3d, scores_3d, labels_3d = box3d_multiclass_nms(
                boxes3d, lidar_boxes3d_for_nms, nms_scores,
                self.nms_cfg.score_thr, self.nms_cfg.max_per_frame,
                self.nms_cfg)

            merged_result['bboxes_3d'] = bboxes_3d.cpu().numpy()
            merged_result['scores_3d'] = scores_3d.cpu().numpy()
            merged_result['labels_3d'] = labels_3d.cpu().numpy()
            merged_results.append(merged_result)
        return merged_results