Commit 36466f83 authored by liyinhao

Merge branch 'master' into process_raw_data

parents 25d39342 f93167c3
@@ -7,7 +7,7 @@ import pyquaternion
 from nuscenes.utils.data_classes import Box as NuScenesBox
 from mmdet.datasets import DATASETS
-from ..core.bbox import LiDARInstance3DBoxes, box_np_ops
+from ..core.bbox import LiDARInstance3DBoxes
 from .custom_3d import Custom3DDataset
@@ -72,8 +72,10 @@ class NuScenesDataset(Custom3DDataset):
                  classes=None,
                  load_interval=1,
                  with_velocity=True,
-                 test_mode=False,
                  modality=None,
+                 box_type_3d='LiDAR',
+                 filter_empty_gt=True,
+                 test_mode=False,
                  eval_version='detection_cvpr_2019'):
         self.load_interval = load_interval
         super().__init__(
@@ -82,6 +84,8 @@ class NuScenesDataset(Custom3DDataset):
             pipeline=pipeline,
             classes=classes,
             modality=modality,
+            box_type_3d=box_type_3d,
+            filter_empty_gt=filter_empty_gt,
             test_mode=test_mode)
         self.with_velocity = with_velocity
@@ -168,11 +172,11 @@ class NuScenesDataset(Custom3DDataset):
             gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_velocity], axis=-1)
         # the nuscenes box center is [0.5, 0.5, 0.5], we keep it
-        # the same as KITTI [0.5, 0.5, 0]
+        # the same as KITTI (0.5, 0.5, 0)
         gt_bboxes_3d = LiDARInstance3DBoxes(
             gt_bboxes_3d,
             box_dim=gt_bboxes_3d.shape[-1],
-            origin=[0.5, 0.5, 0.5])
+            origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)
         anns_results = dict(
             gt_bboxes_3d=gt_bboxes_3d,
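
Note: `origin=(0.5, 0.5, 0.5)` declares that the incoming nuScenes centers are gravity centers; `LiDARInstance3DBoxes` then stores bottom centers, i.e. origin (0.5, 0.5, 0), as the comment above says. A minimal numpy sketch of that z shift (illustrative only, not the library code):

    import numpy as np

    # one box as (x, y, z, w, l, h, yaw), z given at the gravity center
    boxes = np.array([[1.0, 2.0, 0.5, 1.6, 3.9, 1.5, 0.0]])
    src_z, dst_z = 0.5, 0.0  # relative z of source/destination origin
    boxes[:, 2] += boxes[:, 5] * (dst_z - src_z)  # shift by half the height
    print(boxes[0, 2])  # -0.25, the bottom-center z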
@@ -352,26 +356,28 @@ class NuScenesDataset(Custom3DDataset):
 def output_to_nusc_box(detection):
-    box3d = detection['boxes_3d'].numpy()
+    box3d = detection['boxes_3d']
     scores = detection['scores_3d'].numpy()
     labels = detection['labels_3d'].numpy()
+    box_gravity_center = box3d.gravity_center.numpy()
+    box_dims = box3d.dims.numpy()
+    box_yaw = box3d.yaw.numpy()
     # TODO: check whether this is necessary
     # with dir_offset & dir_limit in the head
-    box3d[:, 6] = -box3d[:, 6] - np.pi / 2
-    # the trained model is in [0.5, 0.5, 0],
-    # change them back to nuscenes [0.5, 0.5, 0.5]
-    box_np_ops.change_box3d_center_(box3d, [0.5, 0.5, 0], [0.5, 0.5, 0.5])
+    box_yaw = -box_yaw - np.pi / 2
     box_list = []
-    for i in range(box3d.shape[0]):
-        quat = pyquaternion.Quaternion(axis=[0, 0, 1], radians=box3d[i, 6])
-        velocity = (*box3d[i, 7:9], 0.0)
+    for i in range(len(box3d)):
+        quat = pyquaternion.Quaternion(axis=[0, 0, 1], radians=box_yaw[i])
+        velocity = (*box3d.tensor[i, 7:9], 0.0)
         # velo_val = np.linalg.norm(box3d[i, 7:9])
         # velo_ori = box3d[i, 6]
         # velocity = (
         #     velo_val * np.cos(velo_ori), velo_val * np.sin(velo_ori), 0.0)
         box = NuScenesBox(
-            box3d[i, :3],
-            box3d[i, 3:6],
+            box_gravity_center[i],
+            box_dims[i],
             quat,
             label=labels[i],
             score=scores[i],
...
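
Note: `output_to_nusc_box` now reads gravity centers, dims and yaw straight off the box structure before building `NuScenesBox` objects. A hedged sketch of one such conversion, with made-up values:

    import numpy as np
    import pyquaternion
    from nuscenes.utils.data_classes import Box as NuScenesBox

    yaw = -0.3 - np.pi / 2  # yaw after the sign flip applied above
    quat = pyquaternion.Quaternion(axis=[0, 0, 1], radians=yaw)
    box = NuScenesBox(
        [10.0, 5.0, 1.0],   # gravity center (x, y, z)
        [1.9, 4.5, 1.6],    # dims (w, l, h)
        quat,
        label=0,
        score=0.9,
        velocity=(1.2, -0.4, 0.0))
    print(box.orientation.yaw_pitch_roll[0])  # recovers the encoded yaw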
@@ -6,7 +6,7 @@ from numba.errors import NumbaPerformanceWarning
 from mmdet3d.core.bbox import box_np_ops
-warnings.filterwarnings("ignore", category=NumbaPerformanceWarning)
+warnings.filterwarnings('ignore', category=NumbaPerformanceWarning)
 @numba.njit
@@ -301,7 +301,7 @@ def noise_per_object_v3_(gt_boxes,
                 grot_uppers[..., np.newaxis],
                 size=[num_boxes, num_try])
-    origin = [0.5, 0.5, 0]
+    origin = (0.5, 0.5, 0)
     gt_box_corners = box_np_ops.center_to_corner_box3d(
         gt_boxes[:, :3],
         gt_boxes[:, 3:6],
...
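
Note: `center_to_corner_box3d` with `origin=(0.5, 0.5, 0)` treats the stored z as the bottom face of the box. A small numpy sketch of the corner expansion for a single un-rotated box (an approximation of the idea, not the exact kernel):

    import numpy as np

    center = np.array([0.0, 0.0, 0.0])   # bottom center
    dims = np.array([2.0, 4.0, 1.5])     # (w, l, h)
    origin = np.array([0.5, 0.5, 0.0])   # where `center` sits inside the box

    # unit-cube corners, shifted so that `origin` maps to (0, 0, 0)
    unit = np.array([[x, y, z] for x in (0, 1) for y in (0, 1)
                     for z in (0, 1)], dtype=np.float64) - origin
    corners = center + unit * dims       # (8, 3); yaw rotation would follow
    print(corners.min(0), corners.max(0))  # [-1, -2, 0] .. [1, 2, 1.5]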
@@ -73,14 +73,13 @@ class Collect3D(object):
     def __init__(self,
                  keys,
-                 pcd_shape=[1, 1600, 1408],
                  meta_keys=('filename', 'ori_shape', 'img_shape', 'lidar2img',
                             'pad_shape', 'scale_factor', 'flip', 'pcd_flip',
-                            'img_norm_cfg', 'rect', 'Trv2c', 'P2', 'pcd_trans',
-                            'sample_idx', 'pcd_scale_factor', 'pcd_rotation')):
+                            'box_mode_3d', 'box_type_3d', 'img_norm_cfg',
+                            'rect', 'Trv2c', 'P2', 'pcd_trans', 'sample_idx',
+                            'pcd_scale_factor', 'pcd_rotation')):
         self.keys = keys
         self.meta_keys = meta_keys
-        self.pcd_shape = pcd_shape
     def __call__(self, results):
         data = {}
@@ -88,7 +87,7 @@ class Collect3D(object):
         for key in self.meta_keys:
             if key in results:
                 img_meta[key] = results[key]
-        img_meta.update(pcd_shape=self.pcd_shape, pcd_pad_shape=self.pcd_shape)
         data['img_meta'] = DC(img_meta, cpu_only=True)
         for key in self.keys:
             data[key] = results[key]
...
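
Note: with `pcd_shape` gone, `Collect3D` only needs `keys` (and optionally `meta_keys`) in a pipeline config; anything listed in `meta_keys` that exists in `results`, now including 'box_mode_3d' and 'box_type_3d', ends up in `img_meta`. A hedged config-style usage sketch (the key names are typical, not prescribed):

    # final stage of a training pipeline config
    collect = dict(
        type='Collect3D',
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])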
@@ -25,21 +25,19 @@ class IndoorFlipData(object):
     def __call__(self, results):
         points = results['points']
         gt_bboxes_3d = results['gt_bboxes_3d']
-        aligned = True if gt_bboxes_3d.shape[1] == 6 else False
         results['flip_yz'] = False
         results['flip_xz'] = False
         if np.random.random() < self.flip_ratio_yz:
             # Flipping along the YZ plane
             points[:, 0] = -1 * points[:, 0]
-            gt_bboxes_3d[:, 0] = -1 * gt_bboxes_3d[:, 0]
-            if not aligned:
-                gt_bboxes_3d[:, 6] = np.pi - gt_bboxes_3d[:, 6]
+            gt_bboxes_3d.flip('horizontal')
             results['flip_yz'] = True
-        if aligned and np.random.random() < self.flip_ratio_xz:
+        if not gt_bboxes_3d.with_yaw and np.random.random(
+        ) < self.flip_ratio_xz:
             # Flipping along the XZ plane
             points[:, 1] = -1 * points[:, 1]
-            gt_bboxes_3d[:, 1] = -1 * gt_bboxes_3d[:, 1]
+            gt_bboxes_3d.flip('vertical')
             results['flip_xz'] = True
         results['points'] = points
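
Note: `flip('horizontal')` replaces the manual tensor surgery removed above: for a yawed box it negates x and mirrors the yaw angle. A minimal numpy sketch of the YZ-plane flip, matching the deleted lines:

    import numpy as np

    box = np.array([1.0, 2.0, 0.0, 2.0, 4.0, 1.5, 0.3])  # (x, y, z, w, l, h, yaw)
    box[0] = -box[0]           # mirror across the YZ plane
    box[6] = np.pi - box[6]    # yaw flips accordingly
    print(box[[0, 6]])         # [-1.0, pi - 0.3]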
@@ -154,57 +152,18 @@ class IndoorGlobalRotScale(object):
         rot_mat = np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]])
         return rot_mat
-    def _rotate_aligned_boxes(self, input_boxes, rot_mat):
-        """Rotate aligned boxes.
-        Rotate function for the aligned boxes.
-        Args:
-            input_boxes (ndarray): 3D boxes.
-            rot_mat (ndarray): Rotation matrix.
-        Returns:
-            rotated_boxes (ndarray): 3D boxes after rotation.
-        """
-        centers, lengths = input_boxes[:, 0:3], input_boxes[:, 3:6]
-        new_centers = np.dot(centers, rot_mat.T)
-        dx, dy = lengths[:, 0] / 2.0, lengths[:, 1] / 2.0
-        new_x = np.zeros((dx.shape[0], 4))
-        new_y = np.zeros((dx.shape[0], 4))
-        for i, corner in enumerate([(-1, -1), (1, -1), (1, 1), (-1, 1)]):
-            corners = np.zeros((dx.shape[0], 3))
-            corners[:, 0] = corner[0] * dx
-            corners[:, 1] = corner[1] * dy
-            corners = np.dot(corners, rot_mat.T)
-            new_x[:, i] = corners[:, 0]
-            new_y[:, i] = corners[:, 1]
-        new_dx = 2.0 * np.max(new_x, 1)
-        new_dy = 2.0 * np.max(new_y, 1)
-        new_lengths = np.stack((new_dx, new_dy, lengths[:, 2]), axis=1)
-        return np.concatenate([new_centers, new_lengths], axis=1)
     def __call__(self, results):
         points = results['points']
         gt_bboxes_3d = results['gt_bboxes_3d']
-        aligned = True if gt_bboxes_3d.shape[1] == 6 else False
         if self.rot_range is not None:
             assert len(self.rot_range) == 2, \
                 f'Expect length of rot range =2, ' \
                 f'got {len(self.rot_range)}.'
             rot_angle = np.random.uniform(self.rot_range[0], self.rot_range[1])
-            rot_mat = self._rotz(rot_angle)
-            points[:, :3] = np.dot(points[:, :3], rot_mat.T)
-            if aligned:
-                gt_bboxes_3d = self._rotate_aligned_boxes(
-                    gt_bboxes_3d, rot_mat)
-            else:
-                gt_bboxes_3d[:, :3] = np.dot(gt_bboxes_3d[:, :3], rot_mat.T)
-                gt_bboxes_3d[:, 6] -= rot_angle
+            if gt_bboxes_3d.tensor.shape[0] != 0:
+                gt_bboxes_3d.rotate(rot_angle)
+            points[:, :3] = np.dot(points[:, :3], self._rotz(rot_angle).T)
             results['rot_angle'] = rot_angle
         if self.scale_range is not None:
@@ -216,15 +175,14 @@ class IndoorGlobalRotScale(object):
                                             self.scale_range[1])
             points[:, :3] *= scale_ratio
-            gt_bboxes_3d[:, :3] *= scale_ratio
-            gt_bboxes_3d[:, 3:6] *= scale_ratio
+            gt_bboxes_3d.scale(scale_ratio)
             if self.shift_height:
                 points[:, -1] *= scale_ratio
             results['scale_ratio'] = scale_ratio
         results['points'] = points
-        results['gt_bboxes_3d'] = gt_bboxes_3d.astype(np.float32)
+        results['gt_bboxes_3d'] = gt_bboxes_3d
         return results
     def __repr__(self):
...
@@ -113,7 +113,7 @@ class ObjectSample(object):
         # Trv2c = input_dict['Trv2c']
         # P2 = input_dict['P2']
         if self.sample_2d:
-            img = input_dict['img']  # .astype(np.float32)
+            img = input_dict['img']
             gt_bboxes_2d = input_dict['gt_bboxes']
             # Assume for now 3D & 2D bboxes are the same
             sampled_dict = self.db_sampler.sample_all(
@@ -148,7 +148,7 @@ class ObjectSample(object):
                     [gt_bboxes_2d, sampled_gt_bboxes_2d]).astype(np.float32)
                 input_dict['gt_bboxes'] = gt_bboxes_2d
-                input_dict['img'] = sampled_dict['img']  # .astype(np.uint8)
+                input_dict['img'] = sampled_dict['img']
         input_dict['gt_bboxes_3d'] = gt_bboxes_3d
         input_dict['gt_labels_3d'] = gt_labels_3d
@@ -304,7 +304,11 @@ class ObjectRangeFilter(object):
         gt_labels_3d = input_dict['gt_labels_3d']
         mask = gt_bboxes_3d.in_range_bev(self.bev_range)
         gt_bboxes_3d = gt_bboxes_3d[mask]
-        gt_labels_3d = gt_labels_3d[mask]
+        # mask is a torch tensor but gt_labels_3d is still a numpy array;
+        # using mask to index gt_labels_3d directly causes a bug when
+        # len(gt_labels_3d) == 1, where mask=1 would be interpreted
+        # as gt_labels_3d[1] and raise an out-of-index error
+        gt_labels_3d = gt_labels_3d[mask.numpy().astype(np.bool)]
         # limit rad to [-pi, pi]
         gt_bboxes_3d.limit_yaw(offset=0.5, period=2 * np.pi)
...
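
Note: the pitfall described in the new comment is easy to reproduce. A small demo of the safe conversion (behavior of the unsafe form depends on the numpy/torch versions in use):

    import numpy as np
    import torch

    labels = np.array([7])       # a single ground-truth label
    mask = torch.tensor([True])  # boolean mask from in_range_bev

    # On some numpy/torch combinations, labels[mask] coerces the
    # one-element tensor to the integer 1 and raises IndexError.
    # Converting to a numpy bool array keeps the boolean semantics:
    print(labels[mask.numpy().astype(bool)])  # -> [7]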
@@ -2,6 +2,7 @@ import os.path as osp
 import numpy as np
+from mmdet3d.core.bbox import DepthInstance3DBoxes
 from mmdet.datasets import DATASETS
 from .custom_3d import Custom3DDataset
@@ -20,10 +21,18 @@ class ScanNetDataset(Custom3DDataset):
                  pipeline=None,
                  classes=None,
                  modality=None,
+                 box_type_3d='Depth',
                  filter_empty_gt=True,
                  test_mode=False):
-        super().__init__(data_root, ann_file, pipeline, classes, modality,
-                         filter_empty_gt, test_mode)
+        super().__init__(
+            data_root=data_root,
+            ann_file=ann_file,
+            pipeline=pipeline,
+            classes=classes,
+            modality=modality,
+            box_type_3d=box_type_3d,
+            filter_empty_gt=filter_empty_gt,
+            test_mode=test_mode)
     def get_ann_info(self, index):
         # Use index to get the annos, thus the evalhook could also use this api
@@ -35,6 +44,14 @@ class ScanNetDataset(Custom3DDataset):
         else:
             gt_bboxes_3d = np.zeros((0, 6), dtype=np.float32)
             gt_labels_3d = np.zeros((0, ), dtype=np.long)
+        # to target box structure
+        gt_bboxes_3d = DepthInstance3DBoxes(
+            gt_bboxes_3d,
+            box_dim=gt_bboxes_3d.shape[-1],
+            with_yaw=False,
+            origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)
         pts_instance_mask_path = osp.join(self.data_root,
                                           info['pts_instance_mask_path'])
         pts_semantic_mask_path = osp.join(self.data_root,
...
 import numpy as np
+from mmdet3d.core.bbox import DepthInstance3DBoxes
 from mmdet.datasets import DATASETS
 from .custom_3d import Custom3DDataset
@@ -16,10 +17,18 @@ class SUNRGBDDataset(Custom3DDataset):
                  pipeline=None,
                  classes=None,
                  modality=None,
+                 box_type_3d='Depth',
                  filter_empty_gt=True,
                  test_mode=False):
-        super().__init__(data_root, ann_file, pipeline, classes, modality,
-                         filter_empty_gt, test_mode)
+        super().__init__(
+            data_root=data_root,
+            ann_file=ann_file,
+            pipeline=pipeline,
+            classes=classes,
+            modality=modality,
+            box_type_3d=box_type_3d,
+            filter_empty_gt=filter_empty_gt,
+            test_mode=test_mode)
     def get_ann_info(self, index):
         # Use index to get the annos, thus the evalhook could also use this api
@@ -32,6 +41,10 @@ class SUNRGBDDataset(Custom3DDataset):
             gt_bboxes_3d = np.zeros((0, 7), dtype=np.float32)
             gt_labels_3d = np.zeros((0, ), dtype=np.long)
+        # to target box structure
+        gt_bboxes_3d = DepthInstance3DBoxes(
+            gt_bboxes_3d, origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)
         anns_results = dict(
             gt_bboxes_3d=gt_bboxes_3d, gt_labels_3d=gt_labels_3d)
         return anns_results
 from mmdet.models.backbones import SSDVGG, HRNet, ResNet, ResNetV1d, ResNeXt
+from .nostem_regnet import NoStemRegNet
 from .pointnet2_sa_ssg import PointNet2SASSG
 from .second import SECOND
 __all__ = [
-    'ResNet', 'ResNetV1d', 'ResNeXt', 'SSDVGG', 'HRNet', 'SECOND',
-    'PointNet2SASSG'
+    'ResNet', 'ResNetV1d', 'ResNeXt', 'SSDVGG', 'HRNet', 'NoStemRegNet',
+    'SECOND', 'PointNet2SASSG'
 ]
from mmdet.models.backbones import RegNet
from ..builder import BACKBONES


@BACKBONES.register_module()
class NoStemRegNet(RegNet):
    """RegNet backbone without Stem for 3D detection.
    More details can be found in `paper <https://arxiv.org/abs/2003.13678>`_ .
    Args:
        arch (dict): The parameter of RegNets.
            - w0 (int): initial width
            - wa (float): slope of width
            - wm (float): quantization parameter to quantize the width
            - depth (int): depth of the backbone
            - group_w (int): width of group
            - bot_mul (float): bottleneck ratio, i.e. expansion of bottleneck.
        strides (Sequence[int]): Strides of the first block of each stage.
        base_channels (int): Base channels after stem layer.
        in_channels (int): Number of input image channels. Normally 3.
        dilations (Sequence[int]): Dilation of each stage.
        out_indices (Sequence[int]): Output from which stages.
        style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
            layer is the 3x3 conv layer, otherwise the stride-two layer is
            the first 1x1 conv layer.
        frozen_stages (int): Stages to be frozen (all params fixed). -1 means
            not freezing any parameters.
        norm_cfg (dict): Dictionary to construct and config norm layer.
        norm_eval (bool): Whether to set norm layers to eval mode, namely,
            freeze running stats (mean and var). Note: Effect on Batch Norm
            and its variants only.
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed.
        zero_init_residual (bool): Whether to use zero init for the last norm
            layer in resblocks to let them behave as identity.
    Example:
        >>> from mmdet3d.models import NoStemRegNet
        >>> import torch
        >>> self = NoStemRegNet(
        ...     arch=dict(
        ...         w0=88,
        ...         wa=26.31,
        ...         wm=2.25,
        ...         group_w=48,
        ...         depth=25,
        ...         bot_mul=1.0))
        >>> self.eval()
        >>> inputs = torch.rand(1, 64, 16, 16)
        >>> level_outputs = self.forward(inputs)
        >>> for level_out in level_outputs:
        ...     print(tuple(level_out.shape))
        (1, 96, 8, 8)
        (1, 192, 4, 4)
        (1, 432, 2, 2)
        (1, 1008, 1, 1)
    """

    def __init__(self, arch, **kwargs):
        super(NoStemRegNet, self).__init__(arch, **kwargs)

    def _make_stem_layer(self, in_channels, base_channels):
        # override the parent stem with a no-op: inputs are already
        # feature maps, not raw images
        return

    def forward(self, x):
        outs = []
        for i, layer_name in enumerate(self.res_layers):
            res_layer = getattr(self, layer_name)
            x = res_layer(x)
            if i in self.out_indices:
                outs.append(x)
        return tuple(outs)
@@ -381,5 +381,5 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
             bboxes[..., 6] = (
                 dir_rot + self.dir_offset +
                 np.pi * dir_scores.to(bboxes.dtype))
+        bboxes = input_meta['box_type_3d'](bboxes, box_dim=self.box_code_size)
         return bboxes, scores, labels
@@ -150,13 +150,15 @@ class PartA2RPNHead(Anchor3DHead):
         result = self.class_agnostic_nms(mlvl_bboxes, mlvl_bboxes_for_nms,
                                          mlvl_max_scores, mlvl_label_pred,
                                          mlvl_cls_score, mlvl_dir_scores,
-                                         score_thr, cfg.nms_post, cfg)
+                                         score_thr, cfg.nms_post, cfg,
+                                         input_meta)
         return result
     def class_agnostic_nms(self, mlvl_bboxes, mlvl_bboxes_for_nms,
                            mlvl_max_scores, mlvl_label_pred, mlvl_cls_score,
-                           mlvl_dir_scores, score_thr, max_num, cfg):
+                           mlvl_dir_scores, score_thr, max_num, cfg,
+                           input_meta):
         bboxes = []
         scores = []
         labels = []
@@ -202,6 +204,8 @@ class PartA2RPNHead(Anchor3DHead):
             labels = labels[inds]
             scores = scores[inds]
             cls_scores = cls_scores[inds]
+            bboxes = input_meta['box_type_3d'](
+                bboxes, box_dim=self.box_code_size)
             return dict(
                 boxes_3d=bboxes,
                 scores_3d=scores,
@@ -210,7 +214,9 @@ class PartA2RPNHead(Anchor3DHead):
             )
         else:
             return dict(
-                boxes_3d=mlvl_bboxes.new_zeros([0, self.box_code_size]),
+                boxes_3d=input_meta['box_type_3d'](
+                    mlvl_bboxes.new_zeros([0, self.box_code_size]),
+                    box_dim=self.box_code_size),
                 scores_3d=mlvl_bboxes.new_zeros([0]),
                 labels_3d=mlvl_bboxes.new_zeros([0]),
                 cls_preds=mlvl_bboxes.new_zeros([0, mlvl_cls_score.shape[-1]]))
@@ -20,7 +20,8 @@ class AnchorTrainMixin(object):
         Args:
             anchor_list (list[list]): Multi level anchors of each image.
-            gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
+            gt_bboxes_list (list[BaseInstance3DBoxes]): Ground truth
+                bboxes of each image.
             img_metas (list[dict]): Meta info of each image.
         Returns:
...
@@ -5,14 +5,11 @@ import torch.nn.functional as F
 from mmcv.cnn import ConvModule
 from mmdet3d.core import build_bbox_coder, multi_apply
-from mmdet3d.core.bbox.box_torch_ops import boxes3d_to_corners3d_lidar_torch
-from mmdet3d.core.bbox.transforms import upright_depth_to_lidar_torch
 from mmdet3d.core.post_processing import aligned_3d_nms
 from mmdet3d.models.builder import build_loss
 from mmdet3d.models.losses import chamfer_distance
 from mmdet3d.models.model_utils import VoteModule
-from mmdet3d.ops import (PointSAModule, furthest_point_sample,
-                         points_in_boxes_batch)
+from mmdet3d.ops import PointSAModule, furthest_point_sample
 from mmdet.models import HEADS
@@ -276,7 +273,7 @@ class VoteHead(nn.Module):
         Args:
             points (list[Tensor]): Points of each batch.
-            gt_bboxes_3d (list[Tensor]): gt bboxes of each batch.
+            gt_bboxes_3d (BaseInstance3DBoxes): gt bboxes of each batch.
             gt_labels_3d (list[Tensor]): gt class labels of each batch.
             pts_semantic_mask (None | list[Tensor]): point-wise semantic
                 label of each batch.
@@ -293,8 +290,9 @@ class VoteHead(nn.Module):
         gt_num = list()
         for index in range(len(gt_labels_3d)):
             if len(gt_labels_3d[index]) == 0:
-                gt_bboxes_3d[index] = gt_bboxes_3d[index].new_zeros(
-                    1, gt_bboxes_3d[index].shape[-1])
+                fake_box = gt_bboxes_3d[index].tensor.new_zeros(
+                    1, gt_bboxes_3d[index].tensor.shape[-1])
+                gt_bboxes_3d[index] = gt_bboxes_3d[index].new_box(fake_box)
                 gt_labels_3d[index] = gt_labels_3d[index].new_zeros(1)
                 valid_gt_masks.append(gt_labels_3d[index].new_zeros(1))
                 gt_num.append(1)
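
Note: the fake-box padding above keeps downstream target assignment from ever seeing an empty tensor. A plain-torch sketch of the same idea, with hypothetical batch data:

    import torch

    # batch with one empty ground-truth sample
    gt_labels = [torch.tensor([2, 5]), torch.tensor([], dtype=torch.long)]
    gt_boxes = [torch.rand(2, 7), torch.zeros(0, 7)]

    for i in range(len(gt_labels)):
        if len(gt_labels[i]) == 0:
            # pad with a single all-zero "fake" box and label
            gt_boxes[i] = gt_boxes[i].new_zeros(1, gt_boxes[i].shape[-1])
            gt_labels[i] = gt_labels[i].new_zeros(1)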
@@ -359,25 +357,23 @@ class VoteHead(nn.Module):
                             aggregated_points=None):
         assert self.bbox_coder.with_rot or pts_semantic_mask is not None
+        gt_bboxes_3d = gt_bboxes_3d.to(points.device)
         # generate votes target
         num_points = points.shape[0]
         if self.bbox_coder.with_rot:
-            points_lidar, gt_bboxes_3d_lidar = upright_depth_to_lidar_torch(
-                points, gt_bboxes_3d, to_bottom_center=True)
             vote_targets = points.new_zeros([num_points, 3 * self.gt_per_seed])
             vote_target_masks = points.new_zeros([num_points],
                                                  dtype=torch.long)
             vote_target_idx = points.new_zeros([num_points], dtype=torch.long)
-            box_indices_all = points_in_boxes_batch(
-                points_lidar.unsqueeze(0), gt_bboxes_3d_lidar.unsqueeze(0))[0]
-            for i in range(gt_bboxes_3d.shape[0]):
+            box_indices_all = gt_bboxes_3d.points_in_boxes(points)
+            for i in range(gt_labels_3d.shape[0]):
                 box_indices = box_indices_all[:, i]
                 indices = torch.nonzero(box_indices).squeeze(-1)
                 selected_points = points[indices]
                 vote_target_masks[indices] = 1
                 vote_targets_tmp = vote_targets[indices]
-                votes = gt_bboxes_3d[i][:3].unsqueeze(
+                votes = gt_bboxes_3d.gravity_center[i].unsqueeze(
                     0) - selected_points[:, :3]
                 for j in range(self.gt_per_seed):
@@ -438,7 +434,7 @@ class VoteHead(nn.Module):
         size_class_targets = size_class_targets[assignment]
         size_res_targets = size_res_targets[assignment]
-        one_hot_size_targets = gt_bboxes_3d.new_zeros(
+        one_hot_size_targets = gt_bboxes_3d.tensor.new_zeros(
             (proposal_num, self.num_sizes))
         one_hot_size_targets.scatter_(1, size_class_targets.unsqueeze(-1), 1)
         one_hot_size_targets = one_hot_size_targets.unsqueeze(-1).repeat(
@@ -455,38 +451,43 @@ class VoteHead(nn.Module):
                 dir_class_targets, dir_res_targets, center_targets,
                 mask_targets.long(), objectness_targets, objectness_masks)
-    def get_bboxes(self, points, bbox_preds, img_meta, rescale=False):
+    def get_bboxes(self, points, bbox_preds, input_meta, rescale=False):
         # decode boxes
         obj_scores = F.softmax(bbox_preds['obj_scores'], dim=-1)[..., -1]
         sem_scores = F.softmax(bbox_preds['sem_scores'], dim=-1)
         bbox_depth = self.bbox_coder.decode(bbox_preds)
-        points_lidar, bbox_lidar = upright_depth_to_lidar_torch(
-            points[..., :3], bbox_depth, to_bottom_center=True)
         batch_size = bbox_depth.shape[0]
         results = list()
         for b in range(batch_size):
             bbox_selected, score_selected, labels = self.multiclass_nms_single(
-                obj_scores[b], sem_scores[b], bbox_lidar[b], points_lidar[b])
-            results.append((bbox_selected, score_selected, labels))
+                obj_scores[b], sem_scores[b], bbox_depth[b],
+                points[b, ..., :3], input_meta[b])
+            bbox = input_meta[b]['box_type_3d'](
+                bbox_selected,
+                box_dim=bbox_selected.shape[-1],
+                with_yaw=self.bbox_coder.with_rot)
+            results.append((bbox, score_selected, labels))
         return results
-    def multiclass_nms_single(self, obj_scores, sem_scores, bbox,
-                              points_lidar):
-        box_indices = points_in_boxes_batch(
-            points_lidar.unsqueeze(0), bbox.unsqueeze(0))[0]
-        nonempty_box_mask = box_indices.T.sum(1) > 5
-        bbox_classes = torch.argmax(sem_scores, -1)
-        # boxes3d to aligned boxes
-        corner3d = boxes3d_to_corners3d_lidar_torch(bbox)
+    def multiclass_nms_single(self, obj_scores, sem_scores, bbox, points,
+                              input_meta):
+        bbox = input_meta['box_type_3d'](
+            bbox,
+            box_dim=bbox.shape[-1],
+            with_yaw=self.bbox_coder.with_rot,
+            origin=(0.5, 0.5, 0.5))
+        box_indices = bbox.points_in_boxes(points)
+        corner3d = bbox.corners
         minmax_box3d = corner3d.new(torch.Size((corner3d.shape[0], 6)))
         minmax_box3d[:, :3] = torch.min(corner3d, dim=1)[0]
         minmax_box3d[:, 3:] = torch.max(corner3d, dim=1)[0]
+        nonempty_box_mask = box_indices.T.sum(1) > 5
+        bbox_classes = torch.argmax(sem_scores, -1)
         nms_selected = aligned_3d_nms(minmax_box3d[nonempty_box_mask],
                                       obj_scores[nonempty_box_mask],
                                       bbox_classes[nonempty_box_mask],
@@ -502,7 +503,7 @@ class VoteHead(nn.Module):
         if self.test_cfg.per_class_proposal:
             bbox_selected, score_selected, labels = [], [], []
             for k in range(sem_scores.shape[-1]):
-                bbox_selected.append(bbox[selected])
+                bbox_selected.append(bbox[selected].tensor)
                 score_selected.append(obj_scores[selected] *
                                       sem_scores[selected][:, k])
                 labels.append(
@@ -511,7 +512,7 @@ class VoteHead(nn.Module):
             score_selected = torch.cat(score_selected, 0)
             labels = torch.cat(labels, 0)
         else:
-            bbox_selected = bbox[selected]
+            bbox_selected = bbox[selected].tensor
             score_selected = obj_scores[selected]
             labels = bbox_classes[selected]
...
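
Note: `aligned_3d_nms` consumes axis-aligned boxes, so the rotated proposals are first reduced to their corner-wise min/max, as in the lines above. A small torch sketch of that reduction with random stand-in corners:

    import torch

    corner3d = torch.rand(16, 8, 3)  # (num_boxes, 8 corners, xyz)
    minmax_box3d = corner3d.new_empty((corner3d.shape[0], 6))
    minmax_box3d[:, :3] = corner3d.min(dim=1)[0]  # per-box min corner
    minmax_box3d[:, 3:] = corner3d.max(dim=1)[0]  # per-box max corner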
@@ -66,21 +66,30 @@ class MVXTwoStageDetector(BaseDetector):
     def init_weights(self, pretrained=None):
         super(MVXTwoStageDetector, self).init_weights(pretrained)
+        if pretrained is None:
+            img_pretrained = None
+            pts_pretrained = None
+        elif isinstance(pretrained, dict):
+            img_pretrained = pretrained.get('img', None)
+            pts_pretrained = pretrained.get('pts', None)
+        else:
+            raise ValueError(
+                f'pretrained should be a dict, got {type(pretrained)}')
         if self.with_img_backbone:
-            self.img_backbone.init_weights(pretrained=pretrained)
+            self.img_backbone.init_weights(pretrained=img_pretrained)
+        if self.with_pts_backbone:
+            self.pts_backbone.init_weights(pretrained=pts_pretrained)
         if self.with_img_neck:
             if isinstance(self.img_neck, nn.Sequential):
                 for m in self.img_neck:
                     m.init_weights()
             else:
                 self.img_neck.init_weights()
-        if self.with_shared_head:
-            self.img_shared_head.init_weights(pretrained=pretrained)
+        if self.with_img_roi_head:
+            self.img_roi_head.init_weights(img_pretrained)
         if self.with_img_rpn:
             self.img_rpn_head.init_weights()
-        if self.with_img_bbox:
-            self.img_bbox_roi_extractor.init_weights()
-            self.img_bbox_head.init_weights()
         if self.with_pts_bbox:
             self.pts_bbox_head.init_weights()
@@ -103,6 +112,10 @@ class MVXTwoStageDetector(BaseDetector):
     def with_img_backbone(self):
         return hasattr(self, 'img_backbone') and self.img_backbone is not None
+    @property
+    def with_pts_backbone(self):
+        return hasattr(self, 'pts_backbone') and self.pts_backbone is not None
     @property
     def with_fusion(self):
         return hasattr(self,
@@ -120,6 +133,10 @@ class MVXTwoStageDetector(BaseDetector):
     def with_img_rpn(self):
         return hasattr(self, 'img_rpn_head') and self.img_rpn_head is not None
+    @property
+    def with_img_roi_head(self):
+        return hasattr(self, 'img_roi_head') and self.img_roi_head is not None
     def extract_img_feat(self, img, img_meta):
         if self.with_img_backbone:
             if img.dim() == 5 and img.size(0) == 1:
...
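
Note: `init_weights` now expects `pretrained` to be `None` or a dict with separate image and point branches. A hedged config-style usage sketch (the checkpoint URL is illustrative):

    # separate checkpoints per modality
    pretrained = dict(
        img='torchvision://resnet50',  # image backbone weights (example)
        pts=None)                      # train the point branch from scratch
    # model.init_weights(pretrained=pretrained)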
@@ -43,7 +43,7 @@ class VoteNet(SingleStageDetector):
         Args:
             points (list[Tensor]): Points of each batch.
             img_meta (list): Image metas.
-            gt_bboxes_3d (list[Tensor]): gt bboxes of each batch.
+            gt_bboxes_3d (BaseInstance3DBoxes): gt bboxes of each batch.
             gt_labels_3d (list[Tensor]): gt class labels of each batch.
             pts_semantic_mask (None | list[Tensor]): point-wise semantic
                 label of each batch.
@@ -86,7 +86,7 @@ class VoteNet(SingleStageDetector):
         Args:
             points (list[Tensor]): Points of each sample.
             img_meta (list): Image metas.
-            gt_bboxes_3d (list[Tensor]): gt bboxes of each sample.
+            gt_bboxes_3d (BaseInstance3DBoxes): gt bboxes of each sample.
             gt_labels_3d (list[Tensor]): gt class labels of each sample.
             pts_semantic_mask (None | list[Tensor]): point-wise semantic
                 label of each sample.
...
@@ -474,7 +474,9 @@ class PartA2BboxHead(nn.Module):
             selected_scores = cur_cls_score[selected]
             result_list.append(
-                (selected_bboxes, selected_scores, selected_label_preds))
+                (img_meta[batch_id]['box_type_3d'](selected_bboxes,
+                                                   self.bbox_coder.code_size),
+                 selected_scores, selected_label_preds))
         return result_list
     def multi_class_nms(self,
...
@@ -112,7 +112,7 @@ class PartAggregationROIHead(Base3DRoIHead):
         semantic_results = self.semantic_head(feats_dict['seg_features'])
-        rois = bbox3d2roi([res['boxes_3d'] for res in proposal_list])
+        rois = bbox3d2roi([res['boxes_3d'].tensor for res in proposal_list])
         labels_3d = [res['labels_3d'] for res in proposal_list]
         cls_preds = [res['cls_preds'] for res in proposal_list]
         bbox_results = self._bbox_forward(feats_dict['seg_features'],
...
@@ -62,7 +62,7 @@ def points_in_boxes_batch(points, boxes):
         points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR coordinate
         boxes (torch.Tensor): [B, T, 7],
             num_valid_boxes <= T, [x, y, z, w, l, h, ry] in LiDAR coordinate,
-            (x, y, z) is the bottom center
+            (x, y, z) is the bottom center.
     Returns:
         box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0
...
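
Note: for intuition, a point lies inside an upright (yaw-rotated) LiDAR box if, after rotating it into the box frame, it falls within the half-extents. A hedged numpy sketch for a single box (which of w/l pairs with which axis varies by codebase; this is not the CUDA kernel):

    import numpy as np

    def point_in_lidar_box(pt, box):
        """box = (x, y, z, w, l, h, ry); (x, y, z) is the bottom center."""
        cx, cy, cz, w, l, h, ry = box
        # rotate the point into the box's local frame
        dx, dy = pt[0] - cx, pt[1] - cy
        lx = dx * np.cos(-ry) - dy * np.sin(-ry)
        ly = dx * np.sin(-ry) + dy * np.cos(-ry)
        return (abs(lx) <= w / 2 and abs(ly) <= l / 2
                and 0 <= pt[2] - cz <= h)

    print(point_in_lidar_box((0.2, 0.1, 0.5), (0, 0, 0, 2, 4, 1.5, 0.0)))  # True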
@@ -33,7 +33,7 @@ def test_lidar_boxes3d():
         ]],
         dtype=np.float32)
     bottom_center_box = LiDARInstance3DBoxes(
-        gravity_center_box, origin=[0.5, 0.5, 0.5])
+        gravity_center_box, origin=(0.5, 0.5, 0.5))
     expected_tensor = torch.tensor(
         [[
             -5.24223238e+00, 4.00209696e+01, -4.76429619e-01, 2.06200000e+00,
...
@@ -5,58 +5,58 @@ from mmdet3d.core.evaluation.indoor_eval import average_precision, indoor_eval
 def test_indoor_eval():
+    from mmdet3d.core.bbox.structures import DepthInstance3DBoxes, Box3DMode
-    det_infos = [{
-        'labels_3d': torch.Tensor([4, 4, 3, 17, 2]),
-        'boxes_3d': torch.Tensor(
-            [[2.8734498, -0.187645, -0.02600911, 0.6761766, 0.56542563, 0.5953976, 0.],
-             [0.4031701, -3.2346897, 0.07118589, 0.73209894, 0.8711227, 0.5148243, 0.],
-             [-1.274147, -2.351935, 0.07428858, 1.4534658, 2.563081, 0.8587492, 0.],
-             [3.2214177, 0.7899204, 0.03836718, 0.05321002, 1.2607929, 0.1411697, 0.],
-             [-1.6804854, 2.399011, -0.13099639, 0.5608963, 0.5052759, 0.6770297, 0.]]),
-        'scores_3d': torch.Tensor(
-            [0.9980684, 0.9747082, 0.9709939, 0.9482147, 0.84311247])
-    }, {
-        'labels_3d': torch.Tensor([17.0, 17.0, 3.0, 4.0, 17.0]),
-        'boxes_3d': torch.Tensor(
-            [[3.2112048e+00, 5.6918913e-01, -8.6143613e-04, 1.1942449e-01, 1.2988183e+00, 1.9952521e-01, 0.0000000e+00],
-             [3.248133, 0.4324184, 0.20038621, 0.17225507, 1.2736976, 0.32598814, 0.],
-             [-1.2793612, -2.3155289, 0.15598366, 1.2822601, 2.2253945, 0.8361754, 0.],
-             [2.8716104, -0.26416883, -0.04933786, 0.8190681, 0.60294986, 0.5769499, 0.],
-             [-2.2109854, 0.19445783, -0.01614259, 0.40659013, 0.35370222, 0.3290567, 0.]]),
-        'scores_3d': torch.Tensor(
-            [0.9965866, 0.99507546, 0.9916463, 0.9702634, 0.95803124])
-    }]
+    det_infos = [{
+        'labels_3d': torch.tensor([0, 1, 2, 2, 0, 3, 1, 2, 3, 2]),
+        'boxes_3d': DepthInstance3DBoxes(
+            torch.tensor(
+                [[-2.4089e-03, -3.3174e+00, 4.9438e-01, 2.1668e+00, 2.8431e-01, 1.6506e+00, 0.0000e+00],
+                 [-3.4269e-01, -2.7565e+00, 2.8144e-02, 6.8554e-01, 9.6854e-01, 6.1755e-01, 0.0000e+00],
+                 [-3.8320e+00, -1.0646e+00, 1.7074e-01, 2.4981e-01, 4.4708e-01, 6.2538e-01, 0.0000e+00],
+                 [4.1073e-01, 3.3757e+00, 3.4311e-01, 8.0617e-01, 2.8679e-01, 1.6060e+00, 0.0000e+00],
+                 [6.1199e-01, -3.1041e+00, 4.1873e-01, 1.2310e+00, 4.0162e-01, 1.7303e+00, 0.0000e+00],
+                 [-5.9877e-01, -2.6011e+00, 1.1148e+00, 1.5704e-01, 7.5957e-01, 9.6930e-01, 0.0000e+00],
+                 [2.7462e-01, -3.0088e+00, 6.5231e-02, 8.1208e-01, 4.1861e-01, 3.7339e-01, 0.0000e+00],
+                 [-1.4704e+00, -2.0024e+00, 2.7479e-01, 1.7888e+00, 1.0566e+00, 1.3704e+00, 0.0000e+00],
+                 [8.2727e-02, -3.1160e+00, 2.5690e-01, 1.4054e+00, 2.0772e-01, 9.6792e-01, 0.0000e+00],
+                 [2.6896e+00, 1.9881e+00, 1.1566e+00, 9.9885e-02, 3.5713e-01, 4.5638e-01, 0.0000e+00]]),
+            origin=(0.5, 0.5, 0)),
+        'scores_3d': torch.tensor(
+            [1.7516e-05, 1.0167e-06, 8.4486e-07, 7.1048e-02, 6.4274e-05,
+             1.5003e-07, 5.8102e-06, 1.9399e-08, 5.3126e-07, 1.8630e-09])
+    }]
     label2cat = {
@@ -64,168 +64,67 @@ def test_indoor_eval():
         1: 'bed',
         2: 'chair',
         3: 'sofa',
-        4: 'table',
-        5: 'door',
-        6: 'window',
-        7: 'bookshelf',
-        8: 'picture',
-        9: 'counter',
-        10: 'desk',
-        11: 'curtain',
-        12: 'refrigerator',
-        13: 'showercurtrain',
-        14: 'toilet',
-        15: 'sink',
-        16: 'bathtub',
-        17: 'garbagebin'
     }
     gt_annos = [{
         'gt_num':
-        12,
+        10,
         'gt_boxes_upright_depth':
-        np.array(
-            [[2.54621506, -0.89397144, 0.54144311, 2.90430856, 1.78370309, 0.93826824],
-             [3.36553669, 0.31014189, 0.38758934, 1.2504847, 0.71281439, 0.3908577],
-             [0.17272574, 2.90289116, 0.27966365, 0.56292468, 0.8512187, 0.4987641],
-             [2.39521956, 1.67557895, 0.40407273, 1.23511314, 0.49469376, 0.62720448],
-             [-2.41815996, -1.69104958, 0.22304082, 0.55816364, 0.48154473, 0.66580439],
-             [-0.18044823, 2.9227581, 0.24480903, 0.36165208, 0.44468427, 0.53103662],
-             [-2.44398379, -2.1610918, 0.23631772, 0.52229881, 0.63388562, 0.66596919],
-             [-2.01452827, -2.9558928, 0.8139953, 1.61732554, 0.60224247, 1.79295814],
-             [-0.61519569, 3.24365234, 1.24335742, 2.11988783, 0.26006722, 1.77748263],
-             [-2.64330673, 0.59929442, 1.59422684, 0.07352924, 0.28620502, 0.35408139],
-             [-0.58128822, 3.23699641, 0.06050609, 1.94151425, 0.16413498, 0.20168215],
-             [0.15343043, 2.24693251, 0.22470728, 0.49632657, 0.47379827, 0.43063563]]),
+        np.array(
+            [[-2.4089e-03, -3.3174e+00, 4.9438e-01, 2.1668e+00, 2.8431e-01, 1.6506e+00, 0.0000e+00],
+             [-3.4269e-01, -2.7565e+00, 2.8144e-02, 6.8554e-01, 9.6854e-01, 6.1755e-01, 0.0000e+00],
+             [-3.8320e+00, -1.0646e+00, 1.7074e-01, 2.4981e-01, 4.4708e-01, 6.2538e-01, 0.0000e+00],
+             [4.1073e-01, 3.3757e+00, 3.4311e-01, 8.0617e-01, 2.8679e-01, 1.6060e+00, 0.0000e+00],
+             [6.1199e-01, -3.1041e+00, 4.1873e-01, 1.2310e+00, 4.0162e-01, 1.7303e+00, 0.0000e+00],
+             [-5.9877e-01, -2.6011e+00, 1.1148e+00, 1.5704e-01, 7.5957e-01, 9.6930e-01, 0.0000e+00],
+             [2.7462e-01, -3.0088e+00, 6.5231e-02, 8.1208e-01, 4.1861e-01, 3.7339e-01, 0.0000e+00],
+             [-1.4704e+00, -2.0024e+00, 2.7479e-01, 1.7888e+00, 1.0566e+00, 1.3704e+00, 0.0000e+00],
+             [8.2727e-02, -3.1160e+00, 2.5690e-01, 1.4054e+00, 2.0772e-01, 9.6792e-01, 0.0000e+00],
+             [2.6896e+00, 1.9881e+00, 1.1566e+00, 9.9885e-02, 3.5713e-01, 4.5638e-01, 0.0000e+00]]),
         'class':
-        np.array([3, 4, 4, 17, 2, 2, 2, 7, 11, 8, 17, 2])
-    }, {
-        'gt_num':
-        12,
-        'gt_boxes_upright_depth':
-        np.array(
-            [[3.48649406, 0.24238291, 0.48358256, 1.34014034, 0.72744983, 0.40819243],
-             [-0.50371504, 3.25293231, 1.25988698, 2.12330937, 0.27563906, 1.80230701],
-             [2.58820581, -0.99452347, 0.57732373, 2.94801593, 1.67463434, 0.88743341],
-             [-1.9116497, -2.88811016, 0.70502496, 1.62386703, 0.60732293, 1.5857985],
-             [-2.55324745, 0.6909315, 1.59045517, 0.07264495, 0.32018459, 0.3506999],
-             [-2.3436017, -2.1659112, 0.254318, 0.5333302, 0.56154585, 0.64904487],
-             [-2.32046795, -1.6880455, 0.26138437, 0.5586133, 0.59743834, 0.6378752],
-             [-0.46495372, 3.22126102, 0.03188983, 1.92557108, 0.15160203, 0.24680007],
-             [0.28087699, 2.88433838, 0.2495866, 0.57001019, 0.85177159, 0.5689255],
-             [-0.05292395, 2.90586925, 0.23064148, 0.39113954, 0.43746281, 0.52981442],
-             [0.25537968, 2.25156307, 0.24932587, 0.48192862, 0.51398182, 0.38040417],
-             [2.60432816, 1.62303996, 0.42025632, 1.23775268, 0.51761389, 0.66034317]]),
-        'class':
-        np.array([4, 11, 3, 7, 8, 2, 2, 17, 4, 2, 2, 17])
+        np.array([0, 1, 2, 0, 0, 3, 1, 3, 3, 2])
     }]
-    ret_value = indoor_eval(gt_annos, det_infos, [0.25, 0.5], label2cat)
-    garbagebin_AP_25 = ret_value['garbagebin_AP_0.25']
-    sofa_AP_25 = ret_value['sofa_AP_0.25']
-    table_AP_25 = ret_value['table_AP_0.25']
-    chair_AP_25 = ret_value['chair_AP_0.25']
-    mAP_25 = ret_value['mAP_0.25']
-    garbagebin_rec_25 = ret_value['garbagebin_rec_0.25']
-    sofa_rec_25 = ret_value['sofa_rec_0.25']
-    table_rec_25 = ret_value['table_rec_0.25']
-    chair_rec_25 = ret_value['chair_rec_0.25']
-    mAR_25 = ret_value['mAR_0.25']
-    sofa_AP_50 = ret_value['sofa_AP_0.50']
-    table_AP_50 = ret_value['table_AP_0.50']
-    chair_AP_50 = ret_value['chair_AP_0.50']
-    mAP_50 = ret_value['mAP_0.50']
-    sofa_rec_50 = ret_value['sofa_rec_0.50']
-    table_rec_50 = ret_value['table_rec_0.50']
-    chair_rec_50 = ret_value['chair_rec_0.50']
-    mAR_50 = ret_value['mAR_0.50']
-    assert garbagebin_AP_25 == 0.25
-    assert sofa_AP_25 == 1.0
-    assert table_AP_25 == 0.75
-    assert chair_AP_25 == 0.125
-    assert abs(mAP_25 - 0.303571) < 0.001
-    assert garbagebin_rec_25 == 0.25
-    assert sofa_rec_25 == 1.0
-    assert table_rec_25 == 0.75
-    assert chair_rec_25 == 0.125
-    assert abs(mAR_25 - 0.303571) < 0.001
-    assert sofa_AP_50 == 0.25
-    assert abs(table_AP_50 - 0.416667) < 0.001
-    assert chair_AP_50 == 0.125
-    assert abs(mAP_50 - 0.113095) < 0.001
-    assert sofa_rec_50 == 0.5
-    assert table_rec_50 == 0.5
-    assert chair_rec_50 == 0.125
-    assert abs(mAR_50 - 0.160714) < 0.001
+    ret_value = indoor_eval(
+        gt_annos,
+        det_infos, [0.25, 0.5],
+        label2cat,
+        box_type_3d=DepthInstance3DBoxes,
+        box_mode_3d=Box3DMode.DEPTH)
+    assert abs(ret_value['cabinet_AP_0.25'] - 0.666667) < 1e-3
+    assert abs(ret_value['bed_AP_0.25'] - 1.0) < 1e-3
+    assert abs(ret_value['chair_AP_0.25'] - 0.5) < 1e-3
+    assert abs(ret_value['mAP_0.25'] - 0.708333) < 1e-3
+    assert abs(ret_value['mAR_0.25'] - 0.833333) < 1e-3
 def test_average_precision():
...
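
Note: `average_precision` computes the usual area under the precision-recall curve. A minimal numpy sketch of the interpolated-area variant (not necessarily the exact implementation under test):

    import numpy as np

    def average_precision(recalls, precisions):
        # append sentinels, take the precision envelope, integrate over recall
        mrec = np.concatenate(([0.0], recalls, [1.0]))
        mpre = np.concatenate(([0.0], precisions, [0.0]))
        for i in range(mpre.size - 1, 0, -1):
            mpre[i - 1] = max(mpre[i - 1], mpre[i])
        idx = np.where(mrec[1:] != mrec[:-1])[0]
        return np.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1])

    print(average_precision(np.array([0.5, 1.0]), np.array([1.0, 0.5])))  # 0.75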