Commit 97e4ed42 authored by zhangwenwei

Merge branch 'indoor_dataset' into 'master'

Indoor dataset

See merge request open-mmlab/mmdet.3d!29
parents 6aab10da fee31738
from .class_names import dataset_aliases, get_classes, kitti_classes
from .indoor_eval import indoor_eval
from .kitti_utils import kitti_eval, kitti_eval_coco_style
__all__ = [
    'dataset_aliases', 'get_classes', 'kitti_classes', 'kitti_eval_coco_style',
    'kitti_eval', 'indoor_eval'
]
import numpy as np
import torch
from mmdet3d.core.bbox.iou_calculators.iou3d_calculator import bbox_overlaps_3d
def boxes3d_depth_to_lidar(boxes3d, mid_to_bottom=True):
    """Convert boxes from depth to LiDAR coordinates.

    Flip X-right, Y-forward, Z-up (depth) to X-forward, Y-left, Z-up (LiDAR).

    Args:
        boxes3d (ndarray): (N, 7) [x, y, z, w, l, h, r] in depth coords.
        mid_to_bottom (bool): Whether to move the box origin from the center
            height to the bottom face. Default: True.

    Returns:
        ndarray: (N, 7) [x, y, z, l, w, h, r] in LiDAR coords.
    """
boxes3d_lidar = boxes3d.copy()
boxes3d_lidar[..., [0, 1, 2, 3, 4, 5]] = boxes3d_lidar[...,
[1, 0, 2, 4, 3, 5]]
boxes3d_lidar[..., 1] *= -1
if mid_to_bottom:
boxes3d_lidar[..., 2] -= boxes3d_lidar[..., 5] / 2
return boxes3d_lidar
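# Illustrative doctest-style sketch (not part of the original module): x and y
# swap roles (as do w and l), the new y axis is negated, and `mid_to_bottom`
# drops the origin from the box's center height to its bottom face.
# >>> box = np.array([[1., 2., 3., 0.5, 1., 2., 0.]])  # [x, y, z, w, l, h, r]
# >>> boxes3d_depth_to_lidar(box)
# array([[ 2. , -1. ,  2. ,  1. ,  0.5,  2. ,  0. ]])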
def get_iou_gpu(bb1, bb2):
    """Compute the 3D IoU between two sets of boxes on the GPU.

    Args:
        bb1 (ndarray): (N, 7) boxes [x, y, z, w, l, h, ry] in LiDAR coords.
        bb2 (ndarray): (M, 7) boxes [x, y, z, w, l, h, ry] in LiDAR coords.

    Returns:
        ndarray: (N, M) pairwise IoU matrix.
    """
bb1 = torch.from_numpy(bb1).float().cuda()
bb2 = torch.from_numpy(bb2).float().cuda()
iou3d = bbox_overlaps_3d(bb1, bb2, mode='iou', coordinate='lidar')
return iou3d.cpu().numpy()
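# Usage sketch, assuming a CUDA device is available (both box sets are moved
# to the GPU for the overlap computation):
# >>> boxes_a = np.random.rand(2, 7).astype(np.float32)
# >>> boxes_b = np.random.rand(3, 7).astype(np.float32)
# >>> get_iou_gpu(boxes_a, boxes_b).shape
# (2, 3)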
def average_precision(recalls, precisions, mode='area'):
"""Calculate average precision (for single or multiple scales).
Args:
recalls (ndarray): shape (num_scales, num_dets) or (num_dets, )
precisions (ndarray): shape (num_scales, num_dets) or (num_dets, )
mode (str): 'area' or '11points', 'area' means calculating the area
under precision-recall curve, '11points' means calculating
the average precision of recalls at [0, 0.1, ..., 1]
    Returns:
        ndarray: Calculated average precision of shape (num_scales, ).
    """
if recalls.ndim == 1:
recalls = recalls[np.newaxis, :]
precisions = precisions[np.newaxis, :]
assert recalls.shape == precisions.shape and recalls.ndim == 2
num_scales = recalls.shape[0]
ap = np.zeros(num_scales, dtype=np.float32)
if mode == 'area':
zeros = np.zeros((num_scales, 1), dtype=recalls.dtype)
ones = np.ones((num_scales, 1), dtype=recalls.dtype)
mrec = np.hstack((zeros, recalls, ones))
mpre = np.hstack((zeros, precisions, zeros))
for i in range(mpre.shape[1] - 1, 0, -1):
mpre[:, i - 1] = np.maximum(mpre[:, i - 1], mpre[:, i])
for i in range(num_scales):
ind = np.where(mrec[i, 1:] != mrec[i, :-1])[0]
ap[i] = np.sum(
(mrec[i, ind + 1] - mrec[i, ind]) * mpre[i, ind + 1])
elif mode == '11points':
for i in range(num_scales):
for thr in np.arange(0, 1 + 1e-3, 0.1):
precs = precisions[i, recalls[i, :] >= thr]
prec = precs.max() if precs.size > 0 else 0
ap[i] += prec
ap /= 11
else:
raise ValueError(
'Unrecognized mode, only "area" and "11points" are supported')
return ap
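# Worked example of the 'area' mode: for recalls [0.5, 1.0] with precisions
# [1.0, 0.5], the padded curves after the running-max pass are
# mrec = [0, 0.5, 1, 1] and mpre = [1, 1, 0.5, 0], so
# AP = 0.5 * 1.0 + 0.5 * 0.5 = 0.75:
# >>> average_precision(np.array([0.5, 1.0]), np.array([1.0, 0.5]))
# array([0.75], dtype=float32)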
def eval_det_cls(pred, gt, iou_thr=None):
    """Generic function to compute precision/recall for object detection
    for a single class.

    Args:
        pred (dict): {img_id: [(bbox, score)]} where bbox is a numpy array.
        gt (dict): {img_id: [bbox]}.
        iou_thr (List[float]): List of IoU thresholds.

    Returns:
        List[tuple]: A (recall, precision, ap) tuple per IoU threshold,
            where recall and precision are cumulative curves over the nd
            detections and ap is a scalar.
    """
# construct gt objects
class_recs = {} # {img_id: {'bbox': bbox list, 'det': matched list}}
npos = 0
for img_id in gt.keys():
bbox = np.array(gt[img_id])
        det = [[False] * len(bbox) for _ in iou_thr]
npos += len(bbox)
class_recs[img_id] = {'bbox': bbox, 'det': det}
# pad empty list to all other imgids
for img_id in pred.keys():
if img_id not in gt:
class_recs[img_id] = {'bbox': np.array([]), 'det': []}
    # construct dets
    image_ids = []
    confidence = []
    ious = []
    for img_id in pred.keys():
        cur_num = len(pred[img_id])
        if cur_num == 0:
            continue
        # boxes are 7-dim: [x, y, z, w, l, h, ry]
        BB_cur = np.zeros((cur_num, 7))
        for box_idx, (box, score) in enumerate(pred[img_id]):
            image_ids.append(img_id)
            confidence.append(score)
            BB_cur[box_idx] = box
gt_cur = class_recs[img_id]['bbox'].astype(float)
if len(gt_cur) > 0:
# calculate iou in each image
iou_cur = get_iou_gpu(BB_cur, gt_cur)
for i in range(cur_num):
ious.append(iou_cur[i])
else:
for i in range(cur_num):
ious.append(np.zeros(1))
confidence = np.array(confidence)
# sort by confidence
sorted_ind = np.argsort(-confidence)
image_ids = [image_ids[x] for x in sorted_ind]
ious = [ious[x] for x in sorted_ind]
# go down dets and mark TPs and FPs
nd = len(image_ids)
    tp_thr = [np.zeros(nd) for _ in iou_thr]
    fp_thr = [np.zeros(nd) for _ in iou_thr]
for d in range(nd):
R = class_recs[image_ids[d]]
iou_max = -np.inf
BBGT = R['bbox'].astype(float)
cur_iou = ious[d]
if BBGT.size > 0:
# compute overlaps
for j in range(BBGT.shape[0]):
# iou = get_iou_main(get_iou_func, (bb, BBGT[j,...]))
iou = cur_iou[j]
if iou > iou_max:
iou_max = iou
jmax = j
for iou_idx, thresh in enumerate(iou_thr):
if iou_max > thresh:
if not R['det'][iou_idx][jmax]:
tp_thr[iou_idx][d] = 1.
R['det'][iou_idx][jmax] = 1
else:
fp_thr[iou_idx][d] = 1.
else:
fp_thr[iou_idx][d] = 1.
ret = []
for iou_idx, thresh in enumerate(iou_thr):
# compute precision recall
fp = np.cumsum(fp_thr[iou_idx])
tp = np.cumsum(tp_thr[iou_idx])
recall = tp / float(npos)
# avoid divide by zero in case the first detection matches a difficult
# ground truth
precision = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
ap = average_precision(recall, precision)
ret.append((recall, precision, ap))
return ret
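# Input-format sketch (hypothetical toy data; the IoU step needs CUDA):
# >>> box = np.array([0., 0., 1., 1., 1., 1., 0.])
# >>> (rec, prec, ap), = eval_det_cls({0: [(box, 0.9)]}, {0: [box]}, [0.25])
# The single detection matches the single GT box exactly, so the curves end
# at rec[-1] == prec[-1] == 1.0 and ap == 1.0.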
def eval_map_recall(det_infos, gt_infos, ovthresh=None):
    """Evaluate mAP and recall.

    Generic function to compute precision/recall for object detection
    for multiple classes.

    Args:
        det_infos (List[List[List[tuple]]]): Label, bbox and score of
            the detection results.
        gt_infos (List[List[List]]): Label and bbox of the ground truth.
        ovthresh (List[float]): IoU thresholds. Default: None.

    Returns:
        List[dict]: {label: recall} for each IoU threshold.
        List[dict]: {label: precision} for each IoU threshold.
        List[dict]: {label: ap} for each IoU threshold.
    """
pred_all = {}
scan_cnt = 0
for batch_pred_map_cls in det_infos:
for i in range(len(batch_pred_map_cls)):
pred_all[scan_cnt] = batch_pred_map_cls[i]
scan_cnt += 1
pred = {} # map {classname: pred}
gt = {} # map {classname: gt}
    for img_id in pred_all.keys():
        for label, bbox, score in pred_all[img_id]:
            label = int(label)
            if label not in pred:
                pred[label] = {}
            if img_id not in pred[label]:
                pred[label][img_id] = []
            # also make sure gt has an entry for every predicted class/image
            if label not in gt:
                gt[label] = {}
            if img_id not in gt[label]:
                gt[label][img_id] = []
            pred[label][img_id].append((bbox, score))
for img_id in range(len(gt_infos)):
for label, bbox in gt_infos[img_id]:
if label not in gt:
gt[label] = {}
if img_id not in gt[label]:
gt[label][img_id] = []
gt[label][img_id].append(bbox)
ret_values = []
for classname in gt.keys():
if classname in pred:
ret_values.append(
eval_det_cls(pred[classname], gt[classname], ovthresh))
    recall = [{} for _ in ovthresh]
    precision = [{} for _ in ovthresh]
    ap = [{} for _ in ovthresh]
for i, label in enumerate(gt.keys()):
for iou_idx, thresh in enumerate(ovthresh):
if label in pred:
recall[iou_idx][label], precision[iou_idx][label], ap[iou_idx][
label] = ret_values[i][iou_idx]
else:
recall[iou_idx][label] = [0]
precision[iou_idx][label] = [0]
ap[iou_idx][label] = [0]
return recall, precision, ap
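# Return-layout note: each value is a per-threshold list of dicts, so with
# ovthresh=[0.25, 0.5], recall[0][label] is the cumulative recall curve of
# `label` at IoU 0.25 and ap[1][label] is its AP at IoU 0.5.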
def indoor_eval(gt_annos, dt_annos, metric, label2cat):
    """Indoor evaluation.

    Evaluate the detection results against the ground truth.

    Args:
        gt_annos (List[dict]): GT annotations, one dict per scan.
        dt_annos (List[List[List[tuple]]]): Detection annotations.
        metric (List[float]): AP IoU thresholds.
        label2cat (dict): {label: cat}.

    Returns:
        dict: Dict of evaluation results.
    """
gt_infos = []
for gt_anno in gt_annos:
if gt_anno['gt_num'] != 0:
# convert to lidar coor for evaluation
bbox_lidar_bottom = boxes3d_depth_to_lidar(
gt_anno['gt_boxes_upright_depth'], mid_to_bottom=True)
if bbox_lidar_bottom.shape[-1] == 6:
bbox_lidar_bottom = np.pad(bbox_lidar_bottom, ((0, 0), (0, 1)),
'constant')
gt_info_temp = []
for i in range(gt_anno['gt_num']):
gt_info_temp.append(
[gt_anno['class'][i], bbox_lidar_bottom[i]])
gt_infos.append(gt_info_temp)
rec, prec, ap = eval_map_recall(dt_annos, gt_infos, metric)
ret_dict = {}
for i, iou_thresh in enumerate(metric):
rec_list = []
for label in ap[i].keys():
ret_dict[f'{label2cat[label]}_AP_{iou_thresh:.2f}'] = ap[i][label][
0]
ret_dict[f'mAP_{iou_thresh:.2f}'] = np.mean(list(ap[i].values()))
for label in rec[i].keys():
ret_dict[f'{label2cat[label]}_rec_{iou_thresh:.2f}'] = rec[i][
label][-1]
rec_list.append(rec[i][label][-1])
ret_dict[f'mAR_{iou_thresh:.2f}'] = np.mean(rec_list)
return ret_dict
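# Key-layout sketch of the returned dict: with metric=[0.25, 0.5] and a class
# named 'chair', indoor_eval yields entries such as 'chair_AP_0.25',
# 'chair_rec_0.25', 'mAP_0.25' and 'mAR_0.25', plus the matching '_0.50'
# keys, as exercised by the tests further below.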
from mmdet.datasets.builder import DATASETS
from .builder import build_dataset
from .dataset_wrappers import RepeatFactorDataset
from .indoor_base_dataset import IndoorBaseDataset
from .kitti2d_dataset import Kitti2DDataset
from .kitti_dataset import KittiDataset
from .loader import DistributedGroupSampler, GroupSampler, build_dataloader
@@ -11,6 +12,8 @@ from .pipelines import (GlobalRotScale, IndoorFlipData, IndoorGlobalRotScale,
IndoorPointsColorNormalize, ObjectNoise,
ObjectRangeFilter, ObjectSample, PointShuffle,
PointsRangeFilter, RandomFlip3D)
from .scannet_dataset import ScanNetDataset
from .sunrgbd_dataset import SUNRGBDDataset
__all__ = [
'KittiDataset', 'GroupSampler', 'DistributedGroupSampler',
@@ -20,5 +23,6 @@ __all__ = [
'ObjectRangeFilter', 'PointsRangeFilter', 'Collect3D',
'IndoorLoadPointsFromFile', 'IndoorPointsColorNormalize',
'IndoorPointSample', 'IndoorLoadAnnotations3D', 'IndoorPointsColorJitter',
    'IndoorGlobalRotScale', 'IndoorFlipData', 'SUNRGBDDataset',
'ScanNetDataset', 'IndoorBaseDataset'
]
import mmcv
import numpy as np
import torch.utils.data as torch_data
from mmdet.datasets import DATASETS
from .pipelines import Compose
@DATASETS.register_module()
class IndoorBaseDataset(torch_data.Dataset):
def __init__(self,
root_path,
ann_file,
pipeline=None,
classes=None,
test_mode=False,
with_label=True):
super().__init__()
self.root_path = root_path
self.CLASSES = self.get_classes(classes)
self.test_mode = test_mode
self.label2cat = {i: cat_id for i, cat_id in enumerate(self.CLASSES)}
mmcv.check_file_exist(ann_file)
self.data_infos = mmcv.load(ann_file)
if pipeline is not None:
self.pipeline = Compose(pipeline)
self.with_label = with_label
def __len__(self):
return len(self.data_infos)
def get_data_info(self, index):
info = self.data_infos[index]
sample_idx = info['point_cloud']['lidar_idx']
pts_filename = self._get_pts_filename(sample_idx)
input_dict = dict(pts_filename=pts_filename)
if self.with_label:
annos = self._get_ann_info(index, sample_idx)
input_dict.update(annos)
if len(input_dict['gt_bboxes_3d']) == 0:
return None
return input_dict
def _rand_another(self, idx):
pool = np.where(self.flag == self.flag[idx])[0]
return np.random.choice(pool)
def __getitem__(self, idx):
if self.test_mode:
return self.prepare_test_data(idx)
while True:
data = self.prepare_train_data(idx)
if data is None:
idx = self._rand_another(idx)
continue
return data
def prepare_train_data(self, index):
input_dict = self.get_data_info(index)
if input_dict is None:
return None
example = self.pipeline(input_dict)
if len(example['gt_bboxes_3d']._data) == 0:
return None
return example
def prepare_test_data(self, index):
input_dict = self.get_data_info(index)
example = self.pipeline(input_dict)
return example
@classmethod
def get_classes(cls, classes=None):
"""Get class names of current dataset.
Args:
classes (Sequence[str] | str | None): If classes is None, use
default CLASSES defined by builtin dataset. If classes is a
string, take it as a file name. The file contains the name of
classes where each line contains one class name. If classes is
a tuple or list, override the CLASSES defined by the dataset.
"""
if classes is None:
return cls.CLASSES
if isinstance(classes, str):
# take it as a file path
class_names = mmcv.list_from_file(classes)
elif isinstance(classes, (tuple, list)):
class_names = classes
else:
raise ValueError(f'Unsupported type {type(classes)} of classes.')
return class_names
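    # Usage sketch for get_classes (the file path below is hypothetical):
    # >>> IndoorBaseDataset.get_classes(('bed', 'chair'))  # passthrough
    # ('bed', 'chair')
    # >>> IndoorBaseDataset.get_classes('classes.txt')  # one name per line
    # ['bed', 'chair']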
def _generate_annotations(self, output):
"""Generate Annotations.
Transform results of the model to the form of the evaluation.
Args:
output (List): The output of the model.
"""
        result = []
        bs = len(output)
        for i in range(bs):
            pred_list_i = list()
            pred_boxes = output[i]
            box3d_depth = pred_boxes['box3d_lidar']
            if box3d_depth is not None:
                label_preds = pred_boxes['label_preds']
                scores = pred_boxes['scores']
                label_preds = label_preds.detach().cpu().numpy()
                num_proposal = box3d_depth.shape[0]
                for j in range(num_proposal):
                    bbox_lidar = box3d_depth[j]  # [7] in lidar
                    pred_list_i.append(
                        (label_preds[j], bbox_lidar.copy(), scores[j]))
            result.append(pred_list_i)
        return result
def format_results(self, outputs):
results = []
for output in outputs:
result = self._generate_annotations(output)
results.append(result)
return results
def evaluate(self, results, metric=None):
"""Evaluate.
Evaluation in indoor protocol.
Args:
results (List): List of result.
metric (List[float]): AP IoU thresholds.
"""
results = self.format_results(results)
from mmdet3d.core.evaluation import indoor_eval
assert len(metric) > 0
gt_annos = [info['annos'] for info in self.data_infos]
ret_dict = indoor_eval(gt_annos, results, metric, self.label2cat)
return ret_dict
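    # Call-chain note (mirrored by the dataset tests below): the runner
    # collects per-batch network outputs, format_results() flattens them into
    # per-scan (label, bbox, score) tuples via _generate_annotations(), and
    # indoor_eval() matches those against the 'annos' in self.data_infos.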
import os.path as osp
import numpy as np
from mmdet.datasets import DATASETS
from .indoor_base_dataset import IndoorBaseDataset
@DATASETS.register_module()
class ScanNetDataset(IndoorBaseDataset):
CLASSES = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
'bookshelf', 'picture', 'counter', 'desk', 'curtain',
'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
'garbagebin')
def __init__(self,
root_path,
ann_file,
pipeline=None,
classes=None,
test_mode=False,
with_label=True):
super().__init__(root_path, ann_file, pipeline, classes, test_mode,
with_label)
def _get_pts_filename(self, sample_idx):
pts_filename = osp.join(self.root_path, f'{sample_idx}_vert.npy')
return pts_filename
def _get_ann_info(self, index, sample_idx):
        # Use index to get the annos, so the evalhook can also use this API.
        info = self.data_infos[index]
        if info['annos']['gt_num'] != 0:
            gt_bboxes_3d = info['annos']['gt_boxes_upright_depth']  # k, 6
            gt_labels = info['annos']['class']
            gt_bboxes_3d_mask = np.ones_like(gt_labels, dtype=bool)
        else:
            gt_bboxes_3d = np.zeros((1, 6), dtype=np.float32)
            gt_labels = np.zeros(1, dtype=bool)
            gt_bboxes_3d_mask = np.zeros(1, dtype=bool)
pts_instance_mask_path = osp.join(self.root_path,
f'{sample_idx}_ins_label.npy')
pts_semantic_mask_path = osp.join(self.root_path,
f'{sample_idx}_sem_label.npy')
anns_results = dict(
gt_bboxes_3d=gt_bboxes_3d,
gt_labels=gt_labels,
gt_bboxes_3d_mask=gt_bboxes_3d_mask,
pts_instance_mask_path=pts_instance_mask_path,
pts_semantic_mask_path=pts_semantic_mask_path)
return anns_results
import os.path as osp
import numpy as np
from mmdet.datasets import DATASETS
from .indoor_base_dataset import IndoorBaseDataset
@DATASETS.register_module()
class SUNRGBDDataset(IndoorBaseDataset):
CLASSES = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
'night_stand', 'bookshelf', 'bathtub')
def __init__(self,
root_path,
ann_file,
pipeline=None,
classes=None,
test_mode=False,
with_label=True):
super().__init__(root_path, ann_file, pipeline, classes, test_mode,
with_label)
def _get_pts_filename(self, sample_idx):
pts_filename = osp.join(self.root_path, 'lidar',
f'{sample_idx:06d}.npy')
return pts_filename
def _get_ann_info(self, index, sample_idx):
        # Use index to get the annos, so the evalhook can also use this API.
        info = self.data_infos[index]
        if info['annos']['gt_num'] != 0:
            gt_bboxes_3d = info['annos']['gt_boxes_upright_depth']  # k, 6
            gt_labels = info['annos']['class']
            gt_bboxes_3d_mask = np.ones_like(gt_labels, dtype=bool)
        else:
            gt_bboxes_3d = np.zeros((1, 6), dtype=np.float32)
            gt_labels = np.zeros(1, dtype=bool)
            gt_bboxes_3d_mask = np.zeros(1, dtype=bool)
anns_results = dict(
gt_bboxes_3d=gt_bboxes_3d,
gt_labels=gt_labels,
gt_bboxes_3d_mask=gt_bboxes_3d_mask)
return anns_results
import numpy as np
from mmdet3d.core.evaluation.indoor_eval import average_precision, indoor_eval
def test_indoor_eval():
det_infos = [[[[
4.0,
[
2.8734498, -0.187645, -0.02600911, 0.6761766, 0.56542563,
0.5953976, 0.
], 0.9980684
],
[
4.0,
[
0.4031701, -3.2346897, 0.07118589, 0.73209894,
0.8711227, 0.5148243, 0.
], 0.9747082
],
[
3.0,
[
-1.274147, -2.351935, 0.07428858, 1.4534658,
2.563081, 0.8587492, 0.
], 0.9709939
],
[
17.0,
[
3.2214177, 0.7899204, 0.03836718, 0.05321002,
1.2607929, 0.1411697, 0.
], 0.9482147
],
[
2.0,
[
-1.6804854, 2.399011, -0.13099639, 0.5608963,
0.5052759, 0.6770297, 0.
], 0.84311247
]]],
[[[
17.0,
[
3.2112048e+00, 5.6918913e-01, -8.6143613e-04,
1.1942449e-01, 1.2988183e+00, 1.9952521e-01,
0.0000000e+00
], 0.9965866
],
[
17.0,
[
3.248133, 0.4324184, 0.20038621, 0.17225507,
1.2736976, 0.32598814, 0.
], 0.99507546
],
[
3.0,
[
-1.2793612, -2.3155289, 0.15598366, 1.2822601,
2.2253945, 0.8361754, 0.
], 0.9916463
],
[
4.0,
[
2.8716104, -0.26416883, -0.04933786, 0.8190681,
0.60294986, 0.5769499, 0.
], 0.9702634
],
[
17.0,
[
-2.2109854, 0.19445783, -0.01614259, 0.40659013,
0.35370222, 0.3290567, 0.
], 0.95803124
]]]]
label2cat = {
0: 'cabinet',
1: 'bed',
2: 'chair',
3: 'sofa',
4: 'table',
5: 'door',
6: 'window',
7: 'bookshelf',
8: 'picture',
9: 'counter',
10: 'desk',
11: 'curtain',
12: 'refrigerator',
13: 'showercurtrain',
14: 'toilet',
15: 'sink',
16: 'bathtub',
17: 'garbagebin'
}
gt_annos = [{
'gt_num':
12,
'gt_boxes_upright_depth':
np.array([[
2.54621506, -0.89397144, 0.54144311, 2.90430856, 1.78370309,
0.93826824
],
[
3.36553669, 0.31014189, 0.38758934, 1.2504847,
0.71281439, 0.3908577
],
[
0.17272574, 2.90289116, 0.27966365, 0.56292468,
0.8512187, 0.4987641
],
[
2.39521956, 1.67557895, 0.40407273, 1.23511314,
0.49469376, 0.62720448
],
[
-2.41815996, -1.69104958, 0.22304082, 0.55816364,
0.48154473, 0.66580439
],
[
-0.18044823, 2.9227581, 0.24480903, 0.36165208,
0.44468427, 0.53103662
],
[
-2.44398379, -2.1610918, 0.23631772, 0.52229881,
0.63388562, 0.66596919
],
[
-2.01452827, -2.9558928, 0.8139953, 1.61732554,
0.60224247, 1.79295814
],
[
-0.61519569, 3.24365234, 1.24335742, 2.11988783,
0.26006722, 1.77748263
],
[
-2.64330673, 0.59929442, 1.59422684, 0.07352924,
0.28620502, 0.35408139
],
[
-0.58128822, 3.23699641, 0.06050609, 1.94151425,
0.16413498, 0.20168215
],
[
0.15343043, 2.24693251, 0.22470728, 0.49632657,
0.47379827, 0.43063563
]]),
'class': [3, 4, 4, 17, 2, 2, 2, 7, 11, 8, 17, 2]
}, {
'gt_num':
12,
'gt_boxes_upright_depth':
np.array([[
3.48649406, 0.24238291, 0.48358256, 1.34014034, 0.72744983,
0.40819243
],
[
-0.50371504, 3.25293231, 1.25988698, 2.12330937,
0.27563906, 1.80230701
],
[
2.58820581, -0.99452347, 0.57732373, 2.94801593,
1.67463434, 0.88743341
],
[
-1.9116497, -2.88811016, 0.70502496, 1.62386703,
0.60732293, 1.5857985
],
[
-2.55324745, 0.6909315, 1.59045517, 0.07264495,
0.32018459, 0.3506999
],
[
-2.3436017, -2.1659112, 0.254318, 0.5333302, 0.56154585,
0.64904487
],
[
-2.32046795, -1.6880455, 0.26138437, 0.5586133,
0.59743834, 0.6378752
],
[
-0.46495372, 3.22126102, 0.03188983, 1.92557108,
0.15160203, 0.24680007
],
[
0.28087699, 2.88433838, 0.2495866, 0.57001019,
0.85177159, 0.5689255
],
[
-0.05292395, 2.90586925, 0.23064148, 0.39113954,
0.43746281, 0.52981442
],
[
0.25537968, 2.25156307, 0.24932587, 0.48192862,
0.51398182, 0.38040417
],
[
2.60432816, 1.62303996, 0.42025632, 1.23775268,
0.51761389, 0.66034317
]]),
'class': [4, 11, 3, 7, 8, 2, 2, 17, 4, 2, 2, 17]
}]
ret_value = indoor_eval(gt_annos, det_infos, [0.25, 0.5], label2cat)
garbagebin_AP_25 = ret_value['garbagebin_AP_0.25']
sofa_AP_25 = ret_value['sofa_AP_0.25']
table_AP_25 = ret_value['table_AP_0.25']
chair_AP_25 = ret_value['chair_AP_0.25']
mAP_25 = ret_value['mAP_0.25']
garbagebin_rec_25 = ret_value['garbagebin_rec_0.25']
sofa_rec_25 = ret_value['sofa_rec_0.25']
table_rec_25 = ret_value['table_rec_0.25']
chair_rec_25 = ret_value['chair_rec_0.25']
mAR_25 = ret_value['mAR_0.25']
sofa_AP_50 = ret_value['sofa_AP_0.50']
table_AP_50 = ret_value['table_AP_0.50']
chair_AP_50 = ret_value['chair_AP_0.50']
mAP_50 = ret_value['mAP_0.50']
sofa_rec_50 = ret_value['sofa_rec_0.50']
table_rec_50 = ret_value['table_rec_0.50']
chair_rec_50 = ret_value['chair_rec_0.50']
mAR_50 = ret_value['mAR_0.50']
assert garbagebin_AP_25 == 0.25
assert sofa_AP_25 == 1.0
assert table_AP_25 == 0.75
assert chair_AP_25 == 0.125
assert abs(mAP_25 - 0.303571) < 0.001
assert garbagebin_rec_25 == 0.25
assert sofa_rec_25 == 1.0
assert table_rec_25 == 0.75
assert chair_rec_25 == 0.125
assert abs(mAR_25 - 0.303571) < 0.001
assert sofa_AP_50 == 0.25
assert abs(table_AP_50 - 0.416667) < 0.001
assert chair_AP_50 == 0.125
assert abs(mAP_50 - 0.113095) < 0.001
assert sofa_rec_50 == 0.5
assert table_rec_50 == 0.5
assert chair_rec_50 == 0.125
assert abs(mAR_50 - 0.160714) < 0.001
def test_average_precision():
ap = average_precision(
np.array([[0.25, 0.5, 0.75], [0.25, 0.5, 0.75]]),
np.array([[1., 1., 1.], [1., 1., 1.]]), '11points')
assert abs(ap[0] - 0.06611571) < 0.001
import numpy as np
import pytest
import torch
from mmdet3d.datasets import ScanNetDataset
def test_getitem():
np.random.seed(0)
root_path = './tests/data/scannet/scannet_train_instance_data'
ann_file = './tests/data/scannet/scannet_infos.pkl'
class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door',
'window', 'bookshelf', 'picture', 'counter', 'desk',
'curtain', 'refrigerator', 'showercurtrain', 'toilet',
'sink', 'bathtub', 'garbagebin')
pipelines = [
dict(
type='IndoorLoadPointsFromFile',
use_height=True,
load_dim=6,
use_dim=[0, 1, 2]),
dict(type='IndoorLoadAnnotations3D'),
dict(type='IndoorPointSample', num_points=5),
dict(type='IndoorFlipData', flip_ratio_yz=1.0, flip_ratio_xz=1.0),
dict(
type='IndoorGlobalRotScale',
use_height=True,
rot_range=[-1 / 36, 1 / 36],
scale_range=None),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
keys=[
'points', 'gt_bboxes_3d', 'gt_labels', 'pts_semantic_mask',
'pts_instance_mask'
]),
]
scannet_dataset = ScanNetDataset(root_path, ann_file, pipelines)
data = scannet_dataset[0]
points = data['points']._data
gt_bboxes_3d = data['gt_bboxes_3d']._data
gt_labels = data['gt_labels']._data
pts_semantic_mask = data['pts_semantic_mask']._data
pts_instance_mask = data['pts_instance_mask']._data
expected_points = np.array(
[[-2.9078157, -1.9569951, 2.3543026, 2.389488],
[-0.71360034, -3.4359822, 2.1330001, 2.1681855],
[-1.332374, 1.474838, -0.04405887, -0.00887359],
[2.1336637, -1.3265059, -0.02880373, 0.00638155],
[0.43895668, -3.0259454, 1.5560012, 1.5911865]])
expected_gt_bboxes_3d = np.array([
[-1.5005362, -3.512584, 1.8565295, 1.7457027, 0.24149807, 0.57235193],
[-2.8848705, 3.4961755, 1.5268247, 0.66170084, 0.17433672, 0.67153597],
[-1.1585636, -2.192365, 0.61649567, 0.5557011, 2.5375574, 1.2144762],
[-2.930457, -2.4856408, 0.9722377, 0.6270478, 1.8461524, 0.28697443],
[3.3114715, -0.00476722, 1.0712197, 0.46191898, 3.8605113, 2.1603441]
])
expected_gt_labels = np.array([
6, 6, 4, 9, 11, 11, 10, 0, 15, 17, 17, 17, 3, 12, 4, 4, 14, 1, 0, 0, 0,
0, 0, 0, 5, 5, 5
])
expected_pts_semantic_mask = np.array([3, 1, 2, 2, 15])
expected_pts_instance_mask = np.array([44, 22, 10, 10, 57])
original_classes = scannet_dataset.CLASSES
assert scannet_dataset.CLASSES == class_names
assert np.allclose(points, expected_points)
assert gt_bboxes_3d[:5].shape == (5, 6)
assert np.allclose(gt_bboxes_3d[:5], expected_gt_bboxes_3d)
assert np.all(gt_labels.numpy() == expected_gt_labels)
assert np.all(pts_semantic_mask.numpy() == expected_pts_semantic_mask)
assert np.all(pts_instance_mask.numpy() == expected_pts_instance_mask)
assert original_classes == class_names
scannet_dataset = ScanNetDataset(
root_path, ann_file, pipeline=None, classes=['cabinet', 'bed'])
assert scannet_dataset.CLASSES != original_classes
assert scannet_dataset.CLASSES == ['cabinet', 'bed']
scannet_dataset = ScanNetDataset(
root_path, ann_file, pipeline=None, classes=('cabinet', 'bed'))
assert scannet_dataset.CLASSES != original_classes
assert scannet_dataset.CLASSES == ('cabinet', 'bed')
import tempfile
tmp_file = tempfile.NamedTemporaryFile()
with open(tmp_file.name, 'w') as f:
f.write('cabinet\nbed\n')
scannet_dataset = ScanNetDataset(
root_path, ann_file, pipeline=None, classes=tmp_file.name)
assert scannet_dataset.CLASSES != original_classes
assert scannet_dataset.CLASSES == ['cabinet', 'bed']
def test_evaluate():
if not torch.cuda.is_available():
        pytest.skip('test requires a CUDA device')
root_path = './tests/data/scannet'
ann_file = './tests/data/scannet/scannet_infos.pkl'
scannet_dataset = ScanNetDataset(root_path, ann_file)
results = []
pred_boxes = dict()
pred_boxes['box3d_lidar'] = np.array([[
3.52074146e+00, -1.48129511e+00, 1.57035351e+00, 2.31956959e-01,
1.74445975e+00, 5.72351933e-01, 0
],
[
-3.48033905e+00, -2.90395617e+00,
1.19105673e+00, 1.70723915e-01,
6.60776615e-01, 6.71535969e-01, 0
],
[
2.19867110e+00, -1.14655101e+00,
9.25755501e-03, 2.53463078e+00,
5.41841269e-01, 1.21447623e+00, 0
],
[
2.50163722, -2.91681337,
0.82875049, 1.84280431,
0.61697435, 0.28697443, 0
],
[
-0.01335114, 3.3114481,
-0.00895238, 3.85815716,
0.44081616, 2.16034412, 0
]])
pred_boxes['label_preds'] = torch.Tensor([6, 6, 4, 9, 11]).cuda()
pred_boxes['scores'] = torch.Tensor([0.5, 1.0, 1.0, 1.0, 1.0]).cuda()
results.append([pred_boxes])
metric = [0.25, 0.5]
ret_dict = scannet_dataset.evaluate(results, metric)
table_average_precision_25 = ret_dict['table_AP_0.25']
window_average_precision_25 = ret_dict['window_AP_0.25']
counter_average_precision_25 = ret_dict['counter_AP_0.25']
curtain_average_precision_25 = ret_dict['curtain_AP_0.25']
assert abs(table_average_precision_25 - 0.3333) < 0.01
assert abs(window_average_precision_25 - 1) < 0.01
assert abs(counter_average_precision_25 - 1) < 0.01
assert abs(curtain_average_precision_25 - 0.5) < 0.01
import numpy as np
import pytest
import torch
from mmdet3d.datasets import SUNRGBDDataset
def test_getitem():
np.random.seed(0)
root_path = './tests/data/sunrgbd/sunrgbd_trainval'
ann_file = './tests/data/sunrgbd/sunrgbd_infos.pkl'
class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk',
'dresser', 'night_stand', 'bookshelf', 'bathtub')
pipelines = [
dict(
type='IndoorLoadPointsFromFile',
use_height=True,
load_dim=6,
use_dim=[0, 1, 2]),
dict(type='IndoorFlipData', flip_ratio_yz=1.0),
dict(
type='IndoorGlobalRotScale',
use_height=True,
rot_range=[-1 / 6, 1 / 6],
scale_range=[0.85, 1.15]),
dict(type='IndoorPointSample', num_points=5),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels']),
]
sunrgbd_dataset = SUNRGBDDataset(root_path, ann_file, pipelines)
data = sunrgbd_dataset[0]
points = data['points']._data
gt_bboxes_3d = data['gt_bboxes_3d']._data
gt_labels = data['gt_labels']._data
expected_points = np.array(
[[0.6570105, 1.5538014, 0.24514851, 1.0165423],
[0.656101, 1.558591, 0.21755838, 0.98895216],
[0.6293659, 1.5679953, -0.10004003, 0.67135376],
[0.6068739, 1.5974995, -0.41063973, 0.36075398],
[0.6464709, 1.5573514, 0.15114647, 0.9225402]])
expected_gt_bboxes_3d = np.array([[
-2.012483, 3.9473376, -0.25446942, 2.3730404, 1.9457763, 2.0303352,
1.2205974
],
[
-3.7036808, 4.2396426, -0.81091917,
0.6032123, 0.91040343, 1.003341,
1.2662518
],
[
0.6528646, 2.1638472, -0.15228128,
0.7347852, 1.6113238, 2.1694272,
2.81404
]])
expected_gt_labels = np.array([0, 7, 6])
original_classes = sunrgbd_dataset.CLASSES
assert np.allclose(points, expected_points)
assert np.allclose(gt_bboxes_3d, expected_gt_bboxes_3d)
assert np.all(gt_labels.numpy() == expected_gt_labels)
assert original_classes == class_names
    sunrgbd_dataset = SUNRGBDDataset(
        root_path, ann_file, pipeline=None, classes=['bed', 'table'])
    assert sunrgbd_dataset.CLASSES != original_classes
    assert sunrgbd_dataset.CLASSES == ['bed', 'table']
    sunrgbd_dataset = SUNRGBDDataset(
        root_path, ann_file, pipeline=None, classes=('bed', 'table'))
    assert sunrgbd_dataset.CLASSES != original_classes
    assert sunrgbd_dataset.CLASSES == ('bed', 'table')
    import tempfile
    tmp_file = tempfile.NamedTemporaryFile()
    with open(tmp_file.name, 'w') as f:
        f.write('bed\ntable\n')
    sunrgbd_dataset = SUNRGBDDataset(
        root_path, ann_file, pipeline=None, classes=tmp_file.name)
    assert sunrgbd_dataset.CLASSES != original_classes
    assert sunrgbd_dataset.CLASSES == ['bed', 'table']
def test_evaluate():
if not torch.cuda.is_available():
        pytest.skip('test requires a CUDA device')
root_path = './tests/data/sunrgbd'
ann_file = './tests/data/sunrgbd/sunrgbd_infos.pkl'
sunrgbd_dataset = SUNRGBDDataset(root_path, ann_file)
results = []
pred_boxes = dict()
pred_boxes['box3d_lidar'] = np.array(
[[
4.168696, -1.047307, -1.231666, 1.887584, 2.30207, 1.969614,
1.69564944
],
[
4.811675, -2.583086, -1.273334, 0.883176, 0.585172, 0.973334,
1.64999513
], [1.904545, 1.086364, -1.2, 1.563134, 0.71281, 2.104546,
0.1022069]])
pred_boxes['label_preds'] = torch.Tensor([0, 7, 6]).cuda()
pred_boxes['scores'] = torch.Tensor([0.5, 1.0, 1.0]).cuda()
results.append([pred_boxes])
metric = [0.25, 0.5]
ap_dict = sunrgbd_dataset.evaluate(results, metric)
bed_precision_25 = ap_dict['bed_AP_0.25']
dresser_precision_25 = ap_dict['dresser_AP_0.25']
night_stand_precision_25 = ap_dict['night_stand_AP_0.25']
assert abs(bed_precision_25 - 1) < 0.01
assert abs(dresser_precision_25 - 1) < 0.01
assert abs(night_stand_precision_25 - 1) < 0.01