Commit d1aac35d authored by zhangwenwei

Initial commit
import torch
from ..geometry import bbox_overlaps_2d
from .max_iou_assigner import MaxIoUAssigner
class ApproxMaxIoUAssigner(MaxIoUAssigner):
"""Assign a corresponding gt bbox or background to each bbox.
Each proposal will be assigned `-1`, `0`, or a positive integer
indicating the ground truth index.
- -1: don't care
- 0: negative sample, no assigned gt
- positive integer: positive sample, index (1-based) of assigned gt
Args:
pos_iou_thr (float): IoU threshold for positive bboxes.
neg_iou_thr (float or tuple): IoU threshold for negative bboxes.
min_pos_iou (float): Minimum iou for a bbox to be considered as a
positive bbox. Positive samples can have smaller IoU than
pos_iou_thr due to the 4th step (assign max IoU sample to each gt).
gt_max_assign_all (bool): Whether to assign all bboxes with the same
highest overlap with some gt to that gt.
ignore_iof_thr (float): IoF threshold for ignoring bboxes (if
`gt_bboxes_ignore` is specified). Negative values mean not
ignoring any bboxes.
ignore_wrt_candidates (bool): Whether to compute the iof between
`bboxes` and `gt_bboxes_ignore`, or the contrary.
"""
def __init__(self,
pos_iou_thr,
neg_iou_thr,
min_pos_iou=.0,
gt_max_assign_all=True,
ignore_iof_thr=-1,
ignore_wrt_candidates=True):
self.pos_iou_thr = pos_iou_thr
self.neg_iou_thr = neg_iou_thr
self.min_pos_iou = min_pos_iou
self.gt_max_assign_all = gt_max_assign_all
self.ignore_iof_thr = ignore_iof_thr
self.ignore_wrt_candidates = ignore_wrt_candidates
def assign(self,
approxs,
squares,
approxs_per_octave,
gt_bboxes,
gt_bboxes_ignore=None,
gt_labels=None):
"""Assign gt to approxs.
This method assign a gt bbox to each group of approxs (bboxes),
each group of approxs is represent by a base approx (bbox) and
will be assigned with -1, 0, or a positive number.
-1 means don't care, 0 means negative sample,
positive number is the index (1-based) of assigned gt.
The assignment is done in following steps, the order matters.
1. assign every bbox to -1
2. use the max IoU of each group of approxs to assign
2. assign proposals whose iou with all gts < neg_iou_thr to 0
3. for each bbox, if the iou with its nearest gt >= pos_iou_thr,
assign it to that bbox
4. for each gt bbox, assign its nearest proposals (may be more than
one) to itself
Args:
approxs (Tensor): Bounding boxes to be assigned,
shape(approxs_per_octave*n, 4).
squares (Tensor): Base Bounding boxes to be assigned,
shape(n, 4).
approxs_per_octave (int): number of approxs per octave
gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).
gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
labelled as `ignored`, e.g., crowd boxes in COCO.
gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).
Returns:
:obj:`AssignResult`: The assign result.
"""
if squares.shape[0] == 0 or gt_bboxes.shape[0] == 0:
raise ValueError('No gt or approxs')
num_squares = squares.size(0)
num_gts = gt_bboxes.size(0)
# re-organize anchors by approxs_per_octave x num_squares
approxs = torch.transpose(
approxs.view(num_squares, approxs_per_octave, 4), 0,
1).contiguous().view(-1, 4)
all_overlaps = bbox_overlaps_2d(approxs, gt_bboxes)
overlaps, _ = all_overlaps.view(approxs_per_octave, num_squares,
num_gts).max(dim=0)
overlaps = torch.transpose(overlaps, 0, 1)
bboxes = squares[:, :4]
if (self.ignore_iof_thr > 0) and (gt_bboxes_ignore is not None) and (
gt_bboxes_ignore.numel() > 0):
if self.ignore_wrt_candidates:
ignore_overlaps = bbox_overlaps_2d(
bboxes, gt_bboxes_ignore, mode='iof')
ignore_max_overlaps, _ = ignore_overlaps.max(dim=1)
else:
ignore_overlaps = bbox_overlaps_2d(
gt_bboxes_ignore, bboxes, mode='iof')
ignore_max_overlaps, _ = ignore_overlaps.max(dim=0)
overlaps[:, ignore_max_overlaps > self.ignore_iof_thr] = -1
assign_result = self.assign_wrt_overlaps(overlaps, gt_labels)
return assign_result
import torch
class AssignResult(object):
def __init__(self, num_gts, gt_inds, max_overlaps, labels=None):
self.num_gts = num_gts
self.gt_inds = gt_inds
self.max_overlaps = max_overlaps
self.labels = labels
def add_gt_(self, gt_labels):
self_inds = torch.arange(
1, len(gt_labels) + 1, dtype=torch.long, device=gt_labels.device)
self.gt_inds = torch.cat([self_inds, self.gt_inds])
self.max_overlaps = torch.cat(
[self.max_overlaps.new_ones(self.num_gts), self.max_overlaps])
if self.labels is not None:
self.labels = torch.cat([gt_labels, self.labels])
from abc import ABCMeta, abstractmethod
class BaseAssigner(metaclass=ABCMeta):
@abstractmethod
def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
pass
import torch
from .. import geometry
from .assign_result import AssignResult
from .base_assigner import BaseAssigner
class MaxIoUAssigner(BaseAssigner):
"""Assign a corresponding gt bbox or background to each bbox.
Each proposal will be assigned `-1`, `0`, or a positive integer
indicating the ground truth index.
- -1: don't care
- 0: negative sample, no assigned gt
- positive integer: positive sample, index (1-based) of assigned gt
Args:
pos_iou_thr (float): IoU threshold for positive bboxes.
neg_iou_thr (float or tuple): IoU threshold for negative bboxes.
min_pos_iou (float): Minimum iou for a bbox to be considered as a
positive bbox. Positive samples can have smaller IoU than
pos_iou_thr due to the 4th step (assign max IoU sample to each gt).
gt_max_assign_all (bool): Whether to assign all bboxes with the same
highest overlap with some gt to that gt.
ignore_iof_thr (float): IoF threshold for ignoring bboxes (if
`gt_bboxes_ignore` is specified). Negative values mean not
ignoring any bboxes.
ignore_wrt_candidates (bool): Whether to compute the iof between
`bboxes` and `gt_bboxes_ignore`, or the contrary.
"""
def __init__(self,
pos_iou_thr,
neg_iou_thr,
min_pos_iou=.0,
gt_max_assign_all=True,
ignore_iof_thr=-1,
iou_type='2d',
ignore_wrt_candidates=True):
self.pos_iou_thr = pos_iou_thr
self.neg_iou_thr = neg_iou_thr
self.min_pos_iou = min_pos_iou
self.gt_max_assign_all = gt_max_assign_all
self.ignore_iof_thr = ignore_iof_thr
self.ignore_wrt_candidates = ignore_wrt_candidates
# iou_type could be 2d, 3d, nearest_3d
self.iou_type = iou_type
self.bbox_overlaps = getattr(geometry,
'bbox_overlaps_{}'.format(iou_type))
def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
"""Assign gt to bboxes.
This method assigns a gt bbox to every bbox (proposal/anchor); each bbox
will be assigned with -1, 0, or a positive number. -1 means don't care,
0 means negative sample, positive number is the index (1-based) of
assigned gt.
The assignment is done in the following steps; the order matters.
1. assign every bbox to -1
2. assign proposals whose iou with all gts < neg_iou_thr to 0
3. for each bbox, if the iou with its nearest gt >= pos_iou_thr,
assign it to that gt
4. for each gt bbox, assign its nearest proposals (may be more than
one) to itself
Args:
bboxes (Tensor): Bounding boxes to be assigned, shape(n, 4).
gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).
gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
labelled as `ignored`, e.g., crowd boxes in COCO.
gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).
Returns:
:obj:`AssignResult`: The assign result.
"""
if self.iou_type == '2d':
bboxes = bboxes[:, :4]
overlaps = self.bbox_overlaps(gt_bboxes, bboxes)
if (self.ignore_iof_thr > 0) and (gt_bboxes_ignore is not None) and (
gt_bboxes_ignore.numel() > 0):
if self.ignore_wrt_candidates:
ignore_overlaps = self.bbox_overlaps(
bboxes, gt_bboxes_ignore, mode='iof')
ignore_max_overlaps, _ = ignore_overlaps.max(dim=1)
else:
ignore_overlaps = self.bbox_overlaps(
gt_bboxes_ignore, bboxes, mode='iof')
ignore_max_overlaps, _ = ignore_overlaps.max(dim=0)
overlaps[:, ignore_max_overlaps > self.ignore_iof_thr] = -1
assign_result = self.assign_wrt_overlaps(overlaps, gt_labels)
return assign_result
def assign_wrt_overlaps(self, overlaps, gt_labels=None):
"""Assign w.r.t. the overlaps of bboxes with gts.
Args:
overlaps (Tensor): Overlaps between k gt_bboxes and n bboxes,
shape(k, n).
gt_labels (Tensor, optional): Labels of k gt_bboxes, shape (k, ).
Returns:
:obj:`AssignResult`: The assign result.
"""
num_gts, num_bboxes = overlaps.size(0), overlaps.size(1)
# 1. assign -1 by default
assigned_gt_inds = overlaps.new_full((num_bboxes, ),
-1,
dtype=torch.long)
if num_gts == 0 or num_bboxes == 0:
# No ground truth or boxes, return empty assignment
max_overlaps = overlaps.new_zeros((num_bboxes, ))
if num_gts == 0:
# No truth, assign everything to background
assigned_gt_inds[:] = 0
if gt_labels is None:
assigned_labels = None
else:
assigned_labels = overlaps.new_zeros((num_bboxes, ),
dtype=torch.long)
return AssignResult(
num_gts,
assigned_gt_inds,
max_overlaps,
labels=assigned_labels)
# for each anchor, which gt best overlaps with it
# for each anchor, the max iou of all gts
max_overlaps, argmax_overlaps = overlaps.max(dim=0)
# for each gt, which anchor best overlaps with it
# for each gt, the max iou of all proposals
gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=1)
# 2. assign negative: below
if isinstance(self.neg_iou_thr, float):
assigned_gt_inds[(max_overlaps >= 0)
& (max_overlaps < self.neg_iou_thr)] = 0
elif isinstance(self.neg_iou_thr, tuple):
assert len(self.neg_iou_thr) == 2
assigned_gt_inds[(max_overlaps >= self.neg_iou_thr[0])
& (max_overlaps < self.neg_iou_thr[1])] = 0
# 3. assign positive: above positive IoU threshold
pos_inds = max_overlaps >= self.pos_iou_thr
assigned_gt_inds[pos_inds] = argmax_overlaps[pos_inds] + 1
# 4. assign fg: for each gt, proposals with highest IoU
for i in range(num_gts):
if gt_max_overlaps[i] >= self.min_pos_iou:
if self.gt_max_assign_all:
max_iou_inds = overlaps[i, :] == gt_max_overlaps[i]
assigned_gt_inds[max_iou_inds] = i + 1
else:
assigned_gt_inds[gt_argmax_overlaps[i]] = i + 1
if gt_labels is not None:
assigned_labels = assigned_gt_inds.new_zeros((num_bboxes, ))
pos_inds = torch.nonzero(assigned_gt_inds > 0).squeeze()
if pos_inds.numel() > 0:
assigned_labels[pos_inds] = gt_labels[
assigned_gt_inds[pos_inds] - 1]
else:
assigned_labels = None
return AssignResult(
num_gts, assigned_gt_inds, max_overlaps, labels=assigned_labels)
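# Usage sketch (illustrative values assumed, chosen to exercise all four
# assignment steps above): a toy 2-gt x 5-anchor overlap matrix.
def _demo_assign_wrt_overlaps():
    assigner = MaxIoUAssigner(pos_iou_thr=0.5, neg_iou_thr=0.4, min_pos_iou=0.3)
    overlaps = torch.tensor([[0.9, 0.4, 0.1, 0.0, 0.3],
                             [0.1, 0.6, 0.2, 0.0, 0.45]])
    result = assigner.assign_wrt_overlaps(overlaps)
    # anchor 0 -> gt 1 and anchor 1 -> gt 2 (1-based), two negatives, and
    # anchor 4 stays -1 (0.4 <= IoU < 0.5, and it is no gt's best anchor)
    assert result.gt_inds.tolist() == [1, 2, 0, 0, -1]
    return result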
import numba
import numpy as np
def camera_to_lidar(points, r_rect, velo2cam):
points_shape = list(points.shape[0:-1])
if points.shape[-1] == 3:
points = np.concatenate([points, np.ones(points_shape + [1])], axis=-1)
lidar_points = points @ np.linalg.inv((r_rect @ velo2cam).T)
return lidar_points[..., :3]
def box_camera_to_lidar(data, r_rect, velo2cam):
xyz = data[:, 0:3]
l, h, w = data[:, 3:4], data[:, 4:5], data[:, 5:6]
r = data[:, 6:7]
xyz_lidar = camera_to_lidar(xyz, r_rect, velo2cam)
return np.concatenate([xyz_lidar, w, l, h, r], axis=1)
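# Sanity sketch (identity calibration assumed): with identity rectification
# and velo-to-cam matrices, camera_to_lidar leaves the coordinates unchanged.
def _demo_camera_to_lidar_identity():
    pts = np.array([[1., 2., 3.]])
    eye = np.eye(4)
    assert np.allclose(camera_to_lidar(pts, eye, eye), pts)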
def corners_nd(dims, origin=0.5):
"""generate relative box corners based on length per dim and
origin point.
Args:
dims (float array, shape=[N, ndim]): array of length per dim
origin (list or array or float): origin point relate to smallest point.
Returns:
float array, shape=[N, 2 ** ndim, ndim]: returned corners.
point layout example: (2d) x0y0, x0y1, x1y0, x1y1;
(3d) x0y0z0, x0y0z1, x0y1z0, x0y1z1, x1y0z0, x1y0z1, x1y1z0, x1y1z1
where x0 < x1, y0 < y1, z0 < z1
"""
ndim = int(dims.shape[1])
corners_norm = np.stack(
np.unravel_index(np.arange(2**ndim), [2] * ndim),
axis=1).astype(dims.dtype)
# now corners_norm has format: (2d) x0y0, x0y1, x1y0, x1y1
# (3d) x0y0z0, x0y0z1, x0y1z0, x0y1z1, x1y0z0, x1y0z1, x1y1z0, x1y1z1
# so we convert to a format that is convenient for other computations.
# for 2d boxes, the format is clockwise starting from the minimum point;
# for 3d boxes, sketch the layout by hand to verify the ordering.
if ndim == 2:
# generate clockwise box corners
corners_norm = corners_norm[[0, 1, 3, 2]]
elif ndim == 3:
corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
corners_norm = corners_norm - np.array(origin, dtype=dims.dtype)
corners = dims.reshape([-1, 1, ndim]) * corners_norm.reshape(
[1, 2**ndim, ndim])
return corners
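# Usage sketch (values assumed): a single 2 x 4 box with origin=0.5 yields
# four clockwise corners centered on the origin.
def _demo_corners_nd():
    dims = np.array([[2.0, 4.0]])
    corners = corners_nd(dims, origin=0.5)
    assert np.allclose(corners,
                       [[[-1., -2.], [-1., 2.], [1., 2.], [1., -2.]]])
    return corners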
def rotation_2d(points, angles):
"""rotation 2d points based on origin point clockwise when angle positive.
Args:
points (float array, shape=[N, point_size, 2]): points to be rotated.
angles (float array, shape=[N]): rotation angle.
Returns:
float array: same shape as points
"""
rot_sin = np.sin(angles)
rot_cos = np.cos(angles)
rot_mat_T = np.stack([[rot_cos, -rot_sin], [rot_sin, rot_cos]])
return np.einsum('aij,jka->aik', points, rot_mat_T)
def center_to_corner_box2d(centers, dims, angles=None, origin=0.5):
"""convert kitti locations, dimensions and angles to corners.
format: center(xy), dims(xy), angles(clockwise when positive)
Args:
centers (float array, shape=[N, 2]): locations in kitti label file.
dims (float array, shape=[N, 2]): dimensions in kitti label file.
angles (float array, shape=[N]): rotation_y in kitti label file.
Returns:
float array, shape=[N, 4, 2]: box corners.
"""
# 'length' in kitti format is in x axis.
# xyz(hwl)(kitti label file)<->xyz(lhw)(camera)<->z(-x)(-y)(wlh)(lidar)
# center in kitti format is [0.5, 1.0, 0.5] in xyz.
corners = corners_nd(dims, origin=origin)
# corners: [N, 4, 2]
if angles is not None:
corners = rotation_2d(corners, angles)
corners += centers.reshape([-1, 1, 2])
return corners
@numba.jit(nopython=True)
def depth_to_points(depth, trunc_pixel):
num_pts = np.sum(depth[trunc_pixel:, ] > 0.1)
points = np.zeros((num_pts, 3), dtype=depth.dtype)
x = np.array([0, 0, 1], dtype=depth.dtype)
k = 0
for i in range(trunc_pixel, depth.shape[0]):
for j in range(depth.shape[1]):
if depth[i, j] > 0.1:
x = np.array([j, i, 1], dtype=depth.dtype)
points[k] = x * depth[i, j]
k += 1
return points
def depth_to_lidar_points(depth, trunc_pixel, P2, r_rect, velo2cam):
pts = depth_to_points(depth, trunc_pixel)
points_shape = list(pts.shape[0:-1])
points = np.concatenate([pts, np.ones(points_shape + [1])], axis=-1)
points = points @ np.linalg.inv(P2.T)
lidar_points = camera_to_lidar(points, r_rect, velo2cam)
return lidar_points
def rotation_3d_in_axis(points, angles, axis=0):
# points: [N, point_size, 3]
rot_sin = np.sin(angles)
rot_cos = np.cos(angles)
ones = np.ones_like(rot_cos)
zeros = np.zeros_like(rot_cos)
if axis == 1:
rot_mat_T = np.stack([[rot_cos, zeros, -rot_sin], [zeros, ones, zeros],
[rot_sin, zeros, rot_cos]])
elif axis == 2 or axis == -1:
rot_mat_T = np.stack([[rot_cos, -rot_sin, zeros],
[rot_sin, rot_cos, zeros], [zeros, zeros, ones]])
elif axis == 0:
rot_mat_T = np.stack([[zeros, rot_cos, -rot_sin],
[zeros, rot_sin, rot_cos], [ones, zeros, zeros]])
else:
raise ValueError('axis should be in range')
return np.einsum('aij,jka->aik', points, rot_mat_T)
def center_to_corner_box3d(centers,
dims,
angles=None,
origin=(0.5, 1.0, 0.5),
axis=1):
"""convert kitti locations, dimensions and angles to corners
Args:
centers (float array, shape=[N, 3]): locations in kitti label file.
dims (float array, shape=[N, 3]): dimensions in kitti label file.
angles (float array, shape=[N]): rotation_y in kitti label file.
origin (list or array or float): origin point relate to smallest point.
use [0.5, 1.0, 0.5] in camera and [0.5, 0.5, 0] in lidar.
axis (int): rotation axis. 1 for camera and 2 for lidar.
Returns:
float array, shape=[N, 8, 3]: box corners.
"""
# 'length' in kitti format is in x axis.
# yzx(hwl)(kitti label file)<->xyz(lhw)(camera)<->z(-x)(-y)(wlh)(lidar)
# center in kitti format is [0.5, 1.0, 0.5] in xyz.
corners = corners_nd(dims, origin=origin)
# corners: [N, 8, 3]
if angles is not None:
corners = rotation_3d_in_axis(corners, angles, axis=axis)
corners += centers.reshape([-1, 1, 3])
return corners
@numba.jit(nopython=True)
def box2d_to_corner_jit(boxes):
num_box = boxes.shape[0]
corners_norm = np.zeros((4, 2), dtype=boxes.dtype)
corners_norm[1, 1] = 1.0
corners_norm[2] = 1.0
corners_norm[3, 0] = 1.0
corners_norm -= np.array([0.5, 0.5], dtype=boxes.dtype)
corners = boxes.reshape(num_box, 1, 5)[:, :, 2:4] * corners_norm.reshape(
1, 4, 2)
rot_mat_T = np.zeros((2, 2), dtype=boxes.dtype)
box_corners = np.zeros((num_box, 4, 2), dtype=boxes.dtype)
for i in range(num_box):
rot_sin = np.sin(boxes[i, -1])
rot_cos = np.cos(boxes[i, -1])
rot_mat_T[0, 0] = rot_cos
rot_mat_T[0, 1] = -rot_sin
rot_mat_T[1, 0] = rot_sin
rot_mat_T[1, 1] = rot_cos
box_corners[i] = corners[i] @ rot_mat_T + boxes[i, :2]
return box_corners
@numba.njit
def corner_to_standup_nd_jit(boxes_corner):
num_boxes = boxes_corner.shape[0]
ndim = boxes_corner.shape[-1]
result = np.zeros((num_boxes, ndim * 2), dtype=boxes_corner.dtype)
for i in range(num_boxes):
for j in range(ndim):
result[i, j] = np.min(boxes_corner[i, :, j])
for j in range(ndim):
result[i, j + ndim] = np.max(boxes_corner[i, :, j])
return result
@numba.jit(nopython=True)
def corner_to_surfaces_3d_jit(corners):
"""convert 3d box corners from corner function above
to surfaces that normal vectors all direct to internal.
Args:
corners (float array, [N, 8, 3]): 3d box corners.
Returns:
surfaces (float array, [N, 6, 4, 3]):
"""
# box_corners: [N, 8, 3], must from corner functions in this module
num_boxes = corners.shape[0]
surfaces = np.zeros((num_boxes, 6, 4, 3), dtype=corners.dtype)
corner_idxes = np.array([
0, 1, 2, 3, 7, 6, 5, 4, 0, 3, 7, 4, 1, 5, 6, 2, 0, 4, 5, 1, 3, 2, 6, 7
]).reshape(6, 4)
for i in range(num_boxes):
for j in range(6):
for k in range(4):
surfaces[i, j, k] = corners[i, corner_idxes[j, k]]
return surfaces
def rotation_points_single_angle(points, angle, axis=0):
# points: [N, 3]
rot_sin = np.sin(angle)
rot_cos = np.cos(angle)
if axis == 1:
rot_mat_T = np.array(
[[rot_cos, 0, -rot_sin], [0, 1, 0], [rot_sin, 0, rot_cos]],
dtype=points.dtype)
elif axis == 2 or axis == -1:
rot_mat_T = np.array(
[[rot_cos, -rot_sin, 0], [rot_sin, rot_cos, 0], [0, 0, 1]],
dtype=points.dtype)
elif axis == 0:
rot_mat_T = np.array(
[[1, 0, 0], [0, rot_cos, -rot_sin], [0, rot_sin, rot_cos]],
dtype=points.dtype)
else:
raise ValueError('axis should be in range')
return points @ rot_mat_T, rot_mat_T
def project_to_image(points_3d, proj_mat):
points_shape = list(points_3d.shape)
points_shape[-1] = 1
# pad the homogeneous coordinate with zeros (not ones), matching the
# original implementation; the torch version of project_to_image pads
# with ones instead
points_4 = np.concatenate([points_3d, np.zeros(points_shape)], axis=-1)
point_2d = points_4 @ proj_mat.T
point_2d_res = point_2d[..., :2] / point_2d[..., 2:3]
return point_2d_res
def box3d_to_bbox(box3d, rect, Trv2c, P2):
box_corners = center_to_corner_box3d(
box3d[:, :3], box3d[:, 3:6], box3d[:, 6], [0.5, 1.0, 0.5], axis=1)
box_corners_in_image = project_to_image(box_corners, P2)
# box_corners_in_image: [N, 8, 2]
minxy = np.min(box_corners_in_image, axis=1)
maxxy = np.max(box_corners_in_image, axis=1)
bbox = np.concatenate([minxy, maxxy], axis=1)
return bbox
def corner_to_surfaces_3d(corners):
"""convert 3d box corners from corner function above
to surfaces that normal vectors all direct to internal.
Args:
corners (float array, [N, 8, 3]): 3d box corners.
Returns:
surfaces (float array, [N, 6, 4, 3]):
"""
# box_corners: [N, 8, 3], must from corner functions in this module
surfaces = np.array([
[corners[:, 0], corners[:, 1], corners[:, 2], corners[:, 3]],
[corners[:, 7], corners[:, 6], corners[:, 5], corners[:, 4]],
[corners[:, 0], corners[:, 3], corners[:, 7], corners[:, 4]],
[corners[:, 1], corners[:, 5], corners[:, 6], corners[:, 2]],
[corners[:, 0], corners[:, 4], corners[:, 5], corners[:, 1]],
[corners[:, 3], corners[:, 2], corners[:, 6], corners[:, 7]],
]).transpose([2, 0, 1, 3])
return surfaces
def points_in_rbbox(points, rbbox, z_axis=2, origin=(0.5, 0.5, 0)):
# TODO: this function is different from PointCloud3D, be careful
# when start to use nuscene, check the input
rbbox_corners = center_to_corner_box3d(
rbbox[:, :3], rbbox[:, 3:6], rbbox[:, 6], origin=origin, axis=z_axis)
surfaces = corner_to_surfaces_3d(rbbox_corners)
indices = points_in_convex_polygon_3d_jit(points[:, :3], surfaces)
return indices
def minmax_to_corner_2d(minmax_box):
ndim = minmax_box.shape[-1] // 2
center = minmax_box[..., :ndim]
dims = minmax_box[..., ndim:] - center
return center_to_corner_box2d(center, dims, origin=0.0)
def limit_period(val, offset=0.5, period=np.pi):
return val - np.floor(val / period + offset) * period
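# Worked example (input value assumed): with the defaults offset=0.5 and
# period=pi, the result lands in [-pi/2, pi/2), e.g. 3.5 wraps to 3.5 - pi.
def _demo_limit_period():
    assert abs(limit_period(3.5) - (3.5 - np.pi)) < 1e-9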
def create_anchors_3d_range(feature_size,
anchor_range,
sizes=((1.6, 3.9, 1.56), ),
rotations=(0, np.pi / 2),
dtype=np.float32):
"""
Args:
feature_size: list [D, H, W](zyx)
sizes: [N, 3] list of list or array, size of anchors, xyz
Returns:
anchors: [*feature_size, num_sizes, num_rots, 7] tensor.
"""
anchor_range = np.array(anchor_range, dtype)
z_centers = np.linspace(
anchor_range[2], anchor_range[5], feature_size[0], dtype=dtype)
y_centers = np.linspace(
anchor_range[1], anchor_range[4], feature_size[1], dtype=dtype)
x_centers = np.linspace(
anchor_range[0], anchor_range[3], feature_size[2], dtype=dtype)
sizes = np.reshape(np.array(sizes, dtype=dtype), [-1, 3])
rotations = np.array(rotations, dtype=dtype)
rets = np.meshgrid(
x_centers, y_centers, z_centers, rotations, indexing='ij')
tile_shape = [1] * 5
tile_shape[-2] = int(sizes.shape[0])
for i in range(len(rets)):
rets[i] = np.tile(rets[i][..., np.newaxis, :], tile_shape)
rets[i] = rets[i][..., np.newaxis] # for concat
sizes = np.reshape(sizes, [1, 1, 1, -1, 1, 3])
tile_size_shape = list(rets[0].shape)
tile_size_shape[3] = 1
sizes = np.tile(sizes, tile_size_shape)
rets.insert(3, sizes)
ret = np.concatenate(rets, axis=-1)
return np.transpose(ret, [2, 1, 0, 3, 4, 5])
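# Shape sketch (KITTI-car-like numbers assumed, not prescribed by this
# module): one anchor size and two rotations on a 1 x 200 x 176 feature map.
def _demo_create_anchors_3d_range():
    anchors = create_anchors_3d_range(
        feature_size=[1, 200, 176],
        anchor_range=[0, -40, -3, 70.4, 40, 1],
        sizes=((1.6, 3.9, 1.56), ),
        rotations=(0, np.pi / 2))
    # [D, H, W, num_sizes, num_rots, 7]
    assert anchors.shape == (1, 200, 176, 1, 2, 7)
    return anchors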
def center_to_minmax_2d_0_5(centers, dims):
return np.concatenate([centers - dims / 2, centers + dims / 2], axis=-1)
def center_to_minmax_2d(centers, dims, origin=0.5):
if origin == 0.5:
return center_to_minmax_2d_0_5(centers, dims)
corners = center_to_corner_box2d(centers, dims, origin=origin)
return corners[:, [0, 2]].reshape([-1, 4])
def rbbox2d_to_near_bbox(rbboxes):
"""convert rotated bbox to nearest 'standing' or 'lying' bbox.
Args:
rbboxes: [N, 5(x, y, xdim, ydim, rad)] rotated bboxes
Returns:
bboxes: [N, 4(xmin, ymin, xmax, ymax)] bboxes
"""
rots = rbboxes[..., -1]
rots_0_pi_div_2 = np.abs(limit_period(rots, 0.5, np.pi))
cond = (rots_0_pi_div_2 > np.pi / 4)[..., np.newaxis]
bboxes_center = np.where(cond, rbboxes[:, [0, 1, 3, 2]], rbboxes[:, :4])
bboxes = center_to_minmax_2d(bboxes_center[:, :2], bboxes_center[:, 2:])
return bboxes
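# Sketch (values assumed): a box rotated by pi/2 swaps its x/y extents
# before the min/max conversion, so the result still hugs the rotated box.
def _demo_rbbox2d_to_near_bbox():
    rb = np.array([[0., 0., 4., 2., np.pi / 2]])  # x, y, xdim, ydim, rad
    assert np.allclose(rbbox2d_to_near_bbox(rb), [[-1., -2., 1., 2.]])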
@numba.jit(nopython=True)
def iou_jit(boxes, query_boxes, mode='iou', eps=0.0):
"""calculate box iou. note that jit version runs ~10x faster than the
box_overlaps function in mmdet3d.core.evaluation
Parameters
----------
boxes: (N, 4) ndarray of float
query_boxes: (K, 4) ndarray of float
Returns
-------
overlaps: (N, K) ndarray of overlap between boxes and query_boxes
"""
N = boxes.shape[0]
K = query_boxes.shape[0]
overlaps = np.zeros((N, K), dtype=boxes.dtype)
for k in range(K):
box_area = ((query_boxes[k, 2] - query_boxes[k, 0] + eps) *
(query_boxes[k, 3] - query_boxes[k, 1] + eps))
for n in range(N):
iw = (
min(boxes[n, 2], query_boxes[k, 2]) -
max(boxes[n, 0], query_boxes[k, 0]) + eps)
if iw > 0:
ih = (
min(boxes[n, 3], query_boxes[k, 3]) -
max(boxes[n, 1], query_boxes[k, 1]) + eps)
if ih > 0:
if mode == 'iou':
ua = ((boxes[n, 2] - boxes[n, 0] + eps) *
(boxes[n, 3] - boxes[n, 1] + eps) + box_area -
iw * ih)
else:
ua = ((boxes[n, 2] - boxes[n, 0] + eps) *
(boxes[n, 3] - boxes[n, 1] + eps))
overlaps[n, k] = iw * ih / ua
return overlaps
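# Usage sketch (boxes assumed): two 10 x 10 boxes overlapping on a 5 x 5
# patch give IoU = 25 / (100 + 100 - 25) with the default eps=0.
def _demo_iou_jit():
    boxes = np.array([[0., 0., 10., 10.]])
    query_boxes = np.array([[5., 5., 15., 15.]])
    assert np.allclose(iou_jit(boxes, query_boxes), [[25. / 175.]])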
def change_box3d_center_(box3d, src, dst):
dst = np.array(dst, dtype=box3d.dtype)
src = np.array(src, dtype=box3d.dtype)
box3d[..., :3] += box3d[..., 3:6] * (dst - src)
def projection_matrix_to_CRT_kitti(proj):
# P = C @ [R|T]
# C is an upper-triangular matrix, so we invert CR and apply a QR
# decomposition: inv(CR) = Rinv @ Cinv with Rinv orthogonal and Cinv
# upper triangular. This is stable for all KITTI camera projection matrices.
CR = proj[0:3, 0:3]
CT = proj[0:3, 3]
RinvCinv = np.linalg.inv(CR)
Rinv, Cinv = np.linalg.qr(RinvCinv)
C = np.linalg.inv(Cinv)
R = np.linalg.inv(Rinv)
T = Cinv @ CT
return C, R, T
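# Sanity sketch (synthetic P assumed): the recovered factors reproduce
# P = C @ [R|T] up to numerics; QR sign conventions may flip individual
# factors, but the products below are stable.
def _demo_projection_matrix_to_CRT_kitti():
    P = np.array([[700., 0., 600., 40.],
                  [0., 700., 180., 2.],
                  [0., 0., 1., 0.01]])
    C, R, T = projection_matrix_to_CRT_kitti(P)
    assert np.allclose(C @ R, P[:3, :3])
    assert np.allclose(C @ T, P[:3, 3])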
def remove_outside_points(points, rect, Trv2c, P2, image_shape):
# 5x faster than remove_outside_points_v1(2ms vs 10ms)
C, R, T = projection_matrix_to_CRT_kitti(P2)
image_bbox = [0, 0, image_shape[1], image_shape[0]]
frustum = get_frustum(image_bbox, C)
frustum -= T
frustum = np.linalg.inv(R) @ frustum.T
frustum = camera_to_lidar(frustum.T, rect, Trv2c)
frustum_surfaces = corner_to_surfaces_3d_jit(frustum[np.newaxis, ...])
indices = points_in_convex_polygon_3d_jit(points[:, :3], frustum_surfaces)
points = points[indices.reshape([-1])]
return points
def get_frustum(bbox_image, C, near_clip=0.001, far_clip=100):
fku = C[0, 0]
fkv = -C[1, 1]
u0v0 = C[0:2, 2]
z_points = np.array(
[near_clip] * 4 + [far_clip] * 4, dtype=C.dtype)[:, np.newaxis]
b = bbox_image
box_corners = np.array(
[[b[0], b[1]], [b[0], b[3]], [b[2], b[3]], [b[2], b[1]]],
dtype=C.dtype)
near_box_corners = (box_corners - u0v0) / np.array(
[fku / near_clip, -fkv / near_clip], dtype=C.dtype)
far_box_corners = (box_corners - u0v0) / np.array(
[fku / far_clip, -fkv / far_clip], dtype=C.dtype)
ret_xy = np.concatenate([near_box_corners, far_box_corners],
axis=0) # [8, 2]
ret_xyz = np.concatenate([ret_xy, z_points], axis=1)
return ret_xyz
def surface_equ_3d(polygon_surfaces):
# return [a, b, c], d in ax+by+cz+d=0
# polygon_surfaces: [num_polygon, num_surfaces, num_points_of_polygon, 3]
surface_vec = polygon_surfaces[:, :, :2, :] - polygon_surfaces[:, :,
1:3, :]
# normal_vec: [..., 3]
normal_vec = np.cross(surface_vec[:, :, 0, :], surface_vec[:, :, 1, :])
# print(normal_vec.shape, points[..., 0, :].shape)
# d = -np.inner(normal_vec, points[..., 0, :])
d = np.einsum('aij, aij->ai', normal_vec, polygon_surfaces[:, :, 0, :])
return normal_vec, -d
@numba.njit
def _points_in_convex_polygon_3d_jit(points, polygon_surfaces, normal_vec, d,
num_surfaces):
max_num_surfaces, max_num_points_of_surface = polygon_surfaces.shape[1:3]
num_points = points.shape[0]
num_polygons = polygon_surfaces.shape[0]
ret = np.ones((num_points, num_polygons), dtype=np.bool_)
sign = 0.0
for i in range(num_points):
for j in range(num_polygons):
for k in range(max_num_surfaces):
if k > num_surfaces[j]:
break
sign = (
points[i, 0] * normal_vec[j, k, 0] +
points[i, 1] * normal_vec[j, k, 1] +
points[i, 2] * normal_vec[j, k, 2] + d[j, k])
if sign >= 0:
ret[i, j] = False
break
return ret
def points_in_convex_polygon_3d_jit(points,
polygon_surfaces,
num_surfaces=None):
"""check points is in 3d convex polygons.
Args:
points: [num_points, 3] array.
polygon_surfaces: [num_polygon, max_num_surfaces,
max_num_points_of_surface, 3]
array. all surfaces' normal vector must direct to internal.
max_num_points_of_surface must at least 3.
num_surfaces: [num_polygon] array. indicate how many surfaces
a polygon contain
Returns:
[num_points, num_polygon] bool array.
"""
max_num_surfaces, max_num_points_of_surface = polygon_surfaces.shape[1:3]
# num_points = points.shape[0]
num_polygons = polygon_surfaces.shape[0]
if num_surfaces is None:
num_surfaces = np.full((num_polygons, ), 9999999, dtype=np.int64)
normal_vec, d = surface_equ_3d(polygon_surfaces[:, :, :3, :])
# normal_vec: [num_polygon, max_num_surfaces, 3]
# d: [num_polygon, max_num_surfaces]
return _points_in_convex_polygon_3d_jit(points, polygon_surfaces,
normal_vec, d, num_surfaces)
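# End-to-end sketch (box and points assumed): a unit cube with its bottom
# face at z=0 (origin (0.5, 0.5, 0), as used by points_in_rbbox) contains
# the first query point but not the second.
def _demo_points_in_rbbox():
    box = np.array([[0., 0., 0., 1., 1., 1., 0.]])  # x, y, z, dx, dy, dz, r
    pts = np.array([[0., 0., 0.5], [3., 0., 0.5]])
    assert points_in_rbbox(pts, box).tolist() == [[True], [False]]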
@numba.jit
def points_in_convex_polygon_jit(points, polygon, clockwise=True):
"""check points is in 2d convex polygons. True when point in polygon
Args:
points: [num_points, 2] array.
polygon: [num_polygon, num_points_of_polygon, 2] array.
clockwise: bool. whether the polygon vertices are ordered clockwise.
Returns:
[num_points, num_polygon] bool array.
"""
# first convert polygon to directed lines
num_points_of_polygon = polygon.shape[1]
num_points = points.shape[0]
num_polygons = polygon.shape[0]
# if clockwise:
# vec1 = polygon - polygon[:, [num_points_of_polygon - 1] +
# list(range(num_points_of_polygon - 1)), :]
# else:
# vec1 = polygon[:, [num_points_of_polygon - 1] +
# list(range(num_points_of_polygon - 1)), :] - polygon
# vec1: [num_polygon, num_points_of_polygon, 2]
vec1 = np.zeros((2), dtype=polygon.dtype)
ret = np.zeros((num_points, num_polygons), dtype=np.bool_)
success = True
cross = 0.0
for i in range(num_points):
for j in range(num_polygons):
success = True
for k in range(num_points_of_polygon):
if clockwise:
vec1 = polygon[j, k] - polygon[j, k - 1]
else:
vec1 = polygon[j, k - 1] - polygon[j, k]
cross = vec1[1] * (polygon[j, k, 0] - points[i, 0])
cross -= vec1[0] * (polygon[j, k, 1] - points[i, 1])
if cross >= 0:
success = False
break
ret[i, j] = success
return ret
import numpy as np
import torch
def limit_period(val, offset=0.5, period=np.pi):
return val - torch.floor(val / period + offset) * period
def corners_nd(dims, origin=0.5):
"""generate relative box corners based on length per dim and
origin point.
Args:
dims (float array, shape=[N, ndim]): array of length per dim
origin (list or array or float): origin point relate to smallest point.
Returns:
float array, shape=[N, 2 ** ndim, ndim]: returned corners.
point layout example: (2d) x0y0, x0y1, x1y0, x1y1;
(3d) x0y0z0, x0y0z1, x0y1z0, x0y1z1, x1y0z0, x1y0z1, x1y1z0, x1y1z1
where x0 < x1, y0 < y1, z0 < z1
"""
ndim = int(dims.shape[1])
corners_norm = np.stack(
np.unravel_index(np.arange(2**ndim), [2] * ndim),
axis=1).astype(dims.dtype)
# now corners_norm has format: (2d) x0y0, x0y1, x1y0, x1y1
# (3d) x0y0z0, x0y0z1, x0y1z0, x0y1z1, x1y0z0, x1y0z1, x1y1z0, x1y1z1
# so we convert to a format that is convenient for other computations.
# for 2d boxes, the format is clockwise starting from the minimum point;
# for 3d boxes, sketch the layout by hand to verify the ordering.
if ndim == 2:
# generate clockwise box corners
corners_norm = corners_norm[[0, 1, 3, 2]]
elif ndim == 3:
corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
corners_norm = corners_norm - np.array(origin, dtype=dims.dtype)
corners = dims.reshape([-1, 1, ndim]) * corners_norm.reshape(
[1, 2**ndim, ndim])
return corners
def rotation_3d_in_axis(points, angles, axis=0):
# points: [N, point_size, 3]
# angles: [N]
rot_sin = torch.sin(angles)
rot_cos = torch.cos(angles)
ones = torch.ones_like(rot_cos)
zeros = torch.zeros_like(rot_cos)
if axis == 1:
rot_mat_T = torch.stack([
torch.stack([rot_cos, zeros, -rot_sin]),
torch.stack([zeros, ones, zeros]),
torch.stack([rot_sin, zeros, rot_cos])
])
elif axis == 2 or axis == -1:
rot_mat_T = torch.stack([
torch.stack([rot_cos, -rot_sin, zeros]),
torch.stack([rot_sin, rot_cos, zeros]),
torch.stack([zeros, zeros, ones])
])
elif axis == 0:
rot_mat_T = torch.stack([
torch.stack([zeros, rot_cos, -rot_sin]),
torch.stack([zeros, rot_sin, rot_cos]),
torch.stack([ones, zeros, zeros])
])
else:
raise ValueError('axis should be in range')
return torch.einsum('aij,jka->aik', (points, rot_mat_T))
def center_to_corner_box3d(centers,
dims,
angles,
origin=[0.5, 1.0, 0.5],
axis=1):
"""convert kitti locations, dimensions and angles to corners
Args:
centers (float array, shape=[N, 3]): locations in kitti label file.
dims (float array, shape=[N, 3]): dimensions in kitti label file.
angles (float array, shape=[N]): rotation_y in kitti label file.
origin (list or array or float): origin point relate to smallest point.
use [0.5, 1.0, 0.5] in camera and [0.5, 0.5, 0] in lidar.
axis (int): rotation axis. 1 for camera and 2 for lidar.
Returns:
float array, shape=[N, 8, 3]: box corners.
"""
# 'length' in kitti format is in x axis.
# yzx(hwl)(kitti label file)<->xyz(lhw)(camera)<->z(-x)(-y)(wlh)(lidar)
# center in kitti format is [0.5, 1.0, 0.5] in xyz.
corners = corners_nd(dims, origin=origin)
# corners: [N, 8, 3]
corners = rotation_3d_in_axis(corners, angles, axis=axis)
corners += centers.view(-1, 1, 3)
return corners
def lidar_to_camera(points, r_rect, velo2cam):
num_points = points.shape[0]
points = torch.cat(
[points, torch.ones(num_points, 1).type_as(points)], dim=-1)
camera_points = points @ (r_rect @ velo2cam).t()
return camera_points[..., :3]
def box_lidar_to_camera(data, r_rect, velo2cam):
xyz_lidar = data[..., 0:3]
w, l, h = data[..., 3:4], data[..., 4:5], data[..., 5:6]
r = data[..., 6:7]
xyz = lidar_to_camera(xyz_lidar, r_rect, velo2cam)
return torch.cat([xyz, l, h, w, r], dim=-1)
def project_to_image(points_3d, proj_mat):
points_num = list(points_3d.shape)[:-1]
points_shape = np.concatenate([points_num, [1]], axis=0).tolist()
# the previous implementation used new_zeros; new_ones yields better results
points_4 = torch.cat(
[points_3d, points_3d.new_ones(*points_shape)], dim=-1)
# point_2d = points_4 @ tf.transpose(proj_mat, [1, 0])
point_2d = torch.matmul(points_4, proj_mat.t())
point_2d_res = point_2d[..., :2] / point_2d[..., 2:3]
return point_2d_res
def rbbox2d_to_near_bbox(rbboxes):
"""convert rotated bbox to nearest 'standing' or 'lying' bbox.
Args:
rbboxes: [N, 5(x, y, xdim, ydim, rad)] rotated bboxes
Returns:
bboxes: [N, 4(xmin, ymin, xmax, ymax)] bboxes
"""
rots = rbboxes[..., -1]
rots_0_pi_div_2 = torch.abs(limit_period(rots, 0.5, np.pi))
cond = (rots_0_pi_div_2 > np.pi / 4)[..., None]
bboxes_center = torch.where(cond, rbboxes[:, [0, 1, 3, 2]], rbboxes[:, :4])
bboxes = center_to_minmax_2d(bboxes_center[:, :2], bboxes_center[:, 2:])
return bboxes
def center_to_minmax_2d_0_5(centers, dims):
return torch.cat([centers - dims / 2, centers + dims / 2], dim=-1)
def center_to_minmax_2d(centers, dims, origin=0.5):
if origin == 0.5:
return center_to_minmax_2d_0_5(centers, dims)
corners = center_to_corner_box2d(centers, dims, origin=origin)
return corners[:, [0, 2]].reshape([-1, 4])
def center_to_corner_box2d(centers, dims, angles=None, origin=0.5):
"""convert kitti locations, dimensions and angles to corners.
format: center(xy), dims(xy), angles(clockwise when positive)
Args:
centers (float array, shape=[N, 2]): locations in kitti label file.
dims (float array, shape=[N, 2]): dimensions in kitti label file.
angles (float array, shape=[N]): rotation_y in kitti label file.
Returns:
float array, shape=[N, 4, 2]: box corners.
"""
# 'length' in kitti format is in x axis.
# xyz(hwl)(kitti label file)<->xyz(lhw)(camera)<->z(-x)(-y)(wlh)(lidar)
# center in kitti format is [0.5, 1.0, 0.5] in xyz.
corners = corners_nd(dims, origin=origin)
# corners: [N, 4, 2]
if angles is not None:
corners = rotation_2d(corners, angles)
corners += centers.reshape([-1, 1, 2])
return corners
def rotation_2d(points, angles):
"""rotation 2d points based on origin point clockwise when angle positive.
Args:
points (float array, shape=[N, point_size, 2]): points to be rotated.
angles (float array, shape=[N]): rotation angle.
Returns:
float array: same shape as points
"""
rot_sin = torch.sin(angles)
rot_cos = torch.cos(angles)
# torch.stack does not accept nested python lists, so stack the rows first
rot_mat_T = torch.stack([
torch.stack([rot_cos, -rot_sin]),
torch.stack([rot_sin, rot_cos])
])
return torch.einsum('aij,jka->aik', points, rot_mat_T)
from .box_coder import ResidualCoder
__all__ = ['ResidualCoder']
import numpy as np
import torch
class ResidualCoder(object):
def __init__(self, code_size=7, mean=None, std=None):
super().__init__()
self.code_size = code_size
self.mean = mean
self.std = std
@staticmethod
def encode_np(boxes, anchors):
"""
:param boxes: (N, 7) x, y, z, w, l, h, r
:param anchors: (N, 7)
:return:
"""
# need to convert boxes to z-center format
xa, ya, za, wa, la, ha, ra = np.split(anchors, 7, axis=-1)
xg, yg, zg, wg, lg, hg, rg = np.split(boxes, 7, axis=-1)
zg = zg + hg / 2
za = za + ha / 2
diagonal = np.sqrt(la**2 + wa**2) # 4.3
xt = (xg - xa) / diagonal
yt = (yg - ya) / diagonal
zt = (zg - za) / ha # 1.6
lt = np.log(lg / la)
wt = np.log(wg / wa)
ht = np.log(hg / ha)
rt = rg - ra
return np.concatenate([xt, yt, zt, wt, lt, ht, rt], axis=-1)
@staticmethod
def decode_np(box_encodings, anchors):
"""
:param box_encodings: (N, 7) x, y, z, w, l, h, r
:param anchors: (N, 7)
:return:
"""
# need to convert box_encodings to z-bottom format
xa, ya, za, wa, la, ha, ra = np.split(anchors, 7, axis=-1)
xt, yt, zt, wt, lt, ht, rt = np.split(box_encodings, 7, axis=-1)
za = za + ha / 2
diagonal = np.sqrt(la**2 + wa**2)
xg = xt * diagonal + xa
yg = yt * diagonal + ya
zg = zt * ha + za
lg = np.exp(lt) * la
wg = np.exp(wt) * wa
hg = np.exp(ht) * ha
rg = rt + ra
zg = zg - hg / 2
return np.concatenate([xg, yg, zg, wg, lg, hg, rg], axis=-1)
@staticmethod
def encode_torch(anchors, boxes, means, stds):
"""
:param boxes: (N, 7+n) x, y, z, w, l, h, r, velo*
:param anchors: (N, 7+n)
:return:
"""
box_ndim = anchors.shape[-1]
cas, cgs, cts = [], [], []
if box_ndim > 7:
xa, ya, za, wa, la, ha, ra, *cas = torch.split(anchors, 1, dim=-1)
xg, yg, zg, wg, lg, hg, rg, *cgs = torch.split(boxes, 1, dim=-1)
cts = [g - a for g, a in zip(cgs, cas)]
else:
xa, ya, za, wa, la, ha, ra = torch.split(anchors, 1, dim=-1)
xg, yg, zg, wg, lg, hg, rg = torch.split(boxes, 1, dim=-1)
za = za + ha / 2
zg = zg + hg / 2
diagonal = torch.sqrt(la**2 + wa**2)
xt = (xg - xa) / diagonal
yt = (yg - ya) / diagonal
zt = (zg - za) / ha
lt = torch.log(lg / la)
wt = torch.log(wg / wa)
ht = torch.log(hg / ha)
rt = rg - ra
return torch.cat([xt, yt, zt, wt, lt, ht, rt, *cts], dim=-1)
@staticmethod
def decode_torch(anchors, box_encodings, means, stds):
"""
:param box_encodings: (N, 7 + n) x, y, z, w, l, h, r
:param anchors: (N, 7 + n)
:return:
"""
cas, cts = [], []
box_ndim = anchors.shape[-1]
if box_ndim > 7:
xa, ya, za, wa, la, ha, ra, *cas = torch.split(anchors, 1, dim=-1)
xt, yt, zt, wt, lt, ht, rt, *cts = torch.split(
box_encodings, 1, dim=-1)
else:
xa, ya, za, wa, la, ha, ra = torch.split(anchors, 1, dim=-1)
xt, yt, zt, wt, lt, ht, rt = torch.split(box_encodings, 1, dim=-1)
za = za + ha / 2
diagonal = torch.sqrt(la**2 + wa**2)
xg = xt * diagonal + xa
yg = yt * diagonal + ya
zg = zt * ha + za
lg = torch.exp(lt) * la
wg = torch.exp(wt) * wa
hg = torch.exp(ht) * ha
rg = rt + ra
zg = zg - hg / 2
cgs = [t + a for t, a in zip(cts, cas)]
return torch.cat([xg, yg, zg, wg, lg, hg, rg, *cgs], dim=-1)
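# Round-trip sketch (anchor and box values assumed): decode_np inverts
# encode_np exactly, since every residual transform has a closed-form inverse.
def _demo_residual_coder_roundtrip():
    anchors = np.array([[0., 0., -1., 1.6, 3.9, 1.56, 0.]])
    boxes = np.array([[0.5, 1.0, -1.2, 1.7, 4.1, 1.5, 0.3]])
    enc = ResidualCoder.encode_np(boxes, anchors)
    assert np.allclose(ResidualCoder.decode_np(enc, anchors), boxes)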
import torch
from mmdet3d.ops.iou3d import boxes_iou3d_gpu
from . import box_torch_ops
def bbox_overlaps_2d(bboxes1, bboxes2, mode='iou', is_aligned=False):
"""Calculate overlap between two set of bboxes.
If ``is_aligned`` is ``False``, then calculate the ious between each bbox
of bboxes1 and bboxes2, otherwise the ious between each aligned pair of
bboxes1 and bboxes2.
Args:
bboxes1 (Tensor): shape (m, 4) in <x1, y1, x2, y2> format.
bboxes2 (Tensor): shape (n, 4) in <x1, y1, x2, y2> format.
If is_aligned is ``True``, then m and n must be equal.
mode (str): "iou" (intersection over union) or iof (intersection over
foreground).
Returns:
ious(Tensor): shape (m, n) if is_aligned == False else shape (m, 1)
Example:
>>> bboxes1 = torch.FloatTensor([
>>> [0, 0, 10, 10],
>>> [10, 10, 20, 20],
>>> [32, 32, 38, 42],
>>> ])
>>> bboxes2 = torch.FloatTensor([
>>> [0, 0, 10, 20],
>>> [0, 10, 10, 19],
>>> [10, 10, 20, 20],
>>> ])
>>> bbox_overlaps_2d(bboxes1, bboxes2)
tensor([[0.5238, 0.0500, 0.0041],
[0.0323, 0.0452, 1.0000],
[0.0000, 0.0000, 0.0000]])
Example:
>>> empty = torch.FloatTensor([])
>>> nonempty = torch.FloatTensor([
>>> [0, 0, 10, 9],
>>> ])
>>> assert tuple(bbox_overlaps_2d(empty, nonempty).shape) == (0, 1)
>>> assert tuple(bbox_overlaps_2d(nonempty, empty).shape) == (1, 0)
>>> assert tuple(bbox_overlaps_2d(empty, empty).shape) == (0, 0)
"""
assert mode in ['iou', 'iof']
rows = bboxes1.size(0)
cols = bboxes2.size(0)
if is_aligned:
assert rows == cols
if rows * cols == 0:
return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols)
if is_aligned:
lt = torch.max(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2]
rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2]
wh = (rb - lt).clamp(min=0) # [rows, 2]
overlap = wh[:, 0] * wh[:, 1]
area1 = (bboxes1[:, 2] - bboxes1[:, 0]) * (
bboxes1[:, 3] - bboxes1[:, 1])
if mode == 'iou':
area2 = (bboxes2[:, 2] - bboxes2[:, 0]) * (
bboxes2[:, 3] - bboxes2[:, 1])
ious = overlap / (area1 + area2 - overlap)
else:
ious = overlap / area1
else:
lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2]) # [rows, cols, 2]
rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:]) # [rows, cols, 2]
wh = (rb - lt).clamp(min=0) # [rows, cols, 2]
overlap = wh[:, :, 0] * wh[:, :, 1]
area1 = (bboxes1[:, 2] - bboxes1[:, 0]) * (
bboxes1[:, 3] - bboxes1[:, 1])
if mode == 'iou':
area2 = (bboxes2[:, 2] - bboxes2[:, 0]) * (
bboxes2[:, 3] - bboxes2[:, 1])
ious = overlap / (area1[:, None] + area2 - overlap)
else:
ious = overlap / (area1[:, None])
return ious
def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou'):
'''
:param bboxes1: Tensor, shape (N, 7) [x, y, z, h, w, l, ry]
:param bboxes2: Tensor, shape (M, 7) [x, y, z, h, w, l, ry]
:param mode: mode (str): "iou" (intersection over union) or
iof (intersection over foreground).
:return: iou: (M, N); aligned mode is not supported currently
'''
# TODO: check the input dimension meanings,
# this is inconsistent with that in bbox_overlaps_nearest_3d
return boxes_iou3d_gpu(bboxes1, bboxes2, mode)
def bbox_overlaps_nearest_3d(bboxes1, bboxes2, mode='iou', is_aligned=False):
'''
:param bboxes1: Tensor, shape (N, 7) [x, y, z, h, w, l, ry]?
:param bboxes2: Tensor, shape (M, 7) [x, y, z, h, w, l, ry]?
:param mode: mode (str): "iou" (intersection over union) or iof
(intersection over foreground).
:return: iou: (M, N); aligned mode is not supported currently
'''
# TODO: check the input dimension meanings,
# this is inconsistent with that in bbox_overlaps_3d
rbboxes1_np = bboxes1.index_select(
dim=-1, index=bboxes1.new_tensor([0, 1, 3, 4, 6]).long())
rbboxes2_np = bboxes2.index_select(
dim=-1, index=bboxes1.new_tensor([0, 1, 3, 4, 6]).long())
# Change the bboxes to bev
# box conversion and iou calculation in torch version on CUDA
# is 10x faster than that in numpy version
bboxes1_bv = box_torch_ops.rbbox2d_to_near_bbox(rbboxes1_np)
bboxes2_bv = box_torch_ops.rbbox2d_to_near_bbox(rbboxes2_np)
ret = bbox_overlaps_2d(
bboxes1_bv, bboxes2_bv, mode=mode, is_aligned=is_aligned)
return ret
from .base_sampler import BaseSampler
from .combined_sampler import CombinedSampler
from .instance_balanced_pos_sampler import InstanceBalancedPosSampler
from .iou_balanced_neg_sampler import IoUBalancedNegSampler
from .ohem_sampler import OHEMSampler
from .pseudo_sampler import PseudoSampler
from .random_sampler import RandomSampler
from .sampling_result import SamplingResult
__all__ = [
'BaseSampler', 'PseudoSampler', 'RandomSampler',
'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
'OHEMSampler', 'SamplingResult'
]
from abc import ABCMeta, abstractmethod
import torch
from .sampling_result import SamplingResult
class BaseSampler(metaclass=ABCMeta):
def __init__(self,
num,
pos_fraction,
neg_pos_ub=-1,
add_gt_as_proposals=True,
**kwargs):
self.num = num
self.pos_fraction = pos_fraction
self.neg_pos_ub = neg_pos_ub
self.add_gt_as_proposals = add_gt_as_proposals
self.pos_sampler = self
self.neg_sampler = self
@abstractmethod
def _sample_pos(self, assign_result, num_expected, **kwargs):
pass
@abstractmethod
def _sample_neg(self, assign_result, num_expected, **kwargs):
pass
def sample(self,
assign_result,
bboxes,
gt_bboxes,
gt_labels=None,
**kwargs):
"""Sample positive and negative bboxes.
This is a simple implementation of bbox sampling given candidates,
assigning results and ground truth bboxes.
Args:
assign_result (:obj:`AssignResult`): Bbox assigning results.
bboxes (Tensor): Boxes to be sampled from.
gt_bboxes (Tensor): Ground truth bboxes.
gt_labels (Tensor, optional): Class labels of ground truth bboxes.
Returns:
:obj:`SamplingResult`: Sampling result.
"""
bboxes = bboxes[:, :4]
gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8)
if self.add_gt_as_proposals:
bboxes = torch.cat([gt_bboxes, bboxes], dim=0)
assign_result.add_gt_(gt_labels)
gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8)
gt_flags = torch.cat([gt_ones, gt_flags])
num_expected_pos = int(self.num * self.pos_fraction)
pos_inds = self.pos_sampler._sample_pos(
assign_result, num_expected_pos, bboxes=bboxes, **kwargs)
# We found that sampled indices have duplicated items occasionally.
# (may be a bug of PyTorch)
pos_inds = pos_inds.unique()
num_sampled_pos = pos_inds.numel()
num_expected_neg = self.num - num_sampled_pos
if self.neg_pos_ub >= 0:
_pos = max(1, num_sampled_pos)
neg_upper_bound = int(self.neg_pos_ub * _pos)
if num_expected_neg > neg_upper_bound:
num_expected_neg = neg_upper_bound
neg_inds = self.neg_sampler._sample_neg(
assign_result, num_expected_neg, bboxes=bboxes, **kwargs)
neg_inds = neg_inds.unique()
return SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
assign_result, gt_flags)
from ..assign_sampling import build_sampler
from .base_sampler import BaseSampler
class CombinedSampler(BaseSampler):
def __init__(self, pos_sampler, neg_sampler, **kwargs):
super(CombinedSampler, self).__init__(**kwargs)
self.pos_sampler = build_sampler(pos_sampler, **kwargs)
self.neg_sampler = build_sampler(neg_sampler, **kwargs)
def _sample_pos(self, **kwargs):
raise NotImplementedError
def _sample_neg(self, **kwargs):
raise NotImplementedError
import numpy as np
import torch
from .random_sampler import RandomSampler
class InstanceBalancedPosSampler(RandomSampler):
def _sample_pos(self, assign_result, num_expected, **kwargs):
pos_inds = torch.nonzero(assign_result.gt_inds > 0)
if pos_inds.numel() != 0:
pos_inds = pos_inds.squeeze(1)
if pos_inds.numel() <= num_expected:
return pos_inds
else:
unique_gt_inds = assign_result.gt_inds[pos_inds].unique()
num_gts = len(unique_gt_inds)
num_per_gt = int(round(num_expected / float(num_gts)) + 1)
sampled_inds = []
for i in unique_gt_inds:
inds = torch.nonzero(assign_result.gt_inds == i.item())
if inds.numel() != 0:
inds = inds.squeeze(1)
else:
continue
if len(inds) > num_per_gt:
inds = self.random_choice(inds, num_per_gt)
sampled_inds.append(inds)
sampled_inds = torch.cat(sampled_inds)
if len(sampled_inds) < num_expected:
num_extra = num_expected - len(sampled_inds)
extra_inds = np.array(
list(set(pos_inds.cpu()) - set(sampled_inds.cpu())))
if len(extra_inds) > num_extra:
extra_inds = self.random_choice(extra_inds, num_extra)
extra_inds = torch.from_numpy(extra_inds).to(
assign_result.gt_inds.device).long()
sampled_inds = torch.cat([sampled_inds, extra_inds])
elif len(sampled_inds) > num_expected:
sampled_inds = self.random_choice(sampled_inds, num_expected)
return sampled_inds
import numpy as np
import torch
from .random_sampler import RandomSampler
class IoUBalancedNegSampler(RandomSampler):
"""IoU Balanced Sampling
arXiv: https://arxiv.org/pdf/1904.02701.pdf (CVPR 2019)
Sampling proposals according to their IoU: `floor_fraction` of the needed
RoIs are randomly sampled from proposals whose IoU is lower than
`floor_thr`. The rest are sampled from proposals whose IoU is higher than
`floor_thr`, drawn evenly from `num_bins` bins that evenly partition the
IoU range.
Args:
num (int): number of proposals.
pos_fraction (float): fraction of positive proposals.
floor_thr (float): threshold (minimum) IoU for IoU balanced sampling;
set to -1 to apply IoU balanced sampling to all proposals.
floor_fraction (float): sampling fraction of proposals under floor_thr.
num_bins (int): number of bins in IoU balanced sampling.
"""
def __init__(self,
num,
pos_fraction,
floor_thr=-1,
floor_fraction=0,
num_bins=3,
**kwargs):
super(IoUBalancedNegSampler, self).__init__(num, pos_fraction,
**kwargs)
assert floor_thr >= 0 or floor_thr == -1
assert 0 <= floor_fraction <= 1
assert num_bins >= 1
self.floor_thr = floor_thr
self.floor_fraction = floor_fraction
self.num_bins = num_bins
def sample_via_interval(self, max_overlaps, full_set, num_expected):
max_iou = max_overlaps.max()
iou_interval = (max_iou - self.floor_thr) / self.num_bins
per_num_expected = int(num_expected / self.num_bins)
sampled_inds = []
for i in range(self.num_bins):
start_iou = self.floor_thr + i * iou_interval
end_iou = self.floor_thr + (i + 1) * iou_interval
tmp_set = set(
np.where(
np.logical_and(max_overlaps >= start_iou,
max_overlaps < end_iou))[0])
tmp_inds = list(tmp_set & full_set)
if len(tmp_inds) > per_num_expected:
tmp_sampled_set = self.random_choice(tmp_inds,
per_num_expected)
else:
tmp_sampled_set = np.array(tmp_inds, dtype=np.int64)
sampled_inds.append(tmp_sampled_set)
sampled_inds = np.concatenate(sampled_inds)
if len(sampled_inds) < num_expected:
num_extra = num_expected - len(sampled_inds)
extra_inds = np.array(list(full_set - set(sampled_inds)))
if len(extra_inds) > num_extra:
extra_inds = self.random_choice(extra_inds, num_extra)
sampled_inds = np.concatenate([sampled_inds, extra_inds])
return sampled_inds
def _sample_neg(self, assign_result, num_expected, **kwargs):
neg_inds = torch.nonzero(assign_result.gt_inds == 0)
if neg_inds.numel() != 0:
neg_inds = neg_inds.squeeze(1)
if len(neg_inds) <= num_expected:
return neg_inds
else:
max_overlaps = assign_result.max_overlaps.cpu().numpy()
# balance sampling for negative samples
neg_set = set(neg_inds.cpu().numpy())
if self.floor_thr > 0:
floor_set = set(
np.where(
np.logical_and(max_overlaps >= 0,
max_overlaps < self.floor_thr))[0])
iou_sampling_set = set(
np.where(max_overlaps >= self.floor_thr)[0])
elif self.floor_thr == 0:
floor_set = set(np.where(max_overlaps == 0)[0])
iou_sampling_set = set(
np.where(max_overlaps > self.floor_thr)[0])
else:
floor_set = set()
iou_sampling_set = set(
np.where(max_overlaps > self.floor_thr)[0])
floor_neg_inds = list(floor_set & neg_set)
iou_sampling_neg_inds = list(iou_sampling_set & neg_set)
num_expected_iou_sampling = int(num_expected *
(1 - self.floor_fraction))
if len(iou_sampling_neg_inds) > num_expected_iou_sampling:
if self.num_bins >= 2:
iou_sampled_inds = self.sample_via_interval(
max_overlaps, set(iou_sampling_neg_inds),
num_expected_iou_sampling)
else:
iou_sampled_inds = self.random_choice(
iou_sampling_neg_inds, num_expected_iou_sampling)
else:
iou_sampled_inds = np.array(
iou_sampling_neg_inds, dtype=np.int64)
num_expected_floor = num_expected - len(iou_sampled_inds)
if len(floor_neg_inds) > num_expected_floor:
sampled_floor_inds = self.random_choice(
floor_neg_inds, num_expected_floor)
else:
sampled_floor_inds = np.array(floor_neg_inds, dtype=np.int64)
sampled_inds = np.concatenate(
(sampled_floor_inds, iou_sampled_inds))
if len(sampled_inds) < num_expected:
num_extra = num_expected - len(sampled_inds)
extra_inds = np.array(list(neg_set - set(sampled_inds)))
if len(extra_inds) > num_extra:
extra_inds = self.random_choice(extra_inds, num_extra)
sampled_inds = np.concatenate((sampled_inds, extra_inds))
sampled_inds = torch.from_numpy(sampled_inds).long().to(
assign_result.gt_inds.device)
return sampled_inds
import torch
from ..transforms import bbox2roi
from .base_sampler import BaseSampler
class OHEMSampler(BaseSampler):
def __init__(self,
num,
pos_fraction,
context,
neg_pos_ub=-1,
add_gt_as_proposals=True,
**kwargs):
super(OHEMSampler, self).__init__(num, pos_fraction, neg_pos_ub,
add_gt_as_proposals)
if not hasattr(context, 'num_stages'):
self.bbox_roi_extractor = context.bbox_roi_extractor
self.bbox_head = context.bbox_head
else:
self.bbox_roi_extractor = context.bbox_roi_extractor[
context.current_stage]
self.bbox_head = context.bbox_head[context.current_stage]
def hard_mining(self, inds, num_expected, bboxes, labels, feats):
with torch.no_grad():
rois = bbox2roi([bboxes])
bbox_feats = self.bbox_roi_extractor(
feats[:self.bbox_roi_extractor.num_inputs], rois)
cls_score, _ = self.bbox_head(bbox_feats)
loss = self.bbox_head.loss(
cls_score=cls_score,
bbox_pred=None,
labels=labels,
label_weights=cls_score.new_ones(cls_score.size(0)),
bbox_targets=None,
bbox_weights=None,
reduction_override='none')['loss_cls']
_, topk_loss_inds = loss.topk(num_expected)
return inds[topk_loss_inds]
def _sample_pos(self,
assign_result,
num_expected,
bboxes=None,
feats=None,
**kwargs):
# Sample some hard positive samples
pos_inds = torch.nonzero(assign_result.gt_inds > 0)
if pos_inds.numel() != 0:
pos_inds = pos_inds.squeeze(1)
if pos_inds.numel() <= num_expected:
return pos_inds
else:
return self.hard_mining(pos_inds, num_expected, bboxes[pos_inds],
assign_result.labels[pos_inds], feats)
def _sample_neg(self,
assign_result,
num_expected,
bboxes=None,
feats=None,
**kwargs):
# Sample some hard negative samples
neg_inds = torch.nonzero(assign_result.gt_inds == 0)
if neg_inds.numel() != 0:
neg_inds = neg_inds.squeeze(1)
if len(neg_inds) <= num_expected:
return neg_inds
else:
return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds],
assign_result.labels[neg_inds], feats)
import torch
from .base_sampler import BaseSampler
from .sampling_result import SamplingResult
class PseudoSampler(BaseSampler):
def __init__(self, **kwargs):
pass
def _sample_pos(self, **kwargs):
raise NotImplementedError
def _sample_neg(self, **kwargs):
raise NotImplementedError
def sample(self, assign_result, bboxes, gt_bboxes, **kwargs):
pos_inds = torch.nonzero(
assign_result.gt_inds > 0).squeeze(-1).unique()
neg_inds = torch.nonzero(
assign_result.gt_inds == 0).squeeze(-1).unique()
gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8)
sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
assign_result, gt_flags)
return sampling_result
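# Usage sketch (toy inputs assumed; the stand-in class below carries only
# the two attributes SamplingResult actually reads: gt_inds and labels).
def _demo_pseudo_sampler():
    class _FakeAssignResult(object):
        gt_inds = torch.tensor([1, 0, 0])
        labels = None
    bboxes = torch.tensor([[0., 0., 10., 10.],
                           [20., 20., 30., 30.],
                           [40., 40., 50., 50.]])
    gt_bboxes = torch.tensor([[1., 1., 9., 9.]])
    res = PseudoSampler().sample(_FakeAssignResult(), bboxes, gt_bboxes)
    assert res.pos_inds.tolist() == [0] and res.neg_inds.tolist() == [1, 2]
    return res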
import numpy as np
import torch
from .base_sampler import BaseSampler
class RandomSampler(BaseSampler):
def __init__(self,
num,
pos_fraction,
neg_pos_ub=-1,
add_gt_as_proposals=True,
**kwargs):
super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub,
add_gt_as_proposals)
@staticmethod
def random_choice(gallery, num):
"""Random select some elements from the gallery.
It seems that Pytorch's implementation is slower than numpy so we use
numpy to randperm the indices.
"""
assert len(gallery) >= num
if isinstance(gallery, list):
gallery = np.array(gallery)
cands = np.arange(len(gallery))
np.random.shuffle(cands)
rand_inds = cands[:num]
if not isinstance(gallery, np.ndarray):
rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device)
return gallery[rand_inds]
def _sample_pos(self, assign_result, num_expected, **kwargs):
"""Randomly sample some positive samples."""
pos_inds = torch.nonzero(assign_result.gt_inds > 0)
if pos_inds.numel() != 0:
pos_inds = pos_inds.squeeze(1)
if pos_inds.numel() <= num_expected:
return pos_inds
else:
return self.random_choice(pos_inds, num_expected)
def _sample_neg(self, assign_result, num_expected, **kwargs):
"""Randomly sample some negative samples."""
neg_inds = torch.nonzero(assign_result.gt_inds == 0)
if neg_inds.numel() != 0:
neg_inds = neg_inds.squeeze(1)
if len(neg_inds) <= num_expected:
return neg_inds
else:
return self.random_choice(neg_inds, num_expected)
import torch
class SamplingResult(object):
def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result,
gt_flags):
self.pos_inds = pos_inds
self.neg_inds = neg_inds
self.pos_bboxes = bboxes[pos_inds]
self.neg_bboxes = bboxes[neg_inds]
self.pos_is_gt = gt_flags[pos_inds]
self.num_gts = gt_bboxes.shape[0]
self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1
self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds, :]
if assign_result.labels is not None:
self.pos_gt_labels = assign_result.labels[pos_inds]
else:
self.pos_gt_labels = None
@property
def bboxes(self):
return torch.cat([self.pos_bboxes, self.neg_bboxes])
import mmcv
import numpy as np
import torch
def bbox2delta(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]):
assert proposals.size() == gt.size()
proposals = proposals.float()
gt = gt.float()
px = (proposals[..., 0] + proposals[..., 2]) * 0.5
py = (proposals[..., 1] + proposals[..., 3]) * 0.5
pw = proposals[..., 2] - proposals[..., 0]
ph = proposals[..., 3] - proposals[..., 1]
gx = (gt[..., 0] + gt[..., 2]) * 0.5
gy = (gt[..., 1] + gt[..., 3]) * 0.5
gw = gt[..., 2] - gt[..., 0]
gh = gt[..., 3] - gt[..., 1]
dx = (gx - px) / pw
dy = (gy - py) / ph
dw = torch.log(gw / pw)
dh = torch.log(gh / ph)
deltas = torch.stack([dx, dy, dw, dh], dim=-1)
means = deltas.new_tensor(means).unsqueeze(0)
stds = deltas.new_tensor(stds).unsqueeze(0)
deltas = deltas.sub_(means).div_(stds)
return deltas
def delta2bbox(rois,
deltas,
means=[0, 0, 0, 0],
stds=[1, 1, 1, 1],
max_shape=None,
wh_ratio_clip=16 / 1000):
"""
Apply deltas to shift/scale base boxes.
Typically the rois are anchor or proposed bounding boxes and the deltas are
network outputs used to shift/scale those boxes.
Args:
rois (Tensor): boxes to be transformed. Has shape (N, 4)
deltas (Tensor): encoded offsets with respect to each roi.
Has shape (N, 4). Note N = num_anchors * W * H when rois is a grid
of anchors. Offset encoding follows [1]_.
means (list): denormalizing means for delta coordinates
stds (list): denormalizing standard deviation for delta coordinates
max_shape (tuple[int, int]): maximum bounds for boxes. specifies (H, W)
wh_ratio_clip (float): maximum aspect ratio for boxes.
Returns:
Tensor: boxes with shape (N, 4), where columns represent
tl_x, tl_y, br_x, br_y.
References:
.. [1] https://arxiv.org/abs/1311.2524
Example:
>>> rois = torch.Tensor([[ 0., 0., 1., 1.],
>>> [ 0., 0., 1., 1.],
>>> [ 0., 0., 1., 1.],
>>> [ 5., 5., 5., 5.]])
>>> deltas = torch.Tensor([[ 0., 0., 0., 0.],
>>> [ 1., 1., 1., 1.],
>>> [ 0., 0., 2., -1.],
>>> [ 0.7, -1.9, -0.5, 0.3]])
>>> delta2bbox(rois, deltas, max_shape=(32, 32))
tensor([[0.0000, 0.0000, 1.0000, 1.0000],
[0.2817, 0.2817, 4.7183, 4.7183],
[0.0000, 0.6321, 7.3891, 0.3679],
[5.8967, 2.9251, 5.5033, 3.2749]])
"""
means = deltas.new_tensor(means).repeat(1, deltas.size(1) // 4)
stds = deltas.new_tensor(stds).repeat(1, deltas.size(1) // 4)
denorm_deltas = deltas * stds + means
dx = denorm_deltas[:, 0::4]
dy = denorm_deltas[:, 1::4]
dw = denorm_deltas[:, 2::4]
dh = denorm_deltas[:, 3::4]
max_ratio = np.abs(np.log(wh_ratio_clip))
dw = dw.clamp(min=-max_ratio, max=max_ratio)
dh = dh.clamp(min=-max_ratio, max=max_ratio)
# Compute center of each roi
px = ((rois[:, 0] + rois[:, 2]) * 0.5).unsqueeze(1).expand_as(dx)
py = ((rois[:, 1] + rois[:, 3]) * 0.5).unsqueeze(1).expand_as(dy)
# Compute width/height of each roi
pw = (rois[:, 2] - rois[:, 0]).unsqueeze(1).expand_as(dw)
ph = (rois[:, 3] - rois[:, 1]).unsqueeze(1).expand_as(dh)
# Use exp(network energy) to enlarge/shrink each roi
gw = pw * dw.exp()
gh = ph * dh.exp()
# Use network energy to shift the center of each roi
gx = torch.addcmul(px, 1, pw, dx) # gx = px + pw * dx
gy = torch.addcmul(py, 1, ph, dy) # gy = py + ph * dy
# Convert center-xy/width/height to top-left, bottom-right
x1 = gx - gw * 0.5
y1 = gy - gh * 0.5
x2 = gx + gw * 0.5
y2 = gy + gh * 0.5
if max_shape is not None:
x1 = x1.clamp(min=0, max=max_shape[1])
y1 = y1.clamp(min=0, max=max_shape[0])
x2 = x2.clamp(min=0, max=max_shape[1])
y2 = y2.clamp(min=0, max=max_shape[0])
bboxes = torch.stack([x1, y1, x2, y2], dim=-1).view_as(deltas)
return bboxes
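# Round-trip sketch (boxes assumed): bbox2delta followed by delta2bbox with
# the default means/stds recovers the ground-truth boxes.
def _demo_delta_roundtrip():
    rois = torch.tensor([[0., 0., 10., 10.], [5., 5., 20., 30.]])
    gts = torch.tensor([[1., 2., 11., 12.], [4., 4., 22., 28.]])
    deltas = bbox2delta(rois, gts)
    assert torch.allclose(delta2bbox(rois, deltas), gts, atol=1e-4)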
def bbox_flip(bboxes, img_shape):
"""Flip bboxes horizontally.
Args:
bboxes(Tensor or ndarray): Shape (..., 4*k)
img_shape(tuple): Image shape.
Returns:
Same type as `bboxes`: Flipped bboxes.
"""
if isinstance(bboxes, torch.Tensor):
assert bboxes.shape[-1] % 4 == 0
flipped = bboxes.clone()
flipped[:, 0::4] = img_shape[1] - bboxes[:, 2::4]
flipped[:, 2::4] = img_shape[1] - bboxes[:, 0::4]
return flipped
elif isinstance(bboxes, np.ndarray):
return mmcv.bbox_flip(bboxes, img_shape)
def bbox_mapping(bboxes, img_shape, scale_factor, flip):
"""Map bboxes from the original image scale to testing scale"""
new_bboxes = bboxes * scale_factor
if flip:
new_bboxes = bbox_flip(new_bboxes, img_shape)
return new_bboxes
def bbox_mapping_back(bboxes, img_shape, scale_factor, flip):
"""Map bboxes from testing scale to original image scale"""
new_bboxes = bbox_flip(bboxes, img_shape) if flip else bboxes
new_bboxes = new_bboxes / scale_factor
return new_bboxes
def bbox2roi(bbox_list):
"""Convert a list of bboxes to roi format.
Args:
bbox_list (list[Tensor]): a list of bboxes corresponding to a batch
of images.
Returns:
Tensor: shape (n, 5), [batch_ind, x1, y1, x2, y2]
"""
rois_list = []
for img_id, bboxes in enumerate(bbox_list):
if bboxes.size(0) > 0:
img_inds = bboxes.new_full((bboxes.size(0), 1), img_id)
rois = torch.cat([img_inds, bboxes[:, :4]], dim=-1)
else:
rois = bboxes.new_zeros((0, 5))
rois_list.append(rois)
rois = torch.cat(rois_list, 0)
return rois
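# Usage sketch (toy batch assumed): the per-image batch index is prepended
# as column 0, and an image with no boxes contributes an empty (0, 5) block.
def _demo_bbox2roi():
    b0 = torch.tensor([[0., 0., 5., 5.]])
    b1 = b0.new_zeros((0, 4))
    rois = bbox2roi([b0, b1])
    assert rois.shape == (1, 5) and rois[0, 0] == 0
    return rois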
def roi2bbox(rois):
bbox_list = []
img_ids = torch.unique(rois[:, 0].cpu(), sorted=True)
for img_id in img_ids:
inds = (rois[:, 0] == img_id.item())
bbox = rois[inds, 1:]
bbox_list.append(bbox)
return bbox_list
def bbox2result_coco(bboxes, labels, num_classes):
"""Convert detection results to a list of numpy arrays.
Args:
bboxes (Tensor): shape (n, 5)
labels (Tensor): shape (n, )
num_classes (int): class number, including background class
Returns:
list(ndarray): bbox results of each class
"""
if bboxes.shape[0] == 0:
return [np.zeros((0, 5), dtype=np.float32) for i in range(num_classes)]
else:
bboxes = bboxes.cpu().numpy()
labels = labels.cpu().numpy()
return [bboxes[labels == i, :] for i in range(num_classes)]
def distance2bbox(points, distance, max_shape=None):
"""Decode distance prediction to bounding box.
Args:
points (Tensor): Shape (n, 2), [x, y].
distance (Tensor): Distance from the given point to 4
boundaries (left, top, right, bottom).
max_shape (tuple): Shape of the image.
Returns:
Tensor: Decoded bboxes.
"""
x1 = points[:, 0] - distance[:, 0]
y1 = points[:, 1] - distance[:, 1]
x2 = points[:, 0] + distance[:, 2]
y2 = points[:, 1] + distance[:, 3]
if max_shape is not None:
x1 = x1.clamp(min=0, max=max_shape[1])
y1 = y1.clamp(min=0, max=max_shape[0])
x2 = x2.clamp(min=0, max=max_shape[1])
y2 = y2.clamp(min=0, max=max_shape[0])
return torch.stack([x1, y1, x2, y2], -1)
def transform_lidar_to_cam(boxes_lidar):
"""
Only transform format, not exactly in camera coords
:param boxes_lidar: (N, 3 or 7) [x, y, z, w, l, h, ry] in LiDAR coords
:return: boxes_cam: (N, 3 or 7) [x, y, z, h, w, l, ry] in camera coords
"""
# boxes_cam = boxes_lidar.new_tensor(boxes_lidar.data)
boxes_cam = boxes_lidar.clone().detach()
boxes_cam[:, 0] = -boxes_lidar[:, 1]
boxes_cam[:, 1] = -boxes_lidar[:, 2]
boxes_cam[:, 2] = boxes_lidar[:, 0]
if boxes_cam.shape[1] > 3:
boxes_cam[:, [3, 4, 5]] = boxes_lidar[:, [5, 3, 4]]
return boxes_cam
def boxes3d_to_bev_torch(boxes3d):
"""
:param boxes3d: (N, 7) [x, y, z, h, w, l, ry] in camera coords
:return:
boxes_bev: (N, 5) [x1, y1, x2, y2, ry]
"""
boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5)))
cu, cv = boxes3d[:, 0], boxes3d[:, 2]
half_l, half_w = boxes3d[:, 5] / 2, boxes3d[:, 4] / 2
boxes_bev[:, 0], boxes_bev[:, 1] = cu - half_l, cv - half_w
boxes_bev[:, 2], boxes_bev[:, 3] = cu + half_l, cv + half_w
boxes_bev[:, 4] = boxes3d[:, 6]
return boxes_bev
def boxes3d_to_bev_torch_lidar(boxes3d):
"""
:param boxes3d: (N, 7) [x, y, z, w, l, h, ry] in LiDAR coords
:return:
boxes_bev: (N, 5) [x1, y1, x2, y2, ry]
"""
boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5)))
cu, cv = boxes3d[:, 0], boxes3d[:, 1]
half_l, half_w = boxes3d[:, 4] / 2, boxes3d[:, 3] / 2
boxes_bev[:, 0], boxes_bev[:, 1] = cu - half_w, cv - half_l
boxes_bev[:, 2], boxes_bev[:, 3] = cu + half_w, cv + half_l
boxes_bev[:, 4] = boxes3d[:, 6]
return boxes_bev
from .class_names import (coco_classes, dataset_aliases, get_classes,
imagenet_det_classes, imagenet_vid_classes,
kitti_classes, voc_classes)
from .eval_hooks import (CocoDistEvalmAPHook, CocoDistEvalRecallHook,
DistEvalHook, DistEvalmAPHook, KittiDistEvalmAPHook)
from .kitti_utils import kitti_eval, kitti_eval_coco_style
__all__ = [
'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes',
'coco_classes', 'dataset_aliases', 'get_classes', 'kitti_classes',
'kitti_eval_coco_style', 'kitti_eval', 'CocoDistEvalmAPHook',
'KittiDistEvalmAPHook', 'CocoDistEvalRecallHook', 'DistEvalHook',
'DistEvalmAPHook'
]