Merge branch 'clean-iou' into 'master'

clean iou calculation See merge request open-mmlab/mmdet.3d!84

Merge branch 'clean-iou' into 'master'
clean iou calculation See merge request open-mmlab/mmdet.3d!84
b107238d · zhangwenwei · b2c43ffd · 8c4c9aee · b107238d · b107238d
Commit b107238d authored Jun 20, 2020 by zhangwenwei
20 changed files
--- a/mmdet3d/core/bbox/__init__.py
+++ b/mmdet3d/core/bbox/__init__.py
-from . import box_torch_ops
 from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner
 from .coders import DeltaXYZWLHRBBoxCoder
 # from .bbox_target import bbox_target
@@ -9,10 +8,8 @@ from .samplers import (BaseSampler, CombinedSampler,
                       PseudoSampler, RandomSampler, SamplingResult)
 from .structures import (BaseInstance3DBoxes, Box3DMode, CameraInstance3DBoxes,
                         DepthInstance3DBoxes, LiDARInstance3DBoxes,
-                         xywhr2xyxyr)
-from .transforms import (bbox3d2result, bbox3d2roi, bbox3d_mapping_back,
-                         box3d_to_corner3d_upright_depth,
-                         boxes3d_to_bev_torch_lidar)
+                         limit_period, points_cam2img, xywhr2xyxyr)
+from .transforms import bbox3d2result, bbox3d2roi, bbox3d_mapping_back

 from .assign_sampling import (  # isort:skip, avoid recursive imports
    build_bbox_coder,  # temporary settings
@@ -22,11 +19,10 @@ __all__ = [
    'BaseAssigner', 'MaxIoUAssigner', 'AssignResult', 'BaseSampler',
    'PseudoSampler', 'RandomSampler', 'InstanceBalancedPosSampler',
    'IoUBalancedNegSampler', 'CombinedSampler', 'SamplingResult',
-    'build_assigner', 'build_sampler', 'assign_and_sample', 'box_torch_ops',
-    'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'boxes3d_to_bev_torch_lidar',
-    'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d',
-    'bbox_overlaps_3d', 'Box3DMode', 'LiDARInstance3DBoxes',
-    'CameraInstance3DBoxes', 'bbox3d2roi', 'bbox3d2result',
-    'box3d_to_corner3d_upright_depth', 'DepthInstance3DBoxes',
-    'BaseInstance3DBoxes', 'bbox3d_mapping_back', 'xywhr2xyxyr'
+    'build_assigner', 'build_sampler', 'assign_and_sample', 'build_bbox_coder',
+    'DeltaXYZWLHRBBoxCoder', 'BboxOverlapsNearest3D', 'BboxOverlaps3D',
+    'bbox_overlaps_nearest_3d', 'bbox_overlaps_3d', 'Box3DMode',
+    'LiDARInstance3DBoxes', 'CameraInstance3DBoxes', 'bbox3d2roi',
+    'bbox3d2result', 'DepthInstance3DBoxes', 'BaseInstance3DBoxes',
+    'bbox3d_mapping_back', 'xywhr2xyxyr', 'limit_period', 'points_cam2img'
 ]
--- a/mmdet3d/core/bbox/box_np_ops.py
+++ b/mmdet3d/core/bbox/box_np_ops.py
+# TODO: clean the functions in this file and move the APIs into box structures
+# in the future
+
 import numba
 import numpy as np

@@ -248,7 +251,7 @@ def rotation_points_single_angle(points, angle, axis=0):
    return points @ rot_mat_T, rot_mat_T


-def project_to_image(points_3d, proj_mat):
+def points_cam2img(points_3d, proj_mat):
    points_shape = list(points_3d.shape)
    points_shape[-1] = 1
    points_4 = np.concatenate([points_3d, np.zeros(points_shape)], axis=-1)
@@ -260,7 +263,7 @@ def project_to_image(points_3d, proj_mat):
 def box3d_to_bbox(box3d, rect, Trv2c, P2):
    box_corners = center_to_corner_box3d(
        box3d[:, :3], box3d[:, 3:6], box3d[:, 6], [0.5, 1.0, 0.5], axis=1)
-    box_corners_in_image = project_to_image(box_corners, P2)
+    box_corners_in_image = points_cam2img(box_corners, P2)
    # box_corners_in_image: [N, 8, 2]
    minxy = np.min(box_corners_in_image, axis=1)
    maxxy = np.max(box_corners_in_image, axis=1)

--- a/mmdet3d/core/bbox/box_torch_ops.py
+++ b/mmdet3d/core/bbox/box_torch_ops.py
-import numpy as np
-import torch
-
-
-def limit_period(val, offset=0.5, period=np.pi):
-    return val - torch.floor(val / period + offset) * period
-
-
-def corners_nd(dims, origin=0.5):
-    """Generate relative box corners based on length per dim and
-    origin point.
-
-    Args:
-        dims (np.ndarray, shape=[N, ndim]): Array of length per dim
-        origin (list or array or float): Origin point relate to smallest point.
-
-    Returns:
-        np.ndarray: Corners of boxes in shape [N, 2 ** ndim, ndim].
-        point layout example: (2d) x0y0, x0y1, x1y0, x1y1;
-            (3d) x0y0z0, x0y0z1, x0y1z0, x0y1z1, x1y0z0, x1y0z1, x1y1z0, x1y1z1
-            where x0 < x1, y0 < y1, z0 < z1
-    """
-    ndim = int(dims.shape[1])
-    corners_norm = torch.from_numpy(
-        np.stack(np.unravel_index(np.arange(2**ndim), [2] * ndim), axis=1)).to(
-            device=dims.device, dtype=dims.dtype)
-    # now corners_norm has format: (2d) x0y0, x0y1, x1y0, x1y1
-    # (3d) x0y0z0, x0y0z1, x0y1z0, x0y1z1, x1y0z0, x1y0z1, x1y1z0, x1y1z1
-    # so need to convert to a format which is convenient to do other computing.
-    # for 2d boxes, format is clockwise start with minimum point
-    # for 3d boxes, please draw lines by your hand.
-    if ndim == 2:
-        # generate clockwise box corners
-        corners_norm = corners_norm[[0, 1, 3, 2]]
-    elif ndim == 3:
-        corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
-    corners_norm = corners_norm - dims.new_tensor(origin)
-    corners = dims.reshape([-1, 1, ndim]) * corners_norm.reshape(
-        [1, 2**ndim, ndim])
-    return corners
-
-
-def rotation_3d_in_axis(points, angles, axis=0):
-    # points: [N, point_size, 3]
-    # angles: [N]
-    rot_sin = torch.sin(angles)
-    rot_cos = torch.cos(angles)
-    ones = torch.ones_like(rot_cos)
-    zeros = torch.zeros_like(rot_cos)
-    if axis == 1:
-        rot_mat_T = torch.stack([
-            torch.stack([rot_cos, zeros, -rot_sin]),
-            torch.stack([zeros, ones, zeros]),
-            torch.stack([rot_sin, zeros, rot_cos])
-        ])
-    elif axis == 2 or axis == -1:
-        rot_mat_T = torch.stack([
-            torch.stack([rot_cos, -rot_sin, zeros]),
-            torch.stack([rot_sin, rot_cos, zeros]),
-            torch.stack([zeros, zeros, ones])
-        ])
-    elif axis == 0:
-        rot_mat_T = torch.stack([
-            torch.stack([zeros, rot_cos, -rot_sin]),
-            torch.stack([zeros, rot_sin, rot_cos]),
-            torch.stack([ones, zeros, zeros])
-        ])
-    else:
-        raise ValueError('axis should in range')
-
-    return torch.einsum('aij,jka->aik', (points, rot_mat_T))
-
-
-def center_to_corner_box3d(centers,
-                           dims,
-                           angles,
-                           origin=(0.5, 1.0, 0.5),
-                           axis=1):
-    """Convert kitti locations, dimensions and angles to corners.
-
-    Args:
-        centers (np.ndarray): Locations in kitti label file
-            with the shape of [N, 3].
-        dims (np.ndarray): Dimensions in kitti label
-            file with the shape of [N, 3]
-        angles (np.ndarray): Rotation_y in kitti
-            label file with the shape of [N]
-        origin (list or array or float): Origin point relate to smallest point.
-            use (0.5, 1.0, 0.5) in camera and (0.5, 0.5, 0) in lidar.
-        axis (int): Rotation axis. 1 for camera and 2 for lidar.
-
-    Returns:
-        torch.Tensor: Corners with the shape of [N, 8, 3].
-    """
-    # 'length' in kitti format is in x axis.
-    # yzx(hwl)(kitti label file)<->xyz(lhw)(camera)<->z(-x)(-y)(wlh)(lidar)
-    # center in kitti format is [0.5, 1.0, 0.5] in xyz.
-    corners = corners_nd(dims, origin=origin)
-    # corners: [N, 8, 3]
-    corners = rotation_3d_in_axis(corners, angles, axis=axis)
-    corners += centers.view(-1, 1, 3)
-    return corners
-
-
-def lidar_to_camera(points, r_rect, velo2cam):
-    num_points = points.shape[0]
-    points = torch.cat(
-        [points, torch.ones(num_points, 1).type_as(points)], dim=-1)
-    camera_points = points @ (r_rect @ velo2cam).t()
-    return camera_points[..., :3]
-
-
-def box_lidar_to_camera(data, r_rect, velo2cam):
-    xyz_lidar = data[..., 0:3]
-    w, l, h = data[..., 3:4], data[..., 4:5], data[..., 5:6]
-    r = data[..., 6:7]
-    xyz = lidar_to_camera(xyz_lidar, r_rect, velo2cam)
-    return torch.cat([xyz, l, h, w, r], dim=-1)
-
-
-def project_to_image(points_3d, proj_mat):
-    points_num = list(points_3d.shape)[:-1]
-    points_shape = np.concatenate([points_num, [1]], axis=0).tolist()
-    # previous implementation use new_zeros, new_one yeilds better results
-    points_4 = torch.cat(
-        [points_3d, points_3d.new_ones(*points_shape)], dim=-1)
-    # point_2d = points_4 @ tf.transpose(proj_mat, [1, 0])
-    point_2d = torch.matmul(points_4, proj_mat.t())
-    point_2d_res = point_2d[..., :2] / point_2d[..., 2:3]
-    return point_2d_res
-
-
-def rbbox2d_to_near_bbox(rbboxes):
-    """convert rotated bbox to nearest 'standing' or 'lying' bbox.
-
-    Args:
-        rbboxes (torch.Tensor): [N, 5(x, y, xdim, ydim, rad)] rotated bboxes.
-
-    Returns:
-        torch.Tensor: Bboxes with the shape of [N, 4(xmin, ymin, xmax, ymax)].
-    """
-    rots = rbboxes[..., -1]
-    rots_0_pi_div_2 = torch.abs(limit_period(rots, 0.5, np.pi))
-    cond = (rots_0_pi_div_2 > np.pi / 4)[..., None]
-    bboxes_center = torch.where(cond, rbboxes[:, [0, 1, 3, 2]], rbboxes[:, :4])
-    bboxes = center_to_minmax_2d(bboxes_center[:, :2], bboxes_center[:, 2:])
-    return bboxes
-
-
-def center_to_minmax_2d_0_5(centers, dims):
-    return torch.cat([centers - dims / 2, centers + dims / 2], dim=-1)
-
-
-def center_to_minmax_2d(centers, dims, origin=0.5):
-    if origin == 0.5:
-        return center_to_minmax_2d_0_5(centers, dims)
-    corners = center_to_corner_box2d(centers, dims, origin=origin)
-    return corners[:, [0, 2]].reshape([-1, 4])
-
-
-def center_to_corner_box2d(centers, dims, angles=None, origin=0.5):
-    """Convert kitti locations, dimensions and angles to corners.
-        format: center(xy), dims(xy), angles(clockwise when positive)
-
-    Args:
-        centers (np.ndarray, shape=[N, 2]): locations in kitti label file.
-        dims (np.ndarray, shape=[N, 2]): dimensions in kitti label file.
-        angles (np.ndarray, shape=[N]): rotation_y in kitti label file.
-
-    Returns:
-        torch.Tensor: Corners with the shape of [N, 4, 2].
-    """
-    # 'length' in kitti format is in x axis.
-    # xyz(hwl)(kitti label file)<->xyz(lhw)(camera)<->z(-x)(-y)(wlh)(lidar)
-    # center in kitti format is [0.5, 1.0, 0.5] in xyz.
-    corners = corners_nd(dims, origin=origin)
-    # corners: [N, 4, 2]
-    if angles is not None:
-        corners = rotation_2d(corners, angles)
-    corners += centers.reshape([-1, 1, 2])
-    return corners
-
-
-def rotation_2d(points, angles):
-    """rotation 2d points based on origin point clockwise when angle positive.
-
-    Args:
-        points (np.ndarray, shape=[N, point_size, 2]): points to be rotated.
-        angles (np.ndarray, shape=[N]): rotation angle.
-
-    Returns:
-        np.ndarray: Same shape as points.
-    """
-    rot_sin = torch.sin(angles)
-    rot_cos = torch.cos(angles)
-    rot_mat_T = torch.stack([[rot_cos, -rot_sin], [rot_sin, rot_cos]])
-    return torch.einsum('aij,jka->aik', points, rot_mat_T)
-
-
-def enlarge_box3d_lidar(boxes3d, extra_width):
-    """Enlarge the length, width and height of input boxes
-
-    Args:
-        boxes3d (torch.float32 or numpy.float32): bottom_center with
-            shape [N, 7], (x, y, z, w, l, h, ry) in LiDAR coords
-        extra_width (float): a fix number to add
-
-    Returns:
-        torch.float32 or numpy.float32: enlarged boxes
-    """
-    if isinstance(boxes3d, np.ndarray):
-        large_boxes3d = boxes3d.copy()
-    else:
-        large_boxes3d = boxes3d.clone()
-    large_boxes3d[:, 3:6] += extra_width * 2
-    large_boxes3d[:, 2] -= extra_width  # bottom center z minus extra_width
-    return large_boxes3d
-
-
-def boxes3d_to_corners3d_lidar_torch(boxes3d, bottom_center=True):
-    """Convert kitti center boxes to corners.
-
-        7 -------- 4
-       /|         /|
-      6 -------- 5 .
-      | |        | |
-      . 3 -------- 0
-      |/         |/
-      2 -------- 1
-
-    Args:
-        boxes3d (FloatTensor): (N, 7) [x, y, z, w, l, h, ry] in LiDAR coords,
-            see the definition of ry in KITTI dataset
-        bottom_center (bool): whether z is on the bottom center of object.
-
-    Returns:
-        FloatTensor: box corners with shape (N, 8, 3)
-    """
-    boxes_num = boxes3d.shape[0]
-    w, l, h = boxes3d[:, 3:4], boxes3d[:, 4:5], boxes3d[:, 5:6]
-    ry = boxes3d[:, 6:7]
-
-    zeros = boxes3d.new_zeros(boxes_num, 1)
-    ones = boxes3d.new_ones(boxes_num, 1)
-    x_corners = torch.cat(
-        [w / 2., -w / 2., -w / 2., w / 2., w / 2., -w / 2., -w / 2., w / 2.],
-        dim=1)  # (N, 8)
-    y_corners = torch.cat(
-        [-l / 2., -l / 2., l / 2., l / 2., -l / 2., -l / 2., l / 2., l / 2.],
-        dim=1)  # (N, 8)
-    if bottom_center:
-        z_corners = torch.cat([zeros, zeros, zeros, zeros, h, h, h, h],
-                              dim=1)  # (N, 8)
-    else:
-        z_corners = torch.cat([
-            -h / 2., -h / 2., -h / 2., -h / 2., h / 2., h / 2., h / 2., h / 2.
-        ],
-                              dim=1)  # (N, 8)
-    temp_corners = torch.cat(
-        (x_corners.unsqueeze(dim=2), y_corners.unsqueeze(dim=2),
-         z_corners.unsqueeze(dim=2)),
-        dim=2)  # (N, 8, 3)
-
-    cosa, sina = torch.cos(ry), torch.sin(ry)
-    raw_1 = torch.cat([cosa, -sina, zeros], dim=1)  # (N, 3)
-    raw_2 = torch.cat([sina, cosa, zeros], dim=1)  # (N, 3)
-    raw_3 = torch.cat([zeros, zeros, ones], dim=1)  # (N, 3)
-    R = torch.cat((raw_1.unsqueeze(dim=1), raw_2.unsqueeze(dim=1),
-                   raw_3.unsqueeze(dim=1)),
-                  dim=1)  # (N, 3, 3)
-
-    rotated_corners = torch.matmul(temp_corners, R)  # (N, 8, 3)
-    x_corners = rotated_corners[:, :, 0]
-    y_corners = rotated_corners[:, :, 1]
-    z_corners = rotated_corners[:, :, 2]
-    x_loc, y_loc, z_loc = boxes3d[:, 0], boxes3d[:, 1], boxes3d[:, 2]
-
-    x = x_loc.view(-1, 1) + x_corners.view(-1, 8)
-    y = y_loc.view(-1, 1) + y_corners.view(-1, 8)
-    z = z_loc.view(-1, 1) + z_corners.view(-1, 8)
-    corners = torch.cat((x.view(-1, 8, 1), y.view(-1, 8, 1), z.view(-1, 8, 1)),
-                        dim=2)
-
-    return corners
--- a/mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py
+++ b/mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py
-import torch
-
-from mmdet3d.ops.iou3d import boxes_iou3d_gpu_camera, boxes_iou3d_gpu_lidar
 from mmdet.core.bbox import bbox_overlaps
 from mmdet.core.bbox.iou_calculators.builder import IOU_CALCULATORS
-from .. import box_torch_ops
+from ..structures import (CameraInstance3DBoxes, DepthInstance3DBoxes,
+                          LiDARInstance3DBoxes)


 @IOU_CALCULATORS.register_module()
 class BboxOverlapsNearest3D(object):
-    """Nearest 3D IoU Calculator"""
+    """Nearest 3D IoU Calculator
+
+    Note:
+        This IoU calculator first finds the nearest 2D boxes in bird eye view
+        (BEV), and then calculates the 2D IoU using :meth:`bbox_overlaps`.
+
+    Args:
+        coordinate (str): 'camera', 'lidar', or 'depth' coordinate system
+    """
+
+    def __init__(self, coordinate='lidar'):
+        assert coordinate in ['camera', 'lidar', 'depth']
+        self.coordinate = coordinate

    def __call__(self, bboxes1, bboxes2, mode='iou', is_aligned=False):
-        return bbox_overlaps_nearest_3d(bboxes1, bboxes2, mode, is_aligned)
+        return bbox_overlaps_nearest_3d(bboxes1, bboxes2, mode, is_aligned,
+                                        self.coordinate)

    def __repr__(self):
        repr_str = self.__class__.__name__
-        repr_str += '(mode={}, is_aligned={})'.format(self.mode,
-                                                      self.is_aligned)
+        repr_str += f'(coordinate={self.coordinate}'
        return repr_str


@@ -25,11 +35,11 @@ class BboxOverlaps3D(object):
    """3D IoU Calculator

    Args:
-        coordinate (str): 'camera' or 'lidar' coordinate system
+        coordinate (str): 'camera', 'lidar', or 'depth' coordinate system
    """

    def __init__(self, coordinate):
-        assert coordinate in ['camera', 'lidar']
+        assert coordinate in ['camera', 'lidar', 'depth']
        self.coordinate = coordinate

    def __call__(self, bboxes1, bboxes2, mode='iou'):
@@ -37,35 +47,63 @@ class BboxOverlaps3D(object):

    def __repr__(self):
        repr_str = self.__class__.__name__
-        repr_str += '(mode={}, is_aligned={})'.format(self.mode,
-                                                      self.is_aligned)
+        repr_str += f'(coordinate={self.coordinate}'
        return repr_str


-def bbox_overlaps_nearest_3d(bboxes1, bboxes2, mode='iou', is_aligned=False):
+def bbox_overlaps_nearest_3d(bboxes1,
+                             bboxes2,
+                             mode='iou',
+                             is_aligned=False,
+                             coordinate='lidar'):
    """Calculate nearest 3D IoU

+    Note:
+        This function first finds the nearest 2D boxes in bird eye view
+        (BEV), and then calculates the 2D IoU using :meth:`bbox_overlaps`.
+        The IoU calculator :class:`BboxOverlapsNearest3D` uses this
+        function to calculate IoUs of boxes.
+
+        If ``is_aligned`` is ``False``, then it calculates the ious between
+        each bbox of bboxes1 and bboxes2, otherwise the ious between each
+        aligned pair of bboxes1 and bboxes2.
+
    Args:
        bboxes1 (torch.Tensor): shape (N, 7+N) [x, y, z, h, w, l, ry, v].
        bboxes2 (torch.Tensor): shape (M, 7+N) [x, y, z, h, w, l, ry, v].
        mode (str): "iou" (intersection over union) or iof
            (intersection over foreground).
+        is_aligned (bool): Whether the calculation is aligned

    Return:
-        torch.Tensor: Bbox overlaps results of bboxes1 and bboxes2
-            with shape (M, N).(not support aligned mode currently).
+        torch.Tensor: If ``is_aligned`` is ``False``, return ious between
+            bboxes1 and bboxes2 with shape (N, M). If ``is_aligned`` is
+            ``True``, return shape is (N, ).
+
    """
    assert bboxes1.size(-1) >= 7
    assert bboxes2.size(-1) >= 7
-    column_index1 = bboxes1.new_tensor([0, 1, 3, 4, 6], dtype=torch.long)
-    rbboxes1_bev = bboxes1.index_select(dim=-1, index=column_index1)
-    rbboxes2_bev = bboxes2.index_select(dim=-1, index=column_index1)
+
+    if coordinate == 'camera':
+        box_type = CameraInstance3DBoxes
+    elif coordinate == 'lidar':
+        box_type = LiDARInstance3DBoxes
+    elif coordinate == 'depth':
+        box_type = DepthInstance3DBoxes
+    else:
+        raise ValueError(
+            '"coordinate" should be in ["camera", "lidar", "depth"],'
+            f' got invalid {coordinate}')
+
+    bboxes1 = box_type(bboxes1, box_dim=bboxes1.shape[-1])
+    bboxes2 = box_type(bboxes2, box_dim=bboxes2.shape[-1])

    # Change the bboxes to bev
    # box conversion and iou calculation in torch version on CUDA
    # is 10x faster than that in numpy version
-    bboxes1_bev = box_torch_ops.rbbox2d_to_near_bbox(rbboxes1_bev)
-    bboxes2_bev = box_torch_ops.rbbox2d_to_near_bbox(rbboxes2_bev)
+    bboxes1_bev = bboxes1.nearest_bev
+    bboxes2_bev = bboxes2.nearest_bev
+
    ret = bbox_overlaps(
        bboxes1_bev, bboxes2_bev, mode=mode, is_aligned=is_aligned)
    return ret
@@ -74,6 +112,11 @@ def bbox_overlaps_nearest_3d(bboxes1, bboxes2, mode='iou', is_aligned=False):
 def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou', coordinate='camera'):
    """Calculate 3D IoU using cuda implementation

+    Note:
+        This function calculates the IoU of 3D boxes based on their volumes.
+        IoU calculator :class:`BboxOverlaps3D` uses this function to
+        calculate the actual IoUs of boxes.
+
    Args:
        bboxes1 (torch.Tensor): shape (N, 7) [x, y, z, h, w, l, ry].
        bboxes2 (torch.Tensor): shape (M, 7) [x, y, z, h, w, l, ry].
@@ -83,19 +126,21 @@ def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou', coordinate='camera'):

    Return:
        torch.Tensor: Bbox overlaps results of bboxes1 and bboxes2
-            with shape (M, N).(not support aligned mode currently).
+            with shape (M, N) (aligned mode is not supported currently).
    """
    assert bboxes1.size(-1) == bboxes2.size(-1) == 7
-    assert coordinate in ['camera', 'lidar']
-
-    rows = bboxes1.size(0)
-    cols = bboxes2.size(0)
-    if rows * cols == 0:
-        return bboxes1.new(rows, cols)
-
    if coordinate == 'camera':
-        return boxes_iou3d_gpu_camera(bboxes1, bboxes2, mode)
+        box_type = CameraInstance3DBoxes
    elif coordinate == 'lidar':
-        return boxes_iou3d_gpu_lidar(bboxes1, bboxes2, mode)
+        box_type = LiDARInstance3DBoxes
+    elif coordinate == 'depth':
+        box_type = DepthInstance3DBoxes
    else:
-        raise NotImplementedError
+        raise ValueError(
+            '"coordinate" should be in ["camera", "lidar", "depth"],'
+            f' got invalid {coordinate}')
+
+    bboxes1 = box_type(bboxes1, box_dim=bboxes1.shape[-1])
+    bboxes2 = box_type(bboxes2, box_dim=bboxes2.shape[-1])
+
+    return bboxes1.overlaps(bboxes1, bboxes2, mode=mode)
--- a/mmdet3d/core/bbox/structures/__init__.py
+++ b/mmdet3d/core/bbox/structures/__init__.py
@@ -3,9 +3,11 @@ from .box_3d_mode import Box3DMode
 from .cam_box3d import CameraInstance3DBoxes
 from .depth_box3d import DepthInstance3DBoxes
 from .lidar_box3d import LiDARInstance3DBoxes
-from .utils import xywhr2xyxyr
+from .utils import (limit_period, points_cam2img, rotation_3d_in_axis,
+                    xywhr2xyxyr)

 __all__ = [
    'Box3DMode', 'BaseInstance3DBoxes', 'LiDARInstance3DBoxes',
-    'CameraInstance3DBoxes', 'DepthInstance3DBoxes', 'xywhr2xyxyr'
+    'CameraInstance3DBoxes', 'DepthInstance3DBoxes', 'xywhr2xyxyr',
+    'rotation_3d_in_axis', 'limit_period', 'points_cam2img'
 ]
--- a/mmdet3d/core/bbox/structures/base_box3d.py
+++ b/mmdet3d/core/bbox/structures/base_box3d.py
@@ -12,7 +12,7 @@ class BaseInstance3DBoxes(object):

    Note:
        The box is bottom centered, i.e. the relative position of origin in
-            the box is (0.5, 0.5, 0).
+        the box is (0.5, 0.5, 0).

    Args:
        tensor (torch.Tensor | np.ndarray | list): a Nxbox_dim matrix.
@@ -424,6 +424,11 @@ class BaseInstance3DBoxes(object):

        assert mode in ['iou', 'iof']

+        rows = len(boxes1)
+        cols = len(boxes2)
+        if rows * cols == 0:
+            return boxes1.tensor.new(rows, cols)
+
        # height overlap
        overlaps_h = cls.height_overlaps(boxes1, boxes2)


--- a/mmdet3d/core/bbox/structures/utils.py
+++ b/mmdet3d/core/bbox/structures/utils.py
@@ -72,3 +72,15 @@ def xywhr2xyxyr(boxes_xywhr):
    boxes[:, 3] = boxes_xywhr[:, 1] + half_h
    boxes[:, 4] = boxes_xywhr[:, 4]
    return boxes
+
+
+def points_cam2img(points_3d, proj_mat):
+    points_num = list(points_3d.shape)[:-1]
+    points_shape = np.concatenate([points_num, [1]], axis=0).tolist()
+    # previous implementation used new_zeros; new_ones yields better results
+    points_4 = torch.cat(
+        [points_3d, points_3d.new_ones(*points_shape)], dim=-1)
+    # point_2d = points_4 @ tf.transpose(proj_mat, [1, 0])
+    point_2d = torch.matmul(points_4, proj_mat.t())
+    point_2d_res = point_2d[..., :2] / point_2d[..., 2:3]
+    return point_2d_res
--- a/mmdet3d/core/bbox/transforms.py
+++ b/mmdet3d/core/bbox/transforms.py
@@ -13,69 +13,6 @@ def bbox3d_mapping_back(bboxes, scale_factor, flip_horizontal, flip_vertical):
    return new_bboxes


-def transform_lidar_to_cam(boxes_lidar):
-    """Transform boxes from lidar coords to cam coords.
-        Only transform format, not exactly in camera coords.
-
-    Args:
-        boxes_lidar (torch.Tensor): (N, 3 or 7) [x, y, z, w, l, h, ry]
-            in LiDAR coords.
-        boxes_cam (torch.Tensor): (N, 3 or 7) [x, y, z, h, w, l, ry]
-            in camera coords.
-
-    Returns:
-        torch.Tensor: Boxes in camera coords.
-    """
-    # boxes_cam = boxes_lidar.new_tensor(boxes_lidar.data)
-    boxes_cam = boxes_lidar.clone().detach()
-    boxes_cam[:, 0] = -boxes_lidar[:, 1]
-    boxes_cam[:, 1] = -boxes_lidar[:, 2]
-    boxes_cam[:, 2] = boxes_lidar[:, 0]
-    if boxes_cam.shape[1] > 3:
-        boxes_cam[:, [3, 4, 5]] = boxes_lidar[:, [5, 3, 4]]
-    return boxes_cam
-
-
-def boxes3d_to_bev_torch(boxes3d):
-    """Transform 3d boxes to bev in camera coords.
-
-    Args:
-        boxes3d (torch.Tensor): 3d boxes in camera coords
-            with the shape of [N, 7] (x, y, z, h, w, l, ry).
-
-    Returns:
-        torch.Tensor: Bev boxes with the shape of [N, 5]
-            (x1, y1, x2, y2, ry).
-    """
-    boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5)))
-
-    cu, cv = boxes3d[:, 0], boxes3d[:, 2]
-    half_l, half_w = boxes3d[:, 5] / 2, boxes3d[:, 4] / 2
-    boxes_bev[:, 0], boxes_bev[:, 1] = cu - half_l, cv - half_w
-    boxes_bev[:, 2], boxes_bev[:, 3] = cu + half_l, cv + half_w
-    boxes_bev[:, 4] = boxes3d[:, 6]
-    return boxes_bev
-
-
-def boxes3d_to_bev_torch_lidar(boxes3d):
-    """Transform 3d boxes to bev in lidar coords.
-
-    Args:
-        boxes3d (torch.Tensor): 3d boxes in lidar coords
-            with the shape of [N, 7] (x, y, z, h, w, l, ry).
-
-    Returns: Bev boxes with the shape of [N, 5] (x1, y1, x2, y2, ry).
-    """
-    boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5)))
-
-    cu, cv = boxes3d[:, 0], boxes3d[:, 1]
-    half_l, half_w = boxes3d[:, 4] / 2, boxes3d[:, 3] / 2
-    boxes_bev[:, 0], boxes_bev[:, 1] = cu - half_w, cv - half_l
-    boxes_bev[:, 2], boxes_bev[:, 3] = cu + half_w, cv + half_l
-    boxes_bev[:, 4] = boxes3d[:, 6]
-    return boxes_bev
-
-
 def bbox3d2roi(bbox_list):
    """Convert a list of bboxes to roi format.

@@ -113,88 +50,3 @@ def bbox3d2result(bboxes, scores, labels):
        boxes_3d=bboxes.to('cpu'),
        scores_3d=scores.cpu(),
        labels_3d=labels.cpu())
-
-
-def upright_depth_to_lidar_torch(points=None,
-                                 bboxes=None,
-                                 to_bottom_center=False):
-    """Convert points and boxes in upright depth coordinate to lidar.
-
-    Args:
-        points (None | torch.Tensor): points in upright depth coordinate.
-        bboxes (None | torch.Tensor): bboxes in upright depth coordinate.
-        to_bottom_center (bool): covert bboxes to bottom center.
-
-    Returns:
-        tuple: points and bboxes in lidar coordinate.
-    """
-    if points is not None:
-        points_lidar = points.clone()
-        points_lidar = points_lidar[..., [1, 0, 2]]
-        points_lidar[..., 1] *= -1
-    else:
-        points_lidar = None
-
-    if bboxes is not None:
-        bboxes_lidar = bboxes.clone()
-        bboxes_lidar = bboxes_lidar[..., [1, 0, 2, 4, 3, 5, 6]]
-        bboxes_lidar[..., 1] *= -1
-        if to_bottom_center:
-            bboxes_lidar[..., 2] -= 0.5 * bboxes_lidar[..., 5]
-    else:
-        bboxes_lidar = None
-
-    return points_lidar, bboxes_lidar
-
-
-def box3d_to_corner3d_upright_depth(boxes3d):
-    """Convert box3d to corner3d in upright depth coordinate
-
-    Args:
-        boxes3d (torch.Tensor): boxes with shape [n,7] in
-            upright depth coordinate.
-
-    Returns:
-        torch.Tensor: boxes with [n, 8, 3] in upright depth coordinate
-    """
-    boxes_num = boxes3d.shape[0]
-    ry = boxes3d[:, 6:7]
-    l, w, h = boxes3d[:, 3:4], boxes3d[:, 4:5], boxes3d[:, 5:6]
-    zeros = boxes3d.new_zeros((boxes_num, 1))
-    ones = boxes3d.new_ones((boxes_num, 1))
-    # zeros = torch.cuda.FloatTensor(boxes_num, 1).fill_(0)
-    # ones = torch.cuda.FloatTensor(boxes_num, 1).fill_(1)
-    x_corners = torch.cat(
-        [-l / 2., l / 2., l / 2., -l / 2., -l / 2., l / 2., l / 2., -l / 2.],
-        dim=1)  # (N, 8)
-    y_corners = torch.cat(
-        [w / 2., w / 2., -w / 2., -w / 2., w / 2., w / 2., -w / 2., -w / 2.],
-        dim=1)  # (N, 8)
-    z_corners = torch.cat(
-        [h / 2., h / 2., h / 2., h / 2., -h / 2., -h / 2., -h / 2., -h / 2.],
-        dim=1)  # (N, 8)
-    temp_corners = torch.cat(
-        (x_corners.unsqueeze(dim=2), y_corners.unsqueeze(dim=2),
-         z_corners.unsqueeze(dim=2)),
-        dim=2)  # (N, 8, 3)
-
-    cosa, sina = torch.cos(-ry), torch.sin(-ry)
-    raw_1 = torch.cat([cosa, -sina, zeros], dim=1)  # (N, 3)
-    raw_2 = torch.cat([sina, cosa, zeros], dim=1)  # (N, 3)
-    raw_3 = torch.cat([zeros, zeros, ones], dim=1)  # (N, 3)
-    R = torch.cat((raw_1.unsqueeze(dim=1), raw_2.unsqueeze(dim=1),
-                   raw_3.unsqueeze(dim=1)),
-                  dim=1)  # (N, 3, 3)
-    rotated_corners = torch.matmul(temp_corners, R)  # (N, 8, 3)
-    x_corners = rotated_corners[:, :, 0]
-    y_corners = rotated_corners[:, :, 1]
-    z_corners = rotated_corners[:, :, 2]
-    x_loc, y_loc, z_loc = boxes3d[:, 0], boxes3d[:, 1], boxes3d[:, 2]
-
-    x = x_loc.view(-1, 1) + x_corners.view(-1, 8)
-    y = y_loc.view(-1, 1) + y_corners.view(-1, 8)
-    z = z_loc.view(-1, 1) + z_corners.view(-1, 8)
-    corners3d = torch.cat(
-        (x.view(-1, 8, 1), y.view(-1, 8, 1), z.view(-1, 8, 1)), dim=2)
-
-    return corners3d
--- a/mmdet3d/core/voxel/voxel_generator.py
+++ b/mmdet3d/core/voxel/voxel_generator.py
@@ -3,12 +3,21 @@ import numpy as np


 class VoxelGenerator(object):
+    """Voxel generator in numpy implementation"""

    def __init__(self,
                 voxel_size,
                 point_cloud_range,
                 max_num_points,
                 max_voxels=20000):
+        """
+        Args:
+            voxel_size (list[float]): Size of a single voxel
+            point_cloud_range (list[float]): Range of points
+            max_num_points (int): Maximum number of points in a single voxel
+            max_voxels (int, optional): Maximum number of voxels.
+                Defaults to 20000.
+        """
        point_cloud_range = np.array(point_cloud_range, dtype=np.float32)
        # [0, -40, -3, 70.4, 40, 1]
        voxel_size = np.array(voxel_size, dtype=np.float32)
@@ -55,24 +64,25 @@ def points_to_voxel(points,
    with jit and 3.2ghz cpu.(don't calculate other features)

    Args:
-        points: [N, ndim] float tensor. points[:, :3] contain xyz points and
+        points (np.ndarray): [N, ndim]. points[:, :3] contain xyz points and
            points[:, 3:] contain other information such as reflectivity.
-        voxel_size: [3] list/tuple or array, float. xyz, indicate voxel size
+        voxel_size (list, tuple, np.ndarray): [3] xyz, indicate voxel size
        coors_range: [6] list/tuple or array, float. indicate voxel range.
            format: xyzxyz, minmax
-        max_points: int. indicate maximum points contained in a voxel.
-        reverse_index: boolean. indicate whether return reversed coordinates.
+        max_points (int): Indicate maximum points contained in a voxel.
+        reverse_index (bool): Whether to return reversed coordinates.
            if points has xyz format and reverse_index is True, output
            coordinates will be zyx format, but points in features always
            xyz format.
-        max_voxels: int. indicate maximum voxels this function create.
-            for second, 20000 is a good choice. you should shuffle points
-            before call this function because max_voxels may drop some points.
+        max_voxels (int): Maximum number of voxels this function creates.
+            for second, 20000 is a good choice. Points should be shuffled for
+            randomness before this function because max_voxels drops points.

    Returns:
-        voxels: [M, max_points, ndim] float tensor. only contain points.
-        coordinates: [M, 3] int32 tensor.
-        num_points_per_voxel: [M] int32 tensor.
+        tuple[np.ndarray]:
+            voxels: [M, max_points, ndim] float tensor. only contain points.
+            coordinates: [M, 3] int32 tensor.
+            num_points_per_voxel: [M] int32 tensor.
    """
    if not isinstance(voxel_size, np.ndarray):
        voxel_size = np.array(voxel_size, dtype=points.dtype)

--- a/mmdet3d/datasets/kitti_dataset.py
+++ b/mmdet3d/datasets/kitti_dataset.py
@@ -9,7 +9,7 @@ import torch
 from mmcv.utils import print_log

 from mmdet.datasets import DATASETS
-from ..core.bbox import Box3DMode, CameraInstance3DBoxes
+from ..core.bbox import Box3DMode, CameraInstance3DBoxes, points_cam2img
 from .custom_3d import Custom3DDataset


@@ -463,7 +463,6 @@ class KittiDataset(Custom3DDataset):
                label_preds=np.zeros([0, 4]),
                sample_idx=sample_idx)

-        from mmdet3d.core.bbox import box_torch_ops
        rect = info['calib']['R0_rect'].astype(np.float32)
        Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
        P2 = info['calib']['P2'].astype(np.float32)
@@ -473,7 +472,7 @@ class KittiDataset(Custom3DDataset):
        box_preds_camera = box_preds.convert_to(Box3DMode.CAM, rect @ Trv2c)

        box_corners = box_preds_camera.corners
-        box_corners_in_image = box_torch_ops.project_to_image(box_corners, P2)
+        box_corners_in_image = points_cam2img(box_corners, P2)
        # box_corners_in_image: [N, 8, 2]
        minxy = torch.min(box_corners_in_image, dim=1)[0]
        maxxy = torch.max(box_corners_in_image, dim=1)[0]

--- a/mmdet3d/models/dense_heads/anchor3d_head.py
+++ b/mmdet3d/models/dense_heads/anchor3d_head.py
@@ -3,9 +3,10 @@ import torch
 import torch.nn as nn
 from mmcv.cnn import bias_init_with_prob, normal_init

-from mmdet3d.core import (PseudoSampler, box3d_multiclass_nms, box_torch_ops,
-                          boxes3d_to_bev_torch_lidar, build_anchor_generator,
-                          build_assigner, build_bbox_coder, build_sampler)
+from mmdet3d.core import (PseudoSampler, box3d_multiclass_nms,
+                          build_anchor_generator, build_assigner,
+                          build_bbox_coder, build_sampler, limit_period,
+                          xywhr2xyxyr)
 from mmdet.core import multi_apply
 from mmdet.models import HEADS
 from ..builder import build_loss
@@ -447,7 +448,8 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
            mlvl_dir_scores.append(dir_cls_score)

        mlvl_bboxes = torch.cat(mlvl_bboxes)
-        mlvl_bboxes_for_nms = boxes3d_to_bev_torch_lidar(mlvl_bboxes)
+        mlvl_bboxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](
+            mlvl_bboxes, box_dim=self.box_code_size).bev)
        mlvl_scores = torch.cat(mlvl_scores)
        mlvl_dir_scores = torch.cat(mlvl_dir_scores)

@@ -462,8 +464,8 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
                                       cfg, mlvl_dir_scores)
        bboxes, scores, labels, dir_scores = results
        if bboxes.shape[0] > 0:
-            dir_rot = box_torch_ops.limit_period(
-                bboxes[..., 6] - self.dir_offset, self.dir_limit_offset, np.pi)
+            dir_rot = limit_period(bboxes[..., 6] - self.dir_offset,
+                                   self.dir_limit_offset, np.pi)
            bboxes[..., 6] = (
                dir_rot + self.dir_offset +
                np.pi * dir_scores.to(bboxes.dtype))

--- a/mmdet3d/models/dense_heads/parta2_rpn_head.py
+++ b/mmdet3d/models/dense_heads/parta2_rpn_head.py
@@ -3,7 +3,7 @@ from __future__ import division
 import numpy as np
 import torch

-from mmdet3d.core import box_torch_ops, boxes3d_to_bev_torch_lidar
+from mmdet3d.core import limit_period, xywhr2xyxyr
 from mmdet3d.ops.iou3d.iou3d_utils import nms_gpu, nms_normal_gpu
 from mmdet.models import HEADS
 from .anchor3d_head import Anchor3DHead
@@ -172,7 +172,8 @@ class PartA2RPNHead(Anchor3DHead):
            mlvl_dir_scores.append(dir_cls_score)

        mlvl_bboxes = torch.cat(mlvl_bboxes)
-        mlvl_bboxes_for_nms = boxes3d_to_bev_torch_lidar(mlvl_bboxes)
+        mlvl_bboxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](
+            mlvl_bboxes, box_dim=self.box_code_size).bev)
        mlvl_max_scores = torch.cat(mlvl_max_scores)
        mlvl_label_pred = torch.cat(mlvl_label_pred)
        mlvl_dir_scores = torch.cat(mlvl_dir_scores)
@@ -246,9 +247,8 @@ class PartA2RPNHead(Anchor3DHead):
            labels.append(_mlvl_label_pred[selected])
            cls_scores.append(_mlvl_cls_score[selected])
            dir_scores.append(_mlvl_dir_scores[selected])
-            dir_rot = box_torch_ops.limit_period(
-                bboxes[-1][..., 6] - self.dir_offset, self.dir_limit_offset,
-                np.pi)
+            dir_rot = limit_period(bboxes[-1][..., 6] - self.dir_offset,
+                                   self.dir_limit_offset, np.pi)
            bboxes[-1][..., 6] = (
                dir_rot + self.dir_offset +
                np.pi * dir_scores[-1].to(bboxes[-1].dtype))

--- a/mmdet3d/models/dense_heads/train_mixins.py
+++ b/mmdet3d/models/dense_heads/train_mixins.py
 import numpy as np
 import torch

-from mmdet3d.core import box_torch_ops
+from mmdet3d.core import limit_period
 from mmdet.core import images_to_levels, multi_apply


@@ -270,7 +270,7 @@ def get_direction_target(anchors,
        torch.Tensor: Encoded direction targets.
    """
    rot_gt = reg_targets[..., 6] + anchors[..., 6]
-    offset_rot = box_torch_ops.limit_period(rot_gt - dir_offset, 0, 2 * np.pi)
+    offset_rot = limit_period(rot_gt - dir_offset, 0, 2 * np.pi)
    dir_cls_targets = torch.floor(offset_rot / (2 * np.pi / num_bins)).long()
    dir_cls_targets = torch.clamp(dir_cls_targets, min=0, max=num_bins - 1)
    if one_hot:

--- a/mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py
+++ b/mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py
@@ -4,12 +4,12 @@ import torch.nn as nn
 from mmcv.cnn import ConvModule, normal_init, xavier_init

 import mmdet3d.ops.spconv as spconv
-from mmdet3d.core import build_bbox_coder
-from mmdet3d.core.bbox import box_torch_ops
+from mmdet3d.core import build_bbox_coder, xywhr2xyxyr
+from mmdet3d.core.bbox.structures import (LiDARInstance3DBoxes,
+                                          rotation_3d_in_axis)
 from mmdet3d.models.builder import build_loss
 from mmdet3d.ops import make_sparse_convmodule
-from mmdet3d.ops.iou3d.iou3d_utils import (boxes3d_to_bev_torch_lidar, nms_gpu,
-                                           nms_normal_gpu)
+from mmdet3d.ops.iou3d.iou3d_utils import nms_gpu, nms_normal_gpu
 from mmdet.core import multi_apply
 from mmdet.models import HEADS

@@ -335,7 +335,7 @@ class PartA2BboxHead(nn.Module):
                    batch_anchors,
                    pos_bbox_pred.view(-1, code_size)).view(-1, code_size)

-                pred_boxes3d[..., 0:3] = box_torch_ops.rotation_3d_in_axis(
+                pred_boxes3d[..., 0:3] = rotation_3d_in_axis(
                    pred_boxes3d[..., 0:3].unsqueeze(1),
                    (pos_rois_rotation + np.pi / 2),
                    axis=2).squeeze(1)
@@ -412,7 +412,7 @@ class PartA2BboxHead(nn.Module):
            # canonical transformation
            pos_gt_bboxes_ct[..., 0:3] -= roi_center
            pos_gt_bboxes_ct[..., 6] -= roi_ry
-            pos_gt_bboxes_ct[..., 0:3] = box_torch_ops.rotation_3d_in_axis(
+            pos_gt_bboxes_ct[..., 0:3] = rotation_3d_in_axis(
                pos_gt_bboxes_ct[..., 0:3].unsqueeze(1),
                -(roi_ry + np.pi / 2),
                axis=2).squeeze(1)
@@ -451,15 +451,17 @@ class PartA2BboxHead(nn.Module):
        """
        assert pred_bbox3d.shape[0] == gt_bbox3d.shape[0]

-        pred_box_corners = box_torch_ops.boxes3d_to_corners3d_lidar_torch(
-            pred_bbox3d)
-        gt_box_corners = box_torch_ops.boxes3d_to_corners3d_lidar_torch(
-            gt_bbox3d)
+        # This is a bit of a hack: we assume the boxes for Part-A2 are in
+        # LiDAR coordinates.
+        gt_boxes_structure = LiDARInstance3DBoxes(gt_bbox3d)
+        pred_box_corners = LiDARInstance3DBoxes(pred_bbox3d).corners
+        gt_box_corners = gt_boxes_structure.corners
+
+        # This flip only changes the heading direction of GT boxes
+        gt_bbox3d_flip = gt_boxes_structure.clone()
+        gt_bbox3d_flip.tensor[:, 6] += np.pi
+        gt_box_corners_flip = gt_bbox3d_flip.corners

-        gt_bbox3d_flip = gt_bbox3d.clone()
-        gt_bbox3d_flip[:, 6] += np.pi
-        gt_box_corners_flip = box_torch_ops.boxes3d_to_corners3d_lidar_torch(
-            gt_bbox3d_flip)
        corner_dist = torch.min(
            torch.norm(pred_box_corners - gt_box_corners, dim=2),
            torch.norm(pred_box_corners - gt_box_corners_flip,
@@ -504,7 +506,7 @@ class PartA2BboxHead(nn.Module):
        local_roi_boxes = roi_boxes.clone().detach()
        local_roi_boxes[..., 0:3] = 0
        rcnn_boxes3d = self.bbox_coder.decode(local_roi_boxes, bbox_pred)
-        rcnn_boxes3d[..., 0:3] = box_torch_ops.rotation_3d_in_axis(
+        rcnn_boxes3d[..., 0:3] = rotation_3d_in_axis(
            rcnn_boxes3d[..., 0:3].unsqueeze(1), (roi_ry + np.pi / 2),
            axis=2).squeeze(1)
        rcnn_boxes3d[:, 0:3] += roi_xyz
@@ -519,6 +521,7 @@ class PartA2BboxHead(nn.Module):
            cur_rcnn_boxes3d = rcnn_boxes3d[roi_batch_id == batch_id]
            selected = self.multi_class_nms(cur_box_prob, cur_rcnn_boxes3d,
                                            cfg.score_thr, cfg.nms_thr,
+                                            img_metas[batch_id],
                                            cfg.use_rotate_nms)
            selected_bboxes = cur_rcnn_boxes3d[selected]
            selected_label_preds = cur_class_labels[selected]
@@ -535,6 +538,7 @@ class PartA2BboxHead(nn.Module):
                        box_preds,
                        score_thr,
                        nms_thr,
+                        input_meta,
                        use_rotate_nms=True):
        if use_rotate_nms:
            nms_func = nms_gpu
@@ -545,7 +549,8 @@ class PartA2BboxHead(nn.Module):
            1] == self.num_classes, f'box_probs shape: {str(box_probs.shape)}'
        selected_list = []
        selected_labels = []
-        boxes_for_nms = boxes3d_to_bev_torch_lidar(box_preds)
+        boxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](
+            box_preds, self.bbox_coder.code_size).bev)

        score_thresh = score_thr if isinstance(
            score_thr, list) else [score_thr for x in range(self.num_classes)]

--- a/mmdet3d/models/roi_heads/mask_heads/pointwise_semantic_head.py
+++ b/mmdet3d/models/roi_heads/mask_heads/pointwise_semantic_head.py
@@ -2,7 +2,7 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F

-from mmdet3d.core.bbox import box_torch_ops
+from mmdet3d.core.bbox.structures import rotation_3d_in_axis
 from mmdet3d.models.builder import build_loss
 from mmdet.core import multi_apply
 from mmdet.models import HEADS
@@ -109,7 +109,7 @@ class PointwiseSemanticHead(nn.Module):
                continue
            fg_voxels = voxel_centers[k_box_flag]
            transformed_voxels = fg_voxels - gt_bboxes_3d.bottom_center[k]
-            transformed_voxels = box_torch_ops.rotation_3d_in_axis(
+            transformed_voxels = rotation_3d_in_axis(
                transformed_voxels.unsqueeze(0),
                -gt_bboxes_3d.yaw[k].view(1),
                axis=2)

--- a/mmdet3d/ops/iou3d/__init__.py
+++ b/mmdet3d/ops/iou3d/__init__.py
-from .iou3d_utils import (boxes_iou3d_gpu_camera, boxes_iou3d_gpu_lidar,
-                          boxes_iou_bev, nms_gpu, nms_normal_gpu)
+from .iou3d_utils import boxes_iou_bev, nms_gpu, nms_normal_gpu

-__all__ = [
-    'boxes_iou_bev', 'boxes_iou3d_gpu_camera', 'nms_gpu', 'nms_normal_gpu',
-    'boxes_iou3d_gpu_lidar'
-]
+__all__ = ['boxes_iou_bev', 'nms_gpu', 'nms_normal_gpu']
--- a/mmdet3d/ops/iou3d/iou3d_utils.py
+++ b/mmdet3d/ops/iou3d/iou3d_utils.py
@@ -20,102 +20,6 @@ def boxes_iou_bev(boxes_a, boxes_b):
    return ans_iou


-def boxes_iou3d_gpu_camera(boxes_a, boxes_b, mode='iou'):
-    """Calculate 3d iou of boxes in camera coordinate
-
-    Args:
-        boxes_a (FloatTensor): (N, 7) [x, y, z, h, w, l, ry]
-            in LiDAR coordinate
-        boxes_b (FloatTensor): (M, 7) [x, y, z, h, w, l, ry]
-        mode (str): "iou" (intersection over union) or iof (intersection over
-            foreground).
-
-    Returns:
-        FloatTensor: (M, N)
-    """
-
-    boxes_a_bev = boxes3d_to_bev_torch_camera(boxes_a)
-    boxes_b_bev = boxes3d_to_bev_torch_camera(boxes_b)
-
-    # bev overlap
-    overlaps_bev = torch.cuda.FloatTensor(
-        torch.Size((boxes_a.shape[0], boxes_b.shape[0]))).zero_()  # (N, M)
-    iou3d_cuda.boxes_overlap_bev_gpu(boxes_a_bev.contiguous(),
-                                     boxes_b_bev.contiguous(), overlaps_bev)
-
-    # height overlap
-    boxes_a_height_min = (boxes_a[:, 1] - boxes_a[:, 3]).view(-1, 1)
-    boxes_a_height_max = boxes_a[:, 1].view(-1, 1)
-    boxes_b_height_min = (boxes_b[:, 1] - boxes_b[:, 3]).view(1, -1)
-    boxes_b_height_max = boxes_b[:, 1].view(1, -1)
-
-    max_of_min = torch.max(boxes_a_height_min, boxes_b_height_min)
-    min_of_max = torch.min(boxes_a_height_max, boxes_b_height_max)
-    overlaps_h = torch.clamp(min_of_max - max_of_min, min=0)
-
-    # 3d iou
-    overlaps_3d = overlaps_bev * overlaps_h
-
-    volume_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1)
-    volume_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1)
-
-    if mode == 'iou':
-        # the clamp func is used to avoid division of 0
-        iou3d = overlaps_3d / torch.clamp(
-            volume_a + volume_b - overlaps_3d, min=1e-8)
-    else:
-        iou3d = overlaps_3d / torch.clamp(volume_a, min=1e-8)
-
-    return iou3d
-
-
-def boxes_iou3d_gpu_lidar(boxes_a, boxes_b, mode='iou'):
-    """Calculate 3d iou of boxes in lidar coordinate
-
-    Args:
-        boxes_a (FloatTensor): (N, 7) [x, y, z, w, l, h, ry]
-            in LiDAR coordinate
-        boxes_b (FloatTensor): (M, 7) [x, y, z, w, l, h, ry]
-        mode (str): "iou" (intersection over union) or iof (intersection over
-            foreground).
-
-    :Returns:
-        FloatTensor: (M, N)
-    """
-    boxes_a_bev = boxes3d_to_bev_torch_lidar(boxes_a)
-    boxes_b_bev = boxes3d_to_bev_torch_lidar(boxes_b)
-    # height overlap
-    boxes_a_height_max = (boxes_a[:, 2] + boxes_a[:, 5]).view(-1, 1)
-    boxes_a_height_min = boxes_a[:, 2].view(-1, 1)
-    boxes_b_height_max = (boxes_b[:, 2] + boxes_b[:, 5]).view(1, -1)
-    boxes_b_height_min = boxes_b[:, 2].view(1, -1)
-
-    # bev overlap
-    overlaps_bev = boxes_a.new_zeros(
-        torch.Size((boxes_a.shape[0], boxes_b.shape[0])))  # (N, M)
-    iou3d_cuda.boxes_overlap_bev_gpu(boxes_a_bev.contiguous(),
-                                     boxes_b_bev.contiguous(), overlaps_bev)
-
-    max_of_min = torch.max(boxes_a_height_min, boxes_b_height_min)
-    min_of_max = torch.min(boxes_a_height_max, boxes_b_height_max)
-    overlaps_h = torch.clamp(min_of_max - max_of_min, min=0)
-
-    # 3d iou
-    overlaps_3d = overlaps_bev * overlaps_h
-
-    volume_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1)
-    volume_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1)
-
-    if mode == 'iou':
-        # the clamp func is used to avoid division of 0
-        iou3d = overlaps_3d / torch.clamp(
-            volume_a + volume_b - overlaps_3d, min=1e-8)
-    else:
-        iou3d = overlaps_3d / torch.clamp(volume_a, min=1e-8)
-
-    return iou3d
-
-
 def nms_gpu(boxes, scores, thresh):
    """
    :param boxes: (N, 5) [x1, y1, x2, y2, ry]
@@ -148,41 +52,3 @@ def nms_normal_gpu(boxes, scores, thresh):
    keep = torch.LongTensor(boxes.size(0))
    num_out = iou3d_cuda.nms_normal_gpu(boxes, keep, thresh)
    return order[keep[:num_out].cuda()].contiguous()
-
-
-def boxes3d_to_bev_torch_camera(boxes3d):
-    """covert boxes3d to bev in in camera coords
-
-    Args:
-        boxes3d (FloartTensor): (N, 7) [x, y, z, h, w, l, ry] in camera coords
-
-    Return:
-        FloartTensor: (N, 5) [x1, y1, x2, y2, ry]
-    """
-    boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5)))
-
-    cu, cv = boxes3d[:, 0], boxes3d[:, 2]
-    half_l, half_w = boxes3d[:, 5] / 2, boxes3d[:, 4] / 2
-    boxes_bev[:, 0], boxes_bev[:, 1] = cu - half_l, cv - half_w
-    boxes_bev[:, 2], boxes_bev[:, 3] = cu + half_l, cv + half_w
-    boxes_bev[:, 4] = boxes3d[:, 6]
-    return boxes_bev
-
-
-def boxes3d_to_bev_torch_lidar(boxes3d):
-    """covert boxes3d to bev in in LiDAR coords
-
-    Args:
-        boxes3d (FloartTensor): (N, 7) [x, y, z, w, l, h, ry] in LiDAR coords
-
-    Returns:
-        FloartTensor: (N, 5) [x1, y1, x2, y2, ry]
-    """
-    boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5)))
-
-    x, y = boxes3d[:, 0], boxes3d[:, 1]
-    half_l, half_w = boxes3d[:, 4] / 2, boxes3d[:, 3] / 2
-    boxes_bev[:, 0], boxes_bev[:, 1] = x - half_w, y - half_l
-    boxes_bev[:, 2], boxes_bev[:, 3] = x + half_w, y + half_l
-    boxes_bev[:, 4] = boxes3d[:, 6]
-    return boxes_bev
--- a/mmdet3d/ops/iou3d/setup.py
+++ b/mmdet3d/ops/iou3d/setup.py
-from setuptools import setup
-
-from torch.utils.cpp_extension import BuildExtension, CUDAExtension
-
-setup(
-    name='iou3d',
-    ext_modules=[
-        CUDAExtension(
-            'iou3d_cuda', [
-                'src/iou3d.cpp',
-                'src/iou3d_kernel.cu',
-            ],
-            extra_compile_args={
-                'cxx': ['-g', '-I /usr/local/cuda/include'],
-                'nvcc': ['-O2']
-            })
-    ],
-    cmdclass={'build_ext': BuildExtension})
--- a/mmdet3d/ops/roiaware_pool3d/points_in_boxes.py
+++ b/mmdet3d/ops/roiaware_pool3d/points_in_boxes.py
@@ -31,7 +31,8 @@ def points_in_boxes_gpu(points, boxes):
 def points_in_boxes_cpu(points, boxes):
    """Find points that are in boxes (CPU)

-    Note: Currently, the output of this function is different from that of
+    Note:
+        Currently, the output of this function is different from that of
        points_in_boxes_gpu.

    Args:

--- a/mmdet3d/utils/__init__.py
+++ b/mmdet3d/utils/__init__.py
 from mmcv.utils import Registry, build_from_cfg, print_log

-from mmdet.utils import get_model_complexity_info
+from mmdet.utils import get_model_complexity_info, get_root_logger
 from .collect_env import collect_env
-from .logger import get_root_logger

 __all__ = [
    'Registry', 'build_from_cfg', 'get_model_complexity_info',