将子模块转换为普通目录

ba3cd005 · 雍大凯 · d2b71343 · ba3cd005 · ba3cd005 · ba3cd005
Commit ba3cd005 authored Apr 08, 2026 by 雍大凯
18 changed files
--- a/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/utils/gaussian.py
+++ b/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/utils/gaussian.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import torch
+
+
+def gaussian_2d(shape, sigma=1):
+    """Generate gaussian map.
+
+    Args:
+        shape (list[int]): Shape of the map.
+        sigma (float, optional): Sigma to generate gaussian map.
+            Defaults to 1.
+
+    Returns:
+        np.ndarray: Generated gaussian map.
+    """
+    m, n = [(ss - 1.) / 2. for ss in shape]
+    y, x = np.ogrid[-m:m + 1, -n:n + 1]
+
+    h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
+    h[h < np.finfo(h.dtype).eps * h.max()] = 0
+    return h
+
+
+def draw_heatmap_gaussian(heatmap, center, radius, k=1):
+    """Get gaussian masked heatmap.
+
+    Args:
+        heatmap (torch.Tensor): Heatmap to be masked.
+        center (torch.Tensor): Center coord of the heatmap.
+        radius (int): Radius of gaussian.
+        K (int, optional): Multiple of masked_gaussian. Defaults to 1.
+
+    Returns:
+        torch.Tensor: Masked heatmap.
+    """
+    diameter = 2 * radius + 1
+    gaussian = gaussian_2d((diameter, diameter), sigma=diameter / 6)
+
+    x, y = int(center[0]), int(center[1])
+
+    height, width = heatmap.shape[0:2]
+
+    left, right = min(x, radius), min(width - x, radius + 1)
+    top, bottom = min(y, radius), min(height - y, radius + 1)
+
+    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
+    masked_gaussian = torch.from_numpy(
+        gaussian[radius - top:radius + bottom,
+                 radius - left:radius + right]).to(heatmap.device,
+                                                   torch.float32)
+    if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:
+        torch.max(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
+    return heatmap
+
+
+def gaussian_radius(det_size, min_overlap=0.5):
+    """Get radius of gaussian.
+
+    Args:
+        det_size (tuple[torch.Tensor]): Size of the detection result.
+        min_overlap (float, optional): Gaussian_overlap. Defaults to 0.5.
+
+    Returns:
+        torch.Tensor: Computed radius.
+    """
+    height, width = det_size
+
+    a1 = 1
+    b1 = (height + width)
+    c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
+    sq1 = torch.sqrt(b1**2 - 4 * a1 * c1)
+    r1 = (b1 + sq1) / 2
+
+    a2 = 4
+    b2 = 2 * (height + width)
+    c2 = (1 - min_overlap) * width * height
+    sq2 = torch.sqrt(b2**2 - 4 * a2 * c2)
+    r2 = (b2 + sq2) / 2
+
+    a3 = 4 * min_overlap
+    b3 = -2 * min_overlap * (height + width)
+    c3 = (min_overlap - 1) * width * height
+    sq3 = torch.sqrt(b3**2 - 4 * a3 * c3)
+    r3 = (b3 + sq3) / 2
+    return min(r1, r2, r3)
+
+
+def get_ellip_gaussian_2D(heatmap, center, radius_x, radius_y, k=1):
+    """Generate 2D ellipse gaussian heatmap.
+
+    Args:
+        heatmap (Tensor): Input heatmap, the gaussian kernel will cover on
+            it and maintain the max value.
+        center (list[int]): Coord of gaussian kernel's center.
+        radius_x (int): X-axis radius of gaussian kernel.
+        radius_y (int): Y-axis radius of gaussian kernel.
+        k (int, optional): Coefficient of gaussian kernel. Default: 1.
+
+    Returns:
+        out_heatmap (Tensor): Updated heatmap covered by gaussian kernel.
+    """
+    diameter_x, diameter_y = 2 * radius_x + 1, 2 * radius_y + 1
+    gaussian_kernel = ellip_gaussian2D((radius_x, radius_y),
+                                       sigma_x=diameter_x / 6,
+                                       sigma_y=diameter_y / 6,
+                                       dtype=heatmap.dtype,
+                                       device=heatmap.device)
+
+    x, y = int(center[0]), int(center[1])
+    height, width = heatmap.shape[0:2]
+
+    left, right = min(x, radius_x), min(width - x, radius_x + 1)
+    top, bottom = min(y, radius_y), min(height - y, radius_y + 1)
+
+    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
+    masked_gaussian = gaussian_kernel[radius_y - top:radius_y + bottom,
+                                      radius_x - left:radius_x + right]
+    out_heatmap = heatmap
+    torch.max(
+        masked_heatmap,
+        masked_gaussian * k,
+        out=out_heatmap[y - top:y + bottom, x - left:x + right])
+
+    return out_heatmap
+
+
+def ellip_gaussian2D(radius,
+                     sigma_x,
+                     sigma_y,
+                     dtype=torch.float32,
+                     device='cpu'):
+    """Generate 2D ellipse gaussian kernel.
+
+    Args:
+        radius (tuple(int)): Ellipse radius (radius_x, radius_y) of gaussian
+            kernel.
+        sigma_x (int): X-axis sigma of gaussian function.
+        sigma_y (int): Y-axis sigma of gaussian function.
+        dtype (torch.dtype, optional): Dtype of gaussian tensor.
+            Default: torch.float32.
+        device (str, optional): Device of gaussian tensor.
+            Default: 'cpu'.
+
+    Returns:
+        h (Tensor): Gaussian kernel with a
+            ``(2 * radius_y + 1) * (2 * radius_x + 1)`` shape.
+    """
+    x = torch.arange(
+        -radius[0], radius[0] + 1, dtype=dtype, device=device).view(1, -1)
+    y = torch.arange(
+        -radius[1], radius[1] + 1, dtype=dtype, device=device).view(-1, 1)
+
+    h = (-(x * x) / (2 * sigma_x * sigma_x) - (y * y) /
+         (2 * sigma_y * sigma_y)).exp()
+    h[h < torch.finfo(h.dtype).eps * h.max()] = 0
+
+    return h
--- a/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/visualizer/__init__.py
+++ b/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/visualizer/__init__.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from .show_result import (show_multi_modality_result, show_result,
+                          show_seg_result)
+
+__all__ = ['show_result', 'show_seg_result', 'show_multi_modality_result']
--- a/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/visualizer/image_vis.py
+++ b/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/visualizer/image_vis.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import cv2
+import numpy as np
+import torch
+from matplotlib import pyplot as plt
+
+
+def project_pts_on_img(points,
+                       raw_img,
+                       lidar2img_rt,
+                       max_distance=70,
+                       thickness=-1):
+    """Project the 3D points cloud on 2D image.
+
+    Args:
+        points (numpy.array): 3D points cloud (x, y, z) to visualize.
+        raw_img (numpy.array): The numpy array of image.
+        lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix
+            according to the camera intrinsic parameters.
+        max_distance (float, optional): the max distance of the points cloud.
+            Default: 70.
+        thickness (int, optional): The thickness of 2D points. Default: -1.
+    """
+    img = raw_img.copy()
+    num_points = points.shape[0]
+    pts_4d = np.concatenate([points[:, :3], np.ones((num_points, 1))], axis=-1)
+    pts_2d = pts_4d @ lidar2img_rt.T
+
+    # cam_points is Tensor of Nx4 whose last column is 1
+    # transform camera coordinate to image coordinate
+    pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=99999)
+    pts_2d[:, 0] /= pts_2d[:, 2]
+    pts_2d[:, 1] /= pts_2d[:, 2]
+
+    fov_inds = ((pts_2d[:, 0] < img.shape[1])
+                & (pts_2d[:, 0] >= 0)
+                & (pts_2d[:, 1] < img.shape[0])
+                & (pts_2d[:, 1] >= 0))
+
+    imgfov_pts_2d = pts_2d[fov_inds, :3]  # u, v, d
+
+    cmap = plt.cm.get_cmap('hsv', 256)
+    cmap = np.array([cmap(i) for i in range(256)])[:, :3] * 255
+    for i in range(imgfov_pts_2d.shape[0]):
+        depth = imgfov_pts_2d[i, 2]
+        color = cmap[np.clip(int(max_distance * 10 / depth), 0, 255), :]
+        cv2.circle(
+            img,
+            center=(int(np.round(imgfov_pts_2d[i, 0])),
+                    int(np.round(imgfov_pts_2d[i, 1]))),
+            radius=1,
+            color=tuple(color),
+            thickness=thickness,
+        )
+    cv2.imshow('project_pts_img', img.astype(np.uint8))
+    cv2.waitKey(100)
+
+
+def plot_rect3d_on_img(img,
+                       num_rects,
+                       rect_corners,
+                       color=(0, 255, 0),
+                       thickness=1):
+    """Plot the boundary lines of 3D rectangular on 2D images.
+
+    Args:
+        img (numpy.array): The numpy array of image.
+        num_rects (int): Number of 3D rectangulars.
+        rect_corners (numpy.array): Coordinates of the corners of 3D
+            rectangulars. Should be in the shape of [num_rect, 8, 2].
+        color (tuple[int], optional): The color to draw bboxes.
+            Default: (0, 255, 0).
+        thickness (int, optional): The thickness of bboxes. Default: 1.
+    """
+    line_indices = ((0, 1), (0, 3), (0, 4), (1, 2), (1, 5), (3, 2), (3, 7),
+                    (4, 5), (4, 7), (2, 6), (5, 6), (6, 7))
+    for i in range(num_rects):
+        corners = rect_corners[i].astype(np.int)
+        for start, end in line_indices:
+            cv2.line(img, (corners[start, 0], corners[start, 1]),
+                     (corners[end, 0], corners[end, 1]), color, thickness,
+                     cv2.LINE_AA)
+
+    return img.astype(np.uint8)
+
+
+def draw_lidar_bbox3d_on_img(bboxes3d,
+                             raw_img,
+                             lidar2img_rt,
+                             img_metas,
+                             color=(0, 255, 0),
+                             thickness=1):
+    """Project the 3D bbox on 2D plane and draw on input image.
+
+    Args:
+        bboxes3d (:obj:`LiDARInstance3DBoxes`):
+            3d bbox in lidar coordinate system to visualize.
+        raw_img (numpy.array): The numpy array of image.
+        lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix
+            according to the camera intrinsic parameters.
+        img_metas (dict): Useless here.
+        color (tuple[int], optional): The color to draw bboxes.
+            Default: (0, 255, 0).
+        thickness (int, optional): The thickness of bboxes. Default: 1.
+    """
+    img = raw_img.copy()
+    corners_3d = bboxes3d.corners
+    num_bbox = corners_3d.shape[0]
+    pts_4d = np.concatenate(
+        [corners_3d.reshape(-1, 3),
+         np.ones((num_bbox * 8, 1))], axis=-1)
+    lidar2img_rt = copy.deepcopy(lidar2img_rt).reshape(4, 4)
+    if isinstance(lidar2img_rt, torch.Tensor):
+        lidar2img_rt = lidar2img_rt.cpu().numpy()
+    pts_2d = pts_4d @ lidar2img_rt.T
+
+    pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=1e5)
+    pts_2d[:, 0] /= pts_2d[:, 2]
+    pts_2d[:, 1] /= pts_2d[:, 2]
+    imgfov_pts_2d = pts_2d[..., :2].reshape(num_bbox, 8, 2)
+
+    return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness)
+
+
+# TODO: remove third parameter in all functions here in favour of img_metas
+def draw_depth_bbox3d_on_img(bboxes3d,
+                             raw_img,
+                             calibs,
+                             img_metas,
+                             color=(0, 255, 0),
+                             thickness=1):
+    """Project the 3D bbox on 2D plane and draw on input image.
+
+    Args:
+        bboxes3d (:obj:`DepthInstance3DBoxes`, shape=[M, 7]):
+            3d bbox in depth coordinate system to visualize.
+        raw_img (numpy.array): The numpy array of image.
+        calibs (dict): Camera calibration information, Rt and K.
+        img_metas (dict): Used in coordinates transformation.
+        color (tuple[int], optional): The color to draw bboxes.
+            Default: (0, 255, 0).
+        thickness (int, optional): The thickness of bboxes. Default: 1.
+    """
+    from mmdet3d.core.bbox import points_cam2img
+    from mmdet3d.models import apply_3d_transformation
+
+    img = raw_img.copy()
+    img_metas = copy.deepcopy(img_metas)
+    corners_3d = bboxes3d.corners
+    num_bbox = corners_3d.shape[0]
+    points_3d = corners_3d.reshape(-1, 3)
+
+    # first reverse the data transformations
+    xyz_depth = apply_3d_transformation(
+        points_3d, 'DEPTH', img_metas, reverse=True)
+
+    # project to 2d to get image coords (uv)
+    uv_origin = points_cam2img(xyz_depth,
+                               xyz_depth.new_tensor(img_metas['depth2img']))
+    uv_origin = (uv_origin - 1).round()
+    imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy()
+
+    return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness)
+
+
+def draw_camera_bbox3d_on_img(bboxes3d,
+                              raw_img,
+                              cam2img,
+                              img_metas,
+                              color=(0, 255, 0),
+                              thickness=1):
+    """Project the 3D bbox on 2D plane and draw on input image.
+
+    Args:
+        bboxes3d (:obj:`CameraInstance3DBoxes`, shape=[M, 7]):
+            3d bbox in camera coordinate system to visualize.
+        raw_img (numpy.array): The numpy array of image.
+        cam2img (dict): Camera intrinsic matrix,
+            denoted as `K` in depth bbox coordinate system.
+        img_metas (dict): Useless here.
+        color (tuple[int], optional): The color to draw bboxes.
+            Default: (0, 255, 0).
+        thickness (int, optional): The thickness of bboxes. Default: 1.
+    """
+    from mmdet3d.core.bbox import points_cam2img
+
+    img = raw_img.copy()
+    cam2img = copy.deepcopy(cam2img)
+    corners_3d = bboxes3d.corners
+    num_bbox = corners_3d.shape[0]
+    points_3d = corners_3d.reshape(-1, 3)
+    if not isinstance(cam2img, torch.Tensor):
+        cam2img = torch.from_numpy(np.array(cam2img))
+
+    assert (cam2img.shape == torch.Size([3, 3])
+            or cam2img.shape == torch.Size([4, 4]))
+    cam2img = cam2img.float().cpu()
+
+    # project to 2d to get image coords (uv)
+    uv_origin = points_cam2img(points_3d, cam2img)
+    uv_origin = (uv_origin - 1).round()
+    imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy()
+
+    return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness)
--- a/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/visualizer/open3d_vis.py
+++ b/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/visualizer/open3d_vis.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import numpy as np
+import torch
+
+try:
+    import open3d as o3d
+    from open3d import geometry
+except ImportError:
+    raise ImportError(
+        'Please run "pip install open3d" to install open3d first.')
+
+
+def _draw_points(points,
+                 vis,
+                 points_size=2,
+                 point_color=(0.5, 0.5, 0.5),
+                 mode='xyz'):
+    """Draw points on visualizer.
+
+    Args:
+        points (numpy.array | torch.tensor, shape=[N, 3+C]):
+            points to visualize.
+        vis (:obj:`open3d.visualization.Visualizer`): open3d visualizer.
+        points_size (int, optional): the size of points to show on visualizer.
+            Default: 2.
+        point_color (tuple[float], optional): the color of points.
+            Default: (0.5, 0.5, 0.5).
+        mode (str, optional):  indicate type of the input points,
+            available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
+
+    Returns:
+        tuple: points, color of each point.
+    """
+    vis.get_render_option().point_size = points_size  # set points size
+    if isinstance(points, torch.Tensor):
+        points = points.cpu().numpy()
+
+    points = points.copy()
+    pcd = geometry.PointCloud()
+    if mode == 'xyz':
+        pcd.points = o3d.utility.Vector3dVector(points[:, :3])
+        points_colors = np.tile(np.array(point_color), (points.shape[0], 1))
+    elif mode == 'xyzrgb':
+        pcd.points = o3d.utility.Vector3dVector(points[:, :3])
+        points_colors = points[:, 3:6]
+        # normalize to [0, 1] for open3d drawing
+        if not ((points_colors >= 0.0) & (points_colors <= 1.0)).all():
+            points_colors /= 255.0
+    else:
+        raise NotImplementedError
+
+    pcd.colors = o3d.utility.Vector3dVector(points_colors)
+    vis.add_geometry(pcd)
+
+    return pcd, points_colors
+
+
+def _draw_bboxes(bbox3d,
+                 vis,
+                 points_colors,
+                 pcd=None,
+                 bbox_color=(0, 1, 0),
+                 points_in_box_color=(1, 0, 0),
+                 rot_axis=2,
+                 center_mode='lidar_bottom',
+                 mode='xyz'):
+    """Draw bbox on visualizer and change the color of points inside bbox3d.
+
+    Args:
+        bbox3d (numpy.array | torch.tensor, shape=[M, 7]):
+            3d bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.
+        vis (:obj:`open3d.visualization.Visualizer`): open3d visualizer.
+        points_colors (numpy.array): color of each points.
+        pcd (:obj:`open3d.geometry.PointCloud`, optional): point cloud.
+            Default: None.
+        bbox_color (tuple[float], optional): the color of bbox.
+            Default: (0, 1, 0).
+        points_in_box_color (tuple[float], optional):
+            the color of points inside bbox3d. Default: (1, 0, 0).
+        rot_axis (int, optional): rotation axis of bbox. Default: 2.
+        center_mode (bool, optional): indicate the center of bbox is
+            bottom center or gravity center. available mode
+            ['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
+        mode (str, optional):  indicate type of the input points,
+            available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
+    """
+    if isinstance(bbox3d, torch.Tensor):
+        bbox3d = bbox3d.cpu().numpy()
+    bbox3d = bbox3d.copy()
+
+    in_box_color = np.array(points_in_box_color)
+    for i in range(len(bbox3d)):
+        center = bbox3d[i, 0:3]
+        dim = bbox3d[i, 3:6]
+        yaw = np.zeros(3)
+        yaw[rot_axis] = bbox3d[i, 6]
+        rot_mat = geometry.get_rotation_matrix_from_xyz(yaw)
+
+        if center_mode == 'lidar_bottom':
+            center[rot_axis] += dim[
+                rot_axis] / 2  # bottom center to gravity center
+        elif center_mode == 'camera_bottom':
+            center[rot_axis] -= dim[
+                rot_axis] / 2  # bottom center to gravity center
+        box3d = geometry.OrientedBoundingBox(center, rot_mat, dim)
+
+        line_set = geometry.LineSet.create_from_oriented_bounding_box(box3d)
+        line_set.paint_uniform_color(bbox_color)
+        # draw bboxes on visualizer
+        vis.add_geometry(line_set)
+
+        # change the color of points which are in box
+        if pcd is not None and mode == 'xyz':
+            indices = box3d.get_point_indices_within_bounding_box(pcd.points)
+            points_colors[indices] = in_box_color
+
+    # update points colors
+    if pcd is not None:
+        pcd.colors = o3d.utility.Vector3dVector(points_colors)
+        vis.update_geometry(pcd)
+
+
+def show_pts_boxes(points,
+                   bbox3d=None,
+                   show=True,
+                   save_path=None,
+                   points_size=2,
+                   point_color=(0.5, 0.5, 0.5),
+                   bbox_color=(0, 1, 0),
+                   points_in_box_color=(1, 0, 0),
+                   rot_axis=2,
+                   center_mode='lidar_bottom',
+                   mode='xyz'):
+    """Draw bbox and points on visualizer.
+
+    Args:
+        points (numpy.array | torch.tensor, shape=[N, 3+C]):
+            points to visualize.
+        bbox3d (numpy.array | torch.tensor, shape=[M, 7], optional):
+            3D bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.
+            Defaults to None.
+        show (bool, optional): whether to show the visualization results.
+            Default: True.
+        save_path (str, optional): path to save visualized results.
+            Default: None.
+        points_size (int, optional): the size of points to show on visualizer.
+            Default: 2.
+        point_color (tuple[float], optional): the color of points.
+            Default: (0.5, 0.5, 0.5).
+        bbox_color (tuple[float], optional): the color of bbox.
+            Default: (0, 1, 0).
+        points_in_box_color (tuple[float], optional):
+            the color of points which are in bbox3d. Default: (1, 0, 0).
+        rot_axis (int, optional): rotation axis of bbox. Default: 2.
+        center_mode (bool, optional): indicate the center of bbox is bottom
+            center or gravity center. available mode
+            ['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
+        mode (str, optional):  indicate type of the input points, available
+            mode ['xyz', 'xyzrgb']. Default: 'xyz'.
+    """
+    # TODO: support score and class info
+    assert 0 <= rot_axis <= 2
+
+    # init visualizer
+    vis = o3d.visualization.Visualizer()
+    vis.create_window()
+    mesh_frame = geometry.TriangleMesh.create_coordinate_frame(
+        size=1, origin=[0, 0, 0])  # create coordinate frame
+    vis.add_geometry(mesh_frame)
+
+    # draw points
+    pcd, points_colors = _draw_points(points, vis, points_size, point_color,
+                                      mode)
+
+    # draw boxes
+    if bbox3d is not None:
+        _draw_bboxes(bbox3d, vis, points_colors, pcd, bbox_color,
+                     points_in_box_color, rot_axis, center_mode, mode)
+
+    if show:
+        vis.run()
+
+    if save_path is not None:
+        vis.capture_screen_image(save_path)
+
+    vis.destroy_window()
+
+
+def _draw_bboxes_ind(bbox3d,
+                     vis,
+                     indices,
+                     points_colors,
+                     pcd=None,
+                     bbox_color=(0, 1, 0),
+                     points_in_box_color=(1, 0, 0),
+                     rot_axis=2,
+                     center_mode='lidar_bottom',
+                     mode='xyz'):
+    """Draw bbox on visualizer and change the color or points inside bbox3d
+    with indices.
+
+    Args:
+        bbox3d (numpy.array | torch.tensor, shape=[M, 7]):
+            3d bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.
+        vis (:obj:`open3d.visualization.Visualizer`): open3d visualizer.
+        indices (numpy.array | torch.tensor, shape=[N, M]):
+            indicate which bbox3d that each point lies in.
+        points_colors (numpy.array): color of each points.
+        pcd (:obj:`open3d.geometry.PointCloud`, optional): point cloud.
+            Default: None.
+        bbox_color (tuple[float], optional): the color of bbox.
+            Default: (0, 1, 0).
+        points_in_box_color (tuple[float], optional):
+            the color of points which are in bbox3d. Default: (1, 0, 0).
+        rot_axis (int, optional): rotation axis of bbox. Default: 2.
+        center_mode (bool, optional): indicate the center of bbox is
+            bottom center or gravity center. available mode
+            ['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
+        mode (str, optional):  indicate type of the input points,
+            available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
+    """
+    if isinstance(bbox3d, torch.Tensor):
+        bbox3d = bbox3d.cpu().numpy()
+    if isinstance(indices, torch.Tensor):
+        indices = indices.cpu().numpy()
+    bbox3d = bbox3d.copy()
+
+    in_box_color = np.array(points_in_box_color)
+    for i in range(len(bbox3d)):
+        center = bbox3d[i, 0:3]
+        dim = bbox3d[i, 3:6]
+        yaw = np.zeros(3)
+        # TODO: fix problem of current coordinate system
+        # dim[0], dim[1] = dim[1], dim[0]  # for current coordinate
+        # yaw[rot_axis] = -(bbox3d[i, 6] - 0.5 * np.pi)
+        yaw[rot_axis] = -bbox3d[i, 6]
+        rot_mat = geometry.get_rotation_matrix_from_xyz(yaw)
+        if center_mode == 'lidar_bottom':
+            center[rot_axis] += dim[
+                rot_axis] / 2  # bottom center to gravity center
+        elif center_mode == 'camera_bottom':
+            center[rot_axis] -= dim[
+                rot_axis] / 2  # bottom center to gravity center
+        box3d = geometry.OrientedBoundingBox(center, rot_mat, dim)
+
+        line_set = geometry.LineSet.create_from_oriented_bounding_box(box3d)
+        line_set.paint_uniform_color(bbox_color)
+        # draw bboxes on visualizer
+        vis.add_geometry(line_set)
+
+        # change the color of points which are in box
+        if pcd is not None and mode == 'xyz':
+            points_colors[indices[:, i].astype(np.bool)] = in_box_color
+
+    # update points colors
+    if pcd is not None:
+        pcd.colors = o3d.utility.Vector3dVector(points_colors)
+        vis.update_geometry(pcd)
+
+
+def show_pts_index_boxes(points,
+                         bbox3d=None,
+                         show=True,
+                         indices=None,
+                         save_path=None,
+                         points_size=2,
+                         point_color=(0.5, 0.5, 0.5),
+                         bbox_color=(0, 1, 0),
+                         points_in_box_color=(1, 0, 0),
+                         rot_axis=2,
+                         center_mode='lidar_bottom',
+                         mode='xyz'):
+    """Draw bbox and points on visualizer with indices that indicate which
+    bbox3d that each point lies in.
+
+    Args:
+        points (numpy.array | torch.tensor, shape=[N, 3+C]):
+            points to visualize.
+        bbox3d (numpy.array | torch.tensor, shape=[M, 7]):
+            3D bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.
+            Defaults to None.
+        show (bool, optional): whether to show the visualization results.
+            Default: True.
+        indices (numpy.array | torch.tensor, shape=[N, M], optional):
+            indicate which bbox3d that each point lies in. Default: None.
+        save_path (str, optional): path to save visualized results.
+            Default: None.
+        points_size (int, optional): the size of points to show on visualizer.
+            Default: 2.
+        point_color (tuple[float], optional): the color of points.
+            Default: (0.5, 0.5, 0.5).
+        bbox_color (tuple[float], optional): the color of bbox.
+            Default: (0, 1, 0).
+        points_in_box_color (tuple[float], optional):
+            the color of points which are in bbox3d. Default: (1, 0, 0).
+        rot_axis (int, optional): rotation axis of bbox. Default: 2.
+        center_mode (bool, optional): indicate the center of bbox is
+            bottom center or gravity center. available mode
+            ['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
+        mode (str, optional):  indicate type of the input points,
+            available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
+    """
+    # TODO: support score and class info
+    assert 0 <= rot_axis <= 2
+
+    # init visualizer
+    vis = o3d.visualization.Visualizer()
+    vis.create_window()
+    mesh_frame = geometry.TriangleMesh.create_coordinate_frame(
+        size=1, origin=[0, 0, 0])  # create coordinate frame
+    vis.add_geometry(mesh_frame)
+
+    # draw points
+    pcd, points_colors = _draw_points(points, vis, points_size, point_color,
+                                      mode)
+
+    # draw boxes
+    if bbox3d is not None:
+        _draw_bboxes_ind(bbox3d, vis, indices, points_colors, pcd, bbox_color,
+                         points_in_box_color, rot_axis, center_mode, mode)
+
+    if show:
+        vis.run()
+
+    if save_path is not None:
+        vis.capture_screen_image(save_path)
+
+    vis.destroy_window()
+
+
+class Visualizer(object):
+    r"""Online visualizer implemented with Open3d.
+
+    Args:
+        points (numpy.array, shape=[N, 3+C]): Points to visualize. The Points
+            cloud is in mode of Coord3DMode.DEPTH (please refer to
+            core.structures.coord_3d_mode).
+        bbox3d (numpy.array, shape=[M, 7], optional): 3D bbox
+            (x, y, z, x_size, y_size, z_size, yaw) to visualize.
+            The 3D bbox is in mode of Box3DMode.DEPTH with
+            gravity_center (please refer to core.structures.box_3d_mode).
+            Default: None.
+        save_path (str, optional): path to save visualized results.
+            Default: None.
+        points_size (int, optional): the size of points to show on visualizer.
+            Default: 2.
+        point_color (tuple[float], optional): the color of points.
+            Default: (0.5, 0.5, 0.5).
+        bbox_color (tuple[float], optional): the color of bbox.
+            Default: (0, 1, 0).
+        points_in_box_color (tuple[float], optional):
+            the color of points which are in bbox3d. Default: (1, 0, 0).
+        rot_axis (int, optional): rotation axis of bbox. Default: 2.
+        center_mode (bool, optional): indicate the center of bbox is
+            bottom center or gravity center. available mode
+            ['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
+        mode (str, optional):  indicate type of the input points,
+            available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
+    """
+
+    def __init__(self,
+                 points,
+                 bbox3d=None,
+                 save_path=None,
+                 points_size=2,
+                 point_color=(0.5, 0.5, 0.5),
+                 bbox_color=(0, 1, 0),
+                 points_in_box_color=(1, 0, 0),
+                 rot_axis=2,
+                 center_mode='lidar_bottom',
+                 mode='xyz'):
+        super(Visualizer, self).__init__()
+        assert 0 <= rot_axis <= 2
+
+        # init visualizer
+        self.o3d_visualizer = o3d.visualization.Visualizer()
+        self.o3d_visualizer.create_window()
+        mesh_frame = geometry.TriangleMesh.create_coordinate_frame(
+            size=1, origin=[0, 0, 0])  # create coordinate frame
+        self.o3d_visualizer.add_geometry(mesh_frame)
+
+        self.points_size = points_size
+        self.point_color = point_color
+        self.bbox_color = bbox_color
+        self.points_in_box_color = points_in_box_color
+        self.rot_axis = rot_axis
+        self.center_mode = center_mode
+        self.mode = mode
+        self.seg_num = 0
+
+        # draw points
+        if points is not None:
+            self.pcd, self.points_colors = _draw_points(
+                points, self.o3d_visualizer, points_size, point_color, mode)
+
+        # draw boxes
+        if bbox3d is not None:
+            _draw_bboxes(bbox3d, self.o3d_visualizer, self.points_colors,
+                         self.pcd, bbox_color, points_in_box_color, rot_axis,
+                         center_mode, mode)
+
+    def add_bboxes(self, bbox3d, bbox_color=None, points_in_box_color=None):
+        """Add bounding box to visualizer.
+
+        Args:
+            bbox3d (numpy.array, shape=[M, 7]):
+                3D bbox (x, y, z, x_size, y_size, z_size, yaw)
+                to be visualized. The 3d bbox is in mode of
+                Box3DMode.DEPTH with gravity_center (please refer to
+                core.structures.box_3d_mode).
+            bbox_color (tuple[float]): the color of bbox. Default: None.
+            points_in_box_color (tuple[float]): the color of points which
+                are in bbox3d. Default: None.
+        """
+        if bbox_color is None:
+            bbox_color = self.bbox_color
+        if points_in_box_color is None:
+            points_in_box_color = self.points_in_box_color
+        _draw_bboxes(bbox3d, self.o3d_visualizer, self.points_colors, self.pcd,
+                     bbox_color, points_in_box_color, self.rot_axis,
+                     self.center_mode, self.mode)
+
+    def add_seg_mask(self, seg_mask_colors):
+        """Add segmentation mask to visualizer via per-point colorization.
+
+        Args:
+            seg_mask_colors (numpy.array, shape=[N, 6]):
+                The segmentation mask whose first 3 dims are point coordinates
+                and last 3 dims are converted colors.
+        """
+        # we can't draw the colors on existing points
+        # in case gt and pred mask would overlap
+        # instead we set a large offset along x-axis for each seg mask
+        self.seg_num += 1
+        offset = (np.array(self.pcd.points).max(0) -
+                  np.array(self.pcd.points).min(0))[0] * 1.2 * self.seg_num
+        mesh_frame = geometry.TriangleMesh.create_coordinate_frame(
+            size=1, origin=[offset, 0, 0])  # create coordinate frame for seg
+        self.o3d_visualizer.add_geometry(mesh_frame)
+        seg_points = copy.deepcopy(seg_mask_colors)
+        seg_points[:, 0] += offset
+        _draw_points(
+            seg_points, self.o3d_visualizer, self.points_size, mode='xyzrgb')
+
+    def show(self, save_path=None):
+        """Visualize the points cloud.
+
+        Args:
+            save_path (str, optional): path to save image. Default: None.
+        """
+
+        self.o3d_visualizer.run()
+
+        if save_path is not None:
+            self.o3d_visualizer.capture_screen_image(save_path)
+
+        self.o3d_visualizer.destroy_window()
+        return
--- a/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/visualizer/show_result.py
+++ b/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/visualizer/show_result.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from os import path as osp
+
+import mmcv
+import numpy as np
+import trimesh
+
+from .image_vis import (draw_camera_bbox3d_on_img, draw_depth_bbox3d_on_img,
+                        draw_lidar_bbox3d_on_img)
+
+
+def _write_obj(points, out_filename):
+    """Write points into ``obj`` format for meshlab visualization.
+
+    Args:
+        points (np.ndarray): Points in shape (N, dim).
+        out_filename (str): Filename to be saved.
+    """
+    N = points.shape[0]
+    fout = open(out_filename, 'w')
+    for i in range(N):
+        if points.shape[1] == 6:
+            c = points[i, 3:].astype(int)
+            fout.write(
+                'v %f %f %f %d %d %d\n' %
+                (points[i, 0], points[i, 1], points[i, 2], c[0], c[1], c[2]))
+
+        else:
+            fout.write('v %f %f %f\n' %
+                       (points[i, 0], points[i, 1], points[i, 2]))
+    fout.close()
+
+
+def _write_oriented_bbox(scene_bbox, out_filename):
+    """Export oriented (around Z axis) scene bbox to meshes.
+
+    Args:
+        scene_bbox(list[ndarray] or ndarray): xyz pos of center and
+            3 lengths (x_size, y_size, z_size) and heading angle around Z axis.
+            Y forward, X right, Z upward. heading angle of positive X is 0,
+            heading angle of positive Y is 90 degrees.
+        out_filename(str): Filename.
+    """
+
+    def heading2rotmat(heading_angle):
+        rotmat = np.zeros((3, 3))
+        rotmat[2, 2] = 1
+        cosval = np.cos(heading_angle)
+        sinval = np.sin(heading_angle)
+        rotmat[0:2, 0:2] = np.array([[cosval, -sinval], [sinval, cosval]])
+        return rotmat
+
+    def convert_oriented_box_to_trimesh_fmt(box):
+        ctr = box[:3]
+        lengths = box[3:6]
+        trns = np.eye(4)
+        trns[0:3, 3] = ctr
+        trns[3, 3] = 1.0
+        trns[0:3, 0:3] = heading2rotmat(box[6])
+        box_trimesh_fmt = trimesh.creation.box(lengths, trns)
+        return box_trimesh_fmt
+
+    if len(scene_bbox) == 0:
+        scene_bbox = np.zeros((1, 7))
+    scene = trimesh.scene.Scene()
+    for box in scene_bbox:
+        scene.add_geometry(convert_oriented_box_to_trimesh_fmt(box))
+
+    mesh_list = trimesh.util.concatenate(scene.dump())
+    # save to obj file
+    trimesh.io.export.export_mesh(mesh_list, out_filename, file_type='obj')
+
+    return
+
+
+def show_result(points,
+                gt_bboxes,
+                pred_bboxes,
+                out_dir,
+                filename,
+                show=False,
+                snapshot=False,
+                pred_labels=None):
+    """Convert results into format that is directly readable for meshlab.
+
+    Args:
+        points (np.ndarray): Points.
+        gt_bboxes (np.ndarray): Ground truth boxes.
+        pred_bboxes (np.ndarray): Predicted boxes.
+        out_dir (str): Path of output directory
+        filename (str): Filename of the current frame.
+        show (bool, optional): Visualize the results online. Defaults to False.
+        snapshot (bool, optional): Whether to save the online results.
+            Defaults to False.
+        pred_labels (np.ndarray, optional): Predicted labels of boxes.
+            Defaults to None.
+    """
+    result_path = osp.join(out_dir, filename)
+    mmcv.mkdir_or_exist(result_path)
+
+    if show:
+        from .open3d_vis import Visualizer
+
+        vis = Visualizer(points)
+        if pred_bboxes is not None:
+            if pred_labels is None:
+                vis.add_bboxes(bbox3d=pred_bboxes)
+            else:
+                palette = np.random.randint(
+                    0, 255, size=(pred_labels.max() + 1, 3)) / 256
+                labelDict = {}
+                for j in range(len(pred_labels)):
+                    i = int(pred_labels[j].numpy())
+                    if labelDict.get(i) is None:
+                        labelDict[i] = []
+                    labelDict[i].append(pred_bboxes[j])
+                for i in labelDict:
+                    vis.add_bboxes(
+                        bbox3d=np.array(labelDict[i]),
+                        bbox_color=palette[i],
+                        points_in_box_color=palette[i])
+
+        if gt_bboxes is not None:
+            vis.add_bboxes(bbox3d=gt_bboxes, bbox_color=(0, 0, 1))
+        show_path = osp.join(result_path,
+                             f'{filename}_online.png') if snapshot else None
+        vis.show(show_path)
+
+    if points is not None:
+        _write_obj(points, osp.join(result_path, f'{filename}_points.obj'))
+
+    if gt_bboxes is not None:
+        # bottom center to gravity center
+        gt_bboxes[..., 2] += gt_bboxes[..., 5] / 2
+
+        _write_oriented_bbox(gt_bboxes,
+                             osp.join(result_path, f'{filename}_gt.obj'))
+
+    if pred_bboxes is not None:
+        # bottom center to gravity center
+        pred_bboxes[..., 2] += pred_bboxes[..., 5] / 2
+
+        _write_oriented_bbox(pred_bboxes,
+                             osp.join(result_path, f'{filename}_pred.obj'))
+
+
+def show_seg_result(points,
+                    gt_seg,
+                    pred_seg,
+                    out_dir,
+                    filename,
+                    palette,
+                    ignore_index=None,
+                    show=False,
+                    snapshot=False):
+    """Convert results into format that is directly readable for meshlab.
+
+    Args:
+        points (np.ndarray): Points.
+        gt_seg (np.ndarray): Ground truth segmentation mask.
+        pred_seg (np.ndarray): Predicted segmentation mask.
+        out_dir (str): Path of output directory
+        filename (str): Filename of the current frame.
+        palette (np.ndarray): Mapping between class labels and colors.
+        ignore_index (int, optional): The label index to be ignored, e.g.
+            unannotated points. Defaults to None.
+        show (bool, optional): Visualize the results online. Defaults to False.
+        snapshot (bool, optional): Whether to save the online results.
+            Defaults to False.
+    """
+    # we need 3D coordinates to visualize segmentation mask
+    if gt_seg is not None or pred_seg is not None:
+        assert points is not None, \
+            '3D coordinates are required for segmentation visualization'
+
+    # filter out ignored points
+    if gt_seg is not None and ignore_index is not None:
+        if points is not None:
+            points = points[gt_seg != ignore_index]
+        if pred_seg is not None:
+            pred_seg = pred_seg[gt_seg != ignore_index]
+        gt_seg = gt_seg[gt_seg != ignore_index]
+
+    if gt_seg is not None:
+        gt_seg_color = palette[gt_seg]
+        gt_seg_color = np.concatenate([points[:, :3], gt_seg_color], axis=1)
+    if pred_seg is not None:
+        pred_seg_color = palette[pred_seg]
+        pred_seg_color = np.concatenate([points[:, :3], pred_seg_color],
+                                        axis=1)
+
+    result_path = osp.join(out_dir, filename)
+    mmcv.mkdir_or_exist(result_path)
+
+    # online visualization of segmentation mask
+    # we show three masks in a row, scene_points, gt_mask, pred_mask
+    if show:
+        from .open3d_vis import Visualizer
+        mode = 'xyzrgb' if points.shape[1] == 6 else 'xyz'
+        vis = Visualizer(points, mode=mode)
+        if gt_seg is not None:
+            vis.add_seg_mask(gt_seg_color)
+        if pred_seg is not None:
+            vis.add_seg_mask(pred_seg_color)
+        show_path = osp.join(result_path,
+                             f'{filename}_online.png') if snapshot else None
+        vis.show(show_path)
+
+    if points is not None:
+        _write_obj(points, osp.join(result_path, f'{filename}_points.obj'))
+
+    if gt_seg is not None:
+        _write_obj(gt_seg_color, osp.join(result_path, f'{filename}_gt.obj'))
+
+    if pred_seg is not None:
+        _write_obj(pred_seg_color, osp.join(result_path,
+                                            f'{filename}_pred.obj'))
+
+
+def show_multi_modality_result(img,
+                               gt_bboxes,
+                               pred_bboxes,
+                               proj_mat,
+                               out_dir,
+                               filename,
+                               box_mode='lidar',
+                               img_metas=None,
+                               show=False,
+                               gt_bbox_color=(61, 102, 255),
+                               pred_bbox_color=(241, 101, 72)):
+    """Convert multi-modality detection results into 2D results.
+
+    Project the predicted 3D bbox to 2D image plane and visualize them.
+
+    Args:
+        img (np.ndarray): The numpy array of image in cv2 fashion.
+        gt_bboxes (:obj:`BaseInstance3DBoxes`): Ground truth boxes.
+        pred_bboxes (:obj:`BaseInstance3DBoxes`): Predicted boxes.
+        proj_mat (numpy.array, shape=[4, 4]): The projection matrix
+            according to the camera intrinsic parameters.
+        out_dir (str): Path of output directory.
+        filename (str): Filename of the current frame.
+        box_mode (str, optional): Coordinate system the boxes are in.
+            Should be one of 'depth', 'lidar' and 'camera'.
+            Defaults to 'lidar'.
+        img_metas (dict, optional): Used in projecting depth bbox.
+            Defaults to None.
+        show (bool, optional): Visualize the results online. Defaults to False.
+        gt_bbox_color (str or tuple(int), optional): Color of bbox lines.
+           The tuple of color should be in BGR order. Default: (255, 102, 61).
+        pred_bbox_color (str or tuple(int), optional): Color of bbox lines.
+           The tuple of color should be in BGR order. Default: (72, 101, 241).
+    """
+    if box_mode == 'depth':
+        draw_bbox = draw_depth_bbox3d_on_img
+    elif box_mode == 'lidar':
+        draw_bbox = draw_lidar_bbox3d_on_img
+    elif box_mode == 'camera':
+        draw_bbox = draw_camera_bbox3d_on_img
+    else:
+        raise NotImplementedError(f'unsupported box mode {box_mode}')
+
+    result_path = osp.join(out_dir, filename)
+    mmcv.mkdir_or_exist(result_path)
+
+    if show:
+        show_img = img.copy()
+        if gt_bboxes is not None:
+            show_img = draw_bbox(
+                gt_bboxes, show_img, proj_mat, img_metas, color=gt_bbox_color)
+        if pred_bboxes is not None:
+            show_img = draw_bbox(
+                pred_bboxes,
+                show_img,
+                proj_mat,
+                img_metas,
+                color=pred_bbox_color)
+        mmcv.imshow(show_img, win_name='project_bbox3d_img', wait_time=0)
+
+    if img is not None:
+        mmcv.imwrite(img, osp.join(result_path, f'{filename}_img.png'))
+
+    if gt_bboxes is not None:
+        gt_img = draw_bbox(
+            gt_bboxes, img, proj_mat, img_metas, color=gt_bbox_color)
+        mmcv.imwrite(gt_img, osp.join(result_path, f'{filename}_gt.png'))
+
+    if pred_bboxes is not None:
+        pred_img = draw_bbox(
+            pred_bboxes, img, proj_mat, img_metas, color=pred_bbox_color)
+        mmcv.imwrite(pred_img, osp.join(result_path, f'{filename}_pred.png'))
--- a/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/voxel/__init__.py
+++ b/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/voxel/__init__.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from .builder import build_voxel_generator
+from .voxel_generator import VoxelGenerator
+
+__all__ = ['build_voxel_generator', 'VoxelGenerator']
--- a/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/voxel/builder.py
+++ b/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/voxel/builder.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import mmcv
+
+from . import voxel_generator
+
+
+def build_voxel_generator(cfg, **kwargs):
+    """Builder of voxel generator."""
+    if isinstance(cfg, voxel_generator.VoxelGenerator):
+        return cfg
+    elif isinstance(cfg, dict):
+        return mmcv.runner.obj_from_dict(
+            cfg, voxel_generator, default_args=kwargs)
+    else:
+        raise TypeError('Invalid type {} for building a sampler'.format(
+            type(cfg)))
--- a/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/voxel/voxel_generator.py
+++ b/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/voxel/voxel_generator.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import numba
+import numpy as np
+
+
+class VoxelGenerator(object):
+    """Voxel generator in numpy implementation.
+
+    Args:
+        voxel_size (list[float]): Size of a single voxel
+        point_cloud_range (list[float]): Range of points
+        max_num_points (int): Maximum number of points in a single voxel
+        max_voxels (int, optional): Maximum number of voxels.
+            Defaults to 20000.
+    """
+
+    def __init__(self,
+                 voxel_size,
+                 point_cloud_range,
+                 max_num_points,
+                 max_voxels=20000):
+
+        point_cloud_range = np.array(point_cloud_range, dtype=np.float32)
+        # [0, -40, -3, 70.4, 40, 1]
+        voxel_size = np.array(voxel_size, dtype=np.float32)
+        grid_size = (point_cloud_range[3:] -
+                     point_cloud_range[:3]) / voxel_size
+        grid_size = np.round(grid_size).astype(np.int64)
+
+        self._voxel_size = voxel_size
+        self._point_cloud_range = point_cloud_range
+        self._max_num_points = max_num_points
+        self._max_voxels = max_voxels
+        self._grid_size = grid_size
+
+    def generate(self, points):
+        """Generate voxels given points."""
+        return points_to_voxel(points, self._voxel_size,
+                               self._point_cloud_range, self._max_num_points,
+                               True, self._max_voxels)
+
+    @property
+    def voxel_size(self):
+        """list[float]: Size of a single voxel."""
+        return self._voxel_size
+
+    @property
+    def max_num_points_per_voxel(self):
+        """int: Maximum number of points per voxel."""
+        return self._max_num_points
+
+    @property
+    def point_cloud_range(self):
+        """list[float]: Range of point cloud."""
+        return self._point_cloud_range
+
+    @property
+    def grid_size(self):
+        """np.ndarray: The size of grids."""
+        return self._grid_size
+
+    def __repr__(self):
+        """str: Return a string that describes the module."""
+        repr_str = self.__class__.__name__
+        indent = ' ' * (len(repr_str) + 1)
+        repr_str += f'(voxel_size={self._voxel_size},\n'
+        repr_str += indent + 'point_cloud_range='
+        repr_str += f'{self._point_cloud_range.tolist()},\n'
+        repr_str += indent + f'max_num_points={self._max_num_points},\n'
+        repr_str += indent + f'max_voxels={self._max_voxels},\n'
+        repr_str += indent + f'grid_size={self._grid_size.tolist()}'
+        repr_str += ')'
+        return repr_str
+
+
+def points_to_voxel(points,
+                    voxel_size,
+                    coors_range,
+                    max_points=35,
+                    reverse_index=True,
+                    max_voxels=20000):
+    """convert kitti points(N, >=3) to voxels.
+
+    Args:
+        points (np.ndarray): [N, ndim]. points[:, :3] contain xyz points and
+            points[:, 3:] contain other information such as reflectivity.
+        voxel_size (list, tuple, np.ndarray): [3] xyz, indicate voxel size
+        coors_range (list[float | tuple[float] | ndarray]): Voxel range.
+            format: xyzxyz, minmax
+        max_points (int): Indicate maximum points contained in a voxel.
+        reverse_index (bool): Whether return reversed coordinates.
+            if points has xyz format and reverse_index is True, output
+            coordinates will be zyx format, but points in features always
+            xyz format.
+        max_voxels (int): Maximum number of voxels this function creates.
+            For second, 20000 is a good choice. Points should be shuffled for
+            randomness before this function because max_voxels drops points.
+
+    Returns:
+        tuple[np.ndarray]:
+            voxels: [M, max_points, ndim] float tensor. only contain points.
+            coordinates: [M, 3] int32 tensor.
+            num_points_per_voxel: [M] int32 tensor.
+    """
+    if not isinstance(voxel_size, np.ndarray):
+        voxel_size = np.array(voxel_size, dtype=points.dtype)
+    if not isinstance(coors_range, np.ndarray):
+        coors_range = np.array(coors_range, dtype=points.dtype)
+    voxelmap_shape = (coors_range[3:] - coors_range[:3]) / voxel_size
+    voxelmap_shape = tuple(np.round(voxelmap_shape).astype(np.int32).tolist())
+    if reverse_index:
+        voxelmap_shape = voxelmap_shape[::-1]
+    # don't create large array in jit(nopython=True) code.
+    num_points_per_voxel = np.zeros(shape=(max_voxels, ), dtype=np.int32)
+    coor_to_voxelidx = -np.ones(shape=voxelmap_shape, dtype=np.int32)
+    voxels = np.zeros(
+        shape=(max_voxels, max_points, points.shape[-1]), dtype=points.dtype)
+    coors = np.zeros(shape=(max_voxels, 3), dtype=np.int32)
+    if reverse_index:
+        voxel_num = _points_to_voxel_reverse_kernel(
+            points, voxel_size, coors_range, num_points_per_voxel,
+            coor_to_voxelidx, voxels, coors, max_points, max_voxels)
+
+    else:
+        voxel_num = _points_to_voxel_kernel(points, voxel_size, coors_range,
+                                            num_points_per_voxel,
+                                            coor_to_voxelidx, voxels, coors,
+                                            max_points, max_voxels)
+
+    coors = coors[:voxel_num]
+    voxels = voxels[:voxel_num]
+    num_points_per_voxel = num_points_per_voxel[:voxel_num]
+
+    return voxels, coors, num_points_per_voxel
+
+
+@numba.jit(nopython=True)
+def _points_to_voxel_reverse_kernel(points,
+                                    voxel_size,
+                                    coors_range,
+                                    num_points_per_voxel,
+                                    coor_to_voxelidx,
+                                    voxels,
+                                    coors,
+                                    max_points=35,
+                                    max_voxels=20000):
+    """convert kitti points(N, >=3) to voxels.
+
+    Args:
+        points (np.ndarray): [N, ndim]. points[:, :3] contain xyz points and
+            points[:, 3:] contain other information such as reflectivity.
+        voxel_size (list, tuple, np.ndarray): [3] xyz, indicate voxel size
+        coors_range (list[float | tuple[float] | ndarray]): Range of voxels.
+            format: xyzxyz, minmax
+        num_points_per_voxel (int): Number of points per voxel.
+        coor_to_voxel_idx (np.ndarray): A voxel grid of shape (D, H, W),
+            which has the same shape as the complete voxel map. It indicates
+            the index of each corresponding voxel.
+        voxels (np.ndarray): Created empty voxels.
+        coors (np.ndarray): Created coordinates of each voxel.
+        max_points (int): Indicate maximum points contained in a voxel.
+        max_voxels (int): Maximum number of voxels this function create.
+            for second, 20000 is a good choice. Points should be shuffled for
+            randomness before this function because max_voxels drops points.
+
+    Returns:
+        tuple[np.ndarray]:
+            voxels: Shape [M, max_points, ndim], only contain points.
+            coordinates: Shape [M, 3].
+            num_points_per_voxel: Shape [M].
+    """
+    # put all computations to one loop.
+    # we shouldn't create large array in main jit code, otherwise
+    # reduce performance
+    N = points.shape[0]
+    # ndim = points.shape[1] - 1
+    ndim = 3
+    ndim_minus_1 = ndim - 1
+    grid_size = (coors_range[3:] - coors_range[:3]) / voxel_size
+    # np.round(grid_size)
+    # grid_size = np.round(grid_size).astype(np.int64)(np.int32)
+    grid_size = np.round(grid_size, 0, grid_size).astype(np.int32)
+    coor = np.zeros(shape=(3, ), dtype=np.int32)
+    voxel_num = 0
+    failed = False
+    for i in range(N):
+        failed = False
+        for j in range(ndim):
+            c = np.floor((points[i, j] - coors_range[j]) / voxel_size[j])
+            if c < 0 or c >= grid_size[j]:
+                failed = True
+                break
+            coor[ndim_minus_1 - j] = c
+        if failed:
+            continue
+        voxelidx = coor_to_voxelidx[coor[0], coor[1], coor[2]]
+        if voxelidx == -1:
+            voxelidx = voxel_num
+            if voxel_num >= max_voxels:
+                continue
+            voxel_num += 1
+            coor_to_voxelidx[coor[0], coor[1], coor[2]] = voxelidx
+            coors[voxelidx] = coor
+        num = num_points_per_voxel[voxelidx]
+        if num < max_points:
+            voxels[voxelidx, num] = points[i]
+            num_points_per_voxel[voxelidx] += 1
+    return voxel_num
+
+
+@numba.jit(nopython=True)
+def _points_to_voxel_kernel(points,
+                            voxel_size,
+                            coors_range,
+                            num_points_per_voxel,
+                            coor_to_voxelidx,
+                            voxels,
+                            coors,
+                            max_points=35,
+                            max_voxels=20000):
+    """convert kitti points(N, >=3) to voxels.
+
+    Args:
+        points (np.ndarray): [N, ndim]. points[:, :3] contain xyz points and
+            points[:, 3:] contain other information such as reflectivity.
+        voxel_size (list, tuple, np.ndarray): [3] xyz, indicate voxel size.
+        coors_range (list[float | tuple[float] | ndarray]): Range of voxels.
+            format: xyzxyz, minmax
+        num_points_per_voxel (int): Number of points per voxel.
+        coor_to_voxel_idx (np.ndarray): A voxel grid of shape (D, H, W),
+            which has the same shape as the complete voxel map. It indicates
+            the index of each corresponding voxel.
+        voxels (np.ndarray): Created empty voxels.
+        coors (np.ndarray): Created coordinates of each voxel.
+        max_points (int): Indicate maximum points contained in a voxel.
+        max_voxels (int): Maximum number of voxels this function create.
+            for second, 20000 is a good choice. Points should be shuffled for
+            randomness before this function because max_voxels drops points.
+
+    Returns:
+        tuple[np.ndarray]:
+            voxels: Shape [M, max_points, ndim], only contain points.
+            coordinates: Shape [M, 3].
+            num_points_per_voxel: Shape [M].
+    """
+    N = points.shape[0]
+    # ndim = points.shape[1] - 1
+    ndim = 3
+    grid_size = (coors_range[3:] - coors_range[:3]) / voxel_size
+    # grid_size = np.round(grid_size).astype(np.int64)(np.int32)
+    grid_size = np.round(grid_size, 0, grid_size).astype(np.int32)
+
+    # lower_bound = coors_range[:3]
+    # upper_bound = coors_range[3:]
+    coor = np.zeros(shape=(3, ), dtype=np.int32)
+    voxel_num = 0
+    failed = False
+    for i in range(N):
+        failed = False
+        for j in range(ndim):
+            c = np.floor((points[i, j] - coors_range[j]) / voxel_size[j])
+            if c < 0 or c >= grid_size[j]:
+                failed = True
+                break
+            coor[j] = c
+        if failed:
+            continue
+        voxelidx = coor_to_voxelidx[coor[0], coor[1], coor[2]]
+        if voxelidx == -1:
+            voxelidx = voxel_num
+            if voxel_num >= max_voxels:
+                continue
+            voxel_num += 1
+            coor_to_voxelidx[coor[0], coor[1], coor[2]] = voxelidx
+            coors[voxelidx] = coor
+        num = num_points_per_voxel[voxelidx]
+        if num < max_points:
+            voxels[voxelidx, num] = points[i]
+            num_points_per_voxel[voxelidx] += 1
+    return voxel_num
--- a/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/__init__.py
+++ b/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/__init__.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmdet.datasets.builder import build_dataloader
+from .builder import DATASETS, PIPELINES, build_dataset
+from .custom_3d import Custom3DDataset
+from .custom_3d_seg import Custom3DSegDataset
+from .kitti_dataset import KittiDataset
+from .kitti_mono_dataset import KittiMonoDataset
+from .lyft_dataset import LyftDataset
+from .nuscenes_dataset import NuScenesDataset
+from .nuscenes_mono_dataset import NuScenesMonoDataset
+# yapf: disable
+from .pipelines import (AffineResize, BackgroundPointsFilter, GlobalAlignment,
+                        GlobalRotScaleTrans, IndoorPatchPointSample,
+                        IndoorPointSample, LoadAnnotations3D,
+                        LoadPointsFromDict, LoadPointsFromFile,
+                        LoadPointsFromMultiSweeps, MultiViewWrapper,
+                        NormalizePointsColor, ObjectNameFilter, ObjectNoise,
+                        ObjectRangeFilter, ObjectSample, PointSample,
+                        PointShuffle, PointsRangeFilter, RandomDropPointsColor,
+                        RandomFlip3D, RandomJitterPoints, RandomRotate,
+                        RandomShiftScale, RangeLimitedRandomCrop,
+                        VoxelBasedPointSampler)
+# yapf: enable
+from .s3dis_dataset import S3DISDataset, S3DISSegDataset
+from .scannet_dataset import (ScanNetDataset, ScanNetInstanceSegDataset,
+                              ScanNetSegDataset)
+from .semantickitti_dataset import SemanticKITTIDataset
+from .sunrgbd_dataset import SUNRGBDDataset
+from .utils import get_loading_pipeline
+from .waymo_dataset import WaymoDataset
+
+__all__ = [
+    'KittiDataset', 'KittiMonoDataset', 'build_dataloader', 'DATASETS',
+    'build_dataset', 'NuScenesDataset', 'NuScenesMonoDataset', 'LyftDataset',
+    'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans',
+    'PointShuffle', 'ObjectRangeFilter', 'PointsRangeFilter',
+    'LoadPointsFromFile', 'S3DISSegDataset', 'S3DISDataset',
+    'NormalizePointsColor', 'IndoorPatchPointSample', 'IndoorPointSample',
+    'PointSample', 'LoadAnnotations3D', 'GlobalAlignment', 'SUNRGBDDataset',
+    'ScanNetDataset', 'ScanNetSegDataset', 'ScanNetInstanceSegDataset',
+    'SemanticKITTIDataset', 'Custom3DDataset', 'Custom3DSegDataset',
+    'LoadPointsFromMultiSweeps', 'WaymoDataset', 'BackgroundPointsFilter',
+    'VoxelBasedPointSampler', 'get_loading_pipeline', 'RandomDropPointsColor',
+    'RandomJitterPoints', 'ObjectNameFilter', 'AffineResize',
+    'RandomShiftScale', 'LoadPointsFromDict', 'PIPELINES',
+    'RangeLimitedRandomCrop', 'RandomRotate', 'MultiViewWrapper'
+]
--- a/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/builder.py
+++ b/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/builder.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import platform
+
+from mmcv.utils import Registry, build_from_cfg
+
+from mmdet.datasets import DATASETS as MMDET_DATASETS
+from mmdet.datasets.builder import _concat_dataset
+
+if platform.system() != 'Windows':
+    # https://github.com/pytorch/pytorch/issues/973
+    import resource
+    rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
+    base_soft_limit = rlimit[0]
+    hard_limit = rlimit[1]
+    soft_limit = min(max(4096, base_soft_limit), hard_limit)
+    resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit))
+
+OBJECTSAMPLERS = Registry('Object sampler')
+DATASETS = Registry('dataset')
+PIPELINES = Registry('pipeline')
+
+
+def build_dataset(cfg, default_args=None):
+    from mmdet3d.datasets.dataset_wrappers import CBGSDataset
+    from mmdet.datasets.dataset_wrappers import (ClassBalancedDataset,
+                                                 ConcatDataset, RepeatDataset)
+    if isinstance(cfg, (list, tuple)):
+        dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg])
+    elif cfg['type'] == 'ConcatDataset':
+        dataset = ConcatDataset(
+            [build_dataset(c, default_args) for c in cfg['datasets']],
+            cfg.get('separate_eval', True))
+    elif cfg['type'] == 'RepeatDataset':
+        dataset = RepeatDataset(
+            build_dataset(cfg['dataset'], default_args), cfg['times'])
+    elif cfg['type'] == 'ClassBalancedDataset':
+        dataset = ClassBalancedDataset(
+            build_dataset(cfg['dataset'], default_args), cfg['oversample_thr'])
+    elif cfg['type'] == 'CBGSDataset':
+        dataset = CBGSDataset(build_dataset(cfg['dataset'], default_args))
+    elif isinstance(cfg.get('ann_file'), (list, tuple)):
+        dataset = _concat_dataset(cfg, default_args)
+    elif cfg['type'] in DATASETS._module_dict.keys():
+        dataset = build_from_cfg(cfg, DATASETS, default_args)
+    else:
+        dataset = build_from_cfg(cfg, MMDET_DATASETS, default_args)
+    return dataset
--- a/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/custom_3d.py
+++ b/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/custom_3d.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import tempfile
+import warnings
+from os import path as osp
+
+import mmcv
+import numpy as np
+from torch.utils.data import Dataset
+
+from ..core.bbox import get_box_type
+from .builder import DATASETS
+from .pipelines import Compose
+from .utils import extract_result_dict, get_loading_pipeline
+
+
+@DATASETS.register_module()
+class Custom3DDataset(Dataset):
+    """Customized 3D dataset.
+
+    This is the base dataset of SUNRGB-D, ScanNet, nuScenes, and KITTI
+    dataset.
+
+    .. code-block:: none
+
+    [
+        {'sample_idx':
+         'lidar_points': {'lidar_path': velodyne_path,
+                           ....
+                         },
+         'annos': {'box_type_3d':  (str)  'LiDAR/Camera/Depth'
+                   'gt_bboxes_3d':  <np.ndarray> (n, 7)
+                   'gt_names':  [list]
+                   ....
+               }
+         'calib': { .....}
+         'images': { .....}
+        }
+    ]
+
+    Args:
+        data_root (str): Path of dataset root.
+        ann_file (str): Path of annotation file.
+        pipeline (list[dict], optional): Pipeline used for data processing.
+            Defaults to None.
+        classes (tuple[str], optional): Classes used in the dataset.
+            Defaults to None.
+        modality (dict, optional): Modality to specify the sensor data used
+            as input. Defaults to None.
+        box_type_3d (str, optional): Type of 3D box of this dataset.
+            Based on the `box_type_3d`, the dataset will encapsulate the box
+            to its original format then converted them to `box_type_3d`.
+            Defaults to 'LiDAR'. Available options includes
+
+            - 'LiDAR': Box in LiDAR coordinates.
+            - 'Depth': Box in depth coordinates, usually for indoor dataset.
+            - 'Camera': Box in camera coordinates.
+        filter_empty_gt (bool, optional): Whether to filter empty GT.
+            Defaults to True.
+        test_mode (bool, optional): Whether the dataset is in test mode.
+            Defaults to False.
+    """
+
+    def __init__(self,
+                 data_root,
+                 ann_file,
+                 pipeline=None,
+                 classes=None,
+                 modality=None,
+                 box_type_3d='LiDAR',
+                 filter_empty_gt=True,
+                 test_mode=False,
+                 file_client_args=dict(backend='disk')):
+        super().__init__()
+        self.data_root = data_root
+        self.ann_file = ann_file
+        self.test_mode = test_mode
+        self.modality = modality
+        self.filter_empty_gt = filter_empty_gt
+        self.box_type_3d, self.box_mode_3d = get_box_type(box_type_3d)
+
+        self.CLASSES = self.get_classes(classes)
+        self.file_client = mmcv.FileClient(**file_client_args)
+        self.cat2id = {name: i for i, name in enumerate(self.CLASSES)}
+
+        # load annotations
+        if hasattr(self.file_client, 'get_local_path'):
+            with self.file_client.get_local_path(self.ann_file) as local_path:
+                self.data_infos = self.load_annotations(open(local_path, 'rb'))
+        else:
+            warnings.warn(
+                'The used MMCV version does not have get_local_path. '
+                f'We treat the {self.ann_file} as local paths and it '
+                'might cause errors if the path is not a local path. '
+                'Please use MMCV>= 1.3.16 if you meet errors.')
+            self.data_infos = self.load_annotations(self.ann_file)
+
+        # process pipeline
+        if pipeline is not None:
+            self.pipeline = Compose(pipeline)
+
+        # set group flag for the samplers
+        if not self.test_mode:
+            self._set_group_flag()
+
+    def load_annotations(self, ann_file):
+        """Load annotations from ann_file.
+
+        Args:
+            ann_file (str): Path of the annotation file.
+
+        Returns:
+            list[dict]: List of annotations.
+        """
+        # loading data from a file-like object needs file format
+        return mmcv.load(ann_file, file_format='pkl')
+
+    def get_data_info(self, index):
+        """Get data info according to the given index.
+
+        Args:
+            index (int): Index of the sample data to get.
+
+        Returns:
+            dict: Data information that will be passed to the data
+                preprocessing pipelines. It includes the following keys:
+
+                - sample_idx (str): Sample index.
+                - pts_filename (str): Filename of point clouds.
+                - file_name (str): Filename of point clouds.
+                - ann_info (dict): Annotation info.
+        """
+        info = self.data_infos[index]
+        sample_idx = info['sample_idx']
+        pts_filename = osp.join(self.data_root,
+                                info['lidar_points']['lidar_path'])
+
+        input_dict = dict(
+            pts_filename=pts_filename,
+            sample_idx=sample_idx,
+            file_name=pts_filename)
+
+        if not self.test_mode:
+            annos = self.get_ann_info(index)
+            input_dict['ann_info'] = annos
+            if self.filter_empty_gt and ~(annos['gt_labels_3d'] != -1).any():
+                return None
+        return input_dict
+
+    def get_ann_info(self, index):
+        """Get annotation info according to the given index.
+
+        Args:
+            index (int): Index of the annotation data to get.
+
+        Returns:
+            dict: Annotation information consists of the following keys:
+
+                - gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`):
+                    3D ground truth bboxes
+                - gt_labels_3d (np.ndarray): Labels of ground truths.
+                - gt_names (list[str]): Class names of ground truths.
+        """
+        info = self.data_infos[index]
+        gt_bboxes_3d = info['annos']['gt_bboxes_3d']
+        gt_names_3d = info['annos']['gt_names']
+        gt_labels_3d = []
+        for cat in gt_names_3d:
+            if cat in self.CLASSES:
+                gt_labels_3d.append(self.CLASSES.index(cat))
+            else:
+                gt_labels_3d.append(-1)
+        gt_labels_3d = np.array(gt_labels_3d)
+
+        # Obtain original box 3d type in info file
+        ori_box_type_3d = info['annos']['box_type_3d']
+        ori_box_type_3d, _ = get_box_type(ori_box_type_3d)
+
+        # turn original box type to target box type
+        gt_bboxes_3d = ori_box_type_3d(
+            gt_bboxes_3d,
+            box_dim=gt_bboxes_3d.shape[-1],
+            origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)
+
+        anns_results = dict(
+            gt_bboxes_3d=gt_bboxes_3d,
+            gt_labels_3d=gt_labels_3d,
+            gt_names=gt_names_3d)
+        return anns_results
+
+    def pre_pipeline(self, results):
+        """Initialization before data preparation.
+
+        Args:
+            results (dict): Dict before data preprocessing.
+
+                - img_fields (list): Image fields.
+                - bbox3d_fields (list): 3D bounding boxes fields.
+                - pts_mask_fields (list): Mask fields of points.
+                - pts_seg_fields (list): Mask fields of point segments.
+                - bbox_fields (list): Fields of bounding boxes.
+                - mask_fields (list): Fields of masks.
+                - seg_fields (list): Segment fields.
+                - box_type_3d (str): 3D box type.
+                - box_mode_3d (str): 3D box mode.
+        """
+        results['img_fields'] = []
+        results['bbox3d_fields'] = []
+        results['pts_mask_fields'] = []
+        results['pts_seg_fields'] = []
+        results['bbox_fields'] = []
+        results['mask_fields'] = []
+        results['seg_fields'] = []
+        results['box_type_3d'] = self.box_type_3d
+        results['box_mode_3d'] = self.box_mode_3d
+
+    def prepare_train_data(self, index):
+        """Training data preparation.
+
+        Args:
+            index (int): Index for accessing the target data.
+
+        Returns:
+            dict: Training data dict of the corresponding index.
+        """
+        input_dict = self.get_data_info(index)
+        if input_dict is None:
+            return None
+        self.pre_pipeline(input_dict)
+        example = self.pipeline(input_dict)
+        if self.filter_empty_gt and \
+                (example is None or
+                    ~(example['gt_labels_3d']._data != -1).any()):
+            return None
+        return example
+
+    def prepare_test_data(self, index):
+        """Prepare data for testing.
+
+        Args:
+            index (int): Index for accessing the target data.
+
+        Returns:
+            dict: Testing data dict of the corresponding index.
+        """
+        input_dict = self.get_data_info(index)
+        self.pre_pipeline(input_dict)
+        example = self.pipeline(input_dict)
+        return example
+
+    @classmethod
+    def get_classes(cls, classes=None):
+        """Get class names of current dataset.
+
+        Args:
+            classes (Sequence[str] | str): If classes is None, use
+                default CLASSES defined by builtin dataset. If classes is a
+                string, take it as a file name. The file contains the name of
+                classes where each line contains one class name. If classes is
+                a tuple or list, override the CLASSES defined by the dataset.
+
+        Return:
+            list[str]: A list of class names.
+        """
+        if classes is None:
+            return cls.CLASSES
+
+        if isinstance(classes, str):
+            # take it as a file path
+            class_names = mmcv.list_from_file(classes)
+        elif isinstance(classes, (tuple, list)):
+            class_names = classes
+        else:
+            raise ValueError(f'Unsupported type {type(classes)} of classes.')
+
+        return class_names
+
+    def format_results(self,
+                       outputs,
+                       pklfile_prefix=None,
+                       submission_prefix=None):
+        """Format the results to pkl file.
+
+        Args:
+            outputs (list[dict]): Testing results of the dataset.
+            pklfile_prefix (str): The prefix of pkl files. It includes
+                the file path and the prefix of filename, e.g., "a/b/prefix".
+                If not specified, a temp file will be created. Default: None.
+
+        Returns:
+            tuple: (outputs, tmp_dir), outputs is the detection results,
+                tmp_dir is the temporal directory created for saving json
+                files when ``jsonfile_prefix`` is not specified.
+        """
+        if pklfile_prefix is None:
+            tmp_dir = tempfile.TemporaryDirectory()
+            pklfile_prefix = osp.join(tmp_dir.name, 'results')
+            out = f'{pklfile_prefix}.pkl'
+        mmcv.dump(outputs, out)
+        return outputs, tmp_dir
+
+    def evaluate(self,
+                 results,
+                 metric=None,
+                 iou_thr=(0.25, 0.5),
+                 logger=None,
+                 show=False,
+                 out_dir=None,
+                 pipeline=None):
+        """Evaluate.
+
+        Evaluation in indoor protocol.
+
+        Args:
+            results (list[dict]): List of results.
+            metric (str | list[str], optional): Metrics to be evaluated.
+                Defaults to None.
+            iou_thr (list[float]): AP IoU thresholds. Defaults to (0.25, 0.5).
+            logger (logging.Logger | str, optional): Logger used for printing
+                related information during evaluation. Defaults to None.
+            show (bool, optional): Whether to visualize.
+                Default: False.
+            out_dir (str, optional): Path to save the visualization results.
+                Default: None.
+            pipeline (list[dict], optional): raw data loading for showing.
+                Default: None.
+
+        Returns:
+            dict: Evaluation results.
+        """
+        from mmdet3d.core.evaluation import indoor_eval
+        assert isinstance(
+            results, list), f'Expect results to be list, got {type(results)}.'
+        assert len(results) > 0, 'Expect length of results > 0.'
+        assert len(results) == len(self.data_infos)
+        assert isinstance(
+            results[0], dict
+        ), f'Expect elements in results to be dict, got {type(results[0])}.'
+        gt_annos = [info['annos'] for info in self.data_infos]
+        label2cat = {i: cat_id for i, cat_id in enumerate(self.CLASSES)}
+        ret_dict = indoor_eval(
+            gt_annos,
+            results,
+            iou_thr,
+            label2cat,
+            logger=logger,
+            box_type_3d=self.box_type_3d,
+            box_mode_3d=self.box_mode_3d)
+        if show:
+            self.show(results, out_dir, pipeline=pipeline)
+
+        return ret_dict
+
+    def _build_default_pipeline(self):
+        """Build the default pipeline for this dataset."""
+        raise NotImplementedError('_build_default_pipeline is not implemented '
+                                  f'for dataset {self.__class__.__name__}')
+
+    def _get_pipeline(self, pipeline):
+        """Get data loading pipeline in self.show/evaluate function.
+
+        Args:
+            pipeline (list[dict]): Input pipeline. If None is given,
+                get from self.pipeline.
+        """
+        if pipeline is None:
+            if not hasattr(self, 'pipeline') or self.pipeline is None:
+                warnings.warn(
+                    'Use default pipeline for data loading, this may cause '
+                    'errors when data is on ceph')
+                return self._build_default_pipeline()
+            loading_pipeline = get_loading_pipeline(self.pipeline.transforms)
+            return Compose(loading_pipeline)
+        return Compose(pipeline)
+
+    def _extract_data(self, index, pipeline, key, load_annos=False):
+        """Load data using input pipeline and extract data according to key.
+
+        Args:
+            index (int): Index for accessing the target data.
+            pipeline (:obj:`Compose`): Composed data loading pipeline.
+            key (str | list[str]): One single or a list of data key.
+            load_annos (bool): Whether to load data annotations.
+                If True, need to set self.test_mode as False before loading.
+
+        Returns:
+            np.ndarray | torch.Tensor | list[np.ndarray | torch.Tensor]:
+                A single or a list of loaded data.
+        """
+        assert pipeline is not None, 'data loading pipeline is not provided'
+        # when we want to load ground-truth via pipeline (e.g. bbox, seg mask)
+        # we need to set self.test_mode as False so that we have 'annos'
+        if load_annos:
+            original_test_mode = self.test_mode
+            self.test_mode = False
+        input_dict = self.get_data_info(index)
+        self.pre_pipeline(input_dict)
+        example = pipeline(input_dict)
+
+        # extract data items according to keys
+        if isinstance(key, str):
+            data = extract_result_dict(example, key)
+        else:
+            data = [extract_result_dict(example, k) for k in key]
+        if load_annos:
+            self.test_mode = original_test_mode
+
+        return data
+
+    def __len__(self):
+        """Return the length of data infos.
+
+        Returns:
+            int: Length of data infos.
+        """
+        return len(self.data_infos)
+
+    def _rand_another(self, idx):
+        """Randomly get another item with the same flag.
+
+        Returns:
+            int: Another index of item with the same flag.
+        """
+        pool = np.where(self.flag == self.flag[idx])[0]
+        return np.random.choice(pool)
+
+    def __getitem__(self, idx):
+        """Get item from infos according to the given index.
+
+        Returns:
+            dict: Data dictionary of the corresponding index.
+        """
+        if self.test_mode:
+            return self.prepare_test_data(idx)
+        while True:
+            data = self.prepare_train_data(idx)
+            if data is None:
+                idx = self._rand_another(idx)
+                continue
+            return data
+
+    def _set_group_flag(self):
+        """Set flag according to image aspect ratio.
+
+        Images with aspect ratio greater than 1 will be set as group 1,
+        otherwise group 0. In 3D datasets, they are all the same, thus are all
+        zeros.
+        """
+        self.flag = np.zeros(len(self), dtype=np.uint8)
--- a/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/custom_3d_seg.py
+++ b/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/custom_3d_seg.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import tempfile
+import warnings
+from os import path as osp
+
+import mmcv
+import numpy as np
+from torch.utils.data import Dataset
+
+from mmseg.datasets import DATASETS as SEG_DATASETS
+from .builder import DATASETS
+from .pipelines import Compose
+from .utils import extract_result_dict, get_loading_pipeline
+
+
+@DATASETS.register_module()
+@SEG_DATASETS.register_module()
+class Custom3DSegDataset(Dataset):
+    """Customized 3D dataset for semantic segmentation task.
+
+    This is the base dataset of ScanNet and S3DIS dataset.
+
+    Args:
+        data_root (str): Path of dataset root.
+        ann_file (str): Path of annotation file.
+        pipeline (list[dict], optional): Pipeline used for data processing.
+            Defaults to None.
+        classes (tuple[str], optional): Classes used in the dataset.
+            Defaults to None.
+        palette (list[list[int]], optional): The palette of segmentation map.
+            Defaults to None.
+        modality (dict, optional): Modality to specify the sensor data used
+            as input. Defaults to None.
+        test_mode (bool, optional): Whether the dataset is in test mode.
+            Defaults to False.
+        ignore_index (int, optional): The label index to be ignored, e.g.
+            unannotated points. If None is given, set to len(self.CLASSES) to
+            be consistent with PointSegClassMapping function in pipeline.
+            Defaults to None.
+        scene_idxs (np.ndarray | str, optional): Precomputed index to load
+            data. For scenes with many points, we may sample it several times.
+            Defaults to None.
+    """
+    # names of all classes data used for the task
+    CLASSES = None
+
+    # class_ids used for training
+    VALID_CLASS_IDS = None
+
+    # all possible class_ids in loaded segmentation mask
+    ALL_CLASS_IDS = None
+
+    # official color for visualization
+    PALETTE = None
+
+    def __init__(self,
+                 data_root,
+                 ann_file,
+                 pipeline=None,
+                 classes=None,
+                 palette=None,
+                 modality=None,
+                 test_mode=False,
+                 ignore_index=None,
+                 scene_idxs=None,
+                 file_client_args=dict(backend='disk')):
+        super().__init__()
+        self.data_root = data_root
+        self.ann_file = ann_file
+        self.test_mode = test_mode
+        self.modality = modality
+        self.file_client = mmcv.FileClient(**file_client_args)
+
+        # load annotations
+        if hasattr(self.file_client, 'get_local_path'):
+            with self.file_client.get_local_path(self.ann_file) as local_path:
+                self.data_infos = self.load_annotations(open(local_path, 'rb'))
+        else:
+            warnings.warn(
+                'The used MMCV version does not have get_local_path. '
+                f'We treat the {self.ann_file} as local paths and it '
+                'might cause errors if the path is not a local path. '
+                'Please use MMCV>= 1.3.16 if you meet errors.')
+            self.data_infos = self.load_annotations(self.ann_file)
+
+        if pipeline is not None:
+            self.pipeline = Compose(pipeline)
+
+        self.ignore_index = len(self.CLASSES) if \
+            ignore_index is None else ignore_index
+
+        self.scene_idxs = self.get_scene_idxs(scene_idxs)
+        self.CLASSES, self.PALETTE = \
+            self.get_classes_and_palette(classes, palette)
+
+        # set group flag for the sampler
+        if not self.test_mode:
+            self._set_group_flag()
+
+    def load_annotations(self, ann_file):
+        """Load annotations from ann_file.
+
+        Args:
+            ann_file (str): Path of the annotation file.
+
+        Returns:
+            list[dict]: List of annotations.
+        """
+        # loading data from a file-like object needs file format
+        return mmcv.load(ann_file, file_format='pkl')
+
+    def get_data_info(self, index):
+        """Get data info according to the given index.
+
+        Args:
+            index (int): Index of the sample data to get.
+
+        Returns:
+            dict: Data information that will be passed to the data
+                preprocessing pipelines. It includes the following keys:
+
+                - sample_idx (str): Sample index.
+                - pts_filename (str): Filename of point clouds.
+                - file_name (str): Filename of point clouds.
+                - ann_info (dict): Annotation info.
+        """
+        info = self.data_infos[index]
+        sample_idx = info['point_cloud']['lidar_idx']
+        pts_filename = osp.join(self.data_root, info['pts_path'])
+
+        input_dict = dict(
+            pts_filename=pts_filename,
+            sample_idx=sample_idx,
+            file_name=pts_filename)
+
+        if not self.test_mode:
+            annos = self.get_ann_info(index)
+            input_dict['ann_info'] = annos
+        return input_dict
+
+    def pre_pipeline(self, results):
+        """Initialization before data preparation.
+
+        Args:
+            results (dict): Dict before data preprocessing.
+
+                - img_fields (list): Image fields.
+                - pts_mask_fields (list): Mask fields of points.
+                - pts_seg_fields (list): Mask fields of point segments.
+                - mask_fields (list): Fields of masks.
+                - seg_fields (list): Segment fields.
+        """
+        results['img_fields'] = []
+        results['pts_mask_fields'] = []
+        results['pts_seg_fields'] = []
+        results['mask_fields'] = []
+        results['seg_fields'] = []
+        results['bbox3d_fields'] = []
+
+    def prepare_train_data(self, index):
+        """Training data preparation.
+
+        Args:
+            index (int): Index for accessing the target data.
+
+        Returns:
+            dict: Training data dict of the corresponding index.
+        """
+        input_dict = self.get_data_info(index)
+        if input_dict is None:
+            return None
+        self.pre_pipeline(input_dict)
+        example = self.pipeline(input_dict)
+        return example
+
+    def prepare_test_data(self, index):
+        """Prepare data for testing.
+
+        Args:
+            index (int): Index for accessing the target data.
+
+        Returns:
+            dict: Testing data dict of the corresponding index.
+        """
+        input_dict = self.get_data_info(index)
+        self.pre_pipeline(input_dict)
+        example = self.pipeline(input_dict)
+        return example
+
+    def get_classes_and_palette(self, classes=None, palette=None):
+        """Get class names of current dataset.
+
+        This function is taken from MMSegmentation.
+
+        Args:
+            classes (Sequence[str] | str): If classes is None, use
+                default CLASSES defined by builtin dataset. If classes is a
+                string, take it as a file name. The file contains the name of
+                classes where each line contains one class name. If classes is
+                a tuple or list, override the CLASSES defined by the dataset.
+                Defaults to None.
+            palette (Sequence[Sequence[int]]] | np.ndarray):
+                The palette of segmentation map. If None is given, random
+                palette will be generated. Defaults to None.
+        """
+        if classes is None:
+            self.custom_classes = False
+            # map id in the loaded mask to label used for training
+            self.label_map = {
+                cls_id: self.ignore_index
+                for cls_id in self.ALL_CLASS_IDS
+            }
+            self.label_map.update(
+                {cls_id: i
+                 for i, cls_id in enumerate(self.VALID_CLASS_IDS)})
+            # map label to category name
+            self.label2cat = {
+                i: cat_name
+                for i, cat_name in enumerate(self.CLASSES)
+            }
+            return self.CLASSES, self.PALETTE
+
+        self.custom_classes = True
+        if isinstance(classes, str):
+            # take it as a file path
+            class_names = mmcv.list_from_file(classes)
+        elif isinstance(classes, (tuple, list)):
+            class_names = classes
+        else:
+            raise ValueError(f'Unsupported type {type(classes)} of classes.')
+
+        if self.CLASSES:
+            if not set(class_names).issubset(self.CLASSES):
+                raise ValueError('classes is not a subset of CLASSES.')
+
+            # update valid_class_ids
+            self.VALID_CLASS_IDS = [
+                self.VALID_CLASS_IDS[self.CLASSES.index(cls_name)]
+                for cls_name in class_names
+            ]
+
+            # dictionary, its keys are the old label ids and its values
+            # are the new label ids.
+            # used for changing pixel labels in load_annotations.
+            self.label_map = {
+                cls_id: self.ignore_index
+                for cls_id in self.ALL_CLASS_IDS
+            }
+            self.label_map.update(
+                {cls_id: i
+                 for i, cls_id in enumerate(self.VALID_CLASS_IDS)})
+            self.label2cat = {
+                i: cat_name
+                for i, cat_name in enumerate(class_names)
+            }
+
+        # modify palette for visualization
+        palette = [
+            self.PALETTE[self.CLASSES.index(cls_name)]
+            for cls_name in class_names
+        ]
+
+        return class_names, palette
+
+    def get_scene_idxs(self, scene_idxs):
+        """Compute scene_idxs for data sampling.
+
+        We sample more times for scenes with more points.
+        """
+        if self.test_mode:
+            # when testing, we load one whole scene every time
+            return np.arange(len(self.data_infos)).astype(np.int32)
+
+        # we may need to re-sample different scenes according to scene_idxs
+        # this is necessary for indoor scene segmentation such as ScanNet
+        if scene_idxs is None:
+            scene_idxs = np.arange(len(self.data_infos))
+        if isinstance(scene_idxs, str):
+            with self.file_client.get_local_path(scene_idxs) as local_path:
+                scene_idxs = np.load(local_path)
+        else:
+            scene_idxs = np.array(scene_idxs)
+
+        return scene_idxs.astype(np.int32)
+
+    def format_results(self,
+                       outputs,
+                       pklfile_prefix=None,
+                       submission_prefix=None):
+        """Format the results to pkl file.
+
+        Args:
+            outputs (list[dict]): Testing results of the dataset.
+            pklfile_prefix (str): The prefix of pkl files. It includes
+                the file path and the prefix of filename, e.g., "a/b/prefix".
+                If not specified, a temp file will be created. Default: None.
+
+        Returns:
+            tuple: (outputs, tmp_dir), outputs is the detection results,
+                tmp_dir is the temporal directory created for saving json
+                files when ``jsonfile_prefix`` is not specified.
+        """
+        if pklfile_prefix is None:
+            tmp_dir = tempfile.TemporaryDirectory()
+            pklfile_prefix = osp.join(tmp_dir.name, 'results')
+            out = f'{pklfile_prefix}.pkl'
+        mmcv.dump(outputs, out)
+        return outputs, tmp_dir
+
+    def evaluate(self,
+                 results,
+                 metric=None,
+                 logger=None,
+                 show=False,
+                 out_dir=None,
+                 pipeline=None):
+        """Evaluate.
+
+        Evaluation in semantic segmentation protocol.
+
+        Args:
+            results (list[dict]): List of results.
+            metric (str | list[str]): Metrics to be evaluated.
+            logger (logging.Logger | str, optional): Logger used for printing
+                related information during evaluation. Defaults to None.
+            show (bool, optional): Whether to visualize.
+                Defaults to False.
+            out_dir (str, optional): Path to save the visualization results.
+                Defaults to None.
+            pipeline (list[dict], optional): raw data loading for showing.
+                Default: None.
+
+        Returns:
+            dict: Evaluation results.
+        """
+        from mmdet3d.core.evaluation import seg_eval
+        assert isinstance(
+            results, list), f'Expect results to be list, got {type(results)}.'
+        assert len(results) > 0, 'Expect length of results > 0.'
+        assert len(results) == len(self.data_infos)
+        assert isinstance(
+            results[0], dict
+        ), f'Expect elements in results to be dict, got {type(results[0])}.'
+
+        load_pipeline = self._get_pipeline(pipeline)
+        pred_sem_masks = [result['semantic_mask'] for result in results]
+        gt_sem_masks = [
+            self._extract_data(
+                i, load_pipeline, 'pts_semantic_mask', load_annos=True)
+            for i in range(len(self.data_infos))
+        ]
+        ret_dict = seg_eval(
+            gt_sem_masks,
+            pred_sem_masks,
+            self.label2cat,
+            self.ignore_index,
+            logger=logger)
+
+        if show:
+            self.show(pred_sem_masks, out_dir, pipeline=pipeline)
+
+        return ret_dict
+
+    def _rand_another(self, idx):
+        """Randomly get another item with the same flag.
+
+        Returns:
+            int: Another index of item with the same flag.
+        """
+        pool = np.where(self.flag == self.flag[idx])[0]
+        return np.random.choice(pool)
+
+    def _build_default_pipeline(self):
+        """Build the default pipeline for this dataset."""
+        raise NotImplementedError('_build_default_pipeline is not implemented '
+                                  f'for dataset {self.__class__.__name__}')
+
+    def _get_pipeline(self, pipeline):
+        """Get data loading pipeline in self.show/evaluate function.
+
+        Args:
+            pipeline (list[dict]): Input pipeline. If None is given,
+                get from self.pipeline.
+        """
+        if pipeline is None:
+            if not hasattr(self, 'pipeline') or self.pipeline is None:
+                warnings.warn(
+                    'Use default pipeline for data loading, this may cause '
+                    'errors when data is on ceph')
+                return self._build_default_pipeline()
+            loading_pipeline = get_loading_pipeline(self.pipeline.transforms)
+            return Compose(loading_pipeline)
+        return Compose(pipeline)
+
+    def _extract_data(self, index, pipeline, key, load_annos=False):
+        """Load data using input pipeline and extract data according to key.
+
+        Args:
+            index (int): Index for accessing the target data.
+            pipeline (:obj:`Compose`): Composed data loading pipeline.
+            key (str | list[str]): One single or a list of data key.
+            load_annos (bool): Whether to load data annotations.
+                If True, need to set self.test_mode as False before loading.
+
+        Returns:
+            np.ndarray | torch.Tensor | list[np.ndarray | torch.Tensor]:
+                A single or a list of loaded data.
+        """
+        assert pipeline is not None, 'data loading pipeline is not provided'
+        # when we want to load ground-truth via pipeline (e.g. bbox, seg mask)
+        # we need to set self.test_mode as False so that we have 'annos'
+        if load_annos:
+            original_test_mode = self.test_mode
+            self.test_mode = False
+        input_dict = self.get_data_info(index)
+        self.pre_pipeline(input_dict)
+        example = pipeline(input_dict)
+
+        # extract data items according to keys
+        if isinstance(key, str):
+            data = extract_result_dict(example, key)
+        else:
+            data = [extract_result_dict(example, k) for k in key]
+        if load_annos:
+            self.test_mode = original_test_mode
+
+        return data
+
+    def __len__(self):
+        """Return the length of scene_idxs.
+
+        Returns:
+            int: Length of data infos.
+        """
+        return len(self.scene_idxs)
+
+    def __getitem__(self, idx):
+        """Get item from infos according to the given index.
+
+        In indoor scene segmentation task, each scene contains millions of
+        points. However, we only sample less than 10k points within a patch
+        each time. Therefore, we use `scene_idxs` to re-sample different rooms.
+
+        Returns:
+            dict: Data dictionary of the corresponding index.
+        """
+        scene_idx = self.scene_idxs[idx]  # map to scene idx
+        if self.test_mode:
+            return self.prepare_test_data(scene_idx)
+        while True:
+            data = self.prepare_train_data(scene_idx)
+            if data is None:
+                idx = self._rand_another(idx)
+                scene_idx = self.scene_idxs[idx]  # map to scene idx
+                continue
+            return data
+
+    def _set_group_flag(self):
+        """Set flag according to image aspect ratio.
+
+        Images with aspect ratio greater than 1 will be set as group 1,
+        otherwise group 0. In 3D datasets, they are all the same, thus are all
+        zeros.
+        """
+        self.flag = np.zeros(len(self), dtype=np.uint8)
--- a/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/dataset_wrappers.py
+++ b/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/dataset_wrappers.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+
+from .builder import DATASETS
+
+
+@DATASETS.register_module()
+class CBGSDataset(object):
+    """A wrapper of class sampled dataset with ann_file path. Implementation of
+    paper `Class-balanced Grouping and Sampling for Point Cloud 3D Object
+    Detection <https://arxiv.org/abs/1908.09492.>`_.
+
+    Balance the number of scenes under different classes.
+
+    Args:
+        dataset (:obj:`CustomDataset`): The dataset to be class sampled.
+    """
+
+    def __init__(self, dataset):
+        self.dataset = dataset
+        self.CLASSES = dataset.CLASSES
+        self.cat2id = {name: i for i, name in enumerate(self.CLASSES)}
+        self.sample_indices = self._get_sample_indices()
+        # self.dataset.data_infos = self.data_infos
+        if hasattr(self.dataset, 'flag'):
+            self.flag = np.array(
+                [self.dataset.flag[ind] for ind in self.sample_indices],
+                dtype=np.uint8)
+
+    def _get_sample_indices(self):
+        """Load annotations from ann_file.
+
+        Args:
+            ann_file (str): Path of the annotation file.
+
+        Returns:
+            list[dict]: List of annotations after class sampling.
+        """
+        class_sample_idxs = {cat_id: [] for cat_id in self.cat2id.values()}
+        for idx in range(len(self.dataset)):
+            sample_cat_ids = self.dataset.get_cat_ids(idx)
+            for cat_id in sample_cat_ids:
+                class_sample_idxs[cat_id].append(idx)
+        duplicated_samples = sum(
+            [len(v) for _, v in class_sample_idxs.items()])
+        class_distribution = {
+            k: len(v) / duplicated_samples
+            for k, v in class_sample_idxs.items()
+        }
+
+        sample_indices = []
+
+        frac = 1.0 / len(self.CLASSES)
+        ratios = [frac / v for v in class_distribution.values()]
+        for cls_inds, ratio in zip(list(class_sample_idxs.values()), ratios):
+            sample_indices += np.random.choice(cls_inds,
+                                               int(len(cls_inds) *
+                                                   ratio)).tolist()
+        return sample_indices
+
+    def __getitem__(self, idx):
+        """Get item from infos according to the given index.
+
+        Returns:
+            dict: Data dictionary of the corresponding index.
+        """
+        ori_idx = self.sample_indices[idx]
+        return self.dataset[ori_idx]
+
+    def __len__(self):
+        """Return the length of data infos.
+
+        Returns:
+            int: Length of data infos.
+        """
+        return len(self.sample_indices)
--- a/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/kitti2d_dataset.py
+++ b/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/kitti2d_dataset.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import mmcv
+import numpy as np
+
+from mmdet.datasets import CustomDataset
+from .builder import DATASETS
+
+
+@DATASETS.register_module()
+class Kitti2DDataset(CustomDataset):
+    r"""KITTI 2D Dataset.
+
+    This class serves as the API for experiments on the `KITTI Dataset
+    <http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d>`_.
+
+    Args:
+        data_root (str): Path of dataset root.
+        ann_file (str): Path of annotation file.
+        pipeline (list[dict], optional): Pipeline used for data processing.
+            Defaults to None.
+        classes (tuple[str], optional): Classes used in the dataset.
+            Defaults to None.
+        modality (dict, optional): Modality to specify the sensor data used
+            as input. Defaults to None.
+        box_type_3d (str, optional): Type of 3D box of this dataset.
+            Based on the `box_type_3d`, the dataset will encapsulate the box
+            to its original format then converted them to `box_type_3d`.
+            Defaults to 'LiDAR'. Available options includes
+
+            - 'LiDAR': Box in LiDAR coordinates.
+            - 'Depth': Box in depth coordinates, usually for indoor dataset.
+            - 'Camera': Box in camera coordinates.
+        filter_empty_gt (bool, optional): Whether to filter empty GT.
+            Defaults to True.
+        test_mode (bool, optional): Whether the dataset is in test mode.
+            Defaults to False.
+    """
+
+    CLASSES = ('car', 'pedestrian', 'cyclist')
+    """
+    Annotation format:
+    [
+        {
+            'image': {
+                'image_idx': 0,
+                'image_path': 'training/image_2/000000.png',
+                'image_shape': array([ 370, 1224], dtype=int32)
+            },
+            'point_cloud': {
+                 'num_features': 4,
+                 'velodyne_path': 'training/velodyne/000000.bin'
+             },
+             'calib': {
+                 'P0': <np.ndarray> (4, 4),
+                 'P1': <np.ndarray> (4, 4),
+                 'P2': <np.ndarray> (4, 4),
+                 'P3': <np.ndarray> (4, 4),
+                 'R0_rect':4x4 np.array,
+                 'Tr_velo_to_cam': 4x4 np.array,
+                 'Tr_imu_to_velo': 4x4 np.array
+             },
+             'annos': {
+                 'name': <np.ndarray> (n),
+                 'truncated': <np.ndarray> (n),
+                 'occluded': <np.ndarray> (n),
+                 'alpha': <np.ndarray> (n),
+                 'bbox': <np.ndarray> (n, 4),
+                 'dimensions': <np.ndarray> (n, 3),
+                 'location': <np.ndarray> (n, 3),
+                 'rotation_y': <np.ndarray> (n),
+                 'score': <np.ndarray> (n),
+                 'index': array([0], dtype=int32),
+                 'group_ids': array([0], dtype=int32),
+                 'difficulty': array([0], dtype=int32),
+                 'num_points_in_gt': <np.ndarray> (n),
+             }
+        }
+    ]
+    """
+
+    def load_annotations(self, ann_file):
+        """Load annotations from ann_file.
+
+        Args:
+            ann_file (str): Path of the annotation file.
+
+        Returns:
+            list[dict]: List of annotations.
+        """
+        self.data_infos = mmcv.load(ann_file)
+        self.cat2label = {
+            cat_name: i
+            for i, cat_name in enumerate(self.CLASSES)
+        }
+        return self.data_infos
+
+    def _filter_imgs(self, min_size=32):
+        """Filter images without ground truths."""
+        valid_inds = []
+        for i, img_info in enumerate(self.data_infos):
+            if len(img_info['annos']['name']) > 0:
+                valid_inds.append(i)
+        return valid_inds
+
+    def get_ann_info(self, index):
+        """Get annotation info according to the given index.
+
+        Args:
+            index (int): Index of the annotation data to get.
+
+        Returns:
+            dict: Annotation information consists of the following keys:
+
+                - bboxes (np.ndarray): Ground truth bboxes.
+                - labels (np.ndarray): Labels of ground truths.
+        """
+        # Use index to get the annos, thus the evalhook could also use this api
+        info = self.data_infos[index]
+        annos = info['annos']
+        gt_names = annos['name']
+        gt_bboxes = annos['bbox']
+        difficulty = annos['difficulty']
+
+        # remove classes that is not needed
+        selected = self.keep_arrays_by_name(gt_names, self.CLASSES)
+        gt_bboxes = gt_bboxes[selected]
+        gt_names = gt_names[selected]
+        difficulty = difficulty[selected]
+        gt_labels = np.array([self.cat2label[n] for n in gt_names])
+
+        anns_results = dict(
+            bboxes=gt_bboxes.astype(np.float32),
+            labels=gt_labels,
+        )
+        return anns_results
+
+    def prepare_train_img(self, idx):
+        """Training image preparation.
+
+        Args:
+            index (int): Index for accessing the target image data.
+
+        Returns:
+            dict: Training image data dict after preprocessing
+                corresponding to the index.
+        """
+        img_raw_info = self.data_infos[idx]['image']
+        img_info = dict(filename=img_raw_info['image_path'])
+        ann_info = self.get_ann_info(idx)
+        if len(ann_info['bboxes']) == 0:
+            return None
+        results = dict(img_info=img_info, ann_info=ann_info)
+        if self.proposals is not None:
+            results['proposals'] = self.proposals[idx]
+        self.pre_pipeline(results)
+        return self.pipeline(results)
+
+    def prepare_test_img(self, idx):
+        """Prepare data for testing.
+
+        Args:
+            index (int): Index for accessing the target image data.
+
+        Returns:
+            dict: Testing image data dict after preprocessing
+                corresponding to the index.
+        """
+        img_raw_info = self.data_infos[idx]['image']
+        img_info = dict(filename=img_raw_info['image_path'])
+        results = dict(img_info=img_info)
+        if self.proposals is not None:
+            results['proposals'] = self.proposals[idx]
+        self.pre_pipeline(results)
+        return self.pipeline(results)
+
+    def drop_arrays_by_name(self, gt_names, used_classes):
+        """Drop irrelevant ground truths by name.
+
+        Args:
+            gt_names (list[str]): Names of ground truths.
+            used_classes (list[str]): Classes of interest.
+
+        Returns:
+            np.ndarray: Indices of ground truths that will be dropped.
+        """
+        inds = [i for i, x in enumerate(gt_names) if x not in used_classes]
+        inds = np.array(inds, dtype=np.int64)
+        return inds
+
+    def keep_arrays_by_name(self, gt_names, used_classes):
+        """Keep useful ground truths by name.
+
+        Args:
+            gt_names (list[str]): Names of ground truths.
+            used_classes (list[str]): Classes of interest.
+
+        Returns:
+            np.ndarray: Indices of ground truths that will be keeped.
+        """
+        inds = [i for i, x in enumerate(gt_names) if x in used_classes]
+        inds = np.array(inds, dtype=np.int64)
+        return inds
+
+    def reformat_bbox(self, outputs, out=None):
+        """Reformat bounding boxes to KITTI 2D styles.
+
+        Args:
+            outputs (list[np.ndarray]): List of arrays storing the inferenced
+                bounding boxes and scores.
+            out (str, optional): The prefix of output file.
+                Default: None.
+
+        Returns:
+            list[dict]: A list of dictionaries with the kitti 2D format.
+        """
+        from mmdet3d.core.bbox.transforms import bbox2result_kitti2d
+        sample_idx = [info['image']['image_idx'] for info in self.data_infos]
+        result_files = bbox2result_kitti2d(outputs, self.CLASSES, sample_idx,
+                                           out)
+        return result_files
+
+    def evaluate(self, result_files, eval_types=None):
+        """Evaluation in KITTI protocol.
+
+        Args:
+            result_files (str): Path of result files.
+            eval_types (str, optional): Types of evaluation. Default: None.
+                KITTI dataset only support 'bbox' evaluation type.
+
+        Returns:
+            tuple (str, dict): Average precision results in str format
+                and average precision results in dict format.
+        """
+        from mmdet3d.core.evaluation import kitti_eval
+        eval_types = ['bbox'] if not eval_types else eval_types
+        assert eval_types in ('bbox', ['bbox'
+                                       ]), 'KITTI data set only evaluate bbox'
+        gt_annos = [info['annos'] for info in self.data_infos]
+        ap_result_str, ap_dict = kitti_eval(
+            gt_annos, result_files, self.CLASSES, eval_types=['bbox'])
+        return ap_result_str, ap_dict
--- a/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/kitti_dataset.py
+++ b/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/kitti_dataset.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import os
+import tempfile
+from os import path as osp
+
+import mmcv
+import numpy as np
+import torch
+from mmcv.utils import print_log
+
+from ..core import show_multi_modality_result, show_result
+from ..core.bbox import (Box3DMode, CameraInstance3DBoxes, Coord3DMode,
+                         LiDARInstance3DBoxes, points_cam2img)
+from .builder import DATASETS
+from .custom_3d import Custom3DDataset
+from .pipelines import Compose
+
+
+@DATASETS.register_module()
+class KittiDataset(Custom3DDataset):
+    r"""KITTI Dataset.
+
+    This class serves as the API for experiments on the `KITTI Dataset
+    <http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d>`_.
+
+    Args:
+        data_root (str): Path of dataset root.
+        ann_file (str): Path of annotation file.
+        split (str): Split of input data.
+        pts_prefix (str, optional): Prefix of points files.
+            Defaults to 'velodyne'.
+        pipeline (list[dict], optional): Pipeline used for data processing.
+            Defaults to None.
+        classes (tuple[str], optional): Classes used in the dataset.
+            Defaults to None.
+        modality (dict, optional): Modality to specify the sensor data used
+            as input. Defaults to None.
+        box_type_3d (str, optional): Type of 3D box of this dataset.
+            Based on the `box_type_3d`, the dataset will encapsulate the box
+            to its original format then converted them to `box_type_3d`.
+            Defaults to 'LiDAR' in this dataset. Available options includes
+
+            - 'LiDAR': Box in LiDAR coordinates.
+            - 'Depth': Box in depth coordinates, usually for indoor dataset.
+            - 'Camera': Box in camera coordinates.
+        filter_empty_gt (bool, optional): Whether to filter empty GT.
+            Defaults to True.
+        test_mode (bool, optional): Whether the dataset is in test mode.
+            Defaults to False.
+        pcd_limit_range (list, optional): The range of point cloud used to
+            filter invalid predicted boxes.
+            Default: [0, -40, -3, 70.4, 40, 0.0].
+    """
+    CLASSES = ('car', 'pedestrian', 'cyclist')
+
+    def __init__(self,
+                 data_root,
+                 ann_file,
+                 split,
+                 pts_prefix='velodyne',
+                 pipeline=None,
+                 classes=None,
+                 modality=None,
+                 box_type_3d='LiDAR',
+                 filter_empty_gt=True,
+                 test_mode=False,
+                 pcd_limit_range=[0, -40, -3, 70.4, 40, 0.0],
+                 **kwargs):
+        super().__init__(
+            data_root=data_root,
+            ann_file=ann_file,
+            pipeline=pipeline,
+            classes=classes,
+            modality=modality,
+            box_type_3d=box_type_3d,
+            filter_empty_gt=filter_empty_gt,
+            test_mode=test_mode,
+            **kwargs)
+
+        self.split = split
+        self.root_split = os.path.join(self.data_root, split)
+        assert self.modality is not None
+        self.pcd_limit_range = pcd_limit_range
+        self.pts_prefix = pts_prefix
+
+    def _get_pts_filename(self, idx):
+        """Get point cloud filename according to the given index.
+
+        Args:
+            index (int): Index of the point cloud file to get.
+
+        Returns:
+            str: Name of the point cloud file.
+        """
+        pts_filename = osp.join(self.root_split, self.pts_prefix,
+                                f'{idx:06d}.bin')
+        return pts_filename
+
+    def get_data_info(self, index):
+        """Get data info according to the given index.
+
+        Args:
+            index (int): Index of the sample data to get.
+
+        Returns:
+            dict: Data information that will be passed to the data
+                preprocessing pipelines. It includes the following keys:
+
+                - sample_idx (str): Sample index.
+                - pts_filename (str): Filename of point clouds.
+                - img_prefix (str): Prefix of image files.
+                - img_info (dict): Image info.
+                - lidar2img (list[np.ndarray], optional): Transformations
+                    from lidar to different cameras.
+                - ann_info (dict): Annotation info.
+        """
+        info = self.data_infos[index]
+        sample_idx = info['image']['image_idx']
+        img_filename = os.path.join(self.data_root,
+                                    info['image']['image_path'])
+
+        # TODO: consider use torch.Tensor only
+        rect = info['calib']['R0_rect'].astype(np.float32)
+        Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
+        P2 = info['calib']['P2'].astype(np.float32)
+        lidar2img = P2 @ rect @ Trv2c
+
+        pts_filename = self._get_pts_filename(sample_idx)
+        input_dict = dict(
+            sample_idx=sample_idx,
+            pts_filename=pts_filename,
+            img_prefix=None,
+            img_info=dict(filename=img_filename),
+            lidar2img=lidar2img)
+
+        if not self.test_mode:
+            annos = self.get_ann_info(index)
+            input_dict['ann_info'] = annos
+
+        return input_dict
+
+    def get_ann_info(self, index):
+        """Get annotation info according to the given index.
+
+        Args:
+            index (int): Index of the annotation data to get.
+
+        Returns:
+            dict: annotation information consists of the following keys:
+
+                - gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`):
+                    3D ground truth bboxes.
+                - gt_labels_3d (np.ndarray): Labels of ground truths.
+                - gt_bboxes (np.ndarray): 2D ground truth bboxes.
+                - gt_labels (np.ndarray): Labels of ground truths.
+                - gt_names (list[str]): Class names of ground truths.
+                - difficulty (int): Difficulty defined by KITTI.
+                    0, 1, 2 represent xxxxx respectively.
+        """
+        # Use index to get the annos, thus the evalhook could also use this api
+        info = self.data_infos[index]
+        rect = info['calib']['R0_rect'].astype(np.float32)
+        Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
+
+        if 'plane' in info:
+            # convert ground plane to velodyne coordinates
+            reverse = np.linalg.inv(rect @ Trv2c)
+
+            (plane_norm_cam,
+             plane_off_cam) = (info['plane'][:3],
+                               -info['plane'][:3] * info['plane'][3])
+            plane_norm_lidar = \
+                (reverse[:3, :3] @ plane_norm_cam[:, None])[:, 0]
+            plane_off_lidar = (
+                reverse[:3, :3] @ plane_off_cam[:, None][:, 0] +
+                reverse[:3, 3])
+            plane_lidar = np.zeros_like(plane_norm_lidar, shape=(4, ))
+            plane_lidar[:3] = plane_norm_lidar
+            plane_lidar[3] = -plane_norm_lidar.T @ plane_off_lidar
+        else:
+            plane_lidar = None
+
+        difficulty = info['annos']['difficulty']
+        annos = info['annos']
+        # we need other objects to avoid collision when sample
+        annos = self.remove_dontcare(annos)
+        loc = annos['location']
+        dims = annos['dimensions']
+        rots = annos['rotation_y']
+        gt_names = annos['name']
+        gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]],
+                                      axis=1).astype(np.float32)
+
+        # convert gt_bboxes_3d to velodyne coordinates
+        gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to(
+            self.box_mode_3d, np.linalg.inv(rect @ Trv2c))
+        gt_bboxes = annos['bbox']
+
+        selected = self.drop_arrays_by_name(gt_names, ['DontCare'])
+        gt_bboxes = gt_bboxes[selected].astype('float32')
+        gt_names = gt_names[selected]
+
+        gt_labels = []
+        for cat in gt_names:
+            if cat in self.CLASSES:
+                gt_labels.append(self.CLASSES.index(cat))
+            else:
+                gt_labels.append(-1)
+        gt_labels = np.array(gt_labels).astype(np.int64)
+        gt_labels_3d = copy.deepcopy(gt_labels)
+
+        anns_results = dict(
+            gt_bboxes_3d=gt_bboxes_3d,
+            gt_labels_3d=gt_labels_3d,
+            bboxes=gt_bboxes,
+            labels=gt_labels,
+            gt_names=gt_names,
+            plane=plane_lidar,
+            difficulty=difficulty)
+        return anns_results
+
+    def drop_arrays_by_name(self, gt_names, used_classes):
+        """Drop irrelevant ground truths by name.
+
+        Args:
+            gt_names (list[str]): Names of ground truths.
+            used_classes (list[str]): Classes of interest.
+
+        Returns:
+            np.ndarray: Indices of ground truths that will be dropped.
+        """
+        inds = [i for i, x in enumerate(gt_names) if x not in used_classes]
+        inds = np.array(inds, dtype=np.int64)
+        return inds
+
+    def keep_arrays_by_name(self, gt_names, used_classes):
+        """Keep useful ground truths by name.
+
+        Args:
+            gt_names (list[str]): Names of ground truths.
+            used_classes (list[str]): Classes of interest.
+
+        Returns:
+            np.ndarray: Indices of ground truths that will be keeped.
+        """
+        inds = [i for i, x in enumerate(gt_names) if x in used_classes]
+        inds = np.array(inds, dtype=np.int64)
+        return inds
+
+    def remove_dontcare(self, ann_info):
+        """Remove annotations that do not need to be cared.
+
+        Args:
+            ann_info (dict): Dict of annotation infos. The ``'DontCare'``
+                annotations will be removed according to ann_file['name'].
+
+        Returns:
+            dict: Annotations after filtering.
+        """
+        img_filtered_annotations = {}
+        relevant_annotation_indices = [
+            i for i, x in enumerate(ann_info['name']) if x != 'DontCare'
+        ]
+        for key in ann_info.keys():
+            img_filtered_annotations[key] = (
+                ann_info[key][relevant_annotation_indices])
+        return img_filtered_annotations
+
+    def format_results(self,
+                       outputs,
+                       pklfile_prefix=None,
+                       submission_prefix=None):
+        """Format the results to pkl file.
+
+        Args:
+            outputs (list[dict]): Testing results of the dataset.
+            pklfile_prefix (str): The prefix of pkl files. It includes
+                the file path and the prefix of filename, e.g., "a/b/prefix".
+                If not specified, a temp file will be created. Default: None.
+            submission_prefix (str): The prefix of submitted files. It
+                includes the file path and the prefix of filename, e.g.,
+                "a/b/prefix". If not specified, a temp file will be created.
+                Default: None.
+
+        Returns:
+            tuple: (result_files, tmp_dir), result_files is a dict containing
+                the json filepaths, tmp_dir is the temporal directory created
+                for saving json files when jsonfile_prefix is not specified.
+        """
+        if pklfile_prefix is None:
+            tmp_dir = tempfile.TemporaryDirectory()
+            pklfile_prefix = osp.join(tmp_dir.name, 'results')
+        else:
+            tmp_dir = None
+
+        if not isinstance(outputs[0], dict):
+            result_files = self.bbox2result_kitti2d(outputs, self.CLASSES,
+                                                    pklfile_prefix,
+                                                    submission_prefix)
+        elif 'pts_bbox' in outputs[0] or 'img_bbox' in outputs[0]:
+            result_files = dict()
+            for name in outputs[0]:
+                results_ = [out[name] for out in outputs]
+                pklfile_prefix_ = pklfile_prefix + name
+                if submission_prefix is not None:
+                    submission_prefix_ = submission_prefix + name
+                else:
+                    submission_prefix_ = None
+                if 'img' in name:
+                    result_files = self.bbox2result_kitti2d(
+                        results_, self.CLASSES, pklfile_prefix_,
+                        submission_prefix_)
+                else:
+                    result_files_ = self.bbox2result_kitti(
+                        results_, self.CLASSES, pklfile_prefix_,
+                        submission_prefix_)
+                result_files[name] = result_files_
+        else:
+            result_files = self.bbox2result_kitti(outputs, self.CLASSES,
+                                                  pklfile_prefix,
+                                                  submission_prefix)
+        return result_files, tmp_dir
+
+    def evaluate(self,
+                 results,
+                 metric=None,
+                 logger=None,
+                 pklfile_prefix=None,
+                 submission_prefix=None,
+                 show=False,
+                 out_dir=None,
+                 pipeline=None):
+        """Evaluation in KITTI protocol.
+
+        Args:
+            results (list[dict]): Testing results of the dataset.
+            metric (str | list[str], optional): Metrics to be evaluated.
+                Default: None.
+            logger (logging.Logger | str, optional): Logger used for printing
+                related information during evaluation. Default: None.
+            pklfile_prefix (str, optional): The prefix of pkl files, including
+                the file path and the prefix of filename, e.g., "a/b/prefix".
+                If not specified, a temp file will be created. Default: None.
+            submission_prefix (str, optional): The prefix of submission data.
+                If not specified, the submission data will not be generated.
+                Default: None.
+            show (bool, optional): Whether to visualize.
+                Default: False.
+            out_dir (str, optional): Path to save the visualization results.
+                Default: None.
+            pipeline (list[dict], optional): raw data loading for showing.
+                Default: None.
+
+        Returns:
+            dict[str, float]: Results of each evaluation metric.
+        """
+        result_files, tmp_dir = self.format_results(results, pklfile_prefix)
+        from mmdet3d.core.evaluation import kitti_eval
+        gt_annos = [info['annos'] for info in self.data_infos]
+
+        if isinstance(result_files, dict):
+            ap_dict = dict()
+            for name, result_files_ in result_files.items():
+                eval_types = ['bbox', 'bev', '3d']
+                if 'img' in name:
+                    eval_types = ['bbox']
+                ap_result_str, ap_dict_ = kitti_eval(
+                    gt_annos,
+                    result_files_,
+                    self.CLASSES,
+                    eval_types=eval_types)
+                for ap_type, ap in ap_dict_.items():
+                    ap_dict[f'{name}/{ap_type}'] = float('{:.4f}'.format(ap))
+
+                print_log(
+                    f'Results of {name}:\n' + ap_result_str, logger=logger)
+
+        else:
+            if metric == 'img_bbox':
+                ap_result_str, ap_dict = kitti_eval(
+                    gt_annos, result_files, self.CLASSES, eval_types=['bbox'])
+            else:
+                ap_result_str, ap_dict = kitti_eval(gt_annos, result_files,
+                                                    self.CLASSES)
+            print_log('\n' + ap_result_str, logger=logger)
+
+        if tmp_dir is not None:
+            tmp_dir.cleanup()
+        if show or out_dir:
+            self.show(results, out_dir, show=show, pipeline=pipeline)
+        return ap_dict
+
+    def bbox2result_kitti(self,
+                          net_outputs,
+                          class_names,
+                          pklfile_prefix=None,
+                          submission_prefix=None):
+        """Convert 3D detection results to kitti format for evaluation and test
+        submission.
+
+        Args:
+            net_outputs (list[np.ndarray]): List of array storing the
+                inferenced bounding boxes and scores.
+            class_names (list[String]): A list of class names.
+            pklfile_prefix (str): The prefix of pkl file.
+            submission_prefix (str): The prefix of submission file.
+
+        Returns:
+            list[dict]: A list of dictionaries with the kitti format.
+        """
+        assert len(net_outputs) == len(self.data_infos), \
+            'invalid list length of network outputs'
+        if submission_prefix is not None:
+            mmcv.mkdir_or_exist(submission_prefix)
+
+        det_annos = []
+        print('\nConverting prediction to KITTI format')
+        for idx, pred_dicts in enumerate(
+                mmcv.track_iter_progress(net_outputs)):
+            annos = []
+            info = self.data_infos[idx]
+            sample_idx = info['image']['image_idx']
+            image_shape = info['image']['image_shape'][:2]
+            box_dict = self.convert_valid_bboxes(pred_dicts, info)
+            anno = {
+                'name': [],
+                'truncated': [],
+                'occluded': [],
+                'alpha': [],
+                'bbox': [],
+                'dimensions': [],
+                'location': [],
+                'rotation_y': [],
+                'score': []
+            }
+            if len(box_dict['bbox']) > 0:
+                box_2d_preds = box_dict['bbox']
+                box_preds = box_dict['box3d_camera']
+                scores = box_dict['scores']
+                box_preds_lidar = box_dict['box3d_lidar']
+                label_preds = box_dict['label_preds']
+
+                for box, box_lidar, bbox, score, label in zip(
+                        box_preds, box_preds_lidar, box_2d_preds, scores,
+                        label_preds):
+                    bbox[2:] = np.minimum(bbox[2:], image_shape[::-1])
+                    bbox[:2] = np.maximum(bbox[:2], [0, 0])
+                    anno['name'].append(class_names[int(label)])
+                    anno['truncated'].append(0.0)
+                    anno['occluded'].append(0)
+                    anno['alpha'].append(
+                        -np.arctan2(-box_lidar[1], box_lidar[0]) + box[6])
+                    anno['bbox'].append(bbox)
+                    anno['dimensions'].append(box[3:6])
+                    anno['location'].append(box[:3])
+                    anno['rotation_y'].append(box[6])
+                    anno['score'].append(score)
+
+                anno = {k: np.stack(v) for k, v in anno.items()}
+                annos.append(anno)
+            else:
+                anno = {
+                    'name': np.array([]),
+                    'truncated': np.array([]),
+                    'occluded': np.array([]),
+                    'alpha': np.array([]),
+                    'bbox': np.zeros([0, 4]),
+                    'dimensions': np.zeros([0, 3]),
+                    'location': np.zeros([0, 3]),
+                    'rotation_y': np.array([]),
+                    'score': np.array([]),
+                }
+                annos.append(anno)
+
+            if submission_prefix is not None:
+                curr_file = f'{submission_prefix}/{sample_idx:06d}.txt'
+                with open(curr_file, 'w') as f:
+                    bbox = anno['bbox']
+                    loc = anno['location']
+                    dims = anno['dimensions']  # lhw -> hwl
+
+                    for idx in range(len(bbox)):
+                        print(
+                            '{} -1 -1 {:.4f} {:.4f} {:.4f} {:.4f} '
+                            '{:.4f} {:.4f} {:.4f} '
+                            '{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}'.format(
+                                anno['name'][idx], anno['alpha'][idx],
+                                bbox[idx][0], bbox[idx][1], bbox[idx][2],
+                                bbox[idx][3], dims[idx][1], dims[idx][2],
+                                dims[idx][0], loc[idx][0], loc[idx][1],
+                                loc[idx][2], anno['rotation_y'][idx],
+                                anno['score'][idx]),
+                            file=f)
+
+            annos[-1]['sample_idx'] = np.array(
+                [sample_idx] * len(annos[-1]['score']), dtype=np.int64)
+
+            det_annos += annos
+
+        if pklfile_prefix is not None:
+            if not pklfile_prefix.endswith(('.pkl', '.pickle')):
+                out = f'{pklfile_prefix}.pkl'
+            mmcv.dump(det_annos, out)
+            print(f'Result is saved to {out}.')
+
+        return det_annos
+
+    def bbox2result_kitti2d(self,
+                            net_outputs,
+                            class_names,
+                            pklfile_prefix=None,
+                            submission_prefix=None):
+        """Convert 2D detection results to kitti format for evaluation and test
+        submission.
+
+        Args:
+            net_outputs (list[np.ndarray]): List of array storing the
+                inferenced bounding boxes and scores.
+            class_names (list[String]): A list of class names.
+            pklfile_prefix (str): The prefix of pkl file.
+            submission_prefix (str): The prefix of submission file.
+
+        Returns:
+            list[dict]: A list of dictionaries have the kitti format
+        """
+        assert len(net_outputs) == len(self.data_infos), \
+            'invalid list length of network outputs'
+        det_annos = []
+        print('\nConverting prediction to KITTI format')
+        for i, bboxes_per_sample in enumerate(
+                mmcv.track_iter_progress(net_outputs)):
+            annos = []
+            anno = dict(
+                name=[],
+                truncated=[],
+                occluded=[],
+                alpha=[],
+                bbox=[],
+                dimensions=[],
+                location=[],
+                rotation_y=[],
+                score=[])
+            sample_idx = self.data_infos[i]['image']['image_idx']
+
+            num_example = 0
+            for label in range(len(bboxes_per_sample)):
+                bbox = bboxes_per_sample[label]
+                for i in range(bbox.shape[0]):
+                    anno['name'].append(class_names[int(label)])
+                    anno['truncated'].append(0.0)
+                    anno['occluded'].append(0)
+                    anno['alpha'].append(0.0)
+                    anno['bbox'].append(bbox[i, :4])
+                    # set dimensions (height, width, length) to zero
+                    anno['dimensions'].append(
+                        np.zeros(shape=[3], dtype=np.float32))
+                    # set the 3D translation to (-1000, -1000, -1000)
+                    anno['location'].append(
+                        np.ones(shape=[3], dtype=np.float32) * (-1000.0))
+                    anno['rotation_y'].append(0.0)
+                    anno['score'].append(bbox[i, 4])
+                    num_example += 1
+
+            if num_example == 0:
+                annos.append(
+                    dict(
+                        name=np.array([]),
+                        truncated=np.array([]),
+                        occluded=np.array([]),
+                        alpha=np.array([]),
+                        bbox=np.zeros([0, 4]),
+                        dimensions=np.zeros([0, 3]),
+                        location=np.zeros([0, 3]),
+                        rotation_y=np.array([]),
+                        score=np.array([]),
+                    ))
+            else:
+                anno = {k: np.stack(v) for k, v in anno.items()}
+                annos.append(anno)
+
+            annos[-1]['sample_idx'] = np.array(
+                [sample_idx] * num_example, dtype=np.int64)
+            det_annos += annos
+
+        if pklfile_prefix is not None:
+            # save file in pkl format
+            pklfile_path = (
+                pklfile_prefix[:-4] if pklfile_prefix.endswith(
+                    ('.pkl', '.pickle')) else pklfile_prefix)
+            mmcv.dump(det_annos, pklfile_path)
+
+        if submission_prefix is not None:
+            # save file in submission format
+            mmcv.mkdir_or_exist(submission_prefix)
+            print(f'Saving KITTI submission to {submission_prefix}')
+            for i, anno in enumerate(det_annos):
+                sample_idx = self.data_infos[i]['image']['image_idx']
+                cur_det_file = f'{submission_prefix}/{sample_idx:06d}.txt'
+                with open(cur_det_file, 'w') as f:
+                    bbox = anno['bbox']
+                    loc = anno['location']
+                    dims = anno['dimensions'][::-1]  # lhw -> hwl
+                    for idx in range(len(bbox)):
+                        print(
+                            '{} -1 -1 {:4f} {:4f} {:4f} {:4f} {:4f} {:4f} '
+                            '{:4f} {:4f} {:4f} {:4f} {:4f} {:4f} {:4f}'.format(
+                                anno['name'][idx],
+                                anno['alpha'][idx],
+                                *bbox[idx],  # 4 float
+                                *dims[idx],  # 3 float
+                                *loc[idx],  # 3 float
+                                anno['rotation_y'][idx],
+                                anno['score'][idx]),
+                            file=f,
+                        )
+            print(f'Result is saved to {submission_prefix}')
+
+        return det_annos
+
+    def convert_valid_bboxes(self, box_dict, info):
+        """Convert the predicted boxes into valid ones.
+
+        Args:
+            box_dict (dict): Box dictionaries to be converted.
+
+                - boxes_3d (:obj:`LiDARInstance3DBoxes`): 3D bounding boxes.
+                - scores_3d (torch.Tensor): Scores of boxes.
+                - labels_3d (torch.Tensor): Class labels of boxes.
+            info (dict): Data info.
+
+        Returns:
+            dict: Valid predicted boxes.
+
+                - bbox (np.ndarray): 2D bounding boxes.
+                - box3d_camera (np.ndarray): 3D bounding boxes in
+                    camera coordinate.
+                - box3d_lidar (np.ndarray): 3D bounding boxes in
+                    LiDAR coordinate.
+                - scores (np.ndarray): Scores of boxes.
+                - label_preds (np.ndarray): Class label predictions.
+                - sample_idx (int): Sample index.
+        """
+        # TODO: refactor this function
+        box_preds = box_dict['boxes_3d']
+        scores = box_dict['scores_3d']
+        labels = box_dict['labels_3d']
+        sample_idx = info['image']['image_idx']
+        box_preds.limit_yaw(offset=0.5, period=np.pi * 2)
+
+        if len(box_preds) == 0:
+            return dict(
+                bbox=np.zeros([0, 4]),
+                box3d_camera=np.zeros([0, 7]),
+                box3d_lidar=np.zeros([0, 7]),
+                scores=np.zeros([0]),
+                label_preds=np.zeros([0, 4]),
+                sample_idx=sample_idx)
+
+        rect = info['calib']['R0_rect'].astype(np.float32)
+        Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
+        P2 = info['calib']['P2'].astype(np.float32)
+        img_shape = info['image']['image_shape']
+        P2 = box_preds.tensor.new_tensor(P2)
+
+        box_preds_camera = box_preds.convert_to(Box3DMode.CAM, rect @ Trv2c)
+
+        box_corners = box_preds_camera.corners
+        box_corners_in_image = points_cam2img(box_corners, P2)
+        # box_corners_in_image: [N, 8, 2]
+        minxy = torch.min(box_corners_in_image, dim=1)[0]
+        maxxy = torch.max(box_corners_in_image, dim=1)[0]
+        box_2d_preds = torch.cat([minxy, maxxy], dim=1)
+        # Post-processing
+        # check box_preds_camera
+        image_shape = box_preds.tensor.new_tensor(img_shape)
+        valid_cam_inds = ((box_2d_preds[:, 0] < image_shape[1]) &
+                          (box_2d_preds[:, 1] < image_shape[0]) &
+                          (box_2d_preds[:, 2] > 0) & (box_2d_preds[:, 3] > 0))
+        # check box_preds
+        limit_range = box_preds.tensor.new_tensor(self.pcd_limit_range)
+        valid_pcd_inds = ((box_preds.center > limit_range[:3]) &
+                          (box_preds.center < limit_range[3:]))
+        valid_inds = valid_cam_inds & valid_pcd_inds.all(-1)
+
+        if valid_inds.sum() > 0:
+            return dict(
+                bbox=box_2d_preds[valid_inds, :].numpy(),
+                box3d_camera=box_preds_camera[valid_inds].tensor.numpy(),
+                box3d_lidar=box_preds[valid_inds].tensor.numpy(),
+                scores=scores[valid_inds].numpy(),
+                label_preds=labels[valid_inds].numpy(),
+                sample_idx=sample_idx)
+        else:
+            return dict(
+                bbox=np.zeros([0, 4]),
+                box3d_camera=np.zeros([0, 7]),
+                box3d_lidar=np.zeros([0, 7]),
+                scores=np.zeros([0]),
+                label_preds=np.zeros([0, 4]),
+                sample_idx=sample_idx)
+
+    def _build_default_pipeline(self):
+        """Build the default pipeline for this dataset."""
+        pipeline = [
+            dict(
+                type='LoadPointsFromFile',
+                coord_type='LIDAR',
+                load_dim=4,
+                use_dim=4,
+                file_client_args=dict(backend='disk')),
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=self.CLASSES,
+                with_label=False),
+            dict(type='Collect3D', keys=['points'])
+        ]
+        if self.modality['use_camera']:
+            pipeline.insert(0, dict(type='LoadImageFromFile'))
+        return Compose(pipeline)
+
+    def show(self, results, out_dir, show=True, pipeline=None):
+        """Results visualization.
+
+        Args:
+            results (list[dict]): List of bounding boxes results.
+            out_dir (str): Output directory of visualization result.
+            show (bool): Whether to visualize the results online.
+                Default: False.
+            pipeline (list[dict], optional): raw data loading for showing.
+                Default: None.
+        """
+        assert out_dir is not None, 'Expect out_dir, got none.'
+        pipeline = self._get_pipeline(pipeline)
+        for i, result in enumerate(results):
+            if 'pts_bbox' in result.keys():
+                result = result['pts_bbox']
+            data_info = self.data_infos[i]
+            pts_path = data_info['point_cloud']['velodyne_path']
+            file_name = osp.split(pts_path)[-1].split('.')[0]
+            points, img_metas, img = self._extract_data(
+                i, pipeline, ['points', 'img_metas', 'img'])
+            points = points.numpy()
+            # for now we convert points into depth mode
+            points = Coord3DMode.convert_point(points, Coord3DMode.LIDAR,
+                                               Coord3DMode.DEPTH)
+            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()
+            show_gt_bboxes = Box3DMode.convert(gt_bboxes, Box3DMode.LIDAR,
+                                               Box3DMode.DEPTH)
+            pred_bboxes = result['boxes_3d'].tensor.numpy()
+            show_pred_bboxes = Box3DMode.convert(pred_bboxes, Box3DMode.LIDAR,
+                                                 Box3DMode.DEPTH)
+            show_result(points, show_gt_bboxes, show_pred_bboxes, out_dir,
+                        file_name, show)
+
+            # multi-modality visualization
+            if self.modality['use_camera'] and 'lidar2img' in img_metas.keys():
+                img = img.numpy()
+                # need to transpose channel to first dim
+                img = img.transpose(1, 2, 0)
+                show_pred_bboxes = LiDARInstance3DBoxes(
+                    pred_bboxes, origin=(0.5, 0.5, 0))
+                show_gt_bboxes = LiDARInstance3DBoxes(
+                    gt_bboxes, origin=(0.5, 0.5, 0))
+                show_multi_modality_result(
+                    img,
+                    show_gt_bboxes,
+                    show_pred_bboxes,
+                    img_metas['lidar2img'],
+                    out_dir,
+                    file_name,
+                    box_mode='lidar',
+                    show=show)
--- a/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/kitti_mono_dataset.py
+++ b/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/kitti_mono_dataset.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import tempfile
+from os import path as osp
+
+import mmcv
+import numpy as np
+import torch
+from mmcv.utils import print_log
+
+from ..core.bbox import Box3DMode, CameraInstance3DBoxes, points_cam2img
+from .builder import DATASETS
+from .nuscenes_mono_dataset import NuScenesMonoDataset
+
+
+@DATASETS.register_module()
+class KittiMonoDataset(NuScenesMonoDataset):
+    """Monocular 3D detection on KITTI Dataset.
+
+    Args:
+        data_root (str): Path of dataset root.
+        info_file (str): Path of info file.
+        load_interval (int, optional): Interval of loading the dataset. It is
+            used to uniformly sample the dataset. Defaults to 1.
+        with_velocity (bool, optional): Whether include velocity prediction
+            into the experiments. Defaults to False.
+        eval_version (str, optional): Configuration version of evaluation.
+            Defaults to None.
+        version (str, optional): Dataset version. Defaults to None.
+        kwargs (dict): Other arguments are the same of NuScenesMonoDataset.
+    """
+
+    CLASSES = ('Pedestrian', 'Cyclist', 'Car')
+
+    def __init__(self,
+                 data_root,
+                 info_file,
+                 ann_file,
+                 pipeline,
+                 load_interval=1,
+                 with_velocity=False,
+                 eval_version=None,
+                 version=None,
+                 **kwargs):
+        super().__init__(
+            data_root=data_root,
+            ann_file=ann_file,
+            pipeline=pipeline,
+            load_interval=load_interval,
+            with_velocity=with_velocity,
+            eval_version=eval_version,
+            version=version,
+            **kwargs)
+        self.anno_infos = mmcv.load(info_file)
+        self.bbox_code_size = 7
+
+    def _parse_ann_info(self, img_info, ann_info):
+        """Parse bbox and mask annotation.
+
+        Args:
+            ann_info (list[dict]): Annotation info of an image.
+            with_mask (bool): Whether to parse mask annotations.
+
+        Returns:
+            dict: A dict containing the following keys: bboxes, bboxes_ignore,
+                labels, masks, seg_map. "masks" are raw annotations and not
+                decoded into binary masks.
+        """
+        gt_bboxes = []
+        gt_labels = []
+        gt_bboxes_ignore = []
+        gt_masks_ann = []
+        gt_bboxes_cam3d = []
+        centers2d = []
+        depths = []
+        for i, ann in enumerate(ann_info):
+            if ann.get('ignore', False):
+                continue
+            x1, y1, w, h = ann['bbox']
+            inter_w = max(0, min(x1 + w, img_info['width']) - max(x1, 0))
+            inter_h = max(0, min(y1 + h, img_info['height']) - max(y1, 0))
+            if inter_w * inter_h == 0:
+                continue
+            if ann['area'] <= 0 or w < 1 or h < 1:
+                continue
+            if ann['category_id'] not in self.cat_ids:
+                continue
+            bbox = [x1, y1, x1 + w, y1 + h]
+            if ann.get('iscrowd', False):
+                gt_bboxes_ignore.append(bbox)
+            else:
+                gt_bboxes.append(bbox)
+                gt_labels.append(self.cat2label[ann['category_id']])
+                gt_masks_ann.append(ann.get('segmentation', None))
+                # 3D annotations in camera coordinates
+                bbox_cam3d = np.array(ann['bbox_cam3d']).reshape(-1, )
+                gt_bboxes_cam3d.append(bbox_cam3d)
+                # 2.5D annotations in camera coordinates
+                center2d = ann['center2d'][:2]
+                depth = ann['center2d'][2]
+                centers2d.append(center2d)
+                depths.append(depth)
+
+        if gt_bboxes:
+            gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
+            gt_labels = np.array(gt_labels, dtype=np.int64)
+        else:
+            gt_bboxes = np.zeros((0, 4), dtype=np.float32)
+            gt_labels = np.array([], dtype=np.int64)
+
+        if gt_bboxes_cam3d:
+            gt_bboxes_cam3d = np.array(gt_bboxes_cam3d, dtype=np.float32)
+            centers2d = np.array(centers2d, dtype=np.float32)
+            depths = np.array(depths, dtype=np.float32)
+        else:
+            gt_bboxes_cam3d = np.zeros((0, self.bbox_code_size),
+                                       dtype=np.float32)
+            centers2d = np.zeros((0, 2), dtype=np.float32)
+            depths = np.zeros((0), dtype=np.float32)
+
+        gt_bboxes_cam3d = CameraInstance3DBoxes(
+            gt_bboxes_cam3d,
+            box_dim=gt_bboxes_cam3d.shape[-1],
+            origin=(0.5, 0.5, 0.5))
+        gt_labels_3d = copy.deepcopy(gt_labels)
+
+        if gt_bboxes_ignore:
+            gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
+        else:
+            gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)
+
+        seg_map = img_info['filename'].replace('jpg', 'png')
+
+        ann = dict(
+            bboxes=gt_bboxes,
+            labels=gt_labels,
+            gt_bboxes_3d=gt_bboxes_cam3d,
+            gt_labels_3d=gt_labels_3d,
+            centers2d=centers2d,
+            depths=depths,
+            bboxes_ignore=gt_bboxes_ignore,
+            masks=gt_masks_ann,
+            seg_map=seg_map)
+
+        return ann
+
+    def format_results(self,
+                       outputs,
+                       pklfile_prefix=None,
+                       submission_prefix=None):
+        """Format the results to pkl file.
+
+        Args:
+            outputs (list[dict]): Testing results of the dataset.
+            pklfile_prefix (str): The prefix of pkl files. It includes
+                the file path and the prefix of filename, e.g., "a/b/prefix".
+                If not specified, a temp file will be created. Default: None.
+            submission_prefix (str): The prefix of submitted files. It
+                includes the file path and the prefix of filename, e.g.,
+                "a/b/prefix". If not specified, a temp file will be created.
+                Default: None.
+
+        Returns:
+            tuple: (result_files, tmp_dir), result_files is a dict containing
+                the json filepaths, tmp_dir is the temporal directory created
+                for saving json files when jsonfile_prefix is not specified.
+        """
+        if pklfile_prefix is None:
+            tmp_dir = tempfile.TemporaryDirectory()
+            pklfile_prefix = osp.join(tmp_dir.name, 'results')
+        else:
+            tmp_dir = None
+
+        if not isinstance(outputs[0], dict):
+            result_files = self.bbox2result_kitti2d(outputs, self.CLASSES,
+                                                    pklfile_prefix,
+                                                    submission_prefix)
+        elif 'pts_bbox' in outputs[0] or 'img_bbox' in outputs[0] or \
+                'img_bbox2d' in outputs[0]:
+            result_files = dict()
+            for name in outputs[0]:
+                results_ = [out[name] for out in outputs]
+                pklfile_prefix_ = pklfile_prefix + name
+                if submission_prefix is not None:
+                    submission_prefix_ = submission_prefix + name
+                else:
+                    submission_prefix_ = None
+                if '2d' in name:
+                    result_files_ = self.bbox2result_kitti2d(
+                        results_, self.CLASSES, pklfile_prefix_,
+                        submission_prefix_)
+                else:
+                    result_files_ = self.bbox2result_kitti(
+                        results_, self.CLASSES, pklfile_prefix_,
+                        submission_prefix_)
+                result_files[name] = result_files_
+        else:
+            result_files = self.bbox2result_kitti(outputs, self.CLASSES,
+                                                  pklfile_prefix,
+                                                  submission_prefix)
+        return result_files, tmp_dir
+
+    def evaluate(self,
+                 results,
+                 metric=None,
+                 logger=None,
+                 pklfile_prefix=None,
+                 submission_prefix=None,
+                 show=False,
+                 out_dir=None,
+                 pipeline=None):
+        """Evaluation in KITTI protocol.
+
+        Args:
+            results (list[dict]): Testing results of the dataset.
+            metric (str | list[str], optional): Metrics to be evaluated.
+                Defaults to None.
+            logger (logging.Logger | str, optional): Logger used for printing
+                related information during evaluation. Default: None.
+            pklfile_prefix (str, optional): The prefix of pkl files, including
+                the file path and the prefix of filename, e.g., "a/b/prefix".
+                If not specified, a temp file will be created. Default: None.
+            submission_prefix (str, optional): The prefix of submission data.
+                If not specified, the submission data will not be generated.
+            show (bool, optional): Whether to visualize.
+                Default: False.
+            out_dir (str, optional): Path to save the visualization results.
+                Default: None.
+            pipeline (list[dict], optional): raw data loading for showing.
+                Default: None.
+
+        Returns:
+            dict[str, float]: Results of each evaluation metric.
+        """
+        result_files, tmp_dir = self.format_results(results, pklfile_prefix)
+        from mmdet3d.core.evaluation import kitti_eval
+        gt_annos = [info['annos'] for info in self.anno_infos]
+
+        if isinstance(result_files, dict):
+            ap_dict = dict()
+            for name, result_files_ in result_files.items():
+                eval_types = ['bbox', 'bev', '3d']
+                if '2d' in name:
+                    eval_types = ['bbox']
+                ap_result_str, ap_dict_ = kitti_eval(
+                    gt_annos,
+                    result_files_,
+                    self.CLASSES,
+                    eval_types=eval_types)
+                for ap_type, ap in ap_dict_.items():
+                    ap_dict[f'{name}/{ap_type}'] = float('{:.4f}'.format(ap))
+
+                print_log(
+                    f'Results of {name}:\n' + ap_result_str, logger=logger)
+
+        else:
+            if metric == 'img_bbox2d':
+                ap_result_str, ap_dict = kitti_eval(
+                    gt_annos, result_files, self.CLASSES, eval_types=['bbox'])
+            else:
+                ap_result_str, ap_dict = kitti_eval(gt_annos, result_files,
+                                                    self.CLASSES)
+            print_log('\n' + ap_result_str, logger=logger)
+
+        if tmp_dir is not None:
+            tmp_dir.cleanup()
+        if show or out_dir:
+            self.show(results, out_dir, show=show, pipeline=pipeline)
+        return ap_dict
+
+    def bbox2result_kitti(self,
+                          net_outputs,
+                          class_names,
+                          pklfile_prefix=None,
+                          submission_prefix=None):
+        """Convert 3D detection results to kitti format for evaluation and test
+        submission.
+
+        Args:
+            net_outputs (list[np.ndarray]): List of array storing the
+                inferenced bounding boxes and scores.
+            class_names (list[String]): A list of class names.
+            pklfile_prefix (str): The prefix of pkl file.
+            submission_prefix (str): The prefix of submission file.
+
+        Returns:
+            list[dict]: A list of dictionaries with the kitti format.
+        """
+        assert len(net_outputs) == len(self.anno_infos)
+        if submission_prefix is not None:
+            mmcv.mkdir_or_exist(submission_prefix)
+
+        det_annos = []
+        print('\nConverting prediction to KITTI format')
+        for idx, pred_dicts in enumerate(
+                mmcv.track_iter_progress(net_outputs)):
+            annos = []
+            info = self.anno_infos[idx]
+            sample_idx = info['image']['image_idx']
+            image_shape = info['image']['image_shape'][:2]
+
+            box_dict = self.convert_valid_bboxes(pred_dicts, info)
+            anno = {
+                'name': [],
+                'truncated': [],
+                'occluded': [],
+                'alpha': [],
+                'bbox': [],
+                'dimensions': [],
+                'location': [],
+                'rotation_y': [],
+                'score': []
+            }
+            if len(box_dict['bbox']) > 0:
+                box_2d_preds = box_dict['bbox']
+                box_preds = box_dict['box3d_camera']
+                scores = box_dict['scores']
+                box_preds_lidar = box_dict['box3d_lidar']
+                label_preds = box_dict['label_preds']
+
+                for box, box_lidar, bbox, score, label in zip(
+                        box_preds, box_preds_lidar, box_2d_preds, scores,
+                        label_preds):
+                    bbox[2:] = np.minimum(bbox[2:], image_shape[::-1])
+                    bbox[:2] = np.maximum(bbox[:2], [0, 0])
+                    anno['name'].append(class_names[int(label)])
+                    anno['truncated'].append(0.0)
+                    anno['occluded'].append(0)
+                    anno['alpha'].append(-np.arctan2(box[0], box[2]) + box[6])
+                    anno['bbox'].append(bbox)
+                    anno['dimensions'].append(box[3:6])
+                    anno['location'].append(box[:3])
+                    anno['rotation_y'].append(box[6])
+                    anno['score'].append(score)
+
+                anno = {k: np.stack(v) for k, v in anno.items()}
+                annos.append(anno)
+
+            else:
+                anno = {
+                    'name': np.array([]),
+                    'truncated': np.array([]),
+                    'occluded': np.array([]),
+                    'alpha': np.array([]),
+                    'bbox': np.zeros([0, 4]),
+                    'dimensions': np.zeros([0, 3]),
+                    'location': np.zeros([0, 3]),
+                    'rotation_y': np.array([]),
+                    'score': np.array([]),
+                }
+                annos.append(anno)
+
+            if submission_prefix is not None:
+                curr_file = f'{submission_prefix}/{sample_idx:06d}.txt'
+                with open(curr_file, 'w') as f:
+                    bbox = anno['bbox']
+                    loc = anno['location']
+                    dims = anno['dimensions']  # lhw -> hwl
+
+                    for idx in range(len(bbox)):
+                        print(
+                            '{} -1 -1 {:.4f} {:.4f} {:.4f} {:.4f} '
+                            '{:.4f} {:.4f} {:.4f} '
+                            '{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}'.format(
+                                anno['name'][idx], anno['alpha'][idx],
+                                bbox[idx][0], bbox[idx][1], bbox[idx][2],
+                                bbox[idx][3], dims[idx][1], dims[idx][2],
+                                dims[idx][0], loc[idx][0], loc[idx][1],
+                                loc[idx][2], anno['rotation_y'][idx],
+                                anno['score'][idx]),
+                            file=f)
+
+            annos[-1]['sample_idx'] = np.array(
+                [sample_idx] * len(annos[-1]['score']), dtype=np.int64)
+
+            det_annos += annos
+
+        if pklfile_prefix is not None:
+            if not pklfile_prefix.endswith(('.pkl', '.pickle')):
+                out = f'{pklfile_prefix}.pkl'
+            mmcv.dump(det_annos, out)
+            print('Result is saved to %s' % out)
+
+        return det_annos
+
+    def bbox2result_kitti2d(self,
+                            net_outputs,
+                            class_names,
+                            pklfile_prefix=None,
+                            submission_prefix=None):
+        """Convert 2D detection results to kitti format for evaluation and test
+        submission.
+
+        Args:
+            net_outputs (list[np.ndarray]): List of array storing the
+                inferenced bounding boxes and scores.
+            class_names (list[String]): A list of class names.
+            pklfile_prefix (str): The prefix of pkl file.
+            submission_prefix (str): The prefix of submission file.
+
+        Returns:
+            list[dict]: A list of dictionaries have the kitti format
+        """
+        assert len(net_outputs) == len(self.anno_infos)
+
+        det_annos = []
+        print('\nConverting prediction to KITTI format')
+        for i, bboxes_per_sample in enumerate(
+                mmcv.track_iter_progress(net_outputs)):
+            annos = []
+            anno = dict(
+                name=[],
+                truncated=[],
+                occluded=[],
+                alpha=[],
+                bbox=[],
+                dimensions=[],
+                location=[],
+                rotation_y=[],
+                score=[])
+            sample_idx = self.anno_infos[i]['image']['image_idx']
+
+            num_example = 0
+            for label in range(len(bboxes_per_sample)):
+                bbox = bboxes_per_sample[label]
+                for i in range(bbox.shape[0]):
+                    anno['name'].append(class_names[int(label)])
+                    anno['truncated'].append(0.0)
+                    anno['occluded'].append(0)
+                    anno['alpha'].append(-10)
+                    anno['bbox'].append(bbox[i, :4])
+                    # set dimensions (height, width, length) to zero
+                    anno['dimensions'].append(
+                        np.zeros(shape=[3], dtype=np.float32))
+                    # set the 3D translation to (-1000, -1000, -1000)
+                    anno['location'].append(
+                        np.ones(shape=[3], dtype=np.float32) * (-1000.0))
+                    anno['rotation_y'].append(0.0)
+                    anno['score'].append(bbox[i, 4])
+                    num_example += 1
+
+            if num_example == 0:
+                annos.append(
+                    dict(
+                        name=np.array([]),
+                        truncated=np.array([]),
+                        occluded=np.array([]),
+                        alpha=np.array([]),
+                        bbox=np.zeros([0, 4]),
+                        dimensions=np.zeros([0, 3]),
+                        location=np.zeros([0, 3]),
+                        rotation_y=np.array([]),
+                        score=np.array([]),
+                    ))
+            else:
+                anno = {k: np.stack(v) for k, v in anno.items()}
+                annos.append(anno)
+
+            annos[-1]['sample_idx'] = np.array(
+                [sample_idx] * num_example, dtype=np.int64)
+            det_annos += annos
+
+        if pklfile_prefix is not None:
+            if not pklfile_prefix.endswith(('.pkl', '.pickle')):
+                out = f'{pklfile_prefix}.pkl'
+            mmcv.dump(det_annos, out)
+            print('Result is saved to %s' % out)
+
+        if submission_prefix is not None:
+            # save file in submission format
+            mmcv.mkdir_or_exist(submission_prefix)
+            print(f'Saving KITTI submission to {submission_prefix}')
+            for i, anno in enumerate(det_annos):
+                sample_idx = self.anno_infos[i]['image']['image_idx']
+                cur_det_file = f'{submission_prefix}/{sample_idx:06d}.txt'
+                with open(cur_det_file, 'w') as f:
+                    bbox = anno['bbox']
+                    loc = anno['location']
+                    dims = anno['dimensions'][::-1]  # lhw -> hwl
+                    for idx in range(len(bbox)):
+                        print(
+                            '{} -1 -1 {:4f} {:4f} {:4f} {:4f} {:4f} {:4f} '
+                            '{:4f} {:4f} {:4f} {:4f} {:4f} {:4f} {:4f}'.format(
+                                anno['name'][idx],
+                                anno['alpha'][idx],
+                                *bbox[idx],  # 4 float
+                                *dims[idx],  # 3 float
+                                *loc[idx],  # 3 float
+                                anno['rotation_y'][idx],
+                                anno['score'][idx]),
+                            file=f,
+                        )
+            print(f'Result is saved to {submission_prefix}')
+
+        return det_annos
+
+    def convert_valid_bboxes(self, box_dict, info):
+        """Convert the predicted boxes into valid ones.
+
+        Args:
+            box_dict (dict): Box dictionaries to be converted.
+                - boxes_3d (:obj:`CameraInstance3DBoxes`): 3D bounding boxes.
+                - scores_3d (torch.Tensor): Scores of boxes.
+                - labels_3d (torch.Tensor): Class labels of boxes.
+            info (dict): Data info.
+
+        Returns:
+            dict: Valid predicted boxes.
+                - bbox (np.ndarray): 2D bounding boxes.
+                - box3d_camera (np.ndarray): 3D bounding boxes in
+                    camera coordinate.
+                - scores (np.ndarray): Scores of boxes.
+                - label_preds (np.ndarray): Class label predictions.
+                - sample_idx (int): Sample index.
+        """
+        box_preds = box_dict['boxes_3d']
+        scores = box_dict['scores_3d']
+        labels = box_dict['labels_3d']
+        sample_idx = info['image']['image_idx']
+
+        if len(box_preds) == 0:
+            return dict(
+                bbox=np.zeros([0, 4]),
+                box3d_camera=np.zeros([0, 7]),
+                scores=np.zeros([0]),
+                label_preds=np.zeros([0, 4]),
+                sample_idx=sample_idx)
+
+        rect = info['calib']['R0_rect'].astype(np.float32)
+        Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
+        P2 = info['calib']['P2'].astype(np.float32)
+        img_shape = info['image']['image_shape']
+        P2 = box_preds.tensor.new_tensor(P2)
+
+        box_preds_camera = box_preds
+        box_preds_lidar = box_preds.convert_to(Box3DMode.LIDAR,
+                                               np.linalg.inv(rect @ Trv2c))
+
+        box_corners = box_preds_camera.corners
+        box_corners_in_image = points_cam2img(box_corners, P2)
+        # box_corners_in_image: [N, 8, 2]
+        minxy = torch.min(box_corners_in_image, dim=1)[0]
+        maxxy = torch.max(box_corners_in_image, dim=1)[0]
+        box_2d_preds = torch.cat([minxy, maxxy], dim=1)
+        # Post-processing
+        # check box_preds_camera
+        image_shape = box_preds.tensor.new_tensor(img_shape)
+        valid_cam_inds = ((box_2d_preds[:, 0] < image_shape[1]) &
+                          (box_2d_preds[:, 1] < image_shape[0]) &
+                          (box_2d_preds[:, 2] > 0) & (box_2d_preds[:, 3] > 0))
+        # check box_preds
+        valid_inds = valid_cam_inds
+
+        if valid_inds.sum() > 0:
+            return dict(
+                bbox=box_2d_preds[valid_inds, :].numpy(),
+                box3d_camera=box_preds_camera[valid_inds].tensor.numpy(),
+                box3d_lidar=box_preds_lidar[valid_inds].tensor.numpy(),
+                scores=scores[valid_inds].numpy(),
+                label_preds=labels[valid_inds].numpy(),
+                sample_idx=sample_idx)
+        else:
+            return dict(
+                bbox=np.zeros([0, 4]),
+                box3d_camera=np.zeros([0, 7]),
+                box3d_lidar=np.zeros([0, 7]),
+                scores=np.zeros([0]),
+                label_preds=np.zeros([0, 4]),
+                sample_idx=sample_idx)
--- a/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/lyft_dataset.py
+++ b/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/lyft_dataset.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import tempfile
+from os import path as osp
+
+import mmcv
+import numpy as np
+import pandas as pd
+from lyft_dataset_sdk.lyftdataset import LyftDataset as Lyft
+from lyft_dataset_sdk.utils.data_classes import Box as LyftBox
+from pyquaternion import Quaternion
+
+from mmdet3d.core.evaluation.lyft_eval import lyft_eval
+from ..core import show_result
+from ..core.bbox import Box3DMode, Coord3DMode, LiDARInstance3DBoxes
+from .builder import DATASETS
+from .custom_3d import Custom3DDataset
+from .pipelines import Compose
+
+
+@DATASETS.register_module()
+class LyftDataset(Custom3DDataset):
+    r"""Lyft Dataset.
+
+    This class serves as the API for experiments on the Lyft Dataset.
+
+    Please refer to
+    `<https://www.kaggle.com/c/3d-object-detection-for-autonomous-vehicles/data>`_
+    for data downloading.
+
+    Args:
+        ann_file (str): Path of annotation file.
+        pipeline (list[dict], optional): Pipeline used for data processing.
+            Defaults to None.
+        data_root (str): Path of dataset root.
+        classes (tuple[str], optional): Classes used in the dataset.
+            Defaults to None.
+        load_interval (int, optional): Interval of loading the dataset. It is
+            used to uniformly sample the dataset. Defaults to 1.
+        modality (dict, optional): Modality to specify the sensor data used
+            as input. Defaults to None.
+        box_type_3d (str, optional): Type of 3D box of this dataset.
+            Based on the `box_type_3d`, the dataset will encapsulate the box
+            to its original format then converted them to `box_type_3d`.
+            Defaults to 'LiDAR' in this dataset. Available options includes
+
+            - 'LiDAR': Box in LiDAR coordinates.
+            - 'Depth': Box in depth coordinates, usually for indoor dataset.
+            - 'Camera': Box in camera coordinates.
+        filter_empty_gt (bool, optional): Whether to filter empty GT.
+            Defaults to True.
+        test_mode (bool, optional): Whether the dataset is in test mode.
+            Defaults to False.
+    """  # noqa: E501
+    NameMapping = {
+        'bicycle': 'bicycle',
+        'bus': 'bus',
+        'car': 'car',
+        'emergency_vehicle': 'emergency_vehicle',
+        'motorcycle': 'motorcycle',
+        'other_vehicle': 'other_vehicle',
+        'pedestrian': 'pedestrian',
+        'truck': 'truck',
+        'animal': 'animal'
+    }
+    DefaultAttribute = {
+        'car': 'is_stationary',
+        'truck': 'is_stationary',
+        'bus': 'is_stationary',
+        'emergency_vehicle': 'is_stationary',
+        'other_vehicle': 'is_stationary',
+        'motorcycle': 'is_stationary',
+        'bicycle': 'is_stationary',
+        'pedestrian': 'is_stationary',
+        'animal': 'is_stationary'
+    }
+    CLASSES = ('car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle',
+               'motorcycle', 'bicycle', 'pedestrian', 'animal')
+
+    def __init__(self,
+                 ann_file,
+                 pipeline=None,
+                 data_root=None,
+                 classes=None,
+                 load_interval=1,
+                 modality=None,
+                 box_type_3d='LiDAR',
+                 filter_empty_gt=True,
+                 test_mode=False,
+                 **kwargs):
+        self.load_interval = load_interval
+        super().__init__(
+            data_root=data_root,
+            ann_file=ann_file,
+            pipeline=pipeline,
+            classes=classes,
+            modality=modality,
+            box_type_3d=box_type_3d,
+            filter_empty_gt=filter_empty_gt,
+            test_mode=test_mode,
+            **kwargs)
+
+        if self.modality is None:
+            self.modality = dict(
+                use_camera=False,
+                use_lidar=True,
+                use_radar=False,
+                use_map=False,
+                use_external=False,
+            )
+
+    def load_annotations(self, ann_file):
+        """Load annotations from ann_file.
+
+        Args:
+            ann_file (str): Path of the annotation file.
+
+        Returns:
+            list[dict]: List of annotations sorted by timestamps.
+        """
+        # loading data from a file-like object needs file format
+        data = mmcv.load(ann_file, file_format='pkl')
+        data_infos = list(sorted(data['infos'], key=lambda e: e['timestamp']))
+        data_infos = data_infos[::self.load_interval]
+        self.metadata = data['metadata']
+        self.version = self.metadata['version']
+        return data_infos
+
+    def get_data_info(self, index):
+        """Get data info according to the given index.
+
+        Args:
+            index (int): Index of the sample data to get.
+
+        Returns:
+            dict: Data information that will be passed to the data
+                preprocessing pipelines. It includes the following keys:
+
+                - sample_idx (str): sample index
+                - pts_filename (str): filename of point clouds
+                - sweeps (list[dict]): infos of sweeps
+                - timestamp (float): sample timestamp
+                - img_filename (str, optional): image filename
+                - lidar2img (list[np.ndarray], optional): transformations
+                    from lidar to different cameras
+                - ann_info (dict): annotation info
+        """
+        info = self.data_infos[index]
+
+        # standard protocol modified from SECOND.Pytorch
+        input_dict = dict(
+            sample_idx=info['token'],
+            pts_filename=info['lidar_path'],
+            sweeps=info['sweeps'],
+            timestamp=info['timestamp'] / 1e6,
+        )
+
+        if self.modality['use_camera']:
+            image_paths = []
+            lidar2img_rts = []
+            for cam_type, cam_info in info['cams'].items():
+                image_paths.append(cam_info['data_path'])
+                # obtain lidar to image transformation matrix
+                lidar2cam_r = np.linalg.inv(cam_info['sensor2lidar_rotation'])
+                lidar2cam_t = cam_info[
+                    'sensor2lidar_translation'] @ lidar2cam_r.T
+                lidar2cam_rt = np.eye(4)
+                lidar2cam_rt[:3, :3] = lidar2cam_r.T
+                lidar2cam_rt[3, :3] = -lidar2cam_t
+                intrinsic = cam_info['cam_intrinsic']
+                viewpad = np.eye(4)
+                viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = intrinsic
+                lidar2img_rt = (viewpad @ lidar2cam_rt.T)
+                lidar2img_rts.append(lidar2img_rt)
+
+            input_dict.update(
+                dict(
+                    img_filename=image_paths,
+                    lidar2img=lidar2img_rts,
+                ))
+
+        if not self.test_mode:
+            annos = self.get_ann_info(index)
+            input_dict['ann_info'] = annos
+
+        return input_dict
+
+    def get_ann_info(self, index):
+        """Get annotation info according to the given index.
+
+        Args:
+            index (int): Index of the annotation data to get.
+
+        Returns:
+            dict: Annotation information consists of the following keys:
+
+                - gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`):
+                    3D ground truth bboxes.
+                - gt_labels_3d (np.ndarray): Labels of ground truths.
+                - gt_names (list[str]): Class names of ground truths.
+        """
+        info = self.data_infos[index]
+        gt_bboxes_3d = info['gt_boxes']
+        gt_names_3d = info['gt_names']
+        gt_labels_3d = []
+        for cat in gt_names_3d:
+            if cat in self.CLASSES:
+                gt_labels_3d.append(self.CLASSES.index(cat))
+            else:
+                gt_labels_3d.append(-1)
+        gt_labels_3d = np.array(gt_labels_3d)
+
+        if 'gt_shape' in info:
+            gt_shape = info['gt_shape']
+            gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_shape], axis=-1)
+
+        # the lyft box center is [0.5, 0.5, 0.5], we change it to be
+        # the same as KITTI (0.5, 0.5, 0)
+        gt_bboxes_3d = LiDARInstance3DBoxes(
+            gt_bboxes_3d,
+            box_dim=gt_bboxes_3d.shape[-1],
+            origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)
+
+        anns_results = dict(
+            gt_bboxes_3d=gt_bboxes_3d,
+            gt_labels_3d=gt_labels_3d,
+        )
+        return anns_results
+
+    def _format_bbox(self, results, jsonfile_prefix=None):
+        """Convert the results to the standard format.
+
+        Args:
+            results (list[dict]): Testing results of the dataset.
+            jsonfile_prefix (str): The prefix of the output jsonfile.
+                You can specify the output directory/filename by
+                modifying the jsonfile_prefix. Default: None.
+
+        Returns:
+            str: Path of the output json file.
+        """
+        lyft_annos = {}
+        mapped_class_names = self.CLASSES
+
+        print('Start to convert detection format...')
+        for sample_id, det in enumerate(mmcv.track_iter_progress(results)):
+            annos = []
+            boxes = output_to_lyft_box(det)
+            sample_token = self.data_infos[sample_id]['token']
+            boxes = lidar_lyft_box_to_global(self.data_infos[sample_id], boxes)
+            for i, box in enumerate(boxes):
+                name = mapped_class_names[box.label]
+                lyft_anno = dict(
+                    sample_token=sample_token,
+                    translation=box.center.tolist(),
+                    size=box.wlh.tolist(),
+                    rotation=box.orientation.elements.tolist(),
+                    name=name,
+                    score=box.score)
+                annos.append(lyft_anno)
+            lyft_annos[sample_token] = annos
+        lyft_submissions = {
+            'meta': self.modality,
+            'results': lyft_annos,
+        }
+
+        mmcv.mkdir_or_exist(jsonfile_prefix)
+        res_path = osp.join(jsonfile_prefix, 'results_lyft.json')
+        print('Results writes to', res_path)
+        mmcv.dump(lyft_submissions, res_path)
+        return res_path
+
+    def _evaluate_single(self,
+                         result_path,
+                         logger=None,
+                         metric='bbox',
+                         result_name='pts_bbox'):
+        """Evaluation for a single model in Lyft protocol.
+
+        Args:
+            result_path (str): Path of the result file.
+            logger (logging.Logger | str, optional): Logger used for printing
+                related information during evaluation. Default: None.
+            metric (str, optional): Metric name used for evaluation.
+                Default: 'bbox'.
+            result_name (str, optional): Result name in the metric prefix.
+                Default: 'pts_bbox'.
+
+        Returns:
+            dict: Dictionary of evaluation details.
+        """
+
+        output_dir = osp.join(*osp.split(result_path)[:-1])
+        lyft = Lyft(
+            data_path=osp.join(self.data_root, self.version),
+            json_path=osp.join(self.data_root, self.version, self.version),
+            verbose=True)
+        eval_set_map = {
+            'v1.01-train': 'val',
+        }
+        metrics = lyft_eval(lyft, self.data_root, result_path,
+                            eval_set_map[self.version], output_dir, logger)
+
+        # record metrics
+        detail = dict()
+        metric_prefix = f'{result_name}_Lyft'
+
+        for i, name in enumerate(metrics['class_names']):
+            AP = float(metrics['mAPs_cate'][i])
+            detail[f'{metric_prefix}/{name}_AP'] = AP
+
+        detail[f'{metric_prefix}/mAP'] = metrics['Final mAP']
+        return detail
+
+    def format_results(self, results, jsonfile_prefix=None, csv_savepath=None):
+        """Format the results to json (standard format for COCO evaluation).
+
+        Args:
+            results (list[dict]): Testing results of the dataset.
+            jsonfile_prefix (str): The prefix of json files. It includes
+                the file path and the prefix of filename, e.g., "a/b/prefix".
+                If not specified, a temp file will be created. Default: None.
+            csv_savepath (str): The path for saving csv files.
+                It includes the file path and the csv filename,
+                e.g., "a/b/filename.csv". If not specified,
+                the result will not be converted to csv file.
+
+        Returns:
+            tuple: Returns (result_files, tmp_dir), where `result_files` is a
+                dict containing the json filepaths, `tmp_dir` is the temporal
+                directory created for saving json files when
+                `jsonfile_prefix` is not specified.
+        """
+        assert isinstance(results, list), 'results must be a list'
+        assert len(results) == len(self), (
+            'The length of results is not equal to the dataset len: {} != {}'.
+            format(len(results), len(self)))
+
+        if jsonfile_prefix is None:
+            tmp_dir = tempfile.TemporaryDirectory()
+            jsonfile_prefix = osp.join(tmp_dir.name, 'results')
+        else:
+            tmp_dir = None
+
+        # currently the output prediction results could be in two formats
+        # 1. list of dict('boxes_3d': ..., 'scores_3d': ..., 'labels_3d': ...)
+        # 2. list of dict('pts_bbox' or 'img_bbox':
+        #     dict('boxes_3d': ..., 'scores_3d': ..., 'labels_3d': ...))
+        # this is a workaround to enable evaluation of both formats on Lyft
+        # refer to https://github.com/open-mmlab/mmdetection3d/issues/449
+        if not ('pts_bbox' in results[0] or 'img_bbox' in results[0]):
+            result_files = self._format_bbox(results, jsonfile_prefix)
+        else:
+            # should take the inner dict out of 'pts_bbox' or 'img_bbox' dict
+            result_files = dict()
+            for name in results[0]:
+                print(f'\nFormating bboxes of {name}')
+                results_ = [out[name] for out in results]
+                tmp_file_ = osp.join(jsonfile_prefix, name)
+                result_files.update(
+                    {name: self._format_bbox(results_, tmp_file_)})
+        if csv_savepath is not None:
+            self.json2csv(result_files['pts_bbox'], csv_savepath)
+        return result_files, tmp_dir
+
+    def evaluate(self,
+                 results,
+                 metric='bbox',
+                 logger=None,
+                 jsonfile_prefix=None,
+                 csv_savepath=None,
+                 result_names=['pts_bbox'],
+                 show=False,
+                 out_dir=None,
+                 pipeline=None):
+        """Evaluation in Lyft protocol.
+
+        Args:
+            results (list[dict]): Testing results of the dataset.
+            metric (str | list[str], optional): Metrics to be evaluated.
+                Default: 'bbox'.
+            logger (logging.Logger | str, optional): Logger used for printing
+                related information during evaluation. Default: None.
+            jsonfile_prefix (str, optional): The prefix of json files including
+                the file path and the prefix of filename, e.g., "a/b/prefix".
+                If not specified, a temp file will be created. Default: None.
+            csv_savepath (str, optional): The path for saving csv files.
+                It includes the file path and the csv filename,
+                e.g., "a/b/filename.csv". If not specified,
+                the result will not be converted to csv file.
+            result_names (list[str], optional): Result names in the
+                metric prefix. Default: ['pts_bbox'].
+            show (bool, optional): Whether to visualize.
+                Default: False.
+            out_dir (str, optional): Path to save the visualization results.
+                Default: None.
+            pipeline (list[dict], optional): raw data loading for showing.
+                Default: None.
+
+        Returns:
+            dict[str, float]: Evaluation results.
+        """
+        result_files, tmp_dir = self.format_results(results, jsonfile_prefix,
+                                                    csv_savepath)
+
+        if isinstance(result_files, dict):
+            results_dict = dict()
+            for name in result_names:
+                print(f'Evaluating bboxes of {name}')
+                ret_dict = self._evaluate_single(result_files[name])
+            results_dict.update(ret_dict)
+        elif isinstance(result_files, str):
+            results_dict = self._evaluate_single(result_files)
+
+        if tmp_dir is not None:
+            tmp_dir.cleanup()
+
+        if show or out_dir:
+            self.show(results, out_dir, show=show, pipeline=pipeline)
+        return results_dict
+
+    def _build_default_pipeline(self):
+        """Build the default pipeline for this dataset."""
+        pipeline = [
+            dict(
+                type='LoadPointsFromFile',
+                coord_type='LIDAR',
+                load_dim=5,
+                use_dim=5,
+                file_client_args=dict(backend='disk')),
+            dict(
+                type='LoadPointsFromMultiSweeps',
+                sweeps_num=10,
+                file_client_args=dict(backend='disk')),
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=self.CLASSES,
+                with_label=False),
+            dict(type='Collect3D', keys=['points'])
+        ]
+        return Compose(pipeline)
+
+    def show(self, results, out_dir, show=False, pipeline=None):
+        """Results visualization.
+
+        Args:
+            results (list[dict]): List of bounding boxes results.
+            out_dir (str): Output directory of visualization result.
+            show (bool): Whether to visualize the results online.
+                Default: False.
+            pipeline (list[dict], optional): raw data loading for showing.
+                Default: None.
+        """
+        assert out_dir is not None, 'Expect out_dir, got none.'
+        pipeline = self._get_pipeline(pipeline)
+        for i, result in enumerate(results):
+            if 'pts_bbox' in result.keys():
+                result = result['pts_bbox']
+            data_info = self.data_infos[i]
+            pts_path = data_info['lidar_path']
+            file_name = osp.split(pts_path)[-1].split('.')[0]
+            points = self._extract_data(i, pipeline, 'points').numpy()
+            points = Coord3DMode.convert_point(points, Coord3DMode.LIDAR,
+                                               Coord3DMode.DEPTH)
+            inds = result['scores_3d'] > 0.1
+            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()
+            show_gt_bboxes = Box3DMode.convert(gt_bboxes, Box3DMode.LIDAR,
+                                               Box3DMode.DEPTH)
+            pred_bboxes = result['boxes_3d'][inds].tensor.numpy()
+            show_pred_bboxes = Box3DMode.convert(pred_bboxes, Box3DMode.LIDAR,
+                                                 Box3DMode.DEPTH)
+            show_result(points, show_gt_bboxes, show_pred_bboxes, out_dir,
+                        file_name, show)
+
+    def json2csv(self, json_path, csv_savepath):
+        """Convert the json file to csv format for submission.
+
+        Args:
+            json_path (str): Path of the result json file.
+            csv_savepath (str): Path to save the csv file.
+        """
+        results = mmcv.load(json_path)['results']
+        sample_list_path = osp.join(self.data_root, 'sample_submission.csv')
+        data = pd.read_csv(sample_list_path)
+        Id_list = list(data['Id'])
+        pred_list = list(data['PredictionString'])
+        cnt = 0
+        print('Converting the json to csv...')
+        for token in results.keys():
+            cnt += 1
+            predictions = results[token]
+            prediction_str = ''
+            for i in range(len(predictions)):
+                prediction_str += \
+                    str(predictions[i]['score']) + ' ' + \
+                    str(predictions[i]['translation'][0]) + ' ' + \
+                    str(predictions[i]['translation'][1]) + ' ' + \
+                    str(predictions[i]['translation'][2]) + ' ' + \
+                    str(predictions[i]['size'][0]) + ' ' + \
+                    str(predictions[i]['size'][1]) + ' ' + \
+                    str(predictions[i]['size'][2]) + ' ' + \
+                    str(Quaternion(list(predictions[i]['rotation']))
+                        .yaw_pitch_roll[0]) + ' ' + \
+                    predictions[i]['name'] + ' '
+            prediction_str = prediction_str[:-1]
+            idx = Id_list.index(token)
+            pred_list[idx] = prediction_str
+        df = pd.DataFrame({'Id': Id_list, 'PredictionString': pred_list})
+        mmcv.mkdir_or_exist(os.path.dirname(csv_savepath))
+        df.to_csv(csv_savepath, index=False)
+
+
+def output_to_lyft_box(detection):
+    """Convert the output to the box class in the Lyft.
+
+    Args:
+        detection (dict): Detection results.
+
+    Returns:
+        list[:obj:`LyftBox`]: List of standard LyftBoxes.
+    """
+    box3d = detection['boxes_3d']
+    scores = detection['scores_3d'].numpy()
+    labels = detection['labels_3d'].numpy()
+
+    box_gravity_center = box3d.gravity_center.numpy()
+    box_dims = box3d.dims.numpy()
+    box_yaw = box3d.yaw.numpy()
+
+    # our LiDAR coordinate system -> Lyft box coordinate system
+    lyft_box_dims = box_dims[:, [1, 0, 2]]
+
+    box_list = []
+    for i in range(len(box3d)):
+        quat = Quaternion(axis=[0, 0, 1], radians=box_yaw[i])
+        box = LyftBox(
+            box_gravity_center[i],
+            lyft_box_dims[i],
+            quat,
+            label=labels[i],
+            score=scores[i])
+        box_list.append(box)
+    return box_list
+
+
+def lidar_lyft_box_to_global(info, boxes):
+    """Convert the box from ego to global coordinate.
+
+    Args:
+        info (dict): Info for a specific sample data, including the
+            calibration information.
+        boxes (list[:obj:`LyftBox`]): List of predicted LyftBoxes.
+
+    Returns:
+        list: List of standard LyftBoxes in the global
+            coordinate.
+    """
+    box_list = []
+    for box in boxes:
+        # Move box to ego vehicle coord system
+        box.rotate(Quaternion(info['lidar2ego_rotation']))
+        box.translate(np.array(info['lidar2ego_translation']))
+        # Move box to global coord system
+        box.rotate(Quaternion(info['ego2global_rotation']))
+        box.translate(np.array(info['ego2global_translation']))
+        box_list.append(box)
+    return box_list
--- a/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/nuscenes_dataset.py
+++ b/docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/nuscenes_dataset.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import tempfile
+from os import path as osp
+
+import mmcv
+import numpy as np
+import pyquaternion
+from nuscenes.utils.data_classes import Box as NuScenesBox
+
+from ..core import show_result
+from ..core.bbox import Box3DMode, Coord3DMode, LiDARInstance3DBoxes
+from .builder import DATASETS
+from .custom_3d import Custom3DDataset
+from .pipelines import Compose
+
+
+@DATASETS.register_module()
+class NuScenesDataset(Custom3DDataset):
+    r"""NuScenes Dataset.
+
+    This class serves as the API for experiments on the NuScenes Dataset.
+
+    Please refer to `NuScenes Dataset <https://www.nuscenes.org/download>`_
+    for data downloading.
+
+    Args:
+        ann_file (str): Path of annotation file.
+        pipeline (list[dict], optional): Pipeline used for data processing.
+            Defaults to None.
+        data_root (str): Path of dataset root.
+        classes (tuple[str], optional): Classes used in the dataset.
+            Defaults to None.
+        load_interval (int, optional): Interval of loading the dataset. It is
+            used to uniformly sample the dataset. Defaults to 1.
+        with_velocity (bool, optional): Whether include velocity prediction
+            into the experiments. Defaults to True.
+        modality (dict, optional): Modality to specify the sensor data used
+            as input. Defaults to None.
+        box_type_3d (str, optional): Type of 3D box of this dataset.
+            Based on the `box_type_3d`, the dataset will encapsulate the box
+            to its original format then converted them to `box_type_3d`.
+            Defaults to 'LiDAR' in this dataset. Available options includes.
+            - 'LiDAR': Box in LiDAR coordinates.
+            - 'Depth': Box in depth coordinates, usually for indoor dataset.
+            - 'Camera': Box in camera coordinates.
+        filter_empty_gt (bool, optional): Whether to filter empty GT.
+            Defaults to True.
+        test_mode (bool, optional): Whether the dataset is in test mode.
+            Defaults to False.
+        eval_version (bool, optional): Configuration version of evaluation.
+            Defaults to  'detection_cvpr_2019'.
+        use_valid_flag (bool, optional): Whether to use `use_valid_flag` key
+            in the info file as mask to filter gt_boxes and gt_names.
+            Defaults to False.
+    """
+    NameMapping = {
+        'movable_object.barrier': 'barrier',
+        'vehicle.bicycle': 'bicycle',
+        'vehicle.bus.bendy': 'bus',
+        'vehicle.bus.rigid': 'bus',
+        'vehicle.car': 'car',
+        'vehicle.construction': 'construction_vehicle',
+        'vehicle.motorcycle': 'motorcycle',
+        'human.pedestrian.adult': 'pedestrian',
+        'human.pedestrian.child': 'pedestrian',
+        'human.pedestrian.construction_worker': 'pedestrian',
+        'human.pedestrian.police_officer': 'pedestrian',
+        'movable_object.trafficcone': 'traffic_cone',
+        'vehicle.trailer': 'trailer',
+        'vehicle.truck': 'truck'
+    }
+    DefaultAttribute = {
+        'car': 'vehicle.parked',
+        'pedestrian': 'pedestrian.moving',
+        'trailer': 'vehicle.parked',
+        'truck': 'vehicle.parked',
+        'bus': 'vehicle.moving',
+        'motorcycle': 'cycle.without_rider',
+        'construction_vehicle': 'vehicle.parked',
+        'bicycle': 'cycle.without_rider',
+        'barrier': '',
+        'traffic_cone': '',
+    }
+    AttrMapping = {
+        'cycle.with_rider': 0,
+        'cycle.without_rider': 1,
+        'pedestrian.moving': 2,
+        'pedestrian.standing': 3,
+        'pedestrian.sitting_lying_down': 4,
+        'vehicle.moving': 5,
+        'vehicle.parked': 6,
+        'vehicle.stopped': 7,
+    }
+    AttrMapping_rev = [
+        'cycle.with_rider',
+        'cycle.without_rider',
+        'pedestrian.moving',
+        'pedestrian.standing',
+        'pedestrian.sitting_lying_down',
+        'vehicle.moving',
+        'vehicle.parked',
+        'vehicle.stopped',
+    ]
+    # https://github.com/nutonomy/nuscenes-devkit/blob/57889ff20678577025326cfc24e57424a829be0a/python-sdk/nuscenes/eval/detection/evaluate.py#L222 # noqa
+    ErrNameMapping = {
+        'trans_err': 'mATE',
+        'scale_err': 'mASE',
+        'orient_err': 'mAOE',
+        'vel_err': 'mAVE',
+        'attr_err': 'mAAE'
+    }
+    CLASSES = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
+               'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',
+               'barrier')
+
+    def __init__(self,
+                 ann_file,
+                 pipeline=None,
+                 data_root=None,
+                 classes=None,
+                 load_interval=1,
+                 with_velocity=True,
+                 modality=None,
+                 box_type_3d='LiDAR',
+                 filter_empty_gt=True,
+                 test_mode=False,
+                 eval_version='detection_cvpr_2019',
+                 use_valid_flag=False):
+        self.load_interval = load_interval
+        self.use_valid_flag = use_valid_flag
+        super().__init__(
+            data_root=data_root,
+            ann_file=ann_file,
+            pipeline=pipeline,
+            classes=classes,
+            modality=modality,
+            box_type_3d=box_type_3d,
+            filter_empty_gt=filter_empty_gt,
+            test_mode=test_mode)
+
+        self.with_velocity = with_velocity
+        self.eval_version = eval_version
+        from nuscenes.eval.detection.config import config_factory
+        self.eval_detection_configs = config_factory(self.eval_version)
+        if self.modality is None:
+            self.modality = dict(
+                use_camera=False,
+                use_lidar=True,
+                use_radar=False,
+                use_map=False,
+                use_external=False,
+            )
+
+    def get_cat_ids(self, idx):
+        """Get category distribution of single scene.
+
+        Args:
+            idx (int): Index of the data_info.
+
+        Returns:
+            dict[list]: for each category, if the current scene
+                contains such boxes, store a list containing idx,
+                otherwise, store empty list.
+        """
+        info = self.data_infos[idx]
+        if self.use_valid_flag:
+            mask = info['valid_flag']
+            gt_names = set(info['gt_names'][mask])
+        else:
+            gt_names = set(info['gt_names'])
+
+        cat_ids = []
+        for name in gt_names:
+            if name in self.CLASSES:
+                cat_ids.append(self.cat2id[name])
+        return cat_ids
+
+    def load_annotations(self, ann_file):
+        """Load annotations from ann_file.
+
+        Args:
+            ann_file (str): Path of the annotation file.
+
+        Returns:
+            list[dict]: List of annotations sorted by timestamps.
+        """
+        data = mmcv.load(ann_file, file_format='pkl')
+        data_infos = list(sorted(data['infos'], key=lambda e: e['timestamp']))
+        data_infos = data_infos[::self.load_interval]
+        self.metadata = data['metadata']
+        self.version = self.metadata['version']
+        return data_infos
+
+    def get_data_info(self, index):
+        """Get data info according to the given index.
+
+        Args:
+            index (int): Index of the sample data to get.
+
+        Returns:
+            dict: Data information that will be passed to the data
+                preprocessing pipelines. It includes the following keys:
+
+                - sample_idx (str): Sample index.
+                - pts_filename (str): Filename of point clouds.
+                - sweeps (list[dict]): Infos of sweeps.
+                - timestamp (float): Sample timestamp.
+                - img_filename (str, optional): Image filename.
+                - lidar2img (list[np.ndarray], optional): Transformations
+                    from lidar to different cameras.
+                - ann_info (dict): Annotation info.
+        """
+        info = self.data_infos[index]
+        # standard protocol modified from SECOND.Pytorch
+        input_dict = dict(
+            sample_idx=info['token'],
+            pts_filename=info['lidar_path'],
+            sweeps=info['sweeps'],
+            timestamp=info['timestamp'] / 1e6,
+        )
+
+        if self.modality['use_camera']:
+            image_paths = []
+            lidar2img_rts = []
+            for cam_type, cam_info in info['cams'].items():
+                image_paths.append(cam_info['data_path'])
+                # obtain lidar to image transformation matrix
+                lidar2cam_r = np.linalg.inv(cam_info['sensor2lidar_rotation'])
+                lidar2cam_t = cam_info[
+                    'sensor2lidar_translation'] @ lidar2cam_r.T
+                lidar2cam_rt = np.eye(4)
+                lidar2cam_rt[:3, :3] = lidar2cam_r.T
+                lidar2cam_rt[3, :3] = -lidar2cam_t
+                intrinsic = cam_info['cam_intrinsic']
+                viewpad = np.eye(4)
+                viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = intrinsic
+                lidar2img_rt = (viewpad @ lidar2cam_rt.T)
+                lidar2img_rts.append(lidar2img_rt)
+
+            input_dict.update(
+                dict(
+                    img_filename=image_paths,
+                    lidar2img=lidar2img_rts,
+                ))
+
+        if not self.test_mode:
+            annos = self.get_ann_info(index)
+            input_dict['ann_info'] = annos
+
+        return input_dict
+
+    def get_ann_info(self, index):
+        """Get annotation info according to the given index.
+
+        Args:
+            index (int): Index of the annotation data to get.
+
+        Returns:
+            dict: Annotation information consists of the following keys:
+
+                - gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`):
+                    3D ground truth bboxes
+                - gt_labels_3d (np.ndarray): Labels of ground truths.
+                - gt_names (list[str]): Class names of ground truths.
+        """
+        info = self.data_infos[index]
+        # filter out bbox containing no points
+        if self.use_valid_flag:
+            mask = info['valid_flag']
+        else:
+            mask = info['num_lidar_pts'] > 0
+        gt_bboxes_3d = info['gt_boxes'][mask]
+        gt_names_3d = info['gt_names'][mask]
+        gt_labels_3d = []
+        for cat in gt_names_3d:
+            if cat in self.CLASSES:
+                gt_labels_3d.append(self.CLASSES.index(cat))
+            else:
+                gt_labels_3d.append(-1)
+        gt_labels_3d = np.array(gt_labels_3d)
+
+        if self.with_velocity:
+            gt_velocity = info['gt_velocity'][mask]
+            nan_mask = np.isnan(gt_velocity[:, 0])
+            gt_velocity[nan_mask] = [0.0, 0.0]
+            gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_velocity], axis=-1)
+
+        # the nuscenes box center is [0.5, 0.5, 0.5], we change it to be
+        # the same as KITTI (0.5, 0.5, 0)
+        gt_bboxes_3d = LiDARInstance3DBoxes(
+            gt_bboxes_3d,
+            box_dim=gt_bboxes_3d.shape[-1],
+            origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)
+
+        anns_results = dict(
+            gt_bboxes_3d=gt_bboxes_3d,
+            gt_labels_3d=gt_labels_3d,
+            gt_names=gt_names_3d)
+        return anns_results
+
+    def _format_bbox(self, results, jsonfile_prefix=None):
+        """Convert the results to the standard format.
+
+        Args:
+            results (list[dict]): Testing results of the dataset.
+            jsonfile_prefix (str): The prefix of the output jsonfile.
+                You can specify the output directory/filename by
+                modifying the jsonfile_prefix. Default: None.
+
+        Returns:
+            str: Path of the output json file.
+        """
+        nusc_annos = {}
+        mapped_class_names = self.CLASSES
+
+        print('Start to convert detection format...')
+        for sample_id, det in enumerate(mmcv.track_iter_progress(results)):
+            annos = []
+            boxes = output_to_nusc_box(det, self.with_velocity)
+            sample_token = self.data_infos[sample_id]['token']
+            boxes = lidar_nusc_box_to_global(self.data_infos[sample_id], boxes,
+                                             mapped_class_names,
+                                             self.eval_detection_configs,
+                                             self.eval_version)
+            for i, box in enumerate(boxes):
+                name = mapped_class_names[box.label]
+                if np.sqrt(box.velocity[0]**2 + box.velocity[1]**2) > 0.2:
+                    if name in [
+                            'car',
+                            'construction_vehicle',
+                            'bus',
+                            'truck',
+                            'trailer',
+                    ]:
+                        attr = 'vehicle.moving'
+                    elif name in ['bicycle', 'motorcycle']:
+                        attr = 'cycle.with_rider'
+                    else:
+                        attr = NuScenesDataset.DefaultAttribute[name]
+                else:
+                    if name in ['pedestrian']:
+                        attr = 'pedestrian.standing'
+                    elif name in ['bus']:
+                        attr = 'vehicle.stopped'
+                    else:
+                        attr = NuScenesDataset.DefaultAttribute[name]
+
+                nusc_anno = dict(
+                    sample_token=sample_token,
+                    translation=box.center.tolist(),
+                    size=box.wlh.tolist(),
+                    rotation=box.orientation.elements.tolist(),
+                    velocity=box.velocity[:2].tolist(),
+                    detection_name=name,
+                    detection_score=box.score,
+                    attribute_name=attr)
+                annos.append(nusc_anno)
+            nusc_annos[sample_token] = annos
+        nusc_submissions = {
+            'meta': self.modality,
+            'results': nusc_annos,
+        }
+
+        mmcv.mkdir_or_exist(jsonfile_prefix)
+        res_path = osp.join(jsonfile_prefix, 'results_nusc.json')
+        print('Results writes to', res_path)
+        mmcv.dump(nusc_submissions, res_path)
+        return res_path
+
+    def _evaluate_single(self,
+                         result_path,
+                         logger=None,
+                         metric='bbox',
+                         result_name='pts_bbox'):
+        """Evaluation for a single model in nuScenes protocol.
+
+        Args:
+            result_path (str): Path of the result file.
+            logger (logging.Logger | str, optional): Logger used for printing
+                related information during evaluation. Default: None.
+            metric (str, optional): Metric name used for evaluation.
+                Default: 'bbox'.
+            result_name (str, optional): Result name in the metric prefix.
+                Default: 'pts_bbox'.
+
+        Returns:
+            dict: Dictionary of evaluation details.
+        """
+        from nuscenes import NuScenes
+        from nuscenes.eval.detection.evaluate import NuScenesEval
+
+        output_dir = osp.join(*osp.split(result_path)[:-1])
+        nusc = NuScenes(
+            version=self.version, dataroot=self.data_root, verbose=False)
+        eval_set_map = {
+            'v1.0-mini': 'mini_val',
+            'v1.0-trainval': 'val',
+        }
+        nusc_eval = NuScenesEval(
+            nusc,
+            config=self.eval_detection_configs,
+            result_path=result_path,
+            eval_set=eval_set_map[self.version],
+            output_dir=output_dir,
+            verbose=False)
+        nusc_eval.main(render_curves=False)
+
+        # record metrics
+        metrics = mmcv.load(osp.join(output_dir, 'metrics_summary.json'))
+        detail = dict()
+        metric_prefix = f'{result_name}_NuScenes'
+        for name in self.CLASSES:
+            for k, v in metrics['label_aps'][name].items():
+                val = float('{:.4f}'.format(v))
+                detail['{}/{}_AP_dist_{}'.format(metric_prefix, name, k)] = val
+            for k, v in metrics['label_tp_errors'][name].items():
+                val = float('{:.4f}'.format(v))
+                detail['{}/{}_{}'.format(metric_prefix, name, k)] = val
+            for k, v in metrics['tp_errors'].items():
+                val = float('{:.4f}'.format(v))
+                detail['{}/{}'.format(metric_prefix,
+                                      self.ErrNameMapping[k])] = val
+
+        detail['{}/NDS'.format(metric_prefix)] = metrics['nd_score']
+        detail['{}/mAP'.format(metric_prefix)] = metrics['mean_ap']
+        return detail
+
+    def format_results(self, results, jsonfile_prefix=None):
+        """Format the results to json (standard format for COCO evaluation).
+
+        Args:
+            results (list[dict]): Testing results of the dataset.
+            jsonfile_prefix (str): The prefix of json files. It includes
+                the file path and the prefix of filename, e.g., "a/b/prefix".
+                If not specified, a temp file will be created. Default: None.
+
+        Returns:
+            tuple: Returns (result_files, tmp_dir), where `result_files` is a
+                dict containing the json filepaths, `tmp_dir` is the temporal
+                directory created for saving json files when
+                `jsonfile_prefix` is not specified.
+        """
+        assert isinstance(results, list), 'results must be a list'
+        assert len(results) == len(self), (
+            'The length of results is not equal to the dataset len: {} != {}'.
+            format(len(results), len(self)))
+
+        if jsonfile_prefix is None:
+            tmp_dir = tempfile.TemporaryDirectory()
+            jsonfile_prefix = osp.join(tmp_dir.name, 'results')
+        else:
+            tmp_dir = None
+
+        # currently the output prediction results could be in two formats
+        # 1. list of dict('boxes_3d': ..., 'scores_3d': ..., 'labels_3d': ...)
+        # 2. list of dict('pts_bbox' or 'img_bbox':
+        #     dict('boxes_3d': ..., 'scores_3d': ..., 'labels_3d': ...))
+        # this is a workaround to enable evaluation of both formats on nuScenes
+        # refer to https://github.com/open-mmlab/mmdetection3d/issues/449
+        if not ('pts_bbox' in results[0] or 'img_bbox' in results[0]):
+            result_files = self._format_bbox(results, jsonfile_prefix)
+        else:
+            # should take the inner dict out of 'pts_bbox' or 'img_bbox' dict
+            result_files = dict()
+            for name in results[0]:
+                print(f'\nFormating bboxes of {name}')
+                results_ = [out[name] for out in results]
+                tmp_file_ = osp.join(jsonfile_prefix, name)
+                result_files.update(
+                    {name: self._format_bbox(results_, tmp_file_)})
+        return result_files, tmp_dir
+
+    def evaluate(self,
+                 results,
+                 metric='bbox',
+                 logger=None,
+                 jsonfile_prefix=None,
+                 result_names=['pts_bbox'],
+                 show=False,
+                 out_dir=None,
+                 pipeline=None):
+        """Evaluation in nuScenes protocol.
+
+        Args:
+            results (list[dict]): Testing results of the dataset.
+            metric (str | list[str], optional): Metrics to be evaluated.
+                Default: 'bbox'.
+            logger (logging.Logger | str, optional): Logger used for printing
+                related information during evaluation. Default: None.
+            jsonfile_prefix (str, optional): The prefix of json files including
+                the file path and the prefix of filename, e.g., "a/b/prefix".
+                If not specified, a temp file will be created. Default: None.
+            show (bool, optional): Whether to visualize.
+                Default: False.
+            out_dir (str, optional): Path to save the visualization results.
+                Default: None.
+            pipeline (list[dict], optional): raw data loading for showing.
+                Default: None.
+
+        Returns:
+            dict[str, float]: Results of each evaluation metric.
+        """
+        result_files, tmp_dir = self.format_results(results, jsonfile_prefix)
+
+        if isinstance(result_files, dict):
+            results_dict = dict()
+            for name in result_names:
+                print('Evaluating bboxes of {}'.format(name))
+                ret_dict = self._evaluate_single(result_files[name])
+            results_dict.update(ret_dict)
+        elif isinstance(result_files, str):
+            results_dict = self._evaluate_single(result_files)
+
+        if tmp_dir is not None:
+            tmp_dir.cleanup()
+
+        if show or out_dir:
+            self.show(results, out_dir, show=show, pipeline=pipeline)
+        return results_dict
+
+    def _build_default_pipeline(self):
+        """Build the default pipeline for this dataset."""
+        pipeline = [
+            dict(
+                type='LoadPointsFromFile',
+                coord_type='LIDAR',
+                load_dim=5,
+                use_dim=5,
+                file_client_args=dict(backend='disk')),
+            dict(
+                type='LoadPointsFromMultiSweeps',
+                sweeps_num=10,
+                file_client_args=dict(backend='disk')),
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=self.CLASSES,
+                with_label=False),
+            dict(type='Collect3D', keys=['points'])
+        ]
+        return Compose(pipeline)
+
+    def show(self, results, out_dir, show=False, pipeline=None):
+        """Results visualization.
+
+        Args:
+            results (list[dict]): List of bounding boxes results.
+            out_dir (str): Output directory of visualization result.
+            show (bool): Whether to visualize the results online.
+                Default: False.
+            pipeline (list[dict], optional): raw data loading for showing.
+                Default: None.
+        """
+        assert out_dir is not None, 'Expect out_dir, got none.'
+        pipeline = self._get_pipeline(pipeline)
+        for i, result in enumerate(results):
+            if 'pts_bbox' in result.keys():
+                result = result['pts_bbox']
+            data_info = self.data_infos[i]
+            pts_path = data_info['lidar_path']
+            file_name = osp.split(pts_path)[-1].split('.')[0]
+            points = self._extract_data(i, pipeline, 'points').numpy()
+            # for now we convert points into depth mode
+            points = Coord3DMode.convert_point(points, Coord3DMode.LIDAR,
+                                               Coord3DMode.DEPTH)
+            inds = result['scores_3d'] > 0.1
+            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()
+            show_gt_bboxes = Box3DMode.convert(gt_bboxes, Box3DMode.LIDAR,
+                                               Box3DMode.DEPTH)
+            pred_bboxes = result['boxes_3d'][inds].tensor.numpy()
+            show_pred_bboxes = Box3DMode.convert(pred_bboxes, Box3DMode.LIDAR,
+                                                 Box3DMode.DEPTH)
+            show_result(points, show_gt_bboxes, show_pred_bboxes, out_dir,
+                        file_name, show)
+
+
+def output_to_nusc_box(detection, with_velocity=True):
+    """Convert the output to the box class in the nuScenes.
+
+    Args:
+        detection (dict): Detection results.
+
+            - boxes_3d (:obj:`BaseInstance3DBoxes`): Detection bbox.
+            - scores_3d (torch.Tensor): Detection scores.
+            - labels_3d (torch.Tensor): Predicted box labels.
+
+    Returns:
+        list[:obj:`NuScenesBox`]: List of standard NuScenesBoxes.
+    """
+    box3d = detection['boxes_3d']
+    scores = detection['scores_3d'].numpy()
+    labels = detection['labels_3d'].numpy()
+
+    box_gravity_center = box3d.gravity_center.numpy()
+    box_dims = box3d.dims.numpy()
+    box_yaw = box3d.yaw.numpy()
+
+    # our LiDAR coordinate system -> nuScenes box coordinate system
+    nus_box_dims = box_dims[:, [1, 0, 2]]
+
+    box_list = []
+    for i in range(len(box3d)):
+        quat = pyquaternion.Quaternion(axis=[0, 0, 1], radians=box_yaw[i])
+        if with_velocity:
+            velocity = (*box3d.tensor[i, 7:9], 0.0)
+        else:
+            velocity = (0, 0, 0)
+        # velo_val = np.linalg.norm(box3d[i, 7:9])
+        # velo_ori = box3d[i, 6]
+        # velocity = (
+        # velo_val * np.cos(velo_ori), velo_val * np.sin(velo_ori), 0.0)
+        box = NuScenesBox(
+            box_gravity_center[i],
+            nus_box_dims[i],
+            quat,
+            label=labels[i],
+            score=scores[i],
+            velocity=velocity)
+        box_list.append(box)
+    return box_list
+
+
+def lidar_nusc_box_to_global(info,
+                             boxes,
+                             classes,
+                             eval_configs,
+                             eval_version='detection_cvpr_2019'):
+    """Convert the box from ego to global coordinate.
+
+    Args:
+        info (dict): Info for a specific sample data, including the
+            calibration information.
+        boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
+        classes (list[str]): Mapped classes in the evaluation.
+        eval_configs (object): Evaluation configuration object.
+        eval_version (str, optional): Evaluation version.
+            Default: 'detection_cvpr_2019'
+
+    Returns:
+        list: List of standard NuScenesBoxes in the global
+            coordinate.
+    """
+    box_list = []
+    for box in boxes:
+        # Move box to ego vehicle coord system
+        box.rotate(pyquaternion.Quaternion(info['lidar2ego_rotation']))
+        box.translate(np.array(info['lidar2ego_translation']))
+        # filter det in ego.
+        cls_range_map = eval_configs.class_range
+        radius = np.linalg.norm(box.center[:2], 2)
+        det_range = cls_range_map[classes[box.label]]
+        if radius > det_range:
+            continue
+        # Move box to global coord system
+        box.rotate(pyquaternion.Quaternion(info['ego2global_rotation']))
+        box.translate(np.array(info['ego2global_translation']))
+        box_list.append(box)
+    return box_list