Commit afe88104 authored by lishj6 (init0905), parent a48c4071
export PYTHONPATH="$(dirname $0)/..":$PYTHONPATH
# python tools/data_converter/nuscenes_converter.py nuscenes \
# --root-path ./data/nuscenes \
# --canbus ./data/nuscenes \
# --out-dir ./data/infos/ \
# --extra-tag nuscenes \
# --version v1.0-mini
python tools/data_converter/nuscenes_converter.py nuscenes \
--root-path ./data/nuscenes \
--canbus ./data/nuscenes \
--out-dir ./data/infos/ \
--extra-tag nuscenes \
--version v1.0
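# The kmeans scripts below cluster anchor priors (detection centers, map polylines,
# agent motion intentions and ego plans) from the train info pkl generated above,
# so they must run after the converter has finished.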
python tools/kmeans/kmeans_det.py
python tools/kmeans/kmeans_map.py
python tools/kmeans/kmeans_motion.py
python tools/kmeans/kmeans_plan.py
bash ./tools/dist_test.sh \
projects/configs/sparsedrive_small_stage2.py \
ckpt/sparsedrive_stage2.pth \
8 \
--deterministic \
--eval bbox
# --result_file ./work_dirs/sparsedrive_small_stage2/results.pkl
## stage1
bash ./tools/dist_train.sh \
projects/configs/sparsedrive_small_stage1.py \
8 \
--deterministic
# ## stage2
# bash ./tools/dist_train.sh \
# projects/configs/sparsedrive_small_stage2.py \
# 8 \
# --deterministic
export PYTHONPATH="$(dirname $0)/..":$PYTHONPATH
python tools/visualization/visualize.py \
projects/configs/sparsedrive_small_stage2.py \
--result-path work_dirs/sparsedrive_small_stage2/results.pkl
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import time
import torch
from mmcv import Config
from mmcv.parallel import MMDataParallel
from mmcv.runner import load_checkpoint, wrap_fp16_model
import sys
sys.path.append('.')
from projects.mmdet3d_plugin.datasets.builder import build_dataloader
from projects.mmdet3d_plugin.datasets import custom_build_dataset
from mmdet.models import build_detector
from mmcv.cnn.utils.flops_counter import add_flops_counting_methods
from mmcv.parallel import scatter
def parse_args():
parser = argparse.ArgumentParser(description='MMDet benchmark a model')
parser.add_argument('config', help='test config file path')
parser.add_argument('--checkpoint', default=None, help='checkpoint file')
    parser.add_argument(
        '--samples', type=int, default=1000, help='samples to benchmark')
    parser.add_argument(
        '--log-interval', type=int, default=50, help='interval of logging')
parser.add_argument(
'--fuse-conv-bn',
action='store_true',
        help='Whether to fuse conv and bn, this will slightly increase '
        'the inference speed')
args = parser.parse_args()
return args
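# Usage sketch (paths illustrative): report FPS, GPU memory and FLOPs for a config,
#   python tools/benchmark.py projects/configs/sparsedrive_small_stage2.py \
#       --checkpoint ckpt/sparsedrive_stage2.pth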
def get_max_memory(model):
device = getattr(model, 'output_device', None)
mem = torch.cuda.max_memory_allocated(device=device)
mem_mb = torch.tensor([mem / (1024 * 1024)],
dtype=torch.int,
device=device)
return mem_mb.item()
def main():
args = parse_args()
get_flops_params(args)
get_mem_fps(args)
def get_mem_fps(args):
cfg = Config.fromfile(args.config)
# set cudnn_benchmark
if cfg.get('cudnn_benchmark', False):
torch.backends.cudnn.benchmark = True
cfg.model.pretrained = None
cfg.data.test.test_mode = True
# build the dataloader
# TODO: support multiple images per gpu (only minor changes are needed)
print(cfg.data.test)
dataset = custom_build_dataset(cfg.data.test)
data_loader = build_dataloader(
dataset,
samples_per_gpu=1,
workers_per_gpu=cfg.data.workers_per_gpu,
dist=False,
shuffle=False)
# build the model and load checkpoint
cfg.model.train_cfg = None
model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
fp16_cfg = cfg.get('fp16', None)
if fp16_cfg is not None:
wrap_fp16_model(model)
if args.checkpoint is not None:
load_checkpoint(model, args.checkpoint, map_location='cpu')
# if args.fuse_conv_bn:
# model = fuse_module(model)
model = MMDataParallel(model, device_ids=[0])
model.eval()
# the first several iterations may be very slow so skip them
num_warmup = 5
pure_inf_time = 0
# benchmark with several samples and take the average
max_memory = 0
for i, data in enumerate(data_loader):
# torch.cuda.synchronize()
with torch.no_grad():
start_time = time.perf_counter()
model(return_loss=False, rescale=True, **data)
torch.cuda.synchronize()
elapsed = time.perf_counter() - start_time
max_memory = max(max_memory, get_max_memory(model))
if i >= num_warmup:
pure_inf_time += elapsed
if (i + 1) % args.log_interval == 0:
fps = (i + 1 - num_warmup) / pure_inf_time
print(f'Done image [{i + 1:<3}/ {args.samples}], '
f'fps: {fps:.1f} img / s, '
f"gpu mem: {max_memory} M")
if (i + 1) == args.samples:
pure_inf_time += elapsed
fps = (i + 1 - num_warmup) / pure_inf_time
print(f'Overall fps: {fps:.1f} img / s')
break
def get_flops_params(args):
gpu_id = 0
cfg = Config.fromfile(args.config)
dataset = custom_build_dataset(cfg.data.val)
dataloader = build_dataloader(
dataset,
samples_per_gpu=1,
workers_per_gpu=0,
dist=False,
shuffle=False,
)
data_iter = dataloader.__iter__()
data = next(data_iter)
data = scatter(data, [gpu_id])[0]
cfg.model.train_cfg = None
model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
fp16_cfg = cfg.get('fp16', None)
if fp16_cfg is not None:
wrap_fp16_model(model)
if args.checkpoint is not None:
load_checkpoint(model, args.checkpoint, map_location='cpu')
model = model.cuda(gpu_id)
model.eval()
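    # The generic flops counter below cannot see the custom deformable aggregation
    # op used by the det/map heads, so its cost is estimated analytically as
    #   num_decoder * embed_dims * num_levels * num_anchor * num_cams * num_key_pts
    # times the per-sample bilinear cost, and added on top for the head/total rows.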
bilinear_flops = 11
num_key_pts_det = (
cfg.model["head"]['det_head']["deformable_model"]["kps_generator"]["num_learnable_pts"]
+ len(cfg.model["head"]['det_head']["deformable_model"]["kps_generator"]["fix_scale"])
)
deformable_agg_flops_det = (
cfg.num_decoder
* cfg.embed_dims
* cfg.num_levels
* cfg.model["head"]['det_head']["instance_bank"]["num_anchor"]
* cfg.model["head"]['det_head']["deformable_model"]["num_cams"]
* num_key_pts_det
* bilinear_flops
)
num_key_pts_map = (
cfg.model["head"]['map_head']["deformable_model"]["kps_generator"]["num_learnable_pts"]
+ len(cfg.model["head"]['map_head']["deformable_model"]["kps_generator"]["fix_height"])
) * cfg.model["head"]['map_head']["deformable_model"]["kps_generator"]["num_sample"]
deformable_agg_flops_map = (
cfg.num_decoder
* cfg.embed_dims
* cfg.num_levels
* cfg.model["head"]['map_head']["instance_bank"]["num_anchor"]
* cfg.model["head"]['map_head']["deformable_model"]["num_cams"]
* num_key_pts_map
* bilinear_flops
)
deformable_agg_flops = deformable_agg_flops_det + deformable_agg_flops_map
for module in ["total", "img_backbone", "img_neck", "head"]:
if module != "total":
flops_model = add_flops_counting_methods(getattr(model, module))
else:
flops_model = add_flops_counting_methods(model)
flops_model.eval()
flops_model.start_flops_count()
if module == "img_backbone":
flops_model(data["img"].flatten(0, 1))
elif module == "img_neck":
flops_model(model.img_backbone(data["img"].flatten(0, 1)))
elif module == "head":
flops_model(model.extract_feat(data["img"], metas=data), data)
else:
flops_model(**data)
flops_count, params_count = flops_model.compute_average_flops_cost()
flops_count *= flops_model.__batch_counter__
flops_model.stop_flops_count()
if module == "head" or module == "total":
flops_count += deformable_agg_flops
if module == "total":
total_flops = flops_count
total_params = params_count
print(
f"{module:<13} complexity: "
f"FLOPs={flops_count/ 10.**9:>8.4f} G / {flops_count/total_flops*100:>6.2f}%, "
f"Params={params_count/10**6:>8.4f} M / {params_count/total_params*100:>6.2f}%."
)
if __name__ == '__main__':
main()
# Copyright (c) OpenMMLab. All rights reserved.
import os
import math
import copy
import argparse
from os import path as osp
from collections import OrderedDict
from typing import List, Tuple, Union
import numpy as np
from pyquaternion import Quaternion
from shapely.geometry import MultiPoint, box
import mmcv
from nuscenes.nuscenes import NuScenes
from nuscenes.can_bus.can_bus_api import NuScenesCanBus
from nuscenes.utils.geometry_utils import transform_matrix
from nuscenes.utils.data_classes import Box
from nuscenes.utils.geometry_utils import view_points
from nuscenes.prediction import PredictHelper, convert_local_coords_to_global
from projects.mmdet3d_plugin.datasets.map_utils.nuscmap_extractor import NuscMapExtractor
NameMapping = {
"movable_object.barrier": "barrier",
"vehicle.bicycle": "bicycle",
"vehicle.bus.bendy": "bus",
"vehicle.bus.rigid": "bus",
"vehicle.car": "car",
"vehicle.construction": "construction_vehicle",
"vehicle.motorcycle": "motorcycle",
"human.pedestrian.adult": "pedestrian",
"human.pedestrian.child": "pedestrian",
"human.pedestrian.construction_worker": "pedestrian",
"human.pedestrian.police_officer": "pedestrian",
"movable_object.trafficcone": "traffic_cone",
"vehicle.trailer": "trailer",
"vehicle.truck": "truck",
}
def quart_to_rpy(qua):
x, y, z, w = qua
roll = math.atan2(2 * (w * x + y * z), 1 - 2 * (x * x + y * y))
pitch = math.asin(2 * (w * y - x * z))
yaw = math.atan2(2 * (w * z + x * y), 1 - 2 * (z * z + y * y))
return roll, pitch, yaw
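# locate_message returns the index of the CAN bus message whose utime is closest to
# the query utime; utimes is assumed to be sorted ascending. For example,
# locate_message(np.array([10, 20, 30]), 19) -> 1 (20 is nearer than 10), while
# locate_message(np.array([10, 20, 30]), 12) -> 0.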
def locate_message(utimes, utime):
i = np.searchsorted(utimes, utime)
if i == len(utimes) or (i > 0 and utime - utimes[i-1] < utimes[i] - utime):
i -= 1
return i
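# geom2anno converts the extracted shapely map geometries into per-class lists of
# polyline vertex arrays, keyed by the index of the class name in MAP_CLASSES.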
def geom2anno(map_geoms):
MAP_CLASSES = (
'ped_crossing',
'divider',
'boundary',
)
vectors = {}
for cls, geom_list in map_geoms.items():
if cls in MAP_CLASSES:
label = MAP_CLASSES.index(cls)
vectors[label] = []
for geom in geom_list:
line = np.array(geom.coords)
vectors[label].append(line)
return vectors
def create_nuscenes_infos(root_path,
out_path,
can_bus_root_path,
info_prefix,
version='v1.0-trainval',
max_sweeps=10,
roi_size=(30, 60),):
"""Create info file of nuscene dataset.
Given the raw data, generate its related info file in pkl format.
Args:
root_path (str): Path of the data root.
info_prefix (str): Prefix of the info file to be generated.
version (str): Version of the data.
Default: 'v1.0-trainval'
max_sweeps (int): Max number of sweeps.
Default: 10
"""
print(version, root_path)
nusc = NuScenes(version=version, dataroot=root_path, verbose=True)
nusc_map_extractor = NuscMapExtractor(root_path, roi_size)
nusc_can_bus = NuScenesCanBus(dataroot=can_bus_root_path)
from nuscenes.utils import splits
available_vers = ['v1.0-trainval', 'v1.0-test', 'v1.0-mini']
assert version in available_vers
if version == 'v1.0-trainval':
train_scenes = splits.train
val_scenes = splits.val
elif version == 'v1.0-test':
train_scenes = splits.test
val_scenes = []
elif version == 'v1.0-mini':
train_scenes = splits.mini_train
val_scenes = splits.mini_val
out_path = osp.join(out_path, 'mini')
else:
        raise ValueError(f'unknown version: {version}')
os.makedirs(out_path, exist_ok=True)
# filter existing scenes.
available_scenes = get_available_scenes(nusc)
available_scene_names = [s['name'] for s in available_scenes]
train_scenes = list(
filter(lambda x: x in available_scene_names, train_scenes))
val_scenes = list(filter(lambda x: x in available_scene_names, val_scenes))
train_scenes = set([
available_scenes[available_scene_names.index(s)]['token']
for s in train_scenes
])
val_scenes = set([
available_scenes[available_scene_names.index(s)]['token']
for s in val_scenes
])
test = 'test' in version
if test:
print('test scene: {}'.format(len(train_scenes)))
else:
print('train scene: {}, val scene: {}'.format(
len(train_scenes), len(val_scenes)))
train_nusc_infos, val_nusc_infos = _fill_trainval_infos(
nusc, nusc_map_extractor, nusc_can_bus, train_scenes, val_scenes, test, max_sweeps=max_sweeps)
metadata = dict(version=version)
if test:
print('test sample: {}'.format(len(train_nusc_infos)))
data = dict(infos=train_nusc_infos, metadata=metadata)
info_path = osp.join(out_path,
'{}_infos_test.pkl'.format(info_prefix))
mmcv.dump(data, info_path)
else:
print('train sample: {}, val sample: {}'.format(
len(train_nusc_infos), len(val_nusc_infos)))
data = dict(infos=train_nusc_infos, metadata=metadata)
info_path = osp.join(out_path,
'{}_infos_train.pkl'.format(info_prefix))
mmcv.dump(data, info_path)
data['infos'] = val_nusc_infos
info_val_path = osp.join(out_path,
'{}_infos_val.pkl'.format(info_prefix))
mmcv.dump(data, info_val_path)
def get_available_scenes(nusc):
"""Get available scenes from the input nuscenes class.
Given the raw data, get the information of available scenes for
further info generation.
Args:
nusc (class): Dataset class in the nuScenes dataset.
Returns:
available_scenes (list[dict]): List of basic information for the
available scenes.
"""
available_scenes = []
print('total scene num: {}'.format(len(nusc.scene)))
for scene in nusc.scene:
scene_token = scene['token']
scene_rec = nusc.get('scene', scene_token)
sample_rec = nusc.get('sample', scene_rec['first_sample_token'])
sd_rec = nusc.get('sample_data', sample_rec['data']['LIDAR_TOP'])
has_more_frames = True
scene_not_exist = False
while has_more_frames:
lidar_path, boxes, _ = nusc.get_sample_data(sd_rec['token'])
lidar_path = str(lidar_path)
if os.getcwd() in lidar_path:
# path from lyftdataset is absolute path
lidar_path = lidar_path.split(f'{os.getcwd()}/')[-1]
# relative path
if not mmcv.is_filepath(lidar_path):
scene_not_exist = True
break
else:
break
if scene_not_exist:
continue
available_scenes.append(scene)
print('exist scene num: {}'.format(len(available_scenes)))
return available_scenes
def _fill_trainval_infos(nusc,
nusc_map_extractor,
nusc_can_bus,
train_scenes,
val_scenes,
test=False,
max_sweeps=10,
fut_ts=12,
ego_fut_ts=6):
"""Generate the train/val infos from the raw data.
Args:
nusc (:obj:`NuScenes`): Dataset class in the nuScenes dataset.
train_scenes (list[str]): Basic information of training scenes.
val_scenes (list[str]): Basic information of validation scenes.
test (bool): Whether use the test mode. In the test mode, no
annotations can be accessed. Default: False.
max_sweeps (int): Max number of sweeps. Default: 10.
Returns:
tuple[list[dict]]: Information of training set and validation set
that will be saved to the info file.
"""
train_nusc_infos = []
val_nusc_infos = []
cat2idx = {}
for idx, dic in enumerate(nusc.category):
cat2idx[dic['name']] = idx
predict_helper = PredictHelper(nusc)
for sample in mmcv.track_iter_progress(nusc.sample):
map_location = nusc.get('log', nusc.get('scene', sample['scene_token'])['log_token'])['location']
lidar_token = sample['data']['LIDAR_TOP']
sd_rec = nusc.get('sample_data', lidar_token)
cs_record = nusc.get('calibrated_sensor',
sd_rec['calibrated_sensor_token'])
pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token'])
lidar_path, boxes, _ = nusc.get_sample_data(lidar_token)
mmcv.check_file_exist(lidar_path)
info = {
'lidar_path': lidar_path,
'token': sample['token'],
'sweeps': [],
'cams': dict(),
'scene_token': sample['scene_token'],
'lidar2ego_translation': cs_record['translation'],
'lidar2ego_rotation': cs_record['rotation'],
'ego2global_translation': pose_record['translation'],
'ego2global_rotation': pose_record['rotation'],
'timestamp': sample['timestamp'],
'map_location': map_location,
}
l2e_r = info['lidar2ego_rotation']
l2e_t = info['lidar2ego_translation']
e2g_r = info['ego2global_rotation']
e2g_t = info['ego2global_translation']
l2e_r_mat = Quaternion(l2e_r).rotation_matrix
e2g_r_mat = Quaternion(e2g_r).rotation_matrix
# extract map annos
lidar2ego = np.eye(4)
lidar2ego[:3, :3] = Quaternion(
info["lidar2ego_rotation"]
).rotation_matrix
lidar2ego[:3, 3] = np.array(info["lidar2ego_translation"])
ego2global = np.eye(4)
ego2global[:3, :3] = Quaternion(
info["ego2global_rotation"]
).rotation_matrix
ego2global[:3, 3] = np.array(info["ego2global_translation"])
lidar2global = ego2global @ lidar2ego
translation = list(lidar2global[:3, 3])
rotation = list(Quaternion(matrix=lidar2global).q)
map_geoms = nusc_map_extractor.get_map_geom(map_location, translation, rotation)
map_annos = geom2anno(map_geoms)
info['map_annos'] = map_annos
# obtain 6 image's information per frame
camera_types = [
'CAM_FRONT',
'CAM_FRONT_RIGHT',
'CAM_FRONT_LEFT',
'CAM_BACK',
'CAM_BACK_LEFT',
'CAM_BACK_RIGHT',
]
for cam in camera_types:
cam_token = sample['data'][cam]
cam_path, _, cam_intrinsic = nusc.get_sample_data(cam_token)
cam_info = obtain_sensor2top(nusc, cam_token, l2e_t, l2e_r_mat,
e2g_t, e2g_r_mat, cam)
cam_info.update(cam_intrinsic=cam_intrinsic)
info['cams'].update({cam: cam_info})
# obtain sweeps for a single key-frame
sd_rec = nusc.get('sample_data', sample['data']['LIDAR_TOP'])
sweeps = []
while len(sweeps) < max_sweeps:
if not sd_rec['prev'] == '':
sweep = obtain_sensor2top(nusc, sd_rec['prev'], l2e_t,
l2e_r_mat, e2g_t, e2g_r_mat, 'lidar')
sweeps.append(sweep)
sd_rec = nusc.get('sample_data', sd_rec['prev'])
else:
break
info['sweeps'] = sweeps
# obtain annotation
if not test:
# object detection annos: boxes (locs, dims, yaw, velocity), names and valid flags
annotations = [
nusc.get('sample_annotation', token)
for token in sample['anns']
]
locs = np.array([b.center for b in boxes]).reshape(-1, 3)
dims = np.array([b.wlh for b in boxes]).reshape(-1, 3)
rots = np.array([b.orientation.yaw_pitch_roll[0]
for b in boxes]).reshape(-1, 1)
velocity = np.array(
[nusc.box_velocity(token)[:2] for token in sample['anns']])
# convert velo from global to lidar
for i in range(len(boxes)):
velo = np.array([*velocity[i], 0.0])
velo = velo @ np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(
l2e_r_mat).T
velocity[i] = velo[:2]
names = [b.name for b in boxes]
for i in range(len(names)):
if names[i] in NameMapping:
names[i] = NameMapping[names[i]]
names = np.array(names)
valid_flag = np.array(
[(anno['num_lidar_pts'] + anno['num_radar_pts']) > 0
for anno in annotations],
dtype=bool).reshape(-1) ## TODO update valid flag for tracking
# we need to convert box size to
# the format of our lidar coordinate system
# which is x_size, y_size, z_size (corresponding to l, w, h)
gt_boxes = np.concatenate([locs, dims[:, [1, 0, 2]], rots], axis=1)
assert len(gt_boxes) == len(
annotations), f'{len(gt_boxes)}, {len(annotations)}'
# object tracking annos: instance_ids
instance_inds = [nusc.getind('instance', anno['instance_token'])
for anno in annotations]
# motion prediction annos: future trajectories offset in lidar frame and valid mask
num_box = len(boxes)
gt_fut_trajs = np.zeros((num_box, fut_ts, 2))
gt_fut_masks = np.zeros((num_box, fut_ts))
for i, anno in enumerate(annotations):
instance_token = anno['instance_token']
fut_traj_local = predict_helper.get_future_for_agent(
instance_token,
sample['token'],
seconds=fut_ts/2,
in_agent_frame=True
)
if fut_traj_local.shape[0] > 0:
box = boxes[i]
trans = box.center
rot = Quaternion(matrix=box.rotation_matrix)
fut_traj_scene = convert_local_coords_to_global(fut_traj_local, trans, rot)
valid_step = fut_traj_scene.shape[0]
gt_fut_trajs[i, 0] = fut_traj_scene[0] - box.center[:2]
gt_fut_trajs[i, 1:valid_step] = fut_traj_scene[1:] - fut_traj_scene[:-1]
gt_fut_masks[i, :valid_step] = 1
# motion planning annos: future trajectories offset in lidar frame and valid mask
ego_fut_trajs = np.zeros((ego_fut_ts + 1, 3))
ego_fut_masks = np.zeros((ego_fut_ts + 1))
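            # ego_fut_ts + 1 global lidar poses (current + future) are gathered so
            # that ego_fut_ts per-step offsets can be formed by differencing below.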
sample_cur = sample
ego_status = get_ego_status(nusc, nusc_can_bus, sample_cur)
for i in range(ego_fut_ts + 1):
pose_mat = get_global_sensor_pose(sample_cur, nusc)
ego_fut_trajs[i] = pose_mat[:3, 3]
ego_fut_masks[i] = 1
if sample_cur['next'] == '':
ego_fut_trajs[i+1:] = ego_fut_trajs[i]
break
else:
sample_cur = nusc.get('sample', sample_cur['next'])
# global to ego
ego_fut_trajs = ego_fut_trajs - np.array(pose_record['translation'])
rot_mat = Quaternion(pose_record['rotation']).inverse.rotation_matrix
ego_fut_trajs = np.dot(rot_mat, ego_fut_trajs.T).T
# ego to lidar
ego_fut_trajs = ego_fut_trajs - np.array(cs_record['translation'])
rot_mat = Quaternion(cs_record['rotation']).inverse.rotation_matrix
ego_fut_trajs = np.dot(rot_mat, ego_fut_trajs.T).T
# drive command according to final fut step offset
if ego_fut_trajs[-1][0] >= 2:
command = np.array([1, 0, 0]) # Turn Right
elif ego_fut_trajs[-1][0] <= -2:
command = np.array([0, 1, 0]) # Turn Left
else:
command = np.array([0, 0, 1]) # Go Straight
# get offset
ego_fut_trajs = ego_fut_trajs[1:] - ego_fut_trajs[:-1]
info['gt_boxes'] = gt_boxes
info['gt_names'] = names
info['gt_velocity'] = velocity.reshape(-1, 2)
info['num_lidar_pts'] = np.array(
[a['num_lidar_pts'] for a in annotations])
info['num_radar_pts'] = np.array(
[a['num_radar_pts'] for a in annotations])
info['valid_flag'] = valid_flag
info['instance_inds'] = instance_inds
info['gt_agent_fut_trajs'] = gt_fut_trajs.astype(np.float32)
info['gt_agent_fut_masks'] = gt_fut_masks.astype(np.float32)
info['gt_ego_fut_trajs'] = ego_fut_trajs[:, :2].astype(np.float32)
info['gt_ego_fut_masks'] = ego_fut_masks[1:].astype(np.float32)
info['gt_ego_fut_cmd'] = command.astype(np.float32)
info['ego_status'] = ego_status
if sample['scene_token'] in train_scenes:
train_nusc_infos.append(info)
else:
val_nusc_infos.append(info)
return train_nusc_infos, val_nusc_infos
def get_ego_status(nusc, nusc_can_bus, sample):
ego_status = []
ref_scene = nusc.get("scene", sample['scene_token'])
try:
pose_msgs = nusc_can_bus.get_messages(ref_scene['name'],'pose')
steer_msgs = nusc_can_bus.get_messages(ref_scene['name'], 'steeranglefeedback')
pose_uts = [msg['utime'] for msg in pose_msgs]
steer_uts = [msg['utime'] for msg in steer_msgs]
ref_utime = sample['timestamp']
pose_index = locate_message(pose_uts, ref_utime)
pose_data = pose_msgs[pose_index]
steer_index = locate_message(steer_uts, ref_utime)
steer_data = steer_msgs[steer_index]
ego_status.extend(pose_data["accel"]) # acceleration in ego vehicle frame, m/s/s
ego_status.extend(pose_data["rotation_rate"]) # angular velocity in ego vehicle frame, rad/s
ego_status.extend(pose_data["vel"]) # velocity in ego vehicle frame, m/s
ego_status.append(steer_data["value"]) # steering angle, positive: left turn, negative: right turn
    except Exception:
        # fall back to zeros, e.g. when CAN bus data is unavailable for this scene
        ego_status = [0] * 10
return np.array(ego_status).astype(np.float32)
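# get_global_sensor_pose composes ego2global @ lidar2ego for the sample's LIDAR_TOP
# data, giving the 4x4 global pose of the lidar sensor at that timestamp.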
def get_global_sensor_pose(rec, nusc):
lidar_sample_data = nusc.get('sample_data', rec['data']['LIDAR_TOP'])
pose_record = nusc.get("ego_pose", lidar_sample_data["ego_pose_token"])
cs_record = nusc.get("calibrated_sensor", lidar_sample_data["calibrated_sensor_token"])
ego2global = transform_matrix(pose_record["translation"], Quaternion(pose_record["rotation"]), inverse=False)
sensor2ego = transform_matrix(cs_record["translation"], Quaternion(cs_record["rotation"]), inverse=False)
pose = ego2global.dot(sensor2ego)
return pose
def obtain_sensor2top(nusc,
sensor_token,
l2e_t,
l2e_r_mat,
e2g_t,
e2g_r_mat,
sensor_type='lidar'):
"""Obtain the info with RT matric from general sensor to Top LiDAR.
Args:
nusc (class): Dataset class in the nuScenes dataset.
sensor_token (str): Sample data token corresponding to the
specific sensor type.
l2e_t (np.ndarray): Translation from lidar to ego in shape (1, 3).
l2e_r_mat (np.ndarray): Rotation matrix from lidar to ego
in shape (3, 3).
e2g_t (np.ndarray): Translation from ego to global in shape (1, 3).
e2g_r_mat (np.ndarray): Rotation matrix from ego to global
in shape (3, 3).
sensor_type (str): Sensor to calibrate. Default: 'lidar'.
Returns:
sweep (dict): Sweep information after transformation.
"""
sd_rec = nusc.get('sample_data', sensor_token)
cs_record = nusc.get('calibrated_sensor',
sd_rec['calibrated_sensor_token'])
pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token'])
data_path = str(nusc.get_sample_data_path(sd_rec['token']))
if os.getcwd() in data_path: # path from lyftdataset is absolute path
data_path = data_path.split(f'{os.getcwd()}/')[-1] # relative path
sweep = {
'data_path': data_path,
'type': sensor_type,
'sample_data_token': sd_rec['token'],
'sensor2ego_translation': cs_record['translation'],
'sensor2ego_rotation': cs_record['rotation'],
'ego2global_translation': pose_record['translation'],
'ego2global_rotation': pose_record['rotation'],
'timestamp': sd_rec['timestamp']
}
l2e_r_s = sweep['sensor2ego_rotation']
l2e_t_s = sweep['sensor2ego_translation']
e2g_r_s = sweep['ego2global_rotation']
e2g_t_s = sweep['ego2global_translation']
# obtain the RT from sensor to Top LiDAR
# sweep->ego->global->ego'->lidar
l2e_r_s_mat = Quaternion(l2e_r_s).rotation_matrix
e2g_r_s_mat = Quaternion(e2g_r_s).rotation_matrix
R = (l2e_r_s_mat.T @ e2g_r_s_mat.T) @ (
np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)
T = (l2e_t_s @ e2g_r_s_mat.T + e2g_t_s) @ (
np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)
T -= e2g_t @ (np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T
) + l2e_t @ np.linalg.inv(l2e_r_mat).T
sweep['sensor2lidar_rotation'] = R.T # points @ R.T + T
sweep['sensor2lidar_translation'] = T
return sweep
def nuscenes_data_prep(root_path,
can_bus_root_path,
info_prefix,
version,
dataset_name,
out_dir,
max_sweeps=10):
"""Prepare data related to nuScenes dataset.
Related data consists of '.pkl' files recording basic infos,
2D annotations and groundtruth database.
Args:
root_path (str): Path of dataset root.
info_prefix (str): The prefix of info filenames.
version (str): Dataset version.
dataset_name (str): The dataset class name.
out_dir (str): Output directory of the groundtruth database info.
max_sweeps (int): Number of input consecutive frames. Default: 10
"""
create_nuscenes_infos(
root_path, out_dir, can_bus_root_path, info_prefix, version=version, max_sweeps=max_sweeps)
parser = argparse.ArgumentParser(description='Data converter arg parser')
parser.add_argument('dataset', metavar='kitti', help='name of the dataset')
parser.add_argument(
'--root-path',
type=str,
default='./data/kitti',
help='specify the root path of dataset')
parser.add_argument(
'--canbus',
type=str,
default='./data',
help='specify the root path of nuScenes canbus')
parser.add_argument(
'--version',
type=str,
default='v1.0',
required=False,
help='specify the dataset version, no need for kitti')
parser.add_argument(
'--max-sweeps',
type=int,
default=10,
required=False,
help='specify sweeps of lidar per example')
parser.add_argument(
'--out-dir',
type=str,
default='./data/kitti',
    required=False,
help='name of info pkl')
parser.add_argument('--extra-tag', type=str, default='kitti')
parser.add_argument(
'--workers', type=int, default=4, help='number of threads to be used')
args = parser.parse_args()
if __name__ == '__main__':
if args.dataset == 'nuscenes' and args.version != 'v1.0-mini':
train_version = f'{args.version}-trainval'
nuscenes_data_prep(
root_path=args.root_path,
can_bus_root_path=args.canbus,
info_prefix=args.extra_tag,
version=train_version,
dataset_name='NuScenesDataset',
out_dir=args.out_dir,
max_sweeps=args.max_sweeps)
test_version = f'{args.version}-test'
nuscenes_data_prep(
root_path=args.root_path,
can_bus_root_path=args.canbus,
info_prefix=args.extra_tag,
version=test_version,
dataset_name='NuScenesDataset',
out_dir=args.out_dir,
max_sweeps=args.max_sweeps)
elif args.dataset == 'nuscenes' and args.version == 'v1.0-mini':
train_version = f'{args.version}'
nuscenes_data_prep(
root_path=args.root_path,
can_bus_root_path=args.canbus,
info_prefix=args.extra_tag,
version=train_version,
dataset_name='NuScenesDataset',
out_dir=args.out_dir,
max_sweeps=args.max_sweeps)
#!/usr/bin/env bash
CONFIG=$1
CHECKPOINT=$2
GPUS=$3
PORT=${PORT:-29610}
PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
python3 -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
$(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4}
#!/usr/bin/env bash
CONFIG=$1
GPUS=$2
PORT=${PORT:-28651}
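# The exports below are ROCm/MIOpen-specific performance settings (NHWC layout hint,
# MIOpen find mode, local rocBLAS/MIOpen builds); adjust or drop them on CUDA machines.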
# export GPU_FLUSH_ON_EXECUTION=1
export PYTORCH_MIOPEN_SUGGEST_NHWC=1 #.to(memory_format=torch.channels_last)
export MIOPEN_FIND_MODE=1
export LD_LIBRARY_PATH=/home/SparseDrive/package/miopen/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/home/SparseDrive/rocblas-install/lib:$LD_LIBRARY_PATH
export MIOPEN_PRECISION_FP32_FP32_FP32_TF32_FP32=1
PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
python3 -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
$(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} #--enable-profiler
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import torch
from mmcv.runner import save_checkpoint
from torch import nn as nn
from mmdet3d.apis import init_model
def fuse_conv_bn(conv, bn):
"""During inference, the functionary of batch norm layers is turned off but
only the mean and var alone channels are used, which exposes the chance to
fuse it with the preceding conv layers to save computations and simplify
network structures."""
conv_w = conv.weight
conv_b = conv.bias if conv.bias is not None else torch.zeros_like(
bn.running_mean)
factor = bn.weight / torch.sqrt(bn.running_var + bn.eps)
conv.weight = nn.Parameter(conv_w *
factor.reshape([conv.out_channels, 1, 1, 1]))
conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias)
return conv
def fuse_module(m):
last_conv = None
last_conv_name = None
for name, child in m.named_children():
if isinstance(child, (nn.BatchNorm2d, nn.SyncBatchNorm)):
if last_conv is None: # only fuse BN that is after Conv
continue
fused_conv = fuse_conv_bn(last_conv, child)
m._modules[last_conv_name] = fused_conv
# To reduce changes, set BN as Identity instead of deleting it.
m._modules[name] = nn.Identity()
last_conv = None
elif isinstance(child, nn.Conv2d):
last_conv = child
last_conv_name = name
else:
fuse_module(child)
return m
def parse_args():
parser = argparse.ArgumentParser(
description='fuse Conv and BN layers in a model')
parser.add_argument('config', help='config file path')
parser.add_argument('checkpoint', help='checkpoint file path')
parser.add_argument('out', help='output path of the converted model')
args = parser.parse_args()
return args
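# Usage sketch (paths illustrative):
#   python tools/fuse_conv_bn.py <config.py> <checkpoint.pth> <fused_out.pth>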
def main():
args = parse_args()
# build the model from a config file and a checkpoint file
model = init_model(args.config, args.checkpoint)
# fuse conv and bn layers of the model
fused_model = fuse_module(model)
save_checkpoint(fused_model, args.out)
if __name__ == '__main__':
main()
import os
import pickle
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import mmcv
os.makedirs('data/kmeans', exist_ok=True)
os.makedirs('vis/kmeans', exist_ok=True)
K = 900
DIS_THRESH = 55
fp = 'data/infos/mini/nuscenes_infos_train.pkl'
data = mmcv.load(fp)
data_infos = list(sorted(data["infos"], key=lambda e: e["timestamp"]))
center = []
for idx in tqdm(range(len(data_infos))):
boxes = data_infos[idx]['gt_boxes'][:,:3]
if len(boxes) == 0:
continue
distance = np.linalg.norm(boxes[:, :2], axis=1)
center.append(boxes[distance < DIS_THRESH])
center = np.concatenate(center, axis=0)
print("start clustering, may take a few minutes.")
cluster = KMeans(n_clusters=K).fit(center).cluster_centers_
plt.scatter(cluster[:,0], cluster[:,1])
plt.savefig(f'vis/kmeans/det_anchor_{K}', bbox_inches='tight')
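# Pad each clustered (x, y, z) center with placeholder values for the remaining
# anchor dimensions (presumably box size, yaw encoding and velocity in the
# 11-dim detection anchor layout) to form the full K x 11 anchor array.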
others = np.array([1,1,1,1,0,0,0,0])[np.newaxis].repeat(K, axis=0)
cluster = np.concatenate([cluster, others], axis=1)
np.save(f'data/kmeans/kmeans_det_{K}.npy', cluster)
import os
import pickle
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import mmcv
K = 100
num_sample = 20
fp = 'data/infos/mini/nuscenes_infos_train.pkl'
data = mmcv.load(fp)
data_infos = list(sorted(data["infos"], key=lambda e: e["timestamp"]))
center = []
for idx in tqdm(range(len(data_infos))):
for cls, geoms in data_infos[idx]["map_annos"].items():
for geom in geoms:
center.append(geom.mean(axis=0))
center = np.stack(center, axis=0)
center = KMeans(n_clusters=K).fit(center).cluster_centers_
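# Expand every clustered center into a straight 20-point polyline spanning
# y in [-4, 4] m at fixed x, giving the initial map anchor vectors.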
delta_y = np.linspace(-4, 4, num_sample)
delta_x = np.zeros([num_sample])
delta = np.stack([delta_x, delta_y], axis=-1)
vecs = center[:, np.newaxis] + delta[np.newaxis]
for i in range(K):
x = vecs[i, :, 0]
y = vecs[i, :, 1]
plt.plot(x, y, linewidth=1, marker='o', linestyle='-', markersize=2)
plt.savefig(f'vis/kmeans/map_anchor_{K}', bbox_inches='tight')
np.save(f'data/kmeans/kmeans_map_{K}.npy', vecs)
import os
import pickle
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import mmcv
CLASSES = [
"car",
"truck",
"construction_vehicle",
"bus",
"trailer",
"barrier",
"motorcycle",
"bicycle",
"pedestrian",
"traffic_cone",
]
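# lidar2agent: prepend the origin, integrate per-step offsets into cumulative
# positions, rotate them by -yaw into each box-aligned agent frame, and drop the
# origin point, so clustering runs on yaw-normalized future positions.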
def lidar2agent(trajs_offset, boxes):
origin = np.zeros((trajs_offset.shape[0], 1, 2), dtype=np.float32)
trajs_offset = np.concatenate([origin, trajs_offset], axis=1)
trajs = trajs_offset.cumsum(axis=1)
yaws = - boxes[:, 6]
rot_sin = np.sin(yaws)
rot_cos = np.cos(yaws)
rot_mat_T = np.stack(
[
np.stack([rot_cos, rot_sin]),
np.stack([-rot_sin, rot_cos]),
]
)
trajs_new = np.einsum('aij,jka->aik', trajs, rot_mat_T)
trajs_new = trajs_new[:, 1:]
return trajs_new
K = 6
DIS_THRESH = 55
fp = 'data/infos/mini/nuscenes_infos_train.pkl'
data = mmcv.load(fp)
data_infos = list(sorted(data["infos"], key=lambda e: e["timestamp"]))
intention = dict()
for i in range(len(CLASSES)):
intention[i] = []
for idx in tqdm(range(len(data_infos))):
info = data_infos[idx]
boxes = info['gt_boxes']
names = info['gt_names']
fut_masks = info['gt_agent_fut_masks']
trajs = info['gt_agent_fut_trajs']
velos = info['gt_velocity']
labels = []
for cat in names:
if cat in CLASSES:
labels.append(CLASSES.index(cat))
else:
labels.append(-1)
labels = np.array(labels)
if len(boxes) == 0:
continue
for i in range(len(CLASSES)):
cls_mask = (labels == i)
box_cls = boxes[cls_mask]
fut_masks_cls = fut_masks[cls_mask]
trajs_cls = trajs[cls_mask]
velos_cls = velos[cls_mask]
distance = np.linalg.norm(box_cls[:, :2], axis=1)
mask = np.logical_and(
fut_masks_cls.sum(axis=1) == 12,
distance < DIS_THRESH,
)
trajs_cls = trajs_cls[mask]
box_cls = box_cls[mask]
velos_cls = velos_cls[mask]
trajs_agent = lidar2agent(trajs_cls, box_cls)
if trajs_agent.shape[0] == 0:
continue
intention[i].append(trajs_agent)
clusters = []
for i in range(len(CLASSES)):
intention_cls = np.concatenate(intention[i], axis=0).reshape(-1, 24)
if intention_cls.shape[0] < K:
continue
cluster = KMeans(n_clusters=K).fit(intention_cls).cluster_centers_
cluster = cluster.reshape(-1, 12, 2)
clusters.append(cluster)
for j in range(K):
plt.scatter(cluster[j, :, 0], cluster[j, :,1])
plt.savefig(f'vis/kmeans/motion_intention_{CLASSES[i]}_{K}', bbox_inches='tight')
plt.close()
clusters = np.stack(clusters, axis=0)
np.save(f'data/kmeans/kmeans_motion_{K}.npy', clusters)
import os
import pickle
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import mmcv
K = 6
fp = 'data/infos/mini/nuscenes_infos_train.pkl'
data = mmcv.load(fp)
data_infos = list(sorted(data["infos"], key=lambda e: e["timestamp"]))
navi_trajs = [[], [], []]
for idx in tqdm(range(len(data_infos))):
info = data_infos[idx]
plan_traj = info['gt_ego_fut_trajs'].cumsum(axis=-2)
plan_mask = info['gt_ego_fut_masks']
cmd = info['gt_ego_fut_cmd'].astype(np.int32)
cmd = cmd.argmax(axis=-1)
    if plan_mask.sum() != 6:
continue
navi_trajs[cmd].append(plan_traj)
clusters = []
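# The first command bucket is filled with an all-zero placeholder and only the
# remaining buckets are clustered (presumably because this split has too few
# samples of that command to form K=6 clusters).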
clusters.append(np.zeros((6, 6, 2)))
for trajs in navi_trajs[1:]:
# for trajs in navi_trajs:
trajs = np.concatenate(trajs, axis=0).reshape(-1, 12)
cluster = KMeans(n_clusters=K).fit(trajs).cluster_centers_
cluster = cluster.reshape(-1, 6, 2)
clusters.append(cluster)
for j in range(K):
plt.scatter(cluster[j, :, 0], cluster[j, :,1])
plt.savefig(f'vis/kmeans/plan_{K}', bbox_inches='tight')
plt.close()
clusters = np.stack(clusters, axis=0)
np.save(f'data/kmeans/kmeans_plan_{K}.npy', clusters)
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import mmcv
import os
from os import path as osp
import torch
import warnings
from mmcv import Config, DictAction
from mmcv.cnn import fuse_conv_bn
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import (
get_dist_info,
init_dist,
load_checkpoint,
wrap_fp16_model,
)
from mmdet.apis import single_gpu_test, multi_gpu_test, set_random_seed
from mmdet.datasets import replace_ImageToTensor, build_dataset
from mmdet.datasets import build_dataloader as build_dataloader_origin
from mmdet.models import build_detector
from projects.mmdet3d_plugin.datasets.builder import build_dataloader
from projects.mmdet3d_plugin.apis.test import custom_multi_gpu_test
def parse_args():
parser = argparse.ArgumentParser(
description="MMDet test (and eval) a model"
)
parser.add_argument("config", help="test config file path")
parser.add_argument("checkpoint", help="checkpoint file")
parser.add_argument("--out", help="output result file in pickle format")
parser.add_argument(
"--fuse-conv-bn",
action="store_true",
help="Whether to fuse conv and bn, this will slightly increase"
"the inference speed",
)
parser.add_argument(
"--format-only",
action="store_true",
help="Format the output results without perform evaluation. It is"
"useful when you want to format the result to a specific format and "
"submit it to the test server",
)
parser.add_argument(
"--eval",
type=str,
nargs="+",
help='evaluation metrics, which depends on the dataset, e.g., "bbox",'
' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC',
)
parser.add_argument("--show", action="store_true", help="show results")
parser.add_argument(
"--show-dir", help="directory where results will be saved"
)
parser.add_argument(
"--gpu-collect",
action="store_true",
help="whether to use gpu to collect results.",
)
parser.add_argument(
"--tmpdir",
help="tmp directory used for collecting results from multiple "
"workers, available when gpu-collect is not specified",
)
parser.add_argument("--seed", type=int, default=0, help="random seed")
parser.add_argument(
"--deterministic",
action="store_true",
help="whether to set deterministic options for CUDNN backend.",
)
parser.add_argument(
"--cfg-options",
nargs="+",
action=DictAction,
help="override some settings in the used config, the key-value pair "
"in xxx=yyy format will be merged into config file. If the value to "
'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
"Note that the quotation marks are necessary and that no white space "
"is allowed.",
)
parser.add_argument(
"--options",
nargs="+",
action=DictAction,
help="custom options for evaluation, the key-value pair in xxx=yyy "
"format will be kwargs for dataset.evaluate() function (deprecate), "
"change to --eval-options instead.",
)
parser.add_argument(
"--eval-options",
nargs="+",
action=DictAction,
help="custom options for evaluation, the key-value pair in xxx=yyy "
"format will be kwargs for dataset.evaluate() function",
)
parser.add_argument(
"--launcher",
choices=["none", "pytorch", "slurm", "mpi"],
default="none",
help="job launcher",
)
parser.add_argument("--local_rank", type=int, default=0)
parser.add_argument("--result_file", type=str, default=None)
parser.add_argument("--show_only", action="store_true")
args = parser.parse_args()
if "LOCAL_RANK" not in os.environ:
os.environ["LOCAL_RANK"] = str(args.local_rank)
if args.options and args.eval_options:
raise ValueError(
"--options and --eval-options cannot be both specified, "
"--options is deprecated in favor of --eval-options"
)
if args.options:
warnings.warn("--options is deprecated in favor of --eval-options")
args.eval_options = args.options
return args
def main():
args = parse_args()
assert (
args.out or args.eval or args.format_only or args.show or args.show_dir
), (
"Please specify at least one operation (save/eval/format/show the "
'results / save the results) with the argument "--out", "--eval"'
', "--format-only", "--show" or "--show-dir"'
)
if args.eval and args.format_only:
raise ValueError("--eval and --format_only cannot be both specified")
if args.out is not None and not args.out.endswith((".pkl", ".pickle")):
raise ValueError("The output file must be a pkl file.")
cfg = Config.fromfile(args.config)
if args.cfg_options is not None:
cfg.merge_from_dict(args.cfg_options)
# import modules from string list.
if cfg.get("custom_imports", None):
from mmcv.utils import import_modules_from_strings
import_modules_from_strings(**cfg["custom_imports"])
    # import modules from plugin/xx, registry will be updated
if hasattr(cfg, "plugin"):
if cfg.plugin:
import importlib
if hasattr(cfg, "plugin_dir"):
plugin_dir = cfg.plugin_dir
_module_dir = os.path.dirname(plugin_dir)
_module_dir = _module_dir.split("/")
_module_path = _module_dir[0]
for m in _module_dir[1:]:
_module_path = _module_path + "." + m
print(_module_path)
plg_lib = importlib.import_module(_module_path)
else:
# import dir is the dirpath for the config file
_module_dir = os.path.dirname(args.config)
_module_dir = _module_dir.split("/")
_module_path = _module_dir[0]
for m in _module_dir[1:]:
_module_path = _module_path + "." + m
print(_module_path)
plg_lib = importlib.import_module(_module_path)
# set cudnn_benchmark
if cfg.get("cudnn_benchmark", False):
torch.backends.cudnn.benchmark = True
cfg.model.pretrained = None
# in case the test dataset is concatenated
samples_per_gpu = 1
if isinstance(cfg.data.test, dict):
cfg.data.test.test_mode = True
samples_per_gpu = cfg.data.test.pop("samples_per_gpu", 1)
if samples_per_gpu > 1:
            # Replace 'ImageToTensor' with 'DefaultFormatBundle'
cfg.data.test.pipeline = replace_ImageToTensor(
cfg.data.test.pipeline
)
elif isinstance(cfg.data.test, list):
for ds_cfg in cfg.data.test:
ds_cfg.test_mode = True
samples_per_gpu = max(
[ds_cfg.pop("samples_per_gpu", 1) for ds_cfg in cfg.data.test]
)
if samples_per_gpu > 1:
for ds_cfg in cfg.data.test:
ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)
# init distributed env first, since logger depends on the dist info.
if args.launcher == "none":
distributed = False
else:
distributed = True
init_dist(args.launcher, **cfg.dist_params)
# set random seeds
if args.seed is not None:
set_random_seed(args.seed, deterministic=args.deterministic)
# set work dir
if cfg.get('work_dir', None) is None:
# use config filename as default work_dir if cfg.work_dir is None
cfg.work_dir = osp.join('./work_dirs',
osp.splitext(osp.basename(args.config))[0])
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
cfg.data.test.work_dir = cfg.work_dir
print('work_dir: ',cfg.work_dir)
# build the dataloader
dataset = build_dataset(cfg.data.test)
print("distributed:", distributed)
if distributed:
data_loader = build_dataloader(
dataset,
samples_per_gpu=samples_per_gpu,
workers_per_gpu=cfg.data.workers_per_gpu,
dist=distributed,
shuffle=False,
nonshuffler_sampler=dict(type="DistributedSampler"),
)
else:
data_loader = build_dataloader_origin(
dataset,
samples_per_gpu=samples_per_gpu,
workers_per_gpu=cfg.data.workers_per_gpu,
dist=distributed,
shuffle=False,
)
# build the model and load checkpoint
cfg.model.train_cfg = None
model = build_detector(cfg.model, test_cfg=cfg.get("test_cfg"))
# model = build_model(cfg.model, test_cfg=cfg.get("test_cfg"))
fp16_cfg = cfg.get("fp16", None)
if fp16_cfg is not None:
wrap_fp16_model(model)
checkpoint = load_checkpoint(model, args.checkpoint, map_location="cpu")
if args.fuse_conv_bn:
model = fuse_conv_bn(model)
    # old versions did not save class info in checkpoints, this workaround is
    # for backward compatibility
if "CLASSES" in checkpoint.get("meta", {}):
model.CLASSES = checkpoint["meta"]["CLASSES"]
else:
model.CLASSES = dataset.CLASSES
# palette for visualization in segmentation tasks
if "PALETTE" in checkpoint.get("meta", {}):
model.PALETTE = checkpoint["meta"]["PALETTE"]
elif hasattr(dataset, "PALETTE"):
# segmentation dataset has `PALETTE` attribute
model.PALETTE = dataset.PALETTE
if args.result_file is not None:
# outputs = torch.load(args.result_file)
outputs = mmcv.load(args.result_file)
elif not distributed:
model = MMDataParallel(model, device_ids=[0])
outputs = single_gpu_test(model, data_loader, args.show, args.show_dir)
else:
model = MMDistributedDataParallel(
model.cuda(),
device_ids=[torch.cuda.current_device()],
broadcast_buffers=False,
)
outputs = custom_multi_gpu_test(
model, data_loader, args.tmpdir, args.gpu_collect
)
rank, _ = get_dist_info()
if rank == 0:
if args.out:
print(f"\nwriting results to {args.out}")
mmcv.dump(outputs, args.out)
kwargs = {} if args.eval_options is None else args.eval_options
if args.show_only:
eval_kwargs = cfg.get("evaluation", {}).copy()
# hard-code way to remove EvalHook args
for key in [
"interval",
"tmpdir",
"start",
"gpu_collect",
"save_best",
"rule",
]:
eval_kwargs.pop(key, None)
eval_kwargs.update(kwargs)
dataset.show(outputs, show=True, **eval_kwargs)
elif args.format_only:
dataset.format_results(outputs, **kwargs)
elif args.eval:
eval_kwargs = cfg.get("evaluation", {}).copy()
# hard-code way to remove EvalHook args
for key in [
"interval",
"tmpdir",
"start",
"gpu_collect",
"save_best",
"rule",
]:
eval_kwargs.pop(key, None)
eval_kwargs.update(dict(metric=args.eval, **kwargs))
print(eval_kwargs)
results_dict = dataset.evaluate(outputs, **eval_kwargs)
print(results_dict)
if __name__ == "__main__":
torch.multiprocessing.set_start_method(
"fork"
    ) # use fork so that workers_per_gpu can be > 1
main()
# Copyright (c) OpenMMLab. All rights reserved.
from __future__ import division
import sys
import os
print(sys.executable, os.path.abspath(__file__))
# import init_paths # for conda pkgs submitting method
import argparse
import copy
import mmcv
import time
import torch
import warnings
from mmcv import Config, DictAction
from mmcv.runner import get_dist_info, init_dist
from os import path as osp
from mmdet import __version__ as mmdet_version
from mmdet.apis import train_detector
from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.utils import collect_env, get_root_logger
from mmdet.apis import set_random_seed
from torch import distributed as dist
from datetime import timedelta
import cv2
cv2.setNumThreads(8)
def parse_args():
parser = argparse.ArgumentParser(description="Train a detector")
parser.add_argument("config", help="train config file path")
parser.add_argument("--work-dir", help="the dir to save logs and models")
parser.add_argument(
"--resume-from", help="the checkpoint file to resume from"
)
parser.add_argument(
"--no-validate",
action="store_true",
help="whether not to evaluate the checkpoint during training",
)
group_gpus = parser.add_mutually_exclusive_group()
group_gpus.add_argument(
"--gpus",
type=int,
help="number of gpus to use "
"(only applicable to non-distributed training)",
)
group_gpus.add_argument(
"--gpu-ids",
type=int,
nargs="+",
help="ids of gpus to use "
"(only applicable to non-distributed training)",
)
parser.add_argument("--seed", type=int, default=0, help="random seed")
parser.add_argument(
"--deterministic",
action="store_true",
help="whether to set deterministic options for CUDNN backend.",
)
parser.add_argument(
"--options",
nargs="+",
action=DictAction,
help="override some settings in the used config, the key-value pair "
"in xxx=yyy format will be merged into config file (deprecate), "
"change to --cfg-options instead.",
)
parser.add_argument(
"--cfg-options",
nargs="+",
action=DictAction,
help="override some settings in the used config, the key-value pair "
"in xxx=yyy format will be merged into config file. If the value to "
'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
"Note that the quotation marks are necessary and that no white space "
"is allowed.",
)
parser.add_argument(
"--dist-url",
type=str,
default="auto",
help="dist url for init process, such as tcp://localhost:8000",
)
parser.add_argument("--gpus-per-machine", type=int, default=8)
parser.add_argument(
"--launcher",
choices=["none", "pytorch", "slurm", "mpi", "mpi_nccl"],
default="none",
help="job launcher",
)
parser.add_argument("--local_rank", "--local-rank", type=int, default=0)
parser.add_argument(
"--autoscale-lr",
action="store_true",
help="automatically scale lr with the number of gpus",
)
args = parser.parse_args()
if "LOCAL_RANK" not in os.environ:
os.environ["LOCAL_RANK"] = str(args.local_rank)
if args.options and args.cfg_options:
raise ValueError(
"--options and --cfg-options cannot be both specified, "
"--options is deprecated in favor of --cfg-options"
)
if args.options:
warnings.warn("--options is deprecated in favor of --cfg-options")
args.cfg_options = args.options
return args
def main():
args = parse_args()
cfg = Config.fromfile(args.config)
if args.cfg_options is not None:
cfg.merge_from_dict(args.cfg_options)
# import modules from string list.
if cfg.get("custom_imports", None):
from mmcv.utils import import_modules_from_strings
import_modules_from_strings(**cfg["custom_imports"])
    # import modules from plugin/xx, registry will be updated
if hasattr(cfg, "plugin"):
if cfg.plugin:
import importlib
if hasattr(cfg, "plugin_dir"):
plugin_dir = cfg.plugin_dir
_module_dir = os.path.dirname(plugin_dir)
_module_dir = _module_dir.split("/")
_module_path = _module_dir[0]
for m in _module_dir[1:]:
_module_path = _module_path + "." + m
print(_module_path)
plg_lib = importlib.import_module(_module_path)
else:
# import dir is the dirpath for the config file
_module_dir = os.path.dirname(args.config)
_module_dir = _module_dir.split("/")
_module_path = _module_dir[0]
for m in _module_dir[1:]:
_module_path = _module_path + "." + m
print(_module_path)
plg_lib = importlib.import_module(_module_path)
from projects.mmdet3d_plugin.apis.train import custom_train_model
# set cudnn_benchmark
if cfg.get("cudnn_benchmark", False):
torch.backends.cudnn.benchmark = True
# work_dir is determined in this priority: CLI > segment in file > filename
if args.work_dir is not None:
# update configs according to CLI args if args.work_dir is not None
cfg.work_dir = args.work_dir
elif cfg.get("work_dir", None) is None:
# use config filename as default work_dir if cfg.work_dir is None
cfg.work_dir = osp.join(
"./work_dirs", osp.splitext(osp.basename(args.config))[0]
)
if args.resume_from is not None:
cfg.resume_from = args.resume_from
if args.gpu_ids is not None:
cfg.gpu_ids = args.gpu_ids
else:
cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
if args.autoscale_lr:
# apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
cfg.optimizer["lr"] = cfg.optimizer["lr"] * len(cfg.gpu_ids) / 8
# init distributed env first, since logger depends on the dist info.
if args.launcher == "none":
distributed = False
elif args.launcher == "mpi_nccl":
distributed = True
import mpi4py.MPI as MPI
comm = MPI.COMM_WORLD
mpi_local_rank = comm.Get_rank()
mpi_world_size = comm.Get_size()
print(
"MPI local_rank=%d, world_size=%d"
% (mpi_local_rank, mpi_world_size)
)
# num_gpus = torch.cuda.device_count()
device_ids_on_machines = list(range(args.gpus_per_machine))
str_ids = list(map(str, device_ids_on_machines))
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(str_ids)
torch.cuda.set_device(mpi_local_rank % args.gpus_per_machine)
dist.init_process_group(
backend="nccl",
init_method=args.dist_url,
world_size=mpi_world_size,
rank=mpi_local_rank,
timeout=timedelta(seconds=3600),
)
cfg.gpu_ids = range(mpi_world_size)
print("cfg.gpu_ids:", cfg.gpu_ids)
else:
distributed = True
init_dist(
args.launcher, timeout=timedelta(seconds=3600), **cfg.dist_params
)
# re-set gpu_ids with distributed training mode
_, world_size = get_dist_info()
cfg.gpu_ids = range(world_size)
# create work_dir
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
# dump config
cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
# init the logger before other steps
timestamp = time.strftime("%Y%m%d_%H%M%S", time.localtime())
log_file = osp.join(cfg.work_dir, f"{timestamp}.log")
# specify logger name, if we still use 'mmdet', the output info will be
# filtered and won't be saved in the log_file
# TODO: ugly workaround to judge whether we are training det or seg model
logger = get_root_logger(
log_file=log_file, log_level=cfg.log_level
)
# init the meta dict to record some important information such as
# environment info and seed, which will be logged
meta = dict()
# log env info
env_info_dict = collect_env()
env_info = "\n".join([(f"{k}: {v}") for k, v in env_info_dict.items()])
dash_line = "-" * 60 + "\n"
logger.info(
"Environment info:\n" + dash_line + env_info + "\n" + dash_line
)
meta["env_info"] = env_info
meta["config"] = cfg.pretty_text
# log some basic info
logger.info(f"Distributed training: {distributed}")
logger.info(f"Config:\n{cfg.pretty_text}")
# set random seeds
if args.seed is not None:
logger.info(
f"Set random seed to {args.seed}, "
f"deterministic: {args.deterministic}"
)
set_random_seed(args.seed, deterministic=args.deterministic)
cfg.seed = args.seed
meta["seed"] = args.seed
meta["exp_name"] = osp.basename(args.config)
model = build_detector(
cfg.model, train_cfg=cfg.get("train_cfg"), test_cfg=cfg.get("test_cfg")
)
model.init_weights()
logger.info(f"Model:\n{model}")
cfg.data.train.work_dir = cfg.work_dir
cfg.data.val.work_dir = cfg.work_dir
datasets = [build_dataset(cfg.data.train)]
if len(cfg.workflow) == 2:
val_dataset = copy.deepcopy(cfg.data.val)
# in case we use a dataset wrapper
if "dataset" in cfg.data.train:
val_dataset.pipeline = cfg.data.train.dataset.pipeline
else:
val_dataset.pipeline = cfg.data.train.pipeline
        # set test_mode=False here in the deep-copied config,
        # which does not affect AP/AR calculation later
# refer to https://mmdetection3d.readthedocs.io/en/latest/tutorials/customize_runtime.html#customize-workflow # noqa
val_dataset.test_mode = False
datasets.append(build_dataset(val_dataset))
if cfg.checkpoint_config is not None:
# save mmdet version, config file content and class names in
# checkpoints as meta data
cfg.checkpoint_config.meta = dict(
mmdet_version=mmdet_version,
config=cfg.pretty_text,
CLASSES=datasets[0].CLASSES,
)
# add an attribute for visualization convenience
model.CLASSES = datasets[0].CLASSES
if hasattr(cfg, "plugin"):
custom_train_model(
model,
datasets,
cfg,
distributed=distributed,
validate=(not args.no_validate),
timestamp=timestamp,
meta=meta,
)
else:
train_detector(
model,
datasets,
cfg,
distributed=distributed,
validate=(not args.no_validate),
timestamp=timestamp,
meta=meta,
)
if __name__ == "__main__":
torch.multiprocessing.set_start_method(
"fork"
    ) # use fork so that workers_per_gpu can be > 1
main()
# Copyright (c) OpenMMLab. All rights reserved.
from __future__ import division
import sys
import os
print(sys.executable, os.path.abspath(__file__))
# import init_paths # for conda pkgs submitting method
import argparse
import copy
import mmcv
import time
import torch
import warnings
from mmcv import Config, DictAction
from mmcv.runner import get_dist_info, init_dist
from os import path as osp
from mmdet import __version__ as mmdet_version
from mmdet.apis import train_detector
from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.utils import collect_env, get_root_logger
from mmdet.apis import set_random_seed
from torch import distributed as dist
from datetime import timedelta
import cv2
cv2.setNumThreads(8)
def parse_args():
parser = argparse.ArgumentParser(description="Train a detector")
parser.add_argument("config", help="train config file path")
parser.add_argument("--work-dir", help="the dir to save logs and models")
parser.add_argument(
"--resume-from", help="the checkpoint file to resume from"
)
parser.add_argument(
"--no-validate",
action="store_true",
help="whether not to evaluate the checkpoint during training",
)
group_gpus = parser.add_mutually_exclusive_group()
group_gpus.add_argument(
"--gpus",
type=int,
help="number of gpus to use "
"(only applicable to non-distributed training)",
)
group_gpus.add_argument(
"--gpu-ids",
type=int,
nargs="+",
help="ids of gpus to use "
"(only applicable to non-distributed training)",
)
parser.add_argument("--seed", type=int, default=0, help="random seed")
parser.add_argument(
"--deterministic",
action="store_true",
help="whether to set deterministic options for CUDNN backend.",
)
parser.add_argument(
"--options",
nargs="+",
action=DictAction,
help="override some settings in the used config, the key-value pair "
"in xxx=yyy format will be merged into config file (deprecate), "
"change to --cfg-options instead.",
)
parser.add_argument(
"--cfg-options",
nargs="+",
action=DictAction,
help="override some settings in the used config, the key-value pair "
"in xxx=yyy format will be merged into config file. If the value to "
'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
"Note that the quotation marks are necessary and that no white space "
"is allowed.",
)
parser.add_argument(
"--dist-url",
type=str,
default="auto",
help="dist url for init process, such as tcp://localhost:8000",
)
parser.add_argument("--gpus-per-machine", type=int, default=8)
parser.add_argument(
"--launcher",
choices=["none", "pytorch", "slurm", "mpi", "mpi_nccl"],
default="none",
help="job launcher",
)
parser.add_argument("--local_rank", "--local-rank", type=int, default=0)
parser.add_argument(
"--autoscale-lr",
action="store_true",
help="automatically scale lr with the number of gpus",
)
parser.add_argument(
'--enable-profiler',
action='store_true',
help='enable torch profiler during training'
)
args = parser.parse_args()
if "LOCAL_RANK" not in os.environ:
os.environ["LOCAL_RANK"] = str(args.local_rank)
if args.options and args.cfg_options:
raise ValueError(
"--options and --cfg-options cannot be both specified, "
"--options is deprecated in favor of --cfg-options"
)
if args.options:
warnings.warn("--options is deprecated in favor of --cfg-options")
args.cfg_options = args.options
return args
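# Hypothetical single-process invocation (script name, config path and work dir are placeholders):
#   python <this_train_script>.py path/to/config.py --work-dir ./work_dirs/debug --seed 0 --deterministic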
def main():
args = parse_args()
cfg = Config.fromfile(args.config)
cfg.enable_profiler = args.enable_profiler
if args.cfg_options is not None:
cfg.merge_from_dict(args.cfg_options)
# import modules from string list.
if cfg.get("custom_imports", None):
from mmcv.utils import import_modules_from_strings
import_modules_from_strings(**cfg["custom_imports"])
# import modules from plugin/xx; the registry will be updated
if hasattr(cfg, "plugin"):
if cfg.plugin:
import importlib
if hasattr(cfg, "plugin_dir"):
plugin_dir = cfg.plugin_dir
_module_dir = os.path.dirname(plugin_dir)
_module_dir = _module_dir.split("/")
_module_path = _module_dir[0]
for m in _module_dir[1:]:
_module_path = _module_path + "." + m
print(_module_path)
plg_lib = importlib.import_module(_module_path)
else:
# import dir is the dirpath for the config file
_module_dir = os.path.dirname(args.config)
_module_dir = _module_dir.split("/")
_module_path = _module_dir[0]
for m in _module_dir[1:]:
_module_path = _module_path + "." + m
print(_module_path)
plg_lib = importlib.import_module(_module_path)
from projects.mmdet3d_plugin.apis.train import custom_train_model
# set cudnn_benchmark
if cfg.get("cudnn_benchmark", False):
torch.backends.cudnn.benchmark = True
# work_dir is determined in this priority: CLI > segment in file > filename
if args.work_dir is not None:
# update configs according to CLI args if args.work_dir is not None
cfg.work_dir = args.work_dir
elif cfg.get("work_dir", None) is None:
# use config filename as default work_dir if cfg.work_dir is None
cfg.work_dir = osp.join(
"./work_dirs", osp.splitext(osp.basename(args.config))[0]
)
if args.resume_from is not None:
cfg.resume_from = args.resume_from
if args.gpu_ids is not None:
cfg.gpu_ids = args.gpu_ids
else:
cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
if args.autoscale_lr:
# apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
cfg.optimizer["lr"] = cfg.optimizer["lr"] * len(cfg.gpu_ids) / 8
# init distributed env first, since logger depends on the dist info.
if args.launcher == "none":
distributed = False
elif args.launcher == "mpi_nccl":
distributed = True
import mpi4py.MPI as MPI
comm = MPI.COMM_WORLD
mpi_local_rank = comm.Get_rank()
mpi_world_size = comm.Get_size()
print(
"MPI local_rank=%d, world_size=%d"
% (mpi_local_rank, mpi_world_size)
)
# num_gpus = torch.cuda.device_count()
device_ids_on_machines = list(range(args.gpus_per_machine))
str_ids = list(map(str, device_ids_on_machines))
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(str_ids)
torch.cuda.set_device(mpi_local_rank % args.gpus_per_machine)
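# bind each MPI rank to one local GPU (global rank modulo the number of GPUs per machine)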
dist.init_process_group(
backend="nccl",
init_method=args.dist_url,
world_size=mpi_world_size,
rank=mpi_local_rank,
timeout=timedelta(seconds=3600),
)
cfg.gpu_ids = range(mpi_world_size)
print("cfg.gpu_ids:", cfg.gpu_ids)
else:
distributed = True
init_dist(
args.launcher, timeout=timedelta(seconds=3600), **cfg.dist_params
)
# re-set gpu_ids with distributed training mode
_, world_size = get_dist_info()
cfg.gpu_ids = range(world_size)
# create work_dir
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
# dump config
cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
# init the logger before other steps
timestamp = time.strftime("%Y%m%d_%H%M%S", time.localtime())
log_file = osp.join(cfg.work_dir, f"{timestamp}.log")
# specify logger name, if we still use 'mmdet', the output info will be
# filtered and won't be saved in the log_file
# TODO: ugly workaround to judge whether we are training det or seg model
logger = get_root_logger(
log_file=log_file, log_level=cfg.log_level
)
# init the meta dict to record some important information such as
# environment info and seed, which will be logged
meta = dict()
# log env info
env_info_dict = collect_env()
env_info = "\n".join([(f"{k}: {v}") for k, v in env_info_dict.items()])
dash_line = "-" * 60 + "\n"
logger.info(
"Environment info:\n" + dash_line + env_info + "\n" + dash_line
)
meta["env_info"] = env_info
meta["config"] = cfg.pretty_text
# log some basic info
logger.info(f"Distributed training: {distributed}")
logger.info(f"Config:\n{cfg.pretty_text}")
# set random seeds
if args.seed is not None:
logger.info(
f"Set random seed to {args.seed}, "
f"deterministic: {args.deterministic}"
)
set_random_seed(args.seed, deterministic=args.deterministic)
cfg.seed = args.seed
meta["seed"] = args.seed
meta["exp_name"] = osp.basename(args.config)
model = build_detector(
cfg.model, train_cfg=cfg.get("train_cfg"), test_cfg=cfg.get("test_cfg")
)
model.init_weights()
logger.info(f"Model:\n{model}")
cfg.data.train.work_dir = cfg.work_dir
cfg.data.val.work_dir = cfg.work_dir
datasets = [build_dataset(cfg.data.train)]
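# a two-stage workflow (e.g. [('train', 1), ('val', 1)]) additionally builds a validation dataset that reuses the training pipeline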
if len(cfg.workflow) == 2:
val_dataset = copy.deepcopy(cfg.data.val)
# in case we use a dataset wrapper
if "dataset" in cfg.data.train:
val_dataset.pipeline = cfg.data.train.dataset.pipeline
else:
val_dataset.pipeline = cfg.data.train.pipeline
# set test_mode=False here in deep copied config
# which does not affect AP/AR calculation later
# refer to https://mmdetection3d.readthedocs.io/en/latest/tutorials/customize_runtime.html#customize-workflow # noqa
val_dataset.test_mode = False
datasets.append(build_dataset(val_dataset))
if cfg.checkpoint_config is not None:
# save mmdet version, config file content and class names in
# checkpoints as meta data
cfg.checkpoint_config.meta = dict(
mmdet_version=mmdet_version,
config=cfg.pretty_text,
CLASSES=datasets[0].CLASSES,
)
# add an attribute for visualization convenience
model.CLASSES = datasets[0].CLASSES
if hasattr(cfg, "plugin"):
torch.backends.cudnn.benchmark = True  # enable cuDNN autotuning to find the fastest convolution algorithms
torch.backends.cudnn.deterministic = False  # allow non-deterministic algorithms for speed (note: this overrides any --deterministic cuDNN settings applied earlier)
custom_train_model(
model,
datasets,
cfg,
distributed=distributed,
validate=(not args.no_validate),
timestamp=timestamp,
meta=meta,
)
else:
train_detector(
model,
datasets,
cfg,
distributed=distributed,
validate=(not args.no_validate),
timestamp=timestamp,
meta=meta,
)
if __name__ == "__main__":
torch.multiprocessing.set_start_method(
"fork"
) # use fork so that workers_per_gpu can be > 1
main()
import os
import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt
from projects.mmdet3d_plugin.datasets.utils import box3d_to_corners
CMD_LIST = ['Turn Right', 'Turn Left', 'Go Straight']
COLOR_VECTORS = ['cornflowerblue', 'royalblue', 'slategrey']
SCORE_THRESH = 0.3
MAP_SCORE_THRESH = 0.3
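# predictions with confidence below these thresholds are skipped when rendering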
color_mapping = np.asarray([
[0, 0, 0],
[255, 179, 0],
[128, 62, 117],
[255, 104, 0],
[166, 189, 215],
[193, 0, 32],
[206, 162, 98],
[129, 112, 102],
[0, 125, 52],
[246, 118, 142],
[0, 83, 138],
[255, 122, 92],
[83, 55, 122],
[255, 142, 0],
[179, 40, 81],
[244, 200, 0],
[127, 24, 13],
[147, 170, 0],
[89, 51, 21],
[241, 58, 19],
[35, 44, 22],
[112, 224, 255],
[70, 184, 160],
[153, 0, 255],
[71, 255, 0],
[255, 0, 163],
[255, 204, 0],
[0, 255, 235],
[255, 0, 235],
[255, 0, 122],
[255, 245, 0],
[10, 190, 212],
[214, 255, 0],
[0, 204, 255],
[20, 0, 255],
[255, 255, 0],
[0, 153, 255],
[0, 255, 204],
[41, 255, 0],
[173, 0, 255],
[0, 245, 255],
[71, 0, 255],
[0, 255, 184],
[0, 92, 255],
[184, 255, 0],
[255, 214, 0],
[25, 194, 194],
[92, 0, 255],
[220, 220, 220],
[255, 9, 92],
[112, 9, 255],
[8, 255, 214],
[255, 184, 6],
[10, 255, 71],
[255, 41, 10],
[7, 255, 255],
[224, 255, 8],
[102, 8, 255],
[255, 61, 6],
[255, 194, 7],
[0, 255, 20],
[255, 8, 41],
[255, 5, 153],
[6, 51, 255],
[235, 12, 255],
[160, 150, 20],
[0, 163, 255],
[140, 140, 140],
[250, 10, 15],
[20, 255, 0],
]) / 255
class BEVRender:
def __init__(
self,
plot_choices,
out_dir,
xlim=40,
ylim=40,
):
self.plot_choices = plot_choices
self.xlim = xlim
self.ylim = ylim
self.gt_dir = os.path.join(out_dir, "bev_gt")
self.pred_dir = os.path.join(out_dir, "bev_pred")
os.makedirs(self.gt_dir, exist_ok=True)
os.makedirs(self.pred_dir, exist_ok=True)
def reset_canvas(self):
plt.close()
self.fig, self.axes = plt.subplots(1, 1, figsize=(20, 20))
self.axes.set_xlim(- self.xlim, self.xlim)
self.axes.set_ylim(- self.ylim, self.ylim)
self.axes.axis('off')
def render(
self,
data,
result,
index,
):
self.reset_canvas()
self.draw_detection_gt(data)
self.draw_motion_gt(data)
self.draw_map_gt(data)
self.draw_planning_gt(data)
self._render_sdc_car()
self._render_command(data)
self._render_legend()
save_path_gt = os.path.join(self.gt_dir, str(index).zfill(4) + '.jpg')
self.save_fig(save_path_gt)
self.reset_canvas()
self.draw_detection_pred(result)
self.draw_track_pred(result)
self.draw_motion_pred(result)
self.draw_map_pred(result)
self.draw_planning_pred(data, result)
self._render_sdc_car()
self._render_command(data)
self._render_legend()
save_path_pred = os.path.join(self.pred_dir, str(index).zfill(4) + '.jpg')
self.save_fig(save_path_pred)
return save_path_gt, save_path_pred
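# A minimal usage sketch (hypothetical values; the plot_choices keys mirror those queried in the draw_* methods below):
#   plot_choices = dict(draw_pred=True, det=True, track=True, motion=True, map=True, planning=True)
#   renderer = BEVRender(plot_choices, out_dir='./vis')
#   gt_path, pred_path = renderer.render(data, result, index=0)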
def save_fig(self, filename):
plt.subplots_adjust(top=1, bottom=0, right=1, left=0,
hspace=0, wspace=0)
plt.margins(0, 0)
plt.savefig(filename)
def draw_detection_gt(self, data):
if not self.plot_choices['det']:
return
for i in range(data['gt_labels_3d'].shape[0]):
label = data['gt_labels_3d'][i]
if label == -1:
continue
color = color_mapping[i % len(color_mapping)]
# draw corners
corners = box3d_to_corners(data['gt_bboxes_3d'])[i, [0, 3, 7, 4, 0]]
x = corners[:, 0]
y = corners[:, 1]
self.axes.plot(x, y, color=color, linewidth=3, linestyle='-')
# draw line to indicate forward direction
forward_center = np.mean(corners[2:4], axis=0)
center = np.mean(corners[0:4], axis=0)
x = [forward_center[0], center[0]]
y = [forward_center[1], center[1]]
self.axes.plot(x, y, color=color, linewidth=3, linestyle='-')
def draw_detection_pred(self, result):
if not (self.plot_choices['draw_pred'] and self.plot_choices['det'] and "boxes_3d" in result):
return
bboxes = result['boxes_3d']
for i in range(result['labels_3d'].shape[0]):
score = result['scores_3d'][i]
if score < SCORE_THRESH:
continue
color = color_mapping[result['instance_ids'][i] % len(color_mapping)]
# draw corners
corners = box3d_to_corners(bboxes)[i, [0, 3, 7, 4, 0]]
x = corners[:, 0]
y = corners[:, 1]
self.axes.plot(x, y, color=color, linewidth=3, linestyle='-')
# draw line to indicate forward direction
forward_center = np.mean(corners[2:4], axis=0)
center = np.mean(corners[0:4], axis=0)
x = [forward_center[0], center[0]]
y = [forward_center[1], center[1]]
self.axes.plot(x, y, color=color, linewidth=3, linestyle='-')
def draw_track_pred(self, result):
if not (self.plot_choices['draw_pred'] and self.plot_choices['track'] and "anchor_queue" in result):
return
temp_bboxes = result["anchor_queue"]
period = result["period"]
bboxes = result['boxes_3d']
for i in range(result['labels_3d'].shape[0]):
score = result['scores_3d'][i]
if score < SCORE_THRESH:
continue
color = color_mapping[result['instance_ids'][i] % len(color_mapping)]
center = bboxes[i, :3]
centers = [center]
for j in range(period[i]):
# draw corners
corners = box3d_to_corners(temp_bboxes[:, -1-j])[i, [0, 3, 7, 4, 0]]
x = corners[:, 0]
y = corners[:, 1]
self.axes.plot(x, y, color=color, linewidth=2, linestyle='-')
# draw line to indicate forward direction
forward_center = np.mean(corners[2:4], axis=0)
center = np.mean(corners[0:4], axis=0)
x = [forward_center[0], center[0]]
y = [forward_center[1], center[1]]
self.axes.plot(x, y, color=color, linewidth=2, linestyle='-')
centers.append(center)
centers = np.stack(centers)
xs = centers[:, 0]
ys = centers[:, 1]
self.axes.plot(xs, ys, color=color, linewidth=2, linestyle='-')
def draw_motion_gt(self, data):
if not self.plot_choices['motion']:
return
for i in range(data['gt_labels_3d'].shape[0]):
label = data['gt_labels_3d'][i]
if label == -1:
continue
color = color_mapping[i % len(color_mapping)]
vehicle_id_list = [0, 1, 2, 3, 4, 6, 7]
if label in vehicle_id_list:
dot_size = 150
else:
dot_size = 25
center = data['gt_bboxes_3d'][i, :2]
masks = data['gt_agent_fut_masks'][i].astype(bool)
if masks[0] == 0:
continue
trajs = data['gt_agent_fut_trajs'][i][masks]
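# ground-truth future trajectories are stored as per-step offsets; the cumulative sum converts them to absolute BEV positions relative to the box center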
trajs = trajs.cumsum(axis=0) + center
trajs = np.concatenate([center.reshape(1, 2), trajs], axis=0)
self._render_traj(trajs, traj_score=1.0,
colormap='winter', dot_size=dot_size)
def draw_motion_pred(self, result, top_k=3):
if not (self.plot_choices['draw_pred'] and self.plot_choices['motion'] and "trajs_3d" in result):
return
bboxes = result['boxes_3d']
labels = result['labels_3d']
for i in range(result['labels_3d'].shape[0]):
score = result['scores_3d'][i]
if score < SCORE_THRESH:
continue
label = labels[i]
vehicle_id_list = [0, 1, 2, 3, 4, 6, 7]
if label in vehicle_id_list:
dot_size = 150
else:
dot_size = 25
traj_score = result['trajs_score'][i].numpy()
traj = result['trajs_3d'][i].numpy()
num_modes = len(traj_score)
center = bboxes[i, :2][None, None].repeat(num_modes, 1, 1).numpy()
traj = np.concatenate([center, traj], axis=1)
sorted_ind = np.argsort(traj_score)[::-1]
sorted_traj = traj[sorted_ind, :, :2]
sorted_score = traj_score[sorted_ind]
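# exponentiate and normalize scores by the best mode, so the top trajectory is drawn at full saturation and weaker modes fade out (see _render_traj)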
norm_score = np.exp(sorted_score[0])
for j in range(top_k - 1, -1, -1):
viz_traj = sorted_traj[j]
traj_score = np.exp(sorted_score[j])/norm_score
self._render_traj(viz_traj, traj_score=traj_score,
colormap='winter', dot_size=dot_size)
def draw_map_gt(self, data):
if not self.plot_choices['map']:
return
vectors = data['map_infos']
for label, vector_list in vectors.items():
color = COLOR_VECTORS[label]
for vector in vector_list:
pts = vector[:, :2]
x = np.array([pt[0] for pt in pts])
y = np.array([pt[1] for pt in pts])
self.axes.plot(x, y, color=color, linewidth=3, marker='o', linestyle='-', markersize=7)
def draw_map_pred(self, result):
if not (self.plot_choices['draw_pred'] and self.plot_choices['map'] and "vectors" in result):
return
for i in range(result['scores'].shape[0]):
score = result['scores'][i]
if score < MAP_SCORE_THRESH:
continue
color = COLOR_VECTORS[result['labels'][i]]
pts = result['vectors'][i]
x = pts[:, 0]
y = pts[:, 1]
plt.plot(x, y, color=color, linewidth=3, marker='o', linestyle='-', markersize=7)
def draw_planning_gt(self, data):
if not self.plot_choices['planning']:
return
# draw planning gt
masks = data['gt_ego_fut_masks'].astype(bool)
if masks[0] != 0:
plan_traj = data['gt_ego_fut_trajs'][masks]
cmd = data['gt_ego_fut_cmd']
plan_traj[abs(plan_traj) < 0.01] = 0.0
plan_traj = plan_traj.cumsum(axis=0)
plan_traj = np.concatenate((np.zeros((1, plan_traj.shape[1])), plan_traj), axis=0)
self._render_traj(plan_traj, traj_score=1.0,
colormap='autumn', dot_size=50)
def draw_planning_pred(self, data, result, top_k=3):
if not (self.plot_choices['draw_pred'] and self.plot_choices['planning'] and "planning" in result):
return
if self.plot_choices['track'] and "ego_anchor_queue" in result:
ego_temp_bboxes = result["ego_anchor_queue"]
ego_period = result["ego_period"]
for j in range(ego_period[0]):
# draw corners
corners = box3d_to_corners(ego_temp_bboxes[:, -1-j])[0, [0, 3, 7, 4, 0]]
x = corners[:, 0]
y = corners[:, 1]
self.axes.plot(x, y, color='mediumseagreen', linewidth=2, linestyle='-')
# draw line to indicate forward direction
forward_center = np.mean(corners[2:4], axis=0)
center = np.mean(corners[0:4], axis=0)
x = [forward_center[0], center[0]]
y = [forward_center[1], center[1]]
self.axes.plot(x, y, color='mediumseagreen', linewidth=2, linestyle='-')
# import ipdb; ipdb.set_trace()
plan_trajs = result['planning'].cpu().numpy()
num_cmd = len(CMD_LIST)
num_mode = plan_trajs.shape[1]
plan_trajs = np.concatenate((np.zeros((num_cmd, num_mode, 1, 2)), plan_trajs), axis=2)
plan_score = result['planning_score'].cpu().numpy()
cmd = data['gt_ego_fut_cmd'].argmax()
plan_trajs = plan_trajs[cmd]
plan_score = plan_score[cmd]
sorted_ind = np.argsort(plan_score)[::-1]
sorted_traj = plan_trajs[sorted_ind, :, :2]
sorted_score = plan_score[sorted_ind]
norm_score = np.exp(sorted_score[0])
for j in range(top_k - 1, -1, -1):
viz_traj = sorted_traj[j]
traj_score = np.exp(sorted_score[j]) / norm_score
self._render_traj(viz_traj, traj_score=traj_score,
colormap='autumn', dot_size=50)
def _render_traj(
self,
future_traj,
traj_score=1,
colormap='winter',
points_per_step=20,
dot_size=25
):
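# Interpolates points_per_step points between consecutive waypoints and colors them along the colormap;
# traj_score blends the colors toward white, so low-confidence trajectories appear washed out.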
total_steps = (len(future_traj) - 1) * points_per_step + 1
dot_colors = matplotlib.colormaps[colormap](
np.linspace(0, 1, total_steps))[:, :3]
dot_colors = dot_colors * traj_score + \
(1 - traj_score) * np.ones_like(dot_colors)
total_xy = np.zeros((total_steps, 2))
for i in range(total_steps - 1):
unit_vec = future_traj[i // points_per_step +
1] - future_traj[i // points_per_step]
total_xy[i] = (i / points_per_step - i // points_per_step) * \
unit_vec + future_traj[i // points_per_step]
total_xy[-1] = future_traj[-1]
self.axes.scatter(
total_xy[:, 0], total_xy[:, 1], c=dot_colors, s=dot_size)
def _render_sdc_car(self):
sdc_car_png = cv2.imread('resources/sdc_car.png')
sdc_car_png = cv2.cvtColor(sdc_car_png, cv2.COLOR_BGR2RGB)
im = self.axes.imshow(sdc_car_png, extent=(-1, 1, -2, 2))
im.set_zorder(2)
def _render_legend(self):
legend = cv2.imread('resources/legend.png')
legend = cv2.cvtColor(legend, cv2.COLOR_BGR2RGB)
self.axes.imshow(legend, extent=(15, 40, -40, -30))
def _render_command(self, data):
cmd = data['gt_ego_fut_cmd'].argmax()
self.axes.text(-38, -38, CMD_LIST[cmd], fontsize=60)
\ No newline at end of file
import os
import numpy as np
import cv2
from PIL import Image
import matplotlib
import matplotlib.pyplot as plt
from pyquaternion import Quaternion
from nuscenes.utils.data_classes import Box as NuScenesBox
from nuscenes.utils.geometry_utils import view_points, box_in_image, BoxVisibility, transform_matrix
from tools.visualization.bev_render import (
color_mapping,
SCORE_THRESH,
MAP_SCORE_THRESH,
CMD_LIST
)
CAM_NAMES_NUSC = [
'CAM_FRONT_LEFT',
'CAM_FRONT',
'CAM_FRONT_RIGHT',
'CAM_BACK_RIGHT',
'CAM_BACK',
'CAM_BACK_LEFT',
]
CAM_NAMES_NUSC_converter = [
'CAM_FRONT',
'CAM_FRONT_RIGHT',
'CAM_FRONT_LEFT',
'CAM_BACK',
'CAM_BACK_LEFT',
'CAM_BACK_RIGHT',
]
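# CAM_NAMES_NUSC is the display order for the 2x3 panel grid; CAM_NAMES_NUSC_converter is the order in which
# images and calibrations are stored in the data dict (presumably the order used by the dataset converter),
# so indices are remapped via .index() when rendering.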
class CamRender:
def __init__(
self,
plot_choices,
out_dir,
):
self.plot_choices = plot_choices
self.pred_dir = os.path.join(out_dir, "cam_pred")
os.makedirs(self.pred_dir, exist_ok=True)
def reset_canvas(self):
plt.close()
plt.gca().set_axis_off()
plt.axis('off')
self.fig, self.axes = plt.subplots(2, 3, figsize=(160 / 3, 20))
plt.tight_layout()
def render(
self,
data,
result,
index,
):
self.reset_canvas()
self.render_image_data(data, index)
self.draw_detection_pred(data, result)
self.draw_motion_pred(data, result)
self.draw_planning_pred(data, result)
save_path = os.path.join(self.pred_dir, str(index).zfill(4) + '.jpg')
self.save_fig(save_path)
return save_path
def load_image(self, data_path, cam):
"""Update the axis of the plot with the provided image."""
image = np.array(Image.open(data_path))
font = cv2.FONT_HERSHEY_SIMPLEX
org = (50, 60)
fontScale = 2
color = (0, 0, 0)
thickness = 4
return cv2.putText(image, cam, org, font, fontScale, color, thickness, cv2.LINE_AA)
def update_image(self, image, index, cam):
"""Render image data for each camera."""
ax = self.get_axis(index)
ax.imshow(image)
plt.axis('off')
ax.axis('off')
ax.grid(False)
def get_axis(self, index):
"""Retrieve the corresponding axis based on the index."""
return self.axes[index//3, index % 3]
def save_fig(self, filename):
plt.subplots_adjust(top=1, bottom=0, right=1, left=0,
hspace=0, wspace=0)
plt.margins(0, 0)
plt.savefig(filename)
def render_image_data(self, data, index):
"""Load and annotate image based on the provided path."""
for i, cam in enumerate(CAM_NAMES_NUSC):
idx = CAM_NAMES_NUSC_converter.index(cam)
img_path = data['img_filename'][idx]
image = self.load_image(img_path, cam)
self.update_image(image, i, cam)
def draw_detection_pred(self, data, result):
if not (self.plot_choices['draw_pred'] and self.plot_choices['det'] and "boxes_3d" in result):
return
bboxes = result['boxes_3d'].numpy()
for j, cam in enumerate(CAM_NAMES_NUSC):
idx = CAM_NAMES_NUSC_converter.index(cam)
cam_intrinsic = data['cam_intrinsic'][idx]
lidar2cam = data['lidar2cam']
extrinsic = lidar2cam[idx]
trans = extrinsic[3, :3]
rot = Quaternion(matrix=extrinsic[:3, :3]).inverse
imsize = (1600, 900)
for i in range(result['labels_3d'].shape[0]):
score = result['scores_3d'][i]
if score < SCORE_THRESH:
continue
color = color_mapping[result['instance_ids'][i] % len(color_mapping)]
center = bboxes[i, 0 : 3]
box_dims = bboxes[i, 3 : 6]
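# NuScenesBox expects its size as (width, length, height), so the first two predicted dims are swapped below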
nusc_dims = box_dims[..., [1, 0, 2]]
quat = Quaternion(axis=[0, 0, 1], radians=bboxes[i, 6])
box = NuScenesBox(
center,
nusc_dims,
quat
)
box.rotate(rot)
box.translate(trans)
if box_in_image(box, cam_intrinsic, imsize):
box.render(
self.axes[j // 3, j % 3],
view=cam_intrinsic,
normalize=True,
colors=(color, color, color),
linewidth=4,
)
self.axes[j//3, j % 3].set_xlim(0, imsize[0])
self.axes[j//3, j % 3].set_ylim(imsize[1], 0)
def draw_motion_pred(self, data, result, points_per_step=10):
if not (self.plot_choices['draw_pred'] and self.plot_choices['motion'] and "trajs_3d" in result):
return
bboxes = result['boxes_3d'].numpy()
for j, cam in enumerate(CAM_NAMES_NUSC):
idx = CAM_NAMES_NUSC_converter.index(cam)
cam_intrinsic = data['cam_intrinsic'][idx]
lidar2cam = data['lidar2cam']
extrinsic = lidar2cam[idx]
trans = extrinsic[3, :3]
rot = Quaternion(matrix=extrinsic[:3, :3]).inverse
imsize = (1600, 900)
for i in range(result['labels_3d'].shape[0]):
score = result['scores_3d'][i]
if score < SCORE_THRESH:
continue
color = color_mapping[result['instance_ids'][i] % len(color_mapping)]
traj_score = result['trajs_score'][i].numpy()
traj = result['trajs_3d'][i].numpy()
mode_idx = traj_score.argmax()
traj = traj[mode_idx]
origin = bboxes[i, :2][None]
traj = np.concatenate([origin, traj], axis=0)
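# give each 2D waypoint a z value at (presumably) the bottom face of the box: center z minus half the stored height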
traj_expand = np.ones((traj.shape[0], 1))
traj_expand[:] = bboxes[i, 2] - bboxes[i, 5] / 2
traj = np.concatenate([traj, traj_expand], axis=1)
center = bboxes[i, 0 : 3]
box_dims = bboxes[i, 3 : 6]
nusc_dims = box_dims[..., [1, 0, 2]]
quat = Quaternion(axis=[0, 0, 1], radians=bboxes[i, 6])
box = NuScenesBox(
center,
nusc_dims,
quat
)
box.rotate(rot)
box.translate(trans)
if not box_in_image(box, cam_intrinsic, imsize):
continue
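# project the trajectory into the camera frame using the row-vector convention (p @ R + t), with the translation taken from the last row of the extrinsic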
traj_points = traj @ extrinsic[:3, :3] + trans
self._render_traj(traj_points, cam_intrinsic, j, color=color, s=15)
def draw_planning_pred(self, data, result):
if not (self.plot_choices['draw_pred'] and self.plot_choices['planning'] and "planning" in result):
return
# for j, cam in enumerate(CAM_NAMES_NUSC[1]):
# idx = CAM_NAMES_NUSC_converter.index(cam)
# cam_intrinsic = data['cam_intrinsic'][idx]
# lidar2cam = data['lidar2cam']
# extrinsic = lidar2cam[idx]
# trans = extrinsic[3, :3]
# rot = Quaternion(matrix=extrinsic[:3, :3]).inverse
# imsize = (1600, 900)
# plan_trajs = result['planning'][0].cpu().numpy()
# plan_trajs = plan_trajs.reshape(3, -1, 6, 2)
# num_cmd = len(CMD_LIST)
# num_mode = plan_trajs.shape[1]
# plan_trajs = np.concatenate((np.zeros((num_cmd, num_mode, 1, 2)), plan_trajs), axis=2)
# plan_trajs = plan_trajs.cumsum(axis=-2)
# plan_score = result['planning_score'][0].cpu().numpy()
# plan_score = plan_score.reshape(3, -1)
# cmd = data['gt_ego_fut_cmd'].argmax()
# plan_trajs = plan_trajs[cmd]
# plan_score = plan_score[cmd]
# mode_idx = plan_score.argmax()
# plan_traj = plan_trajs[mode_idx]
# traj_expand = np.ones((plan_traj.shape[0], 1)) * -2
# # traj_expand[:] = bboxes[i, 2] - bboxes[i, 5] / 2
# plan_traj = np.concatenate([plan_traj, traj_expand], axis=1)
# traj_points = plan_traj @ extrinsic[:3, :3] + trans
# self._render_traj(traj_points, cam_intrinsic, j)
idx = 0 ## front camera
cam_intrinsic = data['cam_intrinsic'][idx]
lidar2cam = data['lidar2cam']
extrinsic = lidar2cam[idx]
trans = extrinsic[3, :3]
rot = Quaternion(matrix=extrinsic[:3, :3]).inverse
# plan_trajs = result['planning'][0].cpu().numpy()
# plan_trajs = plan_trajs.reshape(3, -1, 6, 2)
# num_cmd = len(CMD_LIST)
# num_mode = plan_trajs.shape[1]
# plan_trajs = np.concatenate((np.zeros((num_cmd, num_mode, 1, 2)), plan_trajs), axis=2)
# plan_trajs = plan_trajs.cumsum(axis=-2)
# plan_score = result['planning_score'][0].cpu().numpy()
# plan_score = plan_score.reshape(3, -1)
# cmd = data['gt_ego_fut_cmd'].argmax()
# plan_trajs = plan_trajs[cmd]
# plan_score = plan_score[cmd]
# mode_idx = plan_score.argmax()
# plan_traj = plan_trajs[mode_idx]
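# the mode re-ranking above is kept commented out for reference; the selected plan exposed as result['final_planning']
# is rendered directly, at a fixed height of -1.8 m (roughly ground level below the lidar origin)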
plan_traj = result["final_planning"]
plan_traj = np.concatenate((np.zeros((1, 2)), plan_traj), axis=0)
traj_expand = np.ones((plan_traj.shape[0], 1)) * -1.8
plan_traj = np.concatenate([plan_traj, traj_expand], axis=1)
traj_points = plan_traj @ extrinsic[:3, :3] + trans
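# j=1 targets the CAM_FRONT panel (row 0, column 1 of the 2x3 grid)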
self._render_traj(traj_points, cam_intrinsic, j=1)
def _render_traj(self, traj_points, cam_intrinsic, j, color=(1, 0.5, 0), s=150, points_per_step=10):
total_steps = (len(traj_points)-1) * points_per_step + 1
total_xy = np.zeros((total_steps, 3))
for k in range(total_steps-1):
unit_vec = traj_points[k//points_per_step +
1] - traj_points[k//points_per_step]
total_xy[k] = (k/points_per_step - k//points_per_step) * \
unit_vec + traj_points[k//points_per_step]
in_range_mask = total_xy[:, 2] > 0.1
traj_points = view_points(
total_xy.T, cam_intrinsic, normalize=True)[:2, :]
traj_points = traj_points[:2, in_range_mask]
self.axes[j // 3, j % 3].scatter(traj_points[0], traj_points[1], color=color, s=s)
\ No newline at end of file