Commit 6901df66 authored by Shaoshuai Shi

merge with nuscene related codes

parents 43baf787 1c4e1391
@@ -2,14 +2,17 @@ import torch
from torch.utils.data import DataLoader
from .dataset import DatasetTemplate
from .kitti.kitti_dataset import KittiDataset
from .nuscenes.nuscenes_dataset import NuScenesDataset
from torch.utils.data import DistributedSampler as _DistributedSampler
from pcdet.utils import common_utils

__all__ = {
    'DatasetTemplate': DatasetTemplate,
    'KittiDataset': KittiDataset,
    'NuScenesDataset': NuScenesDataset
}

class DistributedSampler(_DistributedSampler):
    def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
...
@@ -5,7 +5,7 @@ from ...utils import common_utils
def random_flip_along_x(gt_boxes, points):
    """
    Args:
        gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
        points: (M, 3 + C)
    Returns:
    """
@@ -14,13 +14,17 @@ def random_flip_along_x(gt_boxes, points):
        gt_boxes[:, 1] = -gt_boxes[:, 1]
        gt_boxes[:, 6] = -gt_boxes[:, 6]
        points[:, 1] = -points[:, 1]

        if gt_boxes.shape[1] > 7:
            gt_boxes[:, 8] = -gt_boxes[:, 8]

    return gt_boxes, points
def random_flip_along_y(gt_boxes, points):
    """
    Args:
        gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
        points: (M, 3 + C)
    Returns:
    """
@@ -29,13 +33,17 @@ def random_flip_along_y(gt_boxes, points):
        gt_boxes[:, 0] = -gt_boxes[:, 0]
        gt_boxes[:, 6] = -(gt_boxes[:, 6] + np.pi)
        points[:, 0] = -points[:, 0]

        if gt_boxes.shape[1] > 7:
            gt_boxes[:, 7] = -gt_boxes[:, 7]

    return gt_boxes, points
def global_rotation(gt_boxes, points, rot_range):
    """
    Args:
        gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
        points: (M, 3 + C),
        rot_range: [min, max]
    Returns:
    """
@@ -44,6 +52,12 @@ def global_rotation(gt_boxes, points, rot_range):
    points = common_utils.rotate_points_along_z(points[np.newaxis, :, :], np.array([noise_rotation]))[0]
    gt_boxes[:, 0:3] = common_utils.rotate_points_along_z(gt_boxes[np.newaxis, :, 0:3], np.array([noise_rotation]))[0]
    gt_boxes[:, 6] += noise_rotation

    if gt_boxes.shape[1] > 7:
        gt_boxes[:, 7:9] = common_utils.rotate_points_along_z(
            np.hstack((gt_boxes[:, 7:9], np.zeros((gt_boxes.shape[0], 1))))[np.newaxis, :, :],
            np.array([noise_rotation])
        )[0][:, 0:2]

    return gt_boxes, points
...
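Note on the additions above (commentary, not part of the commit): columns 7 and 8 of the extended boxes hold (vx, vy), so a flip across the x axis negates vy, a flip across the y axis negates vx, and a global yaw rotation rotates the velocity exactly like an xy point. A minimal self-contained sketch of that behavior, using a hypothetical one-box array:

import numpy as np

# Hypothetical box: [x, y, z, dx, dy, dz, heading, vx, vy]
box = np.array([[1.0, 2.0, 0.0, 4.0, 2.0, 1.5, 0.3, 3.0, 1.0]])

# Flip along x (y -> -y): heading and vy change sign, vx is untouched.
flipped = box.copy()
flipped[:, 1], flipped[:, 6], flipped[:, 8] = -flipped[:, 1], -flipped[:, 6], -flipped[:, 8]

# A global yaw rotation by r rotates (vx, vy) like any point in the xy plane.
r = np.pi / 2
rot = np.array([[np.cos(r), -np.sin(r)], [np.sin(r), np.cos(r)]])
print(box[:, 7:9] @ rot.T)  # (3, 1) -> approximately (-1, 3)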
import pickle
import copy
import numpy as np
from tqdm import tqdm
from pathlib import Path
from ...utils import common_utils
from ..dataset import DatasetTemplate
from ...ops.roiaware_pool3d import roiaware_pool3d_utils
class NuScenesDataset(DatasetTemplate):
def __init__(self, dataset_cfg, class_names, training=True, root_path=None, logger=None):
root_path = (root_path if root_path is not None else Path(dataset_cfg.DATA_PATH)) / dataset_cfg.VERSION
super().__init__(
dataset_cfg=dataset_cfg, class_names=class_names, training=training, root_path=root_path, logger=logger
)
self.infos = []
self.include_nuscenes_data(self.mode)
if self.training and self.dataset_cfg.get('BALANCED_RESAMPLING', False):
self.infos = self.balanced_infos_resampling(self.infos)
def include_nuscenes_data(self, mode):
self.logger.info('Loading NuScenes dataset')
nuscenes_infos = []
for info_path in self.dataset_cfg.INFO_PATH[mode]:
info_path = self.root_path / info_path
if not info_path.exists():
continue
with open(info_path, 'rb') as f:
infos = pickle.load(f)
nuscenes_infos.extend(infos)
self.infos.extend(nuscenes_infos)
self.logger.info('Total samples for NuScenes dataset: %d' % (len(nuscenes_infos)))
def balanced_infos_resampling(self, infos):
"""
Class-balanced sampling of nuScenes dataset from https://arxiv.org/abs/1908.09492
"""
if self.class_names is None:
return infos
cls_infos = {name: [] for name in self.class_names}
for info in infos:
for name in set(info['gt_names']):
if name in self.class_names:
cls_infos[name].append(info)
duplicated_samples = sum([len(v) for _, v in cls_infos.items()])
cls_dist = {k: len(v) / duplicated_samples for k, v in cls_infos.items()}
sampled_infos = []
frac = 1.0 / len(self.class_names)
ratios = [frac / v for v in cls_dist.values()]
for cur_cls_infos, ratio in zip(list(cls_infos.values()), ratios):
sampled_infos += np.random.choice(
cur_cls_infos, int(len(cur_cls_infos) * ratio)
).tolist()
self.logger.info('Total samples after balanced resampling: %s' % (len(sampled_infos)))
cls_infos_new = {name: [] for name in self.class_names}
for info in sampled_infos:
for name in set(info['gt_names']):
if name in self.class_names:
cls_infos_new[name].append(info)
cls_dist_new = {k: len(v) / len(sampled_infos) for k, v in cls_infos_new.items()}
return sampled_infos
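    # Worked example of the resampling math above (commentary, not in the
    # original file): with two classes appearing in 300 and 100 infos,
    # cls_dist is {a: 0.75, b: 0.25} and frac = 0.5, so
    # ratios = [0.5 / 0.75, 0.5 / 0.25] = [0.67, 2.0]; each per-class list is
    # resampled to len * ratio ~= 200 infos, pushing the sampled class
    # distribution toward uniform as in arXiv:1908.09492.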
def get_sweep(self, sweep_info):
def remove_ego_points(points, center_radius=1.0):
mask = ~((np.abs(points[:, 0]) < center_radius) & (np.abs(points[:, 1]) < center_radius))
return points[mask]
lidar_path = self.root_path / sweep_info['lidar_path']
points_sweep = np.fromfile(str(lidar_path), dtype=np.float32, count=-1).reshape([-1, 5])[:, :4]
points_sweep = remove_ego_points(points_sweep).T
if sweep_info['transform_matrix'] is not None:
num_points = points_sweep.shape[1]
points_sweep[:3, :] = sweep_info['transform_matrix'].dot(
np.vstack((points_sweep[:3, :], np.ones(num_points))))[:3, :]
cur_times = sweep_info['time_lag'] * np.ones((1, points_sweep.shape[1]))
return points_sweep.T, cur_times.T
def get_lidar_with_sweeps(self, index, max_sweeps=1):
info = self.infos[index]
lidar_path = self.root_path / info['lidar_path']
points = np.fromfile(str(lidar_path), dtype=np.float32, count=-1).reshape([-1, 5])[:, :4]
sweep_points_list = [points]
sweep_times_list = [np.zeros((points.shape[0], 1))]
for k in np.random.choice(len(info['sweeps']), max_sweeps - 1, replace=False):
points_sweep, times_sweep = self.get_sweep(info['sweeps'][k])
sweep_points_list.append(points_sweep)
sweep_times_list.append(times_sweep)
points = np.concatenate(sweep_points_list, axis=0)
times = np.concatenate(sweep_times_list, axis=0).astype(points.dtype)
points = np.concatenate((points, times), axis=1)
return points
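    # Shape note (commentary, not in the original file): every returned row is
    # [x, y, z, intensity, time_lag], where time_lag is 0.0 for the key frame
    # and positive for earlier sweeps, letting the network tell sweeps apart.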
def __len__(self):
if self._merge_all_iters_to_one_epoch:
return len(self.infos) * self.total_epochs
return len(self.infos)
def __getitem__(self, index):
if self._merge_all_iters_to_one_epoch:
index = index % len(self.infos)
info = copy.deepcopy(self.infos[index])
points = self.get_lidar_with_sweeps(index, max_sweeps=self.dataset_cfg.MAX_SWEEPS)
input_dict = {
'points': points,
'frame_id': Path(info['lidar_path']).stem,
'metadata': {'token': info['token']}
}
if 'gt_boxes' in info:
if self.dataset_cfg.get('FILTER_MIN_POINTS_IN_GT', False):
mask = (info['num_lidar_pts'] > self.dataset_cfg.FILTER_MIN_POINTS_IN_GT - 1)
else:
mask = None
input_dict.update({
'gt_names': info['gt_names'] if mask is None else info['gt_names'][mask],
'gt_boxes': info['gt_boxes'] if mask is None else info['gt_boxes'][mask]
})
data_dict = self.prepare_data(data_dict=input_dict)
if self.dataset_cfg.get('SET_NAN_VELOCITY_TO_ZEROS', False):
gt_boxes = data_dict['gt_boxes']
gt_boxes[np.isnan(gt_boxes)] = 0
data_dict['gt_boxes'] = gt_boxes
if not self.dataset_cfg.PRED_VELOCITY and 'gt_boxes' in data_dict:
data_dict['gt_boxes'] = data_dict['gt_boxes'][:, [0, 1, 2, 3, 4, 5, 6, -1]]
return data_dict
@staticmethod
def generate_prediction_dicts(batch_dict, pred_dicts, class_names, output_path=None):
"""
Args:
batch_dict:
frame_id:
pred_dicts: list of pred_dicts
pred_boxes: (N, 7), Tensor
pred_scores: (N), Tensor
pred_labels: (N), Tensor
class_names:
output_path:
Returns:
"""
def get_template_prediction(num_samples):
ret_dict = {
'name': np.zeros(num_samples), 'score': np.zeros(num_samples),
'boxes_lidar': np.zeros([num_samples, 7]), 'pred_labels': np.zeros(num_samples)
}
return ret_dict
def generate_single_sample_dict(box_dict):
pred_scores = box_dict['pred_scores'].cpu().numpy()
pred_boxes = box_dict['pred_boxes'].cpu().numpy()
pred_labels = box_dict['pred_labels'].cpu().numpy()
pred_dict = get_template_prediction(pred_scores.shape[0])
if pred_scores.shape[0] == 0:
return pred_dict
pred_dict['name'] = np.array(class_names)[pred_labels - 1]
pred_dict['score'] = pred_scores
pred_dict['boxes_lidar'] = pred_boxes
pred_dict['pred_labels'] = pred_labels
return pred_dict
annos = []
for index, box_dict in enumerate(pred_dicts):
single_pred_dict = generate_single_sample_dict(box_dict)
single_pred_dict['frame_id'] = batch_dict['frame_id'][index]
single_pred_dict['metadata'] = batch_dict['metadata'][index]
annos.append(single_pred_dict)
return annos
def evaluation(self, det_annos, class_names, **kwargs):
import json
from nuscenes.nuscenes import NuScenes
from . import nuscenes_utils
nusc = NuScenes(version=self.dataset_cfg.VERSION, dataroot=str(self.root_path), verbose=True)
nusc_annos = nuscenes_utils.transform_det_annos_to_nusc_annos(det_annos, nusc)
nusc_annos['meta'] = {
'use_camera': False,
'use_lidar': True,
'use_radar': False,
'use_map': False,
'use_external': False,
}
output_path = Path(kwargs['output_path'])
output_path.mkdir(exist_ok=True, parents=True)
res_path = str(output_path / 'results_nusc.json')
with open(res_path, 'w') as f:
json.dump(nusc_annos, f)
self.logger.info(f'The predictions of NuScenes have been saved to {res_path}')
if self.dataset_cfg.VERSION == 'v1.0-test':
return 'No ground-truth annotations for evaluation', {}
from nuscenes.eval.detection.config import config_factory
from nuscenes.eval.detection.evaluate import NuScenesEval
eval_set_map = {
'v1.0-mini': 'mini_val',
'v1.0-trainval': 'val',
'v1.0-test': 'test'
}
try:
eval_version = 'detection_cvpr_2019'
eval_config = config_factory(eval_version)
except:
eval_version = 'cvpr_2019'
eval_config = config_factory(eval_version)
nusc_eval = NuScenesEval(
nusc,
config=eval_config,
result_path=res_path,
eval_set=eval_set_map[self.dataset_cfg.VERSION],
output_dir=str(output_path),
verbose=True,
)
metrics_summary = nusc_eval.main(plot_examples=0, render_curves=False)
with open(output_path / 'metrics_summary.json', 'r') as f:
metrics = json.load(f)
result_str, result_dict = nuscenes_utils.format_nuscene_results(metrics, self.class_names, version=eval_version)
return result_str, result_dict
def create_groundtruth_database(self, used_classes=None, max_sweeps=10):
import torch
database_save_path = self.root_path / f'gt_database_{max_sweeps}sweeps_withvelo'
db_info_save_path = self.root_path / f'nuscenes_dbinfos_{max_sweeps}sweeps_withvelo.pkl'
database_save_path.mkdir(parents=True, exist_ok=True)
all_db_infos = {}
for idx in tqdm(range(len(self.infos))):
sample_idx = idx
info = self.infos[idx]
points = self.get_lidar_with_sweeps(idx, max_sweeps=max_sweeps)
gt_boxes = info['gt_boxes']
gt_names = info['gt_names']
box_idxs_of_pts = roiaware_pool3d_utils.points_in_boxes_gpu(
torch.from_numpy(points[:, 0:3]).unsqueeze(dim=0).float().cuda(),
torch.from_numpy(gt_boxes[:, 0:7]).unsqueeze(dim=0).float().cuda()
).long().squeeze(dim=0).cpu().numpy()
for i in range(gt_boxes.shape[0]):
filename = '%s_%s_%d.bin' % (sample_idx, gt_names[i], i)
filepath = database_save_path / filename
gt_points = points[box_idxs_of_pts == i]
gt_points[:, :3] -= gt_boxes[i, :3]
with open(filepath, 'w') as f:
gt_points.tofile(f)
if (used_classes is None) or gt_names[i] in used_classes:
db_path = str(filepath.relative_to(self.root_path)) # gt_database/xxxxx.bin
db_info = {'name': gt_names[i], 'path': db_path, 'image_idx': sample_idx, 'gt_idx': i,
'box3d_lidar': gt_boxes[i], 'num_points_in_gt': gt_points.shape[0]}
if gt_names[i] in all_db_infos:
all_db_infos[gt_names[i]].append(db_info)
else:
all_db_infos[gt_names[i]] = [db_info]
for k, v in all_db_infos.items():
print('Database %s: %d' % (k, len(v)))
with open(db_info_save_path, 'wb') as f:
pickle.dump(all_db_infos, f)
def create_nuscenes_info(version, data_path, save_path, max_sweeps=10):
from nuscenes.nuscenes import NuScenes
from nuscenes.utils import splits
from . import nuscenes_utils
data_path = data_path / version
save_path = save_path / version
assert version in ['v1.0-trainval', 'v1.0-test', 'v1.0-mini']
if version == 'v1.0-trainval':
train_scenes = splits.train
val_scenes = splits.val
elif version == 'v1.0-test':
train_scenes = splits.test
val_scenes = []
elif version == 'v1.0-mini':
train_scenes = splits.mini_train
val_scenes = splits.mini_val
else:
raise NotImplementedError
nusc = NuScenes(version=version, dataroot=data_path, verbose=True)
available_scenes = nuscenes_utils.get_available_scenes(nusc)
available_scene_names = [s['name'] for s in available_scenes]
train_scenes = list(filter(lambda x: x in available_scene_names, train_scenes))
val_scenes = list(filter(lambda x: x in available_scene_names, val_scenes))
train_scenes = set([available_scenes[available_scene_names.index(s)]['token'] for s in train_scenes])
val_scenes = set([available_scenes[available_scene_names.index(s)]['token'] for s in val_scenes])
print('%s: train scene(%d), val scene(%d)' % (version, len(train_scenes), len(val_scenes)))
train_nusc_infos, val_nusc_infos = nuscenes_utils.fill_trainval_infos(
data_path=data_path, nusc=nusc, train_scenes=train_scenes, val_scenes=val_scenes,
test='test' in version, max_sweeps=max_sweeps
)
if version == 'v1.0-test':
print('test sample: %d' % len(train_nusc_infos))
with open(save_path / f'nuscenes_infos_{max_sweeps}sweeps_test.pkl', 'wb') as f:
pickle.dump(train_nusc_infos, f)
else:
print('train sample: %d, val sample: %d' % (len(train_nusc_infos), len(val_nusc_infos)))
with open(save_path / f'nuscenes_infos_{max_sweeps}sweeps_train.pkl', 'wb') as f:
pickle.dump(train_nusc_infos, f)
with open(save_path / f'nuscenes_infos_{max_sweeps}sweeps_val.pkl', 'wb') as f:
pickle.dump(val_nusc_infos, f)
if __name__ == '__main__':
import yaml
import argparse
from pathlib import Path
from easydict import EasyDict
parser = argparse.ArgumentParser(description='arg parser')
parser.add_argument('--cfg_file', type=str, default=None, help='specify the config of dataset')
parser.add_argument('--func', type=str, default='create_nuscenes_infos', help='')
parser.add_argument('--version', type=str, default='v1.0-trainval', help='')
args = parser.parse_args()
if args.func == 'create_nuscenes_infos':
dataset_cfg = EasyDict(yaml.load(open(args.cfg_file)))
ROOT_DIR = (Path(__file__).resolve().parent / '../../../').resolve()
dataset_cfg.VERSION = args.version
create_nuscenes_info(
version=dataset_cfg.VERSION,
data_path=ROOT_DIR / 'data' / 'nuscenes',
save_path=ROOT_DIR / 'data' / 'nuscenes',
max_sweeps=dataset_cfg.MAX_SWEEPS,
)
nuscenes_dataset = NuScenesDataset(
dataset_cfg=dataset_cfg, class_names=None,
root_path=ROOT_DIR / 'data' / 'nuscenes',
logger=common_utils.create_logger(), training=True
)
nuscenes_dataset.create_groundtruth_database(max_sweeps=dataset_cfg.MAX_SWEEPS)
"""
The NuScenes data pre-processing and evaluation is modified from
https://github.com/traveller59/second.pytorch and https://github.com/poodarchu/Det3D
"""
from pathlib import Path
import tqdm
import numpy as np
import operator
from functools import reduce
from nuscenes.utils.geometry_utils import transform_matrix
from pyquaternion import Quaternion
from nuscenes.utils.data_classes import Box
map_name_from_general_to_detection = {
'human.pedestrian.adult': 'pedestrian',
'human.pedestrian.child': 'pedestrian',
'human.pedestrian.wheelchair': 'ignore',
'human.pedestrian.stroller': 'ignore',
'human.pedestrian.personal_mobility': 'ignore',
'human.pedestrian.police_officer': 'pedestrian',
'human.pedestrian.construction_worker': 'pedestrian',
'animal': 'ignore',
'vehicle.car': 'car',
'vehicle.motorcycle': 'motorcycle',
'vehicle.bicycle': 'bicycle',
'vehicle.bus.bendy': 'bus',
'vehicle.bus.rigid': 'bus',
'vehicle.truck': 'truck',
'vehicle.construction': 'construction_vehicle',
'vehicle.emergency.ambulance': 'ignore',
'vehicle.emergency.police': 'ignore',
'vehicle.trailer': 'trailer',
'movable_object.barrier': 'barrier',
'movable_object.trafficcone': 'traffic_cone',
'movable_object.pushable_pullable': 'ignore',
'movable_object.debris': 'ignore',
'static_object.bicycle_rack': 'ignore',
}
cls_attr_dist = {
'barrier': {
'cycle.with_rider': 0,
'cycle.without_rider': 0,
'pedestrian.moving': 0,
'pedestrian.sitting_lying_down': 0,
'pedestrian.standing': 0,
'vehicle.moving': 0,
'vehicle.parked': 0,
'vehicle.stopped': 0,
},
'bicycle': {
'cycle.with_rider': 2791,
'cycle.without_rider': 8946,
'pedestrian.moving': 0,
'pedestrian.sitting_lying_down': 0,
'pedestrian.standing': 0,
'vehicle.moving': 0,
'vehicle.parked': 0,
'vehicle.stopped': 0,
},
'bus': {
'cycle.with_rider': 0,
'cycle.without_rider': 0,
'pedestrian.moving': 0,
'pedestrian.sitting_lying_down': 0,
'pedestrian.standing': 0,
'vehicle.moving': 9092,
'vehicle.parked': 3294,
'vehicle.stopped': 3881,
},
'car': {
'cycle.with_rider': 0,
'cycle.without_rider': 0,
'pedestrian.moving': 0,
'pedestrian.sitting_lying_down': 0,
'pedestrian.standing': 0,
'vehicle.moving': 114304,
'vehicle.parked': 330133,
'vehicle.stopped': 46898,
},
'construction_vehicle': {
'cycle.with_rider': 0,
'cycle.without_rider': 0,
'pedestrian.moving': 0,
'pedestrian.sitting_lying_down': 0,
'pedestrian.standing': 0,
'vehicle.moving': 882,
'vehicle.parked': 11549,
'vehicle.stopped': 2102,
},
'ignore': {
'cycle.with_rider': 307,
'cycle.without_rider': 73,
'pedestrian.moving': 0,
'pedestrian.sitting_lying_down': 0,
'pedestrian.standing': 0,
'vehicle.moving': 165,
'vehicle.parked': 400,
'vehicle.stopped': 102,
},
'motorcycle': {
'cycle.with_rider': 4233,
'cycle.without_rider': 8326,
'pedestrian.moving': 0,
'pedestrian.sitting_lying_down': 0,
'pedestrian.standing': 0,
'vehicle.moving': 0,
'vehicle.parked': 0,
'vehicle.stopped': 0,
},
'pedestrian': {
'cycle.with_rider': 0,
'cycle.without_rider': 0,
'pedestrian.moving': 157444,
'pedestrian.sitting_lying_down': 13939,
'pedestrian.standing': 46530,
'vehicle.moving': 0,
'vehicle.parked': 0,
'vehicle.stopped': 0,
},
'traffic_cone': {
'cycle.with_rider': 0,
'cycle.without_rider': 0,
'pedestrian.moving': 0,
'pedestrian.sitting_lying_down': 0,
'pedestrian.standing': 0,
'vehicle.moving': 0,
'vehicle.parked': 0,
'vehicle.stopped': 0,
},
'trailer': {
'cycle.with_rider': 0,
'cycle.without_rider': 0,
'pedestrian.moving': 0,
'pedestrian.sitting_lying_down': 0,
'pedestrian.standing': 0,
'vehicle.moving': 3421,
'vehicle.parked': 19224,
'vehicle.stopped': 1895,
},
'truck': {
'cycle.with_rider': 0,
'cycle.without_rider': 0,
'pedestrian.moving': 0,
'pedestrian.sitting_lying_down': 0,
'pedestrian.standing': 0,
'vehicle.moving': 21339,
'vehicle.parked': 55626,
'vehicle.stopped': 11097,
},
}
def get_available_scenes(nusc):
available_scenes = []
print('total scene num:', len(nusc.scene))
for scene in nusc.scene:
scene_token = scene['token']
scene_rec = nusc.get('scene', scene_token)
sample_rec = nusc.get('sample', scene_rec['first_sample_token'])
sd_rec = nusc.get('sample_data', sample_rec['data']['LIDAR_TOP'])
has_more_frames = True
scene_not_exist = False
while has_more_frames:
lidar_path, boxes, _ = nusc.get_sample_data(sd_rec['token'])
if not Path(lidar_path).exists():
scene_not_exist = True
break
else:
break
# if not sd_rec['next'] == '':
# sd_rec = nusc.get('sample_data', sd_rec['next'])
# else:
# has_more_frames = False
if scene_not_exist:
continue
available_scenes.append(scene)
print('exist scene num:', len(available_scenes))
return available_scenes
def get_sample_data(nusc, sample_data_token, selected_anntokens=None):
"""
Returns the data path as well as all annotations related to that sample_data.
Note that the boxes are transformed into the current sensor's coordinate frame.
Args:
nusc:
sample_data_token: Sample_data token.
        selected_anntokens: If provided, only return the selected annotations.
Returns:
"""
# Retrieve sensor & pose records
sd_record = nusc.get('sample_data', sample_data_token)
cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
sensor_record = nusc.get('sensor', cs_record['sensor_token'])
pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
data_path = nusc.get_sample_data_path(sample_data_token)
if sensor_record['modality'] == 'camera':
cam_intrinsic = np.array(cs_record['camera_intrinsic'])
imsize = (sd_record['width'], sd_record['height'])
else:
cam_intrinsic = imsize = None
# Retrieve all sample annotations and map to sensor coordinate system.
if selected_anntokens is not None:
boxes = list(map(nusc.get_box, selected_anntokens))
else:
boxes = nusc.get_boxes(sample_data_token)
# Make list of Box objects including coord system transforms.
box_list = []
for box in boxes:
box.velocity = nusc.box_velocity(box.token)
# Move box to ego vehicle coord system
box.translate(-np.array(pose_record['translation']))
box.rotate(Quaternion(pose_record['rotation']).inverse)
# Move box to sensor coord system
box.translate(-np.array(cs_record['translation']))
box.rotate(Quaternion(cs_record['rotation']).inverse)
box_list.append(box)
return data_path, box_list, cam_intrinsic
def quaternion_yaw(q: Quaternion) -> float:
"""
Calculate the yaw angle from a quaternion.
Note that this only works for a quaternion that represents a box in lidar or global coordinate frame.
It does not work for a box in the camera frame.
:param q: Quaternion of interest.
:return: Yaw angle in radians.
"""
# Project into xy plane.
v = np.dot(q.rotation_matrix, np.array([1, 0, 0]))
# Measure yaw using arctan.
yaw = np.arctan2(v[1], v[0])
return yaw
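# Usage sketch for quaternion_yaw (commentary, not in the original file):
#   q = Quaternion(axis=[0, 0, 1], radians=0.5)
#   quaternion_yaw(q)  # -> 0.5, since rotation_matrix @ [1, 0, 0] = (cos 0.5, sin 0.5, 0)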
def fill_trainval_infos(data_path, nusc, train_scenes, val_scenes, test=False, max_sweeps=10):
train_nusc_infos = []
val_nusc_infos = []
progress_bar = tqdm.tqdm(total=len(nusc.sample), desc='create_info', dynamic_ncols=True)
    ref_chan = 'LIDAR_TOP'  # The reference channel that the point clouds are mapped to.
    chan = 'LIDAR_TOP'  # The lidar channel from which we track back n sweeps to aggregate the point cloud.
for index, sample in enumerate(nusc.sample):
progress_bar.update()
ref_sd_token = sample['data'][ref_chan]
ref_sd_rec = nusc.get('sample_data', ref_sd_token)
ref_cs_rec = nusc.get('calibrated_sensor', ref_sd_rec['calibrated_sensor_token'])
ref_pose_rec = nusc.get('ego_pose', ref_sd_rec['ego_pose_token'])
ref_time = 1e-6 * ref_sd_rec['timestamp']
ref_lidar_path, ref_boxes, _ = get_sample_data(nusc, ref_sd_token)
ref_cam_front_token = sample['data']['CAM_FRONT']
ref_cam_path, _, ref_cam_intrinsic = nusc.get_sample_data(ref_cam_front_token)
# Homogeneous transform from ego car frame to reference frame
ref_from_car = transform_matrix(
ref_cs_rec['translation'], Quaternion(ref_cs_rec['rotation']), inverse=True
)
# Homogeneous transformation matrix from global to _current_ ego car frame
car_from_global = transform_matrix(
ref_pose_rec['translation'], Quaternion(ref_pose_rec['rotation']), inverse=True,
)
info = {
'lidar_path': Path(ref_lidar_path).relative_to(data_path).__str__(),
'cam_front_path': Path(ref_cam_path).relative_to(data_path).__str__(),
'cam_intrinsic': ref_cam_intrinsic,
'token': sample['token'],
'sweeps': [],
'ref_from_car': ref_from_car,
'car_from_global': car_from_global,
'timestamp': ref_time,
}
sample_data_token = sample['data'][chan]
curr_sd_rec = nusc.get('sample_data', sample_data_token)
sweeps = []
while len(sweeps) < max_sweeps - 1:
if curr_sd_rec['prev'] == '':
if len(sweeps) == 0:
sweep = {
'lidar_path': Path(ref_lidar_path).relative_to(data_path).__str__(),
'sample_data_token': curr_sd_rec['token'],
'transform_matrix': None,
'time_lag': curr_sd_rec['timestamp'] * 0,
}
sweeps.append(sweep)
else:
sweeps.append(sweeps[-1])
else:
curr_sd_rec = nusc.get('sample_data', curr_sd_rec['prev'])
# Get past pose
current_pose_rec = nusc.get('ego_pose', curr_sd_rec['ego_pose_token'])
global_from_car = transform_matrix(
current_pose_rec['translation'], Quaternion(current_pose_rec['rotation']), inverse=False,
)
# Homogeneous transformation matrix from sensor coordinate frame to ego car frame.
current_cs_rec = nusc.get(
'calibrated_sensor', curr_sd_rec['calibrated_sensor_token']
)
car_from_current = transform_matrix(
current_cs_rec['translation'], Quaternion(current_cs_rec['rotation']), inverse=False,
)
tm = reduce(np.dot, [ref_from_car, car_from_global, global_from_car, car_from_current])
lidar_path = nusc.get_sample_data_path(curr_sd_rec['token'])
time_lag = ref_time - 1e-6 * curr_sd_rec['timestamp']
sweep = {
'lidar_path': Path(lidar_path).relative_to(data_path).__str__(),
'sample_data_token': curr_sd_rec['token'],
'transform_matrix': tm,
'global_from_car': global_from_car,
'car_from_current': car_from_current,
'time_lag': time_lag,
}
sweeps.append(sweep)
info['sweeps'] = sweeps
assert len(info['sweeps']) == max_sweeps - 1, \
f"sweep {curr_sd_rec['token']} only has {len(info['sweeps'])} sweeps, " \
f"you should duplicate to sweep num {max_sweeps - 1}"
if not test:
annotations = [nusc.get('sample_annotation', token) for token in sample['anns']]
            # this filtering gives a 0.5~1 mAP improvement
num_lidar_pts = np.array([anno['num_lidar_pts'] for anno in annotations])
num_radar_pts = np.array([anno['num_radar_pts'] for anno in annotations])
mask = (num_lidar_pts + num_radar_pts > 0)
locs = np.array([b.center for b in ref_boxes]).reshape(-1, 3)
            dims = np.array([b.wlh for b in ref_boxes]).reshape(-1, 3)[:, [1, 0, 2]]  # wlh ==> dx, dy, dz (lwh)
velocity = np.array([b.velocity for b in ref_boxes]).reshape(-1, 3)
rots = np.array([quaternion_yaw(b.orientation) for b in ref_boxes]).reshape(-1, 1)
names = np.array([b.name for b in ref_boxes])
tokens = np.array([b.token for b in ref_boxes])
gt_boxes = np.concatenate([locs, dims, rots, velocity[:, :2]], axis=1)
assert len(annotations) == len(gt_boxes) == len(velocity)
info['gt_boxes'] = gt_boxes[mask, :]
info['gt_boxes_velocity'] = velocity[mask, :]
info['gt_names'] = np.array([map_name_from_general_to_detection[name] for name in names])[mask]
info['gt_boxes_token'] = tokens[mask]
info['num_lidar_pts'] = num_lidar_pts[mask]
info['num_radar_pts'] = num_radar_pts[mask]
if sample['scene_token'] in train_scenes:
train_nusc_infos.append(info)
else:
val_nusc_infos.append(info)
progress_bar.close()
return train_nusc_infos, val_nusc_infos
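# Note on the sweep transform above (commentary, not in the original file):
# tm = ref_from_car @ car_from_global @ global_from_car @ car_from_current reads
# right-to-left and maps homogeneous points from the sweep's sensor frame into
# the key frame's sensor frame: sensor -> past ego -> global -> current ego ->
# reference sensor. NuScenesDataset.get_sweep() applies it as tm[:3] @ [x; y; z; 1].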
def boxes_lidar_to_nusenes(det_info):
boxes3d = det_info['boxes_lidar']
scores = det_info['score']
labels = det_info['pred_labels']
box_list = []
for k in range(boxes3d.shape[0]):
quat = Quaternion(axis=[0, 0, 1], radians=boxes3d[k, 6])
velocity = (*boxes3d[k, 7:9], 0.0) if boxes3d.shape[1] == 9 else (0.0, 0.0, 0.0)
box = Box(
boxes3d[k, :3],
boxes3d[k, [4, 3, 5]], # wlh
quat, label=labels[k], score=scores[k], velocity=velocity,
)
box_list.append(box)
return box_list
def lidar_nusc_box_to_global(nusc, boxes, sample_token):
s_record = nusc.get('sample', sample_token)
sample_data_token = s_record['data']['LIDAR_TOP']
sd_record = nusc.get('sample_data', sample_data_token)
cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
sensor_record = nusc.get('sensor', cs_record['sensor_token'])
pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
data_path = nusc.get_sample_data_path(sample_data_token)
box_list = []
for box in boxes:
# Move box to ego vehicle coord system
box.rotate(Quaternion(cs_record['rotation']))
box.translate(np.array(cs_record['translation']))
# Move box to global coord system
box.rotate(Quaternion(pose_record['rotation']))
box.translate(np.array(pose_record['translation']))
box_list.append(box)
return box_list
def transform_det_annos_to_nusc_annos(det_annos, nusc):
nusc_annos = {
'results': {},
'meta': None,
}
for det in det_annos:
annos = []
box_list = boxes_lidar_to_nusenes(det)
box_list = lidar_nusc_box_to_global(
nusc=nusc, boxes=box_list, sample_token=det['metadata']['token']
)
for k, box in enumerate(box_list):
name = det['name'][k]
if np.sqrt(box.velocity[0] ** 2 + box.velocity[1] ** 2) > 0.2:
if name in ['car', 'construction_vehicle', 'bus', 'truck', 'trailer']:
attr = 'vehicle.moving'
elif name in ['bicycle', 'motorcycle']:
attr = 'cycle.with_rider'
else:
attr = None
else:
if name in ['pedestrian']:
attr = 'pedestrian.standing'
elif name in ['bus']:
attr = 'vehicle.stopped'
else:
attr = None
attr = attr if attr is not None else max(
cls_attr_dist[name].items(), key=operator.itemgetter(1))[0]
nusc_anno = {
'sample_token': det['metadata']['token'],
'translation': box.center.tolist(),
'size': box.wlh.tolist(),
'rotation': box.orientation.elements.tolist(),
'velocity': box.velocity[:2].tolist(),
'detection_name': name,
'detection_score': box.score,
'attribute_name': attr
}
annos.append(nusc_anno)
nusc_annos['results'].update({det["metadata"]["token"]: annos})
return nusc_annos
def format_nuscene_results(metrics, class_names, version='default'):
result = '----------------Nuscene %s results-----------------\n' % version
for name in class_names:
threshs = ', '.join(list(metrics['label_aps'][name].keys()))
ap_list = list(metrics['label_aps'][name].values())
        err_name = ', '.join([x.split('_')[0] for x in list(metrics['label_tp_errors'][name].keys())])
error_list = list(metrics['label_tp_errors'][name].values())
result += f'***{name} error@{err_name} | AP@{threshs}\n'
result += ', '.join(['%.2f' % x for x in error_list]) + ' | '
result += ', '.join(['%.2f' % (x * 100) for x in ap_list])
result += f" | mean AP: {metrics['mean_dist_aps'][name]}"
result += '\n'
result += '--------------average performance-------------\n'
details = {}
for key, val in metrics['tp_errors'].items():
result += '%s:\t %.4f\n' % (key, val)
details[key] = val
result += 'mAP:\t %.4f\n' % metrics['mean_ap']
result += 'NDS:\t %.4f\n' % metrics['nd_score']
details.update({
'mAP': metrics['mean_ap'],
'NDS': metrics['nd_score'],
})
return result, details
import torch
import torch.nn as nn
import numpy as np

class BaseBEVBackbone(nn.Module):
@@ -7,13 +8,20 @@ class BaseBEVBackbone(nn.Module):
        super().__init__()
        self.model_cfg = model_cfg

        if self.model_cfg.get('LAYER_NUMS', None) is not None:
            assert len(self.model_cfg.LAYER_NUMS) == len(self.model_cfg.LAYER_STRIDES) == len(self.model_cfg.NUM_FILTERS)
            layer_nums = self.model_cfg.LAYER_NUMS
            layer_strides = self.model_cfg.LAYER_STRIDES
            num_filters = self.model_cfg.NUM_FILTERS
        else:
            layer_nums = layer_strides = num_filters = []

        if self.model_cfg.get('UPSAMPLE_STRIDES', None) is not None:
            assert len(self.model_cfg.UPSAMPLE_STRIDES) == len(self.model_cfg.NUM_UPSAMPLE_FILTERS)
            num_upsample_filters = self.model_cfg.NUM_UPSAMPLE_FILTERS
            upsample_strides = self.model_cfg.UPSAMPLE_STRIDES
        else:
            upsample_strides = num_upsample_filters = []

        num_levels = len(layer_nums)
        c_in_list = [input_channels, *num_filters[:-1]]
@@ -37,15 +45,28 @@ class BaseBEVBackbone(nn.Module):
            ])
            self.blocks.append(nn.Sequential(*cur_layers))
            if len(upsample_strides) > 0:
                stride = upsample_strides[idx]
                if stride > 1:
                    self.deblocks.append(nn.Sequential(
                        nn.ConvTranspose2d(
                            num_filters[idx], num_upsample_filters[idx],
                            upsample_strides[idx],
                            stride=upsample_strides[idx], bias=False
                        ),
                        nn.BatchNorm2d(num_upsample_filters[idx], eps=1e-3, momentum=0.01),
                        nn.ReLU()
                    ))
                else:
                    stride = np.round(1 / stride).astype(np.int)
                    self.deblocks.append(nn.Sequential(
                        nn.Conv2d(
                            num_filters[idx], num_upsample_filters[idx],
                            stride,
                            stride=stride, bias=False
                        ),
                        nn.BatchNorm2d(num_upsample_filters[idx], eps=1e-3, momentum=0.01),
                        nn.ReLU()
                    ))

        c_in = sum(num_upsample_filters)
        if len(upsample_strides) > num_levels:
...
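The reworked deblock construction above allows fractional entries in UPSAMPLE_STRIDES: a stride greater than 1 keeps the transposed-convolution upsampling path, while a fractional stride is inverted and rounded into a strided Conv2d that downsamples instead. A minimal sketch of the dispatch (commentary, not part of the commit; the config values are hypothetical):

import numpy as np
import torch.nn as nn

def make_deblock(stride, c_in, c_out):
    # stride > 1: learnable upsampling; stride <= 1: strided downsampling conv.
    if stride > 1:
        return nn.ConvTranspose2d(c_in, c_out, stride, stride=stride, bias=False)
    k = int(np.round(1 / stride))  # e.g. 0.5 -> 2
    return nn.Conv2d(c_in, c_out, k, stride=k, bias=False)

# Hypothetical UPSAMPLE_STRIDES = [0.5, 1, 2]: downsample x2, keep, upsample x2.
blocks = [make_deblock(s, 64, 128) for s in [0.5, 1, 2]]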
from .spconv_backbone import VoxelBackBone8x, VoxelResBackBone8x
from .spconv_unet import UNetV2
from .pointnet2_backbone import PointNet2Backbone, PointNet2MSG
@@ -6,5 +6,6 @@ __all__ = {
    'VoxelBackBone8x': VoxelBackBone8x,
    'UNetV2': UNetV2,
    'PointNet2Backbone': PointNet2Backbone,
    'PointNet2MSG': PointNet2MSG,
    'VoxelResBackBone8x': VoxelResBackBone8x,
}
@@ -25,6 +25,45 @@ def post_act_block(in_channels, out_channels, kernel_size, indice_key=None, stri
    return m
class SparseBasicBlock(spconv.SparseModule):
expansion = 1
def __init__(self, inplanes, planes, stride=1, norm_fn=None, downsample=None, indice_key=None):
super(SparseBasicBlock, self).__init__()
assert norm_fn is not None
bias = norm_fn is not None
self.conv1 = spconv.SubMConv3d(
inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=bias, indice_key=indice_key
)
self.bn1 = norm_fn(planes)
self.relu = nn.ReLU()
self.conv2 = spconv.SubMConv3d(
planes, planes, kernel_size=3, stride=stride, padding=1, bias=bias, indice_key=indice_key
)
self.bn2 = norm_fn(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out.features = self.bn1(out.features)
out.features = self.relu(out.features)
out = self.conv2(out)
out.features = self.bn2(out.features)
if self.downsample is not None:
identity = self.downsample(x)
out.features += identity.features
out.features = self.relu(out.features)
return out
class VoxelBackBone8x(nn.Module):
    def __init__(self, model_cfg, input_channels, grid_size, **kwargs):
        super().__init__()
@@ -121,3 +160,101 @@ class VoxelBackBone8x(nn.Module):
        })
        return batch_dict
class VoxelResBackBone8x(nn.Module):
def __init__(self, model_cfg, input_channels, grid_size, **kwargs):
super().__init__()
self.model_cfg = model_cfg
norm_fn = partial(nn.BatchNorm1d, eps=1e-3, momentum=0.01)
self.sparse_shape = grid_size[::-1] + [1, 0, 0]
self.conv_input = spconv.SparseSequential(
spconv.SubMConv3d(input_channels, 16, 3, padding=1, bias=False, indice_key='subm1'),
norm_fn(16),
nn.ReLU(),
)
block = post_act_block
self.conv1 = spconv.SparseSequential(
SparseBasicBlock(16, 16, norm_fn=norm_fn, indice_key='res1'),
SparseBasicBlock(16, 16, norm_fn=norm_fn, indice_key='res1'),
)
self.conv2 = spconv.SparseSequential(
# [1600, 1408, 41] <- [800, 704, 21]
block(16, 32, 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv2', conv_type='spconv'),
SparseBasicBlock(32, 32, norm_fn=norm_fn, indice_key='res2'),
SparseBasicBlock(32, 32, norm_fn=norm_fn, indice_key='res2'),
)
self.conv3 = spconv.SparseSequential(
# [800, 704, 21] <- [400, 352, 11]
block(32, 64, 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv3', conv_type='spconv'),
SparseBasicBlock(64, 64, norm_fn=norm_fn, indice_key='res3'),
SparseBasicBlock(64, 64, norm_fn=norm_fn, indice_key='res3'),
)
self.conv4 = spconv.SparseSequential(
# [400, 352, 11] <- [200, 176, 5]
block(64, 128, 3, norm_fn=norm_fn, stride=2, padding=(0, 1, 1), indice_key='spconv4', conv_type='spconv'),
SparseBasicBlock(128, 128, norm_fn=norm_fn, indice_key='res4'),
SparseBasicBlock(128, 128, norm_fn=norm_fn, indice_key='res4'),
)
last_pad = 0
last_pad = self.model_cfg.get('last_pad', last_pad)
self.conv_out = spconv.SparseSequential(
# [200, 150, 5] -> [200, 150, 2]
spconv.SparseConv3d(128, 128, (3, 1, 1), stride=(2, 1, 1), padding=last_pad,
bias=False, indice_key='spconv_down2'),
norm_fn(128),
nn.ReLU(),
)
self.num_point_features = 128
def forward(self, batch_dict):
"""
Args:
batch_dict:
batch_size: int
vfe_features: (num_voxels, C)
voxel_coords: (num_voxels, 4), [batch_idx, z_idx, y_idx, x_idx]
Returns:
batch_dict:
encoded_spconv_tensor: sparse tensor
"""
voxel_features, voxel_coords = batch_dict['voxel_features'], batch_dict['voxel_coords']
batch_size = batch_dict['batch_size']
input_sp_tensor = spconv.SparseConvTensor(
features=voxel_features,
indices=voxel_coords.int(),
spatial_shape=self.sparse_shape,
batch_size=batch_size
)
x = self.conv_input(input_sp_tensor)
x_conv1 = self.conv1(x)
x_conv2 = self.conv2(x_conv1)
x_conv3 = self.conv3(x_conv2)
x_conv4 = self.conv4(x_conv3)
# for detection head
# [200, 176, 5] -> [200, 176, 2]
out = self.conv_out(x_conv4)
batch_dict.update({
'encoded_spconv_tensor': out,
'encoded_spconv_tensor_stride': 8
})
batch_dict.update({
'multi_scale_3d_features': {
'x_conv1': x_conv1,
'x_conv2': x_conv2,
'x_conv3': x_conv3,
'x_conv4': x_conv4,
}
})
return batch_dict
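A detail worth noting (commentary, not part of the commit): spconv expects spatial_shape in (z, y, x) order, so grid_size[::-1] reverses the voxelizer's (x, y, z) counts, and the + [1, 0, 0] pads one extra voxel in z. A sketch of the resulting z shrinkage through the three stride-2 stages, assuming a hypothetical 40-voxel-high grid:

import numpy as np

grid_size = np.array([1408, 1600, 40])      # hypothetical (x, y, z) voxel counts
sparse_shape = grid_size[::-1] + [1, 0, 0]  # -> [41, 1600, 1408] in (z, y, x)

z = sparse_shape[0]           # 41
z = (z + 2 * 1 - 3) // 2 + 1  # conv2: kernel 3, stride 2, padding 1 -> 21
z = (z + 2 * 1 - 3) // 2 + 1  # conv3 -> 11
z = (z + 2 * 0 - 3) // 2 + 1  # conv4: z padding 0 -> 5
print(z)  # 5, matching the shape comments in the blocks above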
@@ -6,22 +6,81 @@ import torch
class SingleHead(BaseBEVBackbone):
    def __init__(self, model_cfg, input_channels, num_class, num_anchors_per_location, code_size, rpn_head_cfg=None,
                 head_label_indices=None, separate_reg_config=None):
        super().__init__(rpn_head_cfg, input_channels)
        self.num_anchors_per_location = num_anchors_per_location
        self.num_class = num_class
        self.code_size = code_size
        self.model_cfg = model_cfg
        self.separate_reg_config = separate_reg_config
        self.register_buffer('head_label_indices', head_label_indices)

        if self.separate_reg_config is not None:
            code_size_cnt = 0
            self.conv_box = nn.ModuleDict()
            self.conv_box_names = []
            num_middle_conv = self.separate_reg_config.NUM_MIDDLE_CONV
            num_middle_filter = self.separate_reg_config.NUM_MIDDLE_FILTER
            conv_cls_list = []
            c_in = input_channels
for k in range(num_middle_conv):
conv_cls_list.extend([
nn.Conv2d(
c_in, num_middle_filter,
kernel_size=3, stride=1, padding=1, bias=False
),
nn.BatchNorm2d(num_middle_filter),
nn.ReLU()
])
c_in = num_middle_filter
conv_cls_list.append(nn.Conv2d(
c_in, self.num_anchors_per_location * self.num_class,
kernel_size=3, stride=1, padding=1
))
self.conv_cls = nn.Sequential(*conv_cls_list)
for reg_config in self.separate_reg_config.REG_LIST:
reg_name, reg_channel = reg_config.split(':')
reg_channel = int(reg_channel)
cur_conv_list = []
c_in = input_channels
for k in range(num_middle_conv):
cur_conv_list.extend([
nn.Conv2d(
c_in, num_middle_filter,
kernel_size=3, stride=1, padding=1, bias=False
),
nn.BatchNorm2d(num_middle_filter),
nn.ReLU()
])
c_in = num_middle_filter
cur_conv_list.append(nn.Conv2d(
c_in, self.num_anchors_per_location * int(reg_channel),
kernel_size=3, stride=1, padding=1, bias=True
))
code_size_cnt += reg_channel
self.conv_box[f'conv_{reg_name}'] = nn.Sequential(*cur_conv_list)
self.conv_box_names.append(f'conv_{reg_name}')
for m in self.conv_box.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
if m.bias is not None:
nn.init.constant_(m.bias, 0)
assert code_size_cnt == code_size, f'Code size does not match: {code_size_cnt}:{code_size}'
else:
self.conv_cls = nn.Conv2d(
input_channels, self.num_anchors_per_location * self.num_class,
kernel_size=1
)
self.conv_box = nn.Conv2d(
input_channels, self.num_anchors_per_location * self.code_size,
kernel_size=1
)
        if self.model_cfg.get('USE_DIRECTION_CLASSIFIER', None) is not None:
            self.conv_dir_cls = nn.Conv2d(
@@ -31,19 +90,29 @@ class SingleHead(BaseBEVBackbone):
            )
        else:
            self.conv_dir_cls = None
        self.use_multihead = self.model_cfg.get('USE_MULTIHEAD', False)
        self.init_weights()

    def init_weights(self):
        pi = 0.01
        if isinstance(self.conv_cls, nn.Conv2d):
            nn.init.constant_(self.conv_cls.bias, -np.log((1 - pi) / pi))
        else:
            nn.init.constant_(self.conv_cls[-1].bias, -np.log((1 - pi) / pi))
    def forward(self, spatial_features_2d):
        ret_dict = {}
        spatial_features_2d = super().forward({'spatial_features': spatial_features_2d})['spatial_features_2d']
        cls_preds = self.conv_cls(spatial_features_2d)

        if self.separate_reg_config is None:
            box_preds = self.conv_box(spatial_features_2d)
        else:
            box_preds_list = []
            for reg_name in self.conv_box_names:
                box_preds_list.append(self.conv_box[reg_name](spatial_features_2d))
            box_preds = torch.cat(box_preds_list, dim=1)

        if not self.use_multihead:
            box_preds = box_preds.permute(0, 2, 3, 1).contiguous()
@@ -56,13 +125,14 @@ class SingleHead(BaseBEVBackbone):
            cls_preds = cls_preds.view(-1, self.num_anchors_per_location,
                                       self.num_class, H, W).permute(0, 1, 3, 4, 2).contiguous()
            box_preds = box_preds.view(batch_size, -1, self.code_size)
            cls_preds = cls_preds.view(batch_size, -1, self.num_class)

        if self.conv_dir_cls is not None:
            dir_cls_preds = self.conv_dir_cls(spatial_features_2d)
            if self.use_multihead:
                dir_cls_preds = dir_cls_preds.view(
                    -1, self.num_anchors_per_location, self.model_cfg.NUM_DIR_BINS, H, W).permute(0, 1, 3, 4, 2).contiguous()
                dir_cls_preds = dir_cls_preds.view(batch_size, -1, self.model_cfg.NUM_DIR_BINS)
            else:
                dir_cls_preds = dir_cls_preds.permute(0, 2, 3, 1).contiguous()
@@ -78,12 +148,27 @@ class SingleHead(BaseBEVBackbone):

class AnchorHeadMulti(AnchorHeadTemplate):
    def __init__(self, model_cfg, input_channels, num_class, class_names, grid_size, point_cloud_range,
                 predict_boxes_when_training=True):
        super().__init__(
            model_cfg=model_cfg, num_class=num_class, class_names=class_names, grid_size=grid_size,
            point_cloud_range=point_cloud_range, predict_boxes_when_training=predict_boxes_when_training
        )
        self.model_cfg = model_cfg
        self.separate_multihead = self.model_cfg.get('SEPARATE_MULTIHEAD', False)

        if self.model_cfg.get('SHARED_CONV_NUM_FILTER', None) is not None:
            shared_conv_num_filter = self.model_cfg.SHARED_CONV_NUM_FILTER
            self.shared_conv = nn.Sequential(
                nn.Conv2d(input_channels, shared_conv_num_filter, 3, stride=1, padding=1, bias=False),
                nn.BatchNorm2d(shared_conv_num_filter, eps=1e-3, momentum=0.01),
                nn.ReLU(),
            )
        else:
            self.shared_conv = None
            shared_conv_num_filter = input_channels
        self.rpn_heads = None
        self.make_multihead(shared_conv_num_filter)
    def make_multihead(self, input_channels):
        rpn_head_cfgs = self.model_cfg.RPN_HEAD_CFGS
@@ -91,34 +176,46 @@ class AnchorHeadMulti(AnchorHeadTemplate):
        class_names = []
        for rpn_head_cfg in rpn_head_cfgs:
            class_names.extend(rpn_head_cfg['HEAD_CLS_NAME'])

        for rpn_head_cfg in rpn_head_cfgs:
            num_anchors_per_location = sum([self.num_anchors_per_location[class_names.index(head_cls)]
                                            for head_cls in rpn_head_cfg['HEAD_CLS_NAME']])
            head_label_indices = torch.from_numpy(np.array([
                self.class_names.index(cur_name) + 1 for cur_name in rpn_head_cfg['HEAD_CLS_NAME']
            ]))
            rpn_head = SingleHead(
                self.model_cfg, input_channels,
                len(rpn_head_cfg['HEAD_CLS_NAME']) if self.separate_multihead else self.num_class,
                num_anchors_per_location, self.box_coder.code_size, rpn_head_cfg,
                head_label_indices=head_label_indices,
                separate_reg_config=self.model_cfg.get('SEPARATE_REG_CONFIG', None)
            )
            rpn_heads.append(rpn_head)
        self.rpn_heads = nn.ModuleList(rpn_heads)
    def forward(self, data_dict):
        spatial_features_2d = data_dict['spatial_features_2d']
        if self.shared_conv is not None:
            spatial_features_2d = self.shared_conv(spatial_features_2d)

        ret_dicts = []
        for rpn_head in self.rpn_heads:
            ret_dicts.append(rpn_head(spatial_features_2d))

        cls_preds = [ret_dict['cls_preds'] for ret_dict in ret_dicts]
        box_preds = [ret_dict['box_preds'] for ret_dict in ret_dicts]
        ret = {
            'cls_preds': cls_preds if self.separate_multihead else torch.cat(cls_preds, dim=1),
            'box_preds': box_preds if self.separate_multihead else torch.cat(box_preds, dim=1),
        }

        if self.model_cfg.get('USE_DIRECTION_CLASSIFIER', False):
            dir_cls_preds = [ret_dict['dir_cls_preds'] for ret_dict in ret_dicts]
            ret['dir_cls_preds'] = dir_cls_preds if self.separate_multihead else torch.cat(dir_cls_preds, dim=1)
        else:
            dir_cls_preds = None

        self.forward_ret_dict.update(ret)

        if self.training:
            targets_dict = self.assign_targets(
                gt_boxes=data_dict['gt_boxes']
@@ -128,10 +225,156 @@ class AnchorHeadMulti(AnchorHeadTemplate):
        if not self.training or self.predict_boxes_when_training:
            batch_cls_preds, batch_box_preds = self.generate_predicted_boxes(
                batch_size=data_dict['batch_size'],
                cls_preds=ret['cls_preds'], box_preds=ret['box_preds'], dir_cls_preds=ret.get('dir_cls_preds', None)
            )
if isinstance(batch_cls_preds, list):
all_pred_labels = []
all_cls_preds = []
for idx, cls_pred in enumerate(batch_cls_preds):
pred_score, pred_head_label = torch.max(cls_pred, dim=-1)
pred_label = self.rpn_heads[idx].head_label_indices[pred_head_label]
all_pred_labels.append(pred_label)
all_cls_preds.append(pred_score[:, :, None])
batch_cls_preds = torch.cat(all_cls_preds, dim=1)
batch_pred_labels = torch.cat(all_pred_labels, dim=1)
data_dict['batch_pred_labels'] = batch_pred_labels
data_dict['has_class_labels'] = True
            data_dict['batch_cls_preds'] = batch_cls_preds
            data_dict['batch_box_preds'] = batch_box_preds
            data_dict['cls_preds_normalized'] = False

        return data_dict
def get_cls_layer_loss(self):
loss_weights = self.model_cfg.LOSS_CONFIG.LOSS_WEIGHTS
if 'pos_cls_weight' in loss_weights:
pos_cls_weight = loss_weights['pos_cls_weight']
neg_cls_weight = loss_weights['neg_cls_weight']
else:
pos_cls_weight = neg_cls_weight = 1.0
cls_preds = self.forward_ret_dict['cls_preds']
box_cls_labels = self.forward_ret_dict['box_cls_labels']
if not isinstance(cls_preds, list):
cls_preds = [cls_preds]
batch_size = int(cls_preds[0].shape[0])
cared = box_cls_labels >= 0 # [N, num_anchors]
positives = box_cls_labels > 0
negatives = box_cls_labels == 0
negative_cls_weights = negatives * 1.0 * neg_cls_weight
cls_weights = (negative_cls_weights + pos_cls_weight * positives).float()
reg_weights = positives.float()
if self.num_class == 1:
# class agnostic
box_cls_labels[positives] = 1
pos_normalizer = positives.sum(1, keepdim=True).float()
reg_weights /= torch.clamp(pos_normalizer, min=1.0)
cls_weights /= torch.clamp(pos_normalizer, min=1.0)
cls_targets = box_cls_labels * cared.type_as(box_cls_labels)
one_hot_targets = torch.zeros(
*list(cls_targets.shape), self.num_class + 1, dtype=cls_preds[0].dtype, device=cls_targets.device
)
one_hot_targets.scatter_(-1, cls_targets.unsqueeze(dim=-1).long(), 1.0)
one_hot_targets = one_hot_targets[..., 1:]
start_idx = c_idx = 0
cls_losses = 0
for idx, cls_pred in enumerate(cls_preds):
cur_num_class = self.rpn_heads[idx].num_class
cls_pred = cls_pred.view(batch_size, -1, cur_num_class)
if self.separate_multihead:
one_hot_target = one_hot_targets[:, start_idx:start_idx + cls_pred.shape[1],
c_idx:c_idx + cur_num_class]
c_idx += cur_num_class
else:
one_hot_target = one_hot_targets[:, start_idx:start_idx + cls_pred.shape[1]]
cls_weight = cls_weights[:, start_idx:start_idx + cls_pred.shape[1]]
cls_loss_src = self.cls_loss_func(cls_pred, one_hot_target, weights=cls_weight) # [N, M]
cls_loss = cls_loss_src.sum() / batch_size
cls_loss = cls_loss * loss_weights['cls_weight']
cls_losses += cls_loss
start_idx += cls_pred.shape[1]
assert start_idx == one_hot_targets.shape[1]
tb_dict = {
'rpn_loss_cls': cls_losses.item()
}
return cls_losses, tb_dict
def get_box_reg_layer_loss(self):
box_preds = self.forward_ret_dict['box_preds']
box_dir_cls_preds = self.forward_ret_dict.get('dir_cls_preds', None)
box_reg_targets = self.forward_ret_dict['box_reg_targets']
box_cls_labels = self.forward_ret_dict['box_cls_labels']
positives = box_cls_labels > 0
reg_weights = positives.float()
pos_normalizer = positives.sum(1, keepdim=True).float()
reg_weights /= torch.clamp(pos_normalizer, min=1.0)
if not isinstance(box_preds, list):
box_preds = [box_preds]
batch_size = int(box_preds[0].shape[0])
if isinstance(self.anchors, list):
if self.use_multihead:
anchors = torch.cat(
[anchor.permute(3, 4, 0, 1, 2, 5).contiguous().view(-1, anchor.shape[-1])
for anchor in self.anchors], dim=0
)
else:
anchors = torch.cat(self.anchors, dim=-3)
else:
anchors = self.anchors
anchors = anchors.view(1, -1, anchors.shape[-1]).repeat(batch_size, 1, 1)
start_idx = 0
box_losses = 0
tb_dict = {}
for idx, box_pred in enumerate(box_preds):
box_pred = box_pred.view(
batch_size, -1,
box_pred.shape[-1] // self.num_anchors_per_location if not self.use_multihead else box_pred.shape[-1]
)
box_reg_target = box_reg_targets[:, start_idx:start_idx + box_pred.shape[1]]
reg_weight = reg_weights[:, start_idx:start_idx + box_pred.shape[1]]
# sin(a - b) = sinacosb-cosasinb
if box_dir_cls_preds is not None:
box_pred_sin, reg_target_sin = self.add_sin_difference(box_pred, box_reg_target)
loc_loss_src = self.reg_loss_func(box_pred_sin, reg_target_sin, weights=reg_weight) # [N, M]
else:
loc_loss_src = self.reg_loss_func(box_pred, box_reg_target, weights=reg_weight) # [N, M]
loc_loss = loc_loss_src.sum() / batch_size
loc_loss = loc_loss * self.model_cfg.LOSS_CONFIG.LOSS_WEIGHTS['loc_weight']
box_losses += loc_loss
tb_dict['rpn_loss_loc'] = tb_dict.get('rpn_loss_loc', 0) + loc_loss.item()
if box_dir_cls_preds is not None:
if not isinstance(box_dir_cls_preds, list):
box_dir_cls_preds = [box_dir_cls_preds]
dir_targets = self.get_direction_target(
anchors, box_reg_targets,
dir_offset=self.model_cfg.DIR_OFFSET,
num_bins=self.model_cfg.NUM_DIR_BINS
)
box_dir_cls_pred = box_dir_cls_preds[idx]
dir_logit = box_dir_cls_pred.view(batch_size, -1, self.model_cfg.NUM_DIR_BINS)
weights = positives.type_as(dir_logit)
weights /= torch.clamp(weights.sum(-1, keepdim=True), min=1.0)
weight = weights[:, start_idx:start_idx + box_pred.shape[1]]
dir_target = dir_targets[:, start_idx:start_idx + box_pred.shape[1]]
dir_loss = self.dir_loss_func(dir_logit, dir_target, weights=weight)
dir_loss = dir_loss.sum() / batch_size
dir_loss = dir_loss * self.model_cfg.LOSS_CONFIG.LOSS_WEIGHTS['dir_weight']
box_losses += dir_loss
tb_dict['rpn_loss_dir'] = tb_dict.get('rpn_loss_dir', 0) + dir_loss.item()
start_idx += box_pred.shape[1]
return box_losses, tb_dict
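For reference (commentary, not part of the commit; the field names follow the code above but the values are illustrative): the separate-regression path expects SEPARATE_REG_CONFIG entries of the form 'name:channels', whose channel counts must sum to the box coder's code_size — here 10 for a coder with sin/cos angle encoding plus (vx, vy):

# Hypothetical config for the SEPARATE_REG_CONFIG branch of SingleHead.
separate_reg_config = {
    'NUM_MIDDLE_CONV': 1,
    'NUM_MIDDLE_FILTER': 64,
    # 2 (x, y) + 1 (z) + 3 (dx, dy, dz) + 2 (sin/cos heading) + 2 (vx, vy) = 10
    'REG_LIST': ['reg:2', 'height:1', 'size:3', 'angle:2', 'velo:2'],
}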
@@ -14,44 +14,53 @@ class AnchorHeadTemplate(nn.Module):
        self.num_class = num_class
        self.class_names = class_names
        self.predict_boxes_when_training = predict_boxes_when_training
        self.use_multihead = self.model_cfg.get('USE_MULTIHEAD', False)

        anchor_target_cfg = self.model_cfg.TARGET_ASSIGNER_CONFIG
        self.box_coder = getattr(box_coder_utils, anchor_target_cfg.BOX_CODER)(
            num_dir_bins=anchor_target_cfg.get('NUM_DIR_BINS', 6),
            **anchor_target_cfg.get('BOX_CODER_CONFIG', {})
        )

        anchor_generator_cfg = self.model_cfg.ANCHOR_GENERATOR_CONFIG
        anchors, self.num_anchors_per_location = self.generate_anchors(
            anchor_generator_cfg, grid_size=grid_size, point_cloud_range=point_cloud_range,
            anchor_ndim=self.box_coder.code_size
        )
        self.anchors = [x.cuda() for x in anchors]
        self.target_assigner = self.get_target_assigner(anchor_target_cfg)

        self.forward_ret_dict = {}
        self.build_losses(self.model_cfg.LOSS_CONFIG)
@staticmethod @staticmethod
def generate_anchors(anchor_generator_cfg, grid_size, point_cloud_range): def generate_anchors(anchor_generator_cfg, grid_size, point_cloud_range, anchor_ndim=7):
anchor_generator = AnchorGenerator( anchor_generator = AnchorGenerator(
anchor_range=point_cloud_range, anchor_range=point_cloud_range,
anchor_generator_config=anchor_generator_cfg anchor_generator_config=anchor_generator_cfg
) )
feature_map_size = [grid_size[:2] // config['feature_map_stride'] for config in anchor_generator_cfg] feature_map_size = [grid_size[:2] // config['feature_map_stride'] for config in anchor_generator_cfg]
anchors_list, num_anchors_per_location_list = anchor_generator.generate_anchors(feature_map_size) anchors_list, num_anchors_per_location_list = anchor_generator.generate_anchors(feature_map_size)
if anchor_ndim != 7:
for idx, anchors in enumerate(anchors_list):
pad_zeros = anchors.new_zeros([*anchors.shape[0:-1], anchor_ndim - 7])
new_anchors = torch.cat((anchors, pad_zeros), dim=-1)
anchors_list[idx] = new_anchors
return anchors_list, num_anchors_per_location_list return anchors_list, num_anchors_per_location_list
def get_target_assigner(self, anchor_target_cfg, anchor_generator_cfg): def get_target_assigner(self, anchor_target_cfg):
if anchor_target_cfg.NAME == 'ATSS': if anchor_target_cfg.NAME == 'ATSS':
target_assigner = ATSSTargetAssigner( target_assigner = ATSSTargetAssigner(
topk=anchor_target_cfg.TOPK, topk=anchor_target_cfg.TOPK,
box_coder=self.box_coder, box_coder=self.box_coder,
use_multihead=self.use_multihead,
match_height=anchor_target_cfg.MATCH_HEIGHT match_height=anchor_target_cfg.MATCH_HEIGHT
) )
elif anchor_target_cfg.NAME == 'AxisAlignedTargetAssigner': elif anchor_target_cfg.NAME == 'AxisAlignedTargetAssigner':
target_assigner = AxisAlignedTargetAssigner( target_assigner = AxisAlignedTargetAssigner(
anchor_target_cfg=anchor_target_cfg, model_cfg=self.model_cfg,
anchor_generator_cfg=anchor_generator_cfg,
class_names=self.class_names, class_names=self.class_names,
box_coder=self.box_coder, box_coder=self.box_coder,
match_height=anchor_target_cfg.MATCH_HEIGHT match_height=anchor_target_cfg.MATCH_HEIGHT
@@ -65,9 +74,11 @@ class AnchorHeadTemplate(nn.Module):
             'cls_loss_func',
             loss_utils.SigmoidFocalClassificationLoss(alpha=0.25, gamma=2.0)
         )
+        reg_loss_name = 'WeightedSmoothL1Loss' if losses_cfg.get('REG_LOSS_TYPE', None) is None \
+            else losses_cfg.REG_LOSS_TYPE
         self.add_module(
             'reg_loss_func',
-            loss_utils.WeightedSmoothL1Loss(code_weights=losses_cfg.LOSS_WEIGHTS['code_weights'])
+            getattr(loss_utils, reg_loss_name)(code_weights=losses_cfg.LOSS_WEIGHTS['code_weights'])
         )
         self.add_module(
             'dir_loss_func',
@@ -82,7 +93,7 @@ class AnchorHeadTemplate(nn.Module):
         """
         targets_dict = self.target_assigner.assign_targets(
-            self.anchors, gt_boxes, self.use_multihead
+            self.anchors, gt_boxes
         )
         return targets_dict
@@ -113,8 +124,6 @@ class AnchorHeadTemplate(nn.Module):
         one_hot_targets.scatter_(-1, cls_targets.unsqueeze(dim=-1).long(), 1.0)
         cls_preds = cls_preds.view(batch_size, -1, self.num_class)
         one_hot_targets = one_hot_targets[..., 1:]
-        # import pdb
-        # pdb.set_trace()
         cls_loss_src = self.cls_loss_func(cls_preds, one_hot_targets, weights=cls_weights)  # [N, M]
         cls_loss = cls_loss_src.sum() / batch_size
@@ -235,14 +244,17 @@ class AnchorHeadTemplate(nn.Module):
             anchors = self.anchors
         num_anchors = anchors.view(-1, anchors.shape[-1]).shape[0]
         batch_anchors = anchors.view(1, -1, anchors.shape[-1]).repeat(batch_size, 1, 1)
-        batch_cls_preds = cls_preds.view(batch_size, num_anchors, -1).float()
-        batch_box_preds = box_preds.view(batch_size, num_anchors, -1)
+        batch_cls_preds = cls_preds.view(batch_size, num_anchors, -1).float() \
+            if not isinstance(cls_preds, list) else cls_preds
+        batch_box_preds = box_preds.view(batch_size, num_anchors, -1) if not isinstance(box_preds, list) \
+            else torch.cat(box_preds, dim=1).view(batch_size, num_anchors, -1)
         batch_box_preds = self.box_coder.decode_torch(batch_box_preds, batch_anchors)
 
         if dir_cls_preds is not None:
             dir_offset = self.model_cfg.DIR_OFFSET
             dir_limit_offset = self.model_cfg.DIR_LIMIT_OFFSET
-            dir_cls_preds = dir_cls_preds.view(batch_size, num_anchors, -1)
+            dir_cls_preds = dir_cls_preds.view(batch_size, num_anchors, -1) if not isinstance(dir_cls_preds, list) \
+                else torch.cat(dir_cls_preds, dim=1).view(batch_size, num_anchors, -1)
             dir_labels = torch.max(dir_cls_preds, dim=-1)[1]
 
             period = (2 * np.pi / self.model_cfg.NUM_DIR_BINS)
...
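Downstream of the direction classifier, NUM_DIR_BINS and DIR_OFFSET are typically combined with a period-limiting step: the regressed heading is snapped into one bin width and the classified bin is added back. A hedged sketch of that decode step (the `limit_period` helper mirrors the one in common_utils; `decode_heading` is a hypothetical name for illustration):

import numpy as np
import torch

def limit_period(val, offset=0.5, period=np.pi):
    # Wrap val into [-offset * period, (1 - offset) * period)
    return val - torch.floor(val / period + offset) * period

def decode_heading(heading, dir_labels, num_dir_bins=2, dir_offset=0.78539, dir_limit_offset=0.0):
    period = 2 * np.pi / num_dir_bins
    rot = limit_period(heading - dir_offset, dir_limit_offset, period)
    return rot + dir_offset + period * dir_labels.to(heading.dtype)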
@@ -28,8 +28,8 @@ class ATSSTargetAssigner(object):
         cls_labels_list, reg_targets_list, reg_weights_list = [], [], []
         for anchors in anchors_list:
             batch_size = gt_boxes_with_classes.shape[0]
-            gt_classes = gt_boxes_with_classes[:, :, 7]
-            gt_boxes = gt_boxes_with_classes[:, :, :7]
+            gt_classes = gt_boxes_with_classes[:, :, -1]
+            gt_boxes = gt_boxes_with_classes[:, :, :-1]
             if use_multihead:
                 anchors = anchors.permute(3, 4, 0, 1, 2, 5).contiguous().view(-1, anchors.shape[-1])
             else:
...
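Switching from the hard-coded index 7 to -1 lets the same assigners handle nuScenes ground-truth boxes, which carry two extra velocity channels before the class id. A trivial demonstration of why the slicing still works for both layouts:

import torch

# KITTI-style rows:    [x, y, z, dx, dy, dz, heading, class]
# nuScenes-style rows: [x, y, z, dx, dy, dz, heading, vx, vy, class]
gt_kitti = torch.zeros(2, 5, 8)
gt_nusc = torch.zeros(2, 5, 10)

for gt in (gt_kitti, gt_nusc):
    gt_classes = gt[:, :, -1]   # class id is always the last channel
    gt_boxes = gt[:, :, :-1]    # everything else is the box (7 or 9 dims)
    print(gt_boxes.shape, gt_classes.shape)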
@@ -5,8 +5,11 @@ from ....ops.iou3d_nms import iou3d_nms_utils
 class AxisAlignedTargetAssigner(object):
-    def __init__(self, anchor_target_cfg, anchor_generator_cfg, class_names, box_coder, match_height=False):
+    def __init__(self, model_cfg, class_names, box_coder, match_height=False):
         super().__init__()
+
+        anchor_generator_cfg = model_cfg.ANCHOR_GENERATOR_CONFIG
+        anchor_target_cfg = model_cfg.TARGET_ASSIGNER_CONFIG
         self.box_coder = box_coder
         self.match_height = match_height
         self.class_names = np.array(class_names)
@@ -19,8 +22,17 @@ class AxisAlignedTargetAssigner(object):
         for config in anchor_generator_cfg:
             self.matched_thresholds[config['class_name']] = config['matched_threshold']
             self.unmatched_thresholds[config['class_name']] = config['unmatched_threshold']
+        self.use_multihead = model_cfg.get('USE_MULTIHEAD', False)
+        self.seperate_multihead = model_cfg.get('SEPERATE_MULTIHEAD', False)
+        if self.seperate_multihead:
+            rpn_head_cfgs = model_cfg.RPN_HEAD_CFGS
+            self.gt_remapping = {}
+            for rpn_head_cfg in rpn_head_cfgs:
+                for idx, name in enumerate(rpn_head_cfg['HEAD_CLS_NAME']):
+                    self.gt_remapping[name] = idx + 1
 
-    def assign_targets(self, all_anchors, gt_boxes_with_classes, use_multihead=False):
+    def assign_targets(self, all_anchors, gt_boxes_with_classes):
         """
         Args:
             all_anchors: [(N, 7), ...]
@@ -30,13 +42,12 @@ class AxisAlignedTargetAssigner(object):
         """
         bbox_targets = []
-        bbox_src_targets = []
         cls_labels = []
         reg_weights = []
 
         batch_size = gt_boxes_with_classes.shape[0]
-        gt_classes = gt_boxes_with_classes[:, :, 7]
-        gt_boxes = gt_boxes_with_classes[:, :, :7]
+        gt_classes = gt_boxes_with_classes[:, :, -1]
+        gt_boxes = gt_boxes_with_classes[:, :, :-1]
         for k in range(batch_size):
             cur_gt = gt_boxes[k]
             cnt = cur_gt.__len__() - 1
@@ -53,27 +64,36 @@ class AxisAlignedTargetAssigner(object):
                 mask = torch.tensor([self.class_names[c - 1] == anchor_class_name
                                      for c in cur_gt_classes], dtype=torch.bool)
 
-                if use_multihead:
+                if self.use_multihead:
                     anchors = anchors.permute(3, 4, 0, 1, 2, 5).contiguous().view(-1, anchors.shape[-1])
+                    if self.seperate_multihead:
+                        selected_classes = cur_gt_classes[mask].clone()
+                        if len(selected_classes) > 0:
+                            new_cls_id = self.gt_remapping[anchor_class_name]
+                            selected_classes[:] = new_cls_id
+                    else:
+                        selected_classes = cur_gt_classes[mask]
                 else:
                     feature_map_size = anchors.shape[:3]
                     anchors = anchors.view(-1, anchors.shape[-1])
+                    selected_classes = cur_gt_classes[mask]
 
                 single_target = self.assign_targets_single(
                     anchors,
                     cur_gt[mask],
-                    gt_classes=cur_gt_classes[mask],
+                    gt_classes=selected_classes,
                     matched_threshold=self.matched_thresholds[anchor_class_name],
                     unmatched_threshold=self.unmatched_thresholds[anchor_class_name]
                 )
                 target_list.append(single_target)
 
-            if use_multihead:
+            if self.use_multihead:
                 target_dict = {
                     'box_cls_labels': [t['box_cls_labels'].view(-1) for t in target_list],
                     'box_reg_targets': [t['box_reg_targets'].view(-1, self.box_coder.code_size) for t in target_list],
                     'reg_weights': [t['reg_weights'].view(-1) for t in target_list]
                 }
                 target_dict['box_reg_targets'] = torch.cat(target_dict['box_reg_targets'], dim=0)
                 target_dict['box_cls_labels'] = torch.cat(target_dict['box_cls_labels'], dim=0).view(-1)
                 target_dict['reg_weights'] = torch.cat(target_dict['reg_weights'], dim=0).view(-1)
@@ -84,18 +104,19 @@ class AxisAlignedTargetAssigner(object):
                                         for t in target_list],
                     'reg_weights': [t['reg_weights'].view(*feature_map_size, -1) for t in target_list]
                 }
-                target_dict['box_reg_targets'] = torch.cat(target_dict['box_reg_targets'],
-                                                           dim=-2).view(-1, self.box_coder.code_size)
+                target_dict['box_reg_targets'] = torch.cat(
+                    target_dict['box_reg_targets'], dim=-2
+                ).view(-1, self.box_coder.code_size)
                 target_dict['box_cls_labels'] = torch.cat(target_dict['box_cls_labels'], dim=-1).view(-1)
                 target_dict['reg_weights'] = torch.cat(target_dict['reg_weights'], dim=-1).view(-1)
 
             bbox_targets.append(target_dict['box_reg_targets'])
             cls_labels.append(target_dict['box_cls_labels'])
             reg_weights.append(target_dict['reg_weights'])
 
         bbox_targets = torch.stack(bbox_targets, dim=0)
         cls_labels = torch.stack(cls_labels, dim=0)
         reg_weights = torch.stack(reg_weights, dim=0)
         all_targets_dict = {
@@ -115,11 +136,10 @@ class AxisAlignedTargetAssigner(object):
         num_anchors = anchors.shape[0]
         num_gt = gt_boxes.shape[0]
 
-        # box_ndim = anchors.shape[1]
         labels = torch.ones((num_anchors,), dtype=torch.int32, device=anchors.device) * -1
         gt_ids = torch.ones((num_anchors,), dtype=torch.int32, device=anchors.device) * -1
 
         if len(gt_boxes) > 0 and anchors.shape[0] > 0:
             anchor_by_gt_overlap = iou3d_nms_utils.boxes_iou3d_gpu(anchors[:, 0:7], gt_boxes[:, 0:7]) \
                 if self.match_height else box_utils.boxes3d_nearest_bev_iou(anchors[:, 0:7], gt_boxes[:, 0:7])
@@ -133,12 +153,12 @@ class AxisAlignedTargetAssigner(object):
             gt_to_anchor_max = anchor_by_gt_overlap[gt_to_anchor_argmax, torch.arange(num_gt, device=anchors.device)]
             empty_gt_mask = gt_to_anchor_max == 0
             gt_to_anchor_max[empty_gt_mask] = -1
 
             anchors_with_max_overlap = torch.nonzero(anchor_by_gt_overlap == gt_to_anchor_max)[:, 0]
             gt_inds_force = anchor_to_gt_argmax[anchors_with_max_overlap]
             labels[anchors_with_max_overlap] = gt_classes[gt_inds_force]
             gt_ids[anchors_with_max_overlap] = gt_inds_force.int()
 
             pos_inds = anchor_to_gt_max >= matched_threshold
             gt_inds_over_thresh = anchor_to_gt_argmax[pos_inds]
             labels[pos_inds] = gt_classes[gt_inds_over_thresh]
@@ -148,7 +168,7 @@ class AxisAlignedTargetAssigner(object):
             bg_inds = torch.arange(num_anchors, device=anchors.device)
 
         fg_inds = torch.nonzero(labels > 0)[:, 0]
 
         if self.pos_fraction is not None:
             num_fg = int(self.pos_fraction * self.sample_size)
             if len(fg_inds) > num_fg:
@@ -176,7 +196,7 @@ class AxisAlignedTargetAssigner(object):
             bbox_targets[fg_inds, :] = self.box_coder.encode_torch(fg_gt_boxes, fg_anchors)
 
         reg_weights = anchors.new_zeros((num_anchors,))
 
         if self.norm_by_num_examples:
             num_examples = (labels >= 0).sum()
             num_examples = num_examples if num_examples > 1.0 else 1.0
...
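With SEPERATE_MULTIHEAD enabled, each head classifies only the classes it owns, so the global class ids must be remapped to a local, 1-based id per head. A toy illustration of the mapping built in `__init__` above (head configs abbreviated):

rpn_head_cfgs = [
    {'HEAD_CLS_NAME': ['car']},
    {'HEAD_CLS_NAME': ['truck', 'construction_vehicle']},
]
gt_remapping = {}
for rpn_head_cfg in rpn_head_cfgs:
    for idx, name in enumerate(rpn_head_cfg['HEAD_CLS_NAME']):
        gt_remapping[name] = idx + 1
print(gt_remapping)  # {'car': 1, 'truck': 1, 'construction_vehicle': 2}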
@@ -172,7 +172,9 @@ class Detector3DTemplate(nn.Module):
                 batch_box_preds: (B, num_boxes, 7+C) or (N1+N2+..., 7+C)
                 cls_preds_normalized: indicate whether batch_cls_preds is normalized
                 batch_index: optional (N1+N2+...)
+                has_class_labels: True/False
                 roi_labels: (B, num_rois) 1 .. num_classes
+                batch_pred_labels: (B, num_boxes, 1)
         Returns:
         """
@@ -197,12 +199,15 @@ class Detector3DTemplate(nn.Module):
                 if not batch_dict['cls_preds_normalized']:
                     cls_preds = torch.sigmoid(cls_preds)
 
                 if post_process_cfg.NMS_CONFIG.MULTI_CLASSES_NMS:
                     raise NotImplementedError
                 else:
                     cls_preds, label_preds = torch.max(cls_preds, dim=-1)
-                    label_preds = batch_dict['roi_labels'][index] if batch_dict.get('has_class_labels', False) else label_preds + 1
+                    if batch_dict.get('has_class_labels', False):
+                        label_key = 'roi_labels' if 'roi_labels' in batch_dict else 'batch_pred_labels'
+                        label_preds = batch_dict[label_key][index]
+                    else:
+                        label_preds = label_preds + 1
                     selected, selected_scores = class_agnostic_nms(
                         box_scores=cls_preds, box_preds=box_preds,
@@ -253,14 +258,14 @@ class Detector3DTemplate(nn.Module):
                 k -= 1
             cur_gt = cur_gt[:k + 1]
 
-            if cur_gt.sum() > 0:
+            if cur_gt.shape[0] > 0:
                 if box_preds.shape[0] > 0:
-                    iou3d_rcnn = iou3d_nms_utils.boxes_iou3d_gpu(box_preds, cur_gt[:, 0:7])
+                    iou3d_rcnn = iou3d_nms_utils.boxes_iou3d_gpu(box_preds[:, 0:7], cur_gt[:, 0:7])
                 else:
                     iou3d_rcnn = torch.zeros((0, cur_gt.shape[0]))
 
                 if rois is not None:
-                    iou3d_roi = iou3d_nms_utils.boxes_iou3d_gpu(rois, cur_gt[:, 0:7])
+                    iou3d_roi = iou3d_nms_utils.boxes_iou3d_gpu(rois[:, 0:7], cur_gt[:, 0:7])
 
                 for cur_thresh in thresh_list:
                     if iou3d_rcnn.shape[0] == 0:
...
@@ -14,7 +14,7 @@ def class_agnostic_nms(box_scores, box_preds, nms_config, score_thresh=None):
         box_scores_nms, indices = torch.topk(box_scores, k=min(nms_config.NMS_PRE_MAXSIZE, box_scores.shape[0]))
         boxes_for_nms = box_preds[indices]
         keep_idx, selected_scores = getattr(iou3d_nms_utils, nms_config.NMS_TYPE)(
-            boxes_for_nms, box_scores_nms, nms_config.NMS_THRESH, **nms_config
+            boxes_for_nms[:, 0:7], box_scores_nms, nms_config.NMS_THRESH, **nms_config
         )
         selected = indices[keep_idx[:nms_config.NMS_POST_MAXSIZE]]
...
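The `[:, 0:7]` slices here and in the recall computation above exist because nuScenes predictions are 9-dim (vx, vy appended), while the IoU/NMS kernels operate on 7-dim geometric boxes. A one-line sanity check of the assumption:

import torch

box_preds = torch.rand(100, 9)       # [x, y, z, dx, dy, dz, heading, vx, vy]
boxes_for_nms = box_preds[:, 0:7]    # geometry only; velocities do not affect overlap
assert boxes_for_nms.shape[-1] == 7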
@@ -12,7 +12,9 @@ class RoIHeadTemplate(nn.Module):
         super().__init__()
         self.model_cfg = model_cfg
         self.num_class = num_class
-        self.box_coder = getattr(box_coder_utils, self.model_cfg.TARGET_CONFIG.BOX_CODER)()
+        self.box_coder = getattr(box_coder_utils, self.model_cfg.TARGET_CONFIG.BOX_CODER)(
+            **self.model_cfg.TARGET_CONFIG.get('BOX_CODER_CONFIG', {})
+        )
         self.proposal_target_layer = ProposalTargetLayer(roi_sampler_cfg=self.model_cfg.TARGET_CONFIG)
         self.build_losses(self.model_cfg.LOSS_CONFIG)
         self.forward_ret_dict = None
...
@@ -3,16 +3,18 @@ import numpy as np
 class ResidualCoder(object):
-    def __init__(self, code_size=7, **kwargs):
+    def __init__(self, code_size=7, encode_angle_by_sincos=False, **kwargs):
         super().__init__()
         self.code_size = code_size
+        self.encode_angle_by_sincos = encode_angle_by_sincos
+        if self.encode_angle_by_sincos:
+            self.code_size += 1
 
-    @staticmethod
-    def encode_torch(boxes, anchors):
+    def encode_torch(self, boxes, anchors):
         """
         Args:
             boxes: (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...]
-            anchors: (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...]
+            anchors: (N, 7 + C) [x, y, z, dx, dy, dz, heading or *[cos, sin], ...]
         Returns:
@@ -30,23 +32,30 @@ class ResidualCoder(object):
         dxt = torch.log(dxg / dxa)
         dyt = torch.log(dyg / dya)
         dzt = torch.log(dzg / dza)
-        rt = rg - ra
+        if self.encode_angle_by_sincos:
+            rt_cos = torch.cos(rg) - torch.cos(ra)
+            rt_sin = torch.sin(rg) - torch.sin(ra)
+            rts = [rt_cos, rt_sin]
+        else:
+            rts = [rg - ra]
 
         cts = [g - a for g, a in zip(cgs, cas)]
-        return torch.cat([xt, yt, zt, dxt, dyt, dzt, rt, *cts], dim=-1)
+        return torch.cat([xt, yt, zt, dxt, dyt, dzt, *rts, *cts], dim=-1)
 
-    @staticmethod
-    def decode_torch(box_encodings, anchors):
+    def decode_torch(self, box_encodings, anchors):
         """
         Args:
-            box_encodings: (B, N, 7 + C) or (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...]
+            box_encodings: (B, N, 7 + C) or (N, 7 + C) [x, y, z, dx, dy, dz, heading or *[cos, sin], ...]
             anchors: (B, N, 7 + C) or (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...]
         Returns:
         """
         xa, ya, za, dxa, dya, dza, ra, *cas = torch.split(anchors, 1, dim=-1)
-        xt, yt, zt, dxt, dyt, dzt, rt, *cts = torch.split(box_encodings, 1, dim=-1)
+        if not self.encode_angle_by_sincos:
+            xt, yt, zt, dxt, dyt, dzt, rt, *cts = torch.split(box_encodings, 1, dim=-1)
+        else:
+            xt, yt, zt, dxt, dyt, dzt, cost, sint, *cts = torch.split(box_encodings, 1, dim=-1)
 
         diagonal = torch.sqrt(dxa ** 2 + dya ** 2)
         xg = xt * diagonal + xa
@@ -56,7 +65,13 @@ class ResidualCoder(object):
         dxg = torch.exp(dxt) * dxa
         dyg = torch.exp(dyt) * dya
         dzg = torch.exp(dzt) * dza
-        rg = rt + ra
+
+        if self.encode_angle_by_sincos:
+            rg_cos = cost + torch.cos(ra)
+            rg_sin = sint + torch.sin(ra)
+            rg = torch.atan2(rg_sin, rg_cos)
+        else:
+            rg = rt + ra
 
         cgs = [t + a for t, a in zip(cts, cas)]
         return torch.cat([xg, yg, zg, dxg, dyg, dzg, rg, *cgs], dim=-1)
...
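The sin/cos encoding round-trips exactly: the decoder adds the anchor's cos/sin back and recovers the heading with atan2, avoiding the discontinuity of raw angle residuals. A minimal check of the arithmetic used above:

import torch

ra = torch.tensor([0.3])   # anchor heading
rg = torch.tensor([1.2])   # ground-truth heading

# encode: residuals of cos/sin rather than of the raw angle
rt_cos = torch.cos(rg) - torch.cos(ra)
rt_sin = torch.sin(rg) - torch.sin(ra)

# decode: add the anchor back, then recover the angle with atan2
rg_dec = torch.atan2(rt_sin + torch.sin(ra), rt_cos + torch.cos(ra))
print(torch.allclose(rg_dec, rg))  # True (up to 2*pi wrapping)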
@@ -118,6 +118,8 @@ class WeightedSmoothL1Loss(nn.Module):
             loss: (B, #anchors) float tensor.
                 Weighted smooth l1 loss without reduction.
         """
+        target = torch.where(torch.isnan(target), input, target)  # ignore nan targets
+
         diff = input - target
         # code-wise weighting
         if self.code_weights is not None:
@@ -133,6 +135,48 @@ class WeightedSmoothL1Loss(nn.Module):
         return loss
class WeightedL1Loss(nn.Module):
    def __init__(self, code_weights: list = None):
        """
        Args:
            code_weights: (#codes) float list if not None.
                Code-wise weights.
        """
        super(WeightedL1Loss, self).__init__()
        if code_weights is not None:
            self.code_weights = np.array(code_weights, dtype=np.float32)
            self.code_weights = torch.from_numpy(self.code_weights).cuda()

    def forward(self, input: torch.Tensor, target: torch.Tensor, weights: torch.Tensor = None):
        """
        Args:
            input: (B, #anchors, #codes) float tensor.
                Encoded predicted locations of objects.
            target: (B, #anchors, #codes) float tensor.
                Regression targets.
            weights: (B, #anchors) float tensor if not None.

        Returns:
            loss: (B, #anchors) float tensor.
                Weighted L1 loss without reduction.
        """
        target = torch.where(torch.isnan(target), input, target)  # ignore nan targets
        diff = input - target
        # code-wise weighting
        if self.code_weights is not None:
            diff = diff * self.code_weights.view(1, 1, -1)
        loss = torch.abs(diff)

        # anchor-wise weighting
        if weights is not None:
            assert weights.shape[0] == loss.shape[0] and weights.shape[1] == loss.shape[1]
            loss = loss * weights.unsqueeze(-1)

        return loss
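The NaN masking added to both regression losses makes channels with undefined targets (e.g. missing nuScenes velocity annotations) contribute zero loss, since the target is replaced by the prediction itself. A two-line demonstration:

import torch

inp = torch.tensor([[0.5, 0.1]])
tgt = torch.tensor([[1.0, float('nan')]])     # e.g. a missing velocity target

tgt = torch.where(torch.isnan(tgt), inp, tgt)  # nan -> predicted value
print(torch.abs(inp - tgt))  # tensor([[0.5000, 0.0000]]): the nan channel adds no loss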
class WeightedCrossEntropyLoss(nn.Module):
    """
    Transform input to fit the formation of PyTorch official cross entropy loss
@@ -184,4 +228,4 @@ def get_corner_loss_lidar(pred_bbox3d: torch.Tensor, gt_bbox3d: torch.Tensor):
     # (N, 8)
     corner_loss = WeightedSmoothL1Loss.smooth_l1_loss(corner_dist, beta=1.0)
 
-    return corner_loss.mean(dim=1)
\ No newline at end of file
+    return corner_loss.mean(dim=1)
DATASET: 'NuScenesDataset'
DATA_PATH: '../data/nuscenes'

VERSION: 'v1.0-trainval'
MAX_SWEEPS: 10
PRED_VELOCITY: True
SET_NAN_VELOCITY_TO_ZEROS: True
FILTER_MIN_POINTS_IN_GT: 1

DATA_SPLIT: {
    'train': train,
    'test': val
}

INFO_PATH: {
    'train': [nuscenes_infos_10sweeps_train.pkl],
    'test': [nuscenes_infos_10sweeps_val.pkl],
}

POINT_CLOUD_RANGE: [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]

BALANCED_RESAMPLING: True

DATA_AUGMENTOR:
    DISABLE_AUG_LIST: ['placeholder']
    AUG_CONFIG_LIST:
        - NAME: gt_sampling
          DB_INFO_PATH:
              - nuscenes_dbinfos_10sweeps_withvelo.pkl
          PREPARE: {
              filter_by_min_points: [
                  'car:5','truck:5', 'construction_vehicle:5', 'bus:5', 'trailer:5',
                  'barrier:5', 'motorcycle:5', 'bicycle:5', 'pedestrian:5', 'traffic_cone:5'
              ],
          }

          SAMPLE_GROUPS: [
              'car:2','truck:3', 'construction_vehicle:7', 'bus:4', 'trailer:6',
              'barrier:2', 'motorcycle:6', 'bicycle:6', 'pedestrian:2', 'traffic_cone:2'
          ]

          NUM_POINT_FEATURES: 5
          DATABASE_WITH_FAKELIDAR: False
          REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0]
          LIMIT_WHOLE_SCENE: True

        - NAME: random_world_flip
          ALONG_AXIS_LIST: ['x', 'y']

        - NAME: random_world_rotation
          WORLD_ROT_ANGLE: [-0.3925, 0.3925]

        - NAME: random_world_scaling
          WORLD_SCALE_RANGE: [0.95, 1.05]

POINT_FEATURE_ENCODING: {
    encoding_type: absolute_coordinates_encoding,
    used_feature_list: ['x', 'y', 'z', 'intensity', 'timestamp'],
    src_feature_list: ['x', 'y', 'z', 'intensity', 'timestamp'],
}

DATA_PROCESSOR:
    - NAME: mask_points_and_boxes_outside_range
      REMOVE_OUTSIDE_BOXES: True

    - NAME: shuffle_points
      SHUFFLE_ENABLED: {
          'train': True,
          'test': True
      }

    - NAME: transform_points_to_voxels
      VOXEL_SIZE: [0.1, 0.1, 0.2]
      MAX_POINTS_PER_VOXEL: 10
      MAX_NUMBER_OF_VOXELS: {
          'train': 60000,
          'test': 60000
      }
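MAX_SWEEPS: 10 together with the 'timestamp' channel means each training cloud aggregates up to ten lidar sweeps, with the per-point time lag behind the keyframe carried as the fifth feature. A rough sketch of the idea (not the dataset code itself; `aggregate_sweeps` is a hypothetical helper, and the sweeps are assumed to be already transformed into the reference frame):

import numpy as np

def aggregate_sweeps(sweeps, ref_time):
    """sweeps: list of (points_(N,4), timestamp) pairs in the reference frame."""
    stacked = []
    for points, ts in sweeps:
        time_lag = ref_time - ts  # seconds behind the keyframe
        stacked.append(np.hstack([points, np.full((points.shape[0], 1), time_lag)]))
    return np.concatenate(stacked, axis=0)  # (M, 5): x, y, z, intensity, timestamp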
@@ -34,7 +34,8 @@ MODEL:
         DIR_LIMIT_OFFSET: 0.0
         NUM_DIR_BINS: 2
-        USE_MULTI_HEAD: True
+        USE_MULTIHEAD: True
+        SEPARATE_MULTIHEAD: True
         ANCHOR_GENERATOR_CONFIG: [
             {
                 'class_name': 'Car',
@@ -52,7 +53,7 @@ MODEL:
                 'anchor_rotations': [0, 1.57],
                 'anchor_bottom_heights': [-1.6],
                 'align_center': False,
-                'feature_map_stride': 4,
+                'feature_map_stride': 8,
                 'matched_threshold': 0.5,
                 'unmatched_threshold': 0.35
             },
@@ -62,36 +63,23 @@ MODEL:
                 'anchor_rotations': [0, 1.57],
                 'anchor_bottom_heights': [-1.6],
                 'align_center': False,
-                'feature_map_stride': 4,
+                'feature_map_stride': 8,
                 'matched_threshold': 0.5,
                 'unmatched_threshold': 0.35
             }
         ]
+        SHARED_CONV_NUM_FILTER: 64
         RPN_HEAD_CFGS: [
             {
                 'HEAD_CLS_NAME': ['Car'],
-                'LAYER_NUMS': [1],
-                'LAYER_STRIDES': [1],
-                'NUM_FILTERS': [512],
-                'UPSAMPLE_STRIDES': [1],
-                'NUM_UPSAMPLE_FILTERS': [512]
             },
             {
                 'HEAD_CLS_NAME': ['Pedestrian'],
-                'LAYER_NUMS': [1],
-                'LAYER_STRIDES': [1],
-                'NUM_FILTERS': [512],
-                'UPSAMPLE_STRIDES': [2],
-                'NUM_UPSAMPLE_FILTERS': [512]
             },
             {
                 'HEAD_CLS_NAME': ['Cyclist'],
-                'LAYER_NUMS': [1],
-                'LAYER_STRIDES': [1],
-                'NUM_FILTERS': [512],
-                'UPSAMPLE_STRIDES': [2],
-                'NUM_UPSAMPLE_FILTERS': [512]
             }
         ]
...
CLASS_NAMES: ['car','truck', 'construction_vehicle', 'bus', 'trailer',
              'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone']

DATA_CONFIG:
    _BASE_CONFIG_: cfgs/dataset_configs/nuscenes_dataset.yaml

MODEL:
    NAME: SECONDNet

    VFE:
        NAME: MeanVFE

    BACKBONE_3D:
        NAME: VoxelResBackBone8x

    MAP_TO_BEV:
        NAME: HeightCompression
        NUM_BEV_FEATURES: 256

    BACKBONE_2D:
        NAME: BaseBEVBackbone
        LAYER_NUMS: [5, 5]
        LAYER_STRIDES: [1, 2]
        NUM_FILTERS: [128, 256]
        UPSAMPLE_STRIDES: [1, 2]
        NUM_UPSAMPLE_FILTERS: [256, 256]

    DENSE_HEAD:
        NAME: AnchorHeadMulti
        CLASS_AGNOSTIC: False

        USE_DIRECTION_CLASSIFIER: True
        DIR_OFFSET: 0.78539
        DIR_LIMIT_OFFSET: 0.0
        NUM_DIR_BINS: 2

        USE_MULTIHEAD: True
        SEPARATE_MULTIHEAD: True
        ANCHOR_GENERATOR_CONFIG: [
            {
                'class_name': car,
                'anchor_sizes': [[4.63, 1.97, 1.74]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-0.95],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.6,
                'unmatched_threshold': 0.45
            },
            {
                'class_name': truck,
                'anchor_sizes': [[6.93, 2.51, 2.84]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-0.6],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.55,
                'unmatched_threshold': 0.4
            },
            {
                'class_name': construction_vehicle,
                'anchor_sizes': [[6.37, 2.85, 3.19]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-0.225],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.5,
                'unmatched_threshold': 0.35
            },
            {
                'class_name': bus,
                'anchor_sizes': [[10.5, 2.94, 3.47]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-0.085],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.55,
                'unmatched_threshold': 0.4
            },
            {
                'class_name': trailer,
                'anchor_sizes': [[12.29, 2.90, 3.87]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [0.115],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.5,
                'unmatched_threshold': 0.35
            },
            {
                'class_name': barrier,
                'anchor_sizes': [[0.50, 2.53, 0.98]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-1.33],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.55,
                'unmatched_threshold': 0.4
            },
            {
                'class_name': motorcycle,
                'anchor_sizes': [[2.11, 0.77, 1.47]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-1.085],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.5,
                'unmatched_threshold': 0.3
            },
            {
                'class_name': bicycle,
                'anchor_sizes': [[1.70, 0.60, 1.28]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-1.18],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.5,
                'unmatched_threshold': 0.35
            },
            {
                'class_name': pedestrian,
                'anchor_sizes': [[0.73, 0.67, 1.77]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-0.935],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.6,
                'unmatched_threshold': 0.4
            },
            {
                'class_name': traffic_cone,
                'anchor_sizes': [[0.41, 0.41, 1.07]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-1.285],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.6,
                'unmatched_threshold': 0.4
            },
        ]

        SHARED_CONV_NUM_FILTER: 64

        RPN_HEAD_CFGS: [
            {
                'HEAD_CLS_NAME': ['car'],
                'LAYER_NUMS': [1],
                'LAYER_STRIDES': [1],
                'NUM_FILTERS': [64],
            },
            {
                'HEAD_CLS_NAME': ['truck', 'construction_vehicle'],
                'LAYER_NUMS': [1],
                'LAYER_STRIDES': [1],
                'NUM_FILTERS': [64],
            },
            {
                'HEAD_CLS_NAME': ['bus', 'trailer'],
                'LAYER_NUMS': [1],
                'LAYER_STRIDES': [1],
                'NUM_FILTERS': [64],
            },
            {
                'HEAD_CLS_NAME': ['barrier'],
                'LAYER_NUMS': [1],
                'LAYER_STRIDES': [1],
                'NUM_FILTERS': [64],
            },
            {
                'HEAD_CLS_NAME': ['motorcycle', 'bicycle'],
                'LAYER_NUMS': [1],
                'LAYER_STRIDES': [1],
                'NUM_FILTERS': [64],
            },
            {
                'HEAD_CLS_NAME': ['pedestrian', 'traffic_cone'],
                'LAYER_NUMS': [1],
                'LAYER_STRIDES': [1],
                'NUM_FILTERS': [64],
            },
        ]

        TARGET_ASSIGNER_CONFIG:
            NAME: AxisAlignedTargetAssigner
            POS_FRACTION: -1.0
            SAMPLE_SIZE: 512
            NORM_BY_NUM_EXAMPLES: False
            MATCH_HEIGHT: False
            BOX_CODER: ResidualCoder
            BOX_CODER_CONFIG: {
                'code_size': 9
            }

        LOSS_CONFIG:
            REG_LOSS_TYPE: WeightedL1Loss
            LOSS_WEIGHTS: {
                'pos_cls_weight': 1.0,
                'neg_cls_weight': 2.0,
                'cls_weight': 1.0,
                'loc_weight': 0.25,
                'dir_weight': 0.2,
                'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2]
            }

    POST_PROCESSING:
        RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
        SCORE_THRESH: 0.1
        OUTPUT_RAW_SCORE: False

        EVAL_METRIC: kitti

        NMS_CONFIG:
            MULTI_CLASSES_NMS: False
            NMS_TYPE: nms_gpu
            NMS_THRESH: 0.2
            NMS_PRE_MAXSIZE: 1000
            NMS_POST_MAXSIZE: 100

OPTIMIZATION:
    OPTIMIZER: adam_onecycle
    LR: 0.003
    WEIGHT_DECAY: 0.01
    MOMENTUM: 0.9

    MOMS: [0.95, 0.85]
    PCT_START: 0.4
    DIV_FACTOR: 10
    DECAY_STEP_LIST: [35, 45]
    LR_DECAY: 0.1
    LR_CLIP: 0.0000001

    LR_WARMUP: False
    WARMUP_EPOCH: 1

    GRAD_NORM_CLIP: 10
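With BOX_CODER_CONFIG code_size 9, ResidualCoder also regresses vx/vy residuals, and the two trailing 0.2 entries in code_weights down-weight them relative to geometry. A quick illustration of the per-channel pairing this implies:

code_weights = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2]
channels = ['x', 'y', 'z', 'dx', 'dy', 'dz', 'heading', 'vx', 'vy']
print(dict(zip(channels, code_weights)))  # one weight per regression channel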
CLASS_NAMES: ['car','truck', 'construction_vehicle', 'bus', 'trailer',
              'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone']

DATA_CONFIG:
    _BASE_CONFIG_: cfgs/dataset_configs/nuscenes_dataset.yaml

MODEL:
    NAME: SECONDNet

    VFE:
        NAME: MeanVFE

    BACKBONE_3D:
        NAME: VoxelResBackBone8x

    MAP_TO_BEV:
        NAME: HeightCompression
        NUM_BEV_FEATURES: 256

    BACKBONE_2D:
        NAME: BaseBEVBackbone
        LAYER_NUMS: [5, 5]
        LAYER_STRIDES: [1, 2]
        NUM_FILTERS: [128, 256]
        UPSAMPLE_STRIDES: [1, 2]
        NUM_UPSAMPLE_FILTERS: [256, 256]

    DENSE_HEAD:
        NAME: AnchorHeadMulti
        CLASS_AGNOSTIC: False

        USE_DIRECTION_CLASSIFIER: True
        DIR_OFFSET: 0.78539
        DIR_LIMIT_OFFSET: 0.0
        NUM_DIR_BINS: 2

        USE_MULTIHEAD: True
        SEPARATE_MULTIHEAD: True
        ANCHOR_GENERATOR_CONFIG: [
            {
                'class_name': car,
                'anchor_sizes': [[4.63, 1.97, 1.74]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-0.95],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.6,
                'unmatched_threshold': 0.45
            },
            {
                'class_name': truck,
                'anchor_sizes': [[6.93, 2.51, 2.84]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-0.6],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.55,
                'unmatched_threshold': 0.4
            },
            {
                'class_name': construction_vehicle,
                'anchor_sizes': [[6.37, 2.85, 3.19]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-0.225],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.5,
                'unmatched_threshold': 0.35
            },
            {
                'class_name': bus,
                'anchor_sizes': [[10.5, 2.94, 3.47]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-0.085],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.55,
                'unmatched_threshold': 0.4
            },
            {
                'class_name': trailer,
                'anchor_sizes': [[12.29, 2.90, 3.87]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [0.115],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.5,
                'unmatched_threshold': 0.35
            },
            {
                'class_name': barrier,
                'anchor_sizes': [[0.50, 2.53, 0.98]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-1.33],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.55,
                'unmatched_threshold': 0.4
            },
            {
                'class_name': motorcycle,
                'anchor_sizes': [[2.11, 0.77, 1.47]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-1.085],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.5,
                'unmatched_threshold': 0.3
            },
            {
                'class_name': bicycle,
                'anchor_sizes': [[1.70, 0.60, 1.28]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-1.18],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.5,
                'unmatched_threshold': 0.35
            },
            {
                'class_name': pedestrian,
                'anchor_sizes': [[0.73, 0.67, 1.77]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-0.935],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.6,
                'unmatched_threshold': 0.4
            },
            {
                'class_name': traffic_cone,
                'anchor_sizes': [[0.41, 0.41, 1.07]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-1.285],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.6,
                'unmatched_threshold': 0.4
            },
        ]

        SHARED_CONV_NUM_FILTER: 64

        RPN_HEAD_CFGS: [
            {
                'HEAD_CLS_NAME': ['car'],
                'LAYER_NUMS': [0],
                'LAYER_STRIDES': [1],
                'NUM_FILTERS': [64],
            },
            {
                'HEAD_CLS_NAME': ['truck', 'construction_vehicle'],
                'LAYER_NUMS': [0],
                'LAYER_STRIDES': [1],
                'NUM_FILTERS': [64],
            },
            {
                'HEAD_CLS_NAME': ['bus', 'trailer'],
                'LAYER_NUMS': [0],
                'LAYER_STRIDES': [1],
                'NUM_FILTERS': [64],
            },
            {
                'HEAD_CLS_NAME': ['barrier'],
                'LAYER_NUMS': [0],
                'LAYER_STRIDES': [1],
                'NUM_FILTERS': [64],
            },
            {
                'HEAD_CLS_NAME': ['motorcycle', 'bicycle'],
                'LAYER_NUMS': [0],
                'LAYER_STRIDES': [1],
                'NUM_FILTERS': [64],
            },
            {
                'HEAD_CLS_NAME': ['pedestrian', 'traffic_cone'],
                'LAYER_NUMS': [0],
                'LAYER_STRIDES': [1],
                'NUM_FILTERS': [64],
            },
        ]

        TARGET_ASSIGNER_CONFIG:
            NAME: AxisAlignedTargetAssigner
            POS_FRACTION: -1.0
            SAMPLE_SIZE: 512
            NORM_BY_NUM_EXAMPLES: False
            MATCH_HEIGHT: False
            BOX_CODER: ResidualCoder
            BOX_CODER_CONFIG: {
                'code_size': 9
            }

        LOSS_CONFIG:
            REG_LOSS_TYPE: WeightedL1Loss
            LOSS_WEIGHTS: {
                'pos_cls_weight': 1.0,
                'neg_cls_weight': 2.0,
                'cls_weight': 1.0,
                'loc_weight': 0.25,
                'dir_weight': 0.2,
                'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2]
            }

    POST_PROCESSING:
        RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
        SCORE_THRESH: 0.1
        OUTPUT_RAW_SCORE: False

        EVAL_METRIC: kitti

        NMS_CONFIG:
            MULTI_CLASSES_NMS: False
            NMS_TYPE: nms_gpu
            NMS_THRESH: 0.2
            NMS_PRE_MAXSIZE: 1000
            NMS_POST_MAXSIZE: 100

OPTIMIZATION:
    OPTIMIZER: adam_onecycle
    LR: 0.003
    WEIGHT_DECAY: 0.01
    MOMENTUM: 0.9

    MOMS: [0.95, 0.85]
    PCT_START: 0.4
    DIV_FACTOR: 10
    DECAY_STEP_LIST: [35, 45]
    LR_DECAY: 0.1
    LR_CLIP: 0.0000001

    LR_WARMUP: False
    WARMUP_EPOCH: 1

    GRAD_NORM_CLIP: 10