add code

d2b71343 · 雍大凯 · 69e57885 · d2b71343 · d2b71343 · d2b71343
Commit d2b71343 authored Apr 08, 2026 by 雍大凯
20 changed files
--- a/docker-hub/FlashOCC/Flashocc/tools/data_converter/s3dis_data_utils.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/data_converter/s3dis_data_utils.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+from concurrent import futures as futures
+from os import path as osp
+
+import mmcv
+import numpy as np
+
+
+class S3DISData(object):
+    """S3DIS data.
+
+    Generate s3dis infos for s3dis_converter.
+
+    Args:
+        root_path (str): Root path of the raw data.
+        split (str, optional): Set split type of the data. Default: 'Area_1'.
+    """
+
+    def __init__(self, root_path, split='Area_1'):
+        self.root_dir = root_path
+        self.split = split
+        self.data_dir = osp.join(root_path,
+                                 'Stanford3dDataset_v1.2_Aligned_Version')
+
+        # Following `GSDN <https://arxiv.org/abs/2006.12356>`_, use 5 furniture
+        # classes for detection: table, chair, sofa, bookcase, board.
+        self.cat_ids = np.array([7, 8, 9, 10, 11])
+        self.cat_ids2class = {
+            cat_id: i
+            for i, cat_id in enumerate(list(self.cat_ids))
+        }
+
+        assert split in [
+            'Area_1', 'Area_2', 'Area_3', 'Area_4', 'Area_5', 'Area_6'
+        ]
+        split_dir = osp.join(self.data_dir, split)
+        # Keep every entry that is not a regular file (e.g. the
+        # conferenceRoom_1 folder). A new list is built instead of calling
+        # ``remove`` while iterating, because removing during iteration
+        # skips the entry that follows each removed one.
+        self.sample_id_list = [
+            sample_id for sample_id in os.listdir(split_dir)
+            if not osp.isfile(osp.join(split_dir, sample_id))
+        ]
+
+    def __len__(self):
+        """Return the number of samples found for the split."""
+        return len(self.sample_id_list)
+
+    def get_infos(self, num_workers=4, has_label=True, sample_id_list=None):
+        """Get data infos.
+
+        This method gets information from the raw data. For every sample it
+        loads the ``.npy`` point / instance / semantic arrays from
+        ``<root>/s3dis_data`` and dumps them as ``.bin`` files under
+        ``<root>/points``, ``<root>/instance_mask`` and
+        ``<root>/semantic_mask``.
+
+        Args:
+            num_workers (int, optional): Number of threads to be used.
+                Default: 4.
+            has_label (bool, optional): Whether the data has label.
+                Not used by this class; kept for interface compatibility.
+                Default: True.
+            sample_id_list (list[int], optional): Index list of the sample.
+                Default: None.
+
+        Returns:
+            infos (list[dict]): Information of the raw data.
+        """
+
+        def process_single_scene(sample_idx):
+            print(f'{self.split} sample_idx: {sample_idx}')
+            info = dict()
+            pc_info = {
+                'num_features': 6,
+                'lidar_idx': f'{self.split}_{sample_idx}'
+            }
+            info['point_cloud'] = pc_info
+            pts_filename = osp.join(self.root_dir, 's3dis_data',
+                                    f'{self.split}_{sample_idx}_point.npy')
+            pts_instance_mask_path = osp.join(
+                self.root_dir, 's3dis_data',
+                f'{self.split}_{sample_idx}_ins_label.npy')
+            pts_semantic_mask_path = osp.join(
+                self.root_dir, 's3dis_data',
+                f'{self.split}_{sample_idx}_sem_label.npy')
+
+            points = np.load(pts_filename).astype(np.float32)
+            # ``np.int`` was removed in NumPy 1.24. An explicit int64 also
+            # matches the dtype used when the .bin masks are read back with
+            # ``np.fromfile(..., dtype=np.int64)``.
+            pts_instance_mask = np.load(pts_instance_mask_path).astype(
+                np.int64)
+            pts_semantic_mask = np.load(pts_semantic_mask_path).astype(
+                np.int64)
+
+            mmcv.mkdir_or_exist(osp.join(self.root_dir, 'points'))
+            mmcv.mkdir_or_exist(osp.join(self.root_dir, 'instance_mask'))
+            mmcv.mkdir_or_exist(osp.join(self.root_dir, 'semantic_mask'))
+
+            points.tofile(
+                osp.join(self.root_dir, 'points',
+                         f'{self.split}_{sample_idx}.bin'))
+            pts_instance_mask.tofile(
+                osp.join(self.root_dir, 'instance_mask',
+                         f'{self.split}_{sample_idx}.bin'))
+            pts_semantic_mask.tofile(
+                osp.join(self.root_dir, 'semantic_mask',
+                         f'{self.split}_{sample_idx}.bin'))
+
+            # Paths stored in the info are relative to the data root.
+            info['pts_path'] = osp.join('points',
+                                        f'{self.split}_{sample_idx}.bin')
+            info['pts_instance_mask_path'] = osp.join(
+                'instance_mask', f'{self.split}_{sample_idx}.bin')
+            info['pts_semantic_mask_path'] = osp.join(
+                'semantic_mask', f'{self.split}_{sample_idx}.bin')
+            info['annos'] = self.get_bboxes(points, pts_instance_mask,
+                                            pts_semantic_mask)
+
+            return info
+
+        sample_id_list = sample_id_list if sample_id_list is not None \
+            else self.sample_id_list
+        with futures.ThreadPoolExecutor(num_workers) as executor:
+            infos = executor.map(process_single_scene, sample_id_list)
+        return list(infos)
+
+    def get_bboxes(self, points, pts_instance_mask, pts_semantic_mask):
+        """Convert instance masks to axis-aligned bounding boxes.
+
+        Instance ids are scanned from 1 up to the largest id in the mask.
+        Ids that own no point are skipped, and an empty mask yields an
+        annotation with no boxes.
+
+        Args:
+            points (np.array): Scene points of shape (n, 6).
+            pts_instance_mask (np.ndarray): Instance labels of shape (n,).
+            pts_semantic_mask (np.ndarray): Semantic labels of shape (n,).
+
+        Returns:
+            dict: A dict containing detection infos with following keys:
+
+                - gt_boxes_upright_depth (np.ndarray): Bounding boxes
+                    of shape (n, 6)
+                - class (np.ndarray): Box labels of shape (n,)
+                - gt_num (int): Number of boxes.
+        """
+        bboxes, labels = [], []
+        # ``max()`` of an empty array raises, so treat an empty mask as a
+        # scene without instances.
+        max_instance_id = int(pts_instance_mask.max()) \
+            if pts_instance_mask.size else 0
+        for i in range(1, max_instance_id + 1):
+            ids = pts_instance_mask == i
+            if not ids.any():
+                # gap in the instance numbering: nothing to box
+                continue
+            instance_sem = pts_semantic_mask[ids]
+            # check if all instance points have same semantic label
+            assert instance_sem.min() == instance_sem.max()
+            label = instance_sem[0]
+            # keep only furniture objects
+            if label in self.cat_ids2class:
+                labels.append(self.cat_ids2class[label])
+                pts = points[:, :3][ids]
+                min_pts = pts.min(axis=0)
+                max_pts = pts.max(axis=0)
+                locations = (min_pts + max_pts) / 2
+                dimensions = max_pts - min_pts
+                bboxes.append(np.concatenate((locations, dimensions)))
+        annotation = dict()
+        # follow ScanNet and SUN RGB-D keys
+        annotation['gt_boxes_upright_depth'] = np.array(bboxes)
+        annotation['class'] = np.array(labels)
+        annotation['gt_num'] = len(labels)
+        return annotation
+
+
+class S3DISSegData(object):
+    """S3DIS dataset used to generate infos for semantic segmentation task.
+
+    Args:
+        data_root (str): Root path of the raw data.
+        ann_file (str): Path of the generated info file; it is read with
+            ``mmcv.load``.
+        split (str, optional): Set split type of the data.
+            Default: 'Area_1'.
+        num_points (int, optional): Number of points in each data input.
+            Default: 4096.
+        label_weight_func (function, optional): Function to compute the
+            label weight. Default: None.
+    """
+
+    def __init__(self,
+                 data_root,
+                 ann_file,
+                 split='Area_1',
+                 num_points=4096,
+                 label_weight_func=None):
+        self.data_root = data_root
+        self.data_infos = mmcv.load(ann_file)
+        self.split = split
+        self.num_points = num_points
+
+        self.all_ids = np.arange(13)  # all possible ids
+        self.cat_ids = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                                 12])  # used for seg task
+        self.ignore_index = len(self.cat_ids)
+
+        # Lookup table from raw category id to training label; ids that are
+        # not in ``cat_ids`` map to ``ignore_index``. ``np.int`` was removed
+        # in NumPy 1.24, so an explicit 64-bit integer dtype is used.
+        self.cat_id2class = np.ones(
+            (self.all_ids.shape[0], ), dtype=np.int64) * self.ignore_index
+        for i, cat_id in enumerate(self.cat_ids):
+            self.cat_id2class[cat_id] = i
+
+        # label weighting function is taken from
+        # https://github.com/charlesq34/pointnet2/blob/master/scannet/scannet_dataset.py#L24
+        self.label_weight_func = (lambda x: 1.0 / np.log(1.2 + x)) if \
+            label_weight_func is None else label_weight_func
+
+    def get_seg_infos(self):
+        """Compute and save the resampled scene indices and label weights.
+
+        Both arrays are written as ``.npy`` files into
+        ``<data_root>/seg_info``, prefixed with the split name.
+        """
+        scene_idxs, label_weight = self.get_scene_idxs_and_label_weight()
+        save_folder = osp.join(self.data_root, 'seg_info')
+        mmcv.mkdir_or_exist(save_folder)
+        np.save(
+            osp.join(save_folder, f'{self.split}_resampled_scene_idxs.npy'),
+            scene_idxs)
+        np.save(
+            osp.join(save_folder, f'{self.split}_label_weight.npy'),
+            label_weight)
+        print(f'{self.split} resampled scene index and label weight saved')
+
+    def _convert_to_label(self, mask):
+        """Convert class_id in loaded segmentation mask to label.
+
+        Args:
+            mask (str | np.ndarray): Either an array of category ids or a
+                path to one. Paths ending in 'npy' are read with
+                ``np.load``; any other path is read as raw int64 data.
+
+        Returns:
+            np.ndarray: Labels looked up in ``self.cat_id2class``.
+        """
+        if isinstance(mask, str):
+            if mask.endswith('npy'):
+                mask = np.load(mask)
+            else:
+                mask = np.fromfile(mask, dtype=np.int64)
+        label = self.cat_id2class[mask]
+        return label
+
+    def get_scene_idxs_and_label_weight(self):
+        """Compute scene_idxs for data sampling and label weight for loss
+        calculation.
+
+        We sample more times for scenes with more points. Label_weight is
+        inversely proportional to number of class points.
+
+        Returns:
+            tuple[np.ndarray, np.ndarray]: The int32 array of repeated scene
+                indices and the float32 per-class label weights (the
+                ignore_index bin is dropped).
+        """
+        num_classes = len(self.cat_ids)
+        num_point_all = []
+        label_weight = np.zeros((num_classes + 1, ))  # ignore_index
+        for data_info in self.data_infos:
+            label = self._convert_to_label(
+                osp.join(self.data_root, data_info['pts_semantic_mask_path']))
+            num_point_all.append(label.shape[0])
+            # one bin per class plus a final bin for ignore_index
+            class_count, _ = np.histogram(label, range(num_classes + 2))
+            label_weight += class_count
+
+        # repeat scene_idx for num_scene_point // num_sample_point times
+        sample_prob = np.array(num_point_all) / float(np.sum(num_point_all))
+        num_iter = int(np.sum(num_point_all) / float(self.num_points))
+        scene_idxs = []
+        for idx in range(len(self.data_infos)):
+            scene_idxs.extend([idx] * int(round(sample_prob[idx] * num_iter)))
+        scene_idxs = np.array(scene_idxs).astype(np.int32)
+
+        # calculate label weight, adopted from PointNet++
+        label_weight = label_weight[:-1].astype(np.float32)
+        label_weight = label_weight / label_weight.sum()
+        label_weight = self.label_weight_func(label_weight).astype(np.float32)
+
+        return scene_idxs, label_weight
--- a/docker-hub/FlashOCC/Flashocc/tools/data_converter/scannet_data_utils.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/data_converter/scannet_data_utils.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+from concurrent import futures as futures
+from os import path as osp
+
+import mmcv
+import numpy as np
+
+
+class ScanNetData(object):
+    """ScanNet data.
+
+    Builds the per-scene info dicts consumed by scannet_converter.
+
+    Args:
+        root_path (str): Root path of the raw data.
+        split (str, optional): Set split type of the data. Default: 'train'.
+    """
+
+    def __init__(self, root_path, split='train'):
+        self.root_dir = root_path
+        self.split = split
+        self.split_dir = osp.join(root_path)
+        self.classes = [
+            'cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
+            'bookshelf', 'picture', 'counter', 'desk', 'curtain',
+            'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
+            'garbagebin'
+        ]
+        # class name <-> contiguous label, in the order of ``self.classes``
+        self.cat2label = {
+            name: label
+            for label, name in enumerate(self.classes)
+        }
+        self.label2cat = {
+            label: name
+            for name, label in self.cat2label.items()
+        }
+        # category ids found in the last column of the box files, mapped to
+        # the contiguous labels above
+        self.cat_ids = np.array(
+            [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39])
+        self.cat_ids2class = dict(
+            zip(list(self.cat_ids), range(len(self.cat_ids))))
+        assert split in ('train', 'val', 'test')
+        split_file = osp.join(self.root_dir, 'meta_data',
+                              f'scannetv2_{split}.txt')
+        mmcv.check_file_exist(split_file)
+        self.sample_id_list = mmcv.list_from_file(split_file)
+        self.test_mode = (split == 'test')
+
+    def __len__(self):
+        """Number of scenes listed in the split file."""
+        return len(self.sample_id_list)
+
+    def _load_instance_npy(self, idx, suffix):
+        """Load ``scannet_instance_data/<idx>_<suffix>.npy`` after checking
+        that the file exists."""
+        npy_file = osp.join(self.root_dir, 'scannet_instance_data',
+                            f'{idx}_{suffix}.npy')
+        mmcv.check_file_exist(npy_file)
+        return np.load(npy_file)
+
+    def get_aligned_box_label(self, idx):
+        """Load the axis-aligned boxes (with class column) of scene idx."""
+        return self._load_instance_npy(idx, 'aligned_bbox')
+
+    def get_unaligned_box_label(self, idx):
+        """Load the unaligned boxes (with class column) of scene idx."""
+        return self._load_instance_npy(idx, 'unaligned_bbox')
+
+    def get_axis_align_matrix(self, idx):
+        """Load the axis-alignment matrix of scene idx."""
+        return self._load_instance_npy(idx, 'axis_align_matrix')
+
+    def get_images(self, idx):
+        """Return the sorted, root-relative paths of the scene's .jpg
+        files."""
+        scene_dir = osp.join(self.root_dir, 'posed_images', idx)
+        return [
+            osp.join('posed_images', idx, fname)
+            for fname in sorted(os.listdir(scene_dir))
+            if fname.endswith('.jpg')
+        ]
+
+    def get_extrinsics(self, idx):
+        """Load every pose .txt of the scene (sorted by file name), leaving
+        out 'intrinsic.txt'."""
+        scene_dir = osp.join(self.root_dir, 'posed_images', idx)
+        return [
+            np.loadtxt(osp.join(scene_dir, fname))
+            for fname in sorted(os.listdir(scene_dir))
+            if fname.endswith('.txt') and fname != 'intrinsic.txt'
+        ]
+
+    def get_intrinsics(self, idx):
+        """Load the scene's 'intrinsic.txt' matrix."""
+        intrinsic_file = osp.join(self.root_dir, 'posed_images', idx,
+                                  'intrinsic.txt')
+        mmcv.check_file_exist(intrinsic_file)
+        return np.loadtxt(intrinsic_file)
+
+    def get_infos(self, num_workers=4, has_label=True, sample_id_list=None):
+        """Get data infos.
+
+        Reads the raw per-scene arrays, dumps points (and, outside test
+        mode, the instance / semantic masks) as ``.bin`` files under the
+        data root, and collects the matching info dicts.
+
+        Args:
+            num_workers (int, optional): Number of threads to be used.
+                Default: 4.
+            has_label (bool, optional): Whether the data has label.
+                Default: True.
+            sample_id_list (list[int], optional): Index list of the sample.
+                Default: None.
+
+        Returns:
+            infos (list[dict]): Information of the raw data.
+        """
+
+        def process_single_scene(sample_idx):
+            print(f'{self.split} sample_idx: {sample_idx}')
+            raw_dir = osp.join(self.root_dir, 'scannet_instance_data')
+            bin_name = f'{sample_idx}.bin'
+
+            info = {
+                'point_cloud': {
+                    'num_features': 6,
+                    'lidar_idx': sample_idx
+                }
+            }
+            points = np.load(osp.join(raw_dir, f'{sample_idx}_vert.npy'))
+            mmcv.mkdir_or_exist(osp.join(self.root_dir, 'points'))
+            points.tofile(osp.join(self.root_dir, 'points', bin_name))
+            info['pts_path'] = osp.join('points', bin_name)
+
+            # update with RGB image paths if exist
+            if os.path.exists(osp.join(self.root_dir, 'posed_images')):
+                info['intrinsics'] = self.get_intrinsics(sample_idx)
+                all_extrinsics = self.get_extrinsics(sample_idx)
+                all_img_paths = self.get_images(sample_idx)
+                # some poses in ScanNet are invalid: keep only the frames
+                # whose extrinsic matrix is entirely finite
+                valid_frames = [
+                    (pose, img) for pose, img in zip(all_extrinsics,
+                                                     all_img_paths)
+                    if np.all(np.isfinite(pose))
+                ]
+                info['extrinsics'] = [pose for pose, _ in valid_frames]
+                info['img_paths'] = [img for _, img in valid_frames]
+
+            if not self.test_mode:
+                ins_mask = np.load(
+                    osp.join(raw_dir, f'{sample_idx}_ins_label.npy'))
+                ins_mask = ins_mask.astype(np.int64)
+                sem_mask = np.load(
+                    osp.join(raw_dir, f'{sample_idx}_sem_label.npy'))
+                sem_mask = sem_mask.astype(np.int64)
+
+                mmcv.mkdir_or_exist(osp.join(self.root_dir, 'instance_mask'))
+                mmcv.mkdir_or_exist(osp.join(self.root_dir, 'semantic_mask'))
+
+                ins_mask.tofile(
+                    osp.join(self.root_dir, 'instance_mask', bin_name))
+                sem_mask.tofile(
+                    osp.join(self.root_dir, 'semantic_mask', bin_name))
+
+                info['pts_instance_mask_path'] = osp.join(
+                    'instance_mask', bin_name)
+                info['pts_semantic_mask_path'] = osp.join(
+                    'semantic_mask', bin_name)
+
+            if has_label:
+                # box is of shape [k, 6 + class]
+                aligned_box_label = self.get_aligned_box_label(sample_idx)
+                unaligned_box_label = self.get_unaligned_box_label(sample_idx)
+                num_boxes = aligned_box_label.shape[0]
+                annotations = {'gt_num': num_boxes}
+                if num_boxes != 0:
+                    aligned_box = aligned_box_label[:, :-1]  # k, 6
+                    unaligned_box = unaligned_box_label[:, :-1]
+                    class_ids = aligned_box_label[:, -1]  # k
+                    annotations['name'] = np.array([
+                        self.label2cat[self.cat_ids2class[class_id]]
+                        for class_id in class_ids
+                    ])
+                    # default names are given to aligned bbox for
+                    # compatibility; the unaligned boxes are stored as well
+                    # under keys prefixed with 'unaligned_'
+                    annotations['location'] = aligned_box[:, :3]
+                    annotations['dimensions'] = aligned_box[:, 3:6]
+                    annotations['gt_boxes_upright_depth'] = aligned_box
+                    annotations['unaligned_location'] = unaligned_box[:, :3]
+                    annotations['unaligned_dimensions'] = unaligned_box[:, 3:6]
+                    annotations[
+                        'unaligned_gt_boxes_upright_depth'] = unaligned_box
+                    annotations['index'] = np.arange(
+                        num_boxes, dtype=np.int32)
+                    annotations['class'] = np.array([
+                        self.cat_ids2class[class_id]
+                        for class_id in class_ids
+                    ])
+                annotations['axis_align_matrix'] = \
+                    self.get_axis_align_matrix(sample_idx)  # 4x4
+                info['annos'] = annotations
+            return info
+
+        if sample_id_list is None:
+            sample_id_list = self.sample_id_list
+        with futures.ThreadPoolExecutor(num_workers) as executor:
+            infos = executor.map(process_single_scene, sample_id_list)
+        return list(infos)
+
+
+class ScanNetSegData(object):
+    """ScanNet dataset used to generate infos for semantic segmentation task.
+
+    Args:
+        data_root (str): Root path of the raw data.
+        ann_file (str): The generated scannet infos.
+        split (str, optional): Set split type of the data. Default: 'train'.
+        num_points (int, optional): Number of points in each data input.
+            Default: 8192.
+        label_weight_func (function, optional): Function to compute the
+            label weight. Default: None.
+    """
+
+    def __init__(self,
+                 data_root,
+                 ann_file,
+                 split='train',
+                 num_points=8192,
+                 label_weight_func=None):
+        self.data_root = data_root
+        self.data_infos = mmcv.load(ann_file)
+        self.split = split
+        assert split in ['train', 'val', 'test']
+        self.num_points = num_points
+
+        self.all_ids = np.arange(41)  # all possible ids
+        self.cat_ids = np.array([
+            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36,
+            39
+        ])  # used for seg task
+        self.ignore_index = len(self.cat_ids)
+
+        # Lookup table from raw category id to training label; ids that are
+        # not in ``cat_ids`` map to ``ignore_index``. ``np.int`` was removed
+        # in NumPy 1.24, so an explicit 64-bit integer dtype is used.
+        self.cat_id2class = np.ones(
+            (self.all_ids.shape[0], ), dtype=np.int64) * self.ignore_index
+        for i, cat_id in enumerate(self.cat_ids):
+            self.cat_id2class[cat_id] = i
+
+        # label weighting function is taken from
+        # https://github.com/charlesq34/pointnet2/blob/master/scannet/scannet_dataset.py#L24
+        self.label_weight_func = (lambda x: 1.0 / np.log(1.2 + x)) if \
+            label_weight_func is None else label_weight_func
+
+    def get_seg_infos(self):
+        """Compute and save the resampled scene indices and label weights.
+
+        Nothing is done for the 'test' split. Otherwise both arrays are
+        written as ``.npy`` files into ``<data_root>/seg_info``, prefixed
+        with the split name.
+        """
+        if self.split == 'test':
+            return
+        scene_idxs, label_weight = self.get_scene_idxs_and_label_weight()
+        save_folder = osp.join(self.data_root, 'seg_info')
+        mmcv.mkdir_or_exist(save_folder)
+        np.save(
+            osp.join(save_folder, f'{self.split}_resampled_scene_idxs.npy'),
+            scene_idxs)
+        np.save(
+            osp.join(save_folder, f'{self.split}_label_weight.npy'),
+            label_weight)
+        print(f'{self.split} resampled scene index and label weight saved')
+
+    def _convert_to_label(self, mask):
+        """Convert class_id in loaded segmentation mask to label.
+
+        Args:
+            mask (str | np.ndarray): Either an array of category ids or a
+                path to one. Paths ending in 'npy' are read with
+                ``np.load``; any other path is read as raw int64 data.
+
+        Returns:
+            np.ndarray: Labels looked up in ``self.cat_id2class``.
+        """
+        if isinstance(mask, str):
+            if mask.endswith('npy'):
+                mask = np.load(mask)
+            else:
+                mask = np.fromfile(mask, dtype=np.int64)
+        label = self.cat_id2class[mask]
+        return label
+
+    def get_scene_idxs_and_label_weight(self):
+        """Compute scene_idxs for data sampling and label weight for loss
+        calculation.
+
+        We sample more times for scenes with more points. Label_weight is
+        inversely proportional to number of class points.
+
+        Returns:
+            tuple[np.ndarray, np.ndarray]: The int32 array of repeated scene
+                indices and the float32 per-class label weights (the
+                ignore_index bin is dropped).
+        """
+        num_classes = len(self.cat_ids)
+        num_point_all = []
+        label_weight = np.zeros((num_classes + 1, ))  # ignore_index
+        for data_info in self.data_infos:
+            label = self._convert_to_label(
+                osp.join(self.data_root, data_info['pts_semantic_mask_path']))
+            num_point_all.append(label.shape[0])
+            # one bin per class plus a final bin for ignore_index
+            class_count, _ = np.histogram(label, range(num_classes + 2))
+            label_weight += class_count
+
+        # repeat scene_idx for num_scene_point // num_sample_point times
+        sample_prob = np.array(num_point_all) / float(np.sum(num_point_all))
+        num_iter = int(np.sum(num_point_all) / float(self.num_points))
+        scene_idxs = []
+        for idx in range(len(self.data_infos)):
+            scene_idxs.extend([idx] * int(round(sample_prob[idx] * num_iter)))
+        scene_idxs = np.array(scene_idxs).astype(np.int32)
+
+        # calculate label weight, adopted from PointNet++
+        label_weight = label_weight[:-1].astype(np.float32)
+        label_weight = label_weight / label_weight.sum()
+        label_weight = self.label_weight_func(label_weight).astype(np.float32)
+
+        return scene_idxs, label_weight
--- a/docker-hub/FlashOCC/Flashocc/tools/data_converter/sunrgbd_data_utils.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/data_converter/sunrgbd_data_utils.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from concurrent import futures as futures
+from os import path as osp
+
+import mmcv
+import numpy as np
+from scipy import io as sio
+
+
+def random_sampling(points, num_points, replace=None):
+    """Randomly pick ``num_points`` rows from a point cloud.
+
+    A negative ``num_points`` disables sampling and the input is returned
+    untouched.
+
+    Args:
+        points (ndarray): Point cloud.
+        num_points (int): The number of samples.
+        replace (bool): Whether the sample is with or without replacement.
+            When left as None, replacement is used only if the cloud holds
+            fewer than ``num_points`` points.
+
+    Returns:
+        points (ndarray): Point cloud after sampling.
+    """
+    if num_points < 0:
+        return points
+    total = points.shape[0]
+    with_replacement = (total < num_points) if replace is None else replace
+    picked = np.random.choice(total, num_points, replace=with_replacement)
+    return points[picked]
+
+
+class SUNRGBDInstance(object):
+    """One object parsed from a space-separated label line.
+
+    The first token is the class name and the remaining tokens are parsed
+    as floats: 2D box (x, y, w, h), 3D centroid, three size values and the
+    first two components of the orientation vector.
+
+    Args:
+        line (str): A single label line.
+    """
+
+    def __init__(self, line):
+        classname, *raw_values = line.split(' ')
+        values = [float(token) for token in raw_values]
+
+        self.classname = classname
+
+        # 2D box is stored as top-left corner plus width / height
+        self.xmin = values[0]
+        self.ymin = values[1]
+        self.xmax = values[0] + values[2]
+        self.ymax = values[1] + values[3]
+        self.box2d = np.array([self.xmin, self.ymin, self.xmax, self.ymax])
+
+        self.centroid = np.array([values[4], values[5], values[6]])
+        self.width = values[7]
+        self.length = values[8]
+        self.height = values[9]
+        # length is the x_size, width the y_size and height the z_size in
+        # our depth coordinate system (l corresponds to the size along the
+        # x axis); the stored size is twice the values from the line
+        self.size = 2 * np.array([self.length, self.width, self.height])
+
+        self.orientation = np.array([values[10], values[11], 0.0])
+        self.heading_angle = np.arctan2(self.orientation[1],
+                                        self.orientation[0])
+        self.box3d = np.concatenate(
+            [self.centroid, self.size, self.heading_angle[None]])
+
+
+class SUNRGBDData(object):
+    """SUNRGBD data.
+
+    Generate sunrgbd infos for sunrgbd_converter.
+
+    Args:
+        root_path (str): Root path of the raw data.
+        split (str, optional): Set split type of the data. Default: 'train'.
+        use_v1 (bool, optional): Whether to use v1. Default: False.
+        num_points (int, optional): Number of points to sample. Set to -1
+            to utilize all points. Defaults to -1.
+    """
+
+    def __init__(self, root_path, split='train', use_v1=False, num_points=-1):
+        self.root_dir = root_path
+        self.split = split
+        self.split_dir = osp.join(root_path, 'sunrgbd_trainval')
+        self.num_points = num_points
+        self.classes = [
+            'bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
+            'night_stand', 'bookshelf', 'bathtub'
+        ]
+        self.cat2label = {cat: self.classes.index(cat) for cat in self.classes}
+        self.label2cat = {
+            label: self.classes[label]
+            for label in range(len(self.classes))
+        }
+        assert split in ['train', 'val', 'test']
+        split_file = osp.join(self.split_dir, f'{split}_data_idx.txt')
+        mmcv.check_file_exist(split_file)
+        # Materialise the ids: a bare ``map`` object has no ``len()`` and is
+        # exhausted after one pass, which breaks ``__len__`` and any second
+        # ``get_infos`` call.
+        self.sample_id_list = list(map(int, mmcv.list_from_file(split_file)))
+        self.image_dir = osp.join(self.split_dir, 'image')
+        self.calib_dir = osp.join(self.split_dir, 'calib')
+        self.depth_dir = osp.join(self.split_dir, 'depth')
+        if use_v1:
+            self.label_dir = osp.join(self.split_dir, 'label_v1')
+        else:
+            self.label_dir = osp.join(self.split_dir, 'label')
+
+    def __len__(self):
+        """Return the number of sample ids in the split."""
+        return len(self.sample_id_list)
+
+    def get_image(self, idx):
+        """Read ``<image_dir>/<idx:06d>.jpg`` with ``mmcv.imread``."""
+        img_filename = osp.join(self.image_dir, f'{idx:06d}.jpg')
+        return mmcv.imread(img_filename)
+
+    def get_image_shape(self, idx):
+        """Return the first two dims of the image shape as an int32 array."""
+        image = self.get_image(idx)
+        return np.array(image.shape[:2], dtype=np.int32)
+
+    def get_depth(self, idx):
+        """Return the 'instance' entry of ``<depth_dir>/<idx:06d>.mat``."""
+        depth_filename = osp.join(self.depth_dir, f'{idx:06d}.mat')
+        depth = sio.loadmat(depth_filename)['instance']
+        return depth
+
+    def get_calibration(self, idx):
+        """Read the calibration file of a sample.
+
+        The first line holds ``Rt`` and the second line holds ``K``; each
+        line is reshaped to 3x3 in column-major (Fortran) order.
+
+        Args:
+            idx (int): Sample index.
+
+        Returns:
+            tuple[np.ndarray, np.ndarray]: ``K`` and ``Rt`` as float32
+                3x3 matrices.
+        """
+        calib_filepath = osp.join(self.calib_dir, f'{idx:06d}.txt')
+        # ``with`` closes the handle, which the bare ``open`` left open
+        with open(calib_filepath) as calib_file:
+            lines = [line.rstrip() for line in calib_file]
+        Rt = np.array([float(x) for x in lines[0].split(' ')])
+        Rt = np.reshape(Rt, (3, 3), order='F').astype(np.float32)
+        K = np.array([float(x) for x in lines[1].split(' ')])
+        K = np.reshape(K, (3, 3), order='F').astype(np.float32)
+        return K, Rt
+
+    def get_label_objects(self, idx):
+        """Parse every line of the sample's label file.
+
+        Args:
+            idx (int): Sample index.
+
+        Returns:
+            list[SUNRGBDInstance]: One instance per label line.
+        """
+        label_filename = osp.join(self.label_dir, f'{idx:06d}.txt')
+        with open(label_filename) as label_file:
+            lines = [line.rstrip() for line in label_file]
+        objects = [SUNRGBDInstance(line) for line in lines]
+        return objects
+
+    def get_infos(self, num_workers=4, has_label=True, sample_id_list=None):
+        """Get data infos.
+
+        This method gets information from the raw data. The (optionally
+        subsampled) points of each sample are dumped to
+        ``<root>/points/<idx:06d>.bin``.
+
+        Args:
+            num_workers (int, optional): Number of threads to be used.
+                Default: 4.
+            has_label (bool, optional): Whether the data has label.
+                Default: True.
+            sample_id_list (list[int], optional): Index list of the sample.
+                Default: None.
+
+        Returns:
+            infos (list[dict]): Information of the raw data.
+        """
+
+        def process_single_scene(sample_idx):
+            print(f'{self.split} sample_idx: {sample_idx}')
+            # convert depth to points
+            pc_upright_depth = self.get_depth(sample_idx)
+            pc_upright_depth_subsampled = random_sampling(
+                pc_upright_depth, self.num_points)
+
+            info = dict()
+            pc_info = {'num_features': 6, 'lidar_idx': sample_idx}
+            info['point_cloud'] = pc_info
+
+            mmcv.mkdir_or_exist(osp.join(self.root_dir, 'points'))
+            pc_upright_depth_subsampled.tofile(
+                osp.join(self.root_dir, 'points', f'{sample_idx:06d}.bin'))
+
+            info['pts_path'] = osp.join('points', f'{sample_idx:06d}.bin')
+            img_path = osp.join('image', f'{sample_idx:06d}.jpg')
+            image_info = {
+                'image_idx': sample_idx,
+                'image_shape': self.get_image_shape(sample_idx),
+                'image_path': img_path
+            }
+            info['image'] = image_info
+
+            K, Rt = self.get_calibration(sample_idx)
+            calib_info = {'K': K, 'Rt': Rt}
+            info['calib'] = calib_info
+
+            if has_label:
+                obj_list = self.get_label_objects(sample_idx)
+                # Filter once: only objects of the known classes are
+                # annotated, and every field below uses this same subset.
+                kept = [
+                    obj for obj in obj_list if obj.classname in self.cat2label
+                ]
+                annotations = {}
+                annotations['gt_num'] = len(kept)
+                if annotations['gt_num'] != 0:
+                    annotations['name'] = np.array(
+                        [obj.classname for obj in kept])
+                    annotations['bbox'] = np.concatenate(
+                        [obj.box2d.reshape(1, 4) for obj in kept], axis=0)
+                    annotations['location'] = np.concatenate(
+                        [obj.centroid.reshape(1, 3) for obj in kept], axis=0)
+                    annotations['dimensions'] = 2 * np.array(
+                        [[obj.length, obj.width, obj.height]
+                         for obj in kept])  # lwh (depth) format
+                    annotations['rotation_y'] = np.array(
+                        [obj.heading_angle for obj in kept])
+                    # NOTE(review): 'index' is sized by ALL parsed objects,
+                    # not by the kept ones, so its length can exceed gt_num.
+                    # Left unchanged to keep the generated infos identical;
+                    # confirm whether any consumer relies on it.
+                    annotations['index'] = np.arange(
+                        len(obj_list), dtype=np.int32)
+                    annotations['class'] = np.array(
+                        [self.cat2label[obj.classname] for obj in kept])
+                    # box3d is centroid (3) + size (3) + heading (1)
+                    annotations['gt_boxes_upright_depth'] = np.stack(
+                        [obj.box3d for obj in kept], axis=0)  # (K, 7)
+                info['annos'] = annotations
+            return info
+
+        sample_id_list = sample_id_list if \
+            sample_id_list is not None else self.sample_id_list
+        with futures.ThreadPoolExecutor(num_workers) as executor:
+            infos = executor.map(process_single_scene, sample_id_list)
+        return list(infos)
--- a/docker-hub/FlashOCC/Flashocc/tools/data_converter/waymo_converter.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/data_converter/waymo_converter.py
+# Copyright (c) OpenMMLab. All rights reserved.
+r"""Adapted from `Waymo to KITTI converter
+    <https://github.com/caizhongang/waymo_kitti_converter>`_.
+"""
+
+try:
+    from waymo_open_dataset import dataset_pb2
+except ImportError:
+    raise ImportError(
+        'Please run "pip install waymo-open-dataset-tf-2-1-0==1.2.0" '
+        'to install the official devkit first.')
+
+from glob import glob
+from os.path import join
+
+import mmcv
+import numpy as np
+import tensorflow as tf
+from waymo_open_dataset.utils import range_image_utils, transform_utils
+from waymo_open_dataset.utils.frame_utils import \
+    parse_range_image_and_camera_projection
+
+
+class Waymo2KITTI(object):
+    """Waymo to KITTI converter.
+
+    This class serves as the converter to change the waymo raw data to KITTI
+    format.
+
+    Args:
+        load_dir (str): Directory to load waymo raw data.
+        save_dir (str): Directory to save data in KITTI format.
+        prefix (str): Prefix of filename. In general, 0 for training, 1 for
+            validation and 2 for testing.
+        workers (int, optional): Number of workers for the parallel process.
+        test_mode (bool, optional): Whether in the test_mode. Default: False.
+    """
+
+    def __init__(self,
+                 load_dir,
+                 save_dir,
+                 prefix,
+                 workers=64,
+                 test_mode=False):
+        self.filter_empty_3dboxes = True
+        self.filter_no_label_zone_points = True
+
+        self.selected_waymo_classes = ['VEHICLE', 'PEDESTRIAN', 'CYCLIST']
+
+        # Only data collected in specific locations will be converted
+        # If set None, this filter is disabled
+        # Available options: location_sf (main dataset)
+        self.selected_waymo_locations = None
+        self.save_track_id = False
+
+        # turn on eager execution for older tensorflow versions
+        if int(tf.__version__.split('.')[0]) < 2:
+            tf.enable_eager_execution()
+
+        self.lidar_list = [
+            '_FRONT', '_FRONT_RIGHT', '_FRONT_LEFT', '_SIDE_RIGHT',
+            '_SIDE_LEFT'
+        ]
+        self.type_list = [
+            'UNKNOWN', 'VEHICLE', 'PEDESTRIAN', 'SIGN', 'CYCLIST'
+        ]
+        self.waymo_to_kitti_class_map = {
+            'UNKNOWN': 'DontCare',
+            'PEDESTRIAN': 'Pedestrian',
+            'VEHICLE': 'Car',
+            'CYCLIST': 'Cyclist',
+            'SIGN': 'Sign'  # not in kitti
+        }
+
+        self.load_dir = load_dir
+        self.save_dir = save_dir
+        self.prefix = prefix
+        self.workers = int(workers)
+        self.test_mode = test_mode
+
+        self.tfrecord_pathnames = sorted(
+            glob(join(self.load_dir, '*.tfrecord')))
+
+        self.label_save_dir = f'{self.save_dir}/label_'
+        self.label_all_save_dir = f'{self.save_dir}/label_all'
+        self.image_save_dir = f'{self.save_dir}/image_'
+        self.calib_save_dir = f'{self.save_dir}/calib'
+        self.point_cloud_save_dir = f'{self.save_dir}/velodyne'
+        self.pose_save_dir = f'{self.save_dir}/pose'
+        self.timestamp_save_dir = f'{self.save_dir}/timestamp'
+
+        self.create_folder()
+
+    def convert(self):
+        """Convert action."""
+        print('Start converting ...')
+        mmcv.track_parallel_progress(self.convert_one, range(len(self)),
+                                     self.workers)
+        print('\nFinished ...')
+
+    def convert_one(self, file_idx):
+        """Convert action for single file.
+
+        Args:
+            file_idx (int): Index of the file to be converted.
+        """
+        pathname = self.tfrecord_pathnames[file_idx]
+        dataset = tf.data.TFRecordDataset(pathname, compression_type='')
+
+        for frame_idx, data in enumerate(dataset):
+
+            frame = dataset_pb2.Frame()
+            frame.ParseFromString(bytearray(data.numpy()))
+            if (self.selected_waymo_locations is not None
+                    and frame.context.stats.location
+                    not in self.selected_waymo_locations):
+                continue
+
+            self.save_image(frame, file_idx, frame_idx)
+            self.save_calib(frame, file_idx, frame_idx)
+            self.save_lidar(frame, file_idx, frame_idx)
+            self.save_pose(frame, file_idx, frame_idx)
+            self.save_timestamp(frame, file_idx, frame_idx)
+
+            if not self.test_mode:
+                self.save_label(frame, file_idx, frame_idx)
+
+    def __len__(self):
+        """Length of the filename list."""
+        return len(self.tfrecord_pathnames)
+
+    def save_image(self, frame, file_idx, frame_idx):
+        """Parse and save the images in png format.
+
+        Args:
+            frame (:obj:`Frame`): Open dataset frame proto.
+            file_idx (int): Current file index.
+            frame_idx (int): Current frame index.
+        """
+        for img in frame.images:
+            img_path = f'{self.image_save_dir}{str(img.name - 1)}/' + \
+                f'{self.prefix}{str(file_idx).zfill(3)}' + \
+                f'{str(frame_idx).zfill(3)}.png'
+            img = mmcv.imfrombytes(img.image)
+            mmcv.imwrite(img, img_path)
+
+    def save_calib(self, frame, file_idx, frame_idx):
+        """Parse and save the calibration data.
+
+        Args:
+            frame (:obj:`Frame`): Open dataset frame proto.
+            file_idx (int): Current file index.
+            frame_idx (int): Current frame index.
+        """
+        # waymo front camera to kitti reference camera
+        T_front_cam_to_ref = np.array([[0.0, -1.0, 0.0], [0.0, 0.0, -1.0],
+                                       [1.0, 0.0, 0.0]])
+        camera_calibs = []
+        R0_rect = [f'{i:e}' for i in np.eye(3).flatten()]
+        Tr_velo_to_cams = []
+        calib_context = ''
+
+        for camera in frame.context.camera_calibrations:
+            # extrinsic parameters
+            T_cam_to_vehicle = np.array(camera.extrinsic.transform).reshape(
+                4, 4)
+            T_vehicle_to_cam = np.linalg.inv(T_cam_to_vehicle)
+            Tr_velo_to_cam = \
+                self.cart_to_homo(T_front_cam_to_ref) @ T_vehicle_to_cam
+            if camera.name == 1:  # FRONT = 1, see dataset.proto for details
+                self.T_velo_to_front_cam = Tr_velo_to_cam.copy()
+            Tr_velo_to_cam = Tr_velo_to_cam[:3, :].reshape((12, ))
+            Tr_velo_to_cams.append([f'{i:e}' for i in Tr_velo_to_cam])
+
+            # intrinsic parameters
+            camera_calib = np.zeros((3, 4))
+            camera_calib[0, 0] = camera.intrinsic[0]
+            camera_calib[1, 1] = camera.intrinsic[1]
+            camera_calib[0, 2] = camera.intrinsic[2]
+            camera_calib[1, 2] = camera.intrinsic[3]
+            camera_calib[2, 2] = 1
+            camera_calib = list(camera_calib.reshape(12))
+            camera_calib = [f'{i:e}' for i in camera_calib]
+            camera_calibs.append(camera_calib)
+
+        # all camera ids are saved as id-1 in the result because
+        # camera 0 is unknown in the proto
+        for i in range(5):
+            calib_context += 'P' + str(i) + ': ' + \
+                ' '.join(camera_calibs[i]) + '\n'
+        calib_context += 'R0_rect' + ': ' + ' '.join(R0_rect) + '\n'
+        for i in range(5):
+            calib_context += 'Tr_velo_to_cam_' + str(i) + ': ' + \
+                ' '.join(Tr_velo_to_cams[i]) + '\n'
+
+        with open(
+                f'{self.calib_save_dir}/{self.prefix}' +
+                f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt',
+                'w+') as fp_calib:
+            fp_calib.write(calib_context)
+            fp_calib.close()
+
+    def save_lidar(self, frame, file_idx, frame_idx):
+        """Parse and save the lidar data in psd format.
+
+        Args:
+            frame (:obj:`Frame`): Open dataset frame proto.
+            file_idx (int): Current file index.
+            frame_idx (int): Current frame index.
+        """
+        range_images, camera_projections, range_image_top_pose = \
+            parse_range_image_and_camera_projection(frame)
+
+        # First return
+        points_0, cp_points_0, intensity_0, elongation_0, mask_indices_0 = \
+            self.convert_range_image_to_point_cloud(
+                frame,
+                range_images,
+                camera_projections,
+                range_image_top_pose,
+                ri_index=0
+            )
+        points_0 = np.concatenate(points_0, axis=0)
+        intensity_0 = np.concatenate(intensity_0, axis=0)
+        elongation_0 = np.concatenate(elongation_0, axis=0)
+        mask_indices_0 = np.concatenate(mask_indices_0, axis=0)
+
+        # Second return
+        points_1, cp_points_1, intensity_1, elongation_1, mask_indices_1 = \
+            self.convert_range_image_to_point_cloud(
+                frame,
+                range_images,
+                camera_projections,
+                range_image_top_pose,
+                ri_index=1
+            )
+        points_1 = np.concatenate(points_1, axis=0)
+        intensity_1 = np.concatenate(intensity_1, axis=0)
+        elongation_1 = np.concatenate(elongation_1, axis=0)
+        mask_indices_1 = np.concatenate(mask_indices_1, axis=0)
+
+        points = np.concatenate([points_0, points_1], axis=0)
+        intensity = np.concatenate([intensity_0, intensity_1], axis=0)
+        elongation = np.concatenate([elongation_0, elongation_1], axis=0)
+        mask_indices = np.concatenate([mask_indices_0, mask_indices_1], axis=0)
+
+        # timestamp = frame.timestamp_micros * np.ones_like(intensity)
+
+        # concatenate x,y,z, intensity, elongation, timestamp (6-dim)
+        point_cloud = np.column_stack(
+            (points, intensity, elongation, mask_indices))
+
+        pc_path = f'{self.point_cloud_save_dir}/{self.prefix}' + \
+            f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.bin'
+        point_cloud.astype(np.float32).tofile(pc_path)
+
+    def save_label(self, frame, file_idx, frame_idx):
+        """Parse and save the label data in txt format.
+        The relation between waymo and kitti coordinates is noteworthy:
+        1. x, y, z correspond to l, w, h (waymo) -> l, h, w (kitti)
+        2. x-y-z: front-left-up (waymo) -> right-down-front(kitti)
+        3. bbox origin at volumetric center (waymo) -> bottom center (kitti)
+        4. rotation: +x around y-axis (kitti) -> +x around z-axis (waymo)
+
+        Args:
+            frame (:obj:`Frame`): Open dataset frame proto.
+            file_idx (int): Current file index.
+            frame_idx (int): Current frame index.
+        """
+        fp_label_all = open(
+            f'{self.label_all_save_dir}/{self.prefix}' +
+            f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt', 'w+')
+        id_to_bbox = dict()
+        id_to_name = dict()
+        for labels in frame.projected_lidar_labels:
+            name = labels.name
+            for label in labels.labels:
+                # TODO: need a workaround as bbox may not belong to front cam
+                bbox = [
+                    label.box.center_x - label.box.length / 2,
+                    label.box.center_y - label.box.width / 2,
+                    label.box.center_x + label.box.length / 2,
+                    label.box.center_y + label.box.width / 2
+                ]
+                id_to_bbox[label.id] = bbox
+                id_to_name[label.id] = name - 1
+
+        for obj in frame.laser_labels:
+            bounding_box = None
+            name = None
+            id = obj.id
+            for lidar in self.lidar_list:
+                if id + lidar in id_to_bbox:
+                    bounding_box = id_to_bbox.get(id + lidar)
+                    name = str(id_to_name.get(id + lidar))
+                    break
+
+            if bounding_box is None or name is None:
+                name = '0'
+                bounding_box = (0, 0, 0, 0)
+
+            my_type = self.type_list[obj.type]
+
+            if my_type not in self.selected_waymo_classes:
+                continue
+
+            if self.filter_empty_3dboxes and obj.num_lidar_points_in_box < 1:
+                continue
+
+            my_type = self.waymo_to_kitti_class_map[my_type]
+
+            height = obj.box.height
+            width = obj.box.width
+            length = obj.box.length
+
+            x = obj.box.center_x
+            y = obj.box.center_y
+            z = obj.box.center_z - height / 2
+
+            # project bounding box to the virtual reference frame
+            pt_ref = self.T_velo_to_front_cam @ \
+                np.array([x, y, z, 1]).reshape((4, 1))
+            x, y, z, _ = pt_ref.flatten().tolist()
+
+            rotation_y = -obj.box.heading - np.pi / 2
+            track_id = obj.id
+
+            # not available
+            truncated = 0
+            occluded = 0
+            alpha = -10
+
+            line = my_type + \
+                ' {} {} {} {} {} {} {} {} {} {} {} {} {} {}\n'.format(
+                    round(truncated, 2), occluded, round(alpha, 2),
+                    round(bounding_box[0], 2), round(bounding_box[1], 2),
+                    round(bounding_box[2], 2), round(bounding_box[3], 2),
+                    round(height, 2), round(width, 2), round(length, 2),
+                    round(x, 2), round(y, 2), round(z, 2),
+                    round(rotation_y, 2))
+
+            if self.save_track_id:
+                line_all = line[:-1] + ' ' + name + ' ' + track_id + '\n'
+            else:
+                line_all = line[:-1] + ' ' + name + '\n'
+
+            fp_label = open(
+                f'{self.label_save_dir}{name}/{self.prefix}' +
+                f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt', 'a')
+            fp_label.write(line)
+            fp_label.close()
+
+            fp_label_all.write(line_all)
+
+        fp_label_all.close()
+
+    def save_pose(self, frame, file_idx, frame_idx):
+        """Parse and save the pose data.
+
+        Note that SDC's own pose is not included in the regular training
+        of KITTI dataset. KITTI raw dataset contains ego motion files
+        but are not often used. Pose is important for algorithms that
+        take advantage of the temporal information.
+
+        Args:
+            frame (:obj:`Frame`): Open dataset frame proto.
+            file_idx (int): Current file index.
+            frame_idx (int): Current frame index.
+        """
+        pose = np.array(frame.pose.transform).reshape(4, 4)
+        np.savetxt(
+            join(f'{self.pose_save_dir}/{self.prefix}' +
+                 f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt'),
+            pose)
+
+    def save_timestamp(self, frame, file_idx, frame_idx):
+        """Save the timestamp data in a separate file instead of the
+        pointcloud.
+
+        Note that SDC's own pose is not included in the regular training
+        of KITTI dataset. KITTI raw dataset contains ego motion files
+        but are not often used. Pose is important for algorithms that
+        take advantage of the temporal information.
+
+        Args:
+            frame (:obj:`Frame`): Open dataset frame proto.
+            file_idx (int): Current file index.
+            frame_idx (int): Current frame index.
+        """
+        with open(
+                join(f'{self.timestamp_save_dir}/{self.prefix}' +
+                     f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt'),
+                'w') as f:
+            f.write(str(frame.timestamp_micros))
+
+    def create_folder(self):
+        """Create folder for data preprocessing."""
+        if not self.test_mode:
+            dir_list1 = [
+                self.label_all_save_dir, self.calib_save_dir,
+                self.point_cloud_save_dir, self.pose_save_dir,
+                self.timestamp_save_dir
+            ]
+            dir_list2 = [self.label_save_dir, self.image_save_dir]
+        else:
+            dir_list1 = [
+                self.calib_save_dir, self.point_cloud_save_dir,
+                self.pose_save_dir, self.timestamp_save_dir
+            ]
+            dir_list2 = [self.image_save_dir]
+        for d in dir_list1:
+            mmcv.mkdir_or_exist(d)
+        for d in dir_list2:
+            for i in range(5):
+                mmcv.mkdir_or_exist(f'{d}{str(i)}')
+
+    def convert_range_image_to_point_cloud(self,
+                                           frame,
+                                           range_images,
+                                           camera_projections,
+                                           range_image_top_pose,
+                                           ri_index=0):
+        """Convert range images to point cloud.
+
+        Args:
+            frame (:obj:`Frame`): Open dataset frame.
+            range_images (dict): Mapping from laser_name to list of two
+                range images corresponding with two returns.
+            camera_projections (dict): Mapping from laser_name to list of two
+                camera projections corresponding with two returns.
+            range_image_top_pose (:obj:`Transform`): Range image pixel pose for
+                top lidar.
+            ri_index (int, optional): 0 for the first return,
+                1 for the second return. Default: 0.
+
+        Returns:
+            tuple[list[np.ndarray]]: (List of points with shape [N, 3],
+                camera projections of points with shape [N, 6], intensity
+                with shape [N, 1], elongation with shape [N, 1], points'
+                position in the depth map (element offset if points come from
+                the main lidar otherwise -1) with shape[N, 1]). All the
+                lists have the length of lidar numbers (5).
+        """
+        # process the lidars in ascending laser id
+        calibrations = sorted(
+            frame.context.laser_calibrations, key=lambda c: c.name)
+        points = []
+        cp_points = []
+        intensity = []
+        elongation = []
+        mask_indices = []
+
+        frame_pose = tf.convert_to_tensor(
+            value=np.reshape(np.array(frame.pose.transform), [4, 4]))
+        # [H, W, 6]
+        range_image_top_pose_tensor = tf.reshape(
+            tf.convert_to_tensor(value=range_image_top_pose.data),
+            range_image_top_pose.shape.dims)
+        # [H, W, 3, 3]
+        range_image_top_pose_tensor_rotation = \
+            transform_utils.get_rotation_matrix(
+                range_image_top_pose_tensor[..., 0],
+                range_image_top_pose_tensor[..., 1],
+                range_image_top_pose_tensor[..., 2])
+        range_image_top_pose_tensor_translation = \
+            range_image_top_pose_tensor[..., 3:]
+        # combine the rotation and the translation into one pose tensor
+        range_image_top_pose_tensor = transform_utils.get_transform(
+            range_image_top_pose_tensor_rotation,
+            range_image_top_pose_tensor_translation)
+        for c in calibrations:
+            range_image = range_images[c.name][ri_index]
+            # fall back to computed inclinations when the calibration does
+            # not list them explicitly
+            if len(c.beam_inclinations) == 0:
+                beam_inclinations = range_image_utils.compute_inclination(
+                    tf.constant(
+                        [c.beam_inclination_min, c.beam_inclination_max]),
+                    height=range_image.shape.dims[0])
+            else:
+                beam_inclinations = tf.constant(c.beam_inclinations)
+
+            beam_inclinations = tf.reverse(beam_inclinations, axis=[-1])
+            extrinsic = np.reshape(np.array(c.extrinsic.transform), [4, 4])
+
+            range_image_tensor = tf.reshape(
+                tf.convert_to_tensor(value=range_image.data),
+                range_image.shape.dims)
+            # the per-pixel pose and the frame pose are only passed on for
+            # the TOP lidar; the other lidars use None for both
+            pixel_pose_local = None
+            frame_pose_local = None
+            if c.name == dataset_pb2.LaserName.TOP:
+                pixel_pose_local = range_image_top_pose_tensor
+                pixel_pose_local = tf.expand_dims(pixel_pose_local, axis=0)
+                frame_pose_local = tf.expand_dims(frame_pose, axis=0)
+            # keep the pixels whose channel 0 (the range handed to the
+            # cartesian conversion below) is positive
+            range_image_mask = range_image_tensor[..., 0] > 0
+
+            if self.filter_no_label_zone_points:
+                nlz_mask = range_image_tensor[..., 3] != 1.0  # 1.0: in NLZ
+                range_image_mask = range_image_mask & nlz_mask
+
+            range_image_cartesian = \
+                range_image_utils.extract_point_cloud_from_range_image(
+                    tf.expand_dims(range_image_tensor[..., 0], axis=0),
+                    tf.expand_dims(extrinsic, axis=0),
+                    tf.expand_dims(tf.convert_to_tensor(
+                        value=beam_inclinations), axis=0),
+                    pixel_pose=pixel_pose_local,
+                    frame_pose=frame_pose_local)
+
+            # (row, col) positions of the kept pixels
+            mask_index = tf.where(range_image_mask)
+
+            range_image_cartesian = tf.squeeze(range_image_cartesian, axis=0)
+            points_tensor = tf.gather_nd(range_image_cartesian, mask_index)
+
+            cp = camera_projections[c.name][ri_index]
+            cp_tensor = tf.reshape(
+                tf.convert_to_tensor(value=cp.data), cp.shape.dims)
+            cp_points_tensor = tf.gather_nd(cp_tensor, mask_index)
+            points.append(points_tensor.numpy())
+            cp_points.append(cp_points_tensor.numpy())
+
+            # channel 1 of the range image is stored as intensity
+            intensity_tensor = tf.gather_nd(range_image_tensor[..., 1],
+                                            mask_index)
+            intensity.append(intensity_tensor.numpy())
+
+            # channel 2 of the range image is stored as elongation
+            elongation_tensor = tf.gather_nd(range_image_tensor[..., 2],
+                                             mask_index)
+            elongation.append(elongation_tensor.numpy())
+            # NOTE(review): 1 is presumably LaserName.TOP, which is compared
+            # by its enum name above -- confirm against dataset.proto.
+            if c.name == 1:
+                # flattened offset: (ri_index * H + row) * W + col
+                mask_index = (ri_index * range_image_mask.shape[0] +
+                              mask_index[:, 0]
+                              ) * range_image_mask.shape[1] + mask_index[:, 1]
+                mask_index = mask_index.numpy().astype(elongation[-1].dtype)
+            else:
+                # every other lidar gets -1 for each of its points
+                mask_index = np.full_like(elongation[-1], -1)
+
+            mask_indices.append(mask_index)
+
+        return points, cp_points, intensity, elongation, mask_indices
+
+    def cart_to_homo(self, mat):
+        """Convert transformation matrix in Cartesian coordinates to
+        homogeneous format.
+
+        Args:
+            mat (np.ndarray): Transformation matrix in Cartesian.
+                The input matrix shape is 3x3 or 3x4.
+
+        Returns:
+            np.ndarray: Transformation matrix in homogeneous format.
+                The matrix shape is 4x4.
+        """
+        ret = np.eye(4)
+        if mat.shape == (3, 3):
+            ret[:3, :3] = mat
+        elif mat.shape == (3, 4):
+            ret[:3, :] = mat
+        else:
+            raise ValueError(mat.shape)
+        return ret
--- a/docker-hub/FlashOCC/Flashocc/tools/deployment/mmdet3d2torchserve.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/deployment/mmdet3d2torchserve.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from argparse import ArgumentParser, Namespace
+from pathlib import Path
+from tempfile import TemporaryDirectory
+
+import mmcv
+
+try:
+    from model_archiver.model_packaging import package_model
+    from model_archiver.model_packaging_utils import ModelExportUtils
+except ImportError:
+    package_model = None
+
+
+def mmdet3d2torchserve(
+    config_file: str,
+    checkpoint_file: str,
+    output_folder: str,
+    model_name: str,
+    model_version: str = '1.0',
+    force: bool = False,
+):
+    """Converts MMDetection3D model (config + checkpoint) to TorchServe `.mar`.
+
+    Args:
+        config_file (str):
+            In MMDetection3D config format.
+            The contents vary for each task repository.
+        checkpoint_file (str):
+            In MMDetection3D checkpoint format.
+            The contents vary for each task repository.
+        output_folder (str):
+            Folder where `{model_name}.mar` will be created.
+            The file created will be in TorchServe archive format.
+        model_name (str):
+            If not None, used for naming the `{model_name}.mar` file
+            that will be created under `output_folder`.
+            If None, `{Path(checkpoint_file).stem}` will be used.
+        model_version (str, optional):
+            Model's version. Default: '1.0'.
+        force (bool, optional):
+            If True, if there is an existing `{model_name}.mar`
+            file under `output_folder` it will be overwritten.
+            Default: False.
+    """
+    mmcv.mkdir_or_exist(output_folder)
+
+    config = mmcv.Config.fromfile(config_file)
+
+    with TemporaryDirectory() as tmpdir:
+        config.dump(f'{tmpdir}/config.py')
+
+        args = Namespace(
+            **{
+                'model_file': f'{tmpdir}/config.py',
+                'serialized_file': checkpoint_file,
+                'handler': f'{Path(__file__).parent}/mmdet3d_handler.py',
+                'model_name': model_name or Path(checkpoint_file).stem,
+                'version': model_version,
+                'export_path': output_folder,
+                'force': force,
+                'requirements_file': None,
+                'extra_files': None,
+                'runtime': 'python',
+                'archive_format': 'default'
+            })
+        manifest = ModelExportUtils.generate_manifest_json(args)
+        package_model(args, manifest)
+
+
+def parse_args():
+    parser = ArgumentParser(
+        description='Convert MMDetection models to TorchServe `.mar` format.')
+    parser.add_argument('config', type=str, help='config file path')
+    parser.add_argument('checkpoint', type=str, help='checkpoint file path')
+    parser.add_argument(
+        '--output-folder',
+        type=str,
+        required=True,
+        help='Folder where `{model_name}.mar` will be created.')
+    parser.add_argument(
+        '--model-name',
+        type=str,
+        default=None,
+        help='If not None, used for naming the `{model_name}.mar`'
+        'file that will be created under `output_folder`.'
+        'If None, `{Path(checkpoint_file).stem}` will be used.')
+    parser.add_argument(
+        '--model-version',
+        type=str,
+        default='1.0',
+        help='Number used for versioning.')
+    parser.add_argument(
+        '-f',
+        '--force',
+        action='store_true',
+        help='overwrite the existing `{model_name}.mar`')
+    args = parser.parse_args()
+
+    return args
+
+
+if __name__ == '__main__':
+    args = parse_args()
+
+    if package_model is None:
+        raise ImportError('`torch-model-archiver` is required.'
+                          'Try: pip install torch-model-archiver')
+
+    mmdet3d2torchserve(args.config, args.checkpoint, args.output_folder,
+                       args.model_name, args.model_version, args.force)
--- a/docker-hub/FlashOCC/Flashocc/tools/deployment/mmdet3d_handler.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/deployment/mmdet3d_handler.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import base64
+import os
+
+import numpy as np
+import torch
+from ts.torch_handler.base_handler import BaseHandler
+
+from mmdet3d.apis import inference_detector, init_model
+from mmdet3d.core.points import get_points_type
+
+
+class MMdet3dHandler(BaseHandler):
+    """MMDetection3D Handler used in TorchServe.
+
+    Handler to load models in MMDetection3D, and it will process data to get
+    predicted results. For now, it only supports SECOND.
+    """
+    threshold = 0.5
+    load_dim = 4
+    use_dim = [0, 1, 2, 3]
+    coord_type = 'LIDAR'
+    attribute_dims = None
+
+    def initialize(self, context):
+        """Initialize function loads the model in MMDetection3D.
+
+        Args:
+            context (context): It is a JSON Object containing information
+                pertaining to the model artifacts parameters.
+        """
+        properties = context.system_properties
+        self.map_location = 'cuda' if torch.cuda.is_available() else 'cpu'
+        self.device = torch.device(self.map_location + ':' +
+                                   str(properties.get('gpu_id')) if torch.cuda.
+                                   is_available() else self.map_location)
+        self.manifest = context.manifest
+
+        model_dir = properties.get('model_dir')
+        serialized_file = self.manifest['model']['serializedFile']
+        checkpoint = os.path.join(model_dir, serialized_file)
+        self.config_file = os.path.join(model_dir, 'config.py')
+        self.model = init_model(self.config_file, checkpoint, self.device)
+        self.initialized = True
+
+    def preprocess(self, data):
+        """Preprocess function converts data into LiDARPoints class.
+
+        Args:
+            data (List): Input data from the request.
+
+        Returns:
+            `LiDARPoints` : The preprocess function returns the input
+                point cloud data as LiDARPoints class.
+        """
+        for row in data:
+            # Compat layer: normally the envelope should just return the data
+            # directly, but older versions of Torchserve didn't have envelope.
+            pts = row.get('data') or row.get('body')
+            if isinstance(pts, str):
+                pts = base64.b64decode(pts)
+
+            points = np.frombuffer(pts, dtype=np.float32)
+            points = points.reshape(-1, self.load_dim)
+            points = points[:, self.use_dim]
+            points_class = get_points_type(self.coord_type)
+            points = points_class(
+                points,
+                points_dim=points.shape[-1],
+                attribute_dims=self.attribute_dims)
+
+        return points
+
+    def inference(self, data):
+        """Inference Function.
+
+        This function is used to make a prediction call on the
+        given input request.
+
+        Args:
+            data (`LiDARPoints`): LiDARPoints class passed to make
+                the inference request.
+
+        Returns:
+            List(dict) : The predicted result is returned in this function.
+        """
+        results, _ = inference_detector(self.model, data)
+        return results
+
+    def postprocess(self, data):
+        """Turn detector results into a torchserve response.
+
+        For every result the boxes and scores are converted to numpy, the
+        entries whose score exceeds ``self.threshold`` are kept, and the
+        survivors are emitted as plain Python lists.
+
+        Args:
+            data (List[dict]): The data received from the prediction
+                output of the model.
+
+        Returns:
+            List: One single-element list per result, holding a dict with
+                the keys ``'3dbbox'`` and ``'score'``.
+        """
+        responses = []
+        for result in data:
+            # Some results nest the boxes and scores under 'pts_bbox'.
+            detections = result['pts_bbox'] if 'pts_bbox' in result else result
+            boxes = detections['boxes_3d'].tensor.numpy()
+            scores = detections['scores_3d'].numpy()
+
+            keep = scores > self.threshold
+            responses.append([{
+                '3dbbox': boxes[keep].tolist(),
+                'score': scores[keep].tolist()
+            }])
+
+        return responses
--- a/docker-hub/FlashOCC/Flashocc/tools/deployment/test_torchserver.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/deployment/test_torchserver.py
+from argparse import ArgumentParser
+
+import numpy as np
+import requests
+
+from mmdet3d.apis import inference_detector, init_model
+
+
+def parse_args():
+    """Parse the command-line options of the TorchServe comparison script.
+
+    Returns:
+        argparse.Namespace: The parsed positional arguments (``pcd``,
+            ``config``, ``checkpoint``, ``model_name``) and options
+            (``inference_addr``, ``device``, ``score_thr``).
+    """
+    parser = ArgumentParser()
+    # Required positional arguments, in command-line order.
+    positionals = (
+        ('pcd', 'Point cloud file'),
+        ('config', 'Config file'),
+        ('checkpoint', 'Checkpoint file'),
+        ('model_name', 'The model name in the server'),
+    )
+    for arg_name, description in positionals:
+        parser.add_argument(arg_name, help=description)
+
+    parser.add_argument(
+        '--inference-addr',
+        default='127.0.0.1:8080',
+        help='Address and port of the inference server')
+    parser.add_argument(
+        '--device', default='cuda:0', help='Device used for inference')
+    parser.add_argument(
+        '--score-thr', type=float, default=0.5, help='3d bbox score threshold')
+    return parser.parse_args()
+
+
+def parse_result(input):
+    """Return the ``'3dbbox'`` entry of the first response item as an array.
+
+    Args:
+        input (list[dict]): Decoded server response; only ``input[0]`` is
+            read.
+
+    Returns:
+        np.ndarray: The boxes stored under ``'3dbbox'``.
+    """
+    first_entry = input[0]
+    return np.array(first_entry['3dbbox'])
+
+
+def main(args):
+    """Check that the served model reproduces local inference.
+
+    The point cloud is run through a locally built model, the boxes scoring
+    above ``args.score_thr`` are kept, and the same file is then posted to
+    the inference server. The two box sets must be numerically close.
+
+    Args:
+        args (argparse.Namespace): Options produced by ``parse_args``.
+
+    Raises:
+        AssertionError: If the local and the served boxes differ.
+    """
+    # build the model from a config file and a checkpoint file
+    model = init_model(args.config, args.checkpoint, device=args.device)
+    # test a single point cloud file
+    model_result, _ = inference_detector(model, args.pcd)
+    first_result = model_result[0]
+    # Some results nest the boxes and scores under 'pts_bbox'.
+    detections = (
+        first_result['pts_bbox'] if 'pts_bbox' in first_result else first_result)
+    pred_bboxes = detections['boxes_3d'].tensor.numpy()
+    pred_scores = detections['scores_3d'].numpy()
+    # Honour --score-thr (default 0.5) instead of a hard-coded constant.
+    # NOTE(review): the server filters with its own threshold, so the value
+    # given here presumably has to match the deployed handler - confirm.
+    model_result = pred_bboxes[pred_scores > args.score_thr]
+
+    url = 'http://' + args.inference_addr + '/predictions/' + args.model_name
+    with open(args.pcd, 'rb') as points:
+        response = requests.post(url, points)
+    server_result = parse_result(response.json())
+    assert np.allclose(model_result, server_result)
+
+
+if __name__ == '__main__':
+    args = parse_args()
+    main(args)
--- a/docker-hub/FlashOCC/Flashocc/tools/dist_test.sh
+++ b/docker-hub/FlashOCC/Flashocc/tools/dist_test.sh
+#!/usr/bin/env bash
+# Launch test.py (located next to this script) through
+# torch.distributed.launch.
+#
+# Usage: dist_test.sh CONFIG CHECKPOINT GPUS [extra test.py arguments...]
+# Environment overrides: NNODES, NODE_RANK, PORT, MASTER_ADDR.
+
+CONFIG=$1
+CHECKPOINT=$2
+GPUS=$3
+NNODES=${NNODES:-1}
+NODE_RANK=${NODE_RANK:-0}
+PORT=${PORT:-29501}
+MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
+
+# Every expansion is quoted so that paths and forwarded arguments containing
+# spaces reach python as single words.
+PYTHONPATH="$(dirname "$0")/..":$PYTHONPATH \
+python -m torch.distributed.launch \
+    --nnodes="$NNODES" \
+    --node_rank="$NODE_RANK" \
+    --master_addr="$MASTER_ADDR" \
+    --nproc_per_node="$GPUS" \
+    --master_port="$PORT" \
+    "$(dirname "$0")/test.py" \
+    "$CONFIG" \
+    "$CHECKPOINT" \
+    --launcher pytorch \
+    "${@:4}"
--- a/docker-hub/FlashOCC/Flashocc/tools/dist_train.sh
+++ b/docker-hub/FlashOCC/Flashocc/tools/dist_train.sh
+#!/usr/bin/env bash
+# Launch train.py (located next to this script) through
+# torch.distributed.launch with a fixed seed of 0.
+#
+# Usage: dist_train.sh CONFIG GPUS [extra train.py arguments...]
+# Environment overrides: NNODES, NODE_RANK, PORT, MASTER_ADDR.
+
+CONFIG=$1
+GPUS=$2
+NNODES=${NNODES:-1}
+NODE_RANK=${NODE_RANK:-0}
+PORT=${PORT:-29500}
+MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
+
+# Every expansion is quoted so that paths and forwarded arguments containing
+# spaces reach python as single words.
+PYTHONPATH="$(dirname "$0")/..":$PYTHONPATH \
+python -m torch.distributed.launch \
+    --nnodes="$NNODES" \
+    --node_rank="$NODE_RANK" \
+    --master_addr="$MASTER_ADDR" \
+    --nproc_per_node="$GPUS" \
+    --master_port="$PORT" \
+    "$(dirname "$0")/train.py" \
+    "$CONFIG" \
+    --seed 0 \
+    --launcher pytorch "${@:3}"
--- a/docker-hub/FlashOCC/Flashocc/tools/dist_train_numa.sh
+++ b/docker-hub/FlashOCC/Flashocc/tools/dist_train_numa.sh
+#!/usr/bin/env bash
+# Launch training through torchrun with each local rank pinned to a NUMA node.
+#
+# Usage: dist_train_numa.sh [MASTER_ADDR] [NNODES] [NODE_RANK] CONFIG [extra train.py arguments...]
+# The master port (6000) and the number of processes per node (8) are fixed.
+
+MASTER_ADDR=${1:-localhost}
+MASTER_PORT=6000
+NNODES=${2:-1}
+NODE_RANK=${3:-0}
+GPUS_PER_NODE=8
+# Left unquoted at the call site on purpose: it must split into separate words.
+DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT"
+
+CONFIG=$4
+
+# add numa affinity config
+# The NUMA id for each local rank is taken from the 6th field of the
+# "Numa Node" lines printed by `hy-smi --showtopo`.
+# NOTE: inside the `bash -c` payload $0 is the `_` placeholder, so
+# $(dirname "$0") evaluates to `.` and tools/train.py is resolved relative to
+# the current working directory.
+PYTHONPATH="$(dirname "$0")/..":$PYTHONPATH \
+torchrun $DISTRIBUTED_ARGS --no-python \
+    bash -c '
+    numa_map=( $(hy-smi --showtopo | grep "Numa Node" | awk "{print \$6}") );
+    LOCAL_RANK=${LOCAL_RANK:-0}
+    NUMA_ID=${numa_map[$LOCAL_RANK]}
+    numactl --cpunodebind=${NUMA_ID} --membind=${NUMA_ID} python $(dirname "$0")/tools/train.py "$@"
+    ' _ "$CONFIG" --launcher pytorch "${@:5}"
--- a/docker-hub/FlashOCC/Flashocc/tools/export_onnx.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/export_onnx.py
+import argparse
+import sys
+import os
+sys.path.insert(0, os.getcwd())
+
+import torch.onnx
+from mmcv import Config
+from mmdeploy.backend.tensorrt.utils import save, search_cuda_version
+
+try:
+    # If mmdet version > 2.23.0, compat_cfg would be imported and
+    # used from mmdet instead of mmdet3d.
+    from mmdet.utils import compat_cfg
+except ImportError:
+    from mmdet3d.utils import compat_cfg
+
+import os
+from typing import Dict, Optional, Sequence, Union
+
+import h5py
+import mmcv
+import numpy as np
+import onnx
+import pycuda.driver as cuda
+import tensorrt as trt
+import torch
+import tqdm
+from mmcv.runner import load_checkpoint
+from mmdeploy.apis.core import no_mp
+from mmdeploy.backend.tensorrt.calib_utils import HDF5Calibrator
+from mmdeploy.backend.tensorrt.init_plugins import load_tensorrt_plugin
+from mmdeploy.utils import load_config
+from packaging import version
+from torch.utils.data import DataLoader
+
+from mmdet3d.datasets import build_dataloader, build_dataset
+from mmdet3d.models import build_model
+from mmdet.datasets import replace_ImageToTensor
+from tools.misc.fuse_conv_bn import fuse_module
+
+
+class HDF5CalibratorBEVDet(HDF5Calibrator):
+    """Calibrator that serves the stored samples one at a time.
+
+    The ``'img'`` input is cast to float32 and every other input to int32
+    before being copied to the device.
+    """
+
+    def get_batch(self, names: Sequence[str], **kwargs) -> list:
+        """Return the device buffers of the next calibration sample.
+
+        Args:
+            names (Sequence[str]): Names of the inputs to fetch, in the
+                order the buffers are returned.
+
+        Returns:
+            list | None: One device allocation per name, or ``None`` once
+                ``self.count`` has reached ``self.dataset_length``.
+        """
+        if self.count >= self.dataset_length:
+            return None
+
+        # Progress report every 100 samples.
+        if self.count % 100 == 0:
+            print('%d/%d' % (self.count, self.dataset_length))
+
+        sample_key = str(self.count)
+        device_buffers = []
+        for name in names:
+            target_dtype = np.float32 if name == 'img' else np.int32
+            host_array = self.calib_data[name][sample_key][...].astype(
+                target_dtype)
+
+            # tile the tensor so we can keep the same distribute
+            opt_shape = self.input_shapes[name]['opt_shape']
+            repeats = [
+                int(np.ceil(opt_dim / cur_dim))
+                for opt_dim, cur_dim in zip(opt_shape, host_array.shape)
+            ]
+            host_array = np.tile(host_array, repeats)
+
+            # Crop the tiled array back to exactly the optimal shape.
+            crop = tuple(slice(0, end) for end in opt_shape)
+            host_array = host_array[crop]
+
+            device_ptr = cuda.mem_alloc(host_array.nbytes)
+            cuda.memcpy_htod(device_ptr, np.ascontiguousarray(host_array))
+            self.buffers[name] = device_ptr
+
+            device_buffers.append(self.buffers[name])
+
+        self.count += 1
+        return device_buffers
+
+
+def parse_args():
+    """Parse the command-line options of the ONNX export script.
+
+    Returns:
+        argparse.Namespace: ``config``, ``checkpoint`` and ``work_dir``
+            positionals plus the ``prefix``, ``fp16``, ``int8``,
+            ``fuse_conv_bn`` and ``calib_num`` options.
+    """
+    parser = argparse.ArgumentParser(description='Deploy BEVDet with Tensorrt')
+    parser.add_argument('config', help='deploy config file path')
+    parser.add_argument('checkpoint', help='checkpoint file')
+    parser.add_argument('work_dir', help='work dir to save file')
+    parser.add_argument(
+        '--prefix', default='bevdet', help='prefix of the save file name')
+    parser.add_argument(
+        '--fp16', action='store_true', help='Whether to use tensorrt fp16')
+    parser.add_argument(
+        '--int8', action='store_true', help='Whether to use tensorrt int8')
+    parser.add_argument(
+        '--fuse-conv-bn',
+        action='store_true',
+        # The trailing space keeps the two literals from fusing into
+        # "increasethe" in the rendered help text.
+        help='Whether to fuse conv and bn, this will slightly increase '
+        'the inference speed')
+    parser.add_argument('--calib_num', type=int, help='num to calib')
+    args = parser.parse_args()
+    return args
+
+
+def get_plugin_names():
+    """Return the names of all plugin creators in the TensorRT registry."""
+    registry = trt.get_plugin_registry()
+    return [creator.name for creator in registry.plugin_creator_list]
+
+
+def create_calib_input_data_impl(calib_file: str,
+                                 dataloader: DataLoader,
+                                 model_partition: bool = False,
+                                 metas: Optional[list] = None,
+                                 calib_num=None) -> None:
+    """Write the calibration samples of ``dataloader`` to an HDF5 file.
+
+    The file gets a ``calib_data/end2end`` group with one sub-group per
+    model input. For every sample the image tensor
+    (``input_data['img_inputs'][0][0]`` with its leading axis squeezed) is
+    stored under ``img``; the same ``metas`` arrays are stored again for
+    every sample under the five index inputs.
+
+    Args:
+        calib_file (str): Path of the HDF5 file to create (overwritten).
+        dataloader (DataLoader): Source of the calibration samples.
+        model_partition (bool): Must be ``False``; partitioned models are
+            not supported.
+        metas (list, optional): Tensors ordered as ``ranks_bev``,
+            ``ranks_depth``, ``ranks_feat``, ``interval_starts``,
+            ``interval_lengths``. Defaults to an empty list.
+        calib_num (int, optional): Maximum number of samples to store.
+            ``None`` stores the whole dataloader.
+    """
+    metas = [] if metas is None else metas
+    with h5py.File(calib_file, mode='w') as file:
+        calib_data_group = file.create_group('calib_data')
+        assert not model_partition
+        # create end2end group
+        input_data_group = calib_data_group.create_group('end2end')
+        input_group_img = input_data_group.create_group('img')
+        input_keys = [
+            'ranks_bev', 'ranks_depth', 'ranks_feat', 'interval_starts',
+            'interval_lengths'
+        ]
+        input_groups = [
+            input_data_group.create_group(input_key)
+            for input_key in input_keys
+        ]
+        meta_arrays = [meta.int().detach().cpu().numpy() for meta in metas]
+        for data_id, input_data in enumerate(tqdm.tqdm(dataloader)):
+            # Stop once exactly `calib_num` samples (ids 0..calib_num-1) have
+            # been written; `>` would store one sample too many.
+            if calib_num is not None and data_id >= calib_num:
+                break
+            # save end2end data
+            input_tensor = input_data['img_inputs'][0][0]
+            input_ndarray = input_tensor.squeeze(0).detach().cpu().numpy()
+            input_group_img.create_dataset(
+                str(data_id),
+                shape=input_ndarray.shape,
+                compression='gzip',
+                compression_opts=4,
+                data=input_ndarray)
+            for kid in range(len(input_keys)):
+                input_groups[kid].create_dataset(
+                    str(data_id),
+                    shape=meta_arrays[kid].shape,
+                    compression='gzip',
+                    compression_opts=4,
+                    data=meta_arrays[kid])
+            file.flush()
+
+
+def create_calib_input_data(calib_file: str,
+                            deploy_cfg: Union[str, mmcv.Config],
+                            model_cfg: Union[str, mmcv.Config],
+                            model_checkpoint: Optional[str] = None,
+                            dataset_cfg: Optional[Union[str,
+                                                        mmcv.Config]] = None,
+                            dataset_type: str = 'val',
+                            device: str = 'cpu',
+                            metas: list = [None],
+                            calib_num = None) -> None:
+    """Create dataset for post-training quantization.
+
+    Args:
+        calib_file (str): The output calibration data file.
+        deploy_cfg (str | mmcv.Config): Deployment config file or
+            Config object.
+        model_cfg (str | mmcv.Config): Model config file or Config object.
+        model_checkpoint (str): A checkpoint path of PyTorch model,
+            defaults to `None`. Not used by this function.
+        dataset_cfg (Optional[Union[str, mmcv.Config]], optional): Model
+            config to provide calibration dataset. If none, use `model_cfg`
+            as the dataset config. Defaults to None.
+        dataset_type (str, optional): The dataset type. Defaults to 'val'.
+        device (str, optional): Device to create dataset. Defaults to 'cpu'.
+        metas (list, optional): Forwarded unchanged to
+            ``create_calib_input_data_impl``. The default is never mutated
+            here.
+        calib_num (int, optional): Forwarded unchanged to
+            ``create_calib_input_data_impl``. Defaults to None.
+    """
+    with no_mp():
+        # Fall back to the model config before it is loaded, so the raw
+        # value is handed to ``load_config`` below.
+        if dataset_cfg is None:
+            dataset_cfg = model_cfg
+
+        # load cfg if necessary
+        deploy_cfg, model_cfg = load_config(deploy_cfg, model_cfg)
+
+        # load dataset_cfg if necessary
+        dataset_cfg = load_config(dataset_cfg)[0]
+
+        from mmdeploy.apis.utils import build_task_processor
+        task_processor = build_task_processor(model_cfg, deploy_cfg, device)
+
+        dataset = task_processor.build_dataset(dataset_cfg, dataset_type)
+
+        # Batch size 1, one worker, fixed order.
+        dataloader = task_processor.build_dataloader(
+            dataset, 1, 1, dist=False, shuffle=False)
+
+        create_calib_input_data_impl(
+            calib_file,
+            dataloader,
+            model_partition=False,
+            metas=metas,
+            calib_num=calib_num)
+
+
+def from_onnx(onnx_model: Union[str, onnx.ModelProto],
+              output_file_prefix: str,
+              input_shapes: Dict[str, Sequence[int]],
+              max_workspace_size: int = 0,
+              fp16_mode: bool = False,
+              int8_mode: bool = False,
+              int8_param: Optional[dict] = None,
+              device_id: int = 0,
+              log_level: trt.Logger.Severity = trt.Logger.ERROR,
+              **kwargs) -> trt.ICudaEngine:
+    """Create a tensorrt engine from ONNX.
+
+    Modified from mmdeploy.backend.tensorrt.utils.from_onnx
+
+    Args:
+        onnx_model (str | onnx.ModelProto): Path of an ONNX file or an
+            already loaded model.
+        output_file_prefix (str): The engine is saved to
+            ``output_file_prefix + '.engine'``.
+        input_shapes (dict): Maps every input name to a dict with the keys
+            ``'min_shape'``, ``'opt_shape'`` and ``'max_shape'``.
+        max_workspace_size (int): Workspace size given to the builder config.
+        fp16_mode (bool): Whether to set the FP16 builder flag.
+        int8_mode (bool): Whether to set the INT8 builder flag and attach a
+            calibrator.
+        int8_param (dict, optional): Required when ``int8_mode`` is set. Must
+            hold ``'calib_file'`` and ``'model_type'``; ``'algorithm'`` is
+            optional.
+        device_id (int): Value exported as ``CUDA_DEVICE`` while
+            ``pycuda.autoinit`` is imported.
+        log_level (trt.Logger.Severity): Severity of the TensorRT logger.
+
+    Returns:
+        trt.ICudaEngine: The built engine.
+
+    Raises:
+        RuntimeError: If the ONNX model cannot be parsed.
+        AssertionError: If ``int8_mode`` is set without ``int8_param`` or
+            the engine could not be built.
+    """
+
+    import os
+    # Export CUDA_DEVICE only for the duration of the pycuda.autoinit import,
+    # then restore whatever the environment held before.
+    old_cuda_device = os.environ.get('CUDA_DEVICE', None)
+    os.environ['CUDA_DEVICE'] = str(device_id)
+    import pycuda.autoinit  # noqa:F401
+    if old_cuda_device is not None:
+        os.environ['CUDA_DEVICE'] = old_cuda_device
+    else:
+        os.environ.pop('CUDA_DEVICE')
+
+    load_tensorrt_plugin()
+    # create builder and network
+    logger = trt.Logger(log_level)
+    builder = trt.Builder(logger)
+    EXPLICIT_BATCH = 1 << (int)(
+        trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+    network = builder.create_network(EXPLICIT_BATCH)
+
+    # parse onnx
+    parser = trt.OnnxParser(network, logger)
+
+    if isinstance(onnx_model, str):
+        onnx_model = onnx.load(onnx_model)
+
+    if not parser.parse(onnx_model.SerializeToString()):
+        error_msgs = ''
+        for error in range(parser.num_errors):
+            error_msgs += f'{parser.get_error(error)}\n'
+        raise RuntimeError(f'Failed to parse onnx, {error_msgs}')
+
+    # config builder
+    if version.parse(trt.__version__) < version.parse('8'):
+        builder.max_workspace_size = max_workspace_size
+
+    config = builder.create_builder_config()
+    config.max_workspace_size = max_workspace_size
+
+    cuda_version = search_cuda_version()
+    if cuda_version is not None:
+        version_major = int(cuda_version.split('.')[0])
+        if version_major < 11:
+            # cu11 support cublasLt, so cudnn heuristic tactic should disable CUBLAS_LT # noqa E501
+            # Clear the bit with a mask: subtracting it would corrupt the
+            # other flags whenever CUBLAS_LT is not set in the first place.
+            tactic_source = config.get_tactic_sources() & ~(
+                1 << int(trt.TacticSource.CUBLAS_LT))
+            config.set_tactic_sources(tactic_source)
+
+    profile = builder.create_optimization_profile()
+
+    for input_name, param in input_shapes.items():
+        min_shape = param['min_shape']
+        opt_shape = param['opt_shape']
+        max_shape = param['max_shape']
+        profile.set_shape(input_name, min_shape, opt_shape, max_shape)
+    config.add_optimization_profile(profile)
+
+    if fp16_mode:
+        if version.parse(trt.__version__) < version.parse('8'):
+            builder.fp16_mode = fp16_mode
+        config.set_flag(trt.BuilderFlag.FP16)
+
+    if int8_mode:
+        config.set_flag(trt.BuilderFlag.INT8)
+        assert int8_param is not None
+        config.int8_calibrator = HDF5CalibratorBEVDet(
+            int8_param['calib_file'],
+            input_shapes,
+            model_type=int8_param['model_type'],
+            device_id=device_id,
+            algorithm=int8_param.get(
+                'algorithm', trt.CalibrationAlgoType.ENTROPY_CALIBRATION_2))
+        if version.parse(trt.__version__) < version.parse('8'):
+            builder.int8_mode = int8_mode
+            builder.int8_calibrator = config.int8_calibrator
+
+    # create engine
+    engine = builder.build_engine(network, config)
+
+    assert engine is not None, 'Failed to create TensorRT engine'
+
+    save(engine, output_file_prefix + '.engine')
+    print('Save engine at ', output_file_prefix + '.engine')
+    return engine
+
+
+def _check_onnx_model(onnx_model):
+    """Run the ONNX checker on ``onnx_model`` and print the verdict."""
+    try:
+        onnx.checker.check_model(onnx_model)
+    except Exception:
+        print('ONNX Model Incorrect')
+    else:
+        print('ONNX Model Correct')
+
+
+def _export_stage(model, inputs, onnx_path, input_names, output_names):
+    """Export one sub-graph, check it and re-save it with inferred shapes."""
+    torch.onnx.export(
+        model,
+        inputs,
+        onnx_path,
+        export_params=True,
+        opset_version=11,
+        input_names=input_names,
+        output_names=output_names)
+    onnx_model = onnx.load(onnx_path)
+    _check_onnx_model(onnx_model)
+    onnx.save(onnx.shape_inference.infer_shapes(onnx_model), onnx_path)
+    print('====== onnx is saved at : ', onnx_path)
+
+
+def main():
+    """Export the configured model to ONNX using the first test sample.
+
+    Five files are written into ``args.work_dir``: ``part1.onnx``,
+    ``part2.onnx`` and ``part3.onnx`` (the three stages exposed by the model
+    as ``forward_part1``/``forward_part2``/``forward_part3``),
+    ``<prefix>.onnx`` (``forward_ori``) and ``<prefix>_with_argmax.onnx``
+    (``forward_with_argmax``). ``<prefix>`` is ``args.prefix`` extended by
+    ``_int8`` or ``_fp16`` and, with ``--fuse-conv-bn``, by ``_fuse``.
+
+    Raises:
+        NotImplementedError: If the built model class is not one of the
+            classes this script knows how to feed.
+        ValueError: If neither ``wdet3d`` nor ``wocc`` is enabled on the
+            model.
+    """
+    from functools import partial
+
+    args = parse_args()
+
+    os.makedirs(args.work_dir, exist_ok=True)
+
+    load_tensorrt_plugin()
+
+    model_prefix = args.prefix
+    if args.int8:
+        model_prefix = model_prefix + '_int8'
+    elif args.fp16:
+        model_prefix = model_prefix + '_fp16'
+    cfg = Config.fromfile(args.config)
+    cfg.model.pretrained = None
+    # The exportable variant of a model is registered under '<type>TRT'.
+    cfg.model.type = cfg.model.type + 'TRT'
+
+    cfg = compat_cfg(cfg)
+    cfg.gpu_ids = [0]
+
+    # import modules from plugin/xx, registry will be updated
+    if hasattr(cfg, 'plugin'):
+        if cfg.plugin:
+            import importlib
+            if hasattr(cfg, 'plugin_dir'):
+                _module_dir = os.path.dirname(cfg.plugin_dir)
+                _module_path = '.'.join(_module_dir.split('/'))
+                print(_module_path)
+            else:
+                # import dir is the dirpath for the config file
+                _module_dir = os.path.dirname(args.config)
+                _module_path = '.'.join(_module_dir.split('/'))
+            importlib.import_module(_module_path)
+
+    # build the dataloader
+    test_dataloader_default_args = dict(
+        samples_per_gpu=1, workers_per_gpu=0, dist=False, shuffle=False)
+
+    if isinstance(cfg.data.test, dict):
+        cfg.data.test.test_mode = True
+        if cfg.data.test_dataloader.get('samples_per_gpu', 1) > 1:
+            # Replace 'ImageToTensor' to 'DefaultFormatBundle'
+            cfg.data.test.pipeline = replace_ImageToTensor(
+                cfg.data.test.pipeline)
+    elif isinstance(cfg.data.test, list):
+        for ds_cfg in cfg.data.test:
+            ds_cfg.test_mode = True
+        if cfg.data.test_dataloader.get('samples_per_gpu', 1) > 1:
+            for ds_cfg in cfg.data.test:
+                ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)
+
+    test_loader_cfg = {
+        **test_dataloader_default_args,
+        **cfg.data.get('test_dataloader', {})
+    }
+    dataset = build_dataset(cfg.data.test)
+    data_loader = build_dataloader(dataset, **test_loader_cfg)
+
+    # build the model and load checkpoint
+    cfg.model.train_cfg = None
+    model = build_model(cfg.model, test_cfg=cfg.get('test_cfg'))
+    if os.path.exists(args.checkpoint):
+        load_checkpoint(model, args.checkpoint, map_location='cpu')
+    else:
+        # Best effort: the export continues with the freshly built weights.
+        print(args.checkpoint, " does not exists!")
+    if args.fuse_conv_bn:
+        model_prefix = model_prefix + '_fuse'
+        model = fuse_module(model)
+    model.cuda()
+    model.eval()
+
+    # Output locations; os.path.join keeps them inside work_dir whether or
+    # not the argument was given with a trailing slash.
+    full_onnx_path = os.path.join(args.work_dir, model_prefix + '.onnx')
+    argmax_onnx_path = os.path.join(args.work_dir,
+                                    model_prefix + '_with_argmax.onnx')
+
+    # Only the first sample is needed to trace the graphs.
+    for data in data_loader:
+        inputs = [t.cuda() for t in data['img_inputs'][0]]
+        img = inputs[0].squeeze(0)
+        # Keep at most six entries along the first axis.
+        if img.shape[0] > 6:
+            img = img[:6]
+
+        model_name = model.__class__.__name__
+        # NOTE(review): only the BEVDepth* models produce an mlp_input. The
+        # other classes used to hit a NameError further down; None is passed
+        # for them instead - confirm forward_part1/forward_ori accept it.
+        mlp_input = None
+        if model_name in ['FBOCCTRT', 'FBOCC2DTRT']:
+            metas = model.get_bev_pool_input(
+                inputs, img_metas=data['img_metas'])
+        elif model_name in ['BEVDetOCCTRT']:
+            metas = model.get_bev_pool_input(inputs)
+        elif model_name in ['BEVDepthOCCTRT', 'BEVDepthPanoTRT']:
+            metas, mlp_input = model.get_bev_pool_input(inputs)
+        else:
+            raise NotImplementedError(
+                f'Unsupported model type for ONNX export: {model_name}')
+
+        # metas holds ranks_bev first; the exported graph takes ranks_depth
+        # and ranks_feat ahead of it, hence the 1, 2, 0, 3, 4 order.
+        onnx_input = (img.float().contiguous(), metas[1].int().contiguous(),
+                      metas[2].int().contiguous(),
+                      metas[0].int().contiguous(),
+                      metas[3].int().contiguous(),
+                      metas[4].int().contiguous())
+        dynamic_axes = {
+            "ranks_depth": {0: 'M'},
+            "ranks_feat": {0: 'M'},
+            "ranks_bev": {0: 'M'},
+            "interval_starts": {0: 'N'},
+            "interval_lengths": {0: 'N'},
+        }
+        input_names = [
+            'img', 'ranks_depth', 'ranks_feat', 'ranks_bev',
+            'interval_starts', 'interval_lengths'
+        ]
+        if model_name in ['BEVDepthOCCTRT', 'BEVDepthPanoTRT']:
+            onnx_input = onnx_input + (mlp_input, )
+            input_names.append('mlp_input')
+
+        with torch.no_grad():
+            with_det3d = bool(model.wdet3d)
+            with_occ = bool(model.wocc)
+            if with_det3d and not with_occ:
+                output_names = [
+                    f'output_{j}'
+                    for j in range(6 * len(model.pts_bbox_head.task_heads))
+                ]
+            elif with_det3d and with_occ:
+                output_names = [
+                    f'output_{j}' for j in range(
+                        1 + 6 * len(model.pts_bbox_head.task_heads))
+                ]
+            elif with_occ:
+                output_names = [f'output_{j}' for j in range(1)]
+            else:
+                raise ValueError(
+                    ' At least one of wdet3d and wocc is set as True!! ')
+
+            occ_output_names = [
+                'occ_pred', 'inst_center_reg', 'inst_center_height',
+                'inst_center_heatmap'
+            ]
+
+            # part1
+            model.forward = partial(model.forward_part1, mlp_input=mlp_input)
+            _export_stage(
+                model, (img.float().contiguous(), ),
+                os.path.join(args.work_dir, 'part1.onnx'),
+                input_names=['img'],
+                output_names=['tran_feat', 'depth'])
+            tran_feat, depth = model.forward(img)
+
+            # part2
+            model.forward = partial(
+                model.forward_part2,
+                ranks_depth=onnx_input[1],
+                ranks_feat=onnx_input[2],
+                ranks_bev=onnx_input[3],
+                interval_starts=onnx_input[4],
+                interval_lengths=onnx_input[5])
+            _export_stage(
+                model,
+                (tran_feat.float().contiguous(), depth.float().contiguous()),
+                os.path.join(args.work_dir, 'part2.onnx'),
+                input_names=['tran_feat', 'depth'],
+                output_names=['bev_feat'])
+            bev_pool_feat = model.forward(tran_feat, depth)
+
+            # part3
+            model.forward = model.forward_part3
+            _export_stage(
+                model, (bev_pool_feat, ),
+                os.path.join(args.work_dir, 'part3.onnx'),
+                input_names=['bev_feat'],
+                output_names=occ_output_names)
+
+            # Whole model with the index tensors bound as constants, so the
+            # exported graph only takes the image.
+            model.forward = partial(
+                model.forward_ori,
+                ranks_depth=onnx_input[1],
+                ranks_feat=onnx_input[2],
+                ranks_bev=onnx_input[3],
+                interval_starts=onnx_input[4],
+                interval_lengths=onnx_input[5],
+                mlp_input=mlp_input)
+            torch.onnx.export(
+                model, (onnx_input[0], ),
+                full_onnx_path,
+                opset_version=11,
+                dynamic_axes=dynamic_axes,
+                input_names=['img'],
+                output_names=occ_output_names)
+            print('output_names:', output_names)
+            print('====== onnx is saved at : ', full_onnx_path)
+            # check onnx model
+            _check_onnx_model(onnx.load(full_onnx_path))
+            model.forward(onnx_input[0])
+
+            # Variant that takes every input explicitly and returns labels.
+            model.forward = model.forward_with_argmax
+            model(*onnx_input)
+            output_names = ['cls_occ_label']
+            torch.onnx.export(
+                model,
+                onnx_input,
+                argmax_onnx_path,
+                opset_version=11,
+                dynamic_axes=dynamic_axes,
+                input_names=input_names,
+                output_names=output_names)
+            print('output_names:', output_names)
+            print('====== onnx is saved at : ', argmax_onnx_path)
+            # check onnx model
+            _check_onnx_model(onnx.load(argmax_onnx_path))
+
+        break
+
+
+if __name__ == '__main__':
+
+    main()
--- a/docker-hub/FlashOCC/Flashocc/tools/misc/browse_dataset.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/misc/browse_dataset.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import warnings
+from os import path as osp
+from pathlib import Path
+
+import mmcv
+import numpy as np
+from mmcv import Config, DictAction, mkdir_or_exist
+
+from mmdet3d.core.bbox import (Box3DMode, CameraInstance3DBoxes, Coord3DMode,
+                               DepthInstance3DBoxes, LiDARInstance3DBoxes)
+from mmdet3d.core.visualizer import (show_multi_modality_result, show_result,
+                                     show_seg_result)
+from mmdet3d.datasets import build_dataset
+
+
+def parse_args():
+    """Parse the command-line options of the dataset browser.
+
+    Returns:
+        argparse.Namespace: ``config`` plus the ``skip_type``,
+            ``output_dir``, ``task``, ``aug``, ``online`` and
+            ``cfg_options`` options.
+    """
+    parser = argparse.ArgumentParser(description='Browse a dataset')
+    add = parser.add_argument
+
+    add('config', help='train config file path')
+    add('--skip-type',
+        type=str,
+        nargs='+',
+        default=['Normalize'],
+        help='skip some useless pipeline')
+    add('--output-dir',
+        default=None,
+        type=str,
+        help='If there is no display interface, you can save it')
+    add('--task',
+        type=str,
+        choices=['det', 'seg', 'multi_modality-det', 'mono-det'],
+        help='Determine the visualization method depending on the task.')
+    add('--aug',
+        action='store_true',
+        help='Whether to visualize augmented datasets or original dataset.')
+    add('--online',
+        action='store_true',
+        help='Whether to perform online visualization. Note that you often '
+        'need a monitor to do so.')
+    add('--cfg-options',
+        nargs='+',
+        action=DictAction,
+        help='override some settings in the used config, the key-value pair '
+        'in xxx=yyy format will be merged into config file. If the value to '
+        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
+        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
+        'Note that the quotation marks are necessary and that no white space '
+        'is allowed.')
+    return parser.parse_args()
+
+
+def build_data_cfg(config_path, skip_type, aug, cfg_options):
+    """Load a config and set up the train dataset pipeline for browsing.
+
+    Args:
+        config_path (str): Path of the config file to load.
+        skip_type (list[str]): Pipeline step types that are dropped from the
+            visualization pipeline.
+        aug (bool): If True, browse with ``cfg.train_pipeline``. Otherwise
+            start from ``cfg.eval_pipeline`` and borrow the annotation
+            loading and collecting steps of the train pipeline.
+        cfg_options (dict | None): Overrides merged into the loaded config.
+
+    Returns:
+        Config: The loaded config whose ``data.train.pipeline`` has been
+            replaced by the visualization pipeline.
+    """
+    cfg = Config.fromfile(config_path)
+    if cfg_options is not None:
+        cfg.merge_from_dict(cfg_options)
+
+    # Unwrap dataset wrappers up front so later code only sees a plain
+    # dataset config: `RepeatDataset` -> its inner dataset, and
+    # `ConcatDataset` -> only its first dataset.
+    if cfg.data.train['type'] == 'RepeatDataset':
+        cfg.data.train = cfg.data.train.dataset
+    if cfg.data.train['type'] == 'ConcatDataset':
+        cfg.data.train = cfg.data.train.datasets[0]
+    train_data_cfg = cfg.data.train
+
+    if aug:
+        show_pipeline = cfg.train_pipeline
+    else:
+        show_pipeline = cfg.eval_pipeline
+        for idx in range(len(cfg.train_pipeline)):
+            step = cfg.train_pipeline[idx]
+            if step['type'] == 'LoadAnnotations3D':
+                # put the annotation loader at its train-pipeline index
+                show_pipeline.insert(idx, step)
+            # Collect points as well as labels
+            if step['type'] == 'Collect3D':
+                if show_pipeline[-1]['type'] == 'Collect3D':
+                    show_pipeline[-1] = step
+                else:
+                    show_pipeline.append(step)
+
+    visible_steps = []
+    for step in show_pipeline:
+        if step['type'] not in skip_type:
+            visible_steps.append(step)
+    train_data_cfg['pipeline'] = visible_steps
+
+    return cfg
+
+
+def to_depth_mode(points, bboxes):
+    """Convert points and bboxes from LiDAR to Depth coord / box mode.
+
+    Either argument may be None, in which case it is passed through as None.
+    The conversion is applied to a copy (``points.copy()``) or a clone
+    (``bboxes.clone()``) of the given input.
+
+    Returns:
+        tuple: ``(points, bboxes)`` after conversion.
+    """
+    depth_points = points
+    if depth_points is not None:
+        depth_points = Coord3DMode.convert_point(
+            points.copy(), Coord3DMode.LIDAR, Coord3DMode.DEPTH)
+
+    depth_bboxes = bboxes
+    if depth_bboxes is not None:
+        depth_bboxes = Box3DMode.convert(
+            bboxes.clone(), Box3DMode.LIDAR, Box3DMode.DEPTH)
+
+    return depth_points, depth_bboxes
+
+
+def show_det_data(input, out_dir, show=False):
+    """Visualize a 3D point cloud together with its ground-truth 3D bboxes.
+
+    Args:
+        input (dict): One dataset sample; its ``img_metas``, ``points`` and
+            ``gt_bboxes_3d`` entries are read through their ``_data``
+            attribute.
+        out_dir (str | None): Output directory handed to ``show_result``.
+        show (bool, optional): Forwarded to ``show_result``. Default: False.
+    """
+    metas = input['img_metas']._data
+    pts = input['points']._data.numpy()
+    boxes = input['gt_bboxes_3d']._data.tensor
+
+    # everything that is not already in depth mode is converted first
+    already_depth = metas['box_mode_3d'] == Box3DMode.DEPTH
+    if not already_depth:
+        pts, boxes = to_depth_mode(pts, boxes)
+
+    # name the result after the point cloud file, without its extension
+    stem, _ = osp.splitext(osp.basename(metas['pts_filename']))
+    show_result(
+        pts, boxes.clone(), None, out_dir, stem, show=show, snapshot=True)
+
+
+def show_seg_data(input, out_dir, show=False):
+    """Visualize a 3D point cloud together with its segmentation mask.
+
+    Args:
+        input (dict): One dataset sample; its ``img_metas``, ``points`` and
+            ``pts_semantic_mask`` entries are read through their ``_data``
+            attribute.
+        out_dir (str | None): Output directory handed to ``show_seg_result``.
+        show (bool, optional): Forwarded to ``show_seg_result``.
+            Default: False.
+    """
+    metas = input['img_metas']._data
+    pts = input['points']._data.numpy()
+    seg_mask = input['pts_semantic_mask']._data.numpy()
+
+    # name the result after the point cloud file, without its extension
+    stem, _ = osp.splitext(osp.basename(metas['pts_filename']))
+    palette = np.array(metas['PALETTE'])
+    show_seg_result(
+        pts,
+        seg_mask.copy(),
+        None,
+        out_dir,
+        stem,
+        palette,
+        metas['ignore_index'],
+        show=show,
+        snapshot=True)
+
+
+def show_proj_bbox_img(input, out_dir, show=False, is_nus_mono=False):
+    """Visualize 3D bboxes on the 2D image by projection.
+
+    Args:
+        input (dict): One dataset sample; its ``gt_bboxes_3d``, ``img_metas``
+            and ``img`` entries are read through their ``_data`` attribute.
+        out_dir (str | None): Output directory handed to
+            ``show_multi_modality_result``.
+        show (bool, optional): Forwarded to ``show_multi_modality_result``.
+            Default: False.
+        is_nus_mono (bool, optional): Accepted for callers but not used in
+            this function. Default: False.
+    """
+    boxes = input['gt_bboxes_3d']._data
+    metas = input['img_metas']._data
+    # move the leading channel axis to the end: (0, 1, 2) -> (1, 2, 0)
+    image = input['img']._data.numpy().transpose(1, 2, 0)
+
+    # no 3D gt bboxes, just show img
+    if boxes.tensor.shape[0] == 0:
+        boxes = None
+    filename = Path(metas['filename']).name
+
+    # choose the projection matrix and box mode from the box class
+    if isinstance(boxes, DepthInstance3DBoxes):
+        proj_mat, box_mode = None, 'depth'
+    elif isinstance(boxes, LiDARInstance3DBoxes):
+        proj_mat, box_mode = metas['lidar2img'], 'lidar'
+    elif isinstance(boxes, CameraInstance3DBoxes):
+        proj_mat, box_mode = metas['cam2img'], 'camera'
+    else:
+        # can't project, just show img
+        warnings.warn(
+            f'unrecognized gt box type {type(boxes)}, only show image')
+        show_multi_modality_result(
+            image, None, None, None, out_dir, filename, show=show)
+        return
+
+    show_multi_modality_result(
+        image,
+        boxes,
+        None,
+        proj_mat,
+        out_dir,
+        filename,
+        box_mode=box_mode,
+        img_metas=metas,
+        show=show)
+
+
+def main():
+    """Browse the train dataset of a config and visualize every sample.
+
+    Which visualizations are produced depends on ``--task``: 3D boxes on
+    point clouds ('det', 'multi_modality-det'), projected boxes on images
+    ('multi_modality-det', 'mono-det') or segmentation masks ('seg').
+    """
+    args = parse_args()
+
+    if args.output_dir is not None:
+        mkdir_or_exist(args.output_dir)
+
+    cfg = build_data_cfg(args.config, args.skip_type, args.aug,
+                         args.cfg_options)
+    try:
+        dataset = build_dataset(
+            cfg.data.train, default_args=dict(filter_empty_gt=False))
+    except TypeError:  # seg dataset doesn't have `filter_empty_gt` key
+        dataset = build_dataset(cfg.data.train)
+
+    is_nus_mono = cfg.dataset_type == 'NuScenesMonoDataset'
+
+    # configure visualization mode once, outside of the sample loop
+    vis_task = args.task  # 'det', 'seg', 'multi_modality-det', 'mono-det'
+    draw_points = vis_task in ('det', 'multi_modality-det')
+    draw_projection = vis_task in ('multi_modality-det', 'mono-det')
+    draw_segmentation = vis_task in ('seg', )
+    progress_bar = mmcv.ProgressBar(len(dataset))
+
+    for sample in dataset:
+        if draw_points:
+            # show 3D bboxes on 3D point clouds
+            show_det_data(sample, args.output_dir, show=args.online)
+        if draw_projection:
+            # project 3D bboxes to 2D image
+            show_proj_bbox_img(
+                sample,
+                args.output_dir,
+                show=args.online,
+                is_nus_mono=is_nus_mono)
+        elif draw_segmentation:
+            # show 3D segmentation mask on 3D point clouds
+            show_seg_data(sample, args.output_dir, show=args.online)
+        progress_bar.update()
+
+
+if __name__ == '__main__':
+    main()
--- a/docker-hub/FlashOCC/Flashocc/tools/misc/fuse_conv_bn.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/misc/fuse_conv_bn.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+
+import torch
+from mmcv.runner import save_checkpoint
+from torch import nn as nn
+
+from mmdet3d.apis import init_model
+
+
+def fuse_conv_bn(conv, bn):
+    """Fold a batch norm layer into the conv layer that precedes it.
+
+    During inference a batch norm layer only applies its stored per-channel
+    mean and variance, so it can be merged into the preceding conv layer to
+    save computation and simplify the network structure.
+
+    Args:
+        conv (nn.Module): Conv layer; its ``weight`` and ``bias`` are
+            replaced in place by the fused parameters.
+        bn (nn.Module): Batch norm layer applied right after ``conv``.
+
+    Returns:
+        nn.Module: The same ``conv`` object, now carrying fused parameters.
+    """
+    # per-output-channel scale applied by the batch norm
+    scale = bn.weight / torch.sqrt(bn.running_var + bn.eps)
+
+    if conv.bias is None:
+        bias = torch.zeros_like(bn.running_mean)
+    else:
+        bias = conv.bias
+
+    fused_weight = conv.weight * scale.reshape([conv.out_channels, 1, 1, 1])
+    fused_bias = (bias - bn.running_mean) * scale + bn.bias
+
+    conv.weight = nn.Parameter(fused_weight)
+    conv.bias = nn.Parameter(fused_bias)
+    return conv
+
+
+def fuse_module(m):
+    """Recursively fuse every BN child that directly follows a Conv2d child.
+
+    Args:
+        m (nn.Module): Module whose children are scanned in registration
+            order and modified in place.
+
+    Returns:
+        nn.Module: The same module ``m``.
+    """
+    pending_conv = None
+    pending_name = None
+
+    for child_name, child in m.named_children():
+        if isinstance(child, nn.Conv2d):
+            # remember the conv; it is fused if a BN comes next
+            pending_conv, pending_name = child, child_name
+        elif isinstance(child, (nn.BatchNorm2d, nn.SyncBatchNorm)):
+            # only fuse BN that is after Conv
+            if pending_conv is not None:
+                m._modules[pending_name] = fuse_conv_bn(pending_conv, child)
+                # To reduce changes, set BN as Identity instead of deleting
+                # it.
+                m._modules[child_name] = nn.Identity()
+                pending_conv = None
+        else:
+            fuse_module(child)
+    return m
+
+
+def parse_args():
+    """Parse the positional arguments ``config``, ``checkpoint`` and ``out``.
+
+    Returns:
+        argparse.Namespace: The parsed command-line arguments.
+    """
+    positionals = (
+        ('config', 'config file path'),
+        ('checkpoint', 'checkpoint file path'),
+        ('out', 'output path of the converted model'),
+    )
+    parser = argparse.ArgumentParser(
+        description='fuse Conv and BN layers in a model')
+    for arg_name, help_text in positionals:
+        parser.add_argument(arg_name, help=help_text)
+    return parser.parse_args()
+
+
+def main():
+    """Build a model, fuse its Conv+BN pairs and save the result."""
+    cli = parse_args()
+    # build the model from a config file and a checkpoint file
+    net = init_model(cli.config, cli.checkpoint)
+    # fuse conv and bn layers of the model, then write the checkpoint
+    save_checkpoint(fuse_module(net), cli.out)
+
+
+if __name__ == '__main__':
+    main()
--- a/docker-hub/FlashOCC/Flashocc/tools/misc/print_config.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/misc/print_config.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+
+from mmcv import Config, DictAction
+
+
+def parse_args():
+    """Parse the config path and the optional ``--options`` overrides.
+
+    Returns:
+        argparse.Namespace: Parsed options with ``config`` and ``options``.
+    """
+    parser = argparse.ArgumentParser(description='Print the whole config')
+    parser.add_argument('config', help='config file path')
+    parser.add_argument(
+        '--options',
+        action=DictAction,
+        nargs='+',
+        help='arguments in dict')
+    return parser.parse_args()
+
+
+def main():
+    """Load the config, apply ``--options`` overrides and print it."""
+    cli = parse_args()
+
+    cfg = Config.fromfile(cli.config)
+    overrides = cli.options
+    if overrides is not None:
+        cfg.merge_from_dict(overrides)
+    print('Config:\n{}'.format(cfg.pretty_text))
+
+
+if __name__ == '__main__':
+    main()
--- a/docker-hub/FlashOCC/Flashocc/tools/misc/visualize_results.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/misc/visualize_results.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+
+import mmcv
+from mmcv import Config
+
+from mmdet3d.datasets import build_dataset
+
+
+def parse_args():
+    """Parse the config path plus the ``--result`` and ``--show-dir`` options.
+
+    Returns:
+        argparse.Namespace: Parsed options with ``config``, ``result`` and
+            ``show_dir``.
+    """
+    arguments = (
+        ('config', 'test config file path'),
+        ('--result', 'results file in pickle format'),
+        ('--show-dir', 'directory where visualize results will be saved'),
+    )
+    parser = argparse.ArgumentParser(
+        description='MMDet3D visualize the results')
+    for flag, help_text in arguments:
+        parser.add_argument(flag, help=help_text)
+    return parser.parse_args()
+
+
+def main():
+    """Visualize saved prediction results through the dataset's ``show``.
+
+    Raises:
+        ValueError: If ``--result`` is missing or does not end with
+            ``.pkl`` / ``.pickle``.
+        NotImplementedError: If the built dataset has no ``show`` method.
+    """
+    args = parse_args()
+
+    # `--result` is optional for argparse, but there is nothing to show
+    # without it. Fail right away with a clear message instead of building
+    # the dataset first and failing when the results are loaded.
+    if args.result is None:
+        raise ValueError('The results file must be specified with --result.')
+    if not args.result.endswith(('.pkl', '.pickle')):
+        raise ValueError('The results file must be a pkl file.')
+
+    cfg = Config.fromfile(args.config)
+    cfg.data.test.test_mode = True
+
+    # build the dataset
+    dataset = build_dataset(cfg.data.test)
+    results = mmcv.load(args.result)
+
+    if getattr(dataset, 'show', None) is None:
+        raise NotImplementedError(
+            f'Show is not implemented for dataset {type(dataset).__name__}!')
+
+    # data loading pipeline for showing
+    eval_pipeline = cfg.get('eval_pipeline', {})
+    if eval_pipeline:
+        dataset.show(results, args.show_dir, pipeline=eval_pipeline)
+    else:
+        dataset.show(results, args.show_dir)  # use default pipeline
+
+
+if __name__ == '__main__':
+    main()
--- a/docker-hub/FlashOCC/Flashocc/tools/model_converters/convert_h3dnet_checkpoints.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/model_converters/convert_h3dnet_checkpoints.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import tempfile
+
+import torch
+from mmcv import Config
+from mmcv.runner import load_state_dict
+
+from mmdet3d.models import build_detector
+
+
+def parse_args():
+    """Parse the input checkpoint path and the optional ``--out`` path.
+
+    Returns:
+        argparse.Namespace: Parsed options with ``checkpoint`` and ``out``.
+    """
+    arguments = (
+        ('checkpoint', 'checkpoint file'),
+        ('--out', 'path of the output checkpoint file'),
+    )
+    parser = argparse.ArgumentParser(
+        description='MMDet3D upgrade model version(before v0.6.0) of H3DNet')
+    for flag, help_text in arguments:
+        parser.add_argument(flag, help=help_text)
+    return parser.parse_args()
+
+
+def parse_config(config_strings):
+    """Parse config from strings and upgrade it to the current H3DNet layout.
+
+    Args:
+        config_strings (string): strings of model config.
+
+    Returns:
+        Config: model config
+    """
+    # `Config.fromfile` needs a real `.py` file. Write it into a temporary
+    # directory so that it is removed again, even when parsing fails,
+    # instead of leaving a stray `<tmp>.py` file behind.
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        config_path = f'{tmp_dir}/config.py'
+        with open(config_path, 'w') as f:
+            f.write(config_strings)
+        config = Config.fromfile(config_path)
+
+    backbone = config.model.backbone
+    rpn_head = config.model.rpn_head
+    roi_head = config.model.roi_head
+    bbox_head = roi_head.bbox_head
+
+    # Update backbone config
+    if 'pool_mod' in backbone.backbones:
+        backbone.backbones.pop('pool_mod')
+
+    if 'sa_cfg' not in backbone:
+        backbone['sa_cfg'] = dict(
+            type='PointSAModule',
+            pool_mod='max',
+            use_xyz=True,
+            normalize_xyz=True)
+
+    if 'type' not in rpn_head.vote_aggregation_cfg:
+        rpn_head.vote_aggregation_cfg['type'] = 'PointSAModule'
+
+    # Update rpn_head config
+    if 'pred_layer_cfg' not in rpn_head:
+        rpn_head['pred_layer_cfg'] = dict(
+            in_channels=128, shared_conv_channels=(128, 128), bias=True)
+
+    if 'feat_channels' in rpn_head:
+        rpn_head.pop('feat_channels')
+
+    # the old key name carries a typo ("moudule"); move it to the fixed name
+    if 'vote_moudule_cfg' in rpn_head:
+        rpn_head['vote_module_cfg'] = rpn_head.pop('vote_moudule_cfg')
+
+    if rpn_head.vote_aggregation_cfg.use_xyz:
+        rpn_head.vote_aggregation_cfg.mlp_channels[0] -= 3
+
+    # Update the primitive heads of roi_head
+    for cfg in roi_head.primitive_list:
+        # guarded like the rpn_head rename above, so a config that already
+        # uses the fixed key name does not raise a KeyError
+        if 'vote_moudule_cfg' in cfg:
+            cfg['vote_module_cfg'] = cfg.pop('vote_moudule_cfg')
+        cfg.vote_aggregation_cfg.mlp_channels[0] -= 3
+        if 'type' not in cfg.vote_aggregation_cfg:
+            cfg.vote_aggregation_cfg['type'] = 'PointSAModule'
+
+    # Update the matching modules of roi_head.bbox_head
+    # (`suface_matching_cfg` is the key name used by the config itself)
+    if 'type' not in bbox_head.suface_matching_cfg:
+        bbox_head.suface_matching_cfg['type'] = 'PointSAModule'
+
+    if bbox_head.suface_matching_cfg.use_xyz:
+        bbox_head.suface_matching_cfg.mlp_channels[0] -= 3
+
+    if 'type' not in bbox_head.line_matching_cfg:
+        bbox_head.line_matching_cfg['type'] = 'PointSAModule'
+
+    if bbox_head.line_matching_cfg.use_xyz:
+        bbox_head.line_matching_cfg.mlp_channels[0] -= 3
+
+    if 'proposal_module_cfg' in bbox_head:
+        bbox_head.pop('proposal_module_cfg')
+
+    return config
+
+
+def main():
+    """Convert keys in checkpoints for H3DNet.
+
+    There can be some breaking changes during the development of mmdetection3d,
+    and this tool is used for upgrading checkpoints trained with old versions
+    (before v0.6.0) to the latest one.
+
+    The checkpoint given on the command line is loaded, its state dict keys
+    are deleted / renamed / split as described by ``DEL_KEYS``,
+    ``RENAME_PREFIX`` and ``EXTRACT_KEYS``, the result is verified by a
+    strict load into a freshly built detector and finally saved to
+    ``args.out``.
+
+    Raises:
+        NotImplementedError: If the dataset type stored in the checkpoint
+            config is neither 'ScanNetDataset' nor 'SUNRGBDDataset'.
+    """
+    args = parse_args()
+    checkpoint = torch.load(args.checkpoint)
+    # the model config is read from the text stored in the checkpoint meta
+    cfg = parse_config(checkpoint['meta']['config'])
+    # Build the model and load checkpoint
+    model = build_detector(
+        cfg.model,
+        train_cfg=cfg.get('train_cfg'),
+        test_cfg=cfg.get('test_cfg'))
+    orig_ckpt = checkpoint['state_dict']
+    # work on a copy of the key mapping: keys are popped and renamed below,
+    # while `orig_ckpt` is still read in the extraction step
+    converted_ckpt = orig_ckpt.copy()
+
+    # number of classes used to split `conv_out`, chosen by dataset type
+    if cfg['dataset_type'] == 'ScanNetDataset':
+        NUM_CLASSES = 18
+    elif cfg['dataset_type'] == 'SUNRGBDDataset':
+        NUM_CLASSES = 10
+    else:
+        raise NotImplementedError
+
+    # old key fragment -> new key fragment
+    RENAME_PREFIX = {
+        'rpn_head.conv_pred.0': 'rpn_head.conv_pred.shared_convs.layer0',
+        'rpn_head.conv_pred.1': 'rpn_head.conv_pred.shared_convs.layer1'
+    }
+
+    # keys that are dropped from the converted checkpoint
+    DEL_KEYS = [
+        'rpn_head.conv_pred.0.bn.num_batches_tracked',
+        'rpn_head.conv_pred.1.bn.num_batches_tracked'
+    ]
+
+    # new key -> (old key, list of (start, end) slices along dim 0).
+    # An `end` of -1 is a marker for "up to the end of the tensor", see the
+    # extraction loop below.
+    EXTRACT_KEYS = {
+        'rpn_head.conv_pred.conv_cls.weight':
+        ('rpn_head.conv_pred.conv_out.weight', [(0, 2), (-NUM_CLASSES, -1)]),
+        'rpn_head.conv_pred.conv_cls.bias':
+        ('rpn_head.conv_pred.conv_out.bias', [(0, 2), (-NUM_CLASSES, -1)]),
+        'rpn_head.conv_pred.conv_reg.weight':
+        ('rpn_head.conv_pred.conv_out.weight', [(2, -NUM_CLASSES)]),
+        'rpn_head.conv_pred.conv_reg.bias':
+        ('rpn_head.conv_pred.conv_out.bias', [(2, -NUM_CLASSES)])
+    }
+
+    # Delete some useless keys
+    # (`pop` without a default: a missing key raises KeyError)
+    for key in DEL_KEYS:
+        converted_ckpt.pop(key)
+
+    # Rename keys with specific prefix
+    # The renames are collected first and applied afterwards, so the dict is
+    # not modified while it is being iterated. Note that `in` is a substring
+    # test, not a strict prefix test.
+    RENAME_KEYS = dict()
+    for old_key in converted_ckpt.keys():
+        for rename_prefix in RENAME_PREFIX.keys():
+            if rename_prefix in old_key:
+                new_key = old_key.replace(rename_prefix,
+                                          RENAME_PREFIX[rename_prefix])
+                RENAME_KEYS[new_key] = old_key
+    for new_key, old_key in RENAME_KEYS.items():
+        converted_ckpt[new_key] = converted_ckpt.pop(old_key)
+
+    # Extract weights and rename the keys
+    for new_key, (old_key, indices) in EXTRACT_KEYS.items():
+        # always slice the untouched tensor from the original state dict;
+        # the same old key feeds several new keys
+        cur_layers = orig_ckpt[old_key]
+        converted_layers = []
+        for (start, end) in indices:
+            if end != -1:
+                converted_layers.append(cur_layers[start:end])
+            else:
+                # end == -1: take everything from `start` to the last row
+                converted_layers.append(cur_layers[start:])
+        converted_layers = torch.cat(converted_layers, 0)
+        converted_ckpt[new_key] = converted_layers
+        # the old fused key is removed the first time it is encountered
+        if old_key in converted_ckpt.keys():
+            converted_ckpt.pop(old_key)
+
+    # Check the converted checkpoint by loading to the model
+    load_state_dict(model, converted_ckpt, strict=True)
+    checkpoint['state_dict'] = converted_ckpt
+    torch.save(checkpoint, args.out)
+
+
+if __name__ == '__main__':
+    main()
--- a/docker-hub/FlashOCC/Flashocc/tools/model_converters/convert_votenet_checkpoints.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/model_converters/convert_votenet_checkpoints.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import tempfile
+
+import torch
+from mmcv import Config
+from mmcv.runner import load_state_dict
+
+from mmdet3d.models import build_detector
+
+
+def parse_args():
+    """Parse the input checkpoint path and the optional ``--out`` path.
+
+    Returns:
+        argparse.Namespace: Parsed options with ``checkpoint`` and ``out``.
+    """
+    arguments = (
+        ('checkpoint', 'checkpoint file'),
+        ('--out', 'path of the output checkpoint file'),
+    )
+    parser = argparse.ArgumentParser(
+        description='MMDet3D upgrade model version(before v0.6.0) of VoteNet')
+    for flag, help_text in arguments:
+        parser.add_argument(flag, help=help_text)
+    return parser.parse_args()
+
+
+def parse_config(config_strings):
+    """Parse config from strings and upgrade it to the current VoteNet layout.
+
+    Args:
+        config_strings (string): strings of model config.
+
+    Returns:
+        Config: model config
+    """
+    # `Config.fromfile` needs a real `.py` file. Write it into a temporary
+    # directory so that it is removed again, even when parsing fails,
+    # instead of leaving a stray `<tmp>.py` file behind.
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        config_path = f'{tmp_dir}/config.py'
+        with open(config_path, 'w') as f:
+            f.write(config_strings)
+        config = Config.fromfile(config_path)
+
+    backbone = config.model.backbone
+    bbox_head = config.model.bbox_head
+
+    # Update backbone config
+    if 'pool_mod' in backbone:
+        backbone.pop('pool_mod')
+
+    if 'sa_cfg' not in backbone:
+        backbone['sa_cfg'] = dict(
+            type='PointSAModule',
+            pool_mod='max',
+            use_xyz=True,
+            normalize_xyz=True)
+
+    if 'type' not in bbox_head.vote_aggregation_cfg:
+        bbox_head.vote_aggregation_cfg['type'] = 'PointSAModule'
+
+    # Update bbox_head config
+    if 'pred_layer_cfg' not in bbox_head:
+        bbox_head['pred_layer_cfg'] = dict(
+            in_channels=128, shared_conv_channels=(128, 128), bias=True)
+
+    if 'feat_channels' in bbox_head:
+        bbox_head.pop('feat_channels')
+
+    # the old key name carries a typo ("moudule"); move it to the fixed name
+    if 'vote_moudule_cfg' in bbox_head:
+        bbox_head['vote_module_cfg'] = bbox_head.pop('vote_moudule_cfg')
+
+    if bbox_head.vote_aggregation_cfg.use_xyz:
+        bbox_head.vote_aggregation_cfg.mlp_channels[0] -= 3
+
+    return config
+
+
+def main():
+    """Convert keys in checkpoints for VoteNet.
+
+    There can be some breaking changes during the development of mmdetection3d,
+    and this tool is used for upgrading checkpoints trained with old versions
+    (before v0.6.0) to the latest one.
+
+    The checkpoint given on the command line is loaded, its state dict keys
+    are deleted / renamed / split as described by ``DEL_KEYS``,
+    ``RENAME_PREFIX`` and ``EXTRACT_KEYS``, the result is verified by a
+    strict load into a freshly built detector and finally saved to
+    ``args.out``.
+
+    Raises:
+        NotImplementedError: If the dataset type stored in the checkpoint
+            config is neither 'ScanNetDataset' nor 'SUNRGBDDataset'.
+    """
+    args = parse_args()
+    checkpoint = torch.load(args.checkpoint)
+    # the model config is read from the text stored in the checkpoint meta
+    cfg = parse_config(checkpoint['meta']['config'])
+    # Build the model and load checkpoint
+    model = build_detector(
+        cfg.model,
+        train_cfg=cfg.get('train_cfg'),
+        test_cfg=cfg.get('test_cfg'))
+    orig_ckpt = checkpoint['state_dict']
+    # work on a copy of the key mapping: keys are popped and renamed below,
+    # while `orig_ckpt` is still read in the extraction step
+    converted_ckpt = orig_ckpt.copy()
+
+    # number of classes used to split `conv_out`, chosen by dataset type
+    if cfg['dataset_type'] == 'ScanNetDataset':
+        NUM_CLASSES = 18
+    elif cfg['dataset_type'] == 'SUNRGBDDataset':
+        NUM_CLASSES = 10
+    else:
+        raise NotImplementedError
+
+    # old key fragment -> new key fragment
+    RENAME_PREFIX = {
+        'bbox_head.conv_pred.0': 'bbox_head.conv_pred.shared_convs.layer0',
+        'bbox_head.conv_pred.1': 'bbox_head.conv_pred.shared_convs.layer1'
+    }
+
+    # keys that are dropped from the converted checkpoint
+    DEL_KEYS = [
+        'bbox_head.conv_pred.0.bn.num_batches_tracked',
+        'bbox_head.conv_pred.1.bn.num_batches_tracked'
+    ]
+
+    # new key -> (old key, list of (start, end) slices along dim 0).
+    # An `end` of -1 is a marker for "up to the end of the tensor", see the
+    # extraction loop below.
+    EXTRACT_KEYS = {
+        'bbox_head.conv_pred.conv_cls.weight':
+        ('bbox_head.conv_pred.conv_out.weight', [(0, 2), (-NUM_CLASSES, -1)]),
+        'bbox_head.conv_pred.conv_cls.bias':
+        ('bbox_head.conv_pred.conv_out.bias', [(0, 2), (-NUM_CLASSES, -1)]),
+        'bbox_head.conv_pred.conv_reg.weight':
+        ('bbox_head.conv_pred.conv_out.weight', [(2, -NUM_CLASSES)]),
+        'bbox_head.conv_pred.conv_reg.bias':
+        ('bbox_head.conv_pred.conv_out.bias', [(2, -NUM_CLASSES)])
+    }
+
+    # Delete some useless keys
+    # (`pop` without a default: a missing key raises KeyError)
+    for key in DEL_KEYS:
+        converted_ckpt.pop(key)
+
+    # Rename keys with specific prefix
+    # The renames are collected first and applied afterwards, so the dict is
+    # not modified while it is being iterated. Note that `in` is a substring
+    # test, not a strict prefix test.
+    RENAME_KEYS = dict()
+    for old_key in converted_ckpt.keys():
+        for rename_prefix in RENAME_PREFIX.keys():
+            if rename_prefix in old_key:
+                new_key = old_key.replace(rename_prefix,
+                                          RENAME_PREFIX[rename_prefix])
+                RENAME_KEYS[new_key] = old_key
+    for new_key, old_key in RENAME_KEYS.items():
+        converted_ckpt[new_key] = converted_ckpt.pop(old_key)
+
+    # Extract weights and rename the keys
+    for new_key, (old_key, indices) in EXTRACT_KEYS.items():
+        # always slice the untouched tensor from the original state dict;
+        # the same old key feeds several new keys
+        cur_layers = orig_ckpt[old_key]
+        converted_layers = []
+        for (start, end) in indices:
+            if end != -1:
+                converted_layers.append(cur_layers[start:end])
+            else:
+                # end == -1: take everything from `start` to the last row
+                converted_layers.append(cur_layers[start:])
+        converted_layers = torch.cat(converted_layers, 0)
+        converted_ckpt[new_key] = converted_layers
+        # the old fused key is removed the first time it is encountered
+        if old_key in converted_ckpt.keys():
+            converted_ckpt.pop(old_key)
+
+    # Check the converted checkpoint by loading to the model
+    load_state_dict(model, converted_ckpt, strict=True)
+    checkpoint['state_dict'] = converted_ckpt
+    torch.save(checkpoint, args.out)
+
+
+if __name__ == '__main__':
+    main()
--- a/docker-hub/FlashOCC/Flashocc/tools/model_converters/publish_model.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/model_converters/publish_model.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import subprocess
+
+import torch
+
+
+def parse_args():
+    """Parse the positional arguments ``in_file`` and ``out_file``.
+
+    Returns:
+        argparse.Namespace: The parsed command-line arguments.
+    """
+    positionals = (
+        ('in_file', 'input checkpoint filename'),
+        ('out_file', 'output checkpoint filename'),
+    )
+    parser = argparse.ArgumentParser(
+        description='Process a checkpoint to be published')
+    for arg_name, help_text in positionals:
+        parser.add_argument(arg_name, help=help_text)
+    return parser.parse_args()
+
+
+def process_checkpoint(in_file, out_file):
+    """Strip a checkpoint for publishing and tag its name with its hash.
+
+    The checkpoint is loaded on CPU, its ``optimizer`` entry (if any) is
+    removed, and the result is saved to ``out_file``. The saved file is then
+    renamed to ``<out_file without .pth>-<first 8 sha256 hex chars>.pth``.
+
+    Args:
+        in_file (str): Path of the checkpoint to process.
+        out_file (str): Path the processed checkpoint is first written to.
+
+    Raises:
+        subprocess.CalledProcessError: If ``sha256sum`` or ``mv`` fails.
+    """
+    checkpoint = torch.load(in_file, map_location='cpu')
+    # remove optimizer for smaller file size
+    if 'optimizer' in checkpoint:
+        del checkpoint['optimizer']
+    # if it is necessary to remove some sensitive data in checkpoint['meta'],
+    # add the code here.
+    torch.save(checkpoint, out_file)
+    sha = subprocess.check_output(['sha256sum', out_file]).decode()
+    # `str.rstrip('.pth')` removes any trailing '.', 'p', 't' and 'h'
+    # characters (e.g. 'latest.pth' -> 'lates'), so drop the suffix
+    # explicitly instead.
+    if out_file.endswith('.pth'):
+        out_stem = out_file[:-len('.pth')]
+    else:
+        out_stem = out_file
+    final_file = f'{out_stem}-{sha[:8]}.pth'
+    # wait for the move to finish and fail loudly if it does not succeed
+    subprocess.run(['mv', out_file, final_file], check=True)
+
+
+def main():
+    """Process the checkpoint named on the command line for publishing."""
+    cli = parse_args()
+    process_checkpoint(in_file=cli.in_file, out_file=cli.out_file)
+
+
+if __name__ == '__main__':
+    main()
--- a/docker-hub/FlashOCC/Flashocc/tools/model_converters/regnet2mmdet.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/model_converters/regnet2mmdet.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+from collections import OrderedDict
+
+import torch
+
+
+def convert_stem(model_key, model_weight, state_dict, converted_names):
+    """Store a stem weight under its renamed key.
+
+    ``stem.conv`` becomes ``conv1`` and ``stem.bn`` becomes ``bn1``. The
+    weight is written into ``state_dict`` and the original key is recorded
+    in ``converted_names``.
+    """
+    renamed = model_key.replace('stem.conv', 'conv1')
+    renamed = renamed.replace('stem.bn', 'bn1')
+    converted_names.add(model_key)
+    state_dict[renamed] = model_weight
+    print(f'Convert {model_key} to {renamed}')
+
+
+def convert_head(model_key, model_weight, state_dict, converted_names):
+    """Store a head weight under its renamed key (``head.fc`` -> ``fc``).
+
+    The weight is written into ``state_dict`` and the original key is
+    recorded in ``converted_names``.
+    """
+    renamed = model_key.replace('head.fc', 'fc')
+    converted_names.add(model_key)
+    state_dict[renamed] = model_weight
+    print(f'Convert {model_key} to {renamed}')
+
+
+def convert_reslayer(model_key, model_weight, state_dict, converted_names):
+    """Store a residual stage weight under its renamed key.
+
+    A source key has the form ``s<stage>.b<block>.<module>...``. It is
+    mapped to ``layer<stage>.<block - 1>.<target>.<last key part>`` where
+    the target is ``downsample.1`` / ``downsample.0`` for the ``bn`` /
+    ``proj`` modules of the first block, or ``conv1..3`` / ``bn1..3`` for
+    the ``a`` / ``b`` / ``c`` (``*_bn``) sub-modules of ``f``.
+
+    Args:
+        model_key (str): Source key of the weight.
+        model_weight: Value stored under the new key.
+        state_dict (dict): Target mapping, updated in place.
+        converted_names (set): Set of converted source keys, updated in
+            place.
+
+    Raises:
+        ValueError: If the key does not match any supported pattern.
+    """
+    # sub-module of `f` -> target module name
+    f_sub_modules = {
+        'a_bn': 'bn1',
+        'b_bn': 'bn2',
+        'c_bn': 'bn3',
+        'a': 'conv1',
+        'b': 'conv2',
+        'c': 'conv3',
+    }
+
+    split_keys = model_key.split('.')
+    layer, block, module = split_keys[:3]
+    block_id = int(block[1:])
+    layer_name = f'layer{int(layer[1:])}'
+    block_name = f'{block_id - 1}'
+    param_name = split_keys[-1]
+
+    if block_id == 1 and module == 'bn':
+        new_key = f'{layer_name}.{block_name}.downsample.1.{param_name}'
+    elif block_id == 1 and module == 'proj':
+        new_key = f'{layer_name}.{block_name}.downsample.0.{param_name}'
+    elif module == 'f':
+        sub_module = split_keys[3] if len(split_keys) > 3 else None
+        module_name = f_sub_modules.get(sub_module)
+        if module_name is None:
+            # An unknown sub-module used to fall through with `module_name`
+            # unbound; report it like any other unsupported key instead.
+            raise ValueError(f'Unsupported conversion of key {model_key}')
+        new_key = f'{layer_name}.{block_name}.{module_name}.{param_name}'
+    else:
+        raise ValueError(f'Unsupported conversion of key {model_key}')
+    print(f'Convert {model_key} to {new_key}')
+    state_dict[new_key] = model_weight
+    converted_names.add(model_key)
+
+
+def convert(src, dst):
+    """Convert keys in pycls pretrained RegNet models to mmdet style.
+
+    Args:
+        src (str): Path of the source checkpoint; its ``model_state`` entry
+            is converted.
+        dst (str): Path the converted checkpoint is saved to, as a dict with
+            a single ``state_dict`` entry.
+    """
+    # load the source checkpoint and take its weights
+    blobs = torch.load(src)['model_state']
+
+    # convert to pytorch style
+    state_dict = OrderedDict()
+    converted_names = set()
+    for key, weight in blobs.items():
+        # pick the converter by key pattern; keys matching none are skipped
+        if 'stem' in key:
+            converter = convert_stem
+        elif 'head' in key:
+            converter = convert_head
+        elif key.startswith('s'):
+            converter = convert_reslayer
+        else:
+            continue
+        converter(key, weight, state_dict, converted_names)
+
+    # check if all layers are converted
+    for key in blobs:
+        if key in converted_names:
+            continue
+        print(f'not converted: {key}')
+
+    # save checkpoint
+    torch.save(dict(state_dict=state_dict), dst)
+
+
+def main():
+    """Parse ``src`` and ``dst`` from the command line and run ``convert``."""
+    positionals = (
+        ('src', 'src detectron model path'),
+        ('dst', 'save path'),
+    )
+    parser = argparse.ArgumentParser(description='Convert model keys')
+    for arg_name, help_text in positionals:
+        parser.add_argument(arg_name, help=help_text)
+    cli = parser.parse_args()
+    convert(cli.src, cli.dst)
+
+
+if __name__ == '__main__':
+    main()
--- a/docker-hub/FlashOCC/Flashocc/tools/plot_fps_performance.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/plot_fps_performance.py
+import matplotlib.pyplot as plt
+import mpl_toolkits.axisartist as axisartist
+# fig, _ = plt.subplots(3, 1, figsize=(5, 12))
+fig, _ = plt.subplots(1, 3, figsize=(15, 5))
+fig.set_tight_layout(True)
+# 设置全局颜色
+plt.rcParams['axes.prop_cycle'] = plt.cycler(color=['steelblue'])
+
+# plt.subplot(3, 1, 3)
+plt.subplot(1, 3, 3)
+
+
+# ax = axisartist.Subplot(fig, 111)
+# #将绘图区对象添加到画布中
+# fig.add_axes(ax)
+# #通过set_axisline_style方法设置绘图区的底部及左侧坐标轴样式
+# #"-|>"代表实心箭头："->"代表空心箭头
+# ax.axis["bottom"].set_axisline_style("->", size = 1.5)
+# ax.axis["left"].set_axisline_style("->", size = 1.5)
+# #通过set_visible方法设置绘图区的顶部及右侧坐标轴隐藏
+# ax.axis["top"].set_visible(False)
+# ax.axis["right"].set_visible(False)
+
+fontsize_ = 10
+
+# SparseOCC
+fps = [17.3]
+ray_iou = [14.1]
+labels = ['SparseOcc(8f)']
+plt.scatter(fps, ray_iou, color='dodgerblue')
+# 添加文本
+plt.text(fps[0]+1.5, ray_iou[0]-0.1, labels[0], fontsize=fontsize_, ha='center', va='top')
+
+# Panoptic-FlashOcc
+# fps = [29.0, 22.6, 22.0, 20.3] # 3090
+fps = [39.8, 35.2, 30.4, 30.2] # a100-80g
+# ray_iou = [12.6, 12.9, 14.2, 15.8]
+ray_iou = [12.89, 13.18, 14.52, 15.96]
+labels = ['Panoptic-\nFlashOcc-Tiny(1f)', 'Panoptic-\nFlashOcc(1f)', 'Panoptic-\nFlashOcc(2f)', 'Panoptic-\nFlashOcc(8f)']
+plt.scatter(fps, ray_iou, color='orange')
+# 添加文本
+plt.text(fps[0]-3.5, ray_iou[0]+0.0, labels[0], fontsize=fontsize_, ha='center', va='top')
+plt.text(fps[1]-3.0, ray_iou[1]+0.3, labels[1], fontsize=fontsize_, ha='center', va='top')
+plt.text(fps[2]+0.4, ray_iou[2], labels[2], fontsize=fontsize_, ha='left', va='bottom')
+plt.text(fps[3]+2., ray_iou[3]-0.1, labels[3], fontsize=fontsize_, ha='center', va='bottom')
+# 连接散点并画线
+plt.plot(fps, ray_iou, color='orange', linestyle='-')  # 修改线型
+plt.grid(True)
+plt.grid(color='gray', linestyle='--', linewidth=1, alpha=0.3)
+
+# 设置字体大小和粗细
+font = {'family': 'times new roman',
+        'color':  'black',
+        'weight': 'normal',
+        'size': 16,
+        }
+# 设置图表标题和坐标轴标签
+plt.xlabel('FPS (Hz)', fontdict=font)
+plt.ylabel('Occ3D-nuScenes (RayPQ)', fontdict=font)
+
+# 设置 y 轴范围
+plt.ylim(11.5, 16.5)
+# 设置 y 轴刻度
+plt.yticks([12, 13, 14, 15, 16])
+
+# 设置 x 轴范围
+# plt.xlim(16, 40)
+plt.xlim(0, 50)
+# Set the x-axis ticks
+# plt.xticks([15, 20, 25, 30, 35, 40])
+plt.xticks([10, 20, 30, 40, 50])
+
+
+# plt.subplot(3, 1, 2)
+plt.subplot(1, 3, 2)
+# BEVFormer
+fps = [3.0]
+ray_iou = [23.7]
+labels = ['BEVFormer']
+plt.scatter(fps, ray_iou, color='dodgerblue')
+# 添加文本
+plt.text(fps[0]+3.2, ray_iou[0]+0.2, labels[0], fontsize=fontsize_, ha='center', va='top')
+
+# FB-Occ
+fps = [10.3]
+ray_iou = [27.9]
+labels = ['FB-Occ']
+plt.scatter(fps, ray_iou, color='dodgerblue')
+# 添加文本
+plt.text(fps[0], ray_iou[0]-0.2, labels[0], fontsize=fontsize_, ha='center', va='top')
+
+# SparseOCC
+fps = [17.3, 12.5]
+ray_iou = [30.3, 30.9]
+labels = ['SparseOcc(8f)', 'SparseOcc(16f)']
+plt.scatter(fps, ray_iou, color='dodgerblue')
+# 添加文本
+plt.text(fps[0], ray_iou[0]-0.2, labels[0], fontsize=fontsize_, ha='center', va='top')
+plt.text(fps[1], ray_iou[1]+0.2, labels[1], fontsize=fontsize_, ha='center', va='bottom')
+# 连接散点并画线
+plt.plot(fps, ray_iou, color='dodgerblue', linestyle='-')  # 修改线型
+
+# Panoptic-FlashOcc
+# fps = [29.0, 22.6, 22.0, 20.3] # 3090
+fps = [43.9, 38.7, 35.9, 35.6] # a100 80g
+ray_iou = [29.1, 29.4, 30.3, 31.6]
+labels = ['Panoptic-\nFlashOcc-Tiny(1f)', 'Panoptic-\nFlashOcc(1f)', 'Panoptic-\nFlashOcc(2f)', 'Panoptic-\nFlashOcc(8f)']
+plt.scatter(fps, ray_iou, color='orange')
+# 添加文本
+plt.text(fps[0]-2.0, ray_iou[0]-0.2, labels[0], fontsize=fontsize_, ha='center', va='top')
+plt.text(fps[1]-3.5, ray_iou[1]+0.4, labels[1], fontsize=fontsize_, ha='center', va='top')
+plt.text(fps[2]-7.0, ray_iou[2]-0.3, labels[2], fontsize=fontsize_, ha='left', va='bottom')
+plt.text(fps[3]-4.0, ray_iou[3]-0.5, labels[3], fontsize=fontsize_, ha='center', va='bottom')
+# 连接散点并画线
+plt.plot(fps, ray_iou, color='orange', linestyle='-')  # 修改线型
+plt.grid(True)
+plt.grid(color='gray', linestyle='--', linewidth=1, alpha=0.3)
+
+# 设置字体大小和粗细
+font = {'family': 'times new roman',
+        'color':  'black',
+        'weight': 'normal',
+        'size': 16,
+        }
+# 设置图表标题和坐标轴标签
+plt.xlabel('FPS (Hz)', fontdict=font)
+plt.ylabel('Occ3D-nuScenes (mIoU)', fontdict=font)
+
+# 设置 y 轴范围
+plt.ylim(23, 33)
+# 设置 y 轴刻度
+plt.yticks([24, 26, 28, 30, 32])
+
+# 设置 x 轴范围
+plt.xlim(2, 45)
+# Set the x-axis ticks
+# plt.xticks([5, 10, 15, 20, 25, 30, 35])
+plt.xticks([10, 20, 30, 40, 50])
+# plt.xticks([15, 20, 25, 30, 35, 40, 45])
+
+
+
+
+# plt.subplot(3, 1, 1)
+plt.subplot(1, 3, 1)
+fps = [2.1, 5.4, 3.2, 7.6]
+ray_iou = [32.4, 29.6, 32.6, 33.5]
+labels = ['BEVFormer', 'BEVDet-Occ', 'BEVDet-Occ-\nLongterm', 'FB-Occ']
+
+# 绘制散点图
+plt.scatter(fps, ray_iou, color='dodgerblue')
+
+# 添加文本
+for i in range(len(fps)):
+    if labels[i] == 'BEVDet-Occ-\nLongterm':
+        plt.text(fps[i]+5.3, ray_iou[i]-0.2, labels[i], fontsize=fontsize_, ha='center', va='bottom')  # 通过减去0.5调整文本位置
+    elif labels[i] == 'BEVFormer':
+        plt.text(fps[i]+2.3, ray_iou[i]-0.2, labels[i], fontsize=fontsize_, ha='center', va='top')  # 通过减去0.5调整文本位置
+    elif labels[i] == 'BEVDet-Occ':
+        plt.text(fps[i]+0.2, ray_iou[i]+0.5, labels[i], fontsize=fontsize_, ha='center', va='top')  # 通过减去0.5调整文本位置
+    else:
+        plt.text(fps[i]+0.2, ray_iou[i]+0.4, labels[i], fontsize=fontsize_, ha='center', va='top')  # 通过减去0.5调整文本位置
+
+
+# SparseOCC
+fps = [17.3, 12.5]
+ray_iou = [34.0, 35.1]
+labels = ['SparseOcc(8f)', 'SparseOcc(16f)']
+plt.scatter(fps, ray_iou, color='dodgerblue')
+# 添加文本
+plt.text(fps[0], ray_iou[0]-0.2, labels[0], fontsize=fontsize_, ha='center', va='top')
+plt.text(fps[1], ray_iou[1]+0.2, labels[1], fontsize=fontsize_, ha='center', va='bottom')
+# 连接散点并画线
+plt.plot(fps, ray_iou, color='dodgerblue', linestyle='-')  # 修改线型
+
+# Panoptic-FlashOcc
+# fps = [29.0, 22.6, 22.0, 20.3]
+fps = [43.9, 38.7, 35.9, 35.6] # a100 80g
+ray_iou = [34.81, 35.22, 36.76, 38.50]
+labels = ['Panoptic-\nFlashOcc-Tiny(1f)', 'Panoptic-\nFlashOcc(1f)', 'Panoptic-\nFlashOcc(2f)', 'Panoptic-\nFlashOcc(8f)']
+plt.scatter(fps, ray_iou, color='orange')
+# 添加文本
+plt.text(fps[0]-4.0, ray_iou[0]+0.0, labels[0], fontsize=fontsize_, ha='center', va='top')
+plt.text(fps[1]-4.2, ray_iou[1]+0.4, labels[1], fontsize=fontsize_, ha='center', va='top')
+plt.text(fps[2]-8.5, ray_iou[2]-0.3, labels[2], fontsize=fontsize_, ha='left', va='bottom')
+plt.text(fps[3]-4.0, ray_iou[3]-0.5, labels[3], fontsize=fontsize_, ha='center', va='bottom')
+# 连接散点并画线
+plt.plot(fps, ray_iou, color='orange', linestyle='-')  # 修改线型
+plt.grid(True)
+plt.grid(color='gray', linestyle='--', linewidth=1, alpha=0.3)
+
+# 设置字体大小和粗细
+font = {'family': 'times new roman',
+        'color':  'black',
+        'weight': 'normal',
+        'size': 16,
+        }
+# 设置图表标题和坐标轴标签
+plt.xlabel('FPS (Hz)', fontdict=font)
+plt.ylabel('Occ3D-nuScenes (RayIoU)', fontdict=font)
+
+# 设置 y 轴范围
+plt.ylim(29, 39)
+# 设置 y 轴刻度
+plt.yticks([30, 32, 34, 36, 38])
+
+# 设置 x 轴范围
+plt.xlim(0, 45)
+# 设置 y 轴刻度
+# plt.xticks([0, 5, 10, 15, 20, 25, 30, 35])
+plt.xticks([10, 20, 30, 40, 50])
+
+# 保存图像
+plt.savefig('scatter_plot.png')
+plt.savefig('scatter_plot.pdf')
+# 显示图表
+plt.show()