add code

d2b71343 · 雍大凯 · 69e57885 · d2b71343 · d2b71343 · d2b71343
Commit d2b71343 authored Apr 08, 2026 by 雍大凯
20 changed files
--- a/docker-hub/FlashOCC/Flashocc/tools/analysis_tools/benchmark.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/analysis_tools/benchmark.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import sys
+import argparse
+import time
+import os
+import torch
+from mmcv import Config
+from mmcv.parallel import MMDataParallel
+from mmcv.runner import load_checkpoint, wrap_fp16_model
+
+from mmdet3d.datasets import build_dataloader, build_dataset
+from mmdet3d.models import build_detector
+from tools.misc.fuse_conv_bn import fuse_module
+
+# Put the current working directory first on the module search path.
+# NOTE(review): this statement runs after the
+# `from tools.misc.fuse_conv_bn import fuse_module` import above, so it cannot
+# help resolve that import - confirm whether the order is intended.
+sys.path.insert(0, os.getcwd())
+# Print the resulting search path (debug output on every run).
+print(sys.path)
+
+
+def parse_args():
+    """Parse the command-line arguments of the benchmark script.
+
+    Returns:
+        argparse.Namespace: The parsed arguments. ``samples`` and
+            ``log_interval`` are integers, the flags are booleans.
+    """
+    parser = argparse.ArgumentParser(description='MMDet benchmark a model')
+    parser.add_argument('config', help='test config file path')
+    parser.add_argument('checkpoint', help='checkpoint file')
+    # type=int matters: without it a value passed on the command line stays
+    # a string, so `(i + 1) == args.samples` never matches and
+    # `(i + 1) % args.log_interval` raises TypeError in main().
+    parser.add_argument(
+        '--samples', type=int, default=500, help='samples to benchmark')
+    parser.add_argument(
+        '--log-interval', type=int, default=50, help='interval of logging')
+    parser.add_argument(
+        '--fuse-conv-bn',
+        action='store_true',
+        help='Whether to fuse conv and bn, this will slightly increase'
+        'the inference speed')
+    # Both flags are forwarded unchanged to the model call in main().
+    parser.add_argument(
+        '--w_pano',
+        action='store_true')
+    parser.add_argument(
+        '--w_panoproc',
+        action='store_true')
+    parser.add_argument(
+        '--no-acceleration',
+        action='store_true',
+        help='Omit the pre-computation acceleration')
+    args = parser.parse_args()
+    return args
+
+def main():
+    """Benchmark the inference speed of a model on the test split.
+
+    Builds the test dataloader and the detector described by the config,
+    loads the checkpoint, then runs single-sample inference on GPU 0 and
+    prints the throughput averaged over the iterations that follow the
+    warm-up phase.
+    """
+    args = parse_args()
+
+    cfg = Config.fromfile(args.config)
+    # set cudnn_benchmark
+    if cfg.get('cudnn_benchmark', False):
+        torch.backends.cudnn.benchmark = True
+    cfg.model.pretrained = None
+    cfg.data.test.test_mode = True
+
+    # import modules from plugin/xx, registry will be updated
+    if hasattr(cfg, 'plugin') and cfg.plugin:
+        import importlib
+        if hasattr(cfg, 'plugin_dir'):
+            # directory part of plugin_dir, with '/' turned into '.'
+            _module_path = os.path.dirname(cfg.plugin_dir).replace('/', '.')
+            print(_module_path)
+        else:
+            # import dir is the dirpath for the config file
+            _module_path = os.path.dirname(args.config).replace('/', '.')
+        importlib.import_module(_module_path)
+
+    # build the dataloader
+    # TODO: support multiple images per gpu (only minor changes are needed)
+    dataset = build_dataset(cfg.data.test)
+    data_loader = build_dataloader(
+        dataset,
+        samples_per_gpu=1,
+        workers_per_gpu=0,
+        dist=False,
+        shuffle=False)
+
+    # build the model and load checkpoint
+    if not args.no_acceleration:
+        cfg.model.img_view_transformer.accelerate = True
+    cfg.model.train_cfg = None
+    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
+    fp16_cfg = cfg.get('fp16', None)
+    if fp16_cfg is not None:
+        wrap_fp16_model(model)
+    load_checkpoint(model, args.checkpoint, map_location='cpu')
+    if args.fuse_conv_bn:
+        model = fuse_module(model)
+
+    model = MMDataParallel(model, device_ids=[0])
+
+    model.eval()
+
+    # the first several iterations may be very slow so skip them
+    num_warmup = 5
+    pure_inf_time = 0
+    # Coerce here as well so the comparisons below work even when the
+    # values arrive as strings from the command line.
+    num_samples = int(args.samples)
+    log_interval = int(args.log_interval)
+
+    # benchmark with several samples and take the average
+    for i, data in enumerate(data_loader):
+
+        torch.cuda.synchronize()
+        start_time = time.perf_counter()
+
+        with torch.no_grad():
+            model(return_loss=False, rescale=True,
+                  w_pano=args.w_pano,
+                  w_panoproc=args.w_panoproc,
+                  **data)
+
+        torch.cuda.synchronize()
+        elapsed = time.perf_counter() - start_time
+
+        if i >= num_warmup:
+            pure_inf_time += elapsed
+            if (i + 1) % log_interval == 0:
+                fps = (i + 1 - num_warmup) / pure_inf_time
+                print(f'Done image [{i + 1:<3}/ {num_samples}], '
+                      f'fps: {fps:.1f} img / s')
+
+        if (i + 1) == num_samples:
+            # `elapsed` of this iteration is already part of pure_inf_time
+            # (added above); adding it again would count the last sample
+            # twice and bias the reported fps downwards.
+            num_timed = i + 1 - num_warmup
+            if num_timed <= 0:
+                print(f'No sample was timed: --samples ({num_samples}) must '
+                      f'exceed the {num_warmup} warm-up iterations')
+                break
+            fps = num_timed / pure_inf_time
+            print(f'Overall \nfps: {fps:.2f} img / s '
+                  f'\ninference time: {1000 / fps:.2f} ms')
+            break
+
+
+# Run the benchmark only when the file is executed as a script.
+if __name__ == '__main__':
+    main()
--- a/docker-hub/FlashOCC/Flashocc/tools/analysis_tools/benchmark_sequential.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/analysis_tools/benchmark_sequential.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import time
+import os
+import sys
+import torch
+from mmcv import Config
+from mmcv.parallel import MMDataParallel
+from mmcv.runner import load_checkpoint, wrap_fp16_model
+
+from mmdet3d.datasets import build_dataloader, build_dataset
+from mmdet3d.models import build_detector
+from tools.misc.fuse_conv_bn import fuse_module
+
+# Put the current working directory first on the module search path.
+# NOTE(review): this statement runs after the
+# `from tools.misc.fuse_conv_bn import fuse_module` import above, so it cannot
+# help resolve that import - confirm whether the order is intended.
+sys.path.insert(0, os.getcwd())
+# Print the resulting search path (debug output on every run).
+print(sys.path)
+
+def parse_args():
+    """Parse the command-line arguments of the sequential benchmark script.
+
+    Returns:
+        argparse.Namespace: The parsed arguments. ``samples`` and
+            ``log_interval`` are integers, the flags are booleans.
+    """
+    parser = argparse.ArgumentParser(description='MMDet benchmark a model')
+    parser.add_argument('config', help='test config file path')
+    parser.add_argument('checkpoint', help='checkpoint file')
+    # type=int matters: without it a value passed on the command line stays
+    # a string, so `(i + 1) == args.samples` never matches and
+    # `(i + 1) % args.log_interval` raises TypeError in main().
+    parser.add_argument(
+        '--samples', type=int, default=400, help='samples to benchmark')
+    parser.add_argument(
+        '--log-interval', type=int, default=50, help='interval of logging')
+    parser.add_argument(
+        '--fuse-conv-bn',
+        action='store_true',
+        help='Whether to fuse conv and bn, this will slightly increase'
+        'the inference speed')
+    # Both flags are forwarded unchanged to the model call in main().
+    parser.add_argument(
+        '--w_pano',
+        action='store_true')
+    parser.add_argument(
+        '--w_panoproc',
+        action='store_true')
+    parser.add_argument(
+        '--no-acceleration',
+        action='store_true',
+        help='Omit the pre-computation acceleration')
+    args = parser.parse_args()
+    return args
+
+
+def main():
+    """Benchmark the sequential (feature-reusing) inference path of a model.
+
+    For every sample the image features are first extracted with
+    ``extract_img_feat(..., pred_prev=True)`` outside of the timed region;
+    only the following model call with ``sequential=True`` is timed. The
+    throughput is averaged over the iterations that follow the warm-up
+    phase and printed.
+    """
+    args = parse_args()
+
+    cfg = Config.fromfile(args.config)
+    # set cudnn_benchmark
+    if cfg.get('cudnn_benchmark', False):
+        torch.backends.cudnn.benchmark = True
+    cfg.model.pretrained = None
+    cfg.data.test.test_mode = True
+
+    # import modules from plugin/xx, registry will be updated
+    if hasattr(cfg, 'plugin') and cfg.plugin:
+        import importlib
+        if hasattr(cfg, 'plugin_dir'):
+            # directory part of plugin_dir, with '/' turned into '.'
+            _module_path = os.path.dirname(cfg.plugin_dir).replace('/', '.')
+            print(_module_path)
+        else:
+            # import dir is the dirpath for the config file
+            _module_path = os.path.dirname(args.config).replace('/', '.')
+        importlib.import_module(_module_path)
+
+    # build the dataloader
+    # TODO: support multiple images per gpu (only minor changes are needed)
+    dataset = build_dataset(cfg.data.test)
+    data_loader = build_dataloader(
+        dataset,
+        samples_per_gpu=1,
+        workers_per_gpu=0,
+        dist=False,
+        shuffle=False)
+
+    # build the model and load checkpoint
+    cfg.model.train_cfg = None
+    # The attribute name (including its spelling) is the config key that is
+    # set by the original script; it is kept as is.
+    cfg.model.align_after_view_transfromation = True
+    if not args.no_acceleration:
+        cfg.model.img_view_transformer.accelerate = True
+    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
+    fp16_cfg = cfg.get('fp16', None)
+    if fp16_cfg is not None:
+        wrap_fp16_model(model)
+    load_checkpoint(model, args.checkpoint, map_location='cpu')
+    if args.fuse_conv_bn:
+        model = fuse_module(model)
+
+    model = MMDataParallel(model, device_ids=[0])
+
+    model.eval()
+
+    # the first several iterations may be very slow so skip them
+    num_warmup = 5
+    pure_inf_time = 0
+    # Coerce here as well so the comparisons below work even when the
+    # values arrive as strings from the command line.
+    num_samples = int(args.samples)
+    log_interval = int(args.log_interval)
+
+    # benchmark with several samples and take the average
+    for i, data in enumerate(data_loader):
+        # Untimed preparation: move the inputs to the GPU and compute the
+        # previous-frame features that the sequential call consumes.
+        inputs = [d.cuda() for d in data['img_inputs'][0]]
+        with torch.no_grad():
+            feat_prev, inputs = model.module.extract_img_feat(
+                inputs, pred_prev=True, img_metas=None)
+        data['img_inputs'][0] = inputs
+
+        torch.cuda.synchronize()
+        start_time = time.perf_counter()
+
+        with torch.no_grad():
+            model(
+                return_loss=False,
+                rescale=True,
+                sequential=True,
+                feat_prev=feat_prev,
+                w_pano=args.w_pano,
+                w_panoproc=args.w_panoproc,
+                **data)
+
+        torch.cuda.synchronize()
+        elapsed = time.perf_counter() - start_time
+
+        if i >= num_warmup:
+            pure_inf_time += elapsed
+            if (i + 1) % log_interval == 0:
+                fps = (i + 1 - num_warmup) / pure_inf_time
+                print(f'Done image [{i + 1:<3}/ {num_samples}], '
+                      f'fps: {fps:.1f} img / s')
+
+        if (i + 1) == num_samples:
+            # `elapsed` of this iteration is already part of pure_inf_time
+            # (added above); adding it again would count the last sample
+            # twice and bias the reported fps downwards.
+            num_timed = i + 1 - num_warmup
+            if num_timed <= 0:
+                print(f'No sample was timed: --samples ({num_samples}) must '
+                      f'exceed the {num_warmup} warm-up iterations')
+                break
+            fps = num_timed / pure_inf_time
+            print(f'Overall \nfps: {fps:.2f} img / s '
+                  f'\ninference time: {1000 / fps:.2f} ms')
+            break
+
+
+# Run the benchmark only when the file is executed as a script.
+if __name__ == '__main__':
+    main()
\ No newline at end of file
--- a/docker-hub/FlashOCC/Flashocc/tools/analysis_tools/benchmark_trt.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/analysis_tools/benchmark_trt.py
+import time
+from typing import Dict, Optional, Sequence, Union
+import os
+from os import path as osp
+import sys
+sys.path.insert(0, os.getcwd())
+
+import tensorrt as trt
+import torch
+import torch.onnx
+from mmcv import Config
+from mmdeploy.backend.tensorrt import load_tensorrt_plugin
+
+try:
+    # If mmdet version > 2.23.0, compat_cfg would be imported and
+    # used from mmdet instead of mmdet3d.
+    from mmdet.utils import compat_cfg
+except ImportError:
+    from mmdet3d.utils import compat_cfg
+
+import argparse
+
+from mmdet3d.core import bbox3d2result
+from mmdet3d.core.bbox.structures.box_3d_mode import LiDARInstance3DBoxes
+from mmdet3d.datasets import build_dataloader, build_dataset
+from mmdet3d.models import build_model
+
+
+def parse_args():
+    """Parse the command-line arguments of the TensorRT benchmark script.
+
+    Returns:
+        argparse.Namespace: The parsed arguments. ``samples`` is an
+            integer; ``postprocessing``, ``eval`` and ``prefetch`` are
+            boolean flags.
+    """
+    parser = argparse.ArgumentParser(description='Deploy BEVDet with Tensorrt')
+    parser.add_argument('config', help='deploy config file path')
+    parser.add_argument('engine', help='checkpoint file')
+    # type=int matters: without it a value passed on the command line stays
+    # a string and `(i + 1) == args.samples` in main() never matches.
+    parser.add_argument(
+        '--samples', type=int, default=500, help='samples to benchmark')
+    parser.add_argument('--postprocessing', action='store_true')
+    parser.add_argument('--eval', action='store_true')
+    parser.add_argument('--prefetch', action='store_true',
+                        help='use prefetch to accelerate the data loading, '
+                             'the inference speed is sightly degenerated due '
+                             'to the computational occupancy of prefetch')
+    args = parser.parse_args()
+    return args
+
+
+def torch_dtype_from_trt(dtype: trt.DataType) -> torch.dtype:
+    """Map a TensorRT data type to the matching torch data type.
+
+    Args:
+        dtype (trt.DataType): The data type in TensorRT.
+
+    Returns:
+        torch.dtype: The corresponding data type in torch.
+
+    Raises:
+        TypeError: If ``dtype`` equals none of the TensorRT types listed
+            in the lookup table below.
+    """
+    # (TensorRT type, torch type) pairs, checked in order with `==`.
+    trt_to_torch = (
+        (trt.bool, torch.bool),
+        (trt.int8, torch.int8),
+        (trt.int32, torch.int32),
+        (trt.float16, torch.float16),
+        (trt.float32, torch.float32),
+    )
+    for trt_dtype, torch_dtype in trt_to_torch:
+        if dtype == trt_dtype:
+            return torch_dtype
+    raise TypeError(f'{dtype} is not supported by torch')
+
+
+class TRTWrapper(torch.nn.Module):
+    """Run a TensorRT engine with torch tensors as inputs and outputs.
+
+    Args:
+        engine (str | trt.ICudaEngine): Path of a serialized engine file,
+            or an engine object that is used directly.
+        output_names (Sequence[str], optional): Names of the output
+            bindings. Defaults to None, in which case every binding that
+            is not an input is treated as an output.
+    """
+
+    def __init__(self,
+                 engine: Union[str, trt.ICudaEngine],
+                 output_names: Optional[Sequence[str]] = None) -> None:
+        super().__init__()
+        self.engine = engine
+        if isinstance(self.engine, str):
+            # A string is a file path: read and deserialize the engine.
+            with trt.Logger() as logger, trt.Runtime(logger) as runtime:
+                with open(self.engine, mode='rb') as f:
+                    engine_bytes = f.read()
+                self.engine = runtime.deserialize_cuda_engine(engine_bytes)
+        self.context = self.engine.create_execution_context()
+        # Iterating the engine yields its binding names.
+        names = list(self.engine)
+        input_names = [
+            name for name in names if self.engine.binding_is_input(name)
+        ]
+        self._input_names = input_names
+
+        if output_names is None:
+            # Keep the engine's binding order instead of going through a
+            # set difference, whose iteration order is not deterministic.
+            output_names = [name for name in names if name not in input_names]
+        self._output_names = output_names
+
+    def forward(self, inputs: Dict[str, torch.Tensor]):
+        """Execute the engine on the current CUDA stream.
+
+        Args:
+            inputs (dict[str, torch.Tensor]): Input tensors keyed by the
+                name of the engine binding they feed.
+
+        Returns:
+            dict[str, torch.Tensor]: Newly allocated CUDA tensors keyed by
+                output binding name. The execution is enqueued
+                asynchronously; no synchronization is done here.
+        """
+        bindings = [None] * (len(self._input_names) + len(self._output_names))
+        # References to the contiguous inputs are held until the engine
+        # has been enqueued. `.contiguous()` creates a temporary tensor for
+        # a non-contiguous input; taking `data_ptr()` from an unreferenced
+        # temporary would leave the binding pointing at memory that can be
+        # handed out again (e.g. to the output tensors allocated below).
+        keep_alive = []
+        for input_name, input_tensor in inputs.items():
+            idx = self.engine.get_binding_index(input_name)
+            input_tensor = input_tensor.contiguous()
+            keep_alive.append(input_tensor)
+            self.context.set_binding_shape(idx, tuple(input_tensor.shape))
+            bindings[idx] = input_tensor.data_ptr()
+
+        # create output tensors
+        outputs = {}
+        device = torch.device('cuda')
+        for output_name in self._output_names:
+            idx = self.engine.get_binding_index(output_name)
+            dtype = torch_dtype_from_trt(self.engine.get_binding_dtype(idx))
+            # The output shape is queried after the input shapes were set.
+            shape = tuple(self.context.get_binding_shape(idx))
+
+            output = torch.zeros(size=shape, dtype=dtype, device=device)
+            outputs[output_name] = output
+            bindings[idx] = output.data_ptr()
+        self.context.execute_async_v2(bindings,
+                                      torch.cuda.current_stream().cuda_stream)
+        return outputs
+
+
+def get_plugin_names():
+    """Return the names of all plugin creators in the TensorRT registry."""
+    registry = trt.get_plugin_registry()
+    creators = registry.plugin_creator_list
+    return [creator.name for creator in creators]
+
+
+def main():
+    """Benchmark a TensorRT engine on the test set, optionally evaluating it.
+
+    The model described by the config (with ``TRT`` appended to its type)
+    is built and used for the BEV-pooling inputs and the post-processing;
+    the forward pass itself is run by ``TRTWrapper`` on the engine file
+    given on the command line. The timed region of each iteration covers
+    the engine execution and, when enabled, the post-processing.
+
+    Raises:
+        ValueError: If the config disables both ``wdet3d`` and ``wocc``.
+    """
+
+    load_tensorrt_plugin()
+
+    args = parse_args()
+
+    if args.eval:
+        args.postprocessing = True
+        print('Warnings: evaluation requirement detected, set '
+              'postprocessing=True for evaluation purpose')
+    cfg = Config.fromfile(args.config)
+    cfg.model.pretrained = None
+    cfg.model.type = cfg.model.type + 'TRT'
+    cfg = compat_cfg(cfg)
+    cfg.gpu_ids = [0]
+
+    if not args.prefetch:
+        cfg.data.test_dataloader.workers_per_gpu = 0
+
+    # import modules from plugin/xx, registry will be updated
+    if hasattr(cfg, 'plugin') and cfg.plugin:
+        import importlib
+        if hasattr(cfg, 'plugin_dir'):
+            # directory part of plugin_dir, with '/' turned into '.'
+            _module_path = os.path.dirname(cfg.plugin_dir).replace('/', '.')
+            print(_module_path)
+        else:
+            # import dir is the dirpath for the config file
+            _module_path = os.path.dirname(args.config).replace('/', '.')
+        importlib.import_module(_module_path)
+
+    # build dataloader
+    assert cfg.data.test.test_mode
+    test_dataloader_default_args = dict(
+        samples_per_gpu=1, workers_per_gpu=2, dist=False, shuffle=False)
+    test_loader_cfg = {
+        **test_dataloader_default_args,
+        **cfg.data.get('test_dataloader', {})
+    }
+    dataset = build_dataset(cfg.data.test)
+    data_loader = build_dataloader(dataset, **test_loader_cfg)
+
+    # build the model
+    cfg.model.train_cfg = None
+    model = build_model(cfg.model, test_cfg=cfg.get('test_cfg'))
+
+    # Which heads are enabled; both default to True when absent.
+    with_det = bool(cfg.model.get('wdet3d', True))
+    with_occ = bool(cfg.model.get('wocc', True))
+    if not (with_det or with_occ):
+        # A plain string cannot be raised; use a real exception type.
+        raise ValueError(' At least one of wdet3d and wocc is set as True!! ')
+
+    # build tensorrt model
+    # Detection contributes 6 outputs per task head; the occupancy output,
+    # when present, follows them as one additional output.
+    num_det_outputs = \
+        6 * len(model.pts_bbox_head.task_heads) if with_det else 0
+    num_outputs = num_det_outputs + (1 if with_occ else 0)
+    trt_model = TRTWrapper(args.engine,
+                           [f'output_{i}' for i in range(num_outputs)])
+    # Index of the occupancy output: right after the detection outputs
+    # (`output_6` for a single task head, `output_0` without detection).
+    occ_output_name = f'output_{num_det_outputs}'
+
+    num_warmup = 50
+    pure_inf_time = 0
+    # Coerce so the comparison below works even for a string value.
+    num_samples = int(args.samples)
+    model_name = model.__class__.__name__
+
+    init_ = True
+    metas = dict()
+    # benchmark with several samples and take the average
+    results = list()
+    for i, data in enumerate(data_loader):
+        if init_:
+            # The BEV-pooling inputs are computed once, from the first
+            # sample, and reused for every following iteration.
+            inputs = [t.cuda() for t in data['img_inputs'][0]]
+            metas_ = None
+            mlp_input = None
+            if model_name in ('FBOCCTRT', 'FBOCC2DTRT'):
+                metas_ = model.get_bev_pool_input(
+                    inputs, img_metas=data['img_metas'])
+            elif model_name == 'BEVDetOCCTRT':
+                metas_ = model.get_bev_pool_input(inputs)
+            elif model_name == 'BEVDepthOCCTRT':
+                metas_, mlp_input = model.get_bev_pool_input(inputs)
+            # For any other model class `metas` stays empty.
+            if metas_ is not None:
+                metas = dict(
+                    ranks_bev=metas_[0].int().contiguous(),
+                    ranks_depth=metas_[1].int().contiguous(),
+                    ranks_feat=metas_[2].int().contiguous(),
+                    interval_starts=metas_[3].int().contiguous(),
+                    interval_lengths=metas_[4].int().contiguous())
+                if model_name == 'BEVDepthOCCTRT':
+                    metas['mlp_input'] = mlp_input
+            init_ = False
+        img = data['img_inputs'][0][0].cuda().squeeze(0).contiguous()
+        # Only the first six entries along dim 0 are fed to the engine.
+        if img.shape[0] > 6:
+            img = img[:6]
+        torch.cuda.synchronize()
+        start_time = time.perf_counter()
+        trt_output = trt_model.forward(dict(img=img, **metas))
+
+        # postprocessing
+        if args.postprocessing:
+            if with_det:
+                trt_output_det = [
+                    trt_output[f'output_{k}'] for k in range(num_det_outputs)
+                ]
+                pred = model.result_deserialize(trt_output_det)
+                img_metas = [dict(box_type_3d=LiDARInstance3DBoxes)]
+                bbox_list = model.pts_bbox_head.get_bboxes(
+                    pred, img_metas, rescale=True)
+                bbox_results = [
+                    bbox3d2result(bboxes, scores, labels)
+                    for bboxes, scores, labels in bbox_list
+                ]
+            if with_occ:
+                # occupancy: List[(Dx, Dy, Dz), (Dx, Dy, Dz), ...]
+                occ_preds = model.occ_head.get_occ(
+                    trt_output[occ_output_name])
+            if args.eval:
+                if with_det and with_occ:
+                    results.append({
+                        'pts_bbox': bbox_results[0],
+                        'pred_occ': occ_preds[0]
+                    })
+                elif with_det:
+                    results.append(bbox_results[0])
+                else:
+                    results.append(occ_preds[0])
+
+        torch.cuda.synchronize()
+        elapsed = time.perf_counter() - start_time
+
+        if i >= num_warmup:
+            pure_inf_time += elapsed
+            if (i + 1) % 50 == 0:
+                fps = (i + 1 - num_warmup) / pure_inf_time
+                print(f'Done image [{i + 1:<3}/ {num_samples}], '
+                      f'fps: {fps:.2f} img / s')
+
+        if (i + 1) == num_samples:
+            # `elapsed` of this iteration is already part of pure_inf_time
+            # (added above); adding it again would count the last sample
+            # twice and bias the reported fps downwards.
+            num_timed = i + 1 - num_warmup
+            if num_timed > 0:
+                fps = num_timed / pure_inf_time
+                print(f'Overall \nfps: {fps:.2f} img / s '
+                      f'\ninference time: {1000/fps:.2f} ms')
+            else:
+                print(f'No sample was timed: --samples ({num_samples}) must '
+                      f'exceed the {num_warmup} warm-up iterations')
+            # For evaluation the loop goes on over the whole dataset.
+            if not args.eval:
+                return
+
+    if not args.eval:
+        # The data loader ended before `--samples` iterations were run.
+        print(f'Dataset exhausted before reaching {num_samples} samples, '
+              'no overall fps reported')
+        return
+
+    eval_kwargs = cfg.get('evaluation', {}).copy()
+    # hard-code way to remove EvalHook args
+    for key in [
+        'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best',
+        'rule'
+    ]:
+        eval_kwargs.pop(key, None)
+    # NOTE(review): `--eval` is a store_true flag, so `metric` is the
+    # boolean True here rather than a metric name - confirm that the
+    # dataset's evaluate() tolerates this.
+    eval_kwargs.update(dict(metric=args.eval))
+    print(dataset.evaluate(results, **eval_kwargs))
+
+
+# Run the benchmark only when the file is executed as a script.
+# main() never returns a value (it only uses bare `return`), so `fps` is
+# always None here and is not used afterwards.
+if __name__ == '__main__':
+    fps = main()
--- a/docker-hub/FlashOCC/Flashocc/tools/analysis_tools/benchmark_view_transformer.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/analysis_tools/benchmark_view_transformer.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import time
+
+import numpy as np
+import torch
+from mmcv import Config
+from mmcv.parallel import MMDataParallel
+from mmcv.runner import load_checkpoint
+
+from mmdet3d.datasets import build_dataloader, build_dataset
+from mmdet3d.models import build_detector
+
+
+def parse_args():
+    """Parse the command-line arguments of the view-transformer benchmark.
+
+    Returns:
+        argparse.Namespace: The parsed arguments. ``samples`` and
+            ``log_interval`` are integers, the flags are booleans.
+    """
+    parser = argparse.ArgumentParser(description='MMDet benchmark a model')
+    parser.add_argument('config', help='test config file path')
+    parser.add_argument('checkpoint', help='checkpoint file')
+    # type=int matters: without it a value passed on the command line stays
+    # a string, so `(i + 1) == args.samples` never matches and
+    # `(i + 1) % args.log_interval` raises TypeError in main().
+    parser.add_argument(
+        '--samples', type=int, default=1000, help='samples to benchmark')
+    parser.add_argument(
+        '--log-interval', type=int, default=50, help='interval of logging')
+    parser.add_argument(
+        '--mem-only',
+        action='store_true',
+        help='Conduct the memory analysis only')
+    parser.add_argument(
+        '--no-acceleration',
+        action='store_true',
+        help='Omit the pre-computation acceleration')
+    args = parser.parse_args()
+    return args
+
+
+def main():
+    """Benchmark the view transformer of a BEVDet model in isolation.
+
+    On the first sample a memory analysis of the pre-computation and of
+    ``view_transform_core`` is printed. Afterwards only the
+    ``view_transform`` call is timed for every sample, and the throughput
+    is averaged over the iterations that follow the warm-up phase.
+
+    Returns:
+        float | None: The overall fps once ``--samples`` iterations were
+            run. None when ``--mem-only`` is given, when the data loader
+            ends earlier, or when no iteration was timed.
+    """
+    args = parse_args()
+
+    cfg = Config.fromfile(args.config)
+    # set cudnn_benchmark
+    if cfg.get('cudnn_benchmark', False):
+        torch.backends.cudnn.benchmark = True
+    cfg.model.pretrained = None
+    cfg.data.test.test_mode = True
+
+    # build the dataloader
+    # TODO: support multiple images per gpu (only minor changes are needed)
+    dataset = build_dataset(cfg.data.test)
+    data_loader = build_dataloader(
+        dataset,
+        samples_per_gpu=1,
+        workers_per_gpu=cfg.data.workers_per_gpu,
+        dist=False,
+        shuffle=False)
+
+    # build the model and load checkpoint
+    if not args.no_acceleration:
+        cfg.model.img_view_transformer.accelerate = True
+    cfg.model.train_cfg = None
+    assert cfg.model.type == 'BEVDet', \
+        'Please use class BEVDet for ' \
+        'view transformation inference ' \
+        'speed estimation instead of %s'% cfg.model.type
+    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
+    load_checkpoint(model, args.checkpoint, map_location='cpu')
+    model = MMDataParallel(model, device_ids=[0])
+
+    model.eval()
+
+    # the first several iterations may be very slow so skip them
+    num_warmup = 100
+    pure_inf_time = 0
+    # Coerce here as well so the comparisons below work even when the
+    # values arrive as strings from the command line.
+    num_samples = int(args.samples)
+    log_interval = int(args.log_interval)
+
+    view_transformer = model.module.img_view_transformer
+    D = view_transformer.D
+    out_channels = view_transformer.out_channels
+    depth_net = view_transformer.depth_net
+    # benchmark with several samples and take the average
+    for i, data in enumerate(data_loader):
+
+        # Untimed preparation: image encoding and depth prediction.
+        with torch.no_grad():
+            img_feat, _ = \
+                model.module.image_encoder(data['img_inputs'][0][0].cuda())
+            B, N, C, H, W = img_feat.shape
+            x = depth_net(img_feat.reshape(B * N, C, H, W))
+            # First D channels: depth logits; next out_channels: features.
+            depth_digit = x[:, :D, ...]
+            tran_feat = x[:, D:D + out_channels, ...]
+            depth = depth_digit.softmax(dim=1)
+        # Named `vt_input` to avoid shadowing the builtin `input`.
+        vt_input = [img_feat] + [d.cuda() for d in data['img_inputs'][0][1:]]
+
+        if i == 0:
+            precomputed_memory_allocated = 0.0
+            if view_transformer.accelerate:
+                start_mem_allocated = torch.cuda.memory_allocated()
+                view_transformer.pre_compute(vt_input)
+                end_mem_allocated = torch.cuda.memory_allocated()
+                precomputed_memory_allocated = \
+                    end_mem_allocated - start_mem_allocated
+                ref_max_mem_allocated = torch.cuda.max_memory_allocated()
+                # occupy the memory (the tensor is deliberately left
+                # referenced and otherwise unused)
+                size = (ref_max_mem_allocated - end_mem_allocated) // 4
+                occupy_tensor = torch.zeros(
+                    size=(size, ), device='cuda', dtype=torch.float32)
+            print('Memory analysis: \n'
+                  'precomputed_memory_allocated : %d B / %.01f MB \n' %
+                  (precomputed_memory_allocated,
+                   precomputed_memory_allocated / 1024 / 1024))
+            start_mem_allocated = torch.cuda.memory_allocated()
+            bev_feat = view_transformer.view_transform_core(
+                vt_input, depth, tran_feat)[0]
+            end_max_mem_allocated = torch.cuda.max_memory_allocated()
+            peak_memory_allocated = \
+                end_max_mem_allocated - start_mem_allocated
+            total_memory_requirement = \
+                precomputed_memory_allocated + peak_memory_allocated
+            print('Memory analysis: \n'
+                  'Memory requirement : %d B / %.01f MB \n' %
+                  (total_memory_requirement,
+                   total_memory_requirement / 1024 / 1024))
+            if args.mem_only:
+                return
+
+        torch.cuda.synchronize()
+        start_time = time.perf_counter()
+        with torch.no_grad():
+            view_transformer.view_transform(vt_input, depth, tran_feat)[0]
+        torch.cuda.synchronize()
+        elapsed = time.perf_counter() - start_time
+
+        if i >= num_warmup:
+            pure_inf_time += elapsed
+            if (i + 1) % log_interval == 0:
+                fps = (i + 1 - num_warmup) / pure_inf_time
+                print(f'Done image [{i + 1:<3}/ {num_samples}], '
+                      f'fps: {fps:.1f} img / s')
+
+        if (i + 1) == num_samples:
+            # `elapsed` of this iteration is already part of pure_inf_time
+            # (added above); adding it again would count the last sample
+            # twice and bias the reported fps downwards.
+            num_timed = i + 1 - num_warmup
+            if num_timed <= 0:
+                print(f'No sample was timed: --samples ({num_samples}) must '
+                      f'exceed the {num_warmup} warm-up iterations')
+                return None
+            fps = num_timed / pure_inf_time
+            print(f'Overall fps: {fps:.1f} img / s')
+            return fps
+
+
+# Script entry point: repeat the benchmark and report mean and standard
+# deviation of the overall fps.
+if __name__ == '__main__':
+    repeat_times = 1
+    fps_list = []
+    for _ in range(repeat_times):
+        fps = main()
+        time.sleep(5)
+        # main() returns None for `--mem-only` runs and when the data
+        # loader ends before `--samples` iterations; such runs carry no
+        # fps value and must not enter the statistics.
+        if fps is not None:
+            fps_list.append(fps)
+    if fps_list:
+        fps_list = np.array(fps_list, dtype=np.float32)
+        print(f'Mean Overall fps: {fps_list.mean():.4f} +'
+              f' {np.sqrt(fps_list.var()):.4f} img / s')
--- a/docker-hub/FlashOCC/Flashocc/tools/analysis_tools/get_flops.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/analysis_tools/get_flops.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import os
+import sys
+sys.path.insert(0, os.getcwd())
+import torch
+from mmcv import Config, DictAction
+
+from mmdet3d.models import build_model
+
+try:
+    from mmcv.cnn import get_model_complexity_info
+except ImportError:
+    raise ImportError('Please upgrade mmcv to >0.6.2')
+
+
+def parse_args():
+    """Parse the command-line arguments of the FLOPs counting script.
+
+    Returns:
+        argparse.Namespace: Parsed arguments with the attributes
+            ``config``, ``shape``, ``modality`` and ``cfg_options``.
+    """
+    cfg_options_help = (
+        'override some settings in the used config, the key-value pair '
+        'in xxx=yyy format will be merged into config file. If the value to '
+        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
+        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
+        'Note that the quotation marks are necessary and that no white space '
+        'is allowed.')
+
+    parser = argparse.ArgumentParser(description='Train a detector')
+    parser.add_argument('config', help='train config file path')
+    # One or more integers describing the input size.
+    parser.add_argument(
+        '--shape',
+        type=int,
+        nargs='+',
+        default=[40000, 4],
+        help='input point cloud size')
+    parser.add_argument(
+        '--modality',
+        type=str,
+        default='point',
+        choices=['point', 'image', 'multi'],
+        help='input data modality')
+    parser.add_argument(
+        '--cfg-options',
+        nargs='+',
+        action=DictAction,
+        help=cfg_options_help)
+    return parser.parse_args()
+
+
+def construct_input(input_shape):
+    """Build the dummy ``img_inputs`` used for the complexity analysis.
+
+    The tensors are created on the GPU when CUDA is available and on the
+    CPU otherwise, matching how ``main`` only moves the model to the GPU
+    when CUDA is available.
+
+    Args:
+        input_shape (tuple[int]): Trailing dimensions of the image tensor;
+            the created tensor has shape ``(1, 6, *input_shape)``.
+
+    Returns:
+        dict: ``{'img_inputs': [...]}`` holding seven tensors: the
+            uninitialized image tensor, a ``(1, 6, 4, 4)`` identity
+            matrix stack (used twice), a ``(1, 6, 3, 3)`` identity matrix
+            stack (used twice), a ``(1, 6, 3)`` tensor of ones and a
+            ``(1, 3, 3)`` identity matrix.
+    """
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+    eye4 = torch.eye(4, dtype=torch.float32, device=device)
+    eye3 = torch.eye(3, dtype=torch.float32, device=device)
+    # Identity matrices broadcast over the six views.
+    rot = eye4.view(1, 1, 4, 4).expand(1, 6, 4, 4)
+    intrins = eye3.view(1, 1, 3, 3).expand(1, 6, 3, 3)
+
+    # Named `dummy_input` to avoid shadowing the builtin `input`.
+    dummy_input = dict(img_inputs=[
+        torch.empty((1, 6, *input_shape), device=device), rot,
+        rot, intrins, intrins,
+        torch.ones((1, 6, 3), device=device),
+        eye3.view(1, 3, 3)
+    ])
+    return dummy_input
+
+
+def main():
+    """Build the model from the config and print its FLOPs and parameters.
+
+    Raises:
+        NotImplementedError: For the ``multi`` modality, or when the model
+            has no ``forward_dummy`` method.
+        ValueError: For the ``image`` modality when ``--shape`` has more
+            than two values.
+    """
+
+    args = parse_args()
+
+    modality = args.modality
+    shape = tuple(args.shape)
+    if modality == 'multi':
+        raise NotImplementedError(
+            'FLOPs counter is currently not supported for models with '
+            'multi-modality input')
+    if modality == 'point':
+        assert len(shape) == 2, 'invalid input shape'
+        input_shape = shape
+    elif modality == 'image':
+        # A single value is used for both spatial dimensions; the channel
+        # dimension is always 3.
+        if len(shape) == 1:
+            input_shape = (3, shape[0], shape[0])
+        elif len(shape) == 2:
+            input_shape = (3, ) + shape
+        else:
+            raise ValueError('invalid input shape')
+
+    cfg = Config.fromfile(args.config)
+    if args.cfg_options is not None:
+        cfg.merge_from_dict(args.cfg_options)
+
+    # import modules from plugin/xx, registry will be updated
+    if hasattr(cfg, 'plugin') and cfg.plugin:
+        import importlib
+        if hasattr(cfg, 'plugin_dir'):
+            # directory part of plugin_dir, with '/' turned into '.'
+            dotted_path = '.'.join(os.path.dirname(cfg.plugin_dir).split('/'))
+            print(dotted_path)
+        else:
+            # import dir is the dirpath for the config file
+            dotted_path = '.'.join(os.path.dirname(args.config).split('/'))
+        importlib.import_module(dotted_path)
+
+    model = build_model(
+        cfg.model,
+        train_cfg=cfg.get('train_cfg'),
+        test_cfg=cfg.get('test_cfg'))
+    if torch.cuda.is_available():
+        model.cuda()
+    model.eval()
+
+    # The complexity analysis calls `forward`, so route it to forward_dummy.
+    if not hasattr(model, 'forward_dummy'):
+        raise NotImplementedError(
+            'FLOPs counter is currently not supported for {}'.format(
+                model.__class__.__name__))
+    model.forward = model.forward_dummy
+
+    flops, params = get_model_complexity_info(
+        model, input_shape, input_constructor=construct_input)
+    split_line = '=' * 30
+    print(f'{split_line}\nInput shape: {input_shape}\n'
+          f'Flops: {flops}\nParams: {params}\n{split_line}')
+    print('!!!Please be cautious if you use the results in papers. '
+          'You may need to check if all ops are supported and verify that the '
+          'flops computation is correct.')
+
+
+if __name__ == '__main__':
+    main()
--- a/docker-hub/FlashOCC/Flashocc/tools/analysis_tools/vis.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/analysis_tools/vis.py
+# Copyright (c) Phigent Robotics. All rights reserved.
+import argparse
+import json
+import os
+import pickle
+
+import cv2
+import numpy as np
+from pyquaternion.quaternion import Quaternion
+
+from mmdet3d.core.bbox.structures.lidar_box3d import LiDARInstance3DBoxes as LB
+
+
+def check_point_in_img(points, height, width):
+    """Return a boolean mask of the points that fall inside an image.
+
+    Column 0 of ``points`` is compared against ``[0, width)`` and column 1
+    against ``[0, height)``; further columns are ignored.
+    """
+    xs, ys = points[:, 0], points[:, 1]
+    inside_x = (xs >= 0) & (xs < width)
+    inside_y = (ys >= 0) & (ys < height)
+    return inside_x & inside_y
+
+
+def depth2color(depth):
+    """Map a scalar depth to a colour on a six-stop ramp.
+
+    ``(depth + 2.5) / 3.0`` is clamped to ``[0, 1]`` and used to interpolate
+    linearly between neighbouring rows of the colour table below.
+
+    Args:
+        depth: scalar value to colourise.
+
+    Returns:
+        tuple: three floats in ``[0, 200]``.
+    """
+    gray = max(0, min((depth + 2.5) / 3.0, 1.0))
+    max_lumi = 200
+    colors = np.array(
+        [[max_lumi, 0, max_lumi], [max_lumi, 0, 0], [max_lumi, max_lumi, 0],
+         [0, max_lumi, 0], [0, max_lumi, max_lumi], [0, 0, max_lumi]],
+        dtype=np.float32)
+    if gray == 1:
+        # top of the ramp: there is no next stop to interpolate towards
+        return tuple(colors[-1].tolist())
+    num_rank = len(colors) - 1
+    # ``np.int`` no longer exists in current NumPy releases; the builtin
+    # ``int`` gives the same index.
+    rank = int(np.floor(gray * num_rank))
+    # fractional position between stop ``rank`` and stop ``rank + 1``
+    diff = (gray - rank / num_rank) * num_rank
+    return tuple(
+        (colors[rank] + (colors[rank + 1] - colors[rank]) * diff).tolist())
+
+
+def lidar2img(points_lidar, camrera_info):
+    """Project lidar-frame points into the pixel plane of one camera.
+
+    Args:
+        points_lidar: (N, 3) array of points.
+        camrera_info: dict providing ``sensor2lidar_rotation``,
+            ``sensor2lidar_translation`` and ``cam_intrinsic``.
+
+    Returns:
+        tuple: ``(points_img, valid)`` where ``points_img`` is (N, 2) and
+        ``valid`` marks points whose camera-frame depth exceeds 0.5.
+    """
+    num_points = points_lidar.shape[0]
+    ones_column = np.ones((num_points, 1), dtype=points_lidar.dtype)
+    homogeneous = np.concatenate([points_lidar, ones_column], axis=1)
+
+    # Build camera -> lidar, then invert it to go the other way.
+    cam_to_lidar = np.eye(4, dtype=np.float32)
+    cam_to_lidar[:3, :3] = camrera_info['sensor2lidar_rotation']
+    cam_to_lidar[:3, 3] = camrera_info['sensor2lidar_translation']
+    lidar_to_cam = np.linalg.inv(cam_to_lidar)
+
+    in_camera = (homogeneous @ lidar_to_cam.T)[:, :3]
+    # Only points sufficiently far in front of the camera are usable.
+    valid = in_camera[:, -1] > 0.5
+
+    # Perspective division followed by the intrinsic matrix.
+    normalized = in_camera / in_camera[:, 2:3]
+    intrinsic = camrera_info['cam_intrinsic']
+    pixels = (normalized @ intrinsic.T)[:, :2]
+    return pixels, valid
+
+
+def get_lidar2global(infos):
+    """Compose the 4x4 lidar-to-global transform of one sample.
+
+    ``infos`` supplies a rotation quaternion and a translation for both the
+    lidar-to-ego and the ego-to-global step; the result is
+    ``ego2global @ lidar2ego``.
+    """
+
+    def _homogeneous(rotation, translation):
+        # 4x4 float32 matrix from a quaternion and a translation vector
+        mat = np.eye(4, dtype=np.float32)
+        mat[:3, :3] = Quaternion(rotation).rotation_matrix
+        mat[:3, 3] = translation
+        return mat
+
+    lidar_to_ego = _homogeneous(infos['lidar2ego_rotation'],
+                                infos['lidar2ego_translation'])
+    ego_to_global = _homogeneous(infos['ego2global_rotation'],
+                                 infos['ego2global_translation'])
+    return ego_to_global @ lidar_to_ego
+
+
+def parse_args():
+    """Parse the command-line options of the box visualization script.
+
+    Returns:
+        argparse.Namespace: the arguments parsed from ``sys.argv``.
+    """
+    parser = argparse.ArgumentParser(description='Visualize the predicted '
+                                     'result of nuScenes')
+    # (flag, keyword arguments) pairs, registered in this order.
+    option_table = [
+        ('res', dict(help='Path to the predicted result in json format')),
+        ('--show-range',
+         dict(type=int, default=50, help='Range of visualization in BEV')),
+        ('--canva-size',
+         dict(type=int, default=1000, help='Size of canva in pixel')),
+        ('--vis-frames',
+         dict(type=int, default=500,
+              help='Number of frames for visualization')),
+        ('--scale-factor',
+         dict(type=int, default=4,
+              help='Trade-off between image-view and bev in size of '
+              'the visualized canvas')),
+        ('--vis-thred',
+         dict(type=float, default=0.3,
+              help='Threshold the predicted results')),
+        ('--draw-gt', dict(action='store_true')),
+        ('--version',
+         dict(type=str, default='val', help='Version of nuScenes dataset')),
+        ('--root_path',
+         dict(type=str, default='./data/nuscenes',
+              help='Path to nuScenes dataset')),
+        ('--save_path',
+         dict(type=str, default='./vis',
+              help='Path to save visualization results')),
+        ('--format',
+         dict(type=str, default='video', choices=['video', 'image'],
+              help='The desired format of the visualization result')),
+        ('--fps', dict(type=int, default=20, help='Frame rate of video')),
+        ('--video-prefix',
+         dict(type=str, default='vis', help='name of video')),
+    ]
+    for flag, options in option_table:
+        parser.add_argument(flag, **options)
+    return parser.parse_args()
+
+
+# Line colours indexed by int(pred_flag): main() uses color_map[1] for
+# predicted boxes and color_map[0] for ground-truth boxes. The tuples are
+# handed to cv2.line as-is (presumably BGR, OpenCV's channel order).
+color_map = {0: (255, 255, 0), 1: (0, 255, 255)}
+
+
+def main():
+    """Draw predicted (and optionally ground-truth) 3D boxes for nuScenes.
+
+    For every visualized frame the box corners are projected into the six
+    camera images and onto a bird's-eye-view canvas that also shows the lidar
+    points; both are fused into one picture that is written either as a jpg
+    per frame or as one frame of an mp4 video, depending on ``--format``.
+    """
+    args = parse_args()
+    # load predicted results
+    with open(args.res, 'r') as res_file:
+        res = json.load(res_file)
+    # load dataset information
+    info_path = \
+        args.root_path + '/bevdetv2-nuscenes_infos_%s.pkl' % args.version
+    # NOTE(review): unpickling executes code stored in the file; only use
+    # info files from a trusted source.
+    with open(info_path, 'rb') as info_file:
+        dataset = pickle.load(info_file)
+    # prepare save path and medium
+    vis_dir = args.save_path
+    os.makedirs(vis_dir, exist_ok=True)
+    print('saving visualized result to %s' % vis_dir)
+    scale_factor = args.scale_factor
+    canva_size = args.canva_size
+    show_range = args.show_range
+    if args.format == 'video':
+        fourcc = cv2.VideoWriter_fourcc(*'MP4V')
+        vout = cv2.VideoWriter(
+            os.path.join(vis_dir, '%s.mp4' % args.video_prefix), fourcc,
+            args.fps, (int(1600 / scale_factor * 3),
+                       int(900 / scale_factor * 2 + canva_size)))
+
+    draw_boxes_indexes_bev = [(0, 1), (1, 2), (2, 3), (3, 0)]
+    draw_boxes_indexes_img_view = [(0, 1), (1, 2), (2, 3), (3, 0), (4, 5),
+                                   (5, 6), (6, 7), (7, 4), (0, 4), (1, 5),
+                                   (2, 6), (3, 7)]
+    views = [
+        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',
+        'CAM_BACK', 'CAM_BACK_RIGHT'
+    ]
+    print('start visualizing results')
+    num_frames = min(args.vis_frames, len(dataset['infos']))
+    for cnt, infos in enumerate(dataset['infos'][:num_frames]):
+        if cnt % 10 == 0:
+            print('%d/%d' % (cnt, num_frames))
+        # collect instances
+        pred_res = res['results'][infos['token']]
+        pred_boxes = [
+            det['translation'] + det['size'] +
+            [Quaternion(det['rotation']).yaw_pitch_roll[0] + np.pi / 2]
+            for det in pred_res
+        ]
+        if len(pred_boxes) == 0:
+            corners_lidar = np.zeros((0, 3), dtype=np.float32)
+        else:
+            pred_boxes = np.array(pred_boxes, dtype=np.float32)
+            boxes = LB(pred_boxes, origin=(0.5, 0.5, 0.0))
+            corners_global = boxes.corners.numpy().reshape(-1, 3)
+            corners_global = np.concatenate(
+                [corners_global,
+                 np.ones([corners_global.shape[0], 1])],
+                axis=1)
+            # predictions are stored in the global frame; bring them back
+            # into the lidar frame of this sample
+            l2g = get_lidar2global(infos)
+            corners_lidar = corners_global @ np.linalg.inv(l2g).T
+            corners_lidar = corners_lidar[:, :3]
+        # one flag per box (8 corners each): True marks a prediction
+        pred_flag = np.ones((corners_lidar.shape[0] // 8, ), dtype=bool)
+        scores = [det['detection_score'] for det in pred_res]
+        if args.draw_gt:
+            # work on a copy so the loaded dataset info stays untouched
+            gt_boxes = np.array(infos['gt_boxes'])
+            gt_boxes[:, -1] = gt_boxes[:, -1] + np.pi / 2
+            # swap columns 3 and 4 of the box array
+            gt_boxes[:, [3, 4]] = gt_boxes[:, [4, 3]]
+            corners_lidar_gt = \
+                LB(gt_boxes,
+                   origin=(0.5, 0.5, 0.5)).corners.numpy().reshape(-1, 3)
+            corners_lidar = np.concatenate([corners_lidar, corners_lidar_gt],
+                                           axis=0)
+            gt_flag = np.ones((corners_lidar_gt.shape[0] // 8), dtype=bool)
+            pred_flag = np.concatenate(
+                [pred_flag, np.logical_not(gt_flag)], axis=0)
+            # ground-truth boxes get score 0 so they sort first below
+            scores = scores + [0 for _ in range(gt_boxes.shape[0])]
+        scores = np.array(scores, dtype=np.float32)
+        sort_ids = np.argsort(scores)
+
+        # image view
+        imgs = []
+        for view in views:
+            img = cv2.imread(infos['cams'][view]['data_path'])
+            # draw instances
+            corners_img, valid = lidar2img(corners_lidar, infos['cams'][view])
+            valid = np.logical_and(
+                valid,
+                check_point_in_img(corners_img, img.shape[0], img.shape[1]))
+            valid = valid.reshape(-1, 8)
+            corners_img = corners_img.reshape(-1, 8, 2).astype(int)
+            for aid in range(valid.shape[0]):
+                for start, end in draw_boxes_indexes_img_view:
+                    if valid[aid, start] and valid[aid, end]:
+                        # .tolist() hands OpenCV plain Python ints
+                        cv2.line(
+                            img,
+                            tuple(corners_img[aid, start].tolist()),
+                            tuple(corners_img[aid, end].tolist()),
+                            color=color_map[int(pred_flag[aid])],
+                            thickness=scale_factor)
+            imgs.append(img)
+
+        # bird-eye-view
+        canvas = np.zeros((int(canva_size), int(canva_size), 3),
+                          dtype=np.uint8)
+        # draw lidar points
+        lidar_points = np.fromfile(infos['lidar_path'], dtype=np.float32)
+        lidar_points = lidar_points.reshape(-1, 5)[:, :3]
+        lidar_points[:, 1] = -lidar_points[:, 1]
+        # map [-show_range, show_range] onto [0, canva_size]
+        lidar_points[:, :2] = \
+            (lidar_points[:, :2] + show_range) / show_range / 2.0 * canva_size
+        for p in lidar_points:
+            if check_point_in_img(
+                    p.reshape(1, 3), canvas.shape[1], canvas.shape[0])[0]:
+                color = depth2color(p[2])
+                cv2.circle(
+                    canvas, (int(p[0]), int(p[1])),
+                    radius=0,
+                    color=color,
+                    thickness=1)
+
+        # draw instances
+        corners_lidar = corners_lidar.reshape(-1, 8, 3)
+        corners_lidar[:, :, 1] = -corners_lidar[:, :, 1]
+        bottom_corners_bev = corners_lidar[:, [0, 3, 7, 4], :2]
+        bottom_corners_bev = \
+            (bottom_corners_bev + show_range) / show_range / 2.0 * canva_size
+        bottom_corners_bev = np.round(bottom_corners_bev).astype(np.int32)
+        center_bev = corners_lidar[:, [0, 3, 7, 4], :2].mean(axis=1)
+        head_bev = corners_lidar[:, [0, 4], :2].mean(axis=1)
+        center_canvas = \
+            (center_bev + show_range) / show_range / 2.0 * canva_size
+        center_canvas = center_canvas.astype(np.int32)
+        head_canvas = (head_bev + show_range) / show_range / 2.0 * canva_size
+        head_canvas = head_canvas.astype(np.int32)
+
+        for rid in sort_ids:
+            score = scores[rid]
+            if score < args.vis_thred and pred_flag[rid]:
+                continue
+            # predictions are dimmed by their score, ground truth is not
+            score = float(min(score * 2.0, 1.0)) if pred_flag[rid] else 1.0
+            color = color_map[int(pred_flag[rid])]
+            line_color = [color[0] * score, color[1] * score,
+                          color[2] * score]
+            for start, end in draw_boxes_indexes_bev:
+                cv2.line(
+                    canvas,
+                    tuple(bottom_corners_bev[rid, start].tolist()),
+                    tuple(bottom_corners_bev[rid, end].tolist()),
+                    line_color,
+                    thickness=1)
+            # heading indicator from the box centre to its front edge
+            cv2.line(
+                canvas,
+                tuple(center_canvas[rid].tolist()),
+                tuple(head_canvas[rid].tolist()),
+                line_color,
+                1,
+                lineType=8)
+
+        # fuse image-view and bev
+        img = np.zeros((900 * 2 + canva_size * scale_factor, 1600 * 3, 3),
+                       dtype=np.uint8)
+        img[:900, :, :] = np.concatenate(imgs[:3], axis=1)
+        # the three back cameras are mirrored horizontally
+        img_back = np.concatenate(
+            [imgs[3][:, ::-1, :], imgs[4][:, ::-1, :], imgs[5][:, ::-1, :]],
+            axis=1)
+        img[900 + canva_size * scale_factor:, :, :] = img_back
+        img = cv2.resize(img, (int(1600 / scale_factor * 3),
+                               int(900 / scale_factor * 2 + canva_size)))
+        w_begin = int((1600 * 3 / scale_factor - canva_size) // 2)
+        img[int(900 / scale_factor):int(900 / scale_factor) + canva_size,
+            w_begin:w_begin + canva_size, :] = canvas
+
+        if args.format == 'image':
+            cv2.imwrite(os.path.join(vis_dir, '%s.jpg' % infos['token']), img)
+        elif args.format == 'video':
+            vout.write(img)
+    if args.format == 'video':
+        vout.release()
+
+
+if __name__ == '__main__':
+    main()
--- a/docker-hub/FlashOCC/Flashocc/tools/analysis_tools/vis_occ.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/analysis_tools/vis_occ.py
+import os
+
+import mmcv
+import open3d as o3d
+import numpy as np
+import torch
+import pickle
+import math
+from typing import Tuple, List, Dict, Iterable
+import argparse
+import cv2
+
+# Voxel state / label constants. Within this file only FREE_LABEL is read:
+# main() hides every voxel whose label equals it. The remaining names are
+# not referenced by the code in this file.
+NOT_OBSERVED = -1
+FREE = 0
+OCCUPIED = 1
+FREE_LABEL = 17
+BINARY_OBSERVED = 1
+BINARY_NOT_OBSERVED = 0
+
+# Grid geometry. voxel2points() adds the first three POINT_CLOUD_RANGE
+# entries to the scaled voxel indices and main() passes VOXEL_SIZE as the
+# voxel size.
+# NOTE(review): the range is presumably laid out as
+# [x_min, y_min, z_min, x_max, y_max, z_max] - confirm.
+VOXEL_SIZE = [0.4, 0.4, 0.4]
+POINT_CLOUD_RANGE = [-40, -40, -1, 40, 40, 5.4]
+SPTIAL_SHAPE = [200, 200, 16]
+TGT_VOXEL_SIZE = [0.4, 0.4, 0.4]
+TGT_POINT_CLOUD_RANGE = [-40, -40, -1, 40, 40, 5.4]
+
+
+# Display colour per class id (rows 0-16), four channels in 0-255.
+# show_occ() divides the table by 255 and show_point_cloud() uses only the
+# first three channels. The colour names below read each row as (R, G, B)
+# and are approximate.
+colormap_to_colors = np.array(
+    [
+        [0,   0,   0, 255],  # 0 undefined  black
+        [112, 128, 144, 255],  # 1 barrier  slate grey
+        [220, 20, 60, 255],    # 2 bicycle  crimson
+        [255, 127, 80, 255],   # 3 bus  coral
+        [255, 158, 0, 255],  # 4 car  orange
+        [233, 150, 70, 255],   # 5 cons. veh  sandy orange (close to dark salmon)
+        [255, 61, 99, 255],  # 6 motorcycle  pinkish red
+        [0, 0, 230, 255], # 7 pedestrian  blue
+        [47, 79, 79, 255],  # 8 traffic cone  dark slate grey
+        [255, 140, 0, 255],# 9 trailer  dark orange
+        [255, 99, 71, 255],# 10 truck  tomato
+        [0, 207, 191, 255],    # 11 drive sur  turquoise (nuTonomy green)
+        [175, 0, 75, 255],  # 12 other flat  dark raspberry
+        [75, 0, 75, 255],  # 13 sidewalk  dark purple
+        [112, 180, 60, 255],    # 14 terrain  yellowish green
+        [222, 184, 135, 255],    # 15 manmade  burlywood
+        [0, 175, 0, 255],   # 16 vegetation  green
+], dtype=np.float32)
+
+
+
+def voxel2points(voxel, occ_show, voxelSize):
+    """Turn the selected voxels of a grid into point coordinates.
+
+    Args:
+        voxel: (Dx, Dy, Dz) grid of class ids.
+        occ_show: (Dx, Dy, Dz) mask; non-zero entries are selected.
+        voxelSize: (dx, dy, dz) scale applied to the voxel indices.
+
+    Returns:
+        points: (N, 3) index * voxelSize + POINT_CLOUD_RANGE[:3] per axis.
+        voxel: (N, ) class ids of the selected voxels.
+        occIdx: tuple (x_idx, y_idx, z_idx) of the selected indices.
+    """
+    selected = torch.where(occ_show)
+    # one (N, 1) column per axis, shifted by the lower bound of the range
+    columns = [
+        selected[axis][:, None] * voxelSize[axis] + POINT_CLOUD_RANGE[axis]
+        for axis in range(3)
+    ]
+    points = torch.cat(columns, dim=1)
+    labels = voxel[selected]
+    return points, labels, selected
+
+
+def voxel_profile(voxel, voxel_size):
+    """Describe every voxel as an axis-aligned box.
+
+    Args:
+        voxel: (N, 3) tensor, columns (x, y, z).
+        voxel_size: (vx, vy, vz).
+
+    Returns:
+        (N, 7) tensor with columns (x, y, z - vz/2, vx, vy, vz, 0).
+    """
+    num_voxels = voxel.shape[0]
+    lowered_z = voxel[:, 2][:, None] - voxel_size[2] / 2
+    centers = torch.cat((voxel[:, :2], lowered_z), dim=1)
+    # every box has the same extent: repeat each size component N times
+    size_columns = [
+        torch.tensor(voxel_size[axis]).repeat(num_voxels)[:, None]
+        for axis in range(3)
+    ]
+    wlh = torch.cat(size_columns, dim=1)
+    yaw = torch.zeros_like(centers[:, 0:1])
+    return torch.cat((centers, wlh, yaw), dim=1)
+
+
+def rotz(t):
+    """Return the 3x3 matrix of a rotation by angle ``t`` about the z-axis."""
+    cos_t, sin_t = torch.cos(t), torch.sin(t)
+    rows = [
+        [cos_t, -sin_t, 0],
+        [sin_t, cos_t, 0],
+        [0, 0, 1],
+    ]
+    return torch.tensor(rows)
+
+
+def my_compute_box_3d(center, size, heading_angle):
+    """Compute the eight corners of axis-aligned boxes.
+
+    Args:
+        center: (N, 3)  3: (x, y, z - dz/2), i.e. the bottom-face centre.
+            The tensor is only read; it is not modified.
+        size: (N, 3)    3: (vx, vy, vz)
+        heading_angle: (N, 1); accepted for interface compatibility but not
+            applied, the boxes are never rotated.
+    Returns:
+        corners_3d: (N, 8, 3)
+    """
+    # NOTE(review): the x half-extent is taken from size[:, 1] and the y
+    # half-extent from size[:, 0]. This makes no difference for the cubic
+    # voxels used in this file - confirm before passing anisotropic sizes.
+    h, w, l = size[:, 2], size[:, 0], size[:, 1]
+    # Lift the bottom-face height to the geometric centre in a new tensor;
+    # writing it back into ``center`` would silently alter the caller's
+    # boxes, because callers may pass a view of a larger tensor.
+    center_z = center[:, 2] + h / 2
+    l, w, h = (l / 2).unsqueeze(1), (w / 2).unsqueeze(1), (h / 2).unsqueeze(1)
+    x_corners = torch.cat([-l, l, l, -l, -l, l, l, -l], dim=1)[..., None]
+    y_corners = torch.cat([w, w, -w, -w, w, w, -w, -w], dim=1)[..., None]
+    z_corners = torch.cat([h, h, h, h, -h, -h, -h, -h], dim=1)[..., None]
+    corners_3d = torch.cat([x_corners, y_corners, z_corners], dim=2)
+    corners_3d[..., 0] += center[:, 0:1]
+    corners_3d[..., 1] += center[:, 1:2]
+    corners_3d[..., 2] += center_z[:, None]
+    return corners_3d
+
+
+def show_point_cloud(points: np.ndarray, colors=True, points_colors=None, bbox3d=None, voxelize=False,
+                     bbox_corners=None, linesets=None, vis=None, offset=[0,0,0], large_voxel=True, voxel_size=0.4):
+    """Add a point cloud (or its voxel grid) and optional box outlines to a visualizer.
+
+    :param points: (N, 3)  3:(x, y, z)
+    :param colors: if False, the points are not coloured
+    :param points_colors: (N, 4); only the first three columns are used.
+        When None, no per-point colours are set even if ``colors`` is True.
+    :param bbox3d: voxel grid (N, 7) 7: (center, wlh, yaw=0); not used by
+        this function
+    :param voxelize: if False, the voxel outlines are not drawn
+    :param bbox_corners: (N, 8, 3) corner coordinates of the voxels, used to
+        draw the voxel outlines
+    :param linesets: index pairs into the flattened ``bbox_corners``, used
+        to draw the voxel outlines
+    :param vis: visualizer to draw into; a new window is created when None
+    :param offset: translation added to the points and the outline corners
+    :param large_voxel: if True the voxel grid is added, otherwise the raw
+        point cloud
+    :param voxel_size: voxel size handed to the voxel-grid construction
+    :return: the visualizer that received the geometries
+    """
+    if vis is None:
+        vis = o3d.visualization.VisualizerWithKeyCallback()
+        vis.create_window()
+    if isinstance(offset, (list, tuple)):
+        offset = np.array(offset)
+
+    pcd = o3d.geometry.PointCloud()
+    pcd.points = o3d.utility.Vector3dVector(points+offset)
+    # Without a colour array there is nothing to index; the default
+    # arguments (colors=True, points_colors=None) must not crash.
+    if colors and points_colors is not None:
+        pcd.colors = o3d.utility.Vector3dVector(points_colors[:, :3])
+    mesh_frame = o3d.geometry.TriangleMesh.create_coordinate_frame(
+        size=1, origin=[0, 0, 0])
+
+    if large_voxel:
+        # the voxel grid is only built when it is actually displayed
+        voxelGrid = o3d.geometry.VoxelGrid.create_from_point_cloud(pcd, voxel_size=voxel_size)
+        vis.add_geometry(voxelGrid)
+    else:
+        vis.add_geometry(pcd)
+
+    if voxelize:
+        # same ``o3d.utility`` namespace as for the point cloud above
+        line_sets = o3d.geometry.LineSet()
+        line_sets.points = o3d.utility.Vector3dVector(bbox_corners.reshape((-1, 3))+offset)
+        line_sets.lines = o3d.utility.Vector2iVector(linesets.reshape((-1, 2)))
+        line_sets.paint_uniform_color((0, 0, 0))
+        vis.add_geometry(line_sets)
+
+    vis.add_geometry(mesh_frame)
+
+    return vis
+
+
+def show_occ(occ_state, occ_show, voxel_size, vis=None, offset=[0, 0, 0]):
+    """Add the selected voxels of an occupancy grid to a visualizer.
+
+    Args:
+        occ_state: (Dx, Dy, Dz), cls_id
+        occ_show: (Dx, Dy, Dz), bool; which voxels to draw
+        voxel_size: [0.4, 0.4, 0.4]
+        vis: Visualizer handed through to show_point_cloud
+        offset: translation handed through to show_point_cloud
+
+    Returns:
+        The visualizer returned by show_point_cloud.
+    """
+    palette = colormap_to_colors / 255
+    # points: (N, 3) 3: (x, y, z); cls_ids: (N, )
+    points, cls_ids, _ = voxel2points(occ_state, occ_show, voxel_size)
+    # wrap class ids around the palette length before the lookup
+    point_colors = palette[cls_ids % len(palette)]     # (N, 4)
+
+    boxes = voxel_profile(points, voxel_size)    # (N, 7)
+    corners = my_compute_box_3d(boxes[:, 0:3], boxes[:, 3:6], boxes[:, 6:7])      # (N, 8, 3)
+
+    num_boxes = corners.shape[0]
+    # the 12 edges of one box, as pairs of corner indices 0..7
+    box_edges = torch.tensor([[0, 1], [1, 2], [2, 3], [3, 0], [4, 5], [5, 6], [6, 7], [7, 4], [0, 4], [1, 5], [2, 6], [3, 7]])
+    # Shift the pairs of box i by 8 * i so that they index into the
+    # flattened (N * 8, 3) corner array: (1, 12, 2) + (N, 1, 1) -> (N, 12, 2)
+    first_corner = torch.arange(num_boxes) * 8
+    edges = box_edges[None, :, :] + first_corner[:, None, None]
+
+    return show_point_cloud(
+        points=points.numpy(),
+        colors=True,
+        points_colors=point_colors,
+        voxelize=True,
+        bbox3d=boxes.numpy(),
+        bbox_corners=corners.numpy(),
+        linesets=edges.numpy(),
+        vis=vis,
+        offset=offset,
+        large_voxel=True,
+        voxel_size=0.4
+    )
+
+
+def generate_the_ego_car():
+    """Return the voxel-centre points of a box that stands in for the ego car.
+
+    The box spans ``ego_range`` = [x_min, y_min, z_min, x_max, y_max, z_max]
+    and is sampled every ``ego_voxel_size``; each point is the centre of one
+    cell.
+
+    Returns:
+        (N, 3) float array with columns (x, y, z).
+    """
+    ego_range = [-2, -1, 0, 2, 1, 1.5]
+    ego_voxel_size = [0.1, 0.1, 0.1]
+    lower = np.array(ego_range[:3], dtype=np.float64)
+    upper = np.array(ego_range[3:], dtype=np.float64)
+    # number of cells per axis; round() guards against 14.999... style
+    # floating-point quotients
+    dims = np.array([
+        int(round((hi - lo) / step))
+        for lo, hi, step in zip(ego_range[:3], ego_range[3:], ego_voxel_size)
+    ])
+    # 'ij' indexing keeps column k of the grid tied to axis k, so that the
+    # x indices are scaled with the x range and the y indices with the
+    # y range.
+    grid = np.stack(
+        np.meshgrid(*(np.arange(d) for d in dims), indexing='ij'),
+        axis=-1).reshape(-1, 3)
+    ego_point_xyz = (grid + 0.5) / dims * (upper - lower) + lower
+    return ego_point_xyz
+
+
+def parse_args():
+    """Parse the command-line options of the occupancy visualization script.
+
+    Returns:
+        argparse.Namespace: the arguments parsed from ``sys.argv``.
+    """
+    parser = argparse.ArgumentParser(description='Visualize the predicted '
+                                     'result of nuScenes')
+    # (flag, keyword arguments) pairs, registered in this order.
+    option_table = [
+        ('res', dict(help='Path to the predicted result')),
+        ('--canva-size',
+         dict(type=int, default=1000, help='Size of canva in pixel')),
+        ('--vis-frames',
+         dict(type=int, default=500,
+              help='Number of frames for visualization')),
+        ('--scale-factor',
+         dict(type=int, default=4,
+              help='Trade-off between image-view and bev in size of '
+              'the visualized canvas')),
+        ('--version',
+         dict(type=str, default='val', help='Version of nuScenes dataset')),
+        ('--draw-gt', dict(action='store_true')),
+        ('--root_path',
+         dict(type=str, default='./data/nuscenes',
+              help='Path to nuScenes dataset')),
+        ('--save_path',
+         dict(type=str, default='./vis',
+              help='Path to save visualization results')),
+        ('--format',
+         dict(type=str, default='image', choices=['video', 'image'],
+              help='The desired format of the visualization result')),
+        ('--fps', dict(type=int, default=10, help='Frame rate of video')),
+        ('--video-prefix',
+         dict(type=str, default='vis', help='name of video')),
+    ]
+    for flag, options in option_table:
+        parser.add_argument(flag, **options)
+    return parser.parse_args()
+
+
+def main():
+    """Render predicted (and optionally ground-truth) occupancy per frame.
+
+    For every visualized frame the predicted voxels are shown in an Open3D
+    window, the rendered view is captured and pasted between the six camera
+    images, and the result is written either as png files per sample or as
+    one frame of an mp4 video, depending on ``--format``.
+    """
+    args = parse_args()
+    # load predicted results
+    results_dir = args.res
+
+    # load dataset information
+    info_path = \
+        args.root_path + '/bevdetv2-nuscenes_infos_%s.pkl' % args.version
+    # NOTE(review): unpickling executes code stored in the file; only use
+    # info files from a trusted source.
+    with open(info_path, 'rb') as info_file:
+        dataset = pickle.load(info_file)
+    # prepare save path and medium
+    vis_dir = args.save_path
+    os.makedirs(vis_dir, exist_ok=True)
+    print('saving visualized result to %s' % vis_dir)
+    scale_factor = args.scale_factor
+    canva_size = args.canva_size
+    if args.format == 'video':
+        fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
+        vout = cv2.VideoWriter(
+            os.path.join(vis_dir, '%s.mp4' % args.video_prefix), fourcc,
+            args.fps, (int(1600 / scale_factor * 3),
+                       int(900 / scale_factor * 2 + canva_size)))
+
+    views = [
+        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',
+        'CAM_BACK', 'CAM_BACK_RIGHT'
+    ]
+    print('start visualizing results')
+
+    vis = o3d.visualization.VisualizerWithKeyCallback()
+    vis.create_window()
+
+    num_frames = min(args.vis_frames, len(dataset['infos']))
+    for cnt, info in enumerate(dataset['infos'][:num_frames]):
+        if cnt % 10 == 0:
+            print('%d/%d' % (cnt, num_frames))
+
+        scene_name = info['scene_name']
+        sample_token = info['token']
+
+        pred_occ_path = os.path.join(results_dir, scene_name, sample_token, 'pred.npz')
+        gt_occ_path = info['occ_path']
+
+        # close the archives as soon as the arrays have been read
+        with np.load(pred_occ_path) as pred_data:
+            pred_occ = pred_data['pred']
+        with np.load(os.path.join(args.root_path, gt_occ_path, 'labels.npz')) as gt_data:
+            voxel_label = gt_data['semantics']
+            camera_mask = gt_data['mask_camera']
+
+        # load imgs
+        imgs = [cv2.imread(info['cams'][view]['data_path']) for view in views]
+
+        # occ_canvas: show non-free predictions inside the camera mask
+        voxel_show = np.logical_and(pred_occ != FREE_LABEL, camera_mask)
+        voxel_size = VOXEL_SIZE
+        vis = show_occ(torch.from_numpy(pred_occ), torch.from_numpy(voxel_show), voxel_size=voxel_size, vis=vis,
+                       offset=[0, pred_occ.shape[0] * voxel_size[0] * 1.2 * 0, 0])
+
+        if args.draw_gt:
+            # the ground truth is drawn next to the prediction, shifted
+            # along y by 1.2 grid widths
+            voxel_show = np.logical_and(voxel_label != FREE_LABEL, camera_mask)
+            vis = show_occ(torch.from_numpy(voxel_label), torch.from_numpy(voxel_show), voxel_size=voxel_size, vis=vis,
+                           offset=[0, voxel_label.shape[0] * voxel_size[0] * 1.2 * 1, 0])
+
+        view_control = vis.get_view_control()
+
+        look_at = np.array([-0.185, 0.513, 3.485])
+        front = np.array([-0.974, -0.055, 0.221])
+        up = np.array([0.221, 0.014, 0.975])
+
+        view_control.set_lookat(look_at)
+        view_control.set_front(front)
+        view_control.set_up(up)
+        # the zoom is a single number, so pass a plain float rather than a
+        # one-element array
+        view_control.set_zoom(0.08)
+
+        opt = vis.get_render_option()
+        opt.background_color = np.asarray([1, 1, 1])
+        opt.line_width = 5
+
+        vis.poll_events()
+        vis.update_renderer()
+        # NOTE(review): run() hands control to the Open3D window and
+        # presumably returns only once the user closes it - confirm.
+        vis.run()
+
+        occ_canvas = vis.capture_screen_float_buffer(do_render=True)
+        occ_canvas = np.asarray(occ_canvas)
+        occ_canvas = (occ_canvas * 255).astype(np.uint8)
+        # reverse the channel order before handing the image to OpenCV
+        occ_canvas = occ_canvas[..., [2, 1, 0]]
+        occ_canvas_resize = cv2.resize(occ_canvas, (canva_size, canva_size), interpolation=cv2.INTER_CUBIC)
+
+        vis.clear_geometries()
+
+        big_img = np.zeros((900 * 2 + canva_size * scale_factor, 1600 * 3, 3),
+                       dtype=np.uint8)
+        big_img[:900, :, :] = np.concatenate(imgs[:3], axis=1)
+        # the three back cameras are mirrored horizontally
+        img_back = np.concatenate(
+            [imgs[3][:, ::-1, :], imgs[4][:, ::-1, :], imgs[5][:, ::-1, :]],
+            axis=1)
+        big_img[900 + canva_size * scale_factor:, :, :] = img_back
+        big_img = cv2.resize(big_img, (int(1600 / scale_factor * 3),
+                                       int(900 / scale_factor * 2 + canva_size)))
+        w_begin = int((1600 * 3 / scale_factor - canva_size) // 2)
+        big_img[int(900 / scale_factor):int(900 / scale_factor) + canva_size,
+                w_begin:w_begin + canva_size, :] = occ_canvas_resize
+
+        if args.format == 'image':
+            out_dir = os.path.join(vis_dir, f'{scene_name}', f'{sample_token}')
+            mmcv.mkdir_or_exist(out_dir)
+            for i, img in enumerate(imgs):
+                cv2.imwrite(os.path.join(out_dir, f'img{i}.png'), img)
+            cv2.imwrite(os.path.join(out_dir, 'occ.png'), occ_canvas)
+            cv2.imwrite(os.path.join(out_dir, 'overall.png'), big_img)
+        elif args.format == 'video':
+            # plain frame counter; using the counter as its own field
+            # width would pad the label to ``cnt`` characters
+            cv2.putText(big_img, f'{cnt}', (5, 15), fontFace=cv2.FONT_HERSHEY_COMPLEX, color=(0, 0, 0),
+                        fontScale=0.5)
+            cv2.putText(big_img, f'{scene_name}', (5, 35), fontFace=cv2.FONT_HERSHEY_COMPLEX, color=(0, 0, 0),
+                        fontScale=0.5)
+            cv2.putText(big_img, f'{sample_token[:5]}', (5, 55), fontFace=cv2.FONT_HERSHEY_COMPLEX, color=(0, 0, 0),
+                        fontScale=0.5)
+            vout.write(big_img)
+
+    if args.format == 'video':
+        vout.release()
+    vis.destroy_window()
+
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
--- a/docker-hub/FlashOCC/Flashocc/tools/convert_bevdet_to_TRT.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/convert_bevdet_to_TRT.py
+import argparse
+import sys
+import os
+sys.path.insert(0, os.getcwd())
+
+import torch.onnx
+from mmcv import Config
+from mmdeploy.backend.tensorrt.utils import save, search_cuda_version
+
+try:
+    # If mmdet version > 2.23.0, compat_cfg would be imported and
+    # used from mmdet instead of mmdet3d.
+    from mmdet.utils import compat_cfg
+except ImportError:
+    from mmdet3d.utils import compat_cfg
+
+import os
+from typing import Dict, Optional, Sequence, Union
+
+import h5py
+import mmcv
+import numpy as np
+import onnx
+import pycuda.driver as cuda
+import tensorrt as trt
+import torch
+import tqdm
+from mmcv.runner import load_checkpoint
+from mmdeploy.apis.core import no_mp
+from mmdeploy.backend.tensorrt.calib_utils import HDF5Calibrator
+from mmdeploy.backend.tensorrt.init_plugins import load_tensorrt_plugin
+from mmdeploy.utils import load_config
+from packaging import version
+from torch.utils.data import DataLoader
+
+from mmdet3d.datasets import build_dataloader, build_dataset
+from mmdet3d.models import build_model
+from mmdet.datasets import replace_ImageToTensor
+from tools.misc.fuse_conv_bn import fuse_module
+
+
+class HDF5CalibratorBEVDet(HDF5Calibrator):
+    """HDF5 calibrator whose batches keep ``img`` as float32 and cast every
+    other input to int32."""
+
+    def get_batch(self, names: Sequence[str], **kwargs) -> list:
+        """Return device pointers for the next calibration sample.
+
+        Each named input is read from the calibration file, tiled and
+        cropped to its ``opt_shape`` and copied to freshly allocated device
+        memory. ``None`` is returned once all samples have been consumed.
+        """
+        if self.count >= self.dataset_length:
+            return None
+
+        if self.count % 100 == 0:
+            print('%d/%d' % (self.count, self.dataset_length))
+
+        sample_key = str(self.count)
+        device_ptrs = []
+        for name in names:
+            target_dtype = np.float32 if name == 'img' else np.int32
+            host_data = self.calib_data[name][sample_key][...].astype(
+                target_dtype)
+
+            # tile the tensor so we can keep the same distribute
+            opt_shape = self.input_shapes[name]['opt_shape']
+            repeats = [
+                int(np.ceil(wanted / available))
+                for wanted, available in zip(opt_shape, host_data.shape)
+            ]
+            host_data = np.tile(host_data, repeats)
+            # crop the tiled array back to exactly opt_shape
+            crop = tuple(slice(0, end) for end in opt_shape)
+            host_data = host_data[crop]
+
+            device_ptr = cuda.mem_alloc(host_data.nbytes)
+            cuda.memcpy_htod(device_ptr, np.ascontiguousarray(host_data))
+            self.buffers[name] = device_ptr
+            device_ptrs.append(self.buffers[name])
+
+        self.count += 1
+        return device_ptrs
+
+
+def parse_args():
+    """Parse the command line of the BEVDet TensorRT deployment script.
+
+    Returns:
+        argparse.Namespace: Holds ``config``, ``checkpoint``, ``work_dir``,
+        ``prefix``, ``fp16``, ``int8``, ``fuse_conv_bn`` and ``calib_num``
+        (``None`` when ``--calib_num`` is not given).
+    """
+    parser = argparse.ArgumentParser(description='Deploy BEVDet with Tensorrt')
+    parser.add_argument('config', help='deploy config file path')
+    parser.add_argument('checkpoint', help='checkpoint file')
+    parser.add_argument('work_dir', help='work dir to save file')
+    parser.add_argument(
+        '--prefix', default='bevdet', help='prefix of the save file name')
+    parser.add_argument(
+        '--fp16', action='store_true', help='Whether to use tensorrt fp16')
+    parser.add_argument(
+        '--int8', action='store_true', help='Whether to use tensorrt int8')
+    # The two help fragments are concatenated, so the first one must end
+    # with a space to avoid printing "increasethe".
+    parser.add_argument(
+        '--fuse-conv-bn',
+        action='store_true',
+        help='Whether to fuse conv and bn, this will slightly increase '
+        'the inference speed')
+    parser.add_argument(
+        '--calib_num', type=int, default=None, help='num to calib')
+    return parser.parse_args()
+
+
+def get_plugin_names():
+    """Return the names of all plugin creators in the TensorRT registry."""
+    creators = trt.get_plugin_registry().plugin_creator_list
+    names = []
+    for creator in creators:
+        names.append(creator.name)
+    return names
+
+
+def create_calib_input_data_impl(calib_file: str,
+                                 dataloader: DataLoader,
+                                 model_partition: bool = False,
+                                 metas: Optional[list] = None,
+                                 calib_num = None) -> None:
+    """Write the calibration samples of ``dataloader`` into an HDF5 file.
+
+    The file gets a ``calib_data/end2end`` group with one sub-group per
+    network input. Every sample stores its image under ``img`` and a copy
+    of the (sample independent) ``metas`` arrays under the bev-pool input
+    names.
+
+    Args:
+        calib_file (str): Path of the HDF5 file to create (overwritten).
+        dataloader (DataLoader): Yields dicts whose
+            ``['img_inputs'][0][0]`` entry is the image tensor.
+        model_partition (bool): Must be ``False``; partitioned models are
+            not supported.
+        metas (list, optional): Tensors stored, in this order, as
+            ``ranks_bev``, ``ranks_depth``, ``ranks_feat``,
+            ``interval_starts`` and ``interval_lengths``.
+        calib_num (int, optional): Maximum number of samples to write.
+            ``None`` writes every sample of the dataloader.
+    """
+    # Reject unsupported usage before the output file is truncated.
+    assert not model_partition
+    input_keys = [
+        'ranks_bev', 'ranks_depth', 'ranks_feat', 'interval_starts',
+        'interval_lengths'
+    ]
+    meta_arrays = [
+        meta.int().detach().cpu().numpy()
+        for meta in ([] if metas is None else metas)
+    ]
+    with h5py.File(calib_file, mode='w') as file:
+        calib_data_group = file.create_group('calib_data')
+        # create end2end group
+        input_data_group = calib_data_group.create_group('end2end')
+        input_group_img = input_data_group.create_group('img')
+        input_groups = [
+            input_data_group.create_group(input_key)
+            for input_key in input_keys
+        ]
+        for data_id, input_data in enumerate(tqdm.tqdm(dataloader)):
+            # data_id is zero based, so stop once calib_num samples exist
+            if calib_num is not None and data_id >= calib_num:
+                break
+            # save end2end data
+            input_tensor = input_data['img_inputs'][0][0]
+            input_ndarray = input_tensor.squeeze(0).detach().cpu().numpy()
+            input_group_img.create_dataset(
+                str(data_id),
+                shape=input_ndarray.shape,
+                compression='gzip',
+                compression_opts=4,
+                data=input_ndarray)
+            for kid, input_group in enumerate(input_groups):
+                input_group.create_dataset(
+                    str(data_id),
+                    shape=meta_arrays[kid].shape,
+                    compression='gzip',
+                    compression_opts=4,
+                    data=meta_arrays[kid])
+            file.flush()
+
+
+def create_calib_input_data(calib_file: str,
+                            deploy_cfg: Union[str, mmcv.Config],
+                            model_cfg: Union[str, mmcv.Config],
+                            model_checkpoint: Optional[str] = None,
+                            dataset_cfg: Optional[Union[str,
+                                                        mmcv.Config]] = None,
+                            dataset_type: str = 'val',
+                            device: str = 'cpu',
+                            metas: Optional[list] = None,
+                            calib_num = None) -> None:
+    """Create dataset for post-training quantization.
+
+    Args:
+        calib_file (str): The output calibration data file.
+        deploy_cfg (str | mmcv.Config): Deployment config file or
+            Config object.
+        model_cfg (str | mmcv.Config): Model config file or Config object.
+        model_checkpoint (str): A checkpoint path of PyTorch model,
+            defaults to `None`. It is accepted but not used here.
+        dataset_cfg (Optional[Union[str, mmcv.Config]], optional): Model
+            config to provide calibration dataset. If none, use `model_cfg`
+            as the dataset config. Defaults to None.
+        dataset_type (str, optional): The dataset type. Defaults to 'val'.
+        device (str, optional): Device to create dataset. Defaults to 'cpu'.
+        metas (list, optional): Bev-pool input tensors that are stored
+            next to every calibration image. Defaults to ``[None]``.
+        calib_num (int, optional): Upper bound on the number of
+            calibration samples. Defaults to None (no bound).
+    """
+    # Keep the historical default value without sharing one mutable list
+    # between calls.
+    if metas is None:
+        metas = [None]
+
+    with no_mp():
+        if dataset_cfg is None:
+            dataset_cfg = model_cfg
+
+        # load cfg if necessary
+        deploy_cfg, model_cfg = load_config(deploy_cfg, model_cfg)
+
+        # load dataset_cfg if necessary
+        dataset_cfg = load_config(dataset_cfg)[0]
+
+        from mmdeploy.apis.utils import build_task_processor
+        task_processor = build_task_processor(model_cfg, deploy_cfg, device)
+
+        dataset = task_processor.build_dataset(dataset_cfg, dataset_type)
+
+        dataloader = task_processor.build_dataloader(
+            dataset, 1, 1, dist=False, shuffle=False)
+
+        create_calib_input_data_impl(
+            calib_file,
+            dataloader,
+            model_partition=False,
+            metas=metas,
+            calib_num=calib_num)
+
+
+def from_onnx(onnx_model: Union[str, onnx.ModelProto],
+              output_file_prefix: str,
+              input_shapes: Dict[str, Sequence[int]],
+              max_workspace_size: int = 0,
+              fp16_mode: bool = False,
+              int8_mode: bool = False,
+              int8_param: Optional[dict] = None,
+              device_id: int = 0,
+              log_level: trt.Logger.Severity = trt.Logger.ERROR,
+              **kwargs) -> trt.ICudaEngine:
+    """Create a tensorrt engine from ONNX.
+
+    Modified from mmdeploy.backend.tensorrt.utils.from_onnx
+
+    Args:
+        onnx_model (str | onnx.ModelProto): Path of an ONNX file or an
+            already loaded model.
+        output_file_prefix (str): The engine is saved to
+            ``output_file_prefix + '.engine'``.
+        input_shapes (dict): Maps every input name to a dict with the keys
+            ``min_shape``, ``opt_shape`` and ``max_shape``.
+        max_workspace_size (int): Workspace limit handed to the builder
+            config. Defaults to 0.
+        fp16_mode (bool): Set the FP16 builder flag. Defaults to False.
+        int8_mode (bool): Set the INT8 builder flag and attach a
+            calibrator. Defaults to False.
+        int8_param (dict, optional): Required when ``int8_mode`` is set;
+            must hold ``calib_file`` and ``model_type`` and may hold
+            ``algorithm``.
+        device_id (int): CUDA device used for the build. Defaults to 0.
+        log_level (trt.Logger.Severity): TensorRT logger severity.
+
+    Returns:
+        trt.ICudaEngine: The engine that was built and saved.
+
+    Raises:
+        RuntimeError: If the ONNX model cannot be parsed.
+        AssertionError: If ``int8_mode`` is set without ``int8_param`` or
+            the engine could not be built.
+    """
+
+    # CUDA_DEVICE is set only around the import so that pycuda.autoinit
+    # creates its context on ``device_id``; the previous value (or its
+    # absence) is restored right afterwards.
+    old_cuda_device = os.environ.get('CUDA_DEVICE', None)
+    os.environ['CUDA_DEVICE'] = str(device_id)
+    import pycuda.autoinit  # noqa:F401
+    if old_cuda_device is not None:
+        os.environ['CUDA_DEVICE'] = old_cuda_device
+    else:
+        os.environ.pop('CUDA_DEVICE')
+
+    load_tensorrt_plugin()
+    # create builder and network
+    logger = trt.Logger(log_level)
+    builder = trt.Builder(logger)
+    EXPLICIT_BATCH = 1 << (int)(
+        trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+    network = builder.create_network(EXPLICIT_BATCH)
+
+    # parse onnx
+    parser = trt.OnnxParser(network, logger)
+
+    if isinstance(onnx_model, str):
+        onnx_model = onnx.load(onnx_model)
+
+    if not parser.parse(onnx_model.SerializeToString()):
+        error_msgs = ''.join(f'{parser.get_error(error)}\n'
+                             for error in range(parser.num_errors))
+        raise RuntimeError(f'Failed to parse onnx, {error_msgs}')
+
+    # Builder level attributes are only set for TensorRT releases before 8.
+    legacy_builder = version.parse(trt.__version__) < version.parse('8')
+
+    # config builder
+    if legacy_builder:
+        builder.max_workspace_size = max_workspace_size
+
+    config = builder.create_builder_config()
+    # Prefer the memory pool API when the installed TensorRT provides it;
+    # ``max_workspace_size`` is the older spelling of the same limit.
+    if hasattr(config, 'set_memory_pool_limit'):
+        config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE,
+                                     max_workspace_size)
+    else:
+        config.max_workspace_size = max_workspace_size
+
+    cuda_version = search_cuda_version()
+    if cuda_version is not None:
+        version_major = int(cuda_version.split('.')[0])
+        if version_major < 11:
+            # cu11 support cublasLt, so cudnn heuristic tactic should disable CUBLAS_LT # noqa E501
+            tactic_source = config.get_tactic_sources() - (
+                1 << int(trt.TacticSource.CUBLAS_LT))
+            config.set_tactic_sources(tactic_source)
+
+    profile = builder.create_optimization_profile()
+
+    for input_name, param in input_shapes.items():
+        profile.set_shape(input_name, param['min_shape'],
+                          param['opt_shape'], param['max_shape'])
+    config.add_optimization_profile(profile)
+
+    if fp16_mode:
+        if legacy_builder:
+            builder.fp16_mode = fp16_mode
+        config.set_flag(trt.BuilderFlag.FP16)
+
+    if int8_mode:
+        config.set_flag(trt.BuilderFlag.INT8)
+        assert int8_param is not None
+        config.int8_calibrator = HDF5CalibratorBEVDet(
+            int8_param['calib_file'],
+            input_shapes,
+            model_type=int8_param['model_type'],
+            device_id=device_id,
+            algorithm=int8_param.get(
+                'algorithm', trt.CalibrationAlgoType.ENTROPY_CALIBRATION_2))
+        if legacy_builder:
+            builder.int8_mode = int8_mode
+            builder.int8_calibrator = config.int8_calibrator
+
+    # create engine
+    engine = builder.build_engine(network, config)
+
+    assert engine is not None, 'Failed to create TensorRT engine'
+
+    engine_file = output_file_prefix + '.engine'
+    save(engine, engine_file)
+    print('Save engine at ', engine_file)
+    return engine
+
+
+def _export_and_check_onnx(model, onnx_input, onnx_path, dynamic_axes,
+                           input_names, output_names):
+    """Export ``model`` to ``onnx_path`` and report if the file validates."""
+    torch.onnx.export(
+        model,
+        onnx_input,
+        onnx_path,
+        opset_version=11,
+        dynamic_axes=dynamic_axes,
+        input_names=input_names,
+        output_names=output_names)
+    print('output_names:', output_names)
+    print('====== onnx is saved at : ', onnx_path)
+    # check onnx model; a failed check is only reported, not fatal
+    onnx_model = onnx.load(onnx_path)
+    try:
+        onnx.checker.check_model(onnx_model)
+    except Exception:
+        print('ONNX Model Incorrect')
+    else:
+        print('ONNX Model Correct')
+
+
+def main():
+    """Export the configured model to ONNX and build a TensorRT engine.
+
+    Steps: parse the command line, build the ``...TRT`` variant of the
+    configured model, take the first sample of the test dataloader as the
+    tracing input, export two ONNX files (the original forward and the
+    forward with argmax), optionally create the INT8 calibration file and
+    finally build and save the engine. All files are written below
+    ``args.work_dir``.
+
+    Raises:
+        NotImplementedError: If the model class is not one of the
+            supported ``*TRT`` classes.
+        ValueError: If neither ``wdet3d`` nor ``wocc`` is enabled on the
+            model.
+    """
+    args = parse_args()
+
+    # workspace limit in bytes handed to TensorRT (200 * 200 * 256 * 256)
+    max_workspace_size = 200*200*256*(2**8)
+
+    os.makedirs(args.work_dir, exist_ok=True)
+
+    load_tensorrt_plugin()
+    assert 'bev_pool_v2' in get_plugin_names(), \
+        'bev_pool_v2 is not in the plugin list of tensorrt, ' \
+        'please install mmdeploy from ' \
+        'https://github.com/HuangJunJie2017/mmdeploy.git'
+
+    # if args.int8:
+    #     assert args.fp16
+    model_prefix = args.prefix
+    if args.int8:
+        model_prefix = model_prefix + '_int8'
+    elif args.fp16:
+        model_prefix = model_prefix + '_fp16'
+    cfg = Config.fromfile(args.config)
+    cfg.model.pretrained = None
+    cfg.model.type = cfg.model.type + 'TRT'
+
+    cfg = compat_cfg(cfg)
+    cfg.gpu_ids = [0]
+
+    # import modules from plguin/xx, registry will be updated
+    if hasattr(cfg, 'plugin'):
+        if cfg.plugin:
+            import importlib
+            if hasattr(cfg, 'plugin_dir'):
+                _module_dir = os.path.dirname(cfg.plugin_dir)
+                _module_path = '.'.join(_module_dir.split('/'))
+                print(_module_path)
+            else:
+                # import dir is the dirpath for the config file
+                _module_dir = os.path.dirname(args.config)
+                _module_path = '.'.join(_module_dir.split('/'))
+            importlib.import_module(_module_path)
+
+    # build the dataloader
+    test_dataloader_default_args = dict(
+        samples_per_gpu=1, workers_per_gpu=2, dist=False, shuffle=False)
+
+    if isinstance(cfg.data.test, dict):
+        cfg.data.test.test_mode = True
+        if cfg.data.test_dataloader.get('samples_per_gpu', 1) > 1:
+            # Replace 'ImageToTensor' to 'DefaultFormatBundle'
+            cfg.data.test.pipeline = replace_ImageToTensor(
+                cfg.data.test.pipeline)
+    elif isinstance(cfg.data.test, list):
+        for ds_cfg in cfg.data.test:
+            ds_cfg.test_mode = True
+        if cfg.data.test_dataloader.get('samples_per_gpu', 1) > 1:
+            for ds_cfg in cfg.data.test:
+                ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)
+
+    test_loader_cfg = {
+        **test_dataloader_default_args,
+        **cfg.data.get('test_dataloader', {})
+    }
+    dataset = build_dataset(cfg.data.test)
+    data_loader = build_dataloader(dataset, **test_loader_cfg)
+
+    # build the model and load checkpoint
+    cfg.model.train_cfg = None
+    model = build_model(cfg.model, test_cfg=cfg.get('test_cfg'))
+    # assert model.img_view_transformer.grid_size[0] == 128
+    # assert model.img_view_transformer.grid_size[1] == 128
+    # assert model.img_view_transformer.grid_size[2] == 1
+    if os.path.exists(args.checkpoint):
+        load_checkpoint(model, args.checkpoint, map_location='cpu')
+    else:
+        print(args.checkpoint, " does not exists!")
+    if args.fuse_conv_bn:
+        model_prefix = model_prefix + '_fuse'
+        model = fuse_module(model)
+    model.cuda()
+    model.eval()
+
+    # model_prefix is final from here on, so every output path can be
+    # derived once and joined to work_dir in the same way.
+    engine_prefix = os.path.join(args.work_dir, model_prefix)
+    onnx_path = engine_prefix + '.onnx'
+    argmax_onnx_path = engine_prefix + '_with_argmax.onnx'
+    calib_path = os.path.join(args.work_dir, 'calib_data.h5')
+
+    # only the first sample is needed as tracing input
+    data = next(iter(data_loader))
+    inputs = [t.cuda() for t in data['img_inputs'][0]]
+    img = inputs[0].squeeze(0)
+    if img.shape[0] > 6:
+        img = img[:6]
+
+    model_name = model.__class__.__name__
+    mlp_input = None
+    if model_name in ('FBOCCTRT', 'FBOCC2DTRT'):
+        metas = model.get_bev_pool_input(inputs, img_metas=data['img_metas'])
+    elif model_name == 'BEVDetOCCTRT':
+        metas = model.get_bev_pool_input(inputs)
+    elif model_name == 'BEVDepthOCCTRT':
+        metas, mlp_input = model.get_bev_pool_input(inputs)
+    else:
+        raise NotImplementedError(
+            f'{model_name} is not supported by this conversion script')
+
+    # metas is indexed as [ranks_bev, ranks_depth, ranks_feat,
+    # interval_starts, interval_lengths]; the ONNX inputs use the order
+    # given in input_names.
+    onnx_input = (img.float().contiguous(), metas[1].int().contiguous(),
+                  metas[2].int().contiguous(), metas[0].int().contiguous(),
+                  metas[3].int().contiguous(), metas[4].int().contiguous())
+    input_names = [
+        'img', 'ranks_depth', 'ranks_feat', 'ranks_bev',
+        'interval_starts', 'interval_lengths'
+    ]
+    if model_name == 'BEVDepthOCCTRT':
+        onnx_input = onnx_input + (mlp_input, )
+        input_names.append('mlp_input')
+    dynamic_axes = {
+        "ranks_depth": {0: 'M'},
+        "ranks_feat": {0: 'M'},
+        "ranks_bev": {0: 'M'},
+        "interval_starts": {0: 'N'},
+        "interval_lengths": {0: 'N'},
+    }
+
+    with torch.no_grad():
+        if model.wdet3d and not model.wocc:
+            num_outputs = 6 * len(model.pts_bbox_head.task_heads)
+        elif model.wdet3d and model.wocc:
+            num_outputs = 1 + 6 * len(model.pts_bbox_head.task_heads)
+        elif model.wocc:
+            num_outputs = 1
+        else:
+            raise ValueError(
+                ' At least one of wdet3d and wocc is set as True!! ')
+        output_names = [f'output_{j}' for j in range(num_outputs)]
+
+        model.forward = model.forward_ori
+        _export_and_check_onnx(model, onnx_input, onnx_path, dynamic_axes,
+                               input_names, output_names)
+
+        model.forward = model.forward_with_argmax
+        _export_and_check_onnx(model, onnx_input, argmax_onnx_path,
+                               dynamic_axes, input_names, ['cls_occ_label'])
+
+    # convert to tensorrt
+    # NOTE(review): 'mlp_input' gets no profile entry below although it is
+    # an ONNX input for BEVDepthOCCTRT; presumably its static shape is
+    # enough - confirm.
+    num_points = metas[0].shape[0]
+    num_intervals = metas[3].shape[0]
+    img_shape = img.shape
+
+    def _fixed_shape(shape):
+        # min, opt and max are identical: the engine is built for one shape
+        return dict(min_shape=shape, opt_shape=shape, max_shape=shape)
+
+    input_shapes = dict(
+        img=_fixed_shape(img_shape),
+        ranks_depth=_fixed_shape([num_points]),
+        ranks_feat=_fixed_shape([num_points]),
+        ranks_bev=_fixed_shape([num_points]),
+        interval_starts=_fixed_shape([num_intervals]),
+        interval_lengths=_fixed_shape([num_intervals]))
+    deploy_cfg = dict(
+        backend_config=dict(
+            type='tensorrt',
+            common_config=dict(
+                fp16_mode=args.fp16,
+                max_workspace_size=max_workspace_size,
+                int8_mode=args.int8),
+            model_inputs=[dict(input_shapes=input_shapes)]),
+        codebase_config=dict(
+            type='mmdet3d', task='VoxelDetection', model_type='end2end'))
+
+    if args.int8:
+        create_calib_input_data(
+            calib_path,
+            deploy_cfg,
+            args.config,
+            args.checkpoint,
+            dataset_cfg=None,
+            dataset_type='val',
+            device='cuda:0',
+            metas=metas,
+            calib_num=args.calib_num)
+
+    from_onnx(
+        onnx_path,
+        engine_prefix,
+        fp16_mode=args.fp16,
+        int8_mode=args.int8,
+        int8_param=dict(calib_file=calib_path, model_type='end2end'),
+        max_workspace_size=max_workspace_size,
+        input_shapes=input_shapes)
+
+    # if args.int8:
+    #     os.remove(calib_path)
+
+
+# Script entry point: run the conversion only when executed directly.
+if __name__ == '__main__':
+
+    main()
--- a/docker-hub/FlashOCC/Flashocc/tools/create_data_bevdet.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/create_data_bevdet.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import pickle
+
+import numpy as np
+from nuscenes import NuScenes
+from nuscenes.utils.data_classes import Box
+from pyquaternion import Quaternion
+
+from tools.data_converter import nuscenes_converter as nuscenes_converter
+
+# Maps every nuScenes annotation category name to a detection class name.
+# Categories mapped to 'ignore' are skipped by get_gt because 'ignore' is
+# not an entry of `classes`.
+map_name_from_general_to_detection = {
+    'human.pedestrian.adult': 'pedestrian',
+    'human.pedestrian.child': 'pedestrian',
+    'human.pedestrian.wheelchair': 'ignore',
+    'human.pedestrian.stroller': 'ignore',
+    'human.pedestrian.personal_mobility': 'ignore',
+    'human.pedestrian.police_officer': 'pedestrian',
+    'human.pedestrian.construction_worker': 'pedestrian',
+    'animal': 'ignore',
+    'vehicle.car': 'car',
+    'vehicle.motorcycle': 'motorcycle',
+    'vehicle.bicycle': 'bicycle',
+    'vehicle.bus.bendy': 'bus',
+    'vehicle.bus.rigid': 'bus',
+    'vehicle.truck': 'truck',
+    'vehicle.construction': 'construction_vehicle',
+    'vehicle.emergency.ambulance': 'ignore',
+    'vehicle.emergency.police': 'ignore',
+    'vehicle.trailer': 'trailer',
+    'movable_object.barrier': 'barrier',
+    'movable_object.trafficcone': 'traffic_cone',
+    'movable_object.pushable_pullable': 'ignore',
+    'movable_object.debris': 'ignore',
+    'static_object.bicycle_rack': 'ignore',
+}
+# Detection class names; get_gt uses the position of a name in this list
+# as its integer label.
+classes = [
+    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
+    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+]
+
+
+def get_gt(info):
+    """Build ground-truth boxes and labels for one sample in the ego frame.
+
+    Annotations are moved from the global frame into the ego frame using
+    the inverse of the ``CAM_FRONT`` ego-to-global pose. An annotation is
+    dropped when its category does not map to an entry of ``classes`` or
+    when it has no lidar and no radar points.
+
+    Args:
+        info (dict): Sample info holding ``cams`` and ``ann_infos``.
+
+    Returns:
+        tuple(list, list): ``gt_boxes`` holds one 9-element array per kept
+        annotation (center xyz, size reordered from ``wlh`` with indices
+        ``[1, 0, 2]``, yaw, velocity xy); ``gt_labels`` holds the matching
+        indices into ``classes``.
+    """
+    front_cam = info['cams']['CAM_FRONT']
+    ego2global_rotation = front_cam['ego2global_rotation']
+    ego2global_translation = front_cam['ego2global_translation']
+    # inverse of the ego -> global pose: shift first, then rotate back
+    global2ego_shift = -np.array(ego2global_translation)
+    global2ego_rot = Quaternion(ego2global_rotation).inverse
+
+    gt_boxes, gt_labels = [], []
+    for ann_info in info['ann_infos']:
+        det_name = map_name_from_general_to_detection[
+            ann_info['category_name']]
+        if det_name not in classes:
+            continue
+        if ann_info['num_lidar_pts'] + ann_info['num_radar_pts'] <= 0:
+            continue
+
+        # Use ego coordinate.
+        box = Box(
+            ann_info['translation'],
+            ann_info['size'],
+            Quaternion(ann_info['rotation']),
+            velocity=ann_info['velocity'],
+        )
+        box.translate(global2ego_shift)
+        box.rotate(global2ego_rot)
+
+        gt_boxes.append(
+            np.concatenate([
+                np.array(box.center),
+                np.array(box.wlh)[[1, 0, 2]],
+                np.array([box.orientation.yaw_pitch_roll[0]]),
+                np.array(box.velocity[:2]),
+            ]))
+        gt_labels.append(classes.index(det_name))
+    return gt_boxes, gt_labels
+
+
+def nuscenes_data_prep(root_path, info_prefix, version, max_sweeps=10):
+    """Create the nuScenes info files.
+
+    Thin wrapper that forwards every argument to
+    ``nuscenes_converter.create_nuscenes_infos``.
+
+    Args:
+        root_path (str): Path of dataset root.
+        info_prefix (str): The prefix of info filenames.
+        version (str): Dataset version.
+        max_sweeps (int, optional): Number of input consecutive frames.
+            Default: 10
+    """
+    converter_options = dict(version=version, max_sweeps=max_sweeps)
+    nuscenes_converter.create_nuscenes_infos(root_path, info_prefix,
+                                             **converter_options)
+
+
+def add_ann_adj_info(extra_tag):
+    """Add annotation, scene and occupancy-path fields to the info files.
+
+    For the ``train`` and ``val`` splits the pickle
+    ``<dataroot>/<extra_tag>_infos_<split>.pkl`` is loaded, every info is
+    extended with ``ann_infos`` (the result of ``get_gt``),
+    ``scene_token``, ``scene_name`` and ``occ_path``, and the file is
+    written back in place.
+
+    Args:
+        extra_tag (str): Prefix of the info file names.
+    """
+    nuscenes_version = 'v1.0-trainval'
+    dataroot = './data/nuscenes/'
+    nuscenes = NuScenes(nuscenes_version, dataroot)
+    for split in ['train', 'val']:
+        info_file = '%s/%s_infos_%s.pkl' % (dataroot, extra_tag, split)
+        # close the handle right after loading instead of leaking it
+        with open(info_file, 'rb') as fid:
+            dataset = pickle.load(fid)
+        infos = dataset['infos']
+        num_infos = len(infos)
+        for idx, info in enumerate(infos):
+            if idx % 10 == 0:
+                print('%d/%d' % (idx, num_infos))
+            # get sweep adjacent frame info
+            sample = nuscenes.get('sample', info['token'])
+            ann_infos = list()
+            for ann in sample['anns']:
+                ann_info = nuscenes.get('sample_annotation', ann)
+                velocity = nuscenes.box_velocity(ann_info['token'])
+                # box_velocity may yield NaN; store a zero velocity then
+                if np.any(np.isnan(velocity)):
+                    velocity = np.zeros(3)
+                ann_info['velocity'] = velocity
+                ann_infos.append(ann_info)
+            # get_gt reads info['ann_infos'], so the raw annotations are
+            # stored first and then replaced by the (boxes, labels) result
+            info['ann_infos'] = ann_infos
+            info['ann_infos'] = get_gt(info)
+            info['scene_token'] = sample['scene_token']
+
+            scene = nuscenes.get('scene', sample['scene_token'])
+            info['scene_name'] = scene['name']
+            info['occ_path'] = \
+                './data/nuscenes/gts/%s/%s'%(scene['name'], info['token'])
+        with open(info_file, 'wb') as fid:
+            pickle.dump(dataset, fid)
+
+
+# Script entry point: create the nuScenes info files, then extend them
+# with annotation, scene and occupancy-path fields.
+if __name__ == '__main__':
+    dataset = 'nuscenes'
+    version = 'v1.0'
+    train_version = f'{version}-trainval'
+    root_path = 'data/nuscenes'
+    extra_tag = 'bevdetv2-nuscenes'
+    # max_sweeps=0 is passed through to the converter
+    nuscenes_data_prep(
+        root_path=root_path,
+        info_prefix=extra_tag,
+        version=train_version,
+        max_sweeps=0)
+
+    print('add_ann_infos')
+    # rewrites the train/val info files produced above in place
+    add_ann_adj_info(extra_tag)
--- a/docker-hub/FlashOCC/Flashocc/tools/data_converter/__init__.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/data_converter/__init__.py
+# Copyright (c) OpenMMLab. All rights reserved.
--- a/docker-hub/FlashOCC/Flashocc/tools/data_converter/__pycache__/__init__.cpython-310.pyc
+++ b/docker-hub/FlashOCC/Flashocc/tools/data_converter/__pycache__/__init__.cpython-310.pyc
--- a/docker-hub/FlashOCC/Flashocc/tools/data_converter/__pycache__/nuscenes_converter.cpython-310.pyc
+++ b/docker-hub/FlashOCC/Flashocc/tools/data_converter/__pycache__/nuscenes_converter.cpython-310.pyc
--- a/docker-hub/FlashOCC/Flashocc/tools/data_converter/create_gt_database.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/data_converter/create_gt_database.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import pickle
+from os import path as osp
+
+import mmcv
+import numpy as np
+from mmcv import track_iter_progress
+from mmcv.ops import roi_align
+from pycocotools import mask as maskUtils
+from pycocotools.coco import COCO
+
+from mmdet3d.core.bbox import box_np_ops as box_np_ops
+from mmdet3d.datasets import build_dataset
+from mmdet.core.evaluation.bbox_overlaps import bbox_overlaps
+
+
+def _poly2mask(mask_ann, img_h, img_w):
+    """Decode a mask annotation with pycocotools.
+
+    Args:
+        mask_ann (list | dict): A polygon list, an RLE dict whose
+            ``counts`` is a list (uncompressed), or an RLE dict that is
+            passed to the decoder unchanged.
+        img_h (int): Image height used to build the RLE.
+        img_w (int): Image width used to build the RLE.
+
+    Returns:
+        The mask returned by ``maskUtils.decode``.
+    """
+    is_polygon = isinstance(mask_ann, list)
+    if is_polygon or isinstance(mask_ann['counts'], list):
+        # polygons and uncompressed RLE both have to be converted first
+        rle = maskUtils.frPyObjects(mask_ann, img_h, img_w)
+        if is_polygon:
+            # a single object might consist of multiple parts, so all
+            # parts are merged into one mask rle code
+            rle = maskUtils.merge(rle)
+    else:
+        # rle
+        rle = mask_ann
+    return maskUtils.decode(rle)
+
+
+def _parse_coco_ann_info(ann_info):
+    """Split COCO annotations into boxes, ignored boxes and masks.
+
+    Annotations flagged ``ignore`` or with a non-positive ``area`` are
+    dropped. Boxes are converted from ``[x, y, w, h]`` to
+    ``[x1, y1, x2, y2]``; ``iscrowd`` annotations only contribute to the
+    ignored boxes.
+
+    Args:
+        ann_info (list[dict]): COCO annotations with ``bbox``, ``area``
+            and ``segmentation`` entries.
+
+    Returns:
+        dict: ``bboxes`` and ``bboxes_ignore`` as float32 arrays of shape
+        (n, 4), and ``masks`` as the list of ``segmentation`` entries of
+        the kept, non-crowd annotations.
+    """
+
+    def _to_box_array(boxes):
+        # an empty list must still give a (0, 4) array
+        if boxes:
+            return np.array(boxes, dtype=np.float32)
+        return np.zeros((0, 4), dtype=np.float32)
+
+    gt_bboxes = []
+    gt_bboxes_ignore = []
+    gt_masks_ann = []
+
+    for ann in ann_info:
+        if ann.get('ignore', False):
+            continue
+        x1, y1, w, h = ann['bbox']
+        if ann['area'] <= 0:
+            continue
+        bbox = [x1, y1, x1 + w, y1 + h]
+        if ann.get('iscrowd', False):
+            gt_bboxes_ignore.append(bbox)
+        else:
+            gt_bboxes.append(bbox)
+            gt_masks_ann.append(ann['segmentation'])
+
+    return dict(
+        bboxes=_to_box_array(gt_bboxes),
+        bboxes_ignore=_to_box_array(gt_bboxes_ignore),
+        masks=gt_masks_ann)
+
+
+def crop_image_patch_v2(pos_proposals, pos_assigned_gt_inds, gt_masks):
+    """Crop fixed-size mask targets for the proposals with ``roi_align``.
+
+    Args:
+        pos_proposals (torch.Tensor): Proposal boxes; one row per
+            proposal, concatenated behind a running index column to form
+            the rois.
+        pos_assigned_gt_inds (torch.Tensor): Index into ``gt_masks`` for
+            every proposal.
+        gt_masks (np.ndarray): Ground-truth masks.
+
+    Returns:
+        torch.Tensor: Output of ``roi_align`` with a 28 x 28 output size
+        and dimension 1 squeezed.
+    """
+    import torch
+    from torch.nn.modules.utils import _pair
+    device = pos_proposals.device
+    proposal_count = pos_proposals.size(0)
+
+    # the i-th roi reads from the i-th selected mask, so the batch index
+    # column is simply 0..N-1
+    batch_index = torch.arange(proposal_count, device=device)
+    batch_index = batch_index.to(dtype=pos_proposals.dtype)[:, None]
+    rois = torch.cat([batch_index, pos_proposals], dim=1)  # Nx5
+    mask_size = _pair(28)
+    rois = rois.to(device=device)
+
+    masks_on_device = torch.from_numpy(gt_masks).to(device)
+    selected_masks = masks_on_device.index_select(0, pos_assigned_gt_inds)
+    selected_masks = selected_masks.to(dtype=rois.dtype)
+
+    # Use RoIAlign could apparently accelerate the training (~0.1s/iter)
+    aligned = roi_align(selected_masks, rois, mask_size[::-1], 1.0, 0, True)
+    return aligned.squeeze(1)
+
+
+def crop_image_patch(pos_proposals, gt_masks, pos_assigned_gt_inds, org_img):
+    """Cut the masked image patch and the mask patch of every proposal.
+
+    Args:
+        pos_proposals (np.ndarray): Boxes as rows of ``x1, y1, x2, y2``;
+            coordinates are cast to int32.
+        gt_masks (np.ndarray): Masks indexed by ``pos_assigned_gt_inds``.
+        pos_assigned_gt_inds (Sequence[int]): Mask index per proposal.
+        org_img (np.ndarray): Image multiplied by the mask before cropping.
+
+    Returns:
+        tuple(list, list): Image patches and mask patches, one entry per
+        proposal.
+    """
+    img_patches, masks = [], []
+    for idx in range(pos_proposals.shape[0]):
+        gt_mask = gt_masks[pos_assigned_gt_inds[idx]]
+        x1, y1, x2, y2 = pos_proposals[idx, :].astype(np.int32)
+        # boxes are inclusive, and every patch is at least one pixel wide
+        # and high
+        patch_w = np.maximum(x2 - x1 + 1, 1)
+        patch_h = np.maximum(y2 - y1 + 1, 1)
+        rows = slice(y1, y1 + patch_h)
+        cols = slice(x1, x1 + patch_w)
+
+        masked_img = gt_mask[..., None] * org_img
+        img_patches.append(masked_img[rows, cols])
+        masks.append(gt_mask[rows, cols])
+    return img_patches, masks
+
+
+def create_groundtruth_database(dataset_class_name,
+                                data_path,
+                                info_prefix,
+                                info_path=None,
+                                mask_anno_path=None,
+                                used_classes=None,
+                                database_save_path=None,
+                                db_info_save_path=None,
+                                relative_path=True,
+                                add_rgb=False,
+                                lidar_only=False,
+                                bev_only=False,
+                                coors_range=None,
+                                with_mask=False):
+    """Given the raw data, generate the ground truth database.
+
+    Args:
+        dataset_class_name (str): Name of the input dataset.
+        data_path (str): Path of the data.
+        info_prefix (str): Prefix of the info file.
+        info_path (str, optional): Path of the info file.
+            Default: None.
+        mask_anno_path (str, optional): Path of the mask_anno.
+            Default: None.
+        used_classes (list[str], optional): Classes have been used.
+            Default: None.
+        database_save_path (str, optional): Path to save database.
+            Default: None.
+        db_info_save_path (str, optional): Path to save db_info.
+            Default: None.
+        relative_path (bool, optional): Whether to use relative path.
+            Default: True.
+        with_mask (bool, optional): Whether to use mask.
+            Default: False.
+    """
+    print(f'Create GT Database of {dataset_class_name}')
+    dataset_cfg = dict(
+        type=dataset_class_name, data_root=data_path, ann_file=info_path)
+    if dataset_class_name == 'KittiDataset':
+        file_client_args = dict(backend='disk')
+        dataset_cfg.update(
+            test_mode=False,
+            split='training',
+            modality=dict(
+                use_lidar=True,
+                use_depth=False,
+                use_lidar_intensity=True,
+                use_camera=with_mask,
+            ),
+            pipeline=[
+                dict(
+                    type='LoadPointsFromFile',
+                    coord_type='LIDAR',
+                    load_dim=4,
+                    use_dim=4,
+                    file_client_args=file_client_args),
+                dict(
+                    type='LoadAnnotations3D',
+                    with_bbox_3d=True,
+                    with_label_3d=True,
+                    file_client_args=file_client_args)
+            ])
+
+    elif dataset_class_name == 'NuScenesDataset':
+        dataset_cfg.update(
+            use_valid_flag=True,
+            pipeline=[
+                dict(
+                    type='LoadPointsFromFile',
+                    coord_type='LIDAR',
+                    load_dim=5,
+                    use_dim=5),
+                dict(
+                    type='LoadPointsFromMultiSweeps',
+                    sweeps_num=10,
+                    use_dim=[0, 1, 2, 3, 4],
+                    pad_empty_sweeps=True,
+                    remove_close=True),
+                dict(
+                    type='LoadAnnotations3D',
+                    with_bbox_3d=True,
+                    with_label_3d=True)
+            ])
+
+    elif dataset_class_name == 'WaymoDataset':
+        file_client_args = dict(backend='disk')
+        dataset_cfg.update(
+            test_mode=False,
+            split='training',
+            modality=dict(
+                use_lidar=True,
+                use_depth=False,
+                use_lidar_intensity=True,
+                use_camera=False,
+            ),
+            pipeline=[
+                dict(
+                    type='LoadPointsFromFile',
+                    coord_type='LIDAR',
+                    load_dim=6,
+                    use_dim=6,
+                    file_client_args=file_client_args),
+                dict(
+                    type='LoadAnnotations3D',
+                    with_bbox_3d=True,
+                    with_label_3d=True,
+                    file_client_args=file_client_args)
+            ])
+
+    dataset = build_dataset(dataset_cfg)
+
+    if database_save_path is None:
+        database_save_path = osp.join(data_path, f'{info_prefix}_gt_database')
+    if db_info_save_path is None:
+        db_info_save_path = osp.join(data_path,
+                                     f'{info_prefix}_dbinfos_train.pkl')
+    mmcv.mkdir_or_exist(database_save_path)
+    all_db_infos = dict()
+    if with_mask:
+        coco = COCO(osp.join(data_path, mask_anno_path))
+        imgIds = coco.getImgIds()
+        file2id = dict()
+        for i in imgIds:
+            info = coco.loadImgs([i])[0]
+            file2id.update({info['file_name']: i})
+
+    group_counter = 0
+    for j in track_iter_progress(list(range(len(dataset)))):
+        input_dict = dataset.get_data_info(j)
+        dataset.pre_pipeline(input_dict)
+        example = dataset.pipeline(input_dict)
+        annos = example['ann_info']
+        image_idx = example['sample_idx']
+        points = example['points'].tensor.numpy()
+        gt_boxes_3d = annos['gt_bboxes_3d'].tensor.numpy()
+        names = annos['gt_names']
+        group_dict = dict()
+        if 'group_ids' in annos:
+            group_ids = annos['group_ids']
+        else:
+            group_ids = np.arange(gt_boxes_3d.shape[0], dtype=np.int64)
+        difficulty = np.zeros(gt_boxes_3d.shape[0], dtype=np.int32)
+        if 'difficulty' in annos:
+            difficulty = annos['difficulty']
+
+        num_obj = gt_boxes_3d.shape[0]
+        point_indices = box_np_ops.points_in_rbbox(points, gt_boxes_3d)
+
+        if with_mask:
+            # prepare masks
+            gt_boxes = annos['gt_bboxes']
+            img_path = osp.split(example['img_info']['filename'])[-1]
+            if img_path not in file2id.keys():
+                print(f'skip image {img_path} for empty mask')
+                continue
+            img_id = file2id[img_path]
+            kins_annIds = coco.getAnnIds(imgIds=img_id)
+            kins_raw_info = coco.loadAnns(kins_annIds)
+            kins_ann_info = _parse_coco_ann_info(kins_raw_info)
+            h, w = annos['img_shape'][:2]
+            gt_masks = [
+                _poly2mask(mask, h, w) for mask in kins_ann_info['masks']
+            ]
+            # get mask inds based on iou mapping
+            bbox_iou = bbox_overlaps(kins_ann_info['bboxes'], gt_boxes)
+            mask_inds = bbox_iou.argmax(axis=0)
+            valid_inds = (bbox_iou.max(axis=0) > 0.5)
+
+            # mask the image
+            # use more precise crop when it is ready
+            # object_img_patches = np.ascontiguousarray(
+            #     np.stack(object_img_patches, axis=0).transpose(0, 3, 1, 2))
+            # crop image patches using roi_align
+            # object_img_patches = crop_image_patch_v2(
+            #     torch.Tensor(gt_boxes),
+            #     torch.Tensor(mask_inds).long(), object_img_patches)
+            object_img_patches, object_masks = crop_image_patch(
+                gt_boxes, gt_masks, mask_inds, annos['img'])
+
+        for i in range(num_obj):
+            filename = f'{image_idx}_{names[i]}_{i}.bin'
+            abs_filepath = osp.join(database_save_path, filename)
+            rel_filepath = osp.join(f'{info_prefix}_gt_database', filename)
+
+            # save point clouds and image patches for each object
+            gt_points = points[point_indices[:, i]]
+            gt_points[:, :3] -= gt_boxes_3d[i, :3]
+
+            if with_mask:
+                if object_masks[i].sum() == 0 or not valid_inds[i]:
+                    # Skip object for empty or invalid mask
+                    continue
+                img_patch_path = abs_filepath + '.png'
+                mask_patch_path = abs_filepath + '.mask.png'
+                mmcv.imwrite(object_img_patches[i], img_patch_path)
+                mmcv.imwrite(object_masks[i], mask_patch_path)
+
+            with open(abs_filepath, 'w') as f:
+                gt_points.tofile(f)
+
+            if (used_classes is None) or names[i] in used_classes:
+                db_info = {
+                    'name': names[i],
+                    'path': rel_filepath,
+                    'image_idx': image_idx,
+                    'gt_idx': i,
+                    'box3d_lidar': gt_boxes_3d[i],
+                    'num_points_in_gt': gt_points.shape[0],
+                    'difficulty': difficulty[i],
+                }
+                local_group_id = group_ids[i]
+                # if local_group_id >= 0:
+                if local_group_id not in group_dict:
+                    group_dict[local_group_id] = group_counter
+                    group_counter += 1
+                db_info['group_id'] = group_dict[local_group_id]
+                if 'score' in annos:
+                    db_info['score'] = annos['score'][i]
+                if with_mask:
+                    db_info.update({'box2d_camera': gt_boxes[i]})
+                if names[i] in all_db_infos:
+                    all_db_infos[names[i]].append(db_info)
+                else:
+                    all_db_infos[names[i]] = [db_info]
+
+    for k, v in all_db_infos.items():
+        print(f'load {len(v)} {k} database infos')
+
+    with open(db_info_save_path, 'wb') as f:
+        pickle.dump(all_db_infos, f)
+
+
+class GTDatabaseCreater:
+    """Given the raw data, generate the ground truth database. This is the
+    parallel version. For serialized version, please refer to
+    `create_groundtruth_database`
+
+    Args:
+        dataset_class_name (str): Name of the input dataset.
+        data_path (str): Path of the data.
+        info_prefix (str): Prefix of the info file.
+        info_path (str, optional): Path of the info file.
+            Default: None.
+        mask_anno_path (str, optional): Path of the mask_anno.
+            Default: None.
+        used_classes (list[str], optional): Classes have been used.
+            Default: None.
+        database_save_path (str, optional): Path to save database.
+            Default: None.
+        db_info_save_path (str, optional): Path to save db_info.
+            Default: None.
+        relative_path (bool, optional): Whether to use relative path.
+            Default: True.
+        with_mask (bool, optional): Whether to use mask.
+            Default: False.
+        num_worker (int, optional): the number of parallel workers to use.
+            Default: 8.
+    """
+
+    def __init__(self,
+                 dataset_class_name,
+                 data_path,
+                 info_prefix,
+                 info_path=None,
+                 mask_anno_path=None,
+                 used_classes=None,
+                 database_save_path=None,
+                 db_info_save_path=None,
+                 relative_path=True,
+                 add_rgb=False,
+                 lidar_only=False,
+                 bev_only=False,
+                 coors_range=None,
+                 with_mask=False,
+                 num_worker=8) -> None:
+        self.dataset_class_name = dataset_class_name
+        self.data_path = data_path
+        self.info_prefix = info_prefix
+        self.info_path = info_path
+        self.mask_anno_path = mask_anno_path
+        self.used_classes = used_classes
+        self.database_save_path = database_save_path
+        self.db_info_save_path = db_info_save_path
+        self.relative_path = relative_path
+        self.add_rgb = add_rgb
+        self.lidar_only = lidar_only
+        self.bev_only = bev_only
+        self.coors_range = coors_range
+        self.with_mask = with_mask
+        self.num_worker = num_worker
+        self.pipeline = None
+
+    def create_single(self, input_dict):
+        group_counter = 0
+        single_db_infos = dict()
+        example = self.pipeline(input_dict)
+        annos = example['ann_info']
+        image_idx = example['sample_idx']
+        points = example['points'].tensor.numpy()
+        gt_boxes_3d = annos['gt_bboxes_3d'].tensor.numpy()
+        names = annos['gt_names']
+        group_dict = dict()
+        if 'group_ids' in annos:
+            group_ids = annos['group_ids']
+        else:
+            group_ids = np.arange(gt_boxes_3d.shape[0], dtype=np.int64)
+        difficulty = np.zeros(gt_boxes_3d.shape[0], dtype=np.int32)
+        if 'difficulty' in annos:
+            difficulty = annos['difficulty']
+
+        num_obj = gt_boxes_3d.shape[0]
+        point_indices = box_np_ops.points_in_rbbox(points, gt_boxes_3d)
+
+        if self.with_mask:
+            # prepare masks
+            gt_boxes = annos['gt_bboxes']
+            img_path = osp.split(example['img_info']['filename'])[-1]
+            if img_path not in self.file2id.keys():
+                print(f'skip image {img_path} for empty mask')
+                return single_db_infos
+            img_id = self.file2id[img_path]
+            kins_annIds = self.coco.getAnnIds(imgIds=img_id)
+            kins_raw_info = self.coco.loadAnns(kins_annIds)
+            kins_ann_info = _parse_coco_ann_info(kins_raw_info)
+            h, w = annos['img_shape'][:2]
+            gt_masks = [
+                _poly2mask(mask, h, w) for mask in kins_ann_info['masks']
+            ]
+            # get mask inds based on iou mapping
+            bbox_iou = bbox_overlaps(kins_ann_info['bboxes'], gt_boxes)
+            mask_inds = bbox_iou.argmax(axis=0)
+            valid_inds = (bbox_iou.max(axis=0) > 0.5)
+
+            # mask the image
+            # use more precise crop when it is ready
+            # object_img_patches = np.ascontiguousarray(
+            #     np.stack(object_img_patches, axis=0).transpose(0, 3, 1, 2))
+            # crop image patches using roi_align
+            # object_img_patches = crop_image_patch_v2(
+            #     torch.Tensor(gt_boxes),
+            #     torch.Tensor(mask_inds).long(), object_img_patches)
+            object_img_patches, object_masks = crop_image_patch(
+                gt_boxes, gt_masks, mask_inds, annos['img'])
+
+        for i in range(num_obj):
+            filename = f'{image_idx}_{names[i]}_{i}.bin'
+            abs_filepath = osp.join(self.database_save_path, filename)
+            rel_filepath = osp.join(f'{self.info_prefix}_gt_database',
+                                    filename)
+
+            # save point clouds and image patches for each object
+            gt_points = points[point_indices[:, i]]
+            gt_points[:, :3] -= gt_boxes_3d[i, :3]
+
+            if self.with_mask:
+                if object_masks[i].sum() == 0 or not valid_inds[i]:
+                    # Skip object for empty or invalid mask
+                    continue
+                img_patch_path = abs_filepath + '.png'
+                mask_patch_path = abs_filepath + '.mask.png'
+                mmcv.imwrite(object_img_patches[i], img_patch_path)
+                mmcv.imwrite(object_masks[i], mask_patch_path)
+
+            with open(abs_filepath, 'w') as f:
+                gt_points.tofile(f)
+
+            if (self.used_classes is None) or names[i] in self.used_classes:
+                db_info = {
+                    'name': names[i],
+                    'path': rel_filepath,
+                    'image_idx': image_idx,
+                    'gt_idx': i,
+                    'box3d_lidar': gt_boxes_3d[i],
+                    'num_points_in_gt': gt_points.shape[0],
+                    'difficulty': difficulty[i],
+                }
+                local_group_id = group_ids[i]
+                # if local_group_id >= 0:
+                if local_group_id not in group_dict:
+                    group_dict[local_group_id] = group_counter
+                    group_counter += 1
+                db_info['group_id'] = group_dict[local_group_id]
+                if 'score' in annos:
+                    db_info['score'] = annos['score'][i]
+                if self.with_mask:
+                    db_info.update({'box2d_camera': gt_boxes[i]})
+                if names[i] in single_db_infos:
+                    single_db_infos[names[i]].append(db_info)
+                else:
+                    single_db_infos[names[i]] = [db_info]
+
+        return single_db_infos
+
+    def create(self):
+        print(f'Create GT Database of {self.dataset_class_name}')
+        dataset_cfg = dict(
+            type=self.dataset_class_name,
+            data_root=self.data_path,
+            ann_file=self.info_path)
+        if self.dataset_class_name == 'KittiDataset':
+            file_client_args = dict(backend='disk')
+            dataset_cfg.update(
+                test_mode=False,
+                split='training',
+                modality=dict(
+                    use_lidar=True,
+                    use_depth=False,
+                    use_lidar_intensity=True,
+                    use_camera=self.with_mask,
+                ),
+                pipeline=[
+                    dict(
+                        type='LoadPointsFromFile',
+                        coord_type='LIDAR',
+                        load_dim=4,
+                        use_dim=4,
+                        file_client_args=file_client_args),
+                    dict(
+                        type='LoadAnnotations3D',
+                        with_bbox_3d=True,
+                        with_label_3d=True,
+                        file_client_args=file_client_args)
+                ])
+
+        elif self.dataset_class_name == 'NuScenesDataset':
+            dataset_cfg.update(
+                use_valid_flag=True,
+                pipeline=[
+                    dict(
+                        type='LoadPointsFromFile',
+                        coord_type='LIDAR',
+                        load_dim=5,
+                        use_dim=5),
+                    dict(
+                        type='LoadPointsFromMultiSweeps',
+                        sweeps_num=10,
+                        use_dim=[0, 1, 2, 3, 4],
+                        pad_empty_sweeps=True,
+                        remove_close=True),
+                    dict(
+                        type='LoadAnnotations3D',
+                        with_bbox_3d=True,
+                        with_label_3d=True)
+                ])
+
+        elif self.dataset_class_name == 'WaymoDataset':
+            file_client_args = dict(backend='disk')
+            dataset_cfg.update(
+                test_mode=False,
+                split='training',
+                modality=dict(
+                    use_lidar=True,
+                    use_depth=False,
+                    use_lidar_intensity=True,
+                    use_camera=False,
+                ),
+                pipeline=[
+                    dict(
+                        type='LoadPointsFromFile',
+                        coord_type='LIDAR',
+                        load_dim=6,
+                        use_dim=6,
+                        file_client_args=file_client_args),
+                    dict(
+                        type='LoadAnnotations3D',
+                        with_bbox_3d=True,
+                        with_label_3d=True,
+                        file_client_args=file_client_args)
+                ])
+
+        dataset = build_dataset(dataset_cfg)
+        self.pipeline = dataset.pipeline
+        if self.database_save_path is None:
+            self.database_save_path = osp.join(
+                self.data_path, f'{self.info_prefix}_gt_database')
+        if self.db_info_save_path is None:
+            self.db_info_save_path = osp.join(
+                self.data_path, f'{self.info_prefix}_dbinfos_train.pkl')
+        mmcv.mkdir_or_exist(self.database_save_path)
+        if self.with_mask:
+            self.coco = COCO(osp.join(self.data_path, self.mask_anno_path))
+            imgIds = self.coco.getImgIds()
+            self.file2id = dict()
+            for i in imgIds:
+                info = self.coco.loadImgs([i])[0]
+                self.file2id.update({info['file_name']: i})
+
+        def loop_dataset(i):
+            input_dict = dataset.get_data_info(i)
+            dataset.pre_pipeline(input_dict)
+            return input_dict
+
+        multi_db_infos = mmcv.track_parallel_progress(
+            self.create_single, ((loop_dataset(i)
+                                  for i in range(len(dataset))), len(dataset)),
+            self.num_worker)
+        print('Make global unique group id')
+        group_counter_offset = 0
+        all_db_infos = dict()
+        for single_db_infos in track_iter_progress(multi_db_infos):
+            group_id = -1
+            for name, name_db_infos in single_db_infos.items():
+                for db_info in name_db_infos:
+                    group_id = max(group_id, db_info['group_id'])
+                    db_info['group_id'] += group_counter_offset
+                if name not in all_db_infos:
+                    all_db_infos[name] = []
+                all_db_infos[name].extend(name_db_infos)
+            group_counter_offset += (group_id + 1)
+
+        for k, v in all_db_infos.items():
+            print(f'load {len(v)} {k} database infos')
+
+        with open(self.db_info_save_path, 'wb') as f:
+            pickle.dump(all_db_infos, f)
--- a/docker-hub/FlashOCC/Flashocc/tools/data_converter/indoor_converter.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/data_converter/indoor_converter.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+
+import mmcv
+import numpy as np
+
+from tools.data_converter.s3dis_data_utils import S3DISData, S3DISSegData
+from tools.data_converter.scannet_data_utils import ScanNetData, ScanNetSegData
+from tools.data_converter.sunrgbd_data_utils import SUNRGBDData
+
+
+def create_indoor_info_file(data_path,
+                            pkl_prefix='sunrgbd',
+                            save_path=None,
+                            workers=4,
+                            **kwargs):
+    """Create indoor information file.
+
+    Get information of the raw data and save it to the pkl file.
+
+    Args:
+        data_path (str): Path of the data.
+        pkl_prefix (str, optional): Prefix of the pkl to be saved.
+            Default: 'sunrgbd'.
+        save_path (str, optional): Path of the pkl to be saved. Default: None.
+        workers (int, optional): Number of threads to be used. Default: 4.
+        kwargs (dict): Additional parameters for dataset-specific Data class.
+            May include `use_v1` for SUN RGB-D and `num_points`.
+    """
+    assert os.path.exists(data_path)
+    assert pkl_prefix in ['sunrgbd', 'scannet', 's3dis'], \
+        f'unsupported indoor dataset {pkl_prefix}'
+    save_path = data_path if save_path is None else save_path
+    assert os.path.exists(save_path)
+
+    # generate infos for both detection and segmentation task
+    if pkl_prefix in ['sunrgbd', 'scannet']:
+        train_filename = os.path.join(save_path,
+                                      f'{pkl_prefix}_infos_train.pkl')
+        val_filename = os.path.join(save_path, f'{pkl_prefix}_infos_val.pkl')
+        if pkl_prefix == 'sunrgbd':
+            # SUN RGB-D has a train-val split
+            num_points = kwargs.get('num_points', -1)
+            use_v1 = kwargs.get('use_v1', False)
+            train_dataset = SUNRGBDData(
+                root_path=data_path,
+                split='train',
+                use_v1=use_v1,
+                num_points=num_points)
+            val_dataset = SUNRGBDData(
+                root_path=data_path,
+                split='val',
+                use_v1=use_v1,
+                num_points=num_points)
+        else:
+            # ScanNet has a train-val-test split
+            train_dataset = ScanNetData(root_path=data_path, split='train')
+            val_dataset = ScanNetData(root_path=data_path, split='val')
+            test_dataset = ScanNetData(root_path=data_path, split='test')
+            test_filename = os.path.join(save_path,
+                                         f'{pkl_prefix}_infos_test.pkl')
+
+        infos_train = train_dataset.get_infos(
+            num_workers=workers, has_label=True)
+        mmcv.dump(infos_train, train_filename, 'pkl')
+        print(f'{pkl_prefix} info train file is saved to {train_filename}')
+
+        infos_val = val_dataset.get_infos(num_workers=workers, has_label=True)
+        mmcv.dump(infos_val, val_filename, 'pkl')
+        print(f'{pkl_prefix} info val file is saved to {val_filename}')
+
+    if pkl_prefix == 'scannet':
+        infos_test = test_dataset.get_infos(
+            num_workers=workers, has_label=False)
+        mmcv.dump(infos_test, test_filename, 'pkl')
+        print(f'{pkl_prefix} info test file is saved to {test_filename}')
+
+    # generate infos for the semantic segmentation task
+    # e.g. re-sampled scene indexes and label weights
+    # scene indexes are used to re-sample rooms with different number of points
+    # label weights are used to balance classes with different number of points
+    if pkl_prefix == 'scannet':
+        # label weight computation function is adopted from
+        # https://github.com/charlesq34/pointnet2/blob/master/scannet/scannet_dataset.py#L24
+        num_points = kwargs.get('num_points', 8192)
+        train_dataset = ScanNetSegData(
+            data_root=data_path,
+            ann_file=train_filename,
+            split='train',
+            num_points=num_points,
+            label_weight_func=lambda x: 1.0 / np.log(1.2 + x))
+        # TODO: do we need to generate on val set?
+        val_dataset = ScanNetSegData(
+            data_root=data_path,
+            ann_file=val_filename,
+            split='val',
+            num_points=num_points,
+            label_weight_func=lambda x: 1.0 / np.log(1.2 + x))
+        # no need to generate for test set
+        train_dataset.get_seg_infos()
+        val_dataset.get_seg_infos()
+    elif pkl_prefix == 's3dis':
+        # S3DIS doesn't have a fixed train-val split
+        # it has 6 areas instead, so we generate info file for each of them
+        # in training, we will use dataset to wrap different areas
+        splits = [f'Area_{i}' for i in [1, 2, 3, 4, 5, 6]]
+        for split in splits:
+            dataset = S3DISData(root_path=data_path, split=split)
+            info = dataset.get_infos(num_workers=workers, has_label=True)
+            filename = os.path.join(save_path,
+                                    f'{pkl_prefix}_infos_{split}.pkl')
+            mmcv.dump(info, filename, 'pkl')
+            print(f'{pkl_prefix} info {split} file is saved to {filename}')
+            num_points = kwargs.get('num_points', 4096)
+            seg_dataset = S3DISSegData(
+                data_root=data_path,
+                ann_file=filename,
+                split=split,
+                num_points=num_points,
+                label_weight_func=lambda x: 1.0 / np.log(1.2 + x))
+            seg_dataset.get_seg_infos()
--- a/docker-hub/FlashOCC/Flashocc/tools/data_converter/kitti_converter.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/data_converter/kitti_converter.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from collections import OrderedDict
+from pathlib import Path
+
+import mmcv
+import numpy as np
+from nuscenes.utils.geometry_utils import view_points
+
+from mmdet3d.core.bbox import box_np_ops, points_cam2img
+from .kitti_data_utils import WaymoInfoGatherer, get_kitti_image_info
+from .nuscenes_converter import post_process_coords
+
+kitti_categories = ('Pedestrian', 'Cyclist', 'Car')
+
+
+def convert_to_kitti_info_version2(info):
+    """convert kitti info v1 to v2 if possible.
+
+    Args:
+        info (dict): Info of the input kitti data.
+            - image (dict): image info
+            - calib (dict): calibration info
+            - point_cloud (dict): point cloud info
+    """
+    if 'image' not in info or 'calib' not in info or 'point_cloud' not in info:
+        info['image'] = {
+            'image_shape': info['img_shape'],
+            'image_idx': info['image_idx'],
+            'image_path': info['img_path'],
+        }
+        info['calib'] = {
+            'R0_rect': info['calib/R0_rect'],
+            'Tr_velo_to_cam': info['calib/Tr_velo_to_cam'],
+            'P2': info['calib/P2'],
+        }
+        info['point_cloud'] = {
+            'velodyne_path': info['velodyne_path'],
+        }
+
+
+def _read_imageset_file(path):
+    with open(path, 'r') as f:
+        lines = f.readlines()
+    return [int(line) for line in lines]
+
+
+class _NumPointsInGTCalculater:
+    """Calculate the number of points inside the ground truth box. This is the
+    parallel version. For the serialized version, please refer to
+    `_calculate_num_points_in_gt`.
+
+    Args:
+        data_path (str): Path of the data.
+        relative_path (bool): Whether to use relative path.
+        remove_outside (bool, optional): Whether to remove points which are
+            outside of image. Default: True.
+        num_features (int, optional): Number of features per point.
+            Default: False.
+        num_worker (int, optional): the number of parallel workers to use.
+            Default: 8.
+    """
+
+    def __init__(self,
+                 data_path,
+                 relative_path,
+                 remove_outside=True,
+                 num_features=4,
+                 num_worker=8) -> None:
+        self.data_path = data_path
+        self.relative_path = relative_path
+        self.remove_outside = remove_outside
+        self.num_features = num_features
+        self.num_worker = num_worker
+
+    def calculate_single(self, info):
+        pc_info = info['point_cloud']
+        image_info = info['image']
+        calib = info['calib']
+        if self.relative_path:
+            v_path = str(Path(self.data_path) / pc_info['velodyne_path'])
+        else:
+            v_path = pc_info['velodyne_path']
+        points_v = np.fromfile(
+            v_path, dtype=np.float32,
+            count=-1).reshape([-1, self.num_features])
+        rect = calib['R0_rect']
+        Trv2c = calib['Tr_velo_to_cam']
+        P2 = calib['P2']
+        if self.remove_outside:
+            points_v = box_np_ops.remove_outside_points(
+                points_v, rect, Trv2c, P2, image_info['image_shape'])
+        annos = info['annos']
+        num_obj = len([n for n in annos['name'] if n != 'DontCare'])
+        dims = annos['dimensions'][:num_obj]
+        loc = annos['location'][:num_obj]
+        rots = annos['rotation_y'][:num_obj]
+        gt_boxes_camera = np.concatenate([loc, dims, rots[..., np.newaxis]],
+                                         axis=1)
+        gt_boxes_lidar = box_np_ops.box_camera_to_lidar(
+            gt_boxes_camera, rect, Trv2c)
+        indices = box_np_ops.points_in_rbbox(points_v[:, :3], gt_boxes_lidar)
+        num_points_in_gt = indices.sum(0)
+        num_ignored = len(annos['dimensions']) - num_obj
+        num_points_in_gt = np.concatenate(
+            [num_points_in_gt, -np.ones([num_ignored])])
+        annos['num_points_in_gt'] = num_points_in_gt.astype(np.int32)
+        return info
+
+    def calculate(self, infos):
+        ret_infos = mmcv.track_parallel_progress(self.calculate_single, infos,
+                                                 self.num_worker)
+        for i, ret_info in enumerate(ret_infos):
+            infos[i] = ret_info
+
+
+def _calculate_num_points_in_gt(data_path,
+                                infos,
+                                relative_path,
+                                remove_outside=True,
+                                num_features=4):
+    for info in mmcv.track_iter_progress(infos):
+        pc_info = info['point_cloud']
+        image_info = info['image']
+        calib = info['calib']
+        if relative_path:
+            v_path = str(Path(data_path) / pc_info['velodyne_path'])
+        else:
+            v_path = pc_info['velodyne_path']
+        points_v = np.fromfile(
+            v_path, dtype=np.float32, count=-1).reshape([-1, num_features])
+        rect = calib['R0_rect']
+        Trv2c = calib['Tr_velo_to_cam']
+        P2 = calib['P2']
+        if remove_outside:
+            points_v = box_np_ops.remove_outside_points(
+                points_v, rect, Trv2c, P2, image_info['image_shape'])
+
+        # points_v = points_v[points_v[:, 0] > 0]
+        annos = info['annos']
+        num_obj = len([n for n in annos['name'] if n != 'DontCare'])
+        # annos = kitti.filter_kitti_anno(annos, ['DontCare'])
+        dims = annos['dimensions'][:num_obj]
+        loc = annos['location'][:num_obj]
+        rots = annos['rotation_y'][:num_obj]
+        gt_boxes_camera = np.concatenate([loc, dims, rots[..., np.newaxis]],
+                                         axis=1)
+        gt_boxes_lidar = box_np_ops.box_camera_to_lidar(
+            gt_boxes_camera, rect, Trv2c)
+        indices = box_np_ops.points_in_rbbox(points_v[:, :3], gt_boxes_lidar)
+        num_points_in_gt = indices.sum(0)
+        num_ignored = len(annos['dimensions']) - num_obj
+        num_points_in_gt = np.concatenate(
+            [num_points_in_gt, -np.ones([num_ignored])])
+        annos['num_points_in_gt'] = num_points_in_gt.astype(np.int32)
+
+
+def create_kitti_info_file(data_path,
+                           pkl_prefix='kitti',
+                           with_plane=False,
+                           save_path=None,
+                           relative_path=True):
+    """Generate the KITTI info pickles for every split.
+
+    The image ids of each split are read from ``ImageSets/train.txt``,
+    ``ImageSets/val.txt`` and ``ImageSets/test.txt`` under ``data_path``.
+    Four files are written: ``{pkl_prefix}_infos_train.pkl``,
+    ``{pkl_prefix}_infos_val.pkl``, ``{pkl_prefix}_infos_trainval.pkl`` (the
+    concatenation of train and val) and ``{pkl_prefix}_infos_test.pkl``.
+
+    Args:
+        data_path (str): Path of the data root.
+        pkl_prefix (str, optional): Prefix of the info files to be
+            generated. Default: 'kitti'.
+        with_plane (bool, optional): Whether to use plane information for
+            the train/val splits (the test split never uses it).
+            Default: False.
+        save_path (str, optional): Directory to save the info files to.
+            ``data_path`` is used when None. Default: None.
+        relative_path (bool, optional): Whether to use relative path.
+            Default: True.
+    """
+    split_dir = Path(data_path) / 'ImageSets'
+    train_img_ids = _read_imageset_file(str(split_dir / 'train.txt'))
+    val_img_ids = _read_imageset_file(str(split_dir / 'val.txt'))
+    test_img_ids = _read_imageset_file(str(split_dir / 'test.txt'))
+
+    print('Generate info. this may take several minutes.')
+    save_path = Path(data_path if save_path is None else save_path)
+
+    def _gather_labelled(img_ids):
+        # train and val are gathered with identical options; only the id
+        # list differs.
+        infos = get_kitti_image_info(
+            data_path,
+            training=True,
+            velodyne=True,
+            calib=True,
+            with_plane=with_plane,
+            image_ids=img_ids,
+            relative_path=relative_path)
+        _calculate_num_points_in_gt(data_path, infos, relative_path)
+        return infos
+
+    kitti_infos_train = _gather_labelled(train_img_ids)
+    train_file = save_path / f'{pkl_prefix}_infos_train.pkl'
+    print(f'Kitti info train file is saved to {train_file}')
+    mmcv.dump(kitti_infos_train, train_file)
+
+    kitti_infos_val = _gather_labelled(val_img_ids)
+    val_file = save_path / f'{pkl_prefix}_infos_val.pkl'
+    print(f'Kitti info val file is saved to {val_file}')
+    mmcv.dump(kitti_infos_val, val_file)
+
+    trainval_file = save_path / f'{pkl_prefix}_infos_trainval.pkl'
+    print(f'Kitti info trainval file is saved to {trainval_file}')
+    mmcv.dump(kitti_infos_train + kitti_infos_val, trainval_file)
+
+    # The test split has no labels, so neither annotations nor point
+    # counts are produced for it.
+    kitti_infos_test = get_kitti_image_info(
+        data_path,
+        training=False,
+        label_info=False,
+        velodyne=True,
+        calib=True,
+        with_plane=False,
+        image_ids=test_img_ids,
+        relative_path=relative_path)
+    test_file = save_path / f'{pkl_prefix}_infos_test.pkl'
+    print(f'Kitti info test file is saved to {test_file}')
+    mmcv.dump(kitti_infos_test, test_file)
+
+
+def create_waymo_info_file(data_path,
+                           pkl_prefix='waymo',
+                           save_path=None,
+                           relative_path=True,
+                           max_sweeps=5,
+                           workers=8):
+    """Generate the Waymo info pickles for every split.
+
+    The image ids of each split are read from ``ImageSets/train.txt``,
+    ``ImageSets/val.txt`` and ``ImageSets/test.txt`` under ``data_path``.
+    Four files are written: ``{pkl_prefix}_infos_train.pkl``,
+    ``{pkl_prefix}_infos_val.pkl``, ``{pkl_prefix}_infos_trainval.pkl`` (the
+    concatenation of train and val) and ``{pkl_prefix}_infos_test.pkl``.
+
+    Args:
+        data_path (str): Path of the data root.
+        pkl_prefix (str, optional): Prefix of the info files to be
+            generated. Default: 'waymo'.
+        save_path (str, optional): Directory to save the info files to.
+            ``data_path`` is used when None. Default: None.
+        relative_path (bool, optional): Whether to use relative path.
+            Default: True.
+        max_sweeps (int, optional): Max sweeps before the detection frame
+            to be used. Default: 5.
+        workers (int, optional): Forwarded as ``num_worker`` to the info
+            gatherers and to the points-in-gt calculator. Default: 8.
+    """
+    split_dir = Path(data_path) / 'ImageSets'
+    train_img_ids = _read_imageset_file(str(split_dir / 'train.txt'))
+    val_img_ids = _read_imageset_file(str(split_dir / 'val.txt'))
+    test_img_ids = _read_imageset_file(str(split_dir / 'test.txt'))
+
+    print('Generate info. this may take several minutes.')
+    save_path = Path(data_path if save_path is None else save_path)
+
+    # Options common to the labelled (train/val) and unlabelled (test)
+    # gatherers.
+    shared_options = dict(
+        velodyne=True,
+        calib=True,
+        pose=True,
+        relative_path=relative_path,
+        max_sweeps=max_sweeps,
+        num_worker=workers)
+    waymo_infos_gatherer_trainval = WaymoInfoGatherer(
+        data_path, training=True, **shared_options)
+    waymo_infos_gatherer_test = WaymoInfoGatherer(
+        data_path, training=False, label_info=False, **shared_options)
+    num_points_in_gt_calculater = _NumPointsInGTCalculater(
+        data_path,
+        relative_path,
+        num_features=6,
+        remove_outside=False,
+        num_worker=workers)
+
+    waymo_infos_train = waymo_infos_gatherer_trainval.gather(train_img_ids)
+    num_points_in_gt_calculater.calculate(waymo_infos_train)
+    train_file = save_path / f'{pkl_prefix}_infos_train.pkl'
+    print(f'Waymo info train file is saved to {train_file}')
+    mmcv.dump(waymo_infos_train, train_file)
+
+    waymo_infos_val = waymo_infos_gatherer_trainval.gather(val_img_ids)
+    num_points_in_gt_calculater.calculate(waymo_infos_val)
+    val_file = save_path / f'{pkl_prefix}_infos_val.pkl'
+    print(f'Waymo info val file is saved to {val_file}')
+    mmcv.dump(waymo_infos_val, val_file)
+
+    trainval_file = save_path / f'{pkl_prefix}_infos_trainval.pkl'
+    print(f'Waymo info trainval file is saved to {trainval_file}')
+    mmcv.dump(waymo_infos_train + waymo_infos_val, trainval_file)
+
+    waymo_infos_test = waymo_infos_gatherer_test.gather(test_img_ids)
+    test_file = save_path / f'{pkl_prefix}_infos_test.pkl'
+    print(f'Waymo info test file is saved to {test_file}')
+    mmcv.dump(waymo_infos_test, test_file)
+
+
+def _create_reduced_point_cloud(data_path,
+                                info_path,
+                                save_path=None,
+                                back=False,
+                                num_features=4,
+                                front_camera_id=2):
+    """Create reduced point clouds for given info.
+
+    For every sample in the info file the velodyne points are loaded,
+    filtered with ``box_np_ops.remove_outside_points`` against the chosen
+    camera projection and image shape, and written back as raw float32.
+
+    Args:
+        data_path (str): Path of original data.
+        info_path (str): Path of data info.
+        save_path (str, optional): Directory to save reduced point cloud
+            data. When None, a sibling directory of the velodyne folder
+            named ``<velodyne folder>_reduced`` is created and used.
+            Default: None.
+        back (bool, optional): Whether to flip the points to back (negate
+            x before filtering). Output files then get a ``_back`` suffix.
+            Default: False.
+        num_features (int, optional): Number of point features. Default: 4.
+        front_camera_id (int, optional): The referenced/front camera ID;
+            selects ``calib['P<id>']``. Default: 2.
+    """
+    kitti_infos = mmcv.load(info_path)
+
+    for info in mmcv.track_iter_progress(kitti_infos):
+        pc_info = info['point_cloud']
+        image_info = info['image']
+        calib = info['calib']
+
+        v_path = Path(data_path) / pc_info['velodyne_path']
+        points_v = np.fromfile(
+            str(v_path), dtype=np.float32,
+            count=-1).reshape([-1, num_features])
+        rect = calib['R0_rect']
+        cam_proj = calib[f'P{front_camera_id}']
+        Trv2c = calib['Tr_velo_to_cam']
+        if back:
+            points_v[:, 0] = -points_v[:, 0]
+        points_v = box_np_ops.remove_outside_points(points_v, rect, Trv2c,
+                                                    cam_proj,
+                                                    image_info['image_shape'])
+        if save_path is None:
+            save_dir = v_path.parent.parent / (v_path.parent.stem + '_reduced')
+            save_dir.mkdir(exist_ok=True)
+        else:
+            save_dir = Path(save_path)
+        # Build the name as ``str`` in both branches: ``Path`` does not
+        # support ``+=`` with a string, which previously raised TypeError
+        # when ``save_path`` was None and ``back`` was True.
+        save_filename = str(save_dir / v_path.name)
+        if back:
+            save_filename += '_back'
+        # Raw float32 payload, so open the file in binary mode.
+        with open(save_filename, 'wb') as f:
+            points_v.tofile(f)
+
+
+def create_reduced_point_cloud(data_path,
+                               pkl_prefix,
+                               train_info_path=None,
+                               val_info_path=None,
+                               test_info_path=None,
+                               save_path=None,
+                               with_back=False):
+    """Create reduced point clouds for training/validation/testing.
+
+    Any info path left as None falls back to
+    ``<data_path>/<pkl_prefix>_infos_<split>.pkl``. The three splits are
+    processed in train, val, test order; when ``with_back`` is set they are
+    processed a second time with the points flipped to the back.
+
+    Args:
+        data_path (str): Path of original data.
+        pkl_prefix (str): Prefix of info files.
+        train_info_path (str, optional): Path of training set info.
+            Default: None.
+        val_info_path (str, optional): Path of validation set info.
+            Default: None.
+        test_info_path (str, optional): Path of test set info.
+            Default: None.
+        save_path (str, optional): Path to save reduced point cloud data.
+            Default: None.
+        with_back (bool, optional): Whether to flip the points to back.
+            Default: False.
+    """
+    if train_info_path is None:
+        train_info_path = Path(data_path) / f'{pkl_prefix}_infos_train.pkl'
+    if val_info_path is None:
+        val_info_path = Path(data_path) / f'{pkl_prefix}_infos_val.pkl'
+    if test_info_path is None:
+        test_info_path = Path(data_path) / f'{pkl_prefix}_infos_test.pkl'
+
+    info_paths = (train_info_path, val_info_path, test_info_path)
+    banners = ('create reduced point cloud for training set',
+               'create reduced point cloud for validation set',
+               'create reduced point cloud for testing set')
+
+    for banner, split_info_path in zip(banners, info_paths):
+        print(banner)
+        _create_reduced_point_cloud(data_path, split_info_path, save_path)
+
+    if not with_back:
+        return
+    for split_info_path in info_paths:
+        _create_reduced_point_cloud(
+            data_path, split_info_path, save_path, back=True)
+
+
+def export_2d_annotation(root_path, info_path, mono3d=True):
+    """Export 2d annotation from the info file and raw data.
+
+    The result is a COCO-style dict with ``annotations``, ``images`` and
+    ``categories`` that is dumped next to the info file as
+    ``<info_path minus its last 4 characters>[_mono3d].coco.json``.
+
+    Args:
+        root_path (str): Root path of the raw data.
+        info_path (str): Path of the info file.
+        mono3d (bool, optional): Whether to export mono3d annotation.
+            Default: True.
+    """
+    from os import path as osp
+
+    kitti_infos = mmcv.load(info_path)
+    categories = [
+        dict(id=kitti_categories.index(cat_name), name=cat_name)
+        for cat_name in kitti_categories
+    ]
+    annotations = []
+    images = []
+    coco_2d_dict = dict(
+        annotations=annotations, images=images, categories=categories)
+
+    for info in mmcv.track_iter_progress(kitti_infos):
+        # get bbox annotations for camera
+        coco_infos = get_2d_boxes(info, occluded=[0, 1, 2, 3], mono3d=mono3d)
+        image_meta = info['image']
+        calib = info['calib']
+        image = mmcv.imread(osp.join(root_path, image_meta['image_path']))
+        height, width, _ = image.shape
+        images.append(
+            dict(
+                file_name=image_meta['image_path'],
+                id=image_meta['image_idx'],
+                Tri2v=calib['Tr_imu_to_velo'],
+                Trv2c=calib['Tr_velo_to_cam'],
+                rect=calib['R0_rect'],
+                cam_intrinsic=calib['P2'],
+                width=width,
+                height=height))
+        for coco_info in coco_infos:
+            if coco_info is None:
+                continue
+            # add an empty key for coco format
+            coco_info['segmentation'] = []
+            # annotation ids are consecutive across the whole export
+            coco_info['id'] = len(annotations)
+            annotations.append(coco_info)
+
+    json_prefix = (f'{info_path[:-4]}_mono3d'
+                   if mono3d else f'{info_path[:-4]}')
+    mmcv.dump(coco_2d_dict, f'{json_prefix}.coco.json')
+
+
+def get_2d_boxes(info, occluded, mono3d=True):
+    """Get the 2D annotation records for a given info.
+
+    Every annotation whose occlusion state is contained in ``occluded`` has
+    its 3D box corners projected with ``P2``; the post-processed projection
+    becomes the 2D box of the record built by ``generate_record``.
+
+    Note:
+        ``info['annos']`` is filtered in place with the occlusion mask, so
+        the ``info`` passed by the caller is modified.
+
+    Args:
+        info (dict): Information of the given sample data. This function
+            reads ``info['calib']`` (``P0`` and ``P2``), ``info['image']``
+            (``image_idx`` and ``image_path``) and, when present,
+            ``info['annos']``.
+        occluded (list[int]): Occlusion states to keep. Each is an integer
+            (0, 1, 2, 3) indicating occlusion state:
+            0 = fully visible, 1 = partly occluded, 2 = largely occluded,
+            3 = unknown, -1 = DontCare
+        mono3d (bool): Whether to get boxes with mono3d annotation.
+
+    Returns:
+        list[dict | None]: List of 2D annotation records of this sample.
+            An entry is None when ``generate_record`` returns None for the
+            annotation. The list is empty when ``info`` has no ``annos``.
+    """
+    # Get calibration information
+    P2 = info['calib']['P2']
+
+    repro_recs = []
+    # if no annotations in info (test dataset), then return
+    if 'annos' not in info:
+        return repro_recs
+
+    # Get all the annotations with the specified visibilities.
+    ann_dicts = info['annos']
+    mask = [(ocld in occluded) for ocld in ann_dicts['occluded']]
+    # NOTE: this overwrites the arrays stored in info['annos'] itself.
+    for k in ann_dicts.keys():
+        ann_dicts[k] = ann_dicts[k][mask]
+
+    # convert dict of list to list of dict
+    ann_recs = []
+    for i in range(len(ann_dicts['occluded'])):
+        ann_rec = {}
+        for k in ann_dicts.keys():
+            ann_rec[k] = ann_dicts[k][i]
+        ann_recs.append(ann_rec)
+
+    for ann_idx, ann_rec in enumerate(ann_recs):
+        # Augment sample_annotation with token information.
+        ann_rec['sample_annotation_token'] = \
+            f"{info['image']['image_idx']}.{ann_idx}"
+        ann_rec['sample_data_token'] = info['image']['image_idx']
+        sample_data_token = info['image']['image_idx']
+
+        # Add a leading axis so each quantity is a single-row 2D array.
+        loc = ann_rec['location'][np.newaxis, :]
+        dim = ann_rec['dimensions'][np.newaxis, :]
+        rot = ann_rec['rotation_y'][np.newaxis, np.newaxis]
+        # transform the center from [0.5, 1.0, 0.5] to [0.5, 0.5, 0.5]
+        dst = np.array([0.5, 0.5, 0.5])
+        src = np.array([0.5, 1.0, 0.5])
+        loc = loc + dim * (dst - src)
+        # x shift derived from the P2 and P0 projection matrices.
+        # NOTE(review): presumably the offset between camera 0 and camera 2
+        # along x - confirm against the calibration convention.
+        offset = (info['calib']['P2'][0, 3] - info['calib']['P0'][0, 3]) \
+            / info['calib']['P2'][0, 0]
+        loc_3d = np.copy(loc)
+        loc_3d[0, 0] += offset
+        # The projected box below uses the unshifted ``loc``; only the
+        # exported ``bbox_cam3d`` uses the shifted ``loc_3d``.
+        gt_bbox_3d = np.concatenate([loc, dim, rot], axis=1).astype(np.float32)
+
+        # Filter out the corners that are not in front of the calibrated
+        # sensor.
+        corners_3d = box_np_ops.center_to_corner_box3d(
+            gt_bbox_3d[:, :3],
+            gt_bbox_3d[:, 3:6],
+            gt_bbox_3d[:, 6], [0.5, 0.5, 0.5],
+            axis=1)
+        corners_3d = corners_3d[0].T  # (1, 8, 3) -> (3, 8)
+        in_front = np.argwhere(corners_3d[2, :] > 0).flatten()
+        corners_3d = corners_3d[:, in_front]
+
+        # Project 3d box to 2d.
+        camera_intrinsic = P2
+        corner_coords = view_points(corners_3d, camera_intrinsic,
+                                    True).T[:, :2].tolist()
+
+        # Keep only corners that fall within the image.
+        final_coords = post_process_coords(corner_coords)
+
+        # Skip if the convex hull of the re-projected corners
+        # does not intersect the image canvas.
+        if final_coords is None:
+            continue
+        else:
+            min_x, min_y, max_x, max_y = final_coords
+
+        # Generate dictionary record to be included in the .json file.
+        repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y,
+                                    sample_data_token,
+                                    info['image']['image_path'])
+
+        # If mono3d=True, add 3D annotations in camera coordinates
+        if mono3d and (repro_rec is not None):
+            repro_rec['bbox_cam3d'] = np.concatenate(
+                [loc_3d, dim, rot],
+                axis=1).astype(np.float32).squeeze().tolist()
+            repro_rec['velo_cam3d'] = -1  # no velocity in KITTI
+
+            center3d = np.array(loc).reshape([1, 3])
+            center2d = points_cam2img(
+                center3d, camera_intrinsic, with_depth=True)
+            repro_rec['center2d'] = center2d.squeeze().tolist()
+            # normalized center2D + depth
+            # samples with depth < 0 will be removed
+            if repro_rec['center2d'][2] <= 0:
+                continue
+
+            repro_rec['attribute_name'] = -1  # no attribute in KITTI
+            repro_rec['attribute_id'] = -1
+
+        # A None record (returned by generate_record) is appended as well.
+        repro_recs.append(repro_rec)
+
+    return repro_recs
+
+
+def generate_record(ann_rec, x1, y1, x2, y2, sample_data_token, filename):
+    """Build the COCO-style record of one 2D box.
+
+    Args:
+        ann_rec (dict): Original 3d annotation record. Its ``name`` entry
+            is used as the category name.
+        x1 (float): Minimum value of the x coordinate.
+        y1 (float): Minimum value of the y coordinate.
+        x2 (float): Maximum value of the x coordinate.
+        y2 (float): Maximum value of the y coordinate.
+        sample_data_token (str): Sample data token.
+        filename (str): The corresponding image file where the annotation
+            is present.
+
+    Returns:
+        dict | None: A sample 2D annotation record, or None when the
+            category name is not in ``kitti_categories``.
+            - file_name (str): file name
+            - image_id (str): sample data token
+            - area (float): 2d box area
+            - category_name (str): category name
+            - category_id (int): category id
+            - bbox (list[float]): left x, top y, x_size, y_size of 2d box
+            - iscrowd (int): whether the area is crowd
+    """
+    key_mapping = {
+        'name': 'category_name',
+        'num_points_in_gt': 'num_lidar_pts',
+        'sample_annotation_token': 'sample_annotation_token',
+        'sample_data_token': 'sample_data_token',
+    }
+
+    # Intermediate record with the renamed annotation fields; only its
+    # category name ends up in the returned dict.
+    repro_rec = OrderedDict(sample_data_token=sample_data_token)
+    repro_rec.update((key_mapping[key], value)
+                     for key, value in ann_rec.items()
+                     if key in key_mapping)
+    repro_rec['bbox_corners'] = [x1, y1, x2, y2]
+    repro_rec['filename'] = filename
+
+    coco_rec = dict(
+        file_name=filename,
+        image_id=sample_data_token,
+        area=(y2 - y1) * (x2 - x1))
+
+    cat_name = repro_rec['category_name']
+    if cat_name not in kitti_categories:
+        return None
+
+    coco_rec.update(
+        category_name=cat_name,
+        category_id=kitti_categories.index(cat_name),
+        bbox=[x1, y1, x2 - x1, y2 - y1],
+        iscrowd=0)
+    return coco_rec
--- a/docker-hub/FlashOCC/Flashocc/tools/data_converter/kitti_data_utils.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/data_converter/kitti_data_utils.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from collections import OrderedDict
+from concurrent import futures as futures
+from os import path as osp
+from pathlib import Path
+
+import mmcv
+import numpy as np
+from PIL import Image
+from skimage import io
+
+
+def get_image_index_str(img_idx, use_prefix_id=False):
+    """Format a sample index as a zero-padded string.
+
+    Args:
+        img_idx (int): Sample index.
+        use_prefix_id (bool, optional): Pad to 7 digits instead of 6.
+            Default: False.
+
+    Returns:
+        str: The zero-padded index.
+    """
+    template = '{:07d}' if use_prefix_id else '{:06d}'
+    return template.format(img_idx)
+
+
+def get_kitti_info_path(idx,
+                        prefix,
+                        info_type='image_2',
+                        file_tail='.png',
+                        training=True,
+                        relative_path=True,
+                        exist_check=True,
+                        use_prefix_id=False):
+    """Build the path ``<training|testing>/<info_type>/<index><file_tail>``.
+
+    Args:
+        idx (int): Sample index.
+        prefix (str): Data root the path is relative to.
+        info_type (str, optional): Sub-folder name. Default: 'image_2'.
+        file_tail (str, optional): File extension. Default: '.png'.
+        training (bool, optional): Use the ``training`` folder, otherwise
+            ``testing``. Default: True.
+        relative_path (bool, optional): Return the path relative to
+            ``prefix`` instead of joined with it. Default: True.
+        exist_check (bool, optional): Raise if the file does not exist
+            under ``prefix``. Default: True.
+        use_prefix_id (bool, optional): Use the 7-digit index format.
+            Default: False.
+
+    Returns:
+        str: The requested path.
+
+    Raises:
+        ValueError: If ``exist_check`` is set and the file is missing.
+    """
+    file_name = get_image_index_str(idx, use_prefix_id) + file_tail
+    prefix = Path(prefix)
+    split_folder = 'training' if training else 'testing'
+    file_path = Path(split_folder) / info_type / file_name
+    full_path = prefix / file_path
+    if exist_check and not full_path.exists():
+        raise ValueError('file not exist: {}'.format(file_path))
+    return str(file_path) if relative_path else str(full_path)
+
+
+def get_image_path(idx,
+                   prefix,
+                   training=True,
+                   relative_path=True,
+                   exist_check=True,
+                   info_type='image_2',
+                   use_prefix_id=False):
+    """Return the ``.png`` image path of sample ``idx``.
+
+    Thin wrapper around :func:`get_kitti_info_path`; see it for the
+    meaning of the arguments.
+    """
+    return get_kitti_info_path(
+        idx,
+        prefix,
+        info_type=info_type,
+        file_tail='.png',
+        training=training,
+        relative_path=relative_path,
+        exist_check=exist_check,
+        use_prefix_id=use_prefix_id)
+
+
+def get_label_path(idx,
+                   prefix,
+                   training=True,
+                   relative_path=True,
+                   exist_check=True,
+                   info_type='label_2',
+                   use_prefix_id=False):
+    """Return the ``.txt`` label path of sample ``idx``.
+
+    Thin wrapper around :func:`get_kitti_info_path`; see it for the
+    meaning of the arguments.
+    """
+    return get_kitti_info_path(
+        idx,
+        prefix,
+        info_type=info_type,
+        file_tail='.txt',
+        training=training,
+        relative_path=relative_path,
+        exist_check=exist_check,
+        use_prefix_id=use_prefix_id)
+
+
+def get_plane_path(idx,
+                   prefix,
+                   training=True,
+                   relative_path=True,
+                   exist_check=True,
+                   info_type='planes',
+                   use_prefix_id=False):
+    """Return the ``.txt`` plane file path of sample ``idx``.
+
+    Thin wrapper around :func:`get_kitti_info_path`; see it for the
+    meaning of the arguments.
+    """
+    return get_kitti_info_path(
+        idx,
+        prefix,
+        info_type=info_type,
+        file_tail='.txt',
+        training=training,
+        relative_path=relative_path,
+        exist_check=exist_check,
+        use_prefix_id=use_prefix_id)
+
+
+def get_velodyne_path(idx,
+                      prefix,
+                      training=True,
+                      relative_path=True,
+                      exist_check=True,
+                      use_prefix_id=False):
+    """Return the ``velodyne/*.bin`` point cloud path of sample ``idx``.
+
+    Thin wrapper around :func:`get_kitti_info_path`; see it for the
+    meaning of the arguments.
+    """
+    return get_kitti_info_path(
+        idx,
+        prefix,
+        info_type='velodyne',
+        file_tail='.bin',
+        training=training,
+        relative_path=relative_path,
+        exist_check=exist_check,
+        use_prefix_id=use_prefix_id)
+
+
+def get_calib_path(idx,
+                   prefix,
+                   training=True,
+                   relative_path=True,
+                   exist_check=True,
+                   use_prefix_id=False):
+    """Return the ``calib/*.txt`` calibration path of sample ``idx``.
+
+    Thin wrapper around :func:`get_kitti_info_path`; see it for the
+    meaning of the arguments.
+    """
+    return get_kitti_info_path(
+        idx,
+        prefix,
+        info_type='calib',
+        file_tail='.txt',
+        training=training,
+        relative_path=relative_path,
+        exist_check=exist_check,
+        use_prefix_id=use_prefix_id)
+
+
+def get_pose_path(idx,
+                  prefix,
+                  training=True,
+                  relative_path=True,
+                  exist_check=True,
+                  use_prefix_id=False):
+    """Return the ``pose/*.txt`` path of sample ``idx``.
+
+    Thin wrapper around :func:`get_kitti_info_path`; see it for the
+    meaning of the arguments.
+    """
+    return get_kitti_info_path(
+        idx,
+        prefix,
+        info_type='pose',
+        file_tail='.txt',
+        training=training,
+        relative_path=relative_path,
+        exist_check=exist_check,
+        use_prefix_id=use_prefix_id)
+
+
+def get_timestamp_path(idx,
+                       prefix,
+                       training=True,
+                       relative_path=True,
+                       exist_check=True,
+                       use_prefix_id=False):
+    """Return the ``timestamp/*.txt`` path of sample ``idx``.
+
+    Thin wrapper around :func:`get_kitti_info_path`; see it for the
+    meaning of the arguments.
+    """
+    return get_kitti_info_path(
+        idx,
+        prefix,
+        info_type='timestamp',
+        file_tail='.txt',
+        training=training,
+        relative_path=relative_path,
+        exist_check=exist_check,
+        use_prefix_id=use_prefix_id)
+
+
+def get_label_anno(label_path):
+    """Parse a KITTI-format label file into a dict of arrays.
+
+    Each non-blank line describes one object as space-separated fields:
+    name, truncated, occluded, alpha, 4 bbox values, 3 dimension values,
+    3 location values, rotation_y and an optional score. Blank lines
+    (for example a trailing empty line) are ignored.
+
+    Args:
+        label_path (str): Path of the label file.
+
+    Returns:
+        dict: Annotation arrays with one entry per object, in file order:
+            - name: object names
+            - truncated, occluded, alpha: per-object scalars
+            - bbox: (N, 4) array
+            - dimensions: (N, 3) array, reordered from the file's column
+              order with ``[2, 0, 1]`` (hwl -> lhw)
+            - location: (N, 3) array
+            - rotation_y: (N,) array
+            - score: parsed score when the first line has 16 fields,
+              otherwise zeros
+            - index: running index over the leading non-'DontCare' count,
+              followed by -1 for the remaining objects
+            - group_ids: ``arange(N)``
+    """
+    with open(label_path, 'r') as f:
+        # Skip blank lines: they would split into [''] and raise
+        # IndexError when the numeric fields are read below.
+        content = [line.strip().split(' ') for line in f if line.strip()]
+
+    names = [x[0] for x in content]
+    num_gt = len(names)
+    num_objects = sum(1 for name in names if name != 'DontCare')
+
+    annotations = {
+        'name': np.array(names),
+        'truncated': np.array([float(x[1]) for x in content]),
+        'occluded': np.array([int(x[2]) for x in content]),
+        'alpha': np.array([float(x[3]) for x in content]),
+        'bbox': np.array([[float(v) for v in x[4:8]]
+                          for x in content]).reshape(-1, 4),
+        # dimensions will convert hwl format to standard lhw(camera) format.
+        'dimensions': np.array([[float(v) for v in x[8:11]]
+                                for x in content]).reshape(-1, 3)[:,
+                                                                  [2, 0, 1]],
+        'location': np.array([[float(v) for v in x[11:14]]
+                              for x in content]).reshape(-1, 3),
+        'rotation_y': np.array([float(x[14]) for x in content]).reshape(-1),
+    }
+    if content and len(content[0]) == 16:  # have score
+        annotations['score'] = np.array([float(x[15]) for x in content])
+    else:
+        annotations['score'] = np.zeros((annotations['bbox'].shape[0], ))
+    # Objects counted as real get 0..num_objects-1, the rest get -1.
+    index = list(range(num_objects)) + [-1] * (num_gt - num_objects)
+    annotations['index'] = np.array(index, dtype=np.int32)
+    annotations['group_ids'] = np.arange(num_gt, dtype=np.int32)
+    return annotations
+
+
+def _extend_matrix(mat):
+    """Append the row ``[0, 0, 0, 1]`` to ``mat`` and return the result."""
+    bottom_row = np.array([[0., 0., 0., 1.]])
+    return np.concatenate([mat, bottom_row], axis=0)
+
+
+def get_kitti_image_info(path,
+                         training=True,
+                         label_info=True,
+                         velodyne=False,
+                         calib=False,
+                         with_plane=False,
+                         image_ids=7481,
+                         extend_matrix=True,
+                         num_worker=8,
+                         relative_path=True,
+                         with_imageshape=True):
+    """Gather one info dict per sample, using a thread pool.
+
+    KITTI annotation format version 2:
+    {
+        [optional]points: [N, 3+] point cloud
+        [optional, for kitti]image: {
+            image_idx: ...
+            image_path: ...
+            image_shape: ...
+        }
+        point_cloud: {
+            num_features: 4
+            velodyne_path: ...
+        }
+        [optional, for kitti]calib: {
+            R0_rect: ...
+            Tr_velo_to_cam: ...
+            P2: ...
+        }
+        annos: {
+            location: [num_gt, 3] array
+            dimensions: [num_gt, 3] array
+            rotation_y: [num_gt] angle array
+            name: [num_gt] ground truth name array
+            [optional]difficulty: kitti difficulty
+            [optional]group_ids: used for multi-part object
+        }
+    }
+
+    Args:
+        path (str): Data root containing the ``training``/``testing``
+            folders.
+        training (bool, optional): Read from the ``training`` folder,
+            otherwise ``testing``. Default: True.
+        label_info (bool, optional): Parse the label file into
+            ``info['annos']``. Default: True.
+        velodyne (bool, optional): Record the velodyne file path.
+            Default: False.
+        calib (bool, optional): Parse the calibration file into
+            ``info['calib']``. Default: False.
+        with_plane (bool, optional): Read the plane file into
+            ``info['plane']``. Default: False.
+        image_ids (list[int] | int, optional): Sample ids to process. A
+            non-list value is expanded to ``range(image_ids)``.
+            Default: 7481.
+        extend_matrix (bool, optional): Extend the calibration matrices to
+            4x4. Default: True.
+        num_worker (int, optional): Size of the thread pool. Default: 8.
+        relative_path (bool, optional): Store paths relative to ``path``.
+            Default: True.
+        with_imageshape (bool, optional): Read each image to record its
+            height and width. Default: True.
+
+    Returns:
+        list[dict]: One info dict per entry of ``image_ids``.
+    """
+    root_path = Path(path)
+    if not isinstance(image_ids, list):
+        image_ids = list(range(image_ids))
+
+    def map_func(idx):
+        # Builds the info dict of a single sample; run by the thread pool.
+        info = {}
+        pc_info = {'num_features': 4}
+        calib_info = {}
+
+        image_info = {'image_idx': idx}
+        annotations = None
+        if velodyne:
+            pc_info['velodyne_path'] = get_velodyne_path(
+                idx, path, training, relative_path)
+        image_info['image_path'] = get_image_path(idx, path, training,
+                                                  relative_path)
+        if with_imageshape:
+            img_path = image_info['image_path']
+            # The stored path may be relative; reading needs the full one.
+            if relative_path:
+                img_path = str(root_path / img_path)
+            # Only the first two shape entries are kept.
+            image_info['image_shape'] = np.array(
+                io.imread(img_path).shape[:2], dtype=np.int32)
+        if label_info:
+            label_path = get_label_path(idx, path, training, relative_path)
+            if relative_path:
+                label_path = str(root_path / label_path)
+            annotations = get_label_anno(label_path)
+        info['image'] = image_info
+        info['point_cloud'] = pc_info
+        if calib:
+            # The calibration file is opened here, so always request the
+            # full path regardless of ``relative_path``.
+            calib_path = get_calib_path(
+                idx, path, training, relative_path=False)
+            with open(calib_path, 'r') as f:
+                lines = f.readlines()
+            # Lines 0-3 hold P0..P3: the first token of each line is
+            # skipped and the next 12 values form a 3x4 matrix.
+            P0 = np.array([float(info) for info in lines[0].split(' ')[1:13]
+                           ]).reshape([3, 4])
+            P1 = np.array([float(info) for info in lines[1].split(' ')[1:13]
+                           ]).reshape([3, 4])
+            P2 = np.array([float(info) for info in lines[2].split(' ')[1:13]
+                           ]).reshape([3, 4])
+            P3 = np.array([float(info) for info in lines[3].split(' ')[1:13]
+                           ]).reshape([3, 4])
+            if extend_matrix:
+                P0 = _extend_matrix(P0)
+                P1 = _extend_matrix(P1)
+                P2 = _extend_matrix(P2)
+                P3 = _extend_matrix(P3)
+            # Line 4 holds the 3x3 rectification matrix.
+            R0_rect = np.array([
+                float(info) for info in lines[4].split(' ')[1:10]
+            ]).reshape([3, 3])
+            if extend_matrix:
+                # Embed the 3x3 matrix in a 4x4 one with a 1 in the corner.
+                rect_4x4 = np.zeros([4, 4], dtype=R0_rect.dtype)
+                rect_4x4[3, 3] = 1.
+                rect_4x4[:3, :3] = R0_rect
+            else:
+                rect_4x4 = R0_rect
+
+            # Lines 5 and 6 hold the velodyne->camera and imu->velodyne
+            # 3x4 transforms.
+            Tr_velo_to_cam = np.array([
+                float(info) for info in lines[5].split(' ')[1:13]
+            ]).reshape([3, 4])
+            Tr_imu_to_velo = np.array([
+                float(info) for info in lines[6].split(' ')[1:13]
+            ]).reshape([3, 4])
+            if extend_matrix:
+                Tr_velo_to_cam = _extend_matrix(Tr_velo_to_cam)
+                Tr_imu_to_velo = _extend_matrix(Tr_imu_to_velo)
+            calib_info['P0'] = P0
+            calib_info['P1'] = P1
+            calib_info['P2'] = P2
+            calib_info['P3'] = P3
+            calib_info['R0_rect'] = rect_4x4
+            calib_info['Tr_velo_to_cam'] = Tr_velo_to_cam
+            calib_info['Tr_imu_to_velo'] = Tr_imu_to_velo
+            info['calib'] = calib_info
+
+        if with_plane:
+            plane_path = get_plane_path(idx, path, training, relative_path)
+            if relative_path:
+                plane_path = str(root_path / plane_path)
+            lines = mmcv.list_from_file(plane_path)
+            # The plane coefficients are taken from the fourth line.
+            info['plane'] = np.array([float(i) for i in lines[3].split()])
+
+        if annotations is not None:
+            info['annos'] = annotations
+            add_difficulty_to_annos(info)
+        return info
+
+    with futures.ThreadPoolExecutor(num_worker) as executor:
+        image_infos = executor.map(map_func, image_ids)
+
+    # Leaving the ``with`` block waits for all tasks, so the results are
+    # ready to be collected here.
+    return list(image_infos)
+
+
+class WaymoInfoGatherer:
+    """
+    Parallel version of waymo dataset information gathering.
+    Waymo annotation format version like KITTI:
+    {
+        [optional]points: [N, 3+] point cloud
+        [optional, for kitti]image: {
+            image_idx: ...
+            image_path: ...
+            image_shape: ...
+        }
+        point_cloud: {
+            num_features: 6
+            velodyne_path: ...
+        }
+        [optional, for kitti]calib: {
+            R0_rect: ...
+            Tr_velo_to_cam0: ...
+            P0: ...
+        }
+        annos: {
+            location: [num_gt, 3] array
+            dimensions: [num_gt, 3] array
+            rotation_y: [num_gt] angle array
+            name: [num_gt] ground truth name array
+            [optional]difficulty: kitti difficulty
+            [optional]group_ids: used for multi-part object
+        }
+    }
+    """
+
+    def __init__(self,
+                 path,
+                 training=True,
+                 label_info=True,
+                 velodyne=False,
+                 calib=False,
+                 pose=False,
+                 extend_matrix=True,
+                 num_worker=8,
+                 relative_path=True,
+                 with_imageshape=True,
+                 max_sweeps=5) -> None:
+        """Store the gathering options on the instance.
+
+        Args:
+            path (str): Data root.
+            training (bool, optional): Passed to the path helpers to pick
+                the split folder. Default: True.
+            label_info (bool, optional): Parse label files. Default: True.
+            velodyne (bool, optional): Record the velodyne path and read
+                the timestamp file. Default: False.
+            calib (bool, optional): Parse calibration files.
+                Default: False.
+            pose (bool, optional): Stored as ``self.pose``. Default: False.
+            extend_matrix (bool, optional): Extend calibration matrices
+                with ``_extend_matrix``. Default: True.
+            num_worker (int, optional): Stored as ``self.num_worker``.
+                Default: 8.
+            relative_path (bool, optional): Store paths relative to
+                ``path``. Default: True.
+            with_imageshape (bool, optional): Record the image height and
+                width. Default: True.
+            max_sweeps (int, optional): Stored as ``self.max_sweeps``.
+                Default: 5.
+        """
+        # Where the data lives and how paths are reported.
+        self.path, self.training = path, training
+        self.relative_path = relative_path
+
+        # Which pieces of information to gather per sample.
+        self.label_info, self.velodyne = label_info, velodyne
+        self.calib, self.pose = calib, pose
+        self.with_imageshape = with_imageshape
+
+        # Processing options.
+        self.extend_matrix = extend_matrix
+        self.num_worker, self.max_sweeps = num_worker, max_sweeps
+
+    def gather_single(self, idx):
+        root_path = Path(self.path)
+        info = {}
+        pc_info = {'num_features': 6}
+        calib_info = {}
+
+        image_info = {'image_idx': idx}
+        annotations = None
+        if self.velodyne:
+            pc_info['velodyne_path'] = get_velodyne_path(
+                idx,
+                self.path,
+                self.training,
+                self.relative_path,
+                use_prefix_id=True)
+            with open(
+                    get_timestamp_path(
+                        idx,
+                        self.path,
+                        self.training,
+                        relative_path=False,
+                        use_prefix_id=True)) as f:
+                info['timestamp'] = np.int64(f.read())
+        image_info['image_path'] = get_image_path(
+            idx,
+            self.path,
+            self.training,
+            self.relative_path,
+            info_type='image_0',
+            use_prefix_id=True)
+        if self.with_imageshape:
+            img_path = image_info['image_path']
+            if self.relative_path:
+                img_path = str(root_path / img_path)
+            # io using PIL is significantly faster than skimage
+            w, h = Image.open(img_path).size
+            image_info['image_shape'] = np.array((h, w), dtype=np.int32)
+        if self.label_info:
+            label_path = get_label_path(
+                idx,
+                self.path,
+                self.training,
+                self.relative_path,
+                info_type='label_all',
+                use_prefix_id=True)
+            if self.relative_path:
+                label_path = str(root_path / label_path)
+            annotations = get_label_anno(label_path)
+        info['image'] = image_info
+        info['point_cloud'] = pc_info
+        if self.calib:
+            calib_path = get_calib_path(
+                idx,
+                self.path,
+                self.training,
+                relative_path=False,
+                use_prefix_id=True)
+            with open(calib_path, 'r') as f:
+                lines = f.readlines()
+            P0 = np.array([float(info) for info in lines[0].split(' ')[1:13]
+                           ]).reshape([3, 4])
+            P1 = np.array([float(info) for info in lines[1].split(' ')[1:13]
+                           ]).reshape([3, 4])
+            P2 = np.array([float(info) for info in lines[2].split(' ')[1:13]
+                           ]).reshape([3, 4])
+            P3 = np.array([float(info) for info in lines[3].split(' ')[1:13]
+                           ]).reshape([3, 4])
+            P4 = np.array([float(info) for info in lines[4].split(' ')[1:13]
+                           ]).reshape([3, 4])
+            if self.extend_matrix:
+                P0 = _extend_matrix(P0)
+                P1 = _extend_matrix(P1)
+                P2 = _extend_matrix(P2)
+                P3 = _extend_matrix(P3)
+                P4 = _extend_matrix(P4)
+            R0_rect = np.array([
+                float(info) for info in lines[5].split(' ')[1:10]
+            ]).reshape([3, 3])
+            if self.extend_matrix:
+                rect_4x4 = np.zeros([4, 4], dtype=R0_rect.dtype)
+                rect_4x4[3, 3] = 1.
+                rect_4x4[:3, :3] = R0_rect
+            else:
+                rect_4x4 = R0_rect
+
+            Tr_velo_to_cam = np.array([
+                float(info) for info in lines[6].split(' ')[1:13]
+            ]).reshape([3, 4])
+            if self.extend_matrix:
+                Tr_velo_to_cam = _extend_matrix(Tr_velo_to_cam)
+            calib_info['P0'] = P0
+            calib_info['P1'] = P1
+            calib_info['P2'] = P2
+            calib_info['P3'] = P3
+            calib_info['P4'] = P4
+            calib_info['R0_rect'] = rect_4x4
+            calib_info['Tr_velo_to_cam'] = Tr_velo_to_cam
+            info['calib'] = calib_info
+        if self.pose:
+            pose_path = get_pose_path(
+                idx,
+                self.path,
+                self.training,
+                relative_path=False,
+                use_prefix_id=True)
+            info['pose'] = np.loadtxt(pose_path)
+
+        if annotations is not None:
+            info['annos'] = annotations
+            info['annos']['camera_id'] = info['annos'].pop('score')
+            add_difficulty_to_annos(info)
+
+        sweeps = []
+        prev_idx = idx
+        while len(sweeps) < self.max_sweeps:
+            prev_info = {}
+            prev_idx -= 1
+            prev_info['velodyne_path'] = get_velodyne_path(
+                prev_idx,
+                self.path,
+                self.training,
+                self.relative_path,
+                exist_check=False,
+                use_prefix_id=True)
+            if_prev_exists = osp.exists(
+                Path(self.path) / prev_info['velodyne_path'])
+            if if_prev_exists:
+                with open(
+                        get_timestamp_path(
+                            prev_idx,
+                            self.path,
+                            self.training,
+                            relative_path=False,
+                            use_prefix_id=True)) as f:
+                    prev_info['timestamp'] = np.int64(f.read())
+                prev_pose_path = get_pose_path(
+                    prev_idx,
+                    self.path,
+                    self.training,
+                    relative_path=False,
+                    use_prefix_id=True)
+                prev_info['pose'] = np.loadtxt(prev_pose_path)
+                sweeps.append(prev_info)
+            else:
+                break
+        info['sweeps'] = sweeps
+
+        return info
+
+    def gather(self, image_ids):
+        if not isinstance(image_ids, list):
+            image_ids = list(range(image_ids))
+        image_infos = mmcv.track_parallel_progress(self.gather_single,
+                                                   image_ids, self.num_worker)
+        return list(image_infos)
+
+
+def kitti_anno_to_label_file(annos, folder):
+    folder = Path(folder)
+    for anno in annos:
+        image_idx = anno['metadata']['image_idx']
+        label_lines = []
+        for j in range(anno['bbox'].shape[0]):
+            label_dict = {
+                'name': anno['name'][j],
+                'alpha': anno['alpha'][j],
+                'bbox': anno['bbox'][j],
+                'location': anno['location'][j],
+                'dimensions': anno['dimensions'][j],
+                'rotation_y': anno['rotation_y'][j],
+                'score': anno['score'][j],
+            }
+            label_line = kitti_result_line(label_dict)
+            label_lines.append(label_line)
+        label_file = folder / f'{get_image_index_str(image_idx)}.txt'
+        label_str = '\n'.join(label_lines)
+        with open(label_file, 'w') as f:
+            f.write(label_str)
+
+
+def add_difficulty_to_annos(info):
+    min_height = [40, 25,
+                  25]  # minimum height for evaluated groundtruth/detections
+    max_occlusion = [
+        0, 1, 2
+    ]  # maximum occlusion level of the groundtruth used for evaluation
+    max_trunc = [
+        0.15, 0.3, 0.5
+    ]  # maximum truncation level of the groundtruth used for evaluation
+    annos = info['annos']
+    dims = annos['dimensions']  # lhw format
+    bbox = annos['bbox']
+    height = bbox[:, 3] - bbox[:, 1]
+    occlusion = annos['occluded']
+    truncation = annos['truncated']
+    diff = []
+    easy_mask = np.ones((len(dims), ), dtype=np.bool)
+    moderate_mask = np.ones((len(dims), ), dtype=np.bool)
+    hard_mask = np.ones((len(dims), ), dtype=np.bool)
+    i = 0
+    for h, o, t in zip(height, occlusion, truncation):
+        if o > max_occlusion[0] or h <= min_height[0] or t > max_trunc[0]:
+            easy_mask[i] = False
+        if o > max_occlusion[1] or h <= min_height[1] or t > max_trunc[1]:
+            moderate_mask[i] = False
+        if o > max_occlusion[2] or h <= min_height[2] or t > max_trunc[2]:
+            hard_mask[i] = False
+        i += 1
+    is_easy = easy_mask
+    is_moderate = np.logical_xor(easy_mask, moderate_mask)
+    is_hard = np.logical_xor(hard_mask, moderate_mask)
+
+    for i in range(len(dims)):
+        if is_easy[i]:
+            diff.append(0)
+        elif is_moderate[i]:
+            diff.append(1)
+        elif is_hard[i]:
+            diff.append(2)
+        else:
+            diff.append(-1)
+    annos['difficulty'] = np.array(diff, np.int32)
+    return diff
+
+
+def kitti_result_line(result_dict, precision=4):
+    prec_float = '{' + ':.{}f'.format(precision) + '}'
+    res_line = []
+    all_field_default = OrderedDict([
+        ('name', None),
+        ('truncated', -1),
+        ('occluded', -1),
+        ('alpha', -10),
+        ('bbox', None),
+        ('dimensions', [-1, -1, -1]),
+        ('location', [-1000, -1000, -1000]),
+        ('rotation_y', -10),
+        ('score', 0.0),
+    ])
+    res_dict = [(key, None) for key, val in all_field_default.items()]
+    res_dict = OrderedDict(res_dict)
+    for key, val in result_dict.items():
+        if all_field_default[key] is None and val is None:
+            raise ValueError('you must specify a value for {}'.format(key))
+        res_dict[key] = val
+
+    for key, val in res_dict.items():
+        if key == 'name':
+            res_line.append(val)
+        elif key in ['truncated', 'alpha', 'rotation_y', 'score']:
+            if val is None:
+                res_line.append(str(all_field_default[key]))
+            else:
+                res_line.append(prec_float.format(val))
+        elif key == 'occluded':
+            if val is None:
+                res_line.append(str(all_field_default[key]))
+            else:
+                res_line.append('{}'.format(val))
+        elif key in ['bbox', 'dimensions', 'location']:
+            if val is None:
+                res_line += [str(v) for v in all_field_default[key]]
+            else:
+                res_line += [prec_float.format(v) for v in val]
+        else:
+            raise ValueError('unknown key. supported key:{}'.format(
+                res_dict.keys()))
+    return ' '.join(res_line)
--- a/docker-hub/FlashOCC/Flashocc/tools/data_converter/lyft_converter.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/data_converter/lyft_converter.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+from logging import warning
+from os import path as osp
+
+import mmcv
+import numpy as np
+from lyft_dataset_sdk.lyftdataset import LyftDataset as Lyft
+from pyquaternion import Quaternion
+
+from mmdet3d.datasets import LyftDataset
+from .nuscenes_converter import (get_2d_boxes, get_available_scenes,
+                                 obtain_sensor2top)
+
+# Class names of the Lyft dataset. The position of a name in this tuple is
+# used as its category id by export_2d_annotation.
+lyft_categories = ('car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle',
+                   'motorcycle', 'bicycle', 'pedestrian', 'animal')
+
+
+def create_lyft_infos(root_path,
+                      info_prefix,
+                      version='v1.01-train',
+                      max_sweeps=10):
+    """Create info file of lyft dataset.
+
+    Given the raw data, generate its related info file in pkl format.
+
+    Args:
+        root_path (str): Path of the data root.
+        info_prefix (str): Prefix of the info file to be generated.
+        version (str, optional): Version of the data.
+            Default: 'v1.01-train'.
+        max_sweeps (int, optional): Max number of sweeps.
+            Default: 10.
+    """
+    lyft = Lyft(
+        data_path=osp.join(root_path, version),
+        json_path=osp.join(root_path, version, version),
+        verbose=True)
+    available_vers = ['v1.01-train', 'v1.01-test']
+    assert version in available_vers
+    if version == 'v1.01-train':
+        train_scenes = mmcv.list_from_file('data/lyft/train.txt')
+        val_scenes = mmcv.list_from_file('data/lyft/val.txt')
+    elif version == 'v1.01-test':
+        train_scenes = mmcv.list_from_file('data/lyft/test.txt')
+        val_scenes = []
+    else:
+        raise ValueError('unknown')
+
+    # filter existing scenes.
+    available_scenes = get_available_scenes(lyft)
+    available_scene_names = [s['name'] for s in available_scenes]
+    train_scenes = list(
+        filter(lambda x: x in available_scene_names, train_scenes))
+    val_scenes = list(filter(lambda x: x in available_scene_names, val_scenes))
+    train_scenes = set([
+        available_scenes[available_scene_names.index(s)]['token']
+        for s in train_scenes
+    ])
+    val_scenes = set([
+        available_scenes[available_scene_names.index(s)]['token']
+        for s in val_scenes
+    ])
+
+    test = 'test' in version
+    if test:
+        print(f'test scene: {len(train_scenes)}')
+    else:
+        print(f'train scene: {len(train_scenes)}, \
+                val scene: {len(val_scenes)}')
+    train_lyft_infos, val_lyft_infos = _fill_trainval_infos(
+        lyft, train_scenes, val_scenes, test, max_sweeps=max_sweeps)
+
+    metadata = dict(version=version)
+    if test:
+        print(f'test sample: {len(train_lyft_infos)}')
+        data = dict(infos=train_lyft_infos, metadata=metadata)
+        info_name = f'{info_prefix}_infos_test'
+        info_path = osp.join(root_path, f'{info_name}.pkl')
+        mmcv.dump(data, info_path)
+    else:
+        print(f'train sample: {len(train_lyft_infos)}, \
+                val sample: {len(val_lyft_infos)}')
+        data = dict(infos=train_lyft_infos, metadata=metadata)
+        train_info_name = f'{info_prefix}_infos_train'
+        info_path = osp.join(root_path, f'{train_info_name}.pkl')
+        mmcv.dump(data, info_path)
+        data['infos'] = val_lyft_infos
+        val_info_name = f'{info_prefix}_infos_val'
+        info_val_path = osp.join(root_path, f'{val_info_name}.pkl')
+        mmcv.dump(data, info_val_path)
+
+
+def _fill_trainval_infos(lyft,
+                         train_scenes,
+                         val_scenes,
+                         test=False,
+                         max_sweeps=10):
+    """Generate the train/val infos from the raw data.
+
+    Args:
+        lyft (:obj:`LyftDataset`): Dataset class in the Lyft dataset.
+        train_scenes (list[str]): Basic information of training scenes.
+        val_scenes (list[str]): Basic information of validation scenes.
+        test (bool, optional): Whether use the test mode. In the test mode, no
+            annotations can be accessed. Default: False.
+        max_sweeps (int, optional): Max number of sweeps. Default: 10.
+
+    Returns:
+        tuple[list[dict]]: Information of training set and
+            validation set that will be saved to the info file.
+    """
+    train_lyft_infos = []
+    val_lyft_infos = []
+
+    for sample in mmcv.track_iter_progress(lyft.sample):
+        lidar_token = sample['data']['LIDAR_TOP']
+        sd_rec = lyft.get('sample_data', sample['data']['LIDAR_TOP'])
+        cs_record = lyft.get('calibrated_sensor',
+                             sd_rec['calibrated_sensor_token'])
+        pose_record = lyft.get('ego_pose', sd_rec['ego_pose_token'])
+        abs_lidar_path, boxes, _ = lyft.get_sample_data(lidar_token)
+        # nuScenes devkit returns more convenient relative paths while
+        # lyft devkit returns absolute paths
+        abs_lidar_path = str(abs_lidar_path)  # absolute path
+        lidar_path = abs_lidar_path.split(f'{os.getcwd()}/')[-1]
+        # relative path
+
+        mmcv.check_file_exist(lidar_path)
+
+        info = {
+            'lidar_path': lidar_path,
+            'token': sample['token'],
+            'sweeps': [],
+            'cams': dict(),
+            'lidar2ego_translation': cs_record['translation'],
+            'lidar2ego_rotation': cs_record['rotation'],
+            'ego2global_translation': pose_record['translation'],
+            'ego2global_rotation': pose_record['rotation'],
+            'timestamp': sample['timestamp'],
+        }
+
+        l2e_r = info['lidar2ego_rotation']
+        l2e_t = info['lidar2ego_translation']
+        e2g_r = info['ego2global_rotation']
+        e2g_t = info['ego2global_translation']
+        l2e_r_mat = Quaternion(l2e_r).rotation_matrix
+        e2g_r_mat = Quaternion(e2g_r).rotation_matrix
+
+        # obtain 6 image's information per frame
+        camera_types = [
+            'CAM_FRONT',
+            'CAM_FRONT_RIGHT',
+            'CAM_FRONT_LEFT',
+            'CAM_BACK',
+            'CAM_BACK_LEFT',
+            'CAM_BACK_RIGHT',
+        ]
+        for cam in camera_types:
+            cam_token = sample['data'][cam]
+            cam_path, _, cam_intrinsic = lyft.get_sample_data(cam_token)
+            cam_info = obtain_sensor2top(lyft, cam_token, l2e_t, l2e_r_mat,
+                                         e2g_t, e2g_r_mat, cam)
+            cam_info.update(cam_intrinsic=cam_intrinsic)
+            info['cams'].update({cam: cam_info})
+
+        # obtain sweeps for a single key-frame
+        sd_rec = lyft.get('sample_data', sample['data']['LIDAR_TOP'])
+        sweeps = []
+        while len(sweeps) < max_sweeps:
+            if not sd_rec['prev'] == '':
+                sweep = obtain_sensor2top(lyft, sd_rec['prev'], l2e_t,
+                                          l2e_r_mat, e2g_t, e2g_r_mat, 'lidar')
+                sweeps.append(sweep)
+                sd_rec = lyft.get('sample_data', sd_rec['prev'])
+            else:
+                break
+        info['sweeps'] = sweeps
+        # obtain annotation
+        if not test:
+            annotations = [
+                lyft.get('sample_annotation', token)
+                for token in sample['anns']
+            ]
+            locs = np.array([b.center for b in boxes]).reshape(-1, 3)
+            dims = np.array([b.wlh for b in boxes]).reshape(-1, 3)
+            rots = np.array([b.orientation.yaw_pitch_roll[0]
+                             for b in boxes]).reshape(-1, 1)
+
+            names = [b.name for b in boxes]
+            for i in range(len(names)):
+                if names[i] in LyftDataset.NameMapping:
+                    names[i] = LyftDataset.NameMapping[names[i]]
+            names = np.array(names)
+
+            # we need to convert box size to
+            # the format of our lidar coordinate system
+            # which is x_size, y_size, z_size (corresponding to l, w, h)
+            gt_boxes = np.concatenate([locs, dims[:, [1, 0, 2]], rots], axis=1)
+            assert len(gt_boxes) == len(
+                annotations), f'{len(gt_boxes)}, {len(annotations)}'
+            info['gt_boxes'] = gt_boxes
+            info['gt_names'] = names
+            info['num_lidar_pts'] = np.array(
+                [a['num_lidar_pts'] for a in annotations])
+            info['num_radar_pts'] = np.array(
+                [a['num_radar_pts'] for a in annotations])
+
+        if sample['scene_token'] in train_scenes:
+            train_lyft_infos.append(info)
+        else:
+            val_lyft_infos.append(info)
+
+    return train_lyft_infos, val_lyft_infos
+
+
+def export_2d_annotation(root_path, info_path, version):
+    """Export 2d annotation from the info file and raw data.
+
+    Args:
+        root_path (str): Root path of the raw data.
+        info_path (str): Path of the info file.
+        version (str): Dataset version.
+    """
+    warning.warn('DeprecationWarning: 2D annotations are not used on the '
+                 'Lyft dataset. The function export_2d_annotation will be '
+                 'deprecated.')
+    # get bbox annotations for camera
+    camera_types = [
+        'CAM_FRONT',
+        'CAM_FRONT_RIGHT',
+        'CAM_FRONT_LEFT',
+        'CAM_BACK',
+        'CAM_BACK_LEFT',
+        'CAM_BACK_RIGHT',
+    ]
+    lyft_infos = mmcv.load(info_path)['infos']
+    lyft = Lyft(
+        data_path=osp.join(root_path, version),
+        json_path=osp.join(root_path, version, version),
+        verbose=True)
+    # info_2d_list = []
+    cat2Ids = [
+        dict(id=lyft_categories.index(cat_name), name=cat_name)
+        for cat_name in lyft_categories
+    ]
+    coco_ann_id = 0
+    coco_2d_dict = dict(annotations=[], images=[], categories=cat2Ids)
+    for info in mmcv.track_iter_progress(lyft_infos):
+        for cam in camera_types:
+            cam_info = info['cams'][cam]
+            coco_infos = get_2d_boxes(
+                lyft,
+                cam_info['sample_data_token'],
+                visibilities=['', '1', '2', '3', '4'])
+            (height, width, _) = mmcv.imread(cam_info['data_path']).shape
+            coco_2d_dict['images'].append(
+                dict(
+                    file_name=cam_info['data_path'],
+                    id=cam_info['sample_data_token'],
+                    width=width,
+                    height=height))
+            for coco_info in coco_infos:
+                if coco_info is None:
+                    continue
+                # add an empty key for coco format
+                coco_info['segmentation'] = []
+                coco_info['id'] = coco_ann_id
+                coco_2d_dict['annotations'].append(coco_info)
+                coco_ann_id += 1
+    mmcv.dump(coco_2d_dict, f'{info_path[:-4]}.coco.json')
--- a/docker-hub/FlashOCC/Flashocc/tools/data_converter/lyft_data_fixer.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/data_converter/lyft_data_fixer.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import os
+
+import numpy as np
+
+
+def fix_lyft(root_folder='./data/lyft', version='v1.01'):
+    # refer to https://www.kaggle.com/c/3d-object-detection-for-autonomous-vehicles/discussion/110000  # noqa
+    lidar_path = 'lidar/host-a011_lidar1_1233090652702363606.bin'
+    root_folder = os.path.join(root_folder, f'{version}-train')
+    lidar_path = os.path.join(root_folder, lidar_path)
+    assert os.path.isfile(lidar_path), f'Please download the complete Lyft ' \
+        f'dataset and make sure {lidar_path} is present.'
+    points = np.fromfile(lidar_path, dtype=np.float32, count=-1)
+    try:
+        points.reshape([-1, 5])
+        print(f'This fix is not required for version {version}.')
+    except ValueError:
+        new_points = np.array(list(points) + [100.0, 1.0], dtype='float32')
+        new_points.tofile(lidar_path)
+        print(f'Appended 100.0 and 1.0 to the end of {lidar_path}.')
+
+
+parser = argparse.ArgumentParser(description='Lyft dataset fixer arg parser')
+parser.add_argument(
+    '--root-folder',
+    type=str,
+    default='./data/lyft',
+    help='specify the root path of Lyft dataset')
+parser.add_argument(
+    '--version',
+    type=str,
+    default='v1.01',
+    help='specify Lyft dataset version')
+args = parser.parse_args()
+
+if __name__ == '__main__':
+    fix_lyft(root_folder=args.root_folder, version=args.version)
--- a/docker-hub/FlashOCC/Flashocc/tools/data_converter/nuimage_converter.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/data_converter/nuimage_converter.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import base64
+from os import path as osp
+
+import mmcv
+import numpy as np
+from nuimages import NuImages
+from nuimages.utils.utils import mask_decode, name_to_index_mapping
+
+# Exported class names. The position of a name in this tuple is used as its
+# COCO category id by export_nuim_to_coco.
+nus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
+                  'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',
+                  'barrier')
+
+# Maps raw nuImages category names to the names in nus_categories.
+# get_img_annos exports an object annotation only if its category is a key
+# of this mapping.
+NAME_MAPPING = {
+    'movable_object.barrier': 'barrier',
+    'vehicle.bicycle': 'bicycle',
+    'vehicle.bus.bendy': 'bus',
+    'vehicle.bus.rigid': 'bus',
+    'vehicle.car': 'car',
+    'vehicle.construction': 'construction_vehicle',
+    'vehicle.motorcycle': 'motorcycle',
+    'human.pedestrian.adult': 'pedestrian',
+    'human.pedestrian.child': 'pedestrian',
+    'human.pedestrian.construction_worker': 'pedestrian',
+    'human.pedestrian.police_officer': 'pedestrian',
+    'movable_object.trafficcone': 'traffic_cone',
+    'vehicle.trailer': 'trailer',
+    'vehicle.truck': 'truck',
+}
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='Data converter arg parser')
+    parser.add_argument(
+        '--data-root',
+        type=str,
+        default='./data/nuimages',
+        help='specify the root path of dataset')
+    parser.add_argument(
+        '--version',
+        type=str,
+        nargs='+',
+        default=['v1.0-mini'],
+        required=False,
+        help='specify the dataset version')
+    parser.add_argument(
+        '--out-dir',
+        type=str,
+        default='./data/nuimages/annotations/',
+        required=False,
+        help='path to save the exported json')
+    parser.add_argument(
+        '--nproc',
+        type=int,
+        default=4,
+        required=False,
+        help='workers to process semantic masks')
+    parser.add_argument('--extra-tag', type=str, default='nuimages')
+    args = parser.parse_args()
+    return args
+
+
+def get_img_annos(nuim, img_info, cat2id, out_dir, data_root, seg_root):
+    """Get semantic segmentation map for an image.
+
+    Args:
+        nuim (obj:`NuImages`): NuImages dataset object
+        img_info (dict): Meta information of img
+
+    Returns:
+        np.ndarray: Semantic segmentation map of the image
+    """
+    sd_token = img_info['token']
+    image_id = img_info['id']
+    name_to_index = name_to_index_mapping(nuim.category)
+
+    # Get image data.
+    width, height = img_info['width'], img_info['height']
+    semseg_mask = np.zeros((height, width)).astype('uint8')
+
+    # Load stuff / surface regions.
+    surface_anns = [
+        o for o in nuim.surface_ann if o['sample_data_token'] == sd_token
+    ]
+
+    # Draw stuff / surface regions.
+    for ann in surface_anns:
+        # Get color and mask.
+        category_token = ann['category_token']
+        category_name = nuim.get('category', category_token)['name']
+        if ann['mask'] is None:
+            continue
+        mask = mask_decode(ann['mask'])
+
+        # Draw mask for semantic segmentation.
+        semseg_mask[mask == 1] = name_to_index[category_name]
+
+    # Load object instances.
+    object_anns = [
+        o for o in nuim.object_ann if o['sample_data_token'] == sd_token
+    ]
+
+    # Sort by token to ensure that objects always appear in the
+    # instance mask in the same order.
+    object_anns = sorted(object_anns, key=lambda k: k['token'])
+
+    # Draw object instances.
+    # The 0 index is reserved for background; thus, the instances
+    # should start from index 1.
+    annotations = []
+    for i, ann in enumerate(object_anns, start=1):
+        # Get color, box, mask and name.
+        category_token = ann['category_token']
+        category_name = nuim.get('category', category_token)['name']
+        if ann['mask'] is None:
+            continue
+        mask = mask_decode(ann['mask'])
+
+        # Draw masks for semantic segmentation and instance segmentation.
+        semseg_mask[mask == 1] = name_to_index[category_name]
+
+        if category_name in NAME_MAPPING:
+            cat_name = NAME_MAPPING[category_name]
+            cat_id = cat2id[cat_name]
+
+            x_min, y_min, x_max, y_max = ann['bbox']
+            # encode calibrated instance mask
+            mask_anno = dict()
+            mask_anno['counts'] = base64.b64decode(
+                ann['mask']['counts']).decode()
+            mask_anno['size'] = ann['mask']['size']
+
+            data_anno = dict(
+                image_id=image_id,
+                category_id=cat_id,
+                bbox=[x_min, y_min, x_max - x_min, y_max - y_min],
+                area=(x_max - x_min) * (y_max - y_min),
+                segmentation=mask_anno,
+                iscrowd=0)
+            annotations.append(data_anno)
+
+    # after process, save semantic masks
+    img_filename = img_info['file_name']
+    seg_filename = img_filename.replace('jpg', 'png')
+    seg_filename = osp.join(seg_root, seg_filename)
+    mmcv.imwrite(semseg_mask, seg_filename)
+    return annotations, np.max(semseg_mask)
+
+
+def export_nuim_to_coco(nuim, data_root, out_dir, extra_tag, version, nproc):
+    print('Process category information')
+    categories = []
+    categories = [
+        dict(id=nus_categories.index(cat_name), name=cat_name)
+        for cat_name in nus_categories
+    ]
+    cat2id = {k_v['name']: k_v['id'] for k_v in categories}
+
+    images = []
+    print('Process image meta information...')
+    for sample_info in mmcv.track_iter_progress(nuim.sample_data):
+        if sample_info['is_key_frame']:
+            img_idx = len(images)
+            images.append(
+                dict(
+                    id=img_idx,
+                    token=sample_info['token'],
+                    file_name=sample_info['filename'],
+                    width=sample_info['width'],
+                    height=sample_info['height']))
+
+    seg_root = f'{out_dir}semantic_masks'
+    mmcv.mkdir_or_exist(seg_root)
+    mmcv.mkdir_or_exist(osp.join(data_root, 'calibrated'))
+
+    global process_img_anno
+
+    def process_img_anno(img_info):
+        single_img_annos, max_cls_id = get_img_annos(nuim, img_info, cat2id,
+                                                     out_dir, data_root,
+                                                     seg_root)
+        return single_img_annos, max_cls_id
+
+    print('Process img annotations...')
+    if nproc > 1:
+        outputs = mmcv.track_parallel_progress(
+            process_img_anno, images, nproc=nproc)
+    else:
+        outputs = []
+        for img_info in mmcv.track_iter_progress(images):
+            outputs.append(process_img_anno(img_info))
+
+    # Determine the index of object annotation
+    print('Process annotation information...')
+    annotations = []
+    max_cls_ids = []
+    for single_img_annos, max_cls_id in outputs:
+        max_cls_ids.append(max_cls_id)
+        for img_anno in single_img_annos:
+            img_anno.update(id=len(annotations))
+            annotations.append(img_anno)
+
+    max_cls_id = max(max_cls_ids)
+    print(f'Max ID of class in the semantic map: {max_cls_id}')
+
+    coco_format_json = dict(
+        images=images, annotations=annotations, categories=categories)
+
+    mmcv.mkdir_or_exist(out_dir)
+    out_file = osp.join(out_dir, f'{extra_tag}_{version}.json')
+    print(f'Annotation dumped to {out_file}')
+    mmcv.dump(coco_format_json, out_file)
+
+
+def main():
+    args = parse_args()
+    for version in args.version:
+        nuim = NuImages(
+            dataroot=args.data_root, version=version, verbose=True, lazy=True)
+        export_nuim_to_coco(nuim, args.data_root, args.out_dir, args.extra_tag,
+                            version, args.nproc)
+
+
+if __name__ == '__main__':
+    main()
--- a/docker-hub/FlashOCC/Flashocc/tools/data_converter/nuscenes_converter.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/data_converter/nuscenes_converter.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+from collections import OrderedDict
+from os import path as osp
+from typing import List, Tuple, Union
+
+import mmcv
+import numpy as np
+from nuscenes.nuscenes import NuScenes
+from nuscenes.utils.geometry_utils import view_points
+from pyquaternion import Quaternion
+from shapely.geometry import MultiPoint, box
+
+from mmdet3d.core.bbox import points_cam2img
+from mmdet3d.datasets import NuScenesDataset
+
+# Class names for nuScenes.
+# NOTE(review): presumably the tuple position serves as the category id, as
+# in the sibling converters - the usage is outside this view, confirm.
+nus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
+                  'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',
+                  'barrier')
+
+# Attribute names for nuScenes; the last entry is the literal string 'None'.
+# NOTE(review): presumably used for objects without an attribute - confirm.
+nus_attributes = ('cycle.with_rider', 'cycle.without_rider',
+                  'pedestrian.moving', 'pedestrian.standing',
+                  'pedestrian.sitting_lying_down', 'vehicle.moving',
+                  'vehicle.parked', 'vehicle.stopped', 'None')
+
+
+def create_nuscenes_infos(root_path,
+                          info_prefix,
+                          version='v1.0-trainval',
+                          max_sweeps=10):
+    """Create info file of nuscene dataset.
+
+    Given the raw data, generate its related info file in pkl format.
+
+    Args:
+        root_path (str): Path of the data root.
+        info_prefix (str): Prefix of the info file to be generated.
+        version (str, optional): Version of the data.
+            Default: 'v1.0-trainval'.
+        max_sweeps (int, optional): Max number of sweeps.
+            Default: 10.
+    """
+    from nuscenes.nuscenes import NuScenes
+    nusc = NuScenes(version=version, dataroot=root_path, verbose=True)
+    from nuscenes.utils import splits
+    available_vers = ['v1.0-trainval', 'v1.0-test', 'v1.0-mini']
+    assert version in available_vers
+    if version == 'v1.0-trainval':
+        train_scenes = splits.train
+        val_scenes = splits.val
+    elif version == 'v1.0-test':
+        train_scenes = splits.test
+        val_scenes = []
+    elif version == 'v1.0-mini':
+        train_scenes = splits.mini_train
+        val_scenes = splits.mini_val
+    else:
+        raise ValueError('unknown')
+
+    # filter existing scenes.
+    available_scenes = get_available_scenes(nusc)
+    available_scene_names = [s['name'] for s in available_scenes]
+    train_scenes = list(
+        filter(lambda x: x in available_scene_names, train_scenes))
+    val_scenes = list(filter(lambda x: x in available_scene_names, val_scenes))
+    train_scenes = set([
+        available_scenes[available_scene_names.index(s)]['token']
+        for s in train_scenes
+    ])
+    val_scenes = set([
+        available_scenes[available_scene_names.index(s)]['token']
+        for s in val_scenes
+    ])
+
+    test = 'test' in version
+    if test:
+        print('test scene: {}'.format(len(train_scenes)))
+    else:
+        print('train scene: {}, val scene: {}'.format(
+            len(train_scenes), len(val_scenes)))
+    train_nusc_infos, val_nusc_infos = _fill_trainval_infos(
+        nusc, train_scenes, val_scenes, test, max_sweeps=max_sweeps)
+
+    metadata = dict(version=version)
+    if test:
+        print('test sample: {}'.format(len(train_nusc_infos)))
+        data = dict(infos=train_nusc_infos, metadata=metadata)
+        info_path = osp.join(root_path,
+                             '{}_infos_test.pkl'.format(info_prefix))
+        mmcv.dump(data, info_path)
+    else:
+        print('train sample: {}, val sample: {}'.format(
+            len(train_nusc_infos), len(val_nusc_infos)))
+        data = dict(infos=train_nusc_infos, metadata=metadata)
+        info_path = osp.join(root_path,
+                             '{}_infos_train.pkl'.format(info_prefix))
+        mmcv.dump(data, info_path)
+        data['infos'] = val_nusc_infos
+        info_val_path = osp.join(root_path,
+                                 '{}_infos_val.pkl'.format(info_prefix))
+        mmcv.dump(data, info_val_path)
+
+
+def get_available_scenes(nusc):
+    """Get available scenes from the input nuscenes class.
+
+    Given the raw data, get the information of available scenes for
+    further info generation.
+
+    Args:
+        nusc (class): Dataset class in the nuScenes dataset.
+
+    Returns:
+        available_scenes (list[dict]): List of basic information for the
+            available scenes.
+    """
+    available_scenes = []
+    print('total scene num: {}'.format(len(nusc.scene)))
+    for scene in nusc.scene:
+        scene_token = scene['token']
+        scene_rec = nusc.get('scene', scene_token)
+        sample_rec = nusc.get('sample', scene_rec['first_sample_token'])
+        sd_rec = nusc.get('sample_data', sample_rec['data']['LIDAR_TOP'])
+        has_more_frames = True
+        scene_not_exist = False
+        while has_more_frames:
+            lidar_path, boxes, _ = nusc.get_sample_data(sd_rec['token'])
+            lidar_path = str(lidar_path)
+            if os.getcwd() in lidar_path:
+                # path from lyftdataset is absolute path
+                lidar_path = lidar_path.split(f'{os.getcwd()}/')[-1]
+                # relative path
+            if not mmcv.is_filepath(lidar_path):
+                scene_not_exist = True
+                break
+            else:
+                break
+        if scene_not_exist:
+            continue
+        available_scenes.append(scene)
+    print('exist scene num: {}'.format(len(available_scenes)))
+    return available_scenes
+
+
+def _fill_trainval_infos(nusc,
+                         train_scenes,
+                         val_scenes,
+                         test=False,
+                         max_sweeps=10):
+    """Generate the train/val infos from the raw data.
+
+    Args:
+        nusc (:obj:`NuScenes`): Dataset class in the nuScenes dataset.
+        train_scenes (list[str]): Basic information of training scenes.
+        val_scenes (list[str]): Basic information of validation scenes.
+        test (bool, optional): Whether use the test mode. In test mode, no
+            annotations can be accessed. Default: False.
+        max_sweeps (int, optional): Max number of sweeps. Default: 10.
+
+    Returns:
+        tuple[list[dict]]: Information of training set and validation set
+            that will be saved to the info file.
+    """
+    train_nusc_infos = []
+    val_nusc_infos = []
+
+    for sample in mmcv.track_iter_progress(nusc.sample):
+        lidar_token = sample['data']['LIDAR_TOP']
+        sd_rec = nusc.get('sample_data', sample['data']['LIDAR_TOP'])
+        cs_record = nusc.get('calibrated_sensor',
+                             sd_rec['calibrated_sensor_token'])
+        pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token'])
+        lidar_path, boxes, _ = nusc.get_sample_data(lidar_token)
+
+        mmcv.check_file_exist(lidar_path)
+
+        info = {
+            'lidar_path': lidar_path,
+            'token': sample['token'],
+            'sweeps': [],
+            'cams': dict(),
+            'lidar2ego_translation': cs_record['translation'],
+            'lidar2ego_rotation': cs_record['rotation'],
+            'ego2global_translation': pose_record['translation'],
+            'ego2global_rotation': pose_record['rotation'],
+            'timestamp': sample['timestamp'],
+        }
+
+        l2e_r = info['lidar2ego_rotation']
+        l2e_t = info['lidar2ego_translation']
+        e2g_r = info['ego2global_rotation']
+        e2g_t = info['ego2global_translation']
+        l2e_r_mat = Quaternion(l2e_r).rotation_matrix
+        e2g_r_mat = Quaternion(e2g_r).rotation_matrix
+
+        # obtain 6 image's information per frame
+        camera_types = [
+            'CAM_FRONT',
+            'CAM_FRONT_RIGHT',
+            'CAM_FRONT_LEFT',
+            'CAM_BACK',
+            'CAM_BACK_LEFT',
+            'CAM_BACK_RIGHT',
+        ]
+        for cam in camera_types:
+            cam_token = sample['data'][cam]
+            cam_path, _, cam_intrinsic = nusc.get_sample_data(cam_token)
+            cam_info = obtain_sensor2top(nusc, cam_token, l2e_t, l2e_r_mat,
+                                         e2g_t, e2g_r_mat, cam)
+            cam_info.update(cam_intrinsic=cam_intrinsic)
+            info['cams'].update({cam: cam_info})
+
+        # obtain sweeps for a single key-frame
+        sd_rec = nusc.get('sample_data', sample['data']['LIDAR_TOP'])
+        sweeps = []
+        while len(sweeps) < max_sweeps:
+            if not sd_rec['prev'] == '':
+                sweep = obtain_sensor2top(nusc, sd_rec['prev'], l2e_t,
+                                          l2e_r_mat, e2g_t, e2g_r_mat, 'lidar')
+                sweeps.append(sweep)
+                sd_rec = nusc.get('sample_data', sd_rec['prev'])
+            else:
+                break
+        info['sweeps'] = sweeps
+        # obtain annotation
+        if not test:
+            annotations = [
+                nusc.get('sample_annotation', token)
+                for token in sample['anns']
+            ]
+            locs = np.array([b.center for b in boxes]).reshape(-1, 3)
+            dims = np.array([b.wlh for b in boxes]).reshape(-1, 3)
+            rots = np.array([b.orientation.yaw_pitch_roll[0]
+                             for b in boxes]).reshape(-1, 1)
+            velocity = np.array(
+                [nusc.box_velocity(token)[:2] for token in sample['anns']])
+            valid_flag = np.array(
+                [(anno['num_lidar_pts'] + anno['num_radar_pts']) > 0
+                 for anno in annotations],
+                dtype=bool).reshape(-1)
+            # convert velo from global to lidar
+            for i in range(len(boxes)):
+                velo = np.array([*velocity[i], 0.0])
+                velo = velo @ np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(
+                    l2e_r_mat).T
+                velocity[i] = velo[:2]
+
+            names = [b.name for b in boxes]
+            for i in range(len(names)):
+                if names[i] in NuScenesDataset.NameMapping:
+                    names[i] = NuScenesDataset.NameMapping[names[i]]
+            names = np.array(names)
+            # we need to convert box size to
+            # the format of our lidar coordinate system
+            # which is x_size, y_size, z_size (corresponding to l, w, h)
+            gt_boxes = np.concatenate([locs, dims[:, [1, 0, 2]], rots], axis=1)
+            assert len(gt_boxes) == len(
+                annotations), f'{len(gt_boxes)}, {len(annotations)}'
+            info['gt_boxes'] = gt_boxes
+            info['gt_names'] = names
+            info['gt_velocity'] = velocity.reshape(-1, 2)
+            info['num_lidar_pts'] = np.array(
+                [a['num_lidar_pts'] for a in annotations])
+            info['num_radar_pts'] = np.array(
+                [a['num_radar_pts'] for a in annotations])
+            info['valid_flag'] = valid_flag
+
+        if sample['scene_token'] in train_scenes:
+            train_nusc_infos.append(info)
+        else:
+            val_nusc_infos.append(info)
+
+    return train_nusc_infos, val_nusc_infos
+
+
+def obtain_sensor2top(nusc,
+                      sensor_token,
+                      l2e_t,
+                      l2e_r_mat,
+                      e2g_t,
+                      e2g_r_mat,
+                      sensor_type='lidar'):
+    """Obtain the info with RT matric from general sensor to Top LiDAR.
+
+    Args:
+        nusc (class): Dataset class in the nuScenes dataset.
+        sensor_token (str): Sample data token corresponding to the
+            specific sensor type.
+        l2e_t (np.ndarray): Translation from lidar to ego in shape (1, 3).
+        l2e_r_mat (np.ndarray): Rotation matrix from lidar to ego
+            in shape (3, 3).
+        e2g_t (np.ndarray): Translation from ego to global in shape (1, 3).
+        e2g_r_mat (np.ndarray): Rotation matrix from ego to global
+            in shape (3, 3).
+        sensor_type (str, optional): Sensor to calibrate. Default: 'lidar'.
+
+    Returns:
+        sweep (dict): Sweep information after transformation.
+    """
+    sd_rec = nusc.get('sample_data', sensor_token)
+    cs_record = nusc.get('calibrated_sensor',
+                         sd_rec['calibrated_sensor_token'])
+    pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token'])
+    data_path = str(nusc.get_sample_data_path(sd_rec['token']))
+    if os.getcwd() in data_path:  # path from lyftdataset is absolute path
+        data_path = data_path.split(f'{os.getcwd()}/')[-1]  # relative path
+    sweep = {
+        'data_path': data_path,
+        'type': sensor_type,
+        'sample_data_token': sd_rec['token'],
+        'sensor2ego_translation': cs_record['translation'],
+        'sensor2ego_rotation': cs_record['rotation'],
+        'ego2global_translation': pose_record['translation'],
+        'ego2global_rotation': pose_record['rotation'],
+        'timestamp': sd_rec['timestamp']
+    }
+    l2e_r_s = sweep['sensor2ego_rotation']
+    l2e_t_s = sweep['sensor2ego_translation']
+    e2g_r_s = sweep['ego2global_rotation']
+    e2g_t_s = sweep['ego2global_translation']
+
+    # obtain the RT from sensor to Top LiDAR
+    # sweep->ego->global->ego'->lidar
+    l2e_r_s_mat = Quaternion(l2e_r_s).rotation_matrix
+    e2g_r_s_mat = Quaternion(e2g_r_s).rotation_matrix
+    R = (l2e_r_s_mat.T @ e2g_r_s_mat.T) @ (
+        np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)
+    T = (l2e_t_s @ e2g_r_s_mat.T + e2g_t_s) @ (
+        np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)
+    T -= e2g_t @ (np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T
+                  ) + l2e_t @ np.linalg.inv(l2e_r_mat).T
+    sweep['sensor2lidar_rotation'] = R.T  # points @ R.T + T
+    sweep['sensor2lidar_translation'] = T
+    return sweep
+
+
+def export_2d_annotation(root_path, info_path, version, mono3d=True):
+    """Export 2d annotation from the info file and raw data.
+
+    Args:
+        root_path (str): Root path of the raw data.
+        info_path (str): Path of the info file.
+        version (str): Dataset version.
+        mono3d (bool, optional): Whether to export mono3d annotation.
+            Default: True.
+    """
+    # get bbox annotations for camera
+    camera_types = [
+        'CAM_FRONT',
+        'CAM_FRONT_RIGHT',
+        'CAM_FRONT_LEFT',
+        'CAM_BACK',
+        'CAM_BACK_LEFT',
+        'CAM_BACK_RIGHT',
+    ]
+    nusc_infos = mmcv.load(info_path)['infos']
+    nusc = NuScenes(version=version, dataroot=root_path, verbose=True)
+    # info_2d_list = []
+    cat2Ids = [
+        dict(id=nus_categories.index(cat_name), name=cat_name)
+        for cat_name in nus_categories
+    ]
+    coco_ann_id = 0
+    coco_2d_dict = dict(annotations=[], images=[], categories=cat2Ids)
+    for info in mmcv.track_iter_progress(nusc_infos):
+        for cam in camera_types:
+            cam_info = info['cams'][cam]
+            coco_infos = get_2d_boxes(
+                nusc,
+                cam_info['sample_data_token'],
+                visibilities=['', '1', '2', '3', '4'],
+                mono3d=mono3d)
+            (height, width, _) = mmcv.imread(cam_info['data_path']).shape
+            coco_2d_dict['images'].append(
+                dict(
+                    file_name=cam_info['data_path'].split('data/nuscenes/')
+                    [-1],
+                    id=cam_info['sample_data_token'],
+                    token=info['token'],
+                    cam2ego_rotation=cam_info['sensor2ego_rotation'],
+                    cam2ego_translation=cam_info['sensor2ego_translation'],
+                    ego2global_rotation=info['ego2global_rotation'],
+                    ego2global_translation=info['ego2global_translation'],
+                    cam_intrinsic=cam_info['cam_intrinsic'],
+                    width=width,
+                    height=height))
+            for coco_info in coco_infos:
+                if coco_info is None:
+                    continue
+                # add an empty key for coco format
+                coco_info['segmentation'] = []
+                coco_info['id'] = coco_ann_id
+                coco_2d_dict['annotations'].append(coco_info)
+                coco_ann_id += 1
+    if mono3d:
+        json_prefix = f'{info_path[:-4]}_mono3d'
+    else:
+        json_prefix = f'{info_path[:-4]}'
+    mmcv.dump(coco_2d_dict, f'{json_prefix}.coco.json')
+
+
+def get_2d_boxes(nusc,
+                 sample_data_token: str,
+                 visibilities: List[str],
+                 mono3d=True):
+    """Get the 2D annotation records for a given `sample_data_token`.
+
+    Args:
+        sample_data_token (str): Sample data token belonging to a camera
+            keyframe.
+        visibilities (list[str]): Visibility filter.
+        mono3d (bool): Whether to get boxes with mono3d annotation.
+
+    Return:
+        list[dict]: List of 2D annotation record that belongs to the input
+            `sample_data_token`.
+    """
+
+    # Get the sample data and the sample corresponding to that sample data.
+    sd_rec = nusc.get('sample_data', sample_data_token)
+
+    assert sd_rec[
+        'sensor_modality'] == 'camera', 'Error: get_2d_boxes only works' \
+        ' for camera sample_data!'
+    if not sd_rec['is_key_frame']:
+        raise ValueError(
+            'The 2D re-projections are available only for keyframes.')
+
+    s_rec = nusc.get('sample', sd_rec['sample_token'])
+
+    # Get the calibrated sensor and ego pose
+    # record to get the transformation matrices.
+    cs_rec = nusc.get('calibrated_sensor', sd_rec['calibrated_sensor_token'])
+    pose_rec = nusc.get('ego_pose', sd_rec['ego_pose_token'])
+    camera_intrinsic = np.array(cs_rec['camera_intrinsic'])
+
+    # Get all the annotation with the specified visibilties.
+    ann_recs = [
+        nusc.get('sample_annotation', token) for token in s_rec['anns']
+    ]
+    ann_recs = [
+        ann_rec for ann_rec in ann_recs
+        if (ann_rec['visibility_token'] in visibilities)
+    ]
+
+    repro_recs = []
+
+    for ann_rec in ann_recs:
+        # Augment sample_annotation with token information.
+        ann_rec['sample_annotation_token'] = ann_rec['token']
+        ann_rec['sample_data_token'] = sample_data_token
+
+        # Get the box in global coordinates.
+        box = nusc.get_box(ann_rec['token'])
+
+        # Move them to the ego-pose frame.
+        box.translate(-np.array(pose_rec['translation']))
+        box.rotate(Quaternion(pose_rec['rotation']).inverse)
+
+        # Move them to the calibrated sensor frame.
+        box.translate(-np.array(cs_rec['translation']))
+        box.rotate(Quaternion(cs_rec['rotation']).inverse)
+
+        # Filter out the corners that are not in front of the calibrated
+        # sensor.
+        corners_3d = box.corners()
+        in_front = np.argwhere(corners_3d[2, :] > 0).flatten()
+        corners_3d = corners_3d[:, in_front]
+
+        # Project 3d box to 2d.
+        corner_coords = view_points(corners_3d, camera_intrinsic,
+                                    True).T[:, :2].tolist()
+
+        # Keep only corners that fall within the image.
+        final_coords = post_process_coords(corner_coords)
+
+        # Skip if the convex hull of the re-projected corners
+        # does not intersect the image canvas.
+        if final_coords is None:
+            continue
+        else:
+            min_x, min_y, max_x, max_y = final_coords
+
+        # Generate dictionary record to be included in the .json file.
+        repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y,
+                                    sample_data_token, sd_rec['filename'])
+
+        # If mono3d=True, add 3D annotations in camera coordinates
+        if mono3d and (repro_rec is not None):
+            loc = box.center.tolist()
+
+            dim = box.wlh
+            dim[[0, 1, 2]] = dim[[1, 2, 0]]  # convert wlh to our lhw
+            dim = dim.tolist()
+
+            rot = box.orientation.yaw_pitch_roll[0]
+            rot = [-rot]  # convert the rot to our cam coordinate
+
+            global_velo2d = nusc.box_velocity(box.token)[:2]
+            global_velo3d = np.array([*global_velo2d, 0.0])
+            e2g_r_mat = Quaternion(pose_rec['rotation']).rotation_matrix
+            c2e_r_mat = Quaternion(cs_rec['rotation']).rotation_matrix
+            cam_velo3d = global_velo3d @ np.linalg.inv(
+                e2g_r_mat).T @ np.linalg.inv(c2e_r_mat).T
+            velo = cam_velo3d[0::2].tolist()
+
+            repro_rec['bbox_cam3d'] = loc + dim + rot
+            repro_rec['velo_cam3d'] = velo
+
+            center3d = np.array(loc).reshape([1, 3])
+            center2d = points_cam2img(
+                center3d, camera_intrinsic, with_depth=True)
+            repro_rec['center2d'] = center2d.squeeze().tolist()
+            # normalized center2D + depth
+            # if samples with depth < 0 will be removed
+            if repro_rec['center2d'][2] <= 0:
+                continue
+
+            ann_token = nusc.get('sample_annotation',
+                                 box.token)['attribute_tokens']
+            if len(ann_token) == 0:
+                attr_name = 'None'
+            else:
+                attr_name = nusc.get('attribute', ann_token[0])['name']
+            attr_id = nus_attributes.index(attr_name)
+            repro_rec['attribute_name'] = attr_name
+            repro_rec['attribute_id'] = attr_id
+
+        repro_recs.append(repro_rec)
+
+    return repro_recs
+
+
+def post_process_coords(
+    corner_coords: List, imsize: Tuple[int, int] = (1600, 900)
+) -> Union[Tuple[float, float, float, float], None]:
+    """Get the intersection of the convex hull of the reprojected bbox corners
+    and the image canvas, return None if no intersection.
+
+    Args:
+        corner_coords (list[int]): Corner coordinates of reprojected
+            bounding box.
+        imsize (tuple[int]): Size of the image canvas.
+
+    Return:
+        tuple [float]: Intersection of the convex hull of the 2D box
+            corners and the image canvas.
+    """
+    polygon_from_2d_box = MultiPoint(corner_coords).convex_hull
+    img_canvas = box(0, 0, imsize[0], imsize[1])
+
+    if polygon_from_2d_box.intersects(img_canvas):
+        img_intersection = polygon_from_2d_box.intersection(img_canvas)
+        intersection_coords = np.array(
+            [coord for coord in img_intersection.exterior.coords])
+
+        min_x = min(intersection_coords[:, 0])
+        min_y = min(intersection_coords[:, 1])
+        max_x = max(intersection_coords[:, 0])
+        max_y = max(intersection_coords[:, 1])
+
+        return min_x, min_y, max_x, max_y
+    else:
+        return None
+
+
+def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,
+                    sample_data_token: str, filename: str) -> OrderedDict:
+    """Generate one 2D annotation record given various information on top of
+    the 2D bounding box coordinates.
+
+    Args:
+        ann_rec (dict): Original 3d annotation record.
+        x1 (float): Minimum value of the x coordinate.
+        y1 (float): Minimum value of the y coordinate.
+        x2 (float): Maximum value of the x coordinate.
+        y2 (float): Maximum value of the y coordinate.
+        sample_data_token (str): Sample data token.
+        filename (str):The corresponding image file where the annotation
+            is present.
+
+    Returns:
+        dict: A sample 2D annotation record.
+            - file_name (str): file name
+            - image_id (str): sample data token
+            - area (float): 2d box area
+            - category_name (str): category name
+            - category_id (int): category id
+            - bbox (list[float]): left x, top y, dx, dy of 2d box
+            - iscrowd (int): whether the area is crowd
+    """
+    repro_rec = OrderedDict()
+    repro_rec['sample_data_token'] = sample_data_token
+    coco_rec = dict()
+
+    relevant_keys = [
+        'attribute_tokens',
+        'category_name',
+        'instance_token',
+        'next',
+        'num_lidar_pts',
+        'num_radar_pts',
+        'prev',
+        'sample_annotation_token',
+        'sample_data_token',
+        'visibility_token',
+    ]
+
+    for key, value in ann_rec.items():
+        if key in relevant_keys:
+            repro_rec[key] = value
+
+    repro_rec['bbox_corners'] = [x1, y1, x2, y2]
+    repro_rec['filename'] = filename
+
+    coco_rec['file_name'] = filename
+    coco_rec['image_id'] = sample_data_token
+    coco_rec['area'] = (y2 - y1) * (x2 - x1)
+
+    if repro_rec['category_name'] not in NuScenesDataset.NameMapping:
+        return None
+    cat_name = NuScenesDataset.NameMapping[repro_rec['category_name']]
+    coco_rec['category_name'] = cat_name
+    coco_rec['category_id'] = nus_categories.index(cat_name)
+    coco_rec['bbox'] = [x1, y1, x2 - x1, y2 - y1]
+    coco_rec['iscrowd'] = 0
+
+    return coco_rec