Commit a9dc86e9 authored by lishj6

init_0905

parent 18eda5c1
*.pyc
*.npy
*.pth
MIT License
Copyright (c) 2024 Horizon Robotics
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
# Quick Start
### Set up a new virtual environment
```bash
virtualenv mm_sparse4d --python=python3.8
source mm_sparse4d/bin/activate
```
### Install packages using pip3
```bash
sparse4d_path="path/to/sparse4d"
cd ${sparse4d_path}
pip3 install --upgrade pip
pip3 install -r requirement.txt
```
### Compile the deformable_aggregation CUDA op
```bash
cd projects/mmdet3d_plugin/ops
python3 setup.py develop
cd ../../../
```
### Prepare the data
Download the [NuScenes dataset](https://www.nuscenes.org/nuscenes#download) and create symbolic links.
```bash
cd ${sparse4d_path}
mkdir data
ln -s path/to/nuscenes ./data/nuscenes
```
Pack the meta-information and labels of the dataset, and generate the required .pkl files.
```bash
pkl_path="data/nuscenes_anno_pkls"
mkdir -p ${pkl_path}
python3 tools/nuscenes_converter.py --version v1.0-mini --info_prefix ${pkl_path}/nuscenes-mini
python3 tools/nuscenes_converter.py --version v1.0-trainval,v1.0-test --info_prefix ${pkl_path}/nuscenes
```
### Generate anchors by K-means
```bash
python3 tools/anchor_generator.py --ann_file ${pkl_path}/nuscenes_infos_train.pkl
```
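Under the hood, the generator clusters ground-truth box parameters from the training pkl into `num_anchor` cluster centers (900 in the provided configs) and saves them as the `.npy` file referenced by the config (`nuscenes_kmeans900.npy`). A minimal sketch of the idea, assuming the info pkl stores per-sample boxes under a `gt_boxes` key (see `tools/anchor_generator.py` for the actual field names):
```python
import pickle
import numpy as np
from sklearn.cluster import KMeans

# Collect ground-truth boxes from the training info pkl; the key names
# here are assumptions for illustration only.
with open("data/nuscenes_anno_pkls/nuscenes_infos_train.pkl", "rb") as f:
    data = pickle.load(f)
boxes = np.concatenate([info["gt_boxes"] for info in data["infos"]])

# Cluster into 900 anchors (num_anchor in the config) and save them where
# the config's instance_bank expects to find them.
anchors = KMeans(n_clusters=900, n_init=10).fit(boxes).cluster_centers_
np.save("nuscenes_kmeans900.npy", anchors)
```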
### Download pre-trained weights
Download the required backbone [pre-trained weights](https://download.pytorch.org/models/resnet50-19c8e357.pth).
```bash
mkdir ckpt
wget https://download.pytorch.org/models/resnet50-19c8e357.pth -O ckpt/resnet50-19c8e357.pth
```
### Commence training and testing
```bash
# train
bash local_train.sh sparse4dv3_temporal_r50_1x8_bs6_256x704
# test
bash local_test.sh sparse4dv3_temporal_r50_1x8_bs6_256x704 path/to/checkpoint
```
For inference-related guidelines, please refer to the [tutorial/tutorial.ipynb](../tutorial/tutorial.ipynb).
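#!/usr/bin/env bash
# local_test.sh <config-name> <checkpoint> [extra test args]
# Dispatches to dist_test.sh when CUDA_VISIBLE_DEVICES lists more than one GPU.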
export PYTHONPATH=$PYTHONPATH:./
export CUDA_VISIBLE_DEVICES=3
export PORT=29532
gpus=(${CUDA_VISIBLE_DEVICES//,/ })
gpu_num=${#gpus[@]}
config=projects/configs/$1.py
checkpoint=$2
echo "number of gpus: "${gpu_num}
echo "config file: "${config}
echo "checkpoint: "${checkpoint}
if [ ${gpu_num} -gt 1 ]
then
bash ./tools/dist_test.sh \
${config} \
${checkpoint} \
${gpu_num} \
--eval bbox \
"${@:3}"
else
python ./tools/test.py \
${config} \
${checkpoint} \
--eval bbox \
"${@:3}"
fi
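#!/usr/bin/env bash
# local_train.sh <config-name>
# Dispatches to dist_train.sh when HIP_VISIBLE_DEVICES lists more than one GPU.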
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export PYTHONPATH=$PYTHONPATH:./
gpus=(${HIP_VISIBLE_DEVICES//,/ })
gpu_num=${#gpus[@]}
echo "number of gpus: "${gpu_num}
config=projects/configs/$1.py
if [ ${gpu_num} -gt 1 ]
then
bash ./tools/dist_train.sh \
${config} \
${gpu_num} \
--work-dir=work_dirs/$1
else
python ./tools/train.py \
${config}
fi
"""
mAP: 0.4647
mATE: 0.5403
mASE: 0.2623
mAOE: 0.4590
mAVE: 0.2198
mAAE: 0.2059
NDS: 0.5636
Eval time: 176.9s
Per-class results:
Object Class AP ATE ASE AOE AVE AAE
car 0.668 0.357 0.142 0.054 0.184 0.195
truck 0.394 0.528 0.187 0.052 0.163 0.210
bus 0.451 0.681 0.196 0.070 0.383 0.243
trailer 0.185 0.971 0.247 0.634 0.175 0.202
construction_vehicle 0.122 0.879 0.496 1.200 0.136 0.406
pedestrian 0.559 0.517 0.287 0.513 0.282 0.151
motorcycle 0.497 0.462 0.238 0.536 0.293 0.236
bicycle 0.426 0.441 0.257 0.951 0.142 0.004
traffic_cone 0.697 0.275 0.299 nan nan nan
barrier 0.648 0.292 0.275 0.122 nan nan
"""
"""
Per-class results:
Class   AMOTA   AMOTP   RECALL  MOTAR   GT      MOTA    MOTP    MT      ML      FAF     TP      FP      FN      IDS     FRAG    TID     LGD
bicycle 0.444 1.169 0.533 0.733 1993 0.389 0.566 53 57 19.3 1059 283 931 3 8 1.60 1.75
bus 0.559 1.175 0.626 0.824 2112 0.515 0.751 42 35 14.8 1321 233 790 1 20 1.13 1.95
car 0.678 0.755 0.733 0.819 58317 0.599 0.470 2053 1073 134.2 42626 7706 15565 126 295 0.76 1.03
motorcy 0.522 1.060 0.609 0.823 1977 0.497 0.564 50 38 15.7 1194 211 773 10 17 1.97 2.17
pedestr 0.548 1.059 0.652 0.791 25423 0.506 0.678 677 467 77.6 16274 3404 8854 295 225 1.33 1.85
trailer 0.136 1.603 0.383 0.403 2425 0.154 0.981 30 79 52.6 926 553 1496 3 13 1.49 2.64
truck 0.454 1.132 0.577 0.691 9650 0.399 0.594 210 214 45.7 5569 1723 4078 3 50 1.35 1.85
Aggregated results:
AMOTA 0.477
AMOTP 1.136
RECALL 0.588
MOTAR 0.726
GT 14556
MOTA 0.437
MOTP 0.658
MT 3115
ML 1963
FAF 51.4
TP 68969
FP 14113
FN 32487
IDS 441
FRAG 628
TID 1.37
LGD 1.89
"""
# ================ base config ===================
plugin = True
plugin_dir = "projects/mmdet3d_plugin/"
dist_params = dict(backend="nccl")
log_level = "INFO"
work_dir = None
total_batch_size = 48
num_gpus = 8
batch_size = total_batch_size // num_gpus
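# 28130 is the number of samples in the nuScenes v1.0-trainval training split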
num_iters_per_epoch = int(28130 // (num_gpus * batch_size))
num_epochs = 100
checkpoint_epoch_interval = 20
checkpoint_config = dict(
interval=num_iters_per_epoch * checkpoint_epoch_interval
)
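# With these defaults: batch_size = 48 // 8 = 6 per GPU,
# num_iters_per_epoch = 28130 // 48 = 586, so checkpoints are saved every
# 586 * 20 = 11720 iterations and training runs 586 * 100 = 58600 iterations.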
log_config = dict(
interval=1,
hooks=[
dict(type="TextLoggerHook", by_epoch=False),
dict(type="TensorboardLoggerHook"),
],
)
load_from = None
resume_from = None
workflow = [("train", 1)]
fp16 = dict(loss_scale=32.0)
input_shape = (704, 256)
tracking_test = True
tracking_threshold = 0.2
# ================== model ========================
class_names = [
"car",
"truck",
"construction_vehicle",
"bus",
"trailer",
"barrier",
"motorcycle",
"bicycle",
"pedestrian",
"traffic_cone",
]
num_classes = len(class_names)
embed_dims = 256
num_groups = 8
num_decoder = 6
num_single_frame_decoder = 1
use_deformable_func = True # mmdet3d_plugin/ops/setup.py needs to be executed
strides = [4, 8, 16, 32]
num_levels = len(strides)
num_depth_layers = 3
drop_out = 0.1
temporal = True
decouple_attn = True
with_quality_estimation = True
model = dict(
type="Sparse4D",
use_grid_mask=True,
use_deformable_func=use_deformable_func,
img_backbone=dict(
type="ResNet",
depth=50,
num_stages=4,
frozen_stages=-1,
norm_eval=False,
style="pytorch",
with_cp=True,
out_indices=(0, 1, 2, 3),
norm_cfg=dict(type="BN", requires_grad=True),
pretrained="ckpt/resnet50-19c8e357.pth",
),
img_neck=dict(
type="FPN",
num_outs=num_levels,
start_level=0,
out_channels=embed_dims,
add_extra_convs="on_output",
relu_before_extra_convs=True,
in_channels=[256, 512, 1024, 2048],
),
depth_branch=dict( # for auxiliary supervision only
type="DenseDepthNet",
embed_dims=embed_dims,
num_depth_layers=num_depth_layers,
loss_weight=0.2,
),
head=dict(
type="Sparse4DHead",
cls_threshold_to_reg=0.05,
decouple_attn=decouple_attn,
instance_bank=dict(
type="InstanceBank",
num_anchor=900,
embed_dims=embed_dims,
anchor="nuscenes_kmeans900.npy",
anchor_handler=dict(type="SparseBox3DKeyPointsGenerator"),
num_temp_instances=600 if temporal else -1,
confidence_decay=0.6,
feat_grad=False,
),
anchor_encoder=dict(
type="SparseBox3DEncoder",
vel_dims=3,
embed_dims=[128, 32, 32, 64] if decouple_attn else 256,
mode="cat" if decouple_attn else "add",
output_fc=not decouple_attn,
in_loops=1,
out_loops=4 if decouple_attn else 2,
),
num_single_frame_decoder=num_single_frame_decoder,
operation_order=(
[
"gnn",
"norm",
"deformable",
"ffn",
"norm",
"refine",
]
* num_single_frame_decoder
+ [
"temp_gnn",
"gnn",
"norm",
"deformable",
"ffn",
"norm",
"refine",
]
* (num_decoder - num_single_frame_decoder)
)[2:],
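# The trailing [2:] drops the leading "gnn"/"norm", so the first
# (single-frame) layer starts with deformable aggregation.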
temp_graph_model=dict(
type="MultiheadAttention",
embed_dims=embed_dims if not decouple_attn else embed_dims * 2,
num_heads=num_groups,
batch_first=True,
dropout=drop_out,
)
if temporal
else None,
graph_model=dict(
type="MultiheadAttention",
embed_dims=embed_dims if not decouple_attn else embed_dims * 2,
num_heads=num_groups,
batch_first=True,
dropout=drop_out,
),
norm_layer=dict(type="LN", normalized_shape=embed_dims),
ffn=dict(
type="AsymmetricFFN",
in_channels=embed_dims * 2,
pre_norm=dict(type="LN"),
embed_dims=embed_dims,
feedforward_channels=embed_dims * 4,
num_fcs=2,
ffn_drop=drop_out,
act_cfg=dict(type="ReLU", inplace=True),
),
deformable_model=dict(
type="DeformableFeatureAggregation",
embed_dims=embed_dims,
num_groups=num_groups,
num_levels=num_levels,
num_cams=6,
attn_drop=0.15,
use_deformable_func=use_deformable_func,
use_camera_embed=True,
residual_mode="cat",
kps_generator=dict(
type="SparseBox3DKeyPointsGenerator",
num_learnable_pts=6,
fix_scale=[
[0, 0, 0],
[0.45, 0, 0],
[-0.45, 0, 0],
[0, 0.45, 0],
[0, -0.45, 0],
[0, 0, 0.45],
[0, 0, -0.45],
],
),
),
refine_layer=dict(
type="SparseBox3DRefinementModule",
embed_dims=embed_dims,
num_cls=num_classes,
refine_yaw=True,
with_quality_estimation=with_quality_estimation,
),
sampler=dict(
type="SparseBox3DTarget",
num_dn_groups=5,
num_temp_dn_groups=3,
dn_noise_scale=[2.0] * 3 + [0.5] * 7,
max_dn_gt=32,
add_neg_dn=True,
cls_weight=2.0,
box_weight=0.25,
reg_weights=[2.0] * 3 + [0.5] * 3 + [0.0] * 4,
cls_wise_reg_weights={
class_names.index("traffic_cone"): [
2.0,
2.0,
2.0,
1.0,
1.0,
1.0,
0.0,
0.0,
1.0,
1.0,
],
},
),
loss_cls=dict(
type="FocalLoss",
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=2.0,
),
loss_reg=dict(
type="SparseBox3DLoss",
loss_box=dict(type="L1Loss", loss_weight=0.25),
loss_centerness=dict(type="CrossEntropyLoss", use_sigmoid=True),
loss_yawness=dict(type="GaussianFocalLoss"),
cls_allow_reverse=[class_names.index("barrier")],
),
decoder=dict(type="SparseBox3DDecoder"),
reg_weights=[2.0] * 3 + [1.0] * 7,
),
)
# ================== data ========================
dataset_type = "NuScenes3DDetTrackDataset"
data_root = "data/nuscenes/"
anno_root = "data/nuscenes_cam/"
anno_root = "data/nuscenes_anno_pkls/"
file_client_args = dict(backend="disk")
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True
)
train_pipeline = [
dict(type="LoadMultiViewImageFromFiles", to_float32=True),
dict(
type="LoadPointsFromFile",
coord_type="LIDAR",
load_dim=5,
use_dim=5,
file_client_args=file_client_args,
),
dict(type="ResizeCropFlipImage"),
dict(
type="MultiScaleDepthMapGenerator",
downsample=strides[:num_depth_layers],
),
dict(type="BBoxRotation"),
dict(type="PhotoMetricDistortionMultiViewImage"),
dict(type="NormalizeMultiviewImage", **img_norm_cfg),
dict(
type="CircleObjectRangeFilter",
class_dist_thred=[55] * len(class_names),
),
dict(type="InstanceNameFilter", classes=class_names),
dict(type="NuScenesSparse4DAdaptor"),
dict(
type="Collect",
keys=[
"img",
"timestamp",
"projection_mat",
"image_wh",
"gt_depth",
"focal",
"gt_bboxes_3d",
"gt_labels_3d",
],
meta_keys=["T_global", "T_global_inv", "timestamp", "instance_id"],
),
]
test_pipeline = [
dict(type="LoadMultiViewImageFromFiles", to_float32=True),
dict(type="ResizeCropFlipImage"),
dict(type="NormalizeMultiviewImage", **img_norm_cfg),
dict(type="NuScenesSparse4DAdaptor"),
dict(
type="Collect",
keys=[
"img",
"timestamp",
"projection_mat",
"image_wh",
],
meta_keys=["T_global", "T_global_inv", "timestamp"],
),
]
input_modality = dict(
use_lidar=False,
use_camera=True,
use_radar=False,
use_map=False,
use_external=False,
)
data_basic_config = dict(
type=dataset_type,
data_root=data_root,
classes=class_names,
modality=input_modality,
version="v1.0-trainval",
)
data_aug_conf = {
"resize_lim": (0.40, 0.47),
"final_dim": input_shape[::-1],
"bot_pct_lim": (0.0, 0.0),
"rot_lim": (-5.4, 5.4),
"H": 900,
"W": 1600,
"rand_flip": True,
"rot3d_range": [-0.3925, 0.3925],
}
data = dict(
samples_per_gpu=batch_size,
workers_per_gpu=batch_size,
train=dict(
**data_basic_config,
ann_file=anno_root + "nuscenes_infos_train.pkl",
pipeline=train_pipeline,
test_mode=False,
data_aug_conf=data_aug_conf,
with_seq_flag=True,
sequences_split_num=2,
keep_consistent_seq_aug=True,
),
val=dict(
**data_basic_config,
ann_file=anno_root + "nuscenes_infos_val.pkl",
pipeline=test_pipeline,
data_aug_conf=data_aug_conf,
test_mode=True,
tracking=tracking_test,
tracking_threshold=tracking_threshold,
),
test=dict(
**data_basic_config,
ann_file=anno_root + "nuscenes_infos_val.pkl",
pipeline=test_pipeline,
data_aug_conf=data_aug_conf,
test_mode=True,
tracking=tracking_test,
tracking_threshold=tracking_threshold,
),
)
# ================== training ========================
optimizer = dict(
type="AdamW",
lr=6e-4,
weight_decay=0.001,
paramwise_cfg=dict(
custom_keys={
"img_backbone": dict(lr_mult=0.5),
}
),
)
optimizer_config = dict(grad_clip=dict(max_norm=25, norm_type=2))
lr_config = dict(
policy="CosineAnnealing",
warmup="linear",
warmup_iters=500,
warmup_ratio=1.0 / 3,
min_lr_ratio=1e-3,
)
runner = dict(
type="IterBasedRunner",
max_iters=num_iters_per_epoch * num_epochs,
)
# ================== eval ========================
vis_pipeline = [
dict(type="LoadMultiViewImageFromFiles", to_float32=True),
dict(
type="Collect",
keys=["img"],
meta_keys=["timestamp", "lidar2img"],
),
]
evaluation = dict(
interval=num_iters_per_epoch * checkpoint_epoch_interval,
pipeline=vis_pipeline,
# out_dir="./vis", # for visualization
)
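# Quick sanity check for this config (a sketch; the filename follows the
# README's projects/configs/$1.py convention):
#   from mmcv import Config
#   cfg = Config.fromfile("projects/configs/sparse4dv3_temporal_r50_1x8_bs6_256x704.py")
#   assert cfg.model.head.instance_bank.num_anchor == 900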
from .datasets import *
from .models import *
from .apis import *
from .core.evaluation import *
from .train import custom_train_model
from .mmdet_train import custom_train_detector
# from .test import custom_multi_gpu_test
# ---------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
import random
import warnings
import numpy as np
import torch
import torch.distributed as dist
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import (
HOOKS,
DistSamplerSeedHook,
EpochBasedRunner,
Fp16OptimizerHook,
OptimizerHook,
build_optimizer,
build_runner,
get_dist_info,
)
from mmcv.utils import build_from_cfg
from mmdet.core import EvalHook
from mmdet.datasets import build_dataset, replace_ImageToTensor
from mmdet.utils import get_root_logger
import time
import os.path as osp
from projects.mmdet3d_plugin.datasets.builder import build_dataloader
from projects.mmdet3d_plugin.core.evaluation.eval_hooks import (
CustomDistEvalHook,
)
from projects.mmdet3d_plugin.datasets import custom_build_dataset
def custom_train_detector(
model,
dataset,
cfg,
distributed=False,
validate=False,
timestamp=None,
meta=None,
):
logger = get_root_logger(cfg.log_level)
# prepare data loaders
dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
# assert len(dataset) == 1
if "imgs_per_gpu" in cfg.data:
logger.warning(
'"imgs_per_gpu" is deprecated in MMDet V2.0. '
'Please use "samples_per_gpu" instead'
)
if "samples_per_gpu" in cfg.data:
logger.warning(
f'Got "imgs_per_gpu"={cfg.data.imgs_per_gpu} and '
f'"samples_per_gpu"={cfg.data.samples_per_gpu}, "imgs_per_gpu"'
f"={cfg.data.imgs_per_gpu} is used in this experiments"
)
else:
logger.warning(
'Automatically set "samples_per_gpu"="imgs_per_gpu"='
f"{cfg.data.imgs_per_gpu} in this experiments"
)
cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu
if "runner" in cfg:
runner_type = cfg.runner["type"]
else:
runner_type = "EpochBasedRunner"
data_loaders = [
build_dataloader(
ds,
cfg.data.samples_per_gpu,
cfg.data.workers_per_gpu,
# cfg.gpus will be ignored if distributed
len(cfg.gpu_ids),
dist=distributed,
seed=cfg.seed,
nonshuffler_sampler=dict(
type="DistributedSampler"
), # dict(type='DistributedSampler'),
runner_type=runner_type,
)
for ds in dataset
]
# put model on gpus
if distributed:
find_unused_parameters = cfg.get("find_unused_parameters", False)
# Sets the `find_unused_parameters` parameter in
# torch.nn.parallel.DistributedDataParallel
model = MMDistributedDataParallel(
model.cuda(),
device_ids=[torch.cuda.current_device()],
broadcast_buffers=False,
find_unused_parameters=find_unused_parameters,
)
else:
model = MMDataParallel(
model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids
)
# build runner
optimizer = build_optimizer(model, cfg.optimizer)
if "runner" not in cfg:
cfg.runner = {
"type": "EpochBasedRunner",
"max_epochs": cfg.total_epochs,
}
warnings.warn(
"config is now expected to have a `runner` section, "
"please set `runner` in your config.",
UserWarning,
)
else:
if "total_epochs" in cfg:
assert cfg.total_epochs == cfg.runner.max_epochs
runner = build_runner(
cfg.runner,
default_args=dict(
model=model,
optimizer=optimizer,
work_dir=cfg.work_dir,
logger=logger,
meta=meta,
),
)
# an ugly workaround to make .log and .log.json filenames the same
runner.timestamp = timestamp
# fp16 setting
fp16_cfg = cfg.get("fp16", None)
if fp16_cfg is not None:
optimizer_config = Fp16OptimizerHook(
**cfg.optimizer_config, **fp16_cfg, distributed=distributed
)
elif distributed and "type" not in cfg.optimizer_config:
optimizer_config = OptimizerHook(**cfg.optimizer_config)
else:
optimizer_config = cfg.optimizer_config
# register hooks
runner.register_training_hooks(
cfg.lr_config,
optimizer_config,
cfg.checkpoint_config,
cfg.log_config,
cfg.get("momentum_config", None),
)
# register profiler hook
# trace_config = dict(type='tb_trace', dir_name='work_dir')
# profiler_config = dict(on_trace_ready=trace_config)
# runner.register_profiler_hook(profiler_config)
if distributed:
if isinstance(runner, EpochBasedRunner):
runner.register_hook(DistSamplerSeedHook())
# register eval hooks
if validate:
# batch_size > 1 in validation is not supported here
val_samples_per_gpu = cfg.data.val.pop("samples_per_gpu", 1)
if val_samples_per_gpu > 1:
assert False, "samples_per_gpu > 1 is not supported for validation"
# Replace 'ImageToTensor' to 'DefaultFormatBundle'
cfg.data.val.pipeline = replace_ImageToTensor(
cfg.data.val.pipeline
)
val_dataset = custom_build_dataset(cfg.data.val, dict(test_mode=True))
val_dataloader = build_dataloader(
val_dataset,
samples_per_gpu=val_samples_per_gpu,
workers_per_gpu=cfg.data.workers_per_gpu,
dist=distributed,
shuffle=False,
nonshuffler_sampler=dict(type="DistributedSampler"),
)
eval_cfg = cfg.get("evaluation", {})
eval_cfg["by_epoch"] = cfg.runner["type"] != "IterBasedRunner"
eval_cfg["jsonfile_prefix"] = osp.join(
"val",
cfg.work_dir,
time.ctime().replace(" ", "_").replace(":", "_"),
)
eval_hook = CustomDistEvalHook if distributed else EvalHook
runner.register_hook(eval_hook(val_dataloader, **eval_cfg))
# user-defined hooks
if cfg.get("custom_hooks", None):
custom_hooks = cfg.custom_hooks
assert isinstance(
custom_hooks, list
), f"custom_hooks expect list type, but got {type(custom_hooks)}"
for hook_cfg in cfg.custom_hooks:
assert isinstance(hook_cfg, dict), (
"Each item in custom_hooks expects dict type, but got "
f"{type(hook_cfg)}"
)
hook_cfg = hook_cfg.copy()
priority = hook_cfg.pop("priority", "NORMAL")
hook = build_from_cfg(hook_cfg, HOOKS)
runner.register_hook(hook, priority=priority)
if cfg.resume_from:
runner.resume(cfg.resume_from)
elif cfg.load_from:
runner.load_checkpoint(cfg.load_from)
runner.run(data_loaders, cfg.workflow)
# ---------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
import os.path as osp
import pickle
import shutil
import tempfile
import time
import mmcv
import torch
import torch.distributed as dist
from mmcv.image import tensor2imgs
from mmcv.runner import get_dist_info
from mmdet.core import encode_mask_results
import mmcv
import numpy as np
import pycocotools.mask as mask_util
def custom_encode_mask_results(mask_results):
"""Encode bitmap mask to RLE code. Semantic Masks only
Args:
mask_results (list | tuple[list]): bitmap mask results.
In mask scoring rcnn, mask_results is a tuple of (segm_results,
segm_cls_score).
Returns:
list | tuple: RLE encoded mask.
"""
cls_segms = mask_results
num_classes = len(cls_segms)
encoded_mask_results = []
for i in range(len(cls_segms)):
encoded_mask_results.append(
mask_util.encode(
np.array(
cls_segms[i][:, :, np.newaxis], order="F", dtype="uint8"
)
)[0]
) # encoded with RLE
return [encoded_mask_results]
def custom_multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):
"""Test model with multiple gpus.
This method tests the model with multiple GPUs and collects the results
under two different modes: GPU and CPU. With 'gpu_collect=True' it
encodes results as GPU tensors and uses GPU communication to collect
them; in CPU mode it saves per-GPU results to 'tmpdir' and lets the
rank 0 worker collect them.
Args:
model (nn.Module): Model to be tested.
data_loader (nn.Dataloader): Pytorch data loader.
tmpdir (str): Path of directory to save the temporary results from
different gpus under cpu mode.
gpu_collect (bool): Option to use either gpu or cpu to collect results.
Returns:
list: The prediction results.
"""
model.eval()
bbox_results = []
mask_results = []
dataset = data_loader.dataset
rank, world_size = get_dist_info()
if rank == 0:
prog_bar = mmcv.ProgressBar(len(dataset))
time.sleep(2) # This line can prevent deadlock problem in some cases.
have_mask = False
for i, data in enumerate(data_loader):
with torch.no_grad():
result = model(return_loss=False, rescale=True, **data)
# encode mask results
if isinstance(result, dict):
if "bbox_results" in result.keys():
bbox_result = result["bbox_results"]
batch_size = len(result["bbox_results"])
bbox_results.extend(bbox_result)
if (
"mask_results" in result.keys()
and result["mask_results"] is not None
):
mask_result = custom_encode_mask_results(
result["mask_results"]
)
mask_results.extend(mask_result)
have_mask = True
else:
batch_size = len(result)
bbox_results.extend(result)
if rank == 0:
for _ in range(batch_size * world_size):
prog_bar.update()
# collect results from all ranks
if gpu_collect:
bbox_results = collect_results_gpu(bbox_results, len(dataset))
if have_mask:
mask_results = collect_results_gpu(mask_results, len(dataset))
else:
mask_results = None
else:
bbox_results = collect_results_cpu(bbox_results, len(dataset), tmpdir)
tmpdir = tmpdir + "_mask" if tmpdir is not None else None
if have_mask:
mask_results = collect_results_cpu(
mask_results, len(dataset), tmpdir
)
else:
mask_results = None
if mask_results is None:
return bbox_results
return {"bbox_results": bbox_results, "mask_results": mask_results}
def collect_results_cpu(result_part, size, tmpdir=None):
rank, world_size = get_dist_info()
# create a tmp dir if it is not specified
if tmpdir is None:
MAX_LEN = 512
# 32 is whitespace
dir_tensor = torch.full(
(MAX_LEN,), 32, dtype=torch.uint8, device="cuda"
)
if rank == 0:
mmcv.mkdir_or_exist(".dist_test")
tmpdir = tempfile.mkdtemp(dir=".dist_test")
tmpdir = torch.tensor(
bytearray(tmpdir.encode()), dtype=torch.uint8, device="cuda"
)
dir_tensor[: len(tmpdir)] = tmpdir
dist.broadcast(dir_tensor, 0)
tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
else:
mmcv.mkdir_or_exist(tmpdir)
# dump the part result to the dir
mmcv.dump(result_part, osp.join(tmpdir, f"part_{rank}.pkl"))
dist.barrier()
# collect all parts
if rank != 0:
return None
else:
# load results of all parts from tmp dir
part_list = []
for i in range(world_size):
part_file = osp.join(tmpdir, f"part_{i}.pkl")
part_list.append(mmcv.load(part_file))
# sort the results
ordered_results = []
"""
bacause we change the sample of the evaluation stage to make sure that
each gpu will handle continuous sample,
"""
# for res in zip(*part_list):
for res in part_list:
ordered_results.extend(list(res))
# the dataloader may pad some samples
ordered_results = ordered_results[:size]
# remove tmp dir
shutil.rmtree(tmpdir)
return ordered_results
def collect_results_gpu(result_part, size):
    # fall back to CPU collection and return its result (the original dropped it)
    return collect_results_cpu(result_part, size)
# ---------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
from .mmdet_train import custom_train_detector
# from mmseg.apis import train_segmentor
from mmdet.apis import train_detector
def custom_train_model(
model,
dataset,
cfg,
distributed=False,
validate=False,
timestamp=None,
meta=None,
):
"""A function wrapper for launching model training according to cfg.
Because we need different eval_hook in runner. Should be deprecated in the
future.
"""
if cfg.model.type in ["EncoderDecoder3D"]:
assert False, "EncoderDecoder3D (segmentor) training is not supported here"
else:
custom_train_detector(
model,
dataset,
cfg,
distributed=distributed,
validate=validate,
timestamp=timestamp,
meta=meta,
)
def train_model(
model,
dataset,
cfg,
distributed=False,
validate=False,
timestamp=None,
meta=None,
):
"""A function wrapper for launching model training according to cfg.
Because we need different eval_hook in runner. Should be deprecated in the
future.
"""
train_detector(
model,
dataset,
cfg,
distributed=distributed,
validate=validate,
timestamp=timestamp,
meta=meta,
)
X, Y, Z, W, L, H, SIN_YAW, COS_YAW, VX, VY, VZ = list(range(11)) # undecoded
CNS, YNS = 0, 1  # centerness and yawness indices in quality
YAW = 6 # decoded
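# Usage sketch: for an (N, 11) undecoded box tensor `boxes`,
# centers = boxes[:, [X, Y, Z]] and velocities = boxes[:, [VX, VY, VZ]];
# a quality tensor is indexed as quality[..., CNS] / quality[..., YNS].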
from .eval_hooks import CustomDistEvalHook
# Note: Considering that MMCV's EvalHook updated its interface in V1.3.16,
# in order to avoid strong version dependency, we did not directly
# inherit EvalHook but BaseDistEvalHook.
import bisect
import os.path as osp
import mmcv
import torch.distributed as dist
from mmcv.runner import DistEvalHook as BaseDistEvalHook
from mmcv.runner import EvalHook as BaseEvalHook
from torch.nn.modules.batchnorm import _BatchNorm
from mmdet.core.evaluation.eval_hooks import DistEvalHook
def _calc_dynamic_intervals(start_interval, dynamic_interval_list):
assert mmcv.is_list_of(dynamic_interval_list, tuple)
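# e.g. start_interval=1, dynamic_interval_list=[(8000, 2)]
# -> milestones [0, 8000], intervals [1, 2]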
dynamic_milestones = [0]
dynamic_milestones.extend(
[dynamic_interval[0] for dynamic_interval in dynamic_interval_list]
)
dynamic_intervals = [start_interval]
dynamic_intervals.extend(
[dynamic_interval[1] for dynamic_interval in dynamic_interval_list]
)
return dynamic_milestones, dynamic_intervals
class CustomDistEvalHook(BaseDistEvalHook):
def __init__(self, *args, dynamic_intervals=None, **kwargs):
super(CustomDistEvalHook, self).__init__(*args, **kwargs)
self.use_dynamic_intervals = dynamic_intervals is not None
if self.use_dynamic_intervals:
(
self.dynamic_milestones,
self.dynamic_intervals,
) = _calc_dynamic_intervals(self.interval, dynamic_intervals)
def _decide_interval(self, runner):
if self.use_dynamic_intervals:
progress = runner.epoch if self.by_epoch else runner.iter
step = bisect.bisect(self.dynamic_milestones, (progress + 1))
# Dynamically modify the evaluation interval
self.interval = self.dynamic_intervals[step - 1]
def before_train_epoch(self, runner):
"""Evaluate the model only at the start of training by epoch."""
self._decide_interval(runner)
super().before_train_epoch(runner)
def before_train_iter(self, runner):
self._decide_interval(runner)
super().before_train_iter(runner)
def _do_evaluate(self, runner):
"""perform evaluation and save ckpt."""
# Synchronization of BatchNorm's buffer (running_mean
# and running_var) is not supported in the DDP of pytorch,
# which may cause the inconsistent performance of models in
# different ranks, so we broadcast BatchNorm's buffers
# of rank 0 to other ranks to avoid this.
if self.broadcast_bn_buffer:
model = runner.model
for name, module in model.named_modules():
if (
isinstance(module, _BatchNorm)
and module.track_running_stats
):
dist.broadcast(module.running_var, 0)
dist.broadcast(module.running_mean, 0)
if not self._should_evaluate(runner):
return
tmpdir = self.tmpdir
if tmpdir is None:
tmpdir = osp.join(runner.work_dir, ".eval_hook")
from projects.mmdet3d_plugin.apis.test import (
custom_multi_gpu_test,
) # to avoid a circular import
results = custom_multi_gpu_test(
runner.model,
self.dataloader,
tmpdir=tmpdir,
gpu_collect=self.gpu_collect,
)
if runner.rank == 0:
print("\n")
runner.log_buffer.output["eval_iter_num"] = len(self.dataloader)
key_score = self.evaluate(runner, results)
if self.save_best:
self._save_ckpt(runner, key_score)
from .nuscenes_3d_det_track_dataset import NuScenes3DDetTrackDataset
from .builder import *
from .pipelines import *
from .samplers import *
__all__ = [
"NuScenes3DDetTrackDataset",
"custom_build_dataset",
]
import copy
import platform
import random
from functools import partial
import numpy as np
from mmcv.parallel import collate
from mmcv.runner import get_dist_info
from mmcv.utils import Registry, build_from_cfg
from torch.utils.data import DataLoader
from mmdet.datasets.samplers import GroupSampler
from projects.mmdet3d_plugin.datasets.samplers import (
GroupInBatchSampler,
DistributedGroupSampler,
DistributedSampler,
build_sampler
)
def build_dataloader(
dataset,
samples_per_gpu,
workers_per_gpu,
num_gpus=1,
dist=True,
shuffle=True,
seed=None,
shuffler_sampler=None,
nonshuffler_sampler=None,
runner_type="EpochBasedRunner",
**kwargs
):
"""Build PyTorch DataLoader.
In distributed training, each GPU/process has a dataloader.
In non-distributed training, there is only one dataloader for all GPUs.
Args:
dataset (Dataset): A PyTorch dataset.
samples_per_gpu (int): Number of training samples on each GPU, i.e.,
batch size of each GPU.
workers_per_gpu (int): How many subprocesses to use for data loading
for each GPU.
num_gpus (int): Number of GPUs. Only used in non-distributed training.
dist (bool): Distributed training/test or not. Default: True.
shuffle (bool): Whether to shuffle the data at every epoch.
Default: True.
kwargs: any keyword argument to be used to initialize DataLoader
Returns:
DataLoader: A PyTorch dataloader.
"""
rank, world_size = get_dist_info()
batch_sampler = None
if runner_type == 'IterBasedRunner':
print("Use GroupInBatchSampler !!!")
batch_sampler = GroupInBatchSampler(
dataset,
samples_per_gpu,
world_size,
rank,
seed=seed,
)
batch_size = 1
sampler = None
num_workers = workers_per_gpu
elif dist:
# DistributedGroupSampler will definitely shuffle the data to satisfy
# that images on each GPU are in the same group
if shuffle:
print("Use DistributedGroupSampler !!!")
sampler = build_sampler(
shuffler_sampler
if shuffler_sampler is not None
else dict(type="DistributedGroupSampler"),
dict(
dataset=dataset,
samples_per_gpu=samples_per_gpu,
num_replicas=world_size,
rank=rank,
seed=seed,
),
)
else:
sampler = build_sampler(
nonshuffler_sampler
if nonshuffler_sampler is not None
else dict(type="DistributedSampler"),
dict(
dataset=dataset,
num_replicas=world_size,
rank=rank,
shuffle=shuffle,
seed=seed,
),
)
batch_size = samples_per_gpu
num_workers = workers_per_gpu
else:
# assert False, 'not support in bevformer'
print("WARNING!!!!, Only can be used for obtain inference speed!!!!")
sampler = GroupSampler(dataset, samples_per_gpu) if shuffle else None
batch_size = num_gpus * samples_per_gpu
num_workers = num_gpus * workers_per_gpu
init_fn = (
partial(worker_init_fn, num_workers=num_workers, rank=rank, seed=seed)
if seed is not None
else None
)
data_loader = DataLoader(
dataset,
batch_size=batch_size,
sampler=sampler,
batch_sampler=batch_sampler,
num_workers=num_workers,
collate_fn=partial(collate, samples_per_gpu=samples_per_gpu),
pin_memory=False,
worker_init_fn=init_fn,
**kwargs
)
return data_loader
def worker_init_fn(worker_id, num_workers, rank, seed):
# The seed of each worker equals
# num_workers * rank + worker_id + user_seed
worker_seed = num_workers * rank + worker_id + seed
np.random.seed(worker_seed)
random.seed(worker_seed)
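if __name__ == "__main__":
    # Minimal usage sketch (illustration only, not part of the original repo):
    # exercise the non-distributed branch with a toy dataset. Real callers
    # pass datasets built by custom_build_dataset, usually with dist=True.
    from torch.utils.data import Dataset

    class _ToyDataset(Dataset):
        flag = np.zeros(8, dtype=np.uint8)  # group ids used by group samplers

        def __len__(self):
            return 8

        def __getitem__(self, idx):
            return {"img": np.zeros((3, 4, 4), dtype=np.float32)}

    loader = build_dataloader(
        _ToyDataset(), samples_per_gpu=2, workers_per_gpu=0,
        dist=False, shuffle=False,
    )
    print(len(loader))  # 4 batches of 2 samples each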
# Copyright (c) OpenMMLab. All rights reserved.
import platform
from mmcv.utils import Registry, build_from_cfg
from mmdet.datasets import DATASETS
from mmdet.datasets.builder import _concat_dataset
if platform.system() != "Windows":
# https://github.com/pytorch/pytorch/issues/973
import resource
rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
base_soft_limit = rlimit[0]
hard_limit = rlimit[1]
soft_limit = min(max(4096, base_soft_limit), hard_limit)
resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit))
OBJECTSAMPLERS = Registry("Object sampler")
def custom_build_dataset(cfg, default_args=None):
try:
from mmdet3d.datasets.dataset_wrappers import CBGSDataset
except ImportError:
CBGSDataset = None
from mmdet.datasets.dataset_wrappers import (
ClassBalancedDataset,
ConcatDataset,
RepeatDataset,
)
if isinstance(cfg, (list, tuple)):
dataset = ConcatDataset(
[custom_build_dataset(c, default_args) for c in cfg]
)
elif cfg["type"] == "ConcatDataset":
dataset = ConcatDataset(
[custom_build_dataset(c, default_args) for c in cfg["datasets"]],
cfg.get("separate_eval", True),
)
elif cfg["type"] == "RepeatDataset":
dataset = RepeatDataset(
custom_build_dataset(cfg["dataset"], default_args), cfg["times"]
)
elif cfg["type"] == "ClassBalancedDataset":
dataset = ClassBalancedDataset(
custom_build_dataset(cfg["dataset"], default_args),
cfg["oversample_thr"],
)
elif cfg["type"] == "CBGSDataset":
dataset = CBGSDataset(
custom_build_dataset(cfg["dataset"], default_args)
)
elif isinstance(cfg.get("ann_file"), (list, tuple)):
dataset = _concat_dataset(cfg, default_args)
else:
dataset = build_from_cfg(cfg, DATASETS, default_args)
return dataset
from .transform import (
InstanceNameFilter,
CircleObjectRangeFilter,
NormalizeMultiviewImage,
NuScenesSparse4DAdaptor,
MultiScaleDepthMapGenerator,
)
from .augment import (
ResizeCropFlipImage,
BBoxRotation,
PhotoMetricDistortionMultiViewImage,
)
from .loading import LoadMultiViewImageFromFiles, LoadPointsFromFile
__all__ = [
"InstanceNameFilter",
"ResizeCropFlipImage",
"BBoxRotation",
"CircleObjectRangeFilter",
"MultiScaleDepthMapGenerator",
"NormalizeMultiviewImage",
"PhotoMetricDistortionMultiViewImage",
"NuScenesSparse4DAdaptor",
"LoadMultiViewImageFromFiles",
"LoadPointsFromFile",
]
import torch
import numpy as np
from numpy import random
import mmcv
from mmdet.datasets.builder import PIPELINES
from PIL import Image
@PIPELINES.register_module()
class ResizeCropFlipImage(object):
def __call__(self, results):
aug_config = results.get("aug_config")
if aug_config is None:
return results
imgs = results["img"]
N = len(imgs)
new_imgs = []
for i in range(N):
img, mat = self._img_transform(
np.uint8(imgs[i]), aug_config,
)
new_imgs.append(np.array(img).astype(np.float32))
results["lidar2img"][i] = mat @ results["lidar2img"][i]
if "cam_intrinsic" in results:
results["cam_intrinsic"][i][:3, :3] *= aug_config["resize"]
# results["cam_intrinsic"][i][:3, :3] = (
# mat[:3, :3] @ results["cam_intrinsic"][i][:3, :3]
# )
results["img"] = new_imgs
results["img_shape"] = [x.shape[:2] for x in new_imgs]
return results
def _img_transform(self, img, aug_configs):
H, W = img.shape[:2]
resize = aug_configs.get("resize", 1)
resize_dims = (int(W * resize), int(H * resize))
crop = aug_configs.get("crop", [0, 0, *resize_dims])
flip = aug_configs.get("flip", False)
rotate = aug_configs.get("rotate", 0)
origin_dtype = img.dtype
if origin_dtype != np.uint8:
min_value = img.min()
max_value = img.max()
scale = 255 / (max_value - min_value)
img = (img - min_value) * scale
img = np.uint8(img)
img = Image.fromarray(img)
img = img.resize(resize_dims).crop(crop)
if flip:
img = img.transpose(method=Image.FLIP_LEFT_RIGHT)
img = img.rotate(rotate)
img = np.array(img).astype(np.float32)
if origin_dtype != np.uint8:
img = img.astype(np.float32)
img = img / scale + min_value
transform_matrix = np.eye(3)
transform_matrix[:2, :2] *= resize
transform_matrix[:2, 2] -= np.array(crop[:2])
if flip:
flip_matrix = np.array(
[[-1, 0, crop[2] - crop[0]], [0, 1, 0], [0, 0, 1]]
)
transform_matrix = flip_matrix @ transform_matrix
rotate = rotate / 180 * np.pi
rot_matrix = np.array(
[
[np.cos(rotate), np.sin(rotate), 0],
[-np.sin(rotate), np.cos(rotate), 0],
[0, 0, 1],
]
)
rot_center = np.array([crop[2] - crop[0], crop[3] - crop[1]]) / 2
rot_matrix[:2, 2] = -rot_matrix[:2, :2] @ rot_center + rot_center
transform_matrix = rot_matrix @ transform_matrix
extend_matrix = np.eye(4)
extend_matrix[:3, :3] = transform_matrix
return img, extend_matrix
@PIPELINES.register_module()
class BBoxRotation(object):
def __call__(self, results):
angle = results["aug_config"]["rotate_3d"]
rot_cos = np.cos(angle)
rot_sin = np.sin(angle)
rot_mat = np.array(
[
[rot_cos, -rot_sin, 0, 0],
[rot_sin, rot_cos, 0, 0],
[0, 0, 1, 0],
[0, 0, 0, 1],
]
)
rot_mat_inv = np.linalg.inv(rot_mat)
num_view = len(results["lidar2img"])
for view in range(num_view):
results["lidar2img"][view] = (
results["lidar2img"][view] @ rot_mat_inv
)
if "lidar2global" in results:
results["lidar2global"] = results["lidar2global"] @ rot_mat_inv
if "gt_bboxes_3d" in results:
results["gt_bboxes_3d"] = self.box_rotate(
results["gt_bboxes_3d"], angle
)
return results
@staticmethod
def box_rotate(bbox_3d, angle):
rot_cos = np.cos(angle)
rot_sin = np.sin(angle)
rot_mat_T = np.array(
[[rot_cos, rot_sin, 0], [-rot_sin, rot_cos, 0], [0, 0, 1]]
)
bbox_3d[:, :3] = bbox_3d[:, :3] @ rot_mat_T
bbox_3d[:, 6] += angle
if bbox_3d.shape[-1] > 7:
vel_dims = bbox_3d[:, 7:].shape[-1]
bbox_3d[:, 7:] = bbox_3d[:, 7:] @ rot_mat_T[:vel_dims, :vel_dims]
return bbox_3d
@PIPELINES.register_module()
class PhotoMetricDistortionMultiViewImage:
"""Apply photometric distortion to image sequentially, every transformation
is applied with a probability of 0.5. The position of random contrast is in
second or second to last.
1. random brightness
2. random contrast (mode 0)
3. convert color from BGR to HSV
4. random saturation
5. random hue
6. convert color from HSV to BGR
7. random contrast (mode 1)
8. randomly swap channels
Args:
brightness_delta (int): delta of brightness.
contrast_range (tuple): range of contrast.
saturation_range (tuple): range of saturation.
hue_delta (int): delta of hue.
"""
def __init__(
self,
brightness_delta=32,
contrast_range=(0.5, 1.5),
saturation_range=(0.5, 1.5),
hue_delta=18,
):
self.brightness_delta = brightness_delta
self.contrast_lower, self.contrast_upper = contrast_range
self.saturation_lower, self.saturation_upper = saturation_range
self.hue_delta = hue_delta
def __call__(self, results):
"""Call function to perform photometric distortion on images.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Result dict with images distorted.
"""
imgs = results["img"]
new_imgs = []
for img in imgs:
assert img.dtype == np.float32, (
"PhotoMetricDistortion needs the input image of dtype np.float32,"
' please set "to_float32=True" in "LoadImageFromFile" pipeline'
)
# random brightness
if random.randint(2):
delta = random.uniform(
-self.brightness_delta, self.brightness_delta
)
img += delta
# mode == 0 --> do random contrast first
# mode == 1 --> do random contrast last
mode = random.randint(2)
if mode == 1:
if random.randint(2):
alpha = random.uniform(
self.contrast_lower, self.contrast_upper
)
img *= alpha
# convert color from BGR to HSV
img = mmcv.bgr2hsv(img)
# random saturation
if random.randint(2):
img[..., 1] *= random.uniform(
self.saturation_lower, self.saturation_upper
)
# random hue
if random.randint(2):
img[..., 0] += random.uniform(-self.hue_delta, self.hue_delta)
img[..., 0][img[..., 0] > 360] -= 360
img[..., 0][img[..., 0] < 0] += 360
# convert color from HSV to BGR
img = mmcv.hsv2bgr(img)
# random contrast
if mode == 0:
if random.randint(2):
alpha = random.uniform(
self.contrast_lower, self.contrast_upper
)
img *= alpha
# randomly swap channels
if random.randint(2):
img = img[..., random.permutation(3)]
new_imgs.append(img)
results["img"] = new_imgs
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += f"(\nbrightness_delta={self.brightness_delta},\n"
repr_str += "contrast_range="
repr_str += f"{(self.contrast_lower, self.contrast_upper)},\n"
repr_str += "saturation_range="
repr_str += f"{(self.saturation_lower, self.saturation_upper)},\n"
repr_str += f"hue_delta={self.hue_delta})"
return repr_str