add code

d2b71343 · 雍大凯 · 69e57885 · d2b71343 · d2b71343 · d2b71343
Commit d2b71343 authored Apr 08, 2026 by 雍大凯
20 changed files
--- a/docker-hub/FlashOCC/Flashocc/projects/configs/panoptic-flashocc/panoptic-flashocc-r50-depth.py
+++ b/docker-hub/FlashOCC/Flashocc/projects/configs/panoptic-flashocc/panoptic-flashocc-r50-depth.py
+_base_ = ['../../../mmdetection3d/configs/_base_/datasets/nus-3d.py',  # base configs this file inherits from (mmcv `_base_` mechanism)
+          '../../../mmdetection3d/configs/_base_/default_runtime.py']
+
+plugin = True  # presumably makes the launcher import the code under plugin_dir - TODO confirm
+plugin_dir = 'projects/mmdet3d_plugin/'
+point_cloud_range = [-40.0, -40.0, -5.0, 40.0, 40.0, 3.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] - TODO confirm
+# For nuScenes we usually do 10-class detection; this list is reused by the pipelines and datasets below.
+class_names = [
+    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
+    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+]
+
+data_config = {  # image-side settings handed to PrepareImageInputs in both pipelines
+    'cams': [
+        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',
+        'CAM_BACK', 'CAM_BACK_RIGHT'
+    ],
+    'Ncams':
+    6,  # equals len(cams) above
+    'input_size': (256, 704),  # presumably (height, width) fed to the network - TODO confirm
+    'src_size': (900, 1600),  # presumably (height, width) of the raw camera images - TODO confirm
+
+    # Augmentation (exact semantics are defined by PrepareImageInputs, not visible here)
+    'resize': (-0.06, 0.11),
+    'rot': (-5.4, 5.4),
+    'flip': True,
+    'crop_h': (0.0, 0.0),
+    'resize_test': 0.00,
+}
+
+grid_config = {  # presumably [lower, upper, step] per axis - TODO confirm against the view transformer
+    'x': [-40, 40, 0.4],  # 80 / 0.4 = 200 cells if read as [lower, upper, step]
+    'y': [-40, 40, 0.4],  # 80 / 0.4 = 200 cells if read as [lower, upper, step]
+    'z': [-1, 5.4, 6.4],  # step equals the full extent (5.4 - (-1) = 6.4), i.e. a single z bin
+    'depth': [1.0, 45.0, 0.5],  # (45 - 1) / 0.5 = 88 depth bins if read as [lower, upper, step]
+}
+
+voxel_size = [0.1, 0.1, 0.2]  # used by bbox_coder, train_cfg and test_cfg below
+
+numC_Trans = 80  # channel width of the view-transformer output; BEV encoder widths are multiples of it
+
+model = dict(  # image backbone -> neck -> view transformer -> BEV encoder -> centerness + occupancy heads
+    type='BEVDepthPano',     # single-frame
+    img_backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(2, 3),  # two feature maps, matching the two in_channels entries of img_neck
+        frozen_stages=-1,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        norm_eval=False,
+        with_cp=True,  # presumably enables gradient checkpointing - TODO confirm
+        style='pytorch',
+        pretrained='torchvision://resnet50',
+    ),
+    img_neck=dict(
+        type='CustomFPN',
+        in_channels=[1024, 2048],
+        out_channels=256,  # must match img_view_transformer.in_channels below
+        num_outs=1,
+        start_level=0,
+        out_ids=[0]),
+    img_view_transformer=dict(
+        type='LSSViewTransformerBEVDepth',
+        grid_config=grid_config,
+        input_size=data_config['input_size'],  # (256, 704)
+        in_channels=256,
+        out_channels=numC_Trans,  # 80
+        loss_depth_weight=1,
+        depthnet_cfg=dict(use_dcn=False, aspp_mid_channels=96),
+        downsample=16),
+    img_bev_encoder_backbone=dict(
+        type='CustomResNet',
+        numC_input=numC_Trans,  # 80
+        num_channels=[numC_Trans * 2, numC_Trans * 4, numC_Trans * 8]),  # [160, 320, 640]
+    img_bev_encoder_neck=dict(
+        type='FPN_LSS',
+        in_channels=numC_Trans * 8 + numC_Trans * 2,  # 640 + 160 = 800
+        out_channels=256),  # consumed by both heads below (in_channels / in_dim = 256)
+    aux_centerness_head=dict(
+        type='Centerness_Head',
+        task_specific_weight=[1, 1, 0, 0, 0],  # presumably one weight per common_heads entry - TODO confirm order
+        in_channels=256,
+        tasks=[
+            dict(num_class=10, class_names=['car', 'truck',  # single task; same names and order as class_names
+                                            'construction_vehicle',
+                                            'bus', 'trailer',
+                                            'barrier',
+                                            'motorcycle', 'bicycle',
+                                            'pedestrian', 'traffic_cone']),
+        ],
+        common_heads=dict(
+            reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),
+        share_conv_channel=64,
+        bbox_coder=dict(
+            type='CenterPointBBoxCoder',
+            pc_range=point_cloud_range[:2],  # [-40.0, -40.0]
+            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
+            max_num=500,
+            score_threshold=0.3,  # NOTE(review): differs from test_cfg.pts.score_threshold=0.1 below - confirm intended
+            out_size_factor=4,
+            voxel_size=voxel_size[:2],  # [0.1, 0.1]
+            code_size=9),
+        separate_head=dict(
+            type='SeparateHead', init_bias=-2.19, final_kernel=3),
+        loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
+        loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),
+        norm_bbox=True),
+    occ_head=dict(
+        type='BEVOCCHead2D_V2',
+        in_dim=256,
+        out_dim=256,
+        Dz=16,  # presumably the number of height bins predicted per BEV cell - TODO confirm
+        use_mask=False,
+        num_classes=18,  # presumably the 17 classes in the result tables plus a free/empty class - TODO confirm
+        use_predicter=True,
+        class_balance=True,
+        loss_occ=dict(
+            type='CustomFocalLoss',
+            use_sigmoid=True,
+            loss_weight=1.0
+        ),
+    ),
+    # model training and testing settings
+    train_cfg=dict(
+        pts=dict(
+            point_cloud_range=point_cloud_range,
+            grid_size=[800, 800, 40],  # range extent / voxel_size per axis: 80/0.1, 80/0.1, 8/0.2
+            voxel_size=voxel_size,
+            out_size_factor=4,
+            dense_reg=1,
+            gaussian_overlap=0.1,
+            max_objs=500,
+            min_radius=2,
+            code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])),  # 10 weights; the last two are down-weighted to 0.2
+    test_cfg=dict(
+        pts=dict(
+            max_per_img=500,
+            max_pool_nms=False,
+            min_radius=[4, 12, 10, 1, 0.85, 0.175],
+            score_threshold=0.1,
+            out_size_factor=4,
+            voxel_size=voxel_size[:2],  # [0.1, 0.1]
+            pre_max_size=1000,
+            post_max_size=500,
+
+            # Scale-NMS
+            nms_type=['rotate'],
+            nms_thr=[0.2],
+            nms_rescale_factor=[[1.0, 0.7, 0.7, 0.4, 0.55,  # 10 factors, presumably one per class in task order - TODO confirm
+                                 1.1, 1.0, 1.0, 1.5, 3.5]]
+        )
+    ),
+)
+
+# Data
+dataset_type = 'NuScenesDatasetOccpancy'  # spelling is a registry key resolved elsewhere; do not "fix" it here
+data_root = 'data/nuscenes/'  # prefix for the ann_file paths below
+file_client_args = dict(backend='disk')
+
+bda_aug_conf = dict(  # passed to LoadAnnotationsBEVDepth in both pipelines
+    rot_lim=(-0., 0.),  # degenerate range: lower == upper == 0
+    scale_lim=(1., 1.),  # degenerate range: lower == upper == 1
+    flip_dx_ratio=0.5,  # presumably the probability of flipping along x - TODO confirm
+    flip_dy_ratio=0.5  # presumably the probability of flipping along y - TODO confirm
+)
+
+train_pipeline = [  # ordered list of transforms applied to each training sample
+    dict(
+        type='PrepareImageInputs',
+        is_train=True,
+        data_config=data_config,
+        sequential=False),  # presumably no adjacent frames are loaded (single-frame model) - TODO confirm
+    dict(
+        type='LoadAnnotationsBEVDepth',
+        bda_aug_conf=bda_aug_conf,
+        classes=class_names,
+        is_train=True),
+    dict(type='LoadOccGTFromFile'),
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=file_client_args),
+    dict(type='PointToMultiViewDepth', downsample=1, grid_config=grid_config),  # presumably produces the 'gt_depth' key collected below - TODO confirm
+    dict(type='DefaultFormatBundle3D', class_names=class_names),
+    dict(
+        type='Collect3D', keys=['img_inputs', 'gt_depth', 'voxel_semantics',  # NOTE(review): no 'gt_bboxes_3d'/'gt_labels_3d' here although the model has aux_centerness_head - confirm
+                                'mask_lidar', 'mask_camera'])
+]
+
+test_pipeline = [  # transforms for val/test; also passed to `evaluation` at the end of the file
+    dict(type='PrepareImageInputs', data_config=data_config, sequential=False),
+    dict(
+        type='LoadAnnotationsBEVDepth',
+        bda_aug_conf=bda_aug_conf,  # same aug config object as training, but with is_train=False
+        classes=class_names,
+        is_train=False),
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=file_client_args),
+    dict(
+        type='MultiScaleFlipAug3D',
+        img_scale=(1333, 800),
+        pts_scale_ratio=1,
+        flip=False,
+        transforms=[
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=class_names,
+                with_label=False),
+            dict(type='Collect3D', keys=['points', 'img_inputs'])
+        ])
+]
+
+
+input_modality = dict(  # only the camera modality is enabled
+    use_lidar=False,
+    use_camera=True,
+    use_radar=False,
+    use_map=False,
+    use_external=False)
+
+share_data_config = dict(  # keys merged into every data split by the loop at the end of this block
+    type=dataset_type,
+    data_root=data_root,
+    classes=class_names,
+    modality=input_modality,
+    stereo=False,
+    filter_empty_gt=False,
+    img_info_prototype='bevdet',
+)
+
+test_data_config = dict(  # shared by the val and test splits
+    pipeline=test_pipeline,
+    ann_file=data_root + 'bevdetv2-nuscenes_infos_val.pkl')  # 'data/nuscenes/bevdetv2-nuscenes_infos_val.pkl'
+
+data = dict(
+    samples_per_gpu=4,
+    workers_per_gpu=4,
+    train=dict(
+        data_root=data_root,
+        ann_file=data_root + 'bevdetv2-nuscenes_infos_train.pkl',  # 'data/nuscenes/bevdetv2-nuscenes_infos_train.pkl'
+        pipeline=train_pipeline,
+        classes=class_names,
+        test_mode=False,
+        use_valid_flag=True,
+        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
+        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
+        box_type_3d='LiDAR'),
+    val=test_data_config,  # val and test reference the same dict object
+    test=test_data_config)
+
+for key in ['val', 'train', 'test']:  # val/test alias one dict, so it is updated twice with the same values (harmless)
+    data[key].update(share_data_config)  # shared keys overwrite any same-named keys already in the split
+
+# Optimizer
+optimizer = dict(type='AdamW', lr=1e-4, weight_decay=1e-2)
+optimizer_config = dict(grad_clip=dict(max_norm=5, norm_type=2))  # clip gradients to an L2 norm of 5
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=200,
+    warmup_ratio=0.001,
+    step=[24, ])  # NOTE(review): the only milestone equals max_epochs, so the step decay presumably never applies - confirm
+runner = dict(type='EpochBasedRunner', max_epochs=24)
+
+custom_hooks = [
+    dict(
+        type='MEGVIIEMAHook',
+        init_updates=10560,
+        priority='NORMAL',
+    ),
+]
+
+load_from = "ckpts/bevdet-r50-4d-depth-cbgs.pth"  # NOTE(review): checkpoint name says '4d' but this model is marked single-frame - confirm
+# fp16 = dict(loss_scale='dynamic')
+evaluation = dict(interval=1, start=20, pipeline=test_pipeline)  # presumably evaluates every epoch from epoch 20 on - TODO confirm
+checkpoint_config = dict(interval=1, max_keep_ckpts=5)
+
+
+# use_mask = False
+# +----------------------+----------+----------+----------+
+# |     Class Names      | RayIoU@1 | RayIoU@2 | RayIoU@4 |
+# +----------------------+----------+----------+----------+
+# |        others        |  0.090   |  0.102   |  0.105   |
+# |       barrier        |  0.387   |  0.442   |  0.465   |
+# |       bicycle        |  0.218   |  0.257   |  0.265   |
+# |         bus          |  0.514   |  0.613   |  0.669   |
+# |         car          |  0.487   |  0.564   |  0.592   |
+# | construction_vehicle |  0.176   |  0.254   |  0.288   |
+# |      motorcycle      |  0.203   |  0.292   |  0.310   |
+# |      pedestrian      |  0.301   |  0.349   |  0.366   |
+# |     traffic_cone     |  0.280   |  0.313   |  0.321   |
+# |       trailer        |  0.227   |  0.313   |  0.390   |
+# |        truck         |  0.395   |  0.493   |  0.537   |
+# |  driveable_surface   |  0.534   |  0.618   |  0.708   |
+# |      other_flat      |  0.289   |  0.326   |  0.356   |
+# |       sidewalk       |  0.234   |  0.280   |  0.329   |
+# |       terrain        |  0.222   |  0.291   |  0.356   |
+# |       manmade        |  0.280   |  0.351   |  0.401   |
+# |      vegetation      |  0.176   |  0.273   |  0.359   |
+# +----------------------+----------+----------+----------+
+# |         MEAN         |  0.295   |  0.361   |  0.401   |
+# +----------------------+----------+----------+----------+
+
+
+# +----------------------+---------+---------+---------+
+# |     Class Names      | RayPQ@1 | RayPQ@2 | RayPQ@4 |
+# +----------------------+---------+---------+---------+
+# |        others        |  0.017  |  0.025  |  0.026  |
+# |       barrier        |  0.125  |  0.182  |  0.218  |
+# |       bicycle        |  0.051  |  0.072  |  0.076  |
+# |         bus          |  0.275  |  0.366  |  0.422  |
+# |         car          |  0.242  |  0.332  |  0.356  |
+# | construction_vehicle |  0.016  |  0.058  |  0.092  |
+# |      motorcycle      |  0.071  |  0.124  |  0.137  |
+# |      pedestrian      |  0.017  |  0.022  |  0.023  |
+# |     traffic_cone     |  0.032  |  0.040  |  0.044  |
+# |       trailer        |  0.035  |  0.055  |  0.063  |
+# |        truck         |  0.145  |  0.232  |  0.282  |
+# |  driveable_surface   |  0.410  |  0.537  |  0.665  |
+# |      other_flat      |  0.062  |  0.087  |  0.109  |
+# |       sidewalk       |  0.008  |  0.030  |  0.064  |
+# |       terrain        |  0.010  |  0.026  |  0.047  |
+# |       manmade        |  0.054  |  0.091  |  0.134  |
+# |      vegetation      |  0.003  |  0.022  |  0.092  |
+# +----------------------+---------+---------+---------+
+# |         MEAN         |  0.092  |  0.135  |  0.168  |
+# +----------------------+---------+---------+---------+
+# {'RayIoU': 0.35223182059688496, 'RayIoU@1': 0.29499743138394385, 'RayIoU@2': 0.3607063492639709, 'RayIoU@4': 0.4009916811427401, 'RayPQ': 0.13182524545677765, 'RayPQ@1': 0.09247682620339576, 'RayPQ@2': 0.1354024129684159, 'RayPQ@4': 0.16759649719852124}
+
--- a/docker-hub/FlashOCC/Flashocc/projects/configs/panoptic-flashocc/panoptic-flashocc-r50-depth4d-longterm16f-pano.py
+++ b/docker-hub/FlashOCC/Flashocc/projects/configs/panoptic-flashocc/panoptic-flashocc-r50-depth4d-longterm16f-pano.py
+_base_ = ['../../../mmdetection3d/configs/_base_/datasets/nus-3d.py',  # base configs this file inherits from (mmcv `_base_` mechanism)
+          '../../../mmdetection3d/configs/_base_/default_runtime.py']
+
+plugin = True  # presumably makes the launcher import the code under plugin_dir - TODO confirm
+plugin_dir = 'projects/mmdet3d_plugin/'
+# point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
+point_cloud_range = [-40.0, -40.0, -5.0, 40.0, 40.0, 3.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] - TODO confirm
+# For nuScenes we usually do 10-class detection; this list is reused by the pipelines and datasets below.
+class_names = [
+    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
+    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+]
+
+data_config = {  # image-side settings handed to PrepareImageInputs in both pipelines
+    'cams': [
+        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',
+        'CAM_BACK', 'CAM_BACK_RIGHT'
+    ],
+    'Ncams':
+    6,  # equals len(cams) above
+    'input_size': (256, 704),  # presumably (height, width) fed to the network - TODO confirm
+    'src_size': (900, 1600),  # presumably (height, width) of the raw camera images - TODO confirm
+
+    # Augmentation (exact semantics are defined by PrepareImageInputs, not visible here)
+    'resize': (-0.06, 0.11),
+    'rot': (-5.4, 5.4),
+    'flip': True,
+    'crop_h': (0.0, 0.0),
+    'resize_test': 0.00,
+}
+
+grid_config = {  # presumably [lower, upper, step] per axis - TODO confirm against the view transformer
+    'x': [-40, 40, 0.4],  # 80 / 0.4 = 200 cells if read as [lower, upper, step]
+    'y': [-40, 40, 0.4],  # 80 / 0.4 = 200 cells if read as [lower, upper, step]
+    'z': [-1, 5.4, 6.4],  # step equals the full extent (5.4 - (-1) = 6.4), i.e. a single z bin
+    'depth': [1.0, 45.0, 0.5],  # (45 - 1) / 0.5 = 88 depth bins if read as [lower, upper, step]
+}
+
+voxel_size = [0.1, 0.1, 0.2]  # used by bbox_coder, train_cfg and test_cfg below
+numC_Trans = 80  # channel width of the view-transformer output; BEV encoder widths are multiples of it
+multi_adj_frame_id_cfg = (1, 16+1, 1)  # unpacked into range() below: range(1, 17, 1) -> 16 adjacent-frame ids
+
+model = dict(  # image backbone -> neck -> view transformer -> pre_process -> BEV encoder -> centerness + occupancy heads
+    type='BEVDepth4DPano',
+    num_adj=multi_adj_frame_id_cfg[1]-1,  # 17 - 1 = 16
+    img_backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(2, 3),  # two feature maps, matching the two in_channels entries of img_neck
+        frozen_stages=-1,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        norm_eval=False,
+        with_cp=True,  # presumably enables gradient checkpointing - TODO confirm
+        style='pytorch',
+        pretrained='torchvision://resnet50',
+    ),
+    img_neck=dict(
+        type='CustomFPN',
+        in_channels=[1024, 2048],
+        out_channels=512,  # must match img_view_transformer.in_channels below
+        num_outs=1,
+        start_level=0,
+        out_ids=[0]),
+    img_view_transformer=dict(
+        type='LSSViewTransformerBEVDepth',
+        grid_config=grid_config,
+        input_size=data_config['input_size'],  # (256, 704)
+        in_channels=512,
+        out_channels=numC_Trans,  # 80
+        loss_depth_weight=1,
+        depthnet_cfg=dict(use_dcn=False, aspp_mid_channels=96),
+        downsample=16),
+    pre_process=dict(  # single-layer CustomResNet that keeps the channel count at numC_Trans
+        type='CustomResNet',
+        numC_input=numC_Trans,
+        num_layer=[1, ],
+        num_channels=[numC_Trans, ],
+        stride=[1, ],
+        backbone_output_ids=[0, ]),
+    img_bev_encoder_backbone=dict(
+        type='CustomResNet',
+        numC_input=numC_Trans * (len(range(*multi_adj_frame_id_cfg))+1),  # 80 * (16 + 1) = 1360
+        num_channels=[numC_Trans * 2, numC_Trans * 4, numC_Trans * 8]),  # [160, 320, 640]
+    img_bev_encoder_neck=dict(
+        type='FPN_LSS',
+        in_channels=numC_Trans * 8 + numC_Trans * 2,  # 640 + 160 = 800
+        out_channels=256),  # consumed by both heads below (in_channels / in_dim = 256)
+    aux_centerness_head=dict(
+        type='Centerness_Head',
+        task_specific_weight=[1, 1, 0, 0, 0],  # presumably one weight per common_heads entry - TODO confirm order
+        in_channels=256,
+        tasks=[
+            dict(num_class=10, class_names=['car', 'truck',  # single task; same names and order as class_names
+                                            'construction_vehicle',
+                                            'bus', 'trailer',
+                                            'barrier',
+                                            'motorcycle', 'bicycle',
+                                            'pedestrian', 'traffic_cone']),
+        ],
+        common_heads=dict(
+            reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),
+        share_conv_channel=64,
+        bbox_coder=dict(
+            type='CenterPointBBoxCoder',
+            pc_range=point_cloud_range[:2],  # [-40.0, -40.0]
+            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
+            max_num=500,
+            score_threshold=0.1,  # same value as test_cfg.pts.score_threshold below
+            out_size_factor=4,
+            voxel_size=voxel_size[:2],  # [0.1, 0.1]
+            code_size=9),
+        separate_head=dict(
+            type='SeparateHead', init_bias=-2.19, final_kernel=3),
+        loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
+        loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),
+        norm_bbox=True),
+    occ_head=dict(
+        type='BEVOCCHead2D_V2',
+        in_dim=256,
+        out_dim=256,
+        Dz=16,  # presumably the number of height bins predicted per BEV cell - TODO confirm
+        use_mask=False,
+        num_classes=18,  # presumably 17 semantic classes plus a free/empty class - TODO confirm
+        use_predicter=True,
+        class_balance=True,
+        loss_occ=dict(
+            type='CustomFocalLoss',
+            use_sigmoid=True,
+            loss_weight=1.0
+        ),
+    ),
+    # model training and testing settings
+    train_cfg=dict(
+        pts=dict(
+            point_cloud_range=point_cloud_range,
+            grid_size=[800, 800, 40],  # range extent / voxel_size per axis: 80/0.1, 80/0.1, 8/0.2
+            voxel_size=voxel_size,
+            out_size_factor=4,
+            dense_reg=1,
+            gaussian_overlap=0.1,
+            max_objs=500,
+            min_radius=2,
+            code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])),  # 10 weights; the last two are down-weighted to 0.2
+    test_cfg=dict(
+        pts=dict(
+            max_per_img=500,
+            max_pool_nms=False,
+            min_radius=[4, 12, 10, 1, 0.85, 0.175],
+            score_threshold=0.1,
+            out_size_factor=4,
+            voxel_size=voxel_size[:2],  # [0.1, 0.1]
+            pre_max_size=1000,
+            post_max_size=500,
+
+            # Scale-NMS
+            nms_type=['rotate'],
+            nms_thr=[0.2],
+            nms_rescale_factor=[[1.0, 0.7, 0.7, 0.4, 0.55,  # 10 factors, presumably one per class in task order - TODO confirm
+                                 1.1, 1.0, 1.0, 1.5, 3.5]]
+        )
+    ),
+)
+
+# Data
+dataset_type = 'NuScenesDatasetOccpancy'  # spelling is a registry key resolved elsewhere; do not "fix" it here
+data_root = 'data/nuscenes/'  # prefix for the ann_file paths below
+file_client_args = dict(backend='disk')
+
+bda_aug_conf = dict(  # passed to LoadAnnotationsBEVDepth in both pipelines
+    rot_lim=(-0., 0.),  # degenerate range: lower == upper == 0
+    scale_lim=(1., 1.),  # degenerate range: lower == upper == 1
+    flip_dx_ratio=0.5,  # presumably the probability of flipping along x - TODO confirm
+    flip_dy_ratio=0.5  # presumably the probability of flipping along y - TODO confirm
+)
+
+train_pipeline = [  # ordered list of transforms applied to each training sample
+    dict(
+        type='PrepareImageInputs',
+        is_train=True,
+        data_config=data_config,
+        sequential=True),  # presumably also loads the adjacent frames for the 4D model - TODO confirm
+    dict(
+        type='LoadAnnotationsBEVDepth',
+        bda_aug_conf=bda_aug_conf,
+        classes=class_names,
+        is_train=True),
+    dict(type='LoadOccGTFromFile'),
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=file_client_args),
+    dict(type='PointToMultiViewDepth', downsample=1, grid_config=grid_config),  # presumably produces the 'gt_depth' key collected below - TODO confirm
+    dict(type='DefaultFormatBundle3D', class_names=class_names),
+    dict(
+        type='Collect3D', keys=['img_inputs', 'gt_depth', 'voxel_semantics',  # box keys are collected too; presumably consumed by aux_centerness_head - TODO confirm
+                                'mask_lidar', 'mask_camera', 'gt_bboxes_3d', 'gt_labels_3d'])
+]
+
+test_pipeline = [  # transforms for val/test; also passed to `evaluation` at the end of the file
+    dict(type='PrepareImageInputs', data_config=data_config, sequential=True),
+    dict(
+        type='LoadAnnotationsBEVDepth',
+        bda_aug_conf=bda_aug_conf,  # same aug config object as training, but with is_train=False
+        classes=class_names,
+        is_train=False),
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=file_client_args),
+    dict(
+        type='MultiScaleFlipAug3D',
+        img_scale=(1333, 800),
+        pts_scale_ratio=1,
+        flip=False,
+        transforms=[
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=class_names,
+                with_label=False),
+            dict(type='Collect3D', keys=['points', 'img_inputs', 'gt_bboxes_3d', 'gt_labels_3d'])  # unlike the non-pano configs, box keys are collected at test time as well
+        ])
+]
+
+
+input_modality = dict(  # only the camera modality is enabled
+    use_lidar=False,
+    use_camera=True,
+    use_radar=False,
+    use_map=False,
+    use_external=False)
+
+share_data_config = dict(  # keys merged into every data split by the loop at the end of this block
+    type=dataset_type,
+    data_root=data_root,
+    classes=class_names,
+    modality=input_modality,
+    stereo=False,
+    filter_empty_gt=False,
+    img_info_prototype='bevdet4d',
+    multi_adj_frame_id_cfg=multi_adj_frame_id_cfg,  # (1, 17, 1)
+)
+
+test_data_config = dict(  # shared by the val and test splits
+    pipeline=test_pipeline,
+    ann_file=data_root + 'bevdetv2-nuscenes_infos_val.pkl')  # 'data/nuscenes/bevdetv2-nuscenes_infos_val.pkl'
+
+data = dict(
+    samples_per_gpu=4,
+    workers_per_gpu=4,
+    train=dict(
+        data_root=data_root,
+        ann_file=data_root + 'bevdetv2-nuscenes_infos_train.pkl',  # 'data/nuscenes/bevdetv2-nuscenes_infos_train.pkl'
+        pipeline=train_pipeline,
+        classes=class_names,
+        test_mode=False,
+        use_valid_flag=True,
+        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
+        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
+        box_type_3d='LiDAR'),
+    val=test_data_config,  # val and test reference the same dict object
+    test=test_data_config)
+
+for key in ['val', 'train', 'test']:  # val/test alias one dict, so it is updated twice with the same values (harmless)
+    data[key].update(share_data_config)  # shared keys overwrite any same-named keys already in the split
+
+# Optimizer
+optimizer = dict(type='AdamW', lr=1e-4, weight_decay=1e-2)
+optimizer_config = dict(grad_clip=dict(max_norm=5, norm_type=2))  # clip gradients to an L2 norm of 5
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=200,
+    warmup_ratio=0.001,
+    step=[24, ])  # NOTE(review): the only milestone equals max_epochs, so the step decay presumably never applies - confirm
+runner = dict(type='EpochBasedRunner', max_epochs=24)
+
+custom_hooks = [
+    dict(
+        type='MEGVIIEMAHook',
+        init_updates=10560,
+        priority='NORMAL',
+    ),
+]
+
+load_from = "ckpts/bevdet-r50-4d-depth-cbgs.pth"  # checkpoint to initialise from; path is relative, presumably to the working directory
+# fp16 = dict(loss_scale='dynamic')
+evaluation = dict(interval=1, start=20, pipeline=test_pipeline)  # presumably evaluates every epoch from epoch 20 on - TODO confirm
+checkpoint_config = dict(interval=1, max_keep_ckpts=5)
+
--- a/docker-hub/FlashOCC/Flashocc/projects/configs/panoptic-flashocc/panoptic-flashocc-r50-depth4d-longterm16f.py
+++ b/docker-hub/FlashOCC/Flashocc/projects/configs/panoptic-flashocc/panoptic-flashocc-r50-depth4d-longterm16f.py
+_base_ = ['../../../mmdetection3d/configs/_base_/datasets/nus-3d.py',  # base configs this file inherits from (mmcv `_base_` mechanism)
+          '../../../mmdetection3d/configs/_base_/default_runtime.py']
+
+plugin = True  # presumably makes the launcher import the code under plugin_dir - TODO confirm
+plugin_dir = 'projects/mmdet3d_plugin/'
+point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]  # NOTE(review): not referenced anywhere else in this file, and differs from the +/-40 grid_config - confirm
+# For nuScenes we usually do 10-class detection; this list is reused by the pipelines and datasets below.
+class_names = [
+    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
+    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+]
+
+data_config = {  # image-side settings handed to PrepareImageInputs in both pipelines
+    'cams': [
+        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',
+        'CAM_BACK', 'CAM_BACK_RIGHT'
+    ],
+    'Ncams':
+    6,  # equals len(cams) above
+    'input_size': (256, 704),  # presumably (height, width) fed to the network - TODO confirm
+    'src_size': (900, 1600),  # presumably (height, width) of the raw camera images - TODO confirm
+
+    # Augmentation (exact semantics are defined by PrepareImageInputs, not visible here)
+    'resize': (-0.06, 0.11),
+    'rot': (-5.4, 5.4),
+    'flip': True,
+    'crop_h': (0.0, 0.0),
+    'resize_test': 0.00,
+}
+
+grid_config = {  # presumably [lower, upper, step] per axis - TODO confirm against the view transformer
+    'x': [-40, 40, 0.4],  # 80 / 0.4 = 200 cells if read as [lower, upper, step]
+    'y': [-40, 40, 0.4],  # 80 / 0.4 = 200 cells if read as [lower, upper, step]
+    'z': [-1, 5.4, 6.4],  # step equals the full extent (5.4 - (-1) = 6.4), i.e. a single z bin
+    'depth': [1.0, 45.0, 0.5],  # (45 - 1) / 0.5 = 88 depth bins if read as [lower, upper, step]
+}
+
+voxel_size = [0.1, 0.1, 0.2]  # NOTE(review): not referenced anywhere else in this file - confirm it is still needed
+numC_Trans = 80  # channel width of the view-transformer output; BEV encoder widths are multiples of it
+multi_adj_frame_id_cfg = (1, 16+1, 1)  # unpacked into range() below: range(1, 17, 1) -> 16 adjacent-frame ids
+
+model = dict(  # image backbone -> neck -> view transformer -> pre_process -> BEV encoder -> occupancy head (no detection head)
+    type='BEVDepth4DOCC',
+    num_adj=multi_adj_frame_id_cfg[1]-1,  # 17 - 1 = 16
+    img_backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(2, 3),  # two feature maps, matching the two in_channels entries of img_neck
+        frozen_stages=-1,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        norm_eval=False,
+        with_cp=True,  # presumably enables gradient checkpointing - TODO confirm
+        style='pytorch',
+        pretrained='torchvision://resnet50',
+    ),
+    img_neck=dict(
+        type='CustomFPN',
+        in_channels=[1024, 2048],
+        out_channels=512,  # must match img_view_transformer.in_channels below
+        num_outs=1,
+        start_level=0,
+        out_ids=[0]),
+    img_view_transformer=dict(
+        type='LSSViewTransformerBEVDepth',
+        grid_config=grid_config,
+        input_size=data_config['input_size'],  # (256, 704)
+        in_channels=512,
+        out_channels=numC_Trans,  # 80
+        loss_depth_weight=1,
+        depthnet_cfg=dict(use_dcn=False, aspp_mid_channels=96),
+        downsample=16),
+    pre_process=dict(  # single-layer CustomResNet that keeps the channel count at numC_Trans
+        type='CustomResNet',
+        numC_input=numC_Trans,
+        num_layer=[1, ],
+        num_channels=[numC_Trans, ],
+        stride=[1, ],
+        backbone_output_ids=[0, ]),
+    img_bev_encoder_backbone=dict(
+        type='CustomResNet',
+        numC_input=numC_Trans * (len(range(*multi_adj_frame_id_cfg))+1),  # 80 * (16 + 1) = 1360
+        num_channels=[numC_Trans * 2, numC_Trans * 4, numC_Trans * 8]),  # [160, 320, 640]
+    img_bev_encoder_neck=dict(
+        type='FPN_LSS',
+        in_channels=numC_Trans * 8 + numC_Trans * 2,  # 640 + 160 = 800
+        out_channels=256),  # matches occ_head.in_dim below
+    occ_head=dict(
+        type='BEVOCCHead2D_V2',
+        in_dim=256,
+        out_dim=256,
+        Dz=16,  # presumably the number of height bins predicted per BEV cell - TODO confirm
+        use_mask=False,
+        num_classes=18,  # the mIoU array recorded at the end of this file has 18 entries for 17 named classes; the 18th is presumably free/empty - TODO confirm
+        use_predicter=True,
+        class_balance=True,
+        loss_occ=dict(
+            type='CustomFocalLoss',
+            use_sigmoid=True,
+            loss_weight=1.0
+        ),
+    )
+)
+
+# Data
+dataset_type = 'NuScenesDatasetOccpancy'  # spelling is a registry key resolved elsewhere; do not "fix" it here
+data_root = 'data/nuscenes/'  # prefix for the ann_file paths below
+file_client_args = dict(backend='disk')
+
+bda_aug_conf = dict(  # passed to LoadAnnotationsBEVDepth in both pipelines
+    rot_lim=(-0., 0.),  # degenerate range: lower == upper == 0
+    scale_lim=(1., 1.),  # degenerate range: lower == upper == 1
+    flip_dx_ratio=0.5,  # presumably the probability of flipping along x - TODO confirm
+    flip_dy_ratio=0.5  # presumably the probability of flipping along y - TODO confirm
+)
+
+train_pipeline = [  # ordered list of transforms applied to each training sample
+    dict(
+        type='PrepareImageInputs',
+        is_train=True,
+        data_config=data_config,
+        sequential=True),  # presumably also loads the adjacent frames for the 4D model - TODO confirm
+    dict(
+        type='LoadAnnotationsBEVDepth',
+        bda_aug_conf=bda_aug_conf,
+        classes=class_names,
+        is_train=True),
+    dict(type='LoadOccGTFromFile'),
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=file_client_args),
+    dict(type='PointToMultiViewDepth', downsample=1, grid_config=grid_config),  # presumably produces the 'gt_depth' key collected below - TODO confirm
+    dict(type='DefaultFormatBundle3D', class_names=class_names),
+    dict(
+        type='Collect3D', keys=['img_inputs', 'gt_depth', 'voxel_semantics',  # no box keys: this config has no detection head
+                                'mask_lidar', 'mask_camera'])
+]
+
+test_pipeline = [  # transforms for val/test; also passed to `evaluation` at the end of the file
+    dict(type='PrepareImageInputs', data_config=data_config, sequential=True),
+    dict(
+        type='LoadAnnotationsBEVDepth',
+        bda_aug_conf=bda_aug_conf,  # same aug config object as training, but with is_train=False
+        classes=class_names,
+        is_train=False),
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=file_client_args),
+    dict(
+        type='MultiScaleFlipAug3D',
+        img_scale=(1333, 800),
+        pts_scale_ratio=1,
+        flip=False,
+        transforms=[
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=class_names,
+                with_label=False),
+            dict(type='Collect3D', keys=['points', 'img_inputs'])
+        ])
+]
+
+
+input_modality = dict(  # only the camera modality is enabled
+    use_lidar=False,
+    use_camera=True,
+    use_radar=False,
+    use_map=False,
+    use_external=False)
+
+share_data_config = dict(  # keys merged into every data split by the loop at the end of this block
+    type=dataset_type,
+    data_root=data_root,
+    classes=class_names,
+    modality=input_modality,
+    stereo=False,
+    filter_empty_gt=False,
+    img_info_prototype='bevdet4d',
+    multi_adj_frame_id_cfg=multi_adj_frame_id_cfg,  # (1, 17, 1)
+)
+
+test_data_config = dict(  # shared by the val and test splits
+    pipeline=test_pipeline,
+    ann_file=data_root + 'bevdetv2-nuscenes_infos_val.pkl')  # 'data/nuscenes/bevdetv2-nuscenes_infos_val.pkl'
+
+data = dict(
+    samples_per_gpu=4,
+    workers_per_gpu=4,
+    train=dict(
+        data_root=data_root,
+        ann_file=data_root + 'bevdetv2-nuscenes_infos_train.pkl',  # 'data/nuscenes/bevdetv2-nuscenes_infos_train.pkl'
+        pipeline=train_pipeline,
+        classes=class_names,
+        test_mode=False,
+        use_valid_flag=True,
+        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
+        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
+        box_type_3d='LiDAR'),
+    val=test_data_config,  # val and test reference the same dict object
+    test=test_data_config)
+
+for key in ['val', 'train', 'test']:  # val/test alias one dict, so it is updated twice with the same values (harmless)
+    data[key].update(share_data_config)  # shared keys overwrite any same-named keys already in the split
+
+# Optimizer
+optimizer = dict(type='AdamW', lr=1e-4, weight_decay=1e-2)
+optimizer_config = dict(grad_clip=dict(max_norm=5, norm_type=2))  # clip gradients to an L2 norm of 5
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=200,
+    warmup_ratio=0.001,
+    step=[24, ])  # NOTE(review): the only milestone equals max_epochs, so the step decay presumably never applies - confirm
+runner = dict(type='EpochBasedRunner', max_epochs=24)
+
+custom_hooks = [
+    dict(
+        type='MEGVIIEMAHook',
+        init_updates=10560,
+        priority='NORMAL',
+    ),
+]
+
+load_from = "ckpts/bevdet-r50-4d-depth-cbgs.pth"  # checkpoint to initialise from; path is relative, presumably to the working directory
+# fp16 = dict(loss_scale='dynamic')
+evaluation = dict(interval=1, start=20, pipeline=test_pipeline)  # presumably evaluates every epoch from epoch 20 on - TODO confirm
+checkpoint_config = dict(interval=1, max_keep_ckpts=5)
+
+
+# use_mask = False
+# ===> per class IoU of 6019 samples:
+# ===> others - IoU = 11.94
+# ===> barrier - IoU = 44.84
+# ===> bicycle - IoU = 26.66
+# ===> bus - IoU = 41.53
+# ===> car - IoU = 44.42
+# ===> construction_vehicle - IoU = 20.79
+# ===> motorcycle - IoU = 26.96
+# ===> pedestrian - IoU = 25.98
+# ===> traffic_cone - IoU = 29.25
+# ===> trailer - IoU = 24.24
+# ===> truck - IoU = 32.28
+# ===> driveable_surface - IoU = 60.5
+# ===> other_flat - IoU = 33.07
+# ===> sidewalk - IoU = 37.01
+# ===> terrain - IoU = 33.54
+# ===> manmade - IoU = 21.75
+# ===> vegetation - IoU = 21.58
+# ===> mIoU of 6019 samples: 31.55
+# {'mIoU': array([0.119, 0.448, 0.267, 0.415, 0.444, 0.208, 0.27 , 0.26 , 0.293,
+#        0.242, 0.323, 0.605, 0.331, 0.37 , 0.335, 0.217, 0.216, 0.839])}
+
+
+# +----------------------+----------+----------+----------+
+# |     Class Names      | RayIoU@1 | RayIoU@2 | RayIoU@4 |
+# +----------------------+----------+----------+----------+
+# |        others        |  0.110   |  0.118   |  0.119   |
+# |       barrier        |  0.444   |  0.484   |  0.499   |
+# |       bicycle        |  0.278   |  0.311   |  0.319   |
+# |         bus          |  0.537   |  0.635   |  0.691   |
+# |         car          |  0.512   |  0.585   |  0.611   |
+# | construction_vehicle |  0.153   |  0.218   |  0.238   |
+# |      motorcycle      |  0.228   |  0.310   |  0.330   |
+# |      pedestrian      |  0.338   |  0.387   |  0.401   |
+# |     traffic_cone     |  0.342   |  0.362   |  0.370   |
+# |       trailer        |  0.209   |  0.293   |  0.368   |
+# |        truck         |  0.422   |  0.511   |  0.555   |
+# |  driveable_surface   |  0.570   |  0.653   |  0.742   |
+# |      other_flat      |  0.301   |  0.340   |  0.375   |
+# |       sidewalk       |  0.266   |  0.319   |  0.370   |
+# |       terrain        |  0.261   |  0.334   |  0.400   |
+# |       manmade        |  0.360   |  0.435   |  0.485   |
+# |      vegetation      |  0.244   |  0.354   |  0.442   |
+# +----------------------+----------+----------+----------+
+# |         MEAN         |  0.328   |  0.391   |  0.430   |
+# +----------------------+----------+----------+----------+
+# {'RayIoU': 0.38313147213727416, 'RayIoU@1': 0.3279517851047602, 'RayIoU@2': 0.3911038935232673, 'RayIoU@4': 0.4303387377837949}
\ No newline at end of file
--- a/docker-hub/FlashOCC/Flashocc/projects/configs/panoptic-flashocc/panoptic-flashocc-r50-depth4d-longterm8f-pano.py
+++ b/docker-hub/FlashOCC/Flashocc/projects/configs/panoptic-flashocc/panoptic-flashocc-r50-depth4d-longterm8f-pano.py
+_base_ = ['../../../mmdetection3d/configs/_base_/datasets/nus-3d.py',  # base configs this file inherits from (mmcv `_base_` mechanism)
+          '../../../mmdetection3d/configs/_base_/default_runtime.py']
+
+plugin = True  # presumably makes the launcher import the code under plugin_dir - TODO confirm
+plugin_dir = 'projects/mmdet3d_plugin/'
+# point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
+point_cloud_range = [-40.0, -40.0, -5.0, 40.0, 40.0, 3.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] - TODO confirm
+# For nuScenes we usually do 10-class detection
+class_names = [
+    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
+    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+]
+
+data_config = {  # image-side settings; presumably consumed by the image-loading transform - TODO confirm
+    'cams': [
+        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',
+        'CAM_BACK', 'CAM_BACK_RIGHT'
+    ],
+    'Ncams':
+    6,  # equals len(cams) above
+    'input_size': (256, 704),  # presumably (height, width) fed to the network - TODO confirm
+    'src_size': (900, 1600),  # presumably (height, width) of the raw camera images - TODO confirm
+
+    # Augmentation (exact semantics are defined by the consuming transform, not visible here)
+    'resize': (-0.06, 0.11),
+    'rot': (-5.4, 5.4),
+    'flip': True,
+    'crop_h': (0.0, 0.0),
+    'resize_test': 0.00,
+}
+
+grid_config = {  # presumably [lower, upper, step] per axis - TODO confirm against the view transformer
+    'x': [-40, 40, 0.4],  # 80 / 0.4 = 200 cells if read as [lower, upper, step]
+    'y': [-40, 40, 0.4],  # 80 / 0.4 = 200 cells if read as [lower, upper, step]
+    'z': [-1, 5.4, 6.4],  # step equals the full extent (5.4 - (-1) = 6.4), i.e. a single z bin
+    'depth': [1.0, 45.0, 0.5],  # (45 - 1) / 0.5 = 88 depth bins if read as [lower, upper, step]
+}
+
+voxel_size = [0.1, 0.1, 0.2]
+numC_Trans = 80  # channel width of the view-transformer output; BEV encoder widths are multiples of it
+multi_adj_frame_id_cfg = (1, 8+1, 1)  # unpacked into range() by the model config: range(1, 9, 1) -> 8 adjacent-frame ids
+
+model = dict(  # model definition: a type name plus one config dict per sub-module
+    type='BEVDepth4DPano',
+    num_adj=multi_adj_frame_id_cfg[1]-1,  # (8+1) - 1 = 8, the number of ids in range(*multi_adj_frame_id_cfg)
+    img_backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(2, 3),  # two outputs; img_neck.in_channels lists two widths
+        frozen_stages=-1,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        norm_eval=False,
+        with_cp=True,
+        style='pytorch',
+        pretrained='torchvision://resnet50',
+    ),
+    img_neck=dict(
+        type='CustomFPN',
+        in_channels=[1024, 2048],
+        out_channels=512,  # same value as img_view_transformer.in_channels
+        num_outs=1,
+        start_level=0,
+        out_ids=[0]),
+    img_view_transformer=dict(
+        type='LSSViewTransformerBEVDepth',
+        grid_config=grid_config,
+        input_size=data_config['input_size'],  # (256, 704)
+        in_channels=512,
+        out_channels=numC_Trans,  # 80
+        loss_depth_weight=1,
+        depthnet_cfg=dict(use_dcn=False, aspp_mid_channels=96),
+        downsample=16),
+    pre_process=dict(
+        type='CustomResNet',
+        numC_input=numC_Trans,
+        num_layer=[1, ],
+        num_channels=[numC_Trans, ],
+        stride=[1, ],
+        backbone_output_ids=[0, ]),
+    img_bev_encoder_backbone=dict(
+        type='CustomResNet',
+        numC_input=numC_Trans * (len(range(*multi_adj_frame_id_cfg))+1),  # 80 * (8 + 1) = 720
+        num_channels=[numC_Trans * 2, numC_Trans * 4, numC_Trans * 8]),  # [160, 320, 640]
+    img_bev_encoder_neck=dict(
+        type='FPN_LSS',
+        in_channels=numC_Trans * 8 + numC_Trans * 2,  # 640 + 160 = 800
+        out_channels=256),  # same value as in_channels / in_dim of the two heads below
+    aux_centerness_head=dict(
+        type='Centerness_Head',
+        task_specific_weight=[1, 1, 0, 0, 0],  # NOTE(review): five entries like common_heads; presumably per-head loss weights - confirm in Centerness_Head
+        in_channels=256,
+        tasks=[
+            dict(num_class=10, class_names=['car', 'truck',  # same ten names and order as module-level class_names
+                                            'construction_vehicle',
+                                            'bus', 'trailer',
+                                            'barrier',
+                                            'motorcycle', 'bicycle',
+                                            'pedestrian', 'traffic_cone']),
+        ],
+        common_heads=dict(
+            reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),  # first tuple elements sum to 2+1+3+2+2 = 10, the length of train_cfg code_weights
+        share_conv_channel=64,
+        bbox_coder=dict(
+            type='CenterPointBBoxCoder',
+            pc_range=point_cloud_range[:2],  # [-40.0, -40.0]
+            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],  # NOTE(review): +/-61.2 is wider than point_cloud_range (+/-40.0)
+            max_num=500,
+            score_threshold=0.3, # NOTE(review): test_cfg.pts below uses score_threshold=0.1
+            out_size_factor=4,
+            voxel_size=voxel_size[:2],  # [0.1, 0.1]
+            code_size=9),
+        separate_head=dict(
+            type='SeparateHead', init_bias=-2.19, final_kernel=3),
+        loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
+        loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),
+        norm_bbox=True),
+    occ_head=dict(
+        type='BEVOCCHead2D_V2',
+        in_dim=256,
+        out_dim=256,
+        Dz=16,  # NOTE(review): equals 6.4 / 0.4 (grid_config z extent / x-y step); presumably the number of height bins - confirm
+        use_mask=False,
+        num_classes=18,
+        use_predicter=True,
+        class_balance=True,
+        loss_occ=dict(
+            type='CustomFocalLoss',
+            use_sigmoid=True,
+            loss_weight=1.0
+        ),
+    ),
+    # model training and testing settings
+    train_cfg=dict(
+        pts=dict(
+            point_cloud_range=point_cloud_range,
+            grid_size=[800, 800, 40],  # (40 - -40) / 0.1 = 800 and (3 - -5) / 0.2 = 40 from point_cloud_range / voxel_size
+            voxel_size=voxel_size,
+            out_size_factor=4,
+            dense_reg=1,
+            gaussian_overlap=0.1,
+            max_objs=500,
+            min_radius=2,
+            code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])),  # ten weights; the last two are down-weighted to 0.2
+    test_cfg=dict(
+        pts=dict(
+            max_per_img=500,
+            max_pool_nms=False,
+            min_radius=[4, 12, 10, 1, 0.85, 0.175],
+            score_threshold=0.1,
+            out_size_factor=4,
+            voxel_size=voxel_size[:2],  # [0.1, 0.1]
+            pre_max_size=1000,
+            post_max_size=500,
+
+            # Scale-NMS
+            nms_type=['rotate'],  # one entry per task (tasks has a single entry)
+            nms_thr=[0.2],
+            nms_rescale_factor=[[1.0, 0.7, 0.7, 0.4, 0.55,  # ten factors, the same count as num_class=10
+                                 1.1, 1.0, 1.0, 1.5, 3.5]]
+        )
+    ),
+)
+
+# Data
+dataset_type = 'NuScenesDatasetOccpancy'  # NOTE(review): spelled "Occpancy"; presumably matches the name registered in the plugin - check before renaming
+data_root = 'data/nuscenes/'  # prefix for the ann_file paths built below
+file_client_args = dict(backend='disk')
+
+bda_aug_conf = dict(  # passed to LoadAnnotationsBEVDepth in both pipelines
+    rot_lim=(-0., 0.),  # zero-width range
+    scale_lim=(1., 1.),  # zero-width range around 1
+    flip_dx_ratio=0.5,
+    flip_dy_ratio=0.5
+)
+
+train_pipeline = [  # ordered list of transform configs used by data['train']
+    dict(
+        type='PrepareImageInputs',
+        is_train=True,
+        data_config=data_config,
+        sequential=True),
+    dict(
+        type='LoadAnnotationsBEVDepth',
+        bda_aug_conf=bda_aug_conf,
+        classes=class_names,
+        is_train=True),
+    dict(type='LoadOccGTFromFile'),
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=file_client_args),
+    dict(type='PointToMultiViewDepth', downsample=1, grid_config=grid_config),  # presumably produces the 'gt_depth' key collected below - confirm
+    dict(type='DefaultFormatBundle3D', class_names=class_names),
+    dict(
+        type='Collect3D', keys=['img_inputs', 'gt_depth', 'voxel_semantics',  # keys kept for the model; includes boxes/labels for the aux head
+                                'mask_lidar', 'mask_camera', 'gt_bboxes_3d', 'gt_labels_3d'])
+]
+
+test_pipeline = [  # ordered list of transform configs used by val/test and by evaluation
+    dict(type='PrepareImageInputs', data_config=data_config, sequential=True),  # is_train is not passed here, unlike train_pipeline
+    dict(
+        type='LoadAnnotationsBEVDepth',
+        bda_aug_conf=bda_aug_conf,
+        classes=class_names,
+        is_train=False),
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=file_client_args),
+    dict(
+        type='MultiScaleFlipAug3D',
+        img_scale=(1333, 800),
+        pts_scale_ratio=1,
+        flip=False,
+        transforms=[
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=class_names,
+                with_label=False),
+            dict(type='Collect3D', keys=['points', 'img_inputs', 'gt_bboxes_3d', 'gt_labels_3d'])  # boxes/labels are collected at test time too
+        ])
+]
+
+
+input_modality = dict(  # only use_camera is True; the pipelines above still load LiDAR points via LoadPointsFromFile
+    use_lidar=False,
+    use_camera=True,
+    use_radar=False,
+    use_map=False,
+    use_external=False)
+
+share_data_config = dict(  # keyword arguments merged into every split by the loop at the end of this section
+    type=dataset_type,
+    data_root=data_root,
+    classes=class_names,
+    modality=input_modality,
+    stereo=False,
+    filter_empty_gt=False,
+    img_info_prototype='bevdet4d',
+    multi_adj_frame_id_cfg=multi_adj_frame_id_cfg,
+)
+
+test_data_config = dict(  # one dict object, referenced by both data['val'] and data['test']
+    pipeline=test_pipeline,
+    ann_file=data_root + 'bevdetv2-nuscenes_infos_val.pkl')  # 'data/nuscenes/bevdetv2-nuscenes_infos_val.pkl'
+
+data = dict(
+    samples_per_gpu=4,
+    workers_per_gpu=4,
+    train=dict(
+        data_root=data_root,
+        ann_file=data_root + 'bevdetv2-nuscenes_infos_train.pkl',  # 'data/nuscenes/bevdetv2-nuscenes_infos_train.pkl'
+        pipeline=train_pipeline,
+        classes=class_names,
+        test_mode=False,
+        use_valid_flag=True,
+        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
+        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
+        box_type_3d='LiDAR'),
+    val=test_data_config,
+    test=test_data_config)
+
+for key in ['val', 'train', 'test']:  # val and test alias the same dict, so it is updated twice with identical values
+    data[key].update(share_data_config)  # in-place merge; keys in share_data_config overwrite same-named keys of the split
+
+# Optimizer
+optimizer = dict(type='AdamW', lr=1e-4, weight_decay=1e-2)
+optimizer_config = dict(grad_clip=dict(max_norm=5, norm_type=2))
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=200,
+    warmup_ratio=0.001,
+    step=[24, ])  # NOTE(review): the only milestone equals max_epochs=24 - confirm whether any LR decay is intended
+runner = dict(type='EpochBasedRunner', max_epochs=24)
+
+custom_hooks = [
+    dict(
+        type='MEGVIIEMAHook',
+        init_updates=10560,
+        priority='NORMAL',
+    ),
+]
+
+load_from = "ckpts/bevdet-r50-4d-depth-cbgs.pth"  # relative checkpoint path; presumably the weights training starts from - confirm
+# fp16 = dict(loss_scale='dynamic')
+evaluation = dict(interval=1, start=20, pipeline=test_pipeline)  # presumably evaluates every epoch from epoch 20 on - confirm against the eval hook
+checkpoint_config = dict(interval=1, max_keep_ckpts=5)
+
+
+# use_mask = False
+# ===> per class IoU of 6019 samples:
+# ===> others - IoU = 11.51
+# ===> barrier - IoU = 45.87
+# ===> bicycle - IoU = 24.65
+# ===> bus - IoU = 41.75
+# ===> car - IoU = 46.15
+# ===> construction_vehicle - IoU = 20.96
+# ===> motorcycle - IoU = 26.82
+# ===> pedestrian - IoU = 26.77
+# ===> traffic_cone - IoU = 29.66
+# ===> trailer - IoU = 24.65
+# ===> truck - IoU = 32.75
+# ===> driveable_surface - IoU = 60.39
+# ===> other_flat - IoU = 32.87
+# ===> sidewalk - IoU = 36.49
+# ===> terrain - IoU = 33.16
+# ===> manmade - IoU = 21.3
+# ===> vegetation - IoU = 20.92
+# ===> mIoU of 6019 samples: 31.57
+# {'mIoU': array([0.115, 0.459, 0.247, 0.418, 0.461, 0.21 , 0.268, 0.268, 0.297,
+#        0.247, 0.328, 0.604, 0.329, 0.365, 0.332, 0.213, 0.209, 0.839])}
+
+
+# +----------------------+----------+----------+----------+
+# |     Class Names      | RayIoU@1 | RayIoU@2 | RayIoU@4 |
+# +----------------------+----------+----------+----------+
+# |        others        |  0.101   |  0.108   |  0.110   |
+# |       barrier        |  0.439   |  0.480   |  0.497   |
+# |       bicycle        |  0.258   |  0.286   |  0.293   |
+# |         bus          |  0.540   |  0.649   |  0.700   |
+# |         car          |  0.531   |  0.603   |  0.629   |
+# | construction_vehicle |  0.180   |  0.252   |  0.282   |
+# |      motorcycle      |  0.247   |  0.328   |  0.343   |
+# |      pedestrian      |  0.347   |  0.393   |  0.409   |
+# |     traffic_cone     |  0.346   |  0.371   |  0.378   |
+# |       trailer        |  0.209   |  0.292   |  0.384   |
+# |        truck         |  0.452   |  0.544   |  0.587   |
+# |  driveable_surface   |  0.562   |  0.646   |  0.734   |
+# |      other_flat      |  0.290   |  0.328   |  0.363   |
+# |       sidewalk       |  0.261   |  0.313   |  0.363   |
+# |       terrain        |  0.260   |  0.330   |  0.394   |
+# |       manmade        |  0.345   |  0.421   |  0.471   |
+# |      vegetation      |  0.229   |  0.337   |  0.423   |
+# +----------------------+----------+----------+----------+
+# |         MEAN         |  0.329   |  0.393   |  0.433   |
+# +----------------------+----------+----------+----------+
+# 6019it [10:36,  9.46it/s]
+# +----------------------+---------+---------+---------+
+# |     Class Names      | RayPQ@1 | RayPQ@2 | RayPQ@4 |
+# +----------------------+---------+---------+---------+
+# |        others        |  0.026  |  0.032  |  0.033  |
+# |       barrier        |  0.184  |  0.232  |  0.253  |
+# |       bicycle        |  0.088  |  0.103  |  0.108  |
+# |         bus          |  0.311  |  0.406  |  0.458  |
+# |         car          |  0.300  |  0.380  |  0.403  |
+# | construction_vehicle |  0.032  |  0.057  |  0.081  |
+# |      motorcycle      |  0.114  |  0.156  |  0.169  |
+# |      pedestrian      |  0.025  |  0.030  |  0.031  |
+# |     traffic_cone     |  0.071  |  0.081  |  0.085  |
+# |       trailer        |  0.049  |  0.077  |  0.088  |
+# |        truck         |  0.182  |  0.274  |  0.314  |
+# |  driveable_surface   |  0.457  |  0.574  |  0.702  |
+# |      other_flat      |  0.062  |  0.086  |  0.106  |
+# |       sidewalk       |  0.018  |  0.042  |  0.091  |
+# |       terrain        |  0.017  |  0.039  |  0.074  |
+# |       manmade        |  0.077  |  0.144  |  0.194  |
+# |      vegetation      |  0.002  |  0.061  |  0.162  |
+# +----------------------+---------+---------+---------+
+# |         MEAN         |  0.119  |  0.163  |  0.197  |
+# +----------------------+---------+---------+---------+
+# {'RayIoU': 0.3850202377154096, 'RayIoU@1': 0.3291477679560127, 'RayIoU@2': 0.39307010079658805, 'RayIoU@4': 0.4328428443936281, 
+#  'RayPQ': 0.15961266397677248, 'RayPQ@1': 0.11850092407498894, 'RayPQ@2': 0.1631862461686837, 'RayPQ@4': 0.19715082168664483}
--- a/docker-hub/FlashOCC/Flashocc/projects/configs/panoptic-flashocc/panoptic-flashocc-r50-depth4d-longterm8f.py
+++ b/docker-hub/FlashOCC/Flashocc/projects/configs/panoptic-flashocc/panoptic-flashocc-r50-depth4d-longterm8f.py
+_base_ = ['../../../mmdetection3d/configs/_base_/datasets/nus-3d.py',  # base configs this file builds on: nuScenes dataset settings ...
+          '../../../mmdetection3d/configs/_base_/default_runtime.py']  # ... and the default runtime settings
+
+plugin = True  # NOTE(review): presumably makes the launcher import plugin_dir - confirm in the train/test scripts
+plugin_dir = 'projects/mmdet3d_plugin/'
+point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]  # NOTE(review): not referenced again in this file; grid_config below spans +/-40
+# For nuScenes we usually do 10-class detection
+class_names = [  # ten names; reused by the pipelines and the dataset config further down
+    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
+    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+]
+
+data_config = {  # image-side settings handed to PrepareImageInputs in both pipelines
+    'cams': [
+        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',
+        'CAM_BACK', 'CAM_BACK_RIGHT'
+    ],
+    'Ncams':
+    6,  # equals len(data_config['cams'])
+    'input_size': (256, 704),  # also passed to img_view_transformer; presumably (H, W) - TODO confirm
+    'src_size': (900, 1600),  # presumably (H, W) of the raw camera images - TODO confirm
+
+    # Augmentation
+    'resize': (-0.06, 0.11),
+    'rot': (-5.4, 5.4),
+    'flip': True,
+    'crop_h': (0.0, 0.0),
+    'resize_test': 0.00,
+}
+
+grid_config = {  # shared by img_view_transformer and PointToMultiViewDepth; entries read as [lower, upper, step] - TODO confirm
+    'x': [-40, 40, 0.4],  # (40 - -40) / 0.4 = 200 cells under that reading
+    'y': [-40, 40, 0.4],
+    'z': [-1, 5.4, 6.4],  # step 6.4 equals the full extent 5.4 - (-1), i.e. one cell along z
+    'depth': [1.0, 45.0, 0.5],  # (45 - 1) / 0.5 = 88 steps
+}
+
+voxel_size = [0.1, 0.1, 0.2]  # NOTE(review): not referenced again in this file
+numC_Trans = 80  # base channel count; the view transformer output and BEV encoder widths are multiples of it
+multi_adj_frame_id_cfg = (1, 8+1, 1)  # unpacked into range(1, 9, 1) in model -> 8 adjacent frame ids
+
+model = dict(  # model definition: a type name plus one config dict per sub-module (occupancy head only, no detection head)
+    type='BEVDepth4DOCC',
+    num_adj=multi_adj_frame_id_cfg[1]-1,  # (8+1) - 1 = 8, the number of ids in range(*multi_adj_frame_id_cfg)
+    img_backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(2, 3),  # two outputs; img_neck.in_channels lists two widths
+        frozen_stages=-1,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        norm_eval=False,
+        with_cp=True,
+        style='pytorch',
+        pretrained='torchvision://resnet50',
+    ),
+    img_neck=dict(
+        type='CustomFPN',
+        in_channels=[1024, 2048],
+        out_channels=512,  # same value as img_view_transformer.in_channels
+        num_outs=1,
+        start_level=0,
+        out_ids=[0]),
+    img_view_transformer=dict(
+        type='LSSViewTransformerBEVDepth',
+        grid_config=grid_config,
+        input_size=data_config['input_size'],  # (256, 704)
+        in_channels=512,
+        out_channels=numC_Trans,  # 80
+        loss_depth_weight=1,
+        depthnet_cfg=dict(use_dcn=False, aspp_mid_channels=96),
+        downsample=16),
+    pre_process=dict(
+        type='CustomResNet',
+        numC_input=numC_Trans,
+        num_layer=[1, ],
+        num_channels=[numC_Trans, ],
+        stride=[1, ],
+        backbone_output_ids=[0, ]),
+    img_bev_encoder_backbone=dict(
+        type='CustomResNet',
+        numC_input=numC_Trans * (len(range(*multi_adj_frame_id_cfg))+1),  # 80 * (8 + 1) = 720
+        num_channels=[numC_Trans * 2, numC_Trans * 4, numC_Trans * 8]),  # [160, 320, 640]
+    img_bev_encoder_neck=dict(
+        type='FPN_LSS',
+        in_channels=numC_Trans * 8 + numC_Trans * 2,  # 640 + 160 = 800
+        out_channels=256),  # same value as occ_head.in_dim
+    occ_head=dict(
+        type='BEVOCCHead2D_V2',
+        in_dim=256,
+        out_dim=256,
+        Dz=16,  # NOTE(review): equals 6.4 / 0.4 (grid_config z extent / x-y step); presumably the number of height bins - confirm
+        use_mask=False,
+        num_classes=18,
+        use_predicter=True,
+        class_balance=True,
+        loss_occ=dict(
+            type='CustomFocalLoss',
+            use_sigmoid=True,
+            loss_weight=1.0
+        ),
+    )
+)
+
+# Data
+dataset_type = 'NuScenesDatasetOccpancy'  # NOTE(review): spelled "Occpancy"; presumably matches the name registered in the plugin - check before renaming
+data_root = 'data/nuscenes/'  # prefix for the ann_file paths built below
+file_client_args = dict(backend='disk')
+
+bda_aug_conf = dict(  # passed to LoadAnnotationsBEVDepth in both pipelines
+    rot_lim=(-0., 0.),  # zero-width range
+    scale_lim=(1., 1.),  # zero-width range around 1
+    flip_dx_ratio=0.5,
+    flip_dy_ratio=0.5
+)
+
+train_pipeline = [  # ordered list of transform configs used by data['train']
+    dict(
+        type='PrepareImageInputs',
+        is_train=True,
+        data_config=data_config,
+        sequential=True),
+    dict(
+        type='LoadAnnotationsBEVDepth',
+        bda_aug_conf=bda_aug_conf,
+        classes=class_names,
+        is_train=True),
+    dict(type='LoadOccGTFromFile'),
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=file_client_args),
+    dict(type='PointToMultiViewDepth', downsample=1, grid_config=grid_config),  # presumably produces the 'gt_depth' key collected below - confirm
+    dict(type='DefaultFormatBundle3D', class_names=class_names),
+    dict(
+        type='Collect3D', keys=['img_inputs', 'gt_depth', 'voxel_semantics',  # keys kept for the model; no box/label keys in this variant
+                                'mask_lidar', 'mask_camera'])
+]
+
+test_pipeline = [  # ordered list of transform configs used by val/test and by evaluation
+    dict(type='PrepareImageInputs', data_config=data_config, sequential=True),  # is_train is not passed here, unlike train_pipeline
+    dict(
+        type='LoadAnnotationsBEVDepth',
+        bda_aug_conf=bda_aug_conf,
+        classes=class_names,
+        is_train=False),
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=file_client_args),
+    dict(
+        type='MultiScaleFlipAug3D',
+        img_scale=(1333, 800),
+        pts_scale_ratio=1,
+        flip=False,
+        transforms=[
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=class_names,
+                with_label=False),
+            dict(type='Collect3D', keys=['points', 'img_inputs'])  # only points and images are collected at test time
+        ])
+]
+
+
+input_modality = dict(  # only use_camera is True; the pipelines above still load LiDAR points via LoadPointsFromFile
+    use_lidar=False,
+    use_camera=True,
+    use_radar=False,
+    use_map=False,
+    use_external=False)
+
+share_data_config = dict(  # keyword arguments merged into every split by the loop at the end of this section
+    type=dataset_type,
+    data_root=data_root,
+    classes=class_names,
+    modality=input_modality,
+    stereo=False,
+    filter_empty_gt=False,
+    img_info_prototype='bevdet4d',
+    multi_adj_frame_id_cfg=multi_adj_frame_id_cfg,
+)
+
+test_data_config = dict(  # one dict object, referenced by both data['val'] and data['test']
+    pipeline=test_pipeline,
+    ann_file=data_root + 'bevdetv2-nuscenes_infos_val.pkl')  # 'data/nuscenes/bevdetv2-nuscenes_infos_val.pkl'
+
+data = dict(
+    samples_per_gpu=4,
+    workers_per_gpu=4,
+    train=dict(
+        data_root=data_root,
+        ann_file=data_root + 'bevdetv2-nuscenes_infos_train.pkl',  # 'data/nuscenes/bevdetv2-nuscenes_infos_train.pkl'
+        pipeline=train_pipeline,
+        classes=class_names,
+        test_mode=False,
+        use_valid_flag=True,
+        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
+        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
+        box_type_3d='LiDAR'),
+    val=test_data_config,
+    test=test_data_config)
+
+for key in ['val', 'train', 'test']:  # val and test alias the same dict, so it is updated twice with identical values
+    data[key].update(share_data_config)  # in-place merge; keys in share_data_config overwrite same-named keys of the split
+
+# Optimizer
+optimizer = dict(type='AdamW', lr=1e-4, weight_decay=1e-2)
+optimizer_config = dict(grad_clip=dict(max_norm=5, norm_type=2))
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=200,
+    warmup_ratio=0.001,
+    step=[24, ])  # NOTE(review): the only milestone equals max_epochs=24 - confirm whether any LR decay is intended
+runner = dict(type='EpochBasedRunner', max_epochs=24)
+
+custom_hooks = [
+    dict(
+        type='MEGVIIEMAHook',
+        init_updates=10560,
+        priority='NORMAL',
+    ),
+]
+
+load_from = "ckpts/bevdet-r50-4d-depth-cbgs.pth"  # relative checkpoint path; presumably the weights training starts from - confirm
+# fp16 = dict(loss_scale='dynamic')
+evaluation = dict(interval=1, start=20, pipeline=test_pipeline)  # presumably evaluates every epoch from epoch 20 on - confirm against the eval hook
+checkpoint_config = dict(interval=1, max_keep_ckpts=5)
+
+
+# use_mask = False
+# ===> per class IoU of 6019 samples:
+# ===> others - IoU = 11.5
+# ===> barrier - IoU = 44.1
+# ===> bicycle - IoU = 25.89
+# ===> bus - IoU = 41.0
+# ===> car - IoU = 44.57
+# ===> construction_vehicle - IoU = 21.88
+# ===> motorcycle - IoU = 27.31
+# ===> pedestrian - IoU = 25.95
+# ===> traffic_cone - IoU = 29.04
+# ===> trailer - IoU = 24.17
+# ===> truck - IoU = 31.81
+# ===> driveable_surface - IoU = 60.74
+# ===> other_flat - IoU = 33.84
+# ===> sidewalk - IoU = 36.62
+# ===> terrain - IoU = 33.96
+# ===> manmade - IoU = 21.54
+# ===> vegetation - IoU = 21.36
+# ===> mIoU of 6019 samples: 31.49
+# {'mIoU': array([0.115, 0.441, 0.259, 0.41 , 0.446, 0.219, 0.273, 0.259, 0.29 ,
+#        0.242, 0.318, 0.607, 0.338, 0.366, 0.34 , 0.215, 0.214, 0.839])}
+
+
+# +----------------------+----------+----------+----------+
+# |     Class Names      | RayIoU@1 | RayIoU@2 | RayIoU@4 |
+# +----------------------+----------+----------+----------+
+# |        others        |  0.107   |  0.115   |  0.116   |
+# |       barrier        |  0.442   |  0.485   |  0.501   |
+# |       bicycle        |  0.267   |  0.296   |  0.302   |
+# |         bus          |  0.533   |  0.632   |  0.683   |
+# |         car          |  0.516   |  0.590   |  0.616   |
+# | construction_vehicle |  0.170   |  0.251   |  0.282   |
+# |      motorcycle      |  0.231   |  0.325   |  0.350   |
+# |      pedestrian      |  0.340   |  0.386   |  0.400   |
+# |     traffic_cone     |  0.348   |  0.372   |  0.380   |
+# |       trailer        |  0.232   |  0.317   |  0.400   |
+# |        truck         |  0.427   |  0.514   |  0.559   |
+# |  driveable_surface   |  0.566   |  0.649   |  0.736   |
+# |      other_flat      |  0.302   |  0.341   |  0.374   |
+# |       sidewalk       |  0.261   |  0.313   |  0.363   |
+# |       terrain        |  0.258   |  0.333   |  0.399   |
+# |       manmade        |  0.348   |  0.426   |  0.479   |
+# |      vegetation      |  0.234   |  0.342   |  0.430   |
+# +----------------------+----------+----------+----------+
+# |         MEAN         |  0.328   |  0.393   |  0.434   |
+# +----------------------+----------+----------+----------+
+# {'RayIoU': 0.3851476341258822, 'RayIoU@1': 0.3284556495395326, 'RayIoU@2': 0.39334760720480005, 'RayIoU@4': 0.43363964563331386}
\ No newline at end of file
--- a/docker-hub/FlashOCC/Flashocc/projects/configs/panoptic-flashocc/panoptic-flashocc-r50-depth4d-pano.py
+++ b/docker-hub/FlashOCC/Flashocc/projects/configs/panoptic-flashocc/panoptic-flashocc-r50-depth4d-pano.py
+_base_ = ['../../../mmdetection3d/configs/_base_/datasets/nus-3d.py',  # base configs this file builds on: nuScenes dataset settings ...
+          '../../../mmdetection3d/configs/_base_/default_runtime.py']  # ... and the default runtime settings
+
+plugin = True  # NOTE(review): presumably makes the launcher import plugin_dir - confirm in the train/test scripts
+plugin_dir = 'projects/mmdet3d_plugin/'
+# point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
+point_cloud_range = [-40.0, -40.0, -5.0, 40.0, 40.0, 3.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max]; read by bbox_coder and train_cfg in model
+# For nuScenes we usually do 10-class detection
+class_names = [  # ten names; reused by the pipelines and the dataset config further down
+    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
+    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+]
+
+data_config = {  # image-side settings handed to PrepareImageInputs in both pipelines
+    'cams': [
+        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',
+        'CAM_BACK', 'CAM_BACK_RIGHT'
+    ],
+    'Ncams':
+    6,  # equals len(data_config['cams'])
+    'input_size': (256, 704),  # also passed to img_view_transformer; presumably (H, W) - TODO confirm
+    'src_size': (900, 1600),  # presumably (H, W) of the raw camera images - TODO confirm
+
+    # Augmentation
+    'resize': (-0.06, 0.11),
+    'rot': (-5.4, 5.4),
+    'flip': True,
+    'crop_h': (0.0, 0.0),
+    'resize_test': 0.00,
+}
+
+grid_config = {  # shared by img_view_transformer and PointToMultiViewDepth; entries read as [lower, upper, step] - TODO confirm
+    'x': [-40, 40, 0.4],  # (40 - -40) / 0.4 = 200 cells under that reading
+    'y': [-40, 40, 0.4],
+    'z': [-1, 5.4, 6.4],  # step 6.4 equals the full extent 5.4 - (-1), i.e. one cell along z
+    'depth': [1.0, 45.0, 0.5],  # (45 - 1) / 0.5 = 88 steps
+}
+
+voxel_size = [0.1, 0.1, 0.2]  # with point_cloud_range: 80 / 0.1 = 800 and 8 / 0.2 = 40, matching train_cfg grid_size
+numC_Trans = 80  # base channel count; the view transformer output and BEV encoder widths are multiples of it
+multi_adj_frame_id_cfg = (1, 1+1, 1)  # unpacked into range(1, 2, 1) in model -> a single adjacent frame id
+
+model = dict(  # model definition: a type name plus one config dict per sub-module
+    type='BEVDepth4DPano',  # NOTE(review): no num_adj is passed (the longterm8f variant passes one), so the model's default applies - confirm it suits one adjacent frame
+    img_backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(2, 3),  # two outputs; img_neck.in_channels lists two widths
+        frozen_stages=-1,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        norm_eval=False,
+        with_cp=True,
+        style='pytorch',
+        pretrained='torchvision://resnet50',
+    ),
+    img_neck=dict(
+        type='CustomFPN',
+        in_channels=[1024, 2048],
+        out_channels=512,  # same value as img_view_transformer.in_channels
+        num_outs=1,
+        start_level=0,
+        out_ids=[0]),
+    img_view_transformer=dict(
+        type='LSSViewTransformerBEVDepth',
+        grid_config=grid_config,
+        input_size=data_config['input_size'],  # (256, 704)
+        in_channels=512,
+        out_channels=numC_Trans,  # 80
+        loss_depth_weight=1,
+        depthnet_cfg=dict(use_dcn=False, aspp_mid_channels=96),
+        downsample=16),
+    pre_process=dict(
+        type='CustomResNet',
+        numC_input=numC_Trans,
+        num_layer=[1, ],
+        num_channels=[numC_Trans, ],
+        stride=[1, ],
+        backbone_output_ids=[0, ]),
+    img_bev_encoder_backbone=dict(
+        type='CustomResNet',
+        numC_input=numC_Trans * (len(range(*multi_adj_frame_id_cfg))+1),  # 80 * (1 + 1) = 160
+        num_channels=[numC_Trans * 2, numC_Trans * 4, numC_Trans * 8]),  # [160, 320, 640]
+    img_bev_encoder_neck=dict(
+        type='FPN_LSS',
+        in_channels=numC_Trans * 8 + numC_Trans * 2,  # 640 + 160 = 800
+        out_channels=256),  # same value as in_channels / in_dim of the two heads below
+    aux_centerness_head=dict(
+        type='Centerness_Head',
+        task_specific_weight=[1, 1, 0, 0, 0],  # NOTE(review): five entries like common_heads; presumably per-head loss weights - confirm in Centerness_Head
+        in_channels=256,
+        tasks=[
+            dict(num_class=10, class_names=['car', 'truck',  # same ten names and order as module-level class_names
+                                            'construction_vehicle',
+                                            'bus', 'trailer',
+                                            'barrier',
+                                            'motorcycle', 'bicycle',
+                                            'pedestrian', 'traffic_cone']),
+        ],
+        common_heads=dict(
+            reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),  # first tuple elements sum to 2+1+3+2+2 = 10, the length of train_cfg code_weights
+        share_conv_channel=64,
+        bbox_coder=dict(
+            type='CenterPointBBoxCoder',
+            pc_range=point_cloud_range[:2],  # [-40.0, -40.0]
+            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],  # NOTE(review): +/-61.2 is wider than point_cloud_range (+/-40.0)
+            max_num=500,
+            score_threshold=0.3, # NOTE(review): test_cfg.pts below uses score_threshold=0.1
+            out_size_factor=4,
+            voxel_size=voxel_size[:2],  # [0.1, 0.1]
+            code_size=9),
+        separate_head=dict(
+            type='SeparateHead', init_bias=-2.19, final_kernel=3),
+        loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
+        loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),
+        norm_bbox=True),
+    occ_head=dict(
+        type='BEVOCCHead2D_V2',
+        in_dim=256,
+        out_dim=256,
+        Dz=16,  # NOTE(review): equals 6.4 / 0.4 (grid_config z extent / x-y step); presumably the number of height bins - confirm
+        use_mask=False,
+        num_classes=18,
+        use_predicter=True,
+        class_balance=True,
+        loss_occ=dict(
+            type='CustomFocalLoss',
+            use_sigmoid=True,
+            loss_weight=1.0
+        ),
+    ),
+    # model training and testing settings
+    train_cfg=dict(
+        pts=dict(
+            point_cloud_range=point_cloud_range,
+            grid_size=[800, 800, 40],  # (40 - -40) / 0.1 = 800 and (3 - -5) / 0.2 = 40 from point_cloud_range / voxel_size
+            voxel_size=voxel_size,
+            out_size_factor=4,
+            dense_reg=1,
+            gaussian_overlap=0.1,
+            max_objs=500,
+            min_radius=2,
+            code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])),  # ten weights; the last two are down-weighted to 0.2
+    test_cfg=dict(
+        pts=dict(
+            max_per_img=500,
+            max_pool_nms=False,
+            min_radius=[4, 12, 10, 1, 0.85, 0.175],
+            score_threshold=0.1,
+            out_size_factor=4,
+            voxel_size=voxel_size[:2],  # [0.1, 0.1]
+            pre_max_size=1000,
+            post_max_size=500,
+
+            # Scale-NMS
+            nms_type=['rotate'],  # one entry per task (tasks has a single entry)
+            nms_thr=[0.2],
+            nms_rescale_factor=[[1.0, 0.7, 0.7, 0.4, 0.55,  # ten factors, the same count as num_class=10
+                                 1.1, 1.0, 1.0, 1.5, 3.5]]
+        )
+    ),
+)
+
+# Data
+dataset_type = 'NuScenesDatasetOccpancy'  # NOTE(review): spelled "Occpancy"; presumably matches the name registered in the plugin - check before renaming
+data_root = 'data/nuscenes/'  # prefix for the ann_file paths built below
+file_client_args = dict(backend='disk')
+
+bda_aug_conf = dict(  # passed to LoadAnnotationsBEVDepth in both pipelines
+    rot_lim=(-0., 0.),  # zero-width range
+    scale_lim=(1., 1.),  # zero-width range around 1
+    flip_dx_ratio=0.5,
+    flip_dy_ratio=0.5
+)
+
+train_pipeline = [  # ordered list of transform configs used by data['train']
+    dict(
+        type='PrepareImageInputs',
+        is_train=True,
+        data_config=data_config,
+        sequential=True),
+    dict(
+        type='LoadAnnotationsBEVDepth',
+        bda_aug_conf=bda_aug_conf,
+        classes=class_names,
+        is_train=True),
+    dict(type='LoadOccGTFromFile'),
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=file_client_args),
+    dict(type='PointToMultiViewDepth', downsample=1, grid_config=grid_config),  # presumably produces the 'gt_depth' key collected below - confirm
+    dict(type='DefaultFormatBundle3D', class_names=class_names),
+    dict(
+        type='Collect3D', keys=['img_inputs', 'gt_depth', 'voxel_semantics',  # keys kept for the model; includes boxes/labels for the aux head
+                                'mask_lidar', 'mask_camera', 'gt_bboxes_3d', 'gt_labels_3d'])
+]
+
+test_pipeline = [  # ordered list of transform configs used by val/test and by evaluation
+    dict(type='PrepareImageInputs', data_config=data_config, sequential=True),  # is_train is not passed here, unlike train_pipeline
+    dict(
+        type='LoadAnnotationsBEVDepth',
+        bda_aug_conf=bda_aug_conf,
+        classes=class_names,
+        is_train=False),
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=file_client_args),
+    dict(
+        type='MultiScaleFlipAug3D',
+        img_scale=(1333, 800),
+        pts_scale_ratio=1,
+        flip=False,
+        transforms=[
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=class_names,
+                with_label=False),
+            dict(type='Collect3D', keys=['points', 'img_inputs', 'gt_bboxes_3d', 'gt_labels_3d'])  # boxes/labels are collected at test time too
+        ])
+]
+
+
+input_modality = dict(  # only use_camera is True; the pipelines above still load LiDAR points via LoadPointsFromFile
+    use_lidar=False,
+    use_camera=True,
+    use_radar=False,
+    use_map=False,
+    use_external=False)
+
+share_data_config = dict(  # keyword arguments merged into every split by the loop at the end of this section
+    type=dataset_type,
+    data_root=data_root,
+    classes=class_names,
+    modality=input_modality,
+    stereo=False,
+    filter_empty_gt=False,
+    img_info_prototype='bevdet4d',
+    multi_adj_frame_id_cfg=multi_adj_frame_id_cfg,
+)
+
+test_data_config = dict(  # one dict object, referenced by both data['val'] and data['test']
+    pipeline=test_pipeline,
+    ann_file=data_root + 'bevdetv2-nuscenes_infos_val.pkl')  # 'data/nuscenes/bevdetv2-nuscenes_infos_val.pkl'
+
+data = dict(
+    samples_per_gpu=4,
+    workers_per_gpu=4,
+    train=dict(
+        data_root=data_root,
+        ann_file=data_root + 'bevdetv2-nuscenes_infos_train.pkl',  # 'data/nuscenes/bevdetv2-nuscenes_infos_train.pkl'
+        pipeline=train_pipeline,
+        classes=class_names,
+        test_mode=False,
+        use_valid_flag=True,
+        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
+        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
+        box_type_3d='LiDAR'),
+    val=test_data_config,
+    test=test_data_config)
+
+for key in ['val', 'train', 'test']:  # val and test alias the same dict, so it is updated twice with identical values
+    data[key].update(share_data_config)  # in-place merge; keys in share_data_config overwrite same-named keys of the split
+
+# Optimizer
+optimizer = dict(type='AdamW', lr=1e-4, weight_decay=1e-2)
+optimizer_config = dict(grad_clip=dict(max_norm=5, norm_type=2))
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=200,
+    warmup_ratio=0.001,
+    step=[24, ])  # NOTE(review): the only milestone equals max_epochs=24 - confirm whether any LR decay is intended
+runner = dict(type='EpochBasedRunner', max_epochs=24)
+
+custom_hooks = [
+    dict(
+        type='MEGVIIEMAHook',
+        init_updates=10560,
+        priority='NORMAL',
+    ),
+]
+
+load_from = "ckpts/bevdet-r50-4d-depth-cbgs.pth"  # relative checkpoint path; presumably the weights training starts from - confirm
+# fp16 = dict(loss_scale='dynamic')
+evaluation = dict(interval=1, start=20, pipeline=test_pipeline)  # presumably evaluates every epoch from epoch 20 on - confirm against the eval hook
+checkpoint_config = dict(interval=1, max_keep_ckpts=5)
+
+
+
+# ===> per class IoU of 6019 samples:
+# ===> others - IoU = 10.89
+# ===> barrier - IoU = 43.92
+# ===> bicycle - IoU = 24.42
+# ===> bus - IoU = 41.91
+# ===> car - IoU = 45.18
+# ===> construction_vehicle - IoU = 18.73
+# ===> motorcycle - IoU = 25.59
+# ===> pedestrian - IoU = 25.67
+# ===> traffic_cone - IoU = 25.86
+# ===> trailer - IoU = 25.29
+# ===> truck - IoU = 31.84
+# ===> driveable_surface - IoU = 59.03
+# ===> other_flat - IoU = 31.53
+# ===> sidewalk - IoU = 34.67
+# ===> terrain - IoU = 31.49
+# ===> manmade - IoU = 19.91
+# ===> vegetation - IoU = 19.31
+# ===> mIoU of 6019 samples: 30.31
+# {'mIoU': array([0.109, 0.439, 0.244, 0.419, 0.452, 0.187, 0.256, 0.257, 0.259,
+#        0.253, 0.318, 0.59 , 0.315, 0.347, 0.315, 0.199, 0.193, 0.835])}
+
+# +----------------------+----------+----------+----------+
+# |     Class Names      | RayIoU@1 | RayIoU@2 | RayIoU@4 |
+# +----------------------+----------+----------+----------+
+# |        others        |  0.094   |  0.105   |  0.107   |
+# |       barrier        |  0.411   |  0.460   |  0.480   |
+# |       bicycle        |  0.252   |  0.286   |  0.293   |
+# |         bus          |  0.541   |  0.646   |  0.698   |
+# |         car          |  0.520   |  0.594   |  0.621   |
+# | construction_vehicle |  0.164   |  0.235   |  0.264   |
+# |      motorcycle      |  0.212   |  0.305   |  0.321   |
+# |      pedestrian      |  0.326   |  0.373   |  0.389   |
+# |     traffic_cone     |  0.312   |  0.341   |  0.348   |
+# |       trailer        |  0.220   |  0.291   |  0.372   |
+# |        truck         |  0.430   |  0.520   |  0.565   |
+# |  driveable_surface   |  0.552   |  0.633   |  0.720   |
+# |      other_flat      |  0.293   |  0.330   |  0.361   |
+# |       sidewalk       |  0.242   |  0.291   |  0.340   |
+# |       terrain        |  0.236   |  0.305   |  0.369   |
+# |       manmade        |  0.303   |  0.378   |  0.429   |
+# |      vegetation      |  0.193   |  0.294   |  0.381   |
+# +----------------------+----------+----------+----------+
+# |         MEAN         |  0.312   |  0.376   |  0.415   |
+# +----------------------+----------+----------+----------+
+# 6019it [09:13, 10.87it/s]
+# +----------------------+---------+---------+---------+
+# |     Class Names      | RayPQ@1 | RayPQ@2 | RayPQ@4 |
+# +----------------------+---------+---------+---------+
+# |        others        |  0.020  |  0.028  |  0.030  |
+# |       barrier        |  0.155  |  0.211  |  0.235  |
+# |       bicycle        |  0.083  |  0.097  |  0.102  |
+# |         bus          |  0.299  |  0.391  |  0.442  |
+# |         car          |  0.277  |  0.360  |  0.384  |
+# | construction_vehicle |  0.011  |  0.062  |  0.077  |
+# |      motorcycle      |  0.098  |  0.149  |  0.166  |
+# |      pedestrian      |  0.021  |  0.026  |  0.027  |
+# |     traffic_cone     |  0.052  |  0.069  |  0.071  |
+# |       trailer        |  0.043  |  0.062  |  0.071  |
+# |        truck         |  0.158  |  0.248  |  0.293  |
+# |  driveable_surface   |  0.440  |  0.559  |  0.680  |
+# |      other_flat      |  0.065  |  0.089  |  0.107  |
+# |       sidewalk       |  0.012  |  0.029  |  0.060  |
+# |       terrain        |  0.009  |  0.028  |  0.053  |
+# |       manmade        |  0.060  |  0.108  |  0.153  |
+# |      vegetation      |  0.001  |  0.029  |  0.111  |
+# +----------------------+---------+---------+---------+
+# |         MEAN         |  0.106  |  0.150  |  0.180  |
+# +----------------------+---------+---------+---------+
+# {'RayIoU': 0.3676099569727112, 'RayIoU@1': 0.3118578145261225, 'RayIoU@2': 0.3757836068619914, 'RayIoU@4': 0.4151884495300196, 
+#  'RayPQ': 0.14529917059571107, 'RayPQ@1': 0.1061843618020449, 'RayPQ@2': 0.14961373290314467, 'RayPQ@4': 0.18009941708194366}
+
--- a/docker-hub/FlashOCC/Flashocc/projects/configs/panoptic-flashocc/panoptic-flashocc-r50-depth4d.py
+++ b/docker-hub/FlashOCC/Flashocc/projects/configs/panoptic-flashocc/panoptic-flashocc-r50-depth4d.py
+_base_ = ['../../../mmdetection3d/configs/_base_/datasets/nus-3d.py',  # inherited dataset base config
+          '../../../mmdetection3d/configs/_base_/default_runtime.py']  # inherited runtime base config
+
+plugin = True
+plugin_dir = 'projects/mmdet3d_plugin/'  # NOTE(review): presumably the package imported by the train/test tools when plugin is True - confirm
+point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]  # NOTE(review): not referenced again in this file; x/y extent differs from grid_config (-40..40) - confirm intended
+# For nuScenes we usually do 10-class detection
+class_names = [
+    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
+    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+]
+
+data_config = {  # passed to PrepareImageInputs in both pipelines below
+    'cams': [
+        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',
+        'CAM_BACK', 'CAM_BACK_RIGHT'
+    ],
+    'Ncams':
+    6,  # equals the number of entries in 'cams'
+    'input_size': (256, 704),  # also forwarded to img_view_transformer.input_size
+    'src_size': (900, 1600),
+
+    # Augmentation
+    'resize': (-0.06, 0.11),
+    'rot': (-5.4, 5.4),
+    'flip': True,
+    'crop_h': (0.0, 0.0),
+    'resize_test': 0.00,
+}
+
+grid_config = {  # assumes each entry is [lower, upper, step] - TODO confirm against the view transformer
+    'x': [-40, 40, 0.4],
+    'y': [-40, 40, 0.4],
+    'z': [-1, 5.4, 6.4],  # under the [lower, upper, step] reading this is a single 6.4-wide bin
+    'depth': [1.0, 45.0, 0.5],
+}
+
+voxel_size = [0.1, 0.1, 0.2]  # NOTE(review): not referenced again in this file
+numC_Trans = 80  # channel count reused by img_view_transformer, pre_process and the BEV encoder below
+multi_adj_frame_id_cfg = (1, 1+1, 1)  # unpacked into range() below: range(1, 2, 1) has one element; also passed to the dataset config
+
+model = dict(  # detector definition; each component is selected by its 'type' string
+    type='BEVDepth4DOCC',
+    img_backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(2, 3),
+        frozen_stages=-1,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        norm_eval=False,
+        with_cp=True,
+        style='pytorch',
+        pretrained='torchvision://resnet50',
+    ),
+    img_neck=dict(
+        type='CustomFPN',
+        in_channels=[1024, 2048],  # two entries, presumably one per backbone out_indices entry - confirm
+        out_channels=512,  # same value as img_view_transformer.in_channels below
+        num_outs=1,
+        start_level=0,
+        out_ids=[0]),
+    img_view_transformer=dict(
+        type='LSSViewTransformerBEVDepth',
+        grid_config=grid_config,
+        input_size=data_config['input_size'],
+        in_channels=512,
+        out_channels=numC_Trans,
+        loss_depth_weight=1,
+        depthnet_cfg=dict(use_dcn=False, aspp_mid_channels=96),
+        downsample=16),
+    pre_process=dict(
+        type='CustomResNet',
+        numC_input=numC_Trans,
+        num_layer=[1, ],
+        num_channels=[numC_Trans, ],
+        stride=[1, ],
+        backbone_output_ids=[0, ]),
+    img_bev_encoder_backbone=dict(
+        type='CustomResNet',
+        numC_input=numC_Trans * (len(range(*multi_adj_frame_id_cfg))+1),  # 80 * (1 + 1) = 160 with the values set above
+        num_channels=[numC_Trans * 2, numC_Trans * 4, numC_Trans * 8]),  # [160, 320, 640]
+    img_bev_encoder_neck=dict(
+        type='FPN_LSS',
+        in_channels=numC_Trans * 8 + numC_Trans * 2,  # 640 + 160 = 800
+        out_channels=256),  # same value as occ_head.in_dim below
+    occ_head=dict(
+        type='BEVOCCHead2D_V2',
+        in_dim=256,
+        out_dim=256,
+        Dz=16,
+        use_mask=False,
+        num_classes=18,  # NOTE(review): the result tables at the end of this file list 17 named classes; presumably the 18th is a free/empty class - confirm
+        use_predicter=True,
+        class_balance=True,
+        loss_occ=dict(
+            type='CustomFocalLoss',
+            use_sigmoid=True,
+            loss_weight=1.0
+        ),
+    )
+)
+
+# Data: dataset type, data root, BEV augmentation settings and the train/test pipelines
+dataset_type = 'NuScenesDatasetOccpancy'  # spelling kept as-is: this string is a runtime lookup key
+data_root = 'data/nuscenes/'
+file_client_args = dict(backend='disk')
+
+bda_aug_conf = dict(  # shared by the LoadAnnotationsBEVDepth step of both pipelines
+    rot_lim=(-0., 0.),  # lower and upper limits are both zero
+    scale_lim=(1., 1.),  # lower and upper limits are both one
+    flip_dx_ratio=0.5,
+    flip_dy_ratio=0.5
+)
+
+train_pipeline = [
+    dict(
+        type='PrepareImageInputs',
+        is_train=True,
+        data_config=data_config,
+        sequential=True),
+    dict(
+        type='LoadAnnotationsBEVDepth',
+        bda_aug_conf=bda_aug_conf,
+        classes=class_names,
+        is_train=True),
+    dict(type='LoadOccGTFromFile'),
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=file_client_args),
+    dict(type='PointToMultiViewDepth', downsample=1, grid_config=grid_config),
+    dict(type='DefaultFormatBundle3D', class_names=class_names),
+    dict(
+        type='Collect3D', keys=['img_inputs', 'gt_depth', 'voxel_semantics',
+                                'mask_lidar', 'mask_camera'])  # keys collected for training
+]
+
+test_pipeline = [  # also passed to the evaluation setting further down in this file
+    dict(type='PrepareImageInputs', data_config=data_config, sequential=True),
+    dict(
+        type='LoadAnnotationsBEVDepth',
+        bda_aug_conf=bda_aug_conf,
+        classes=class_names,
+        is_train=False),
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=file_client_args),
+    dict(
+        type='MultiScaleFlipAug3D',
+        img_scale=(1333, 800),
+        pts_scale_ratio=1,
+        flip=False,
+        transforms=[
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=class_names,
+                with_label=False),
+            dict(type='Collect3D', keys=['points', 'img_inputs'])  # no ground-truth keys are collected at test time
+        ])
+]
+
+
+input_modality = dict(  # only the camera modality is enabled
+    use_lidar=False,
+    use_camera=True,
+    use_radar=False,
+    use_map=False,
+    use_external=False)
+
+share_data_config = dict(  # merged into every split by the loop at the end of this section
+    type=dataset_type,
+    data_root=data_root,
+    classes=class_names,
+    modality=input_modality,
+    stereo=False,
+    filter_empty_gt=False,
+    img_info_prototype='bevdet4d',
+    multi_adj_frame_id_cfg=multi_adj_frame_id_cfg,
+)
+
+test_data_config = dict(  # used for both the val and the test split below
+    pipeline=test_pipeline,
+    ann_file=data_root + 'bevdetv2-nuscenes_infos_val.pkl')
+
+data = dict(
+    samples_per_gpu=4,
+    workers_per_gpu=4,
+    train=dict(
+        data_root=data_root,
+        ann_file=data_root + 'bevdetv2-nuscenes_infos_train.pkl',
+        pipeline=train_pipeline,
+        classes=class_names,
+        test_mode=False,
+        use_valid_flag=True,
+        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
+        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
+        box_type_3d='LiDAR'),
+    val=test_data_config,  # same dict object as test
+    test=test_data_config)
+
+for key in ['val', 'train', 'test']:  # val and test alias one dict, so it is updated twice with identical content
+    data[key].update(share_data_config)  # shared keys overwrite any same-named keys already set on the split
+
+# Optimizer, learning-rate schedule, runner, hooks and checkpoint/evaluation settings
+optimizer = dict(type='AdamW', lr=1e-4, weight_decay=1e-2)
+optimizer_config = dict(grad_clip=dict(max_norm=5, norm_type=2))
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=200,
+    warmup_ratio=0.001,
+    step=[24, ])  # NOTE(review): equals max_epochs below, so presumably no step decay takes effect during training - confirm
+runner = dict(type='EpochBasedRunner', max_epochs=24)
+
+custom_hooks = [
+    dict(
+        type='MEGVIIEMAHook',
+        init_updates=10560,
+        priority='NORMAL',
+    ),
+]
+
+load_from = "ckpts/bevdet-r50-4d-depth-cbgs.pth"  # NOTE(review): presumably pretrained weights loaded before training - confirm the file is provisioned
+# fp16 = dict(loss_scale='dynamic')
+evaluation = dict(interval=1, start=20, pipeline=test_pipeline)
+checkpoint_config = dict(interval=1, max_keep_ckpts=5)
+
+# Evaluation results with use_mask = False (per-class IoU, then RayIoU):
+# ===> others - IoU = 9.99
+# ===> barrier - IoU = 41.3
+# ===> bicycle - IoU = 22.84
+# ===> bus - IoU = 41.17
+# ===> car - IoU = 41.89
+# ===> construction_vehicle - IoU = 20.84
+# ===> motorcycle - IoU = 25.25
+# ===> pedestrian - IoU = 23.98
+# ===> traffic_cone - IoU = 24.36
+# ===> trailer - IoU = 26.39
+# ===> truck - IoU = 30.41
+# ===> driveable_surface - IoU = 58.26
+# ===> other_flat - IoU = 31.86
+# ===> sidewalk - IoU = 34.47
+# ===> terrain - IoU = 31.96
+# ===> manmade - IoU = 18.87
+# ===> vegetation - IoU = 18.95
+# ===> mIoU of 6019 samples: 29.57
+# {'mIoU': array([0.1  , 0.413, 0.228, 0.412, 0.419, 0.208, 0.253, 0.24 , 0.244,
+#        0.264, 0.304, 0.583, 0.319, 0.345, 0.32 , 0.189, 0.189, 0.833])}
+
+
+# +----------------------+----------+----------+----------+
+# |     Class Names      | RayIoU@1 | RayIoU@2 | RayIoU@4 |
+# +----------------------+----------+----------+----------+
+# |        others        |  0.095   |  0.106   |  0.109   |
+# |       barrier        |  0.392   |  0.444   |  0.466   |
+# |       bicycle        |  0.236   |  0.279   |  0.287   |
+# |         bus          |  0.513   |  0.616   |  0.675   |
+# |         car          |  0.492   |  0.567   |  0.596   |
+# | construction_vehicle |  0.170   |  0.256   |  0.296   |
+# |      motorcycle      |  0.216   |  0.304   |  0.330   |
+# |      pedestrian      |  0.315   |  0.363   |  0.378   |
+# |     traffic_cone     |  0.280   |  0.315   |  0.323   |
+# |       trailer        |  0.210   |  0.294   |  0.397   |
+# |        truck         |  0.419   |  0.517   |  0.565   |
+# |  driveable_surface   |  0.540   |  0.621   |  0.708   |
+# |      other_flat      |  0.284   |  0.320   |  0.354   |
+# |       sidewalk       |  0.242   |  0.289   |  0.337   |
+# |       terrain        |  0.233   |  0.302   |  0.367   |
+# |       manmade        |  0.291   |  0.370   |  0.422   |
+# |      vegetation      |  0.190   |  0.290   |  0.376   |
+# +----------------------+----------+----------+----------+
+# |         MEAN         |  0.301   |  0.368   |  0.411   |
+# +----------------------+----------+----------+----------+
+# {'RayIoU': 0.3599406945036808, 'RayIoU@1': 0.30094679699387594, 'RayIoU@2': 0.36785252629427645, 'RayIoU@4': 0.4110227602228899}
\ No newline at end of file
--- a/docker-hub/FlashOCC/Flashocc/projects/flashocc_plugin.egg-info/PKG-INFO
+++ b/docker-hub/FlashOCC/Flashocc/projects/flashocc_plugin.egg-info/PKG-INFO
+Metadata-Version: 2.1
+Name: flashocc-plugin
+Version: 0.0.0
+Summary: OpenMMLab's next-generation platform for general 3D object detection.
+Home-page: https://github.com/open-mmlab/mmdetection3d
+Author: MMDetection3D Contributors
+Author-email: zwwdev@gmail.com
+License: Apache License 2.0
+Keywords: computer vision,3D object detection
+Classifier: Development Status :: 4 - Beta
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.6
+Classifier: Programming Language :: Python :: 3.7
+Description-Content-Type: text/markdown
--- a/docker-hub/FlashOCC/Flashocc/projects/flashocc_plugin.egg-info/SOURCES.txt
+++ b/docker-hub/FlashOCC/Flashocc/projects/flashocc_plugin.egg-info/SOURCES.txt
+__init__.py
+setup.py
+configs/bevdet_occ/bevdet-occ-r50-4d-stereo.py
+configs/bevdet_occ/bevdet-occ-r50.py
+configs/bevdet_occ/bevdet-occ-stbase-4d-stereo-512x1408.py
+configs/flashocc/flashocc-r50-4d-stereo.py
+configs/flashocc/flashocc-r50-M0-trt.py
+configs/flashocc/flashocc-r50-M0.py
+configs/flashocc/flashocc-r50-trt.py
+configs/flashocc/flashocc-r50.py
+configs/flashocc/flashocc-stbase-4d-stereo-512x1408_4x4_1e-2.py
+configs/flashocc/flashocc-stbase-4d-stereo-512x1408_4x4_2e-4.py
+configs/panoptic-flashocc/panoptic-flashocc-r50-depth-pano.py
+configs/panoptic-flashocc/panoptic-flashocc-r50-depth-tiny-pano-trt.py
+configs/panoptic-flashocc/panoptic-flashocc-r50-depth-tiny-pano.py
+configs/panoptic-flashocc/panoptic-flashocc-r50-depth-tiny.py
+configs/panoptic-flashocc/panoptic-flashocc-r50-depth-trt.py
+configs/panoptic-flashocc/panoptic-flashocc-r50-depth.py
+configs/panoptic-flashocc/panoptic-flashocc-r50-depth4d-longterm16f-pano.py
+configs/panoptic-flashocc/panoptic-flashocc-r50-depth4d-longterm16f.py
+configs/panoptic-flashocc/panoptic-flashocc-r50-depth4d-longterm8f-pano.py
+configs/panoptic-flashocc/panoptic-flashocc-r50-depth4d-longterm8f.py
+configs/panoptic-flashocc/panoptic-flashocc-r50-depth4d-pano.py
+configs/panoptic-flashocc/panoptic-flashocc-r50-depth4d.py
+flashocc_plugin.egg-info/PKG-INFO
+flashocc_plugin.egg-info/SOURCES.txt
+flashocc_plugin.egg-info/dependency_links.txt
+flashocc_plugin.egg-info/not-zip-safe
+flashocc_plugin.egg-info/top_level.txt
+mmdet3d_plugin/__init__.py
+mmdet3d_plugin/core/__init__.py
+mmdet3d_plugin/core/bbox/__init__.py
+mmdet3d_plugin/core/bbox/coders/__init__.py
+mmdet3d_plugin/core/bbox/coders/centerpoint_bbox_coders.py
+mmdet3d_plugin/core/evaluation/__init__.py
+mmdet3d_plugin/core/evaluation/occ_metrics.py
+mmdet3d_plugin/core/evaluation/ray_metrics.py
+mmdet3d_plugin/core/evaluation/ray_pq.py
+mmdet3d_plugin/core/hook/__init__.py
+mmdet3d_plugin/core/hook/ema.py
+mmdet3d_plugin/core/hook/sequentialcontrol.py
+mmdet3d_plugin/core/hook/syncbncontrol.py
+mmdet3d_plugin/core/hook/utils.py
+mmdet3d_plugin/core/post_processing/__init__.py
+mmdet3d_plugin/core/post_processing/box3d_nms.py
+mmdet3d_plugin/datasets/__init__.py
+mmdet3d_plugin/datasets/ego_pose_dataset.py
+mmdet3d_plugin/datasets/nuscenes_dataset_bevdet.py
+mmdet3d_plugin/datasets/nuscenes_dataset_occ.py
+mmdet3d_plugin/datasets/pipelines/__init__.py
+mmdet3d_plugin/datasets/pipelines/formating.py
+mmdet3d_plugin/datasets/pipelines/loading.py
+mmdet3d_plugin/models/__init__.py
+mmdet3d_plugin/models/backbones/__init__.py
+mmdet3d_plugin/models/backbones/resnet.py
+mmdet3d_plugin/models/backbones/swin.py
+mmdet3d_plugin/models/dense_heads/__init__.py
+mmdet3d_plugin/models/dense_heads/bev_centerpoint_head.py
+mmdet3d_plugin/models/dense_heads/bev_occ_head.py
+mmdet3d_plugin/models/detectors/__init__.py
+mmdet3d_plugin/models/detectors/bevdepth.py
+mmdet3d_plugin/models/detectors/bevdepth4d.py
+mmdet3d_plugin/models/detectors/bevdet.py
+mmdet3d_plugin/models/detectors/bevdet4d.py
+mmdet3d_plugin/models/detectors/bevdet_occ.py
+mmdet3d_plugin/models/detectors/bevstereo4d.py
+mmdet3d_plugin/models/losses/__init__.py
+mmdet3d_plugin/models/losses/cross_entropy_loss.py
+mmdet3d_plugin/models/losses/focal_loss.py
+mmdet3d_plugin/models/losses/lovasz_softmax.py
+mmdet3d_plugin/models/losses/semkitti_loss.py
+mmdet3d_plugin/models/model_utils/__init__.py
+mmdet3d_plugin/models/model_utils/depthnet.py
+mmdet3d_plugin/models/necks/__init__.py
+mmdet3d_plugin/models/necks/fpn.py
+mmdet3d_plugin/models/necks/lss_fpn.py
+mmdet3d_plugin/models/necks/view_transformer.py
+mmdet3d_plugin/ops/__init__.py
+mmdet3d_plugin/ops/bev_pool/__init__.py
+mmdet3d_plugin/ops/bev_pool/bev_pool.py
+mmdet3d_plugin/ops/bev_pool/src/bev_max_pool.cpp
+mmdet3d_plugin/ops/bev_pool/src/bev_max_pool.h
+mmdet3d_plugin/ops/bev_pool/src/bev_max_pool_cuda.cu
+mmdet3d_plugin/ops/bev_pool/src/bev_max_pool_cuda.hip
+mmdet3d_plugin/ops/bev_pool/src/bev_max_pool_hip.cpp
+mmdet3d_plugin/ops/bev_pool/src/bev_pooling.cpp
+mmdet3d_plugin/ops/bev_pool/src/bev_pooling_hip.cpp
+mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool.cpp
+mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool.h
+mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool_cuda.cu
+mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool_cuda.hip
+mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool_hip.cpp
+mmdet3d_plugin/ops/bev_pool_v2/__init__.py
+mmdet3d_plugin/ops/bev_pool_v2/bev_pool.py
+mmdet3d_plugin/ops/bev_pool_v2/src/bev_pool.cpp
+mmdet3d_plugin/ops/bev_pool_v2/src/bev_pool_cuda.cu
+mmdet3d_plugin/ops/bev_pool_v2/src/bev_pool_cuda.hip
+mmdet3d_plugin/ops/bev_pool_v2/src/bev_pool_hip.cpp
+mmdet3d_plugin/ops/nearest_assign/__init__.py
+mmdet3d_plugin/ops/nearest_assign/nearest_assign.py
+mmdet3d_plugin/ops/nearest_assign/src/nearest_assign.cpp
+mmdet3d_plugin/ops/nearest_assign/src/nearest_assign_cuda.cu
+mmdet3d_plugin/ops/nearest_assign/src/nearest_assign_cuda.hip
+mmdet3d_plugin/ops/nearest_assign/src/nearest_assign_hip.cpp
\ No newline at end of file
--- a/docker-hub/FlashOCC/Flashocc/projects/flashocc_plugin.egg-info/dependency_links.txt
+++ b/docker-hub/FlashOCC/Flashocc/projects/flashocc_plugin.egg-info/dependency_links.txt
+
--- a/docker-hub/FlashOCC/Flashocc/projects/flashocc_plugin.egg-info/not-zip-safe
+++ b/docker-hub/FlashOCC/Flashocc/projects/flashocc_plugin.egg-info/not-zip-safe
+
--- a/docker-hub/FlashOCC/Flashocc/projects/flashocc_plugin.egg-info/top_level.txt
+++ b/docker-hub/FlashOCC/Flashocc/projects/flashocc_plugin.egg-info/top_level.txt
+mmdet3d_plugin
--- a/docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/__init__.py
+++ b/docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/__init__.py
+from .datasets import *
+from .core import *
+from .models import *
--- a/docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/__pycache__/__init__.cpython-310.pyc
+++ b/docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/__pycache__/__init__.cpython-310.pyc
--- a/docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/core/__init__.py
+++ b/docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/core/__init__.py
+from .bbox import *
+from .hook import *
--- a/docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/core/__pycache__/__init__.cpython-310.pyc
+++ b/docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/core/__pycache__/__init__.cpython-310.pyc
--- a/docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/core/bbox/__init__.py
+++ b/docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/core/bbox/__init__.py
+from .coders import *
\ No newline at end of file
--- a/docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/core/bbox/__pycache__/__init__.cpython-310.pyc
+++ b/docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/core/bbox/__pycache__/__init__.cpython-310.pyc
--- a/docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/core/bbox/coders/__init__.py
+++ b/docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/core/bbox/coders/__init__.py
+from .centerpoint_bbox_coders import CenterPointBBoxCoder
+
+__all__ = ['CenterPointBBoxCoder']
\ No newline at end of file
--- a/docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/core/bbox/coders/__pycache__/__init__.cpython-310.pyc
+++ b/docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/core/bbox/coders/__pycache__/__init__.cpython-310.pyc