Commit 51e55208 authored by 雍大凯's avatar 雍大凯
Browse files

将子模块转换为普通目录

parent ba3cd005
{"env_info": "sys.platform: linux\nPython: 3.10.12 (main, May 27 2025, 17:12:29) [GCC 11.4.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: BW200, UBB BW1000\nCUDA_HOME: /opt/dtk\nNVCC: Not Available\nGCC: x86_64-linux-gnu-gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\nPyTorch: 2.4.1\nPyTorch compiling details: PyTorch built with:\n - GCC 10.3\n - C++ Version: 201703\n - Intel(R) Math Kernel Library Version 2020.0.4 Product Build 20200917 for Intel(R) 64 architecture applications\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX512\n - HIP Runtime 6.3.25211\n - MIOpen 2.17.0\n - Magma 2.8.0\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CXX_COMPILER=/opt/rh/gcc-toolset-10/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOCUPTI -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, FORCE_FALLBACK_CUDA_MPI=1, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=2.4.1, USE_CUDA=0, USE_CUDNN=OFF, USE_CUSPARSELT=OFF, USE_EXCEPTION_PTR=1, USE_GFLAGS=1, USE_GLOG=1, USE_GLOO=1, USE_MKL=ON, USE_MKLDNN=0, USE_MPI=1, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=1, USE_ROCM=ON, USE_ROCM_KERNEL_ASSERT=OFF, \n\nTorchVision: 0.19.1\nOpenCV: 4.11.0\nMMCV: 1.6.1\nMMCV Compiler: GCC 11.4\nMMCV CUDA Compiler: rocm not available\nMMDetection: 2.26.0+c41df4b", "config": "plugin = True\nplugin_dir = 'projects/mmdet3d_plugin/'\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/sparse4dv3_temporal_r50_1x8_bs6_256x704'\ntotal_batch_size = 160\nnum_gpus = 8\nbatch_size = 20\nnum_iters_per_epoch = 175\nnum_epochs = 100\ncheckpoint_epoch_interval = 20\ncheckpoint_config = dict(interval=3500)\nlog_config = dict(\n interval=1,\n hooks=[\n dict(type='TextLoggerHook', by_epoch=False),\n dict(type='TensorboardLoggerHook')\n ])\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\nfp16 = dict(loss_scale=32.0)\ninput_shape = (704, 256)\ntracking_test = True\ntracking_threshold = 0.2\nclass_names = [\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n]\nnum_classes = 10\nembed_dims = 256\nnum_groups = 8\nnum_decoder = 6\nnum_single_frame_decoder = 1\nuse_deformable_func = True\nstrides = [4, 8, 16, 32]\nnum_levels = 4\nnum_depth_layers = 3\ndrop_out = 0.1\ntemporal = True\ndecouple_attn = True\nwith_quality_estimation = True\nmodel = dict(\n type='Sparse4D',\n use_grid_mask=True,\n use_deformable_func=True,\n img_backbone=dict(\n type='ResNet',\n depth=50,\n num_stages=4,\n frozen_stages=-1,\n norm_eval=False,\n style='pytorch',\n with_cp=True,\n out_indices=(0, 1, 2, 3),\n norm_cfg=dict(type='BN', requires_grad=True),\n pretrained='ckpt/resnet50-19c8e357.pth'),\n img_neck=dict(\n type='FPN',\n num_outs=4,\n start_level=0,\n out_channels=256,\n add_extra_convs='on_output',\n relu_before_extra_convs=True,\n in_channels=[256, 512, 1024, 2048]),\n depth_branch=dict(\n type='DenseDepthNet',\n embed_dims=256,\n num_depth_layers=3,\n loss_weight=0.2),\n head=dict(\n type='Sparse4DHead',\n cls_threshold_to_reg=0.05,\n decouple_attn=True,\n instance_bank=dict(\n type='InstanceBank',\n num_anchor=900,\n embed_dims=256,\n anchor='nuscenes_kmeans900.npy',\n anchor_handler=dict(type='SparseBox3DKeyPointsGenerator'),\n num_temp_instances=600,\n confidence_decay=0.6,\n feat_grad=False),\n anchor_encoder=dict(\n type='SparseBox3DEncoder',\n vel_dims=3,\n embed_dims=[128, 32, 32, 64],\n mode='cat',\n output_fc=False,\n in_loops=1,\n out_loops=4),\n num_single_frame_decoder=1,\n operation_order=[\n 'deformable', 'ffn', 'norm', 'refine', 'temp_gnn', 'gnn', 'norm',\n 'deformable', 'ffn', 'norm', 'refine', 'temp_gnn', 'gnn', 'norm',\n 'deformable', 'ffn', 'norm', 'refine', 'temp_gnn', 'gnn', 'norm',\n 'deformable', 'ffn', 'norm', 'refine', 'temp_gnn', 'gnn', 'norm',\n 'deformable', 'ffn', 'norm', 'refine', 'temp_gnn', 'gnn', 'norm',\n 'deformable', 'ffn', 'norm', 'refine'\n ],\n temp_graph_model=dict(\n type='MultiheadAttention',\n embed_dims=512,\n num_heads=8,\n batch_first=True,\n dropout=0.1),\n graph_model=dict(\n type='MultiheadAttention',\n embed_dims=512,\n num_heads=8,\n batch_first=True,\n dropout=0.1),\n norm_layer=dict(type='LN', normalized_shape=256),\n ffn=dict(\n type='AsymmetricFFN',\n in_channels=512,\n pre_norm=dict(type='LN'),\n embed_dims=256,\n feedforward_channels=1024,\n num_fcs=2,\n ffn_drop=0.1,\n act_cfg=dict(type='ReLU', inplace=True)),\n deformable_model=dict(\n type='DeformableFeatureAggregation',\n embed_dims=256,\n num_groups=8,\n num_levels=4,\n num_cams=6,\n attn_drop=0.15,\n use_deformable_func=True,\n use_camera_embed=True,\n residual_mode='cat',\n kps_generator=dict(\n type='SparseBox3DKeyPointsGenerator',\n num_learnable_pts=6,\n fix_scale=[[0, 0, 0], [0.45, 0, 0], [-0.45, 0, 0],\n [0, 0.45, 0], [0, -0.45, 0], [0, 0, 0.45],\n [0, 0, -0.45]])),\n refine_layer=dict(\n type='SparseBox3DRefinementModule',\n embed_dims=256,\n num_cls=10,\n refine_yaw=True,\n with_quality_estimation=True),\n sampler=dict(\n type='SparseBox3DTarget',\n num_dn_groups=5,\n num_temp_dn_groups=3,\n dn_noise_scale=[2.0, 2.0, 2.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5],\n max_dn_gt=32,\n add_neg_dn=True,\n cls_weight=2.0,\n box_weight=0.25,\n reg_weights=[2.0, 2.0, 2.0, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0],\n cls_wise_reg_weights=dict(\n {9: [2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0]})),\n loss_cls=dict(\n type='FocalLoss',\n use_sigmoid=True,\n gamma=2.0,\n alpha=0.25,\n loss_weight=2.0),\n loss_reg=dict(\n type='SparseBox3DLoss',\n loss_box=dict(type='L1Loss', loss_weight=0.25),\n loss_centerness=dict(type='CrossEntropyLoss', use_sigmoid=True),\n loss_yawness=dict(type='GaussianFocalLoss'),\n cls_allow_reverse=[5]),\n decoder=dict(type='SparseBox3DDecoder'),\n reg_weights=[2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))\ndataset_type = 'NuScenes3DDetTrackDataset'\ndata_root = 'data/nuscenes/'\nanno_root = 'data/nuscenes_anno_pkls/'\nfile_client_args = dict(backend='disk')\nimg_norm_cfg = dict(\n mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n dict(type='LoadMultiViewImageFromFiles', to_float32=True),\n dict(\n type='LoadPointsFromFile',\n coord_type='LIDAR',\n load_dim=5,\n use_dim=5,\n file_client_args=dict(backend='disk')),\n dict(type='ResizeCropFlipImage'),\n dict(type='MultiScaleDepthMapGenerator', downsample=[4, 8, 16]),\n dict(type='BBoxRotation'),\n dict(type='PhotoMetricDistortionMultiViewImage'),\n dict(\n type='NormalizeMultiviewImage',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(\n type='CircleObjectRangeFilter',\n class_dist_thred=[55, 55, 55, 55, 55, 55, 55, 55, 55, 55]),\n dict(\n type='InstanceNameFilter',\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ]),\n dict(type='NuScenesSparse4DAdaptor'),\n dict(\n type='Collect',\n keys=[\n 'img', 'timestamp', 'projection_mat', 'image_wh', 'gt_depth',\n 'focal', 'gt_bboxes_3d', 'gt_labels_3d'\n ],\n meta_keys=['T_global', 'T_global_inv', 'timestamp', 'instance_id'])\n]\ntest_pipeline = [\n dict(type='LoadMultiViewImageFromFiles', to_float32=True),\n dict(type='ResizeCropFlipImage'),\n dict(\n type='NormalizeMultiviewImage',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='NuScenesSparse4DAdaptor'),\n dict(\n type='Collect',\n keys=['img', 'timestamp', 'projection_mat', 'image_wh'],\n meta_keys=['T_global', 'T_global_inv', 'timestamp'])\n]\ninput_modality = dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False)\ndata_basic_config = dict(\n type='NuScenes3DDetTrackDataset',\n data_root='data/nuscenes/',\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n modality=dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False),\n version='v1.0-trainval')\ndata_aug_conf = dict(\n resize_lim=(0.4, 0.47),\n final_dim=(256, 704),\n bot_pct_lim=(0.0, 0.0),\n rot_lim=(-5.4, 5.4),\n H=900,\n W=1600,\n rand_flip=True,\n rot3d_range=[-0.3925, 0.3925])\ndata = dict(\n samples_per_gpu=20,\n workers_per_gpu=20,\n train=dict(\n type='NuScenes3DDetTrackDataset',\n data_root='data/nuscenes/',\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n modality=dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False),\n version='v1.0-trainval',\n ann_file='data/nuscenes_anno_pkls/nuscenes_infos_train.pkl',\n pipeline=[\n dict(type='LoadMultiViewImageFromFiles', to_float32=True),\n dict(\n type='LoadPointsFromFile',\n coord_type='LIDAR',\n load_dim=5,\n use_dim=5,\n file_client_args=dict(backend='disk')),\n dict(type='ResizeCropFlipImage'),\n dict(type='MultiScaleDepthMapGenerator', downsample=[4, 8, 16]),\n dict(type='BBoxRotation'),\n dict(type='PhotoMetricDistortionMultiViewImage'),\n dict(\n type='NormalizeMultiviewImage',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(\n type='CircleObjectRangeFilter',\n class_dist_thred=[55, 55, 55, 55, 55, 55, 55, 55, 55, 55]),\n dict(\n type='InstanceNameFilter',\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n 'traffic_cone'\n ]),\n dict(type='NuScenesSparse4DAdaptor'),\n dict(\n type='Collect',\n keys=[\n 'img', 'timestamp', 'projection_mat', 'image_wh',\n 'gt_depth', 'focal', 'gt_bboxes_3d', 'gt_labels_3d'\n ],\n meta_keys=[\n 'T_global', 'T_global_inv', 'timestamp', 'instance_id'\n ])\n ],\n test_mode=False,\n data_aug_conf=dict(\n resize_lim=(0.4, 0.47),\n final_dim=(256, 704),\n bot_pct_lim=(0.0, 0.0),\n rot_lim=(-5.4, 5.4),\n H=900,\n W=1600,\n rand_flip=True,\n rot3d_range=[-0.3925, 0.3925]),\n with_seq_flag=True,\n sequences_split_num=2,\n keep_consistent_seq_aug=True),\n val=dict(\n type='NuScenes3DDetTrackDataset',\n data_root='data/nuscenes/',\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n modality=dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False),\n version='v1.0-trainval',\n ann_file='data/nuscenes_anno_pkls/nuscenes_infos_val.pkl',\n pipeline=[\n dict(type='LoadMultiViewImageFromFiles', to_float32=True),\n dict(type='ResizeCropFlipImage'),\n dict(\n type='NormalizeMultiviewImage',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='NuScenesSparse4DAdaptor'),\n dict(\n type='Collect',\n keys=['img', 'timestamp', 'projection_mat', 'image_wh'],\n meta_keys=['T_global', 'T_global_inv', 'timestamp'])\n ],\n data_aug_conf=dict(\n resize_lim=(0.4, 0.47),\n final_dim=(256, 704),\n bot_pct_lim=(0.0, 0.0),\n rot_lim=(-5.4, 5.4),\n H=900,\n W=1600,\n rand_flip=True,\n rot3d_range=[-0.3925, 0.3925]),\n test_mode=True,\n tracking=True,\n tracking_threshold=0.2),\n test=dict(\n type='NuScenes3DDetTrackDataset',\n data_root='data/nuscenes/',\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n modality=dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False),\n version='v1.0-trainval',\n ann_file='data/nuscenes_anno_pkls/nuscenes_infos_val.pkl',\n pipeline=[\n dict(type='LoadMultiViewImageFromFiles', to_float32=True),\n dict(type='ResizeCropFlipImage'),\n dict(\n type='NormalizeMultiviewImage',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='NuScenesSparse4DAdaptor'),\n dict(\n type='Collect',\n keys=['img', 'timestamp', 'projection_mat', 'image_wh'],\n meta_keys=['T_global', 'T_global_inv', 'timestamp'])\n ],\n data_aug_conf=dict(\n resize_lim=(0.4, 0.47),\n final_dim=(256, 704),\n bot_pct_lim=(0.0, 0.0),\n rot_lim=(-5.4, 5.4),\n H=900,\n W=1600,\n rand_flip=True,\n rot3d_range=[-0.3925, 0.3925]),\n test_mode=True,\n tracking=True,\n tracking_threshold=0.2))\noptimizer = dict(\n type='Miopen_AdamW',\n lr=0.0006,\n weight_decay=0.001,\n paramwise_cfg=dict(custom_keys=dict(img_backbone=dict(lr_mult=0.5))))\noptimizer_config = dict(grad_clip=dict(max_norm=25, norm_type=2))\nlr_config = dict(\n policy='CosineAnnealing',\n warmup='linear',\n warmup_iters=500,\n warmup_ratio=0.3333333333333333,\n min_lr_ratio=0.001)\nrunner = dict(type='IterBasedRunner', max_iters=17500)\nvis_pipeline = [\n dict(type='LoadMultiViewImageFromFiles', to_float32=True),\n dict(type='Collect', keys=['img'], meta_keys=['timestamp', 'lidar2img'])\n]\nevaluation = dict(\n interval=3500,\n pipeline=[\n dict(type='LoadMultiViewImageFromFiles', to_float32=True),\n dict(\n type='Collect', keys=['img'], meta_keys=['timestamp', 'lidar2img'])\n ])\ngpu_ids = range(0, 8)\n", "seed": 0, "exp_name": "sparse4dv3_temporal_r50_1x8_bs6_256x704.py"}
{"env_info": "sys.platform: linux\nPython: 3.10.12 (main, May 27 2025, 17:12:29) [GCC 11.4.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: BW200, UBB BW1000\nCUDA_HOME: /opt/dtk\nNVCC: Not Available\nGCC: x86_64-linux-gnu-gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\nPyTorch: 2.4.1\nPyTorch compiling details: PyTorch built with:\n - GCC 10.3\n - C++ Version: 201703\n - Intel(R) Math Kernel Library Version 2020.0.4 Product Build 20200917 for Intel(R) 64 architecture applications\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX512\n - HIP Runtime 6.3.25211\n - MIOpen 2.17.0\n - Magma 2.8.0\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CXX_COMPILER=/opt/rh/gcc-toolset-10/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOCUPTI -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, FORCE_FALLBACK_CUDA_MPI=1, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=2.4.1, USE_CUDA=0, USE_CUDNN=OFF, USE_CUSPARSELT=OFF, USE_EXCEPTION_PTR=1, USE_GFLAGS=1, USE_GLOG=1, USE_GLOO=1, USE_MKL=ON, USE_MKLDNN=0, USE_MPI=1, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=1, USE_ROCM=ON, USE_ROCM_KERNEL_ASSERT=OFF, \n\nTorchVision: 0.19.1\nOpenCV: 4.11.0\nMMCV: 1.6.1\nMMCV Compiler: GCC 11.4\nMMCV CUDA Compiler: rocm not available\nMMDetection: 2.26.0+c41df4b", "config": "plugin = True\nplugin_dir = 'projects/mmdet3d_plugin/'\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/sparse4dv3_temporal_r50_1x8_bs6_256x704'\ntotal_batch_size = 160\nnum_gpus = 8\nbatch_size = 20\nnum_iters_per_epoch = 175\nnum_epochs = 100\ncheckpoint_epoch_interval = 20\ncheckpoint_config = dict(interval=3500)\nlog_config = dict(\n interval=1,\n hooks=[\n dict(type='TextLoggerHook', by_epoch=False),\n dict(type='TensorboardLoggerHook')\n ])\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\nfp16 = dict(loss_scale=32.0)\ninput_shape = (704, 256)\ntracking_test = True\ntracking_threshold = 0.2\nclass_names = [\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n]\nnum_classes = 10\nembed_dims = 256\nnum_groups = 8\nnum_decoder = 6\nnum_single_frame_decoder = 1\nuse_deformable_func = True\nstrides = [4, 8, 16, 32]\nnum_levels = 4\nnum_depth_layers = 3\ndrop_out = 0.1\ntemporal = True\ndecouple_attn = True\nwith_quality_estimation = True\nmodel = dict(\n type='Sparse4D',\n use_grid_mask=True,\n use_deformable_func=True,\n img_backbone=dict(\n type='ResNet',\n depth=50,\n num_stages=4,\n frozen_stages=-1,\n norm_eval=False,\n style='pytorch',\n with_cp=True,\n out_indices=(0, 1, 2, 3),\n norm_cfg=dict(type='BN', requires_grad=True),\n pretrained='ckpt/resnet50-19c8e357.pth'),\n img_neck=dict(\n type='FPN',\n num_outs=4,\n start_level=0,\n out_channels=256,\n add_extra_convs='on_output',\n relu_before_extra_convs=True,\n in_channels=[256, 512, 1024, 2048]),\n depth_branch=dict(\n type='DenseDepthNet',\n embed_dims=256,\n num_depth_layers=3,\n loss_weight=0.2),\n head=dict(\n type='Sparse4DHead',\n cls_threshold_to_reg=0.05,\n decouple_attn=True,\n instance_bank=dict(\n type='InstanceBank',\n num_anchor=900,\n embed_dims=256,\n anchor='nuscenes_kmeans900.npy',\n anchor_handler=dict(type='SparseBox3DKeyPointsGenerator'),\n num_temp_instances=600,\n confidence_decay=0.6,\n feat_grad=False),\n anchor_encoder=dict(\n type='SparseBox3DEncoder',\n vel_dims=3,\n embed_dims=[128, 32, 32, 64],\n mode='cat',\n output_fc=False,\n in_loops=1,\n out_loops=4),\n num_single_frame_decoder=1,\n operation_order=[\n 'deformable', 'ffn', 'norm', 'refine', 'temp_gnn', 'gnn', 'norm',\n 'deformable', 'ffn', 'norm', 'refine', 'temp_gnn', 'gnn', 'norm',\n 'deformable', 'ffn', 'norm', 'refine', 'temp_gnn', 'gnn', 'norm',\n 'deformable', 'ffn', 'norm', 'refine', 'temp_gnn', 'gnn', 'norm',\n 'deformable', 'ffn', 'norm', 'refine', 'temp_gnn', 'gnn', 'norm',\n 'deformable', 'ffn', 'norm', 'refine'\n ],\n temp_graph_model=dict(\n type='MultiheadAttention',\n embed_dims=512,\n num_heads=8,\n batch_first=True,\n dropout=0.1),\n graph_model=dict(\n type='MultiheadAttention',\n embed_dims=512,\n num_heads=8,\n batch_first=True,\n dropout=0.1),\n norm_layer=dict(type='LN', normalized_shape=256),\n ffn=dict(\n type='AsymmetricFFN',\n in_channels=512,\n pre_norm=dict(type='LN'),\n embed_dims=256,\n feedforward_channels=1024,\n num_fcs=2,\n ffn_drop=0.1,\n act_cfg=dict(type='ReLU', inplace=True)),\n deformable_model=dict(\n type='DeformableFeatureAggregation',\n embed_dims=256,\n num_groups=8,\n num_levels=4,\n num_cams=6,\n attn_drop=0.15,\n use_deformable_func=True,\n use_camera_embed=True,\n residual_mode='cat',\n kps_generator=dict(\n type='SparseBox3DKeyPointsGenerator',\n num_learnable_pts=6,\n fix_scale=[[0, 0, 0], [0.45, 0, 0], [-0.45, 0, 0],\n [0, 0.45, 0], [0, -0.45, 0], [0, 0, 0.45],\n [0, 0, -0.45]])),\n refine_layer=dict(\n type='SparseBox3DRefinementModule',\n embed_dims=256,\n num_cls=10,\n refine_yaw=True,\n with_quality_estimation=True),\n sampler=dict(\n type='SparseBox3DTarget',\n num_dn_groups=5,\n num_temp_dn_groups=3,\n dn_noise_scale=[2.0, 2.0, 2.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5],\n max_dn_gt=32,\n add_neg_dn=True,\n cls_weight=2.0,\n box_weight=0.25,\n reg_weights=[2.0, 2.0, 2.0, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0],\n cls_wise_reg_weights=dict(\n {9: [2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0]})),\n loss_cls=dict(\n type='FocalLoss',\n use_sigmoid=True,\n gamma=2.0,\n alpha=0.25,\n loss_weight=2.0),\n loss_reg=dict(\n type='SparseBox3DLoss',\n loss_box=dict(type='L1Loss', loss_weight=0.25),\n loss_centerness=dict(type='CrossEntropyLoss', use_sigmoid=True),\n loss_yawness=dict(type='GaussianFocalLoss'),\n cls_allow_reverse=[5]),\n decoder=dict(type='SparseBox3DDecoder'),\n reg_weights=[2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))\ndataset_type = 'NuScenes3DDetTrackDataset'\ndata_root = 'data/nuscenes/'\nanno_root = 'data/nuscenes_anno_pkls/'\nfile_client_args = dict(backend='disk')\nimg_norm_cfg = dict(\n mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n dict(type='LoadMultiViewImageFromFiles', to_float32=True),\n dict(\n type='LoadPointsFromFile',\n coord_type='LIDAR',\n load_dim=5,\n use_dim=5,\n file_client_args=dict(backend='disk')),\n dict(type='ResizeCropFlipImage'),\n dict(type='MultiScaleDepthMapGenerator', downsample=[4, 8, 16]),\n dict(type='BBoxRotation'),\n dict(type='PhotoMetricDistortionMultiViewImage'),\n dict(\n type='NormalizeMultiviewImage',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(\n type='CircleObjectRangeFilter',\n class_dist_thred=[55, 55, 55, 55, 55, 55, 55, 55, 55, 55]),\n dict(\n type='InstanceNameFilter',\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ]),\n dict(type='NuScenesSparse4DAdaptor'),\n dict(\n type='Collect',\n keys=[\n 'img', 'timestamp', 'projection_mat', 'image_wh', 'gt_depth',\n 'focal', 'gt_bboxes_3d', 'gt_labels_3d'\n ],\n meta_keys=['T_global', 'T_global_inv', 'timestamp', 'instance_id'])\n]\ntest_pipeline = [\n dict(type='LoadMultiViewImageFromFiles', to_float32=True),\n dict(type='ResizeCropFlipImage'),\n dict(\n type='NormalizeMultiviewImage',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='NuScenesSparse4DAdaptor'),\n dict(\n type='Collect',\n keys=['img', 'timestamp', 'projection_mat', 'image_wh'],\n meta_keys=['T_global', 'T_global_inv', 'timestamp'])\n]\ninput_modality = dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False)\ndata_basic_config = dict(\n type='NuScenes3DDetTrackDataset',\n data_root='data/nuscenes/',\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n modality=dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False),\n version='v1.0-trainval')\ndata_aug_conf = dict(\n resize_lim=(0.4, 0.47),\n final_dim=(256, 704),\n bot_pct_lim=(0.0, 0.0),\n rot_lim=(-5.4, 5.4),\n H=900,\n W=1600,\n rand_flip=True,\n rot3d_range=[-0.3925, 0.3925])\ndata = dict(\n samples_per_gpu=20,\n workers_per_gpu=20,\n train=dict(\n type='NuScenes3DDetTrackDataset',\n data_root='data/nuscenes/',\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n modality=dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False),\n version='v1.0-trainval',\n ann_file='data/nuscenes_anno_pkls/nuscenes_infos_train.pkl',\n pipeline=[\n dict(type='LoadMultiViewImageFromFiles', to_float32=True),\n dict(\n type='LoadPointsFromFile',\n coord_type='LIDAR',\n load_dim=5,\n use_dim=5,\n file_client_args=dict(backend='disk')),\n dict(type='ResizeCropFlipImage'),\n dict(type='MultiScaleDepthMapGenerator', downsample=[4, 8, 16]),\n dict(type='BBoxRotation'),\n dict(type='PhotoMetricDistortionMultiViewImage'),\n dict(\n type='NormalizeMultiviewImage',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(\n type='CircleObjectRangeFilter',\n class_dist_thred=[55, 55, 55, 55, 55, 55, 55, 55, 55, 55]),\n dict(\n type='InstanceNameFilter',\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n 'traffic_cone'\n ]),\n dict(type='NuScenesSparse4DAdaptor'),\n dict(\n type='Collect',\n keys=[\n 'img', 'timestamp', 'projection_mat', 'image_wh',\n 'gt_depth', 'focal', 'gt_bboxes_3d', 'gt_labels_3d'\n ],\n meta_keys=[\n 'T_global', 'T_global_inv', 'timestamp', 'instance_id'\n ])\n ],\n test_mode=False,\n data_aug_conf=dict(\n resize_lim=(0.4, 0.47),\n final_dim=(256, 704),\n bot_pct_lim=(0.0, 0.0),\n rot_lim=(-5.4, 5.4),\n H=900,\n W=1600,\n rand_flip=True,\n rot3d_range=[-0.3925, 0.3925]),\n with_seq_flag=True,\n sequences_split_num=2,\n keep_consistent_seq_aug=True),\n val=dict(\n type='NuScenes3DDetTrackDataset',\n data_root='data/nuscenes/',\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n modality=dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False),\n version='v1.0-trainval',\n ann_file='data/nuscenes_anno_pkls/nuscenes_infos_val.pkl',\n pipeline=[\n dict(type='LoadMultiViewImageFromFiles', to_float32=True),\n dict(type='ResizeCropFlipImage'),\n dict(\n type='NormalizeMultiviewImage',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='NuScenesSparse4DAdaptor'),\n dict(\n type='Collect',\n keys=['img', 'timestamp', 'projection_mat', 'image_wh'],\n meta_keys=['T_global', 'T_global_inv', 'timestamp'])\n ],\n data_aug_conf=dict(\n resize_lim=(0.4, 0.47),\n final_dim=(256, 704),\n bot_pct_lim=(0.0, 0.0),\n rot_lim=(-5.4, 5.4),\n H=900,\n W=1600,\n rand_flip=True,\n rot3d_range=[-0.3925, 0.3925]),\n test_mode=True,\n tracking=True,\n tracking_threshold=0.2),\n test=dict(\n type='NuScenes3DDetTrackDataset',\n data_root='data/nuscenes/',\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n modality=dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False),\n version='v1.0-trainval',\n ann_file='data/nuscenes_anno_pkls/nuscenes_infos_val.pkl',\n pipeline=[\n dict(type='LoadMultiViewImageFromFiles', to_float32=True),\n dict(type='ResizeCropFlipImage'),\n dict(\n type='NormalizeMultiviewImage',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='NuScenesSparse4DAdaptor'),\n dict(\n type='Collect',\n keys=['img', 'timestamp', 'projection_mat', 'image_wh'],\n meta_keys=['T_global', 'T_global_inv', 'timestamp'])\n ],\n data_aug_conf=dict(\n resize_lim=(0.4, 0.47),\n final_dim=(256, 704),\n bot_pct_lim=(0.0, 0.0),\n rot_lim=(-5.4, 5.4),\n H=900,\n W=1600,\n rand_flip=True,\n rot3d_range=[-0.3925, 0.3925]),\n test_mode=True,\n tracking=True,\n tracking_threshold=0.2))\noptimizer = dict(\n type='Miopen_AdamW',\n lr=0.0006,\n weight_decay=0.001,\n paramwise_cfg=dict(custom_keys=dict(img_backbone=dict(lr_mult=0.5))))\noptimizer_config = dict(grad_clip=dict(max_norm=25, norm_type=2))\nlr_config = dict(\n policy='CosineAnnealing',\n warmup='linear',\n warmup_iters=500,\n warmup_ratio=0.3333333333333333,\n min_lr_ratio=0.001)\nrunner = dict(type='IterBasedRunner', max_iters=17500)\nvis_pipeline = [\n dict(type='LoadMultiViewImageFromFiles', to_float32=True),\n dict(type='Collect', keys=['img'], meta_keys=['timestamp', 'lidar2img'])\n]\nevaluation = dict(\n interval=3500,\n pipeline=[\n dict(type='LoadMultiViewImageFromFiles', to_float32=True),\n dict(\n type='Collect', keys=['img'], meta_keys=['timestamp', 'lidar2img'])\n ])\ngpu_ids = range(0, 8)\n", "seed": 0, "exp_name": "sparse4dv3_temporal_r50_1x8_bs6_256x704.py"}
{"env_info": "sys.platform: linux\nPython: 3.10.12 (main, May 27 2025, 17:12:29) [GCC 11.4.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: BW200, UBB BW1000\nCUDA_HOME: /opt/dtk\nNVCC: Not Available\nGCC: x86_64-linux-gnu-gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\nPyTorch: 2.4.1\nPyTorch compiling details: PyTorch built with:\n - GCC 10.3\n - C++ Version: 201703\n - Intel(R) Math Kernel Library Version 2020.0.4 Product Build 20200917 for Intel(R) 64 architecture applications\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX512\n - HIP Runtime 6.3.25211\n - MIOpen 2.17.0\n - Magma 2.8.0\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CXX_COMPILER=/opt/rh/gcc-toolset-10/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOCUPTI -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, FORCE_FALLBACK_CUDA_MPI=1, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=2.4.1, USE_CUDA=0, USE_CUDNN=OFF, USE_CUSPARSELT=OFF, USE_EXCEPTION_PTR=1, USE_GFLAGS=1, USE_GLOG=1, USE_GLOO=1, USE_MKL=ON, USE_MKLDNN=0, USE_MPI=1, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=1, USE_ROCM=ON, USE_ROCM_KERNEL_ASSERT=OFF, \n\nTorchVision: 0.19.1\nOpenCV: 4.11.0\nMMCV: 1.6.1\nMMCV Compiler: GCC 11.4\nMMCV CUDA Compiler: rocm not available\nMMDetection: 2.26.0+c41df4b", "config": "plugin = True\nplugin_dir = 'projects/mmdet3d_plugin/'\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/sparse4dv3_temporal_r50_1x8_bs6_256x704'\ntotal_batch_size = 160\nnum_gpus = 8\nbatch_size = 20\nnum_iters_per_epoch = 175\nnum_epochs = 100\ncheckpoint_epoch_interval = 20\ncheckpoint_config = dict(interval=3500)\nlog_config = dict(\n interval=1,\n hooks=[\n dict(type='TextLoggerHook', by_epoch=False),\n dict(type='TensorboardLoggerHook')\n ])\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\nfp16 = dict(loss_scale=32.0)\ninput_shape = (704, 256)\ntracking_test = True\ntracking_threshold = 0.2\nclass_names = [\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n]\nnum_classes = 10\nembed_dims = 256\nnum_groups = 8\nnum_decoder = 6\nnum_single_frame_decoder = 1\nuse_deformable_func = True\nstrides = [4, 8, 16, 32]\nnum_levels = 4\nnum_depth_layers = 3\ndrop_out = 0.1\ntemporal = True\ndecouple_attn = True\nwith_quality_estimation = True\nmodel = dict(\n type='Sparse4D',\n use_grid_mask=True,\n use_deformable_func=True,\n img_backbone=dict(\n type='ResNet',\n depth=50,\n num_stages=4,\n frozen_stages=-1,\n norm_eval=False,\n style='pytorch',\n with_cp=True,\n out_indices=(0, 1, 2, 3),\n norm_cfg=dict(type='BN', requires_grad=True),\n pretrained='ckpt/resnet50-19c8e357.pth'),\n img_neck=dict(\n type='FPN',\n num_outs=4,\n start_level=0,\n out_channels=256,\n add_extra_convs='on_output',\n relu_before_extra_convs=True,\n in_channels=[256, 512, 1024, 2048]),\n depth_branch=dict(\n type='DenseDepthNet',\n embed_dims=256,\n num_depth_layers=3,\n loss_weight=0.2),\n head=dict(\n type='Sparse4DHead',\n cls_threshold_to_reg=0.05,\n decouple_attn=True,\n instance_bank=dict(\n type='InstanceBank',\n num_anchor=900,\n embed_dims=256,\n anchor='nuscenes_kmeans900.npy',\n anchor_handler=dict(type='SparseBox3DKeyPointsGenerator'),\n num_temp_instances=600,\n confidence_decay=0.6,\n feat_grad=False),\n anchor_encoder=dict(\n type='SparseBox3DEncoder',\n vel_dims=3,\n embed_dims=[128, 32, 32, 64],\n mode='cat',\n output_fc=False,\n in_loops=1,\n out_loops=4),\n num_single_frame_decoder=1,\n operation_order=[\n 'deformable', 'ffn', 'norm', 'refine', 'temp_gnn', 'gnn', 'norm',\n 'deformable', 'ffn', 'norm', 'refine', 'temp_gnn', 'gnn', 'norm',\n 'deformable', 'ffn', 'norm', 'refine', 'temp_gnn', 'gnn', 'norm',\n 'deformable', 'ffn', 'norm', 'refine', 'temp_gnn', 'gnn', 'norm',\n 'deformable', 'ffn', 'norm', 'refine', 'temp_gnn', 'gnn', 'norm',\n 'deformable', 'ffn', 'norm', 'refine'\n ],\n temp_graph_model=dict(\n type='MultiheadAttention',\n embed_dims=512,\n num_heads=8,\n batch_first=True,\n dropout=0.1),\n graph_model=dict(\n type='MultiheadAttention',\n embed_dims=512,\n num_heads=8,\n batch_first=True,\n dropout=0.1),\n norm_layer=dict(type='LN', normalized_shape=256),\n ffn=dict(\n type='AsymmetricFFN',\n in_channels=512,\n pre_norm=dict(type='LN'),\n embed_dims=256,\n feedforward_channels=1024,\n num_fcs=2,\n ffn_drop=0.1,\n act_cfg=dict(type='ReLU', inplace=True)),\n deformable_model=dict(\n type='DeformableFeatureAggregation',\n embed_dims=256,\n num_groups=8,\n num_levels=4,\n num_cams=6,\n attn_drop=0.15,\n use_deformable_func=True,\n use_camera_embed=True,\n residual_mode='cat',\n kps_generator=dict(\n type='SparseBox3DKeyPointsGenerator',\n num_learnable_pts=6,\n fix_scale=[[0, 0, 0], [0.45, 0, 0], [-0.45, 0, 0],\n [0, 0.45, 0], [0, -0.45, 0], [0, 0, 0.45],\n [0, 0, -0.45]])),\n refine_layer=dict(\n type='SparseBox3DRefinementModule',\n embed_dims=256,\n num_cls=10,\n refine_yaw=True,\n with_quality_estimation=True),\n sampler=dict(\n type='SparseBox3DTarget',\n num_dn_groups=5,\n num_temp_dn_groups=3,\n dn_noise_scale=[2.0, 2.0, 2.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5],\n max_dn_gt=32,\n add_neg_dn=True,\n cls_weight=2.0,\n box_weight=0.25,\n reg_weights=[2.0, 2.0, 2.0, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0],\n cls_wise_reg_weights=dict(\n {9: [2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0]})),\n loss_cls=dict(\n type='FocalLoss',\n use_sigmoid=True,\n gamma=2.0,\n alpha=0.25,\n loss_weight=2.0),\n loss_reg=dict(\n type='SparseBox3DLoss',\n loss_box=dict(type='L1Loss', loss_weight=0.25),\n loss_centerness=dict(type='CrossEntropyLoss', use_sigmoid=True),\n loss_yawness=dict(type='GaussianFocalLoss'),\n cls_allow_reverse=[5]),\n decoder=dict(type='SparseBox3DDecoder'),\n reg_weights=[2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))\ndataset_type = 'NuScenes3DDetTrackDataset'\ndata_root = 'data/nuscenes/'\nanno_root = 'data/nuscenes_anno_pkls/'\nfile_client_args = dict(backend='disk')\nimg_norm_cfg = dict(\n mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n dict(type='LoadMultiViewImageFromFiles', to_float32=True),\n dict(\n type='LoadPointsFromFile',\n coord_type='LIDAR',\n load_dim=5,\n use_dim=5,\n file_client_args=dict(backend='disk')),\n dict(type='ResizeCropFlipImage'),\n dict(type='MultiScaleDepthMapGenerator', downsample=[4, 8, 16]),\n dict(type='BBoxRotation'),\n dict(type='PhotoMetricDistortionMultiViewImage'),\n dict(\n type='NormalizeMultiviewImage',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(\n type='CircleObjectRangeFilter',\n class_dist_thred=[55, 55, 55, 55, 55, 55, 55, 55, 55, 55]),\n dict(\n type='InstanceNameFilter',\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ]),\n dict(type='NuScenesSparse4DAdaptor'),\n dict(\n type='Collect',\n keys=[\n 'img', 'timestamp', 'projection_mat', 'image_wh', 'gt_depth',\n 'focal', 'gt_bboxes_3d', 'gt_labels_3d'\n ],\n meta_keys=['T_global', 'T_global_inv', 'timestamp', 'instance_id'])\n]\ntest_pipeline = [\n dict(type='LoadMultiViewImageFromFiles', to_float32=True),\n dict(type='ResizeCropFlipImage'),\n dict(\n type='NormalizeMultiviewImage',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='NuScenesSparse4DAdaptor'),\n dict(\n type='Collect',\n keys=['img', 'timestamp', 'projection_mat', 'image_wh'],\n meta_keys=['T_global', 'T_global_inv', 'timestamp'])\n]\ninput_modality = dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False)\ndata_basic_config = dict(\n type='NuScenes3DDetTrackDataset',\n data_root='data/nuscenes/',\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n modality=dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False),\n version='v1.0-trainval')\ndata_aug_conf = dict(\n resize_lim=(0.4, 0.47),\n final_dim=(256, 704),\n bot_pct_lim=(0.0, 0.0),\n rot_lim=(-5.4, 5.4),\n H=900,\n W=1600,\n rand_flip=True,\n rot3d_range=[-0.3925, 0.3925])\ndata = dict(\n samples_per_gpu=20,\n workers_per_gpu=20,\n train=dict(\n type='NuScenes3DDetTrackDataset',\n data_root='data/nuscenes/',\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n modality=dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False),\n version='v1.0-trainval',\n ann_file='data/nuscenes_anno_pkls/nuscenes_infos_train.pkl',\n pipeline=[\n dict(type='LoadMultiViewImageFromFiles', to_float32=True),\n dict(\n type='LoadPointsFromFile',\n coord_type='LIDAR',\n load_dim=5,\n use_dim=5,\n file_client_args=dict(backend='disk')),\n dict(type='ResizeCropFlipImage'),\n dict(type='MultiScaleDepthMapGenerator', downsample=[4, 8, 16]),\n dict(type='BBoxRotation'),\n dict(type='PhotoMetricDistortionMultiViewImage'),\n dict(\n type='NormalizeMultiviewImage',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(\n type='CircleObjectRangeFilter',\n class_dist_thred=[55, 55, 55, 55, 55, 55, 55, 55, 55, 55]),\n dict(\n type='InstanceNameFilter',\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n 'traffic_cone'\n ]),\n dict(type='NuScenesSparse4DAdaptor'),\n dict(\n type='Collect',\n keys=[\n 'img', 'timestamp', 'projection_mat', 'image_wh',\n 'gt_depth', 'focal', 'gt_bboxes_3d', 'gt_labels_3d'\n ],\n meta_keys=[\n 'T_global', 'T_global_inv', 'timestamp', 'instance_id'\n ])\n ],\n test_mode=False,\n data_aug_conf=dict(\n resize_lim=(0.4, 0.47),\n final_dim=(256, 704),\n bot_pct_lim=(0.0, 0.0),\n rot_lim=(-5.4, 5.4),\n H=900,\n W=1600,\n rand_flip=True,\n rot3d_range=[-0.3925, 0.3925]),\n with_seq_flag=True,\n sequences_split_num=2,\n keep_consistent_seq_aug=True),\n val=dict(\n type='NuScenes3DDetTrackDataset',\n data_root='data/nuscenes/',\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n modality=dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False),\n version='v1.0-trainval',\n ann_file='data/nuscenes_anno_pkls/nuscenes_infos_val.pkl',\n pipeline=[\n dict(type='LoadMultiViewImageFromFiles', to_float32=True),\n dict(type='ResizeCropFlipImage'),\n dict(\n type='NormalizeMultiviewImage',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='NuScenesSparse4DAdaptor'),\n dict(\n type='Collect',\n keys=['img', 'timestamp', 'projection_mat', 'image_wh'],\n meta_keys=['T_global', 'T_global_inv', 'timestamp'])\n ],\n data_aug_conf=dict(\n resize_lim=(0.4, 0.47),\n final_dim=(256, 704),\n bot_pct_lim=(0.0, 0.0),\n rot_lim=(-5.4, 5.4),\n H=900,\n W=1600,\n rand_flip=True,\n rot3d_range=[-0.3925, 0.3925]),\n test_mode=True,\n tracking=True,\n tracking_threshold=0.2),\n test=dict(\n type='NuScenes3DDetTrackDataset',\n data_root='data/nuscenes/',\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n modality=dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False),\n version='v1.0-trainval',\n ann_file='data/nuscenes_anno_pkls/nuscenes_infos_val.pkl',\n pipeline=[\n dict(type='LoadMultiViewImageFromFiles', to_float32=True),\n dict(type='ResizeCropFlipImage'),\n dict(\n type='NormalizeMultiviewImage',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='NuScenesSparse4DAdaptor'),\n dict(\n type='Collect',\n keys=['img', 'timestamp', 'projection_mat', 'image_wh'],\n meta_keys=['T_global', 'T_global_inv', 'timestamp'])\n ],\n data_aug_conf=dict(\n resize_lim=(0.4, 0.47),\n final_dim=(256, 704),\n bot_pct_lim=(0.0, 0.0),\n rot_lim=(-5.4, 5.4),\n H=900,\n W=1600,\n rand_flip=True,\n rot3d_range=[-0.3925, 0.3925]),\n test_mode=True,\n tracking=True,\n tracking_threshold=0.2))\noptimizer = dict(\n type='Miopen_AdamW',\n lr=0.0006,\n weight_decay=0.001,\n paramwise_cfg=dict(custom_keys=dict(img_backbone=dict(lr_mult=0.5))))\noptimizer_config = dict(grad_clip=dict(max_norm=25, norm_type=2))\nlr_config = dict(\n policy='CosineAnnealing',\n warmup='linear',\n warmup_iters=500,\n warmup_ratio=0.3333333333333333,\n min_lr_ratio=0.001)\nrunner = dict(type='IterBasedRunner', max_iters=17500)\nvis_pipeline = [\n dict(type='LoadMultiViewImageFromFiles', to_float32=True),\n dict(type='Collect', keys=['img'], meta_keys=['timestamp', 'lidar2img'])\n]\nevaluation = dict(\n interval=3500,\n pipeline=[\n dict(type='LoadMultiViewImageFromFiles', to_float32=True),\n dict(\n type='Collect', keys=['img'], meta_keys=['timestamp', 'lidar2img'])\n ])\ngpu_ids = range(0, 8)\n", "seed": 0, "exp_name": "sparse4dv3_temporal_r50_1x8_bs6_256x704.py"}
{"env_info": "sys.platform: linux\nPython: 3.10.12 (main, May 27 2025, 17:12:29) [GCC 11.4.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: BW200, UBB BW1000\nCUDA_HOME: /opt/dtk\nNVCC: Not Available\nGCC: x86_64-linux-gnu-gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\nPyTorch: 2.4.1\nPyTorch compiling details: PyTorch built with:\n - GCC 10.3\n - C++ Version: 201703\n - Intel(R) Math Kernel Library Version 2020.0.4 Product Build 20200917 for Intel(R) 64 architecture applications\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX512\n - HIP Runtime 6.3.25211\n - MIOpen 2.17.0\n - Magma 2.8.0\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CXX_COMPILER=/opt/rh/gcc-toolset-10/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOCUPTI -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, FORCE_FALLBACK_CUDA_MPI=1, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=2.4.1, USE_CUDA=0, USE_CUDNN=OFF, USE_CUSPARSELT=OFF, USE_EXCEPTION_PTR=1, USE_GFLAGS=1, USE_GLOG=1, USE_GLOO=1, USE_MKL=ON, USE_MKLDNN=0, USE_MPI=1, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=1, USE_ROCM=ON, USE_ROCM_KERNEL_ASSERT=OFF, \n\nTorchVision: 0.19.1\nOpenCV: 4.11.0\nMMCV: 1.6.1\nMMCV Compiler: GCC 11.4\nMMCV CUDA Compiler: rocm not available\nMMDetection: 2.26.0+c41df4b", "config": "plugin = True\nplugin_dir = 'projects/mmdet3d_plugin/'\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/sparse4dv3_temporal_r50_1x8_bs6_256x704'\ntotal_batch_size = 160\nnum_gpus = 8\nbatch_size = 20\nnum_iters_per_epoch = 175\nnum_epochs = 100\ncheckpoint_epoch_interval = 20\ncheckpoint_config = dict(interval=3500)\nlog_config = dict(\n interval=1,\n hooks=[\n dict(type='TextLoggerHook', by_epoch=False),\n dict(type='TensorboardLoggerHook')\n ])\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\nfp16 = dict(loss_scale=32.0)\ninput_shape = (704, 256)\ntracking_test = True\ntracking_threshold = 0.2\nclass_names = [\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n]\nnum_classes = 10\nembed_dims = 256\nnum_groups = 8\nnum_decoder = 6\nnum_single_frame_decoder = 1\nuse_deformable_func = True\nstrides = [4, 8, 16, 32]\nnum_levels = 4\nnum_depth_layers = 3\ndrop_out = 0.1\ntemporal = True\ndecouple_attn = True\nwith_quality_estimation = True\nmodel = dict(\n type='Sparse4D',\n use_grid_mask=True,\n use_deformable_func=True,\n img_backbone=dict(\n type='ResNet',\n depth=50,\n num_stages=4,\n frozen_stages=-1,\n norm_eval=False,\n style='pytorch',\n with_cp=True,\n out_indices=(0, 1, 2, 3),\n norm_cfg=dict(type='BN', requires_grad=True),\n pretrained='ckpt/resnet50-19c8e357.pth'),\n img_neck=dict(\n type='FPN',\n num_outs=4,\n start_level=0,\n out_channels=256,\n add_extra_convs='on_output',\n relu_before_extra_convs=True,\n in_channels=[256, 512, 1024, 2048]),\n depth_branch=dict(\n type='DenseDepthNet',\n embed_dims=256,\n num_depth_layers=3,\n loss_weight=0.2),\n head=dict(\n type='Sparse4DHead',\n cls_threshold_to_reg=0.05,\n decouple_attn=True,\n instance_bank=dict(\n type='InstanceBank',\n num_anchor=900,\n embed_dims=256,\n anchor='nuscenes_kmeans900.npy',\n anchor_handler=dict(type='SparseBox3DKeyPointsGenerator'),\n num_temp_instances=600,\n confidence_decay=0.6,\n feat_grad=False),\n anchor_encoder=dict(\n type='SparseBox3DEncoder',\n vel_dims=3,\n embed_dims=[128, 32, 32, 64],\n mode='cat',\n output_fc=False,\n in_loops=1,\n out_loops=4),\n num_single_frame_decoder=1,\n operation_order=[\n 'deformable', 'ffn', 'norm', 'refine', 'temp_gnn', 'gnn', 'norm',\n 'deformable', 'ffn', 'norm', 'refine', 'temp_gnn', 'gnn', 'norm',\n 'deformable', 'ffn', 'norm', 'refine', 'temp_gnn', 'gnn', 'norm',\n 'deformable', 'ffn', 'norm', 'refine', 'temp_gnn', 'gnn', 'norm',\n 'deformable', 'ffn', 'norm', 'refine', 'temp_gnn', 'gnn', 'norm',\n 'deformable', 'ffn', 'norm', 'refine'\n ],\n temp_graph_model=dict(\n type='MultiheadAttention',\n embed_dims=512,\n num_heads=8,\n batch_first=True,\n dropout=0.1),\n graph_model=dict(\n type='MultiheadAttention',\n embed_dims=512,\n num_heads=8,\n batch_first=True,\n dropout=0.1),\n norm_layer=dict(type='LN', normalized_shape=256),\n ffn=dict(\n type='AsymmetricFFN',\n in_channels=512,\n pre_norm=dict(type='LN'),\n embed_dims=256,\n feedforward_channels=1024,\n num_fcs=2,\n ffn_drop=0.1,\n act_cfg=dict(type='ReLU', inplace=True)),\n deformable_model=dict(\n type='DeformableFeatureAggregation',\n embed_dims=256,\n num_groups=8,\n num_levels=4,\n num_cams=6,\n attn_drop=0.15,\n use_deformable_func=True,\n use_camera_embed=True,\n residual_mode='cat',\n kps_generator=dict(\n type='SparseBox3DKeyPointsGenerator',\n num_learnable_pts=6,\n fix_scale=[[0, 0, 0], [0.45, 0, 0], [-0.45, 0, 0],\n [0, 0.45, 0], [0, -0.45, 0], [0, 0, 0.45],\n [0, 0, -0.45]])),\n refine_layer=dict(\n type='SparseBox3DRefinementModule',\n embed_dims=256,\n num_cls=10,\n refine_yaw=True,\n with_quality_estimation=True),\n sampler=dict(\n type='SparseBox3DTarget',\n num_dn_groups=5,\n num_temp_dn_groups=3,\n dn_noise_scale=[2.0, 2.0, 2.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5],\n max_dn_gt=32,\n add_neg_dn=True,\n cls_weight=2.0,\n box_weight=0.25,\n reg_weights=[2.0, 2.0, 2.0, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0],\n cls_wise_reg_weights=dict(\n {9: [2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0]})),\n loss_cls=dict(\n type='FocalLoss',\n use_sigmoid=True,\n gamma=2.0,\n alpha=0.25,\n loss_weight=2.0),\n loss_reg=dict(\n type='SparseBox3DLoss',\n loss_box=dict(type='L1Loss', loss_weight=0.25),\n loss_centerness=dict(type='CrossEntropyLoss', use_sigmoid=True),\n loss_yawness=dict(type='GaussianFocalLoss'),\n cls_allow_reverse=[5]),\n decoder=dict(type='SparseBox3DDecoder'),\n reg_weights=[2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))\ndataset_type = 'NuScenes3DDetTrackDataset'\ndata_root = 'data/nuscenes/'\nanno_root = 'data/nuscenes_anno_pkls/'\nfile_client_args = dict(backend='disk')\nimg_norm_cfg = dict(\n mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n dict(type='LoadMultiViewImageFromFiles', to_float32=True),\n dict(\n type='LoadPointsFromFile',\n coord_type='LIDAR',\n load_dim=5,\n use_dim=5,\n file_client_args=dict(backend='disk')),\n dict(type='ResizeCropFlipImage'),\n dict(type='MultiScaleDepthMapGenerator', downsample=[4, 8, 16]),\n dict(type='BBoxRotation'),\n dict(type='PhotoMetricDistortionMultiViewImage'),\n dict(\n type='NormalizeMultiviewImage',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(\n type='CircleObjectRangeFilter',\n class_dist_thred=[55, 55, 55, 55, 55, 55, 55, 55, 55, 55]),\n dict(\n type='InstanceNameFilter',\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ]),\n dict(type='NuScenesSparse4DAdaptor'),\n dict(\n type='Collect',\n keys=[\n 'img', 'timestamp', 'projection_mat', 'image_wh', 'gt_depth',\n 'focal', 'gt_bboxes_3d', 'gt_labels_3d'\n ],\n meta_keys=['T_global', 'T_global_inv', 'timestamp', 'instance_id'])\n]\ntest_pipeline = [\n dict(type='LoadMultiViewImageFromFiles', to_float32=True),\n dict(type='ResizeCropFlipImage'),\n dict(\n type='NormalizeMultiviewImage',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='NuScenesSparse4DAdaptor'),\n dict(\n type='Collect',\n keys=['img', 'timestamp', 'projection_mat', 'image_wh'],\n meta_keys=['T_global', 'T_global_inv', 'timestamp'])\n]\ninput_modality = dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False)\ndata_basic_config = dict(\n type='NuScenes3DDetTrackDataset',\n data_root='data/nuscenes/',\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n modality=dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False),\n version='v1.0-trainval')\ndata_aug_conf = dict(\n resize_lim=(0.4, 0.47),\n final_dim=(256, 704),\n bot_pct_lim=(0.0, 0.0),\n rot_lim=(-5.4, 5.4),\n H=900,\n W=1600,\n rand_flip=True,\n rot3d_range=[-0.3925, 0.3925])\ndata = dict(\n samples_per_gpu=20,\n workers_per_gpu=20,\n train=dict(\n type='NuScenes3DDetTrackDataset',\n data_root='data/nuscenes/',\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n modality=dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False),\n version='v1.0-trainval',\n ann_file='data/nuscenes_anno_pkls/nuscenes_infos_train.pkl',\n pipeline=[\n dict(type='LoadMultiViewImageFromFiles', to_float32=True),\n dict(\n type='LoadPointsFromFile',\n coord_type='LIDAR',\n load_dim=5,\n use_dim=5,\n file_client_args=dict(backend='disk')),\n dict(type='ResizeCropFlipImage'),\n dict(type='MultiScaleDepthMapGenerator', downsample=[4, 8, 16]),\n dict(type='BBoxRotation'),\n dict(type='PhotoMetricDistortionMultiViewImage'),\n dict(\n type='NormalizeMultiviewImage',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(\n type='CircleObjectRangeFilter',\n class_dist_thred=[55, 55, 55, 55, 55, 55, 55, 55, 55, 55]),\n dict(\n type='InstanceNameFilter',\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n 'traffic_cone'\n ]),\n dict(type='NuScenesSparse4DAdaptor'),\n dict(\n type='Collect',\n keys=[\n 'img', 'timestamp', 'projection_mat', 'image_wh',\n 'gt_depth', 'focal', 'gt_bboxes_3d', 'gt_labels_3d'\n ],\n meta_keys=[\n 'T_global', 'T_global_inv', 'timestamp', 'instance_id'\n ])\n ],\n test_mode=False,\n data_aug_conf=dict(\n resize_lim=(0.4, 0.47),\n final_dim=(256, 704),\n bot_pct_lim=(0.0, 0.0),\n rot_lim=(-5.4, 5.4),\n H=900,\n W=1600,\n rand_flip=True,\n rot3d_range=[-0.3925, 0.3925]),\n with_seq_flag=True,\n sequences_split_num=2,\n keep_consistent_seq_aug=True),\n val=dict(\n type='NuScenes3DDetTrackDataset',\n data_root='data/nuscenes/',\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n modality=dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False),\n version='v1.0-trainval',\n ann_file='data/nuscenes_anno_pkls/nuscenes_infos_val.pkl',\n pipeline=[\n dict(type='LoadMultiViewImageFromFiles', to_float32=True),\n dict(type='ResizeCropFlipImage'),\n dict(\n type='NormalizeMultiviewImage',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='NuScenesSparse4DAdaptor'),\n dict(\n type='Collect',\n keys=['img', 'timestamp', 'projection_mat', 'image_wh'],\n meta_keys=['T_global', 'T_global_inv', 'timestamp'])\n ],\n data_aug_conf=dict(\n resize_lim=(0.4, 0.47),\n final_dim=(256, 704),\n bot_pct_lim=(0.0, 0.0),\n rot_lim=(-5.4, 5.4),\n H=900,\n W=1600,\n rand_flip=True,\n rot3d_range=[-0.3925, 0.3925]),\n test_mode=True,\n tracking=True,\n tracking_threshold=0.2),\n test=dict(\n type='NuScenes3DDetTrackDataset',\n data_root='data/nuscenes/',\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n modality=dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False),\n version='v1.0-trainval',\n ann_file='data/nuscenes_anno_pkls/nuscenes_infos_val.pkl',\n pipeline=[\n dict(type='LoadMultiViewImageFromFiles', to_float32=True),\n dict(type='ResizeCropFlipImage'),\n dict(\n type='NormalizeMultiviewImage',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='NuScenesSparse4DAdaptor'),\n dict(\n type='Collect',\n keys=['img', 'timestamp', 'projection_mat', 'image_wh'],\n meta_keys=['T_global', 'T_global_inv', 'timestamp'])\n ],\n data_aug_conf=dict(\n resize_lim=(0.4, 0.47),\n final_dim=(256, 704),\n bot_pct_lim=(0.0, 0.0),\n rot_lim=(-5.4, 5.4),\n H=900,\n W=1600,\n rand_flip=True,\n rot3d_range=[-0.3925, 0.3925]),\n test_mode=True,\n tracking=True,\n tracking_threshold=0.2))\noptimizer = dict(\n type='Miopen_AdamW',\n lr=0.0006,\n weight_decay=0.001,\n paramwise_cfg=dict(custom_keys=dict(img_backbone=dict(lr_mult=0.5))))\noptimizer_config = dict(grad_clip=dict(max_norm=25, norm_type=2))\nlr_config = dict(\n policy='CosineAnnealing',\n warmup='linear',\n warmup_iters=500,\n warmup_ratio=0.3333333333333333,\n min_lr_ratio=0.001)\nrunner = dict(type='IterBasedRunner', max_iters=17500)\nvis_pipeline = [\n dict(type='LoadMultiViewImageFromFiles', to_float32=True),\n dict(type='Collect', keys=['img'], meta_keys=['timestamp', 'lidar2img'])\n]\nevaluation = dict(\n interval=3500,\n pipeline=[\n dict(type='LoadMultiViewImageFromFiles', to_float32=True),\n dict(\n type='Collect', keys=['img'], meta_keys=['timestamp', 'lidar2img'])\n ])\ngpu_ids = range(0, 8)\n", "seed": 0, "exp_name": "sparse4dv3_temporal_r50_1x8_bs6_256x704.py"}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment