# Dataset settings for camera-only (monocular) 3D detection on KITTI.
#
# Every top-level name below is a plain Python value (str / list / dict);
# nothing in this file imports or instantiates the types named in the
# ``type=...`` strings.

# ---------------------------------------------------------------------------
# Basic dataset description
# ---------------------------------------------------------------------------
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'  # trailing slash matters: it is concatenated below
class_names = ['Pedestrian', 'Cyclist', 'Car']
input_modality = dict(use_lidar=False, use_camera=True)
metainfo = dict(classes=class_names)

# NOTE(review): ``file_client_args`` is defined here but is not passed to any
# transform in this file; presumably it is picked up by configs that build on
# this one -- confirm before removing.
file_client_args = dict(backend='disk')
# Uncomment the following if you use ceph or other file clients.
# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
# for more details.
# file_client_args = dict(
#     backend='petrel', path_mapping=dict(data='s3://kitti_data/'))

# ---------------------------------------------------------------------------
# Pipelines (ordered lists of transform configs)
# ---------------------------------------------------------------------------
# Used by ``train_dataloader`` below.
train_pipeline = [
    dict(type='LoadImageFromFileMono3D'),
    dict(
        type='LoadAnnotations3D',
        with_bbox=True,
        with_label=True,
        with_attr_label=False,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True),
    dict(type='Resize', scale=(1242, 375), keep_ratio=True),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(
        type='Pack3DDetInputs',
        keys=[
            'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
            'gt_labels_3d', 'centers_2d', 'depths'
        ]),
]

# Used by ``val_dataloader`` (and therefore ``test_dataloader``) below.
test_pipeline = [
    dict(type='LoadImageFromFileMono3D'),
    dict(type='Resize', scale=(1242, 375), keep_ratio=True),
    dict(type='Pack3DDetInputs', keys=['img'])
]

# NOTE(review): not referenced anywhere else in this file; presumably consumed
# by external tooling or derived configs -- confirm.
eval_pipeline = [
    dict(type='LoadImageFromFileMono3D'),
    dict(type='Pack3DDetInputs', keys=['img'])
]

# ---------------------------------------------------------------------------
# Dataloaders
# ---------------------------------------------------------------------------
train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='kitti_infos_train.pkl',
        data_prefix=dict(img='training/image_2'),
        pipeline=train_pipeline,
        modality=input_modality,
        load_type='fov_image_based',
        test_mode=False,
        metainfo=metainfo,
        # we use box_type_3d='Camera' in monocular 3d
        # detection task
        box_type_3d='Camera'))

val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(img='training/image_2'),
        ann_file='kitti_infos_val.pkl',
        pipeline=test_pipeline,
        modality=input_modality,
        load_type='fov_image_based',
        metainfo=metainfo,
        test_mode=True,
        box_type_3d='Camera'))

# Same dict object as ``val_dataloader`` (an alias, not a copy).
test_dataloader = val_dataloader

# ---------------------------------------------------------------------------
# Evaluation
# ---------------------------------------------------------------------------
val_evaluator = dict(
    type='KittiMetric',
    # Plain string concatenation; relies on ``data_root`` ending with '/'.
    ann_file=data_root + 'kitti_infos_val.pkl',
    metric='bbox',
    pred_box_type_3d='Camera')

# Same dict object as ``val_evaluator`` (an alias, not a copy).
test_evaluator = val_evaluator

# ---------------------------------------------------------------------------
# Visualization
# ---------------------------------------------------------------------------
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')