Commit 7aa442d5 authored by raojy's avatar raojy
Browse files

raw_mmdetection

parent 9c03eaa8
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.dataset.dataset_wrapper import RepeatDataset
from mmengine.dataset.sampler import DefaultSampler
from mmengine.visualization.vis_backend import LocalVisBackend
from mmdet3d.datasets.scannet_dataset import ScanNetDataset
from mmdet3d.datasets.transforms.formating import Pack3DDetInputs
from mmdet3d.datasets.transforms.loading import (LoadAnnotations3D,
LoadPointsFromFile,
PointSegClassMapping)
from mmdet3d.datasets.transforms.test_time_aug import MultiScaleFlipAug3D
from mmdet3d.datasets.transforms.transforms_3d import (GlobalAlignment,
GlobalRotScaleTrans,
PointSample,
RandomFlip3D)
from mmdet3d.evaluation.metrics.indoor_metric import IndoorMetric
from mmdet3d.visualization.local_visualizer import Det3DLocalVisualizer
# Dataset settings for ScanNet 3D object detection.
dataset_type = 'ScanNetDataset'
data_root = 'data/scannet/'
# Detection categories handed to the datasets below through ``metainfo``.
# NOTE(review): 'showercurtrain' looks like a misspelling of
# 'showercurtain'; it is a runtime string and is kept as-is -- confirm
# against the dataset's own class list before changing it.
metainfo = {
    'classes': (
        'cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
        'bookshelf', 'picture', 'counter', 'desk', 'curtain', 'refrigerator',
        'showercurtrain', 'toilet', 'sink', 'bathtub', 'garbagebin',
    ),
}

# Example of using a different file client.
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcache are
# not supported yet).
# data_root = 's3://openmmlab/datasets/detection3d/scannet/'
# Method 2: use `backend_args` (`file_client_args` in versions before 1.1.0).
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None
# Training pipeline: load points and 3D annotations (boxes, labels,
# instance and semantic masks), align, remap semantic labels, sample
# points, apply flip / rotation augmentation and pack the results.
train_pipeline = [
    {
        'type': LoadPointsFromFile,
        'coord_type': 'DEPTH',
        'shift_height': True,
        'load_dim': 6,
        'use_dim': [0, 1, 2],
        'backend_args': backend_args,
    },
    {
        'type': LoadAnnotations3D,
        'with_bbox_3d': True,
        'with_label_3d': True,
        'with_mask_3d': True,
        'with_seg_3d': True,
        'backend_args': backend_args,
    },
    {'type': GlobalAlignment, 'rotation_axis': 2},
    {'type': PointSegClassMapping},
    {'type': PointSample, 'num_points': 40000},
    {
        'type': RandomFlip3D,
        'sync_2d': False,
        'flip_ratio_bev_horizontal': 0.5,
        'flip_ratio_bev_vertical': 0.5,
    },
    {
        'type': GlobalRotScaleTrans,
        # presumably radians (0.087266 rad is about 5 degrees) -- TODO confirm
        'rot_range': [-0.087266, 0.087266],
        # identical bounds, so no scaling is sampled
        'scale_ratio_range': [1.0, 1.0],
        'shift_height': True,
    },
    {
        'type': Pack3DDetInputs,
        'keys': [
            'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
            'pts_instance_mask',
        ],
    },
]
# Test pipeline: load points, align, run the transforms wrapped by
# MultiScaleFlipAug3D and pack only the points.
test_pipeline = [
    {
        'type': LoadPointsFromFile,
        'coord_type': 'DEPTH',
        'shift_height': True,
        'load_dim': 6,
        'use_dim': [0, 1, 2],
        'backend_args': backend_args,
    },
    {'type': GlobalAlignment, 'rotation_axis': 2},
    {
        'type': MultiScaleFlipAug3D,
        'img_scale': (1333, 800),
        'pts_scale_ratio': 1,
        'flip': False,
        'transforms': [
            {
                # zero rotation, unit scale, zero translation std
                'type': GlobalRotScaleTrans,
                'rot_range': [0, 0],
                'scale_ratio_range': [1.0, 1.0],
                'translation_std': [0, 0, 0],
            },
            {
                'type': RandomFlip3D,
                'sync_2d': False,
                'flip_ratio_bev_horizontal': 0.5,
                'flip_ratio_bev_vertical': 0.5,
            },
            {'type': PointSample, 'num_points': 40000},
        ],
    },
    {'type': Pack3DDetInputs, 'keys': ['points']},
]
# Training dataloader: the ScanNet training split wrapped in RepeatDataset
# (times=5) and shuffled by the default sampler.
train_dataloader = {
    'batch_size': 8,
    'num_workers': 4,
    'sampler': {'type': DefaultSampler, 'shuffle': True},
    'dataset': {
        'type': RepeatDataset,
        'times': 5,
        'dataset': {
            'type': ScanNetDataset,
            'data_root': data_root,
            'ann_file': 'scannet_infos_train.pkl',
            'pipeline': train_pipeline,
            'filter_empty_gt': False,
            'metainfo': metainfo,
            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
            # and box_type_3d='Depth' in sunrgbd and scannet dataset.
            'box_type_3d': 'Depth',
            'backend_args': backend_args,
        },
    },
}
# Validation dataloader: one sample per batch, no shuffling.
val_dataloader = {
    'batch_size': 1,
    'num_workers': 1,
    'sampler': {'type': DefaultSampler, 'shuffle': False},
    'dataset': {
        'type': ScanNetDataset,
        'data_root': data_root,
        'ann_file': 'scannet_infos_val.pkl',
        'pipeline': test_pipeline,
        'metainfo': metainfo,
        'test_mode': True,
        'box_type_3d': 'Depth',
        'backend_args': backend_args,
    },
}
# Test dataloader: same settings and annotation file as validation, but
# built as a separate dict.
test_dataloader = {
    'batch_size': 1,
    'num_workers': 1,
    'sampler': {'type': DefaultSampler, 'shuffle': False},
    'dataset': {
        'type': ScanNetDataset,
        'data_root': data_root,
        'ann_file': 'scannet_infos_val.pkl',
        'pipeline': test_pipeline,
        'metainfo': metainfo,
        'test_mode': True,
        'box_type_3d': 'Depth',
        'backend_args': backend_args,
    },
}
# Validation and testing share one evaluator config object.
val_evaluator = {'type': IndoorMetric}
test_evaluator = val_evaluator
# Visualizer with a single local visualization backend.
vis_backends = [{'type': LocalVisBackend}]
visualizer = {
    'type': Det3DLocalVisualizer,
    'vis_backends': vis_backends,
    'name': 'visualizer',
}
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.transforms.processing import TestTimeAug
from mmengine.dataset.sampler import DefaultSampler
from mmengine.visualization.vis_backend import LocalVisBackend
from mmdet3d.datasets.scannet_dataset import ScanNetSegDataset
from mmdet3d.datasets.transforms.formating import Pack3DDetInputs
from mmdet3d.datasets.transforms.loading import (LoadAnnotations3D,
LoadPointsFromFile,
NormalizePointsColor,
PointSegClassMapping)
from mmdet3d.datasets.transforms.transforms_3d import (IndoorPatchPointSample,
RandomFlip3D)
from mmdet3d.evaluation.metrics.seg_metric import SegMetric
from mmdet3d.models.segmentors.seg3d_tta import Seg3DTTAModel
from mmdet3d.visualization.local_visualizer import Det3DLocalVisualizer
# For ScanNet seg we usually do 20-class segmentation.
# NOTE(review): 'showercurtrain' looks like a misspelling of
# 'showercurtain'; it is a runtime string and is kept as-is -- confirm
# against the dataset's own class list before changing it.
class_names = (
    'wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table', 'door',
    'window', 'bookshelf', 'picture', 'counter', 'desk', 'curtain',
    'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
    'otherfurniture',
)
metainfo = {'classes': class_names}
dataset_type = 'ScanNetSegDataset'
data_root = 'data/scannet/'
input_modality = {'use_lidar': True, 'use_camera': False}
# Sub-directory names for points and per-point masks.
data_prefix = {
    'pts': 'points',
    'pts_instance_mask': 'instance_mask',
    'pts_semantic_mask': 'semantic_mask',
}

# Example of using a different file client.
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcache are
# not supported yet).
# data_root = 's3://openmmlab/datasets/detection3d/scannet/'
# Method 2: use `backend_args` (`file_client_args` in versions before 1.1.0).
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

# Number of points requested from IndoorPatchPointSample during training.
num_points = 8192
# Training pipeline: load coloured points and semantic masks, remap labels,
# sample a patch of `num_points` points, normalize colours and pack.
train_pipeline = [
    {
        'type': LoadPointsFromFile,
        'coord_type': 'DEPTH',
        'shift_height': False,
        'use_color': True,
        'load_dim': 6,
        'use_dim': [0, 1, 2, 3, 4, 5],
        'backend_args': backend_args,
    },
    {
        # only the semantic segmentation mask is requested
        'type': LoadAnnotations3D,
        'with_bbox_3d': False,
        'with_label_3d': False,
        'with_mask_3d': False,
        'with_seg_3d': True,
        'backend_args': backend_args,
    },
    {'type': PointSegClassMapping},
    {
        'type': IndoorPatchPointSample,
        'num_points': num_points,
        'block_size': 1.5,
        # one past the last valid class index
        'ignore_index': len(class_names),
        'use_normalized_coord': False,
        'enlarge_size': 0.2,
        'min_unique_num': None,
    },
    {'type': NormalizePointsColor, 'color_mean': None},
    {'type': Pack3DDetInputs, 'keys': ['points', 'pts_semantic_mask']},
]
# Test pipeline: load coloured points and semantic masks, normalize colours
# and pack only the points.
test_pipeline = [
    {
        'type': LoadPointsFromFile,
        'coord_type': 'DEPTH',
        'shift_height': False,
        'use_color': True,
        'load_dim': 6,
        'use_dim': [0, 1, 2, 3, 4, 5],
        'backend_args': backend_args,
    },
    {
        'type': LoadAnnotations3D,
        'with_bbox_3d': False,
        'with_label_3d': False,
        'with_mask_3d': False,
        'with_seg_3d': True,
        'backend_args': backend_args,
    },
    {'type': NormalizePointsColor, 'color_mean': None},
    {'type': Pack3DDetInputs, 'keys': ['points']},
]

# Pipeline for data and GT loading in the show function.
# Keep its loading steps consistent with test_pipeline (e.g. the client).
# NOTE(review): the original comment says the GT seg mask must be loaded,
# yet this pipeline contains no annotation-loading step -- confirm.
eval_pipeline = [
    {
        'type': LoadPointsFromFile,
        'coord_type': 'DEPTH',
        'shift_height': False,
        'use_color': True,
        'load_dim': 6,
        'use_dim': [0, 1, 2, 3, 4, 5],
        'backend_args': backend_args,
    },
    {'type': NormalizePointsColor, 'color_mean': None},
    {'type': Pack3DDetInputs, 'keys': ['points']},
]
# Test-time-augmentation pipeline: same loading as test_pipeline, followed
# by TestTimeAug with one flip entry (both flip ratios are 0) and packing.
tta_pipeline = [
    {
        'type': LoadPointsFromFile,
        'coord_type': 'DEPTH',
        'shift_height': False,
        'use_color': True,
        'load_dim': 6,
        'use_dim': [0, 1, 2, 3, 4, 5],
        'backend_args': backend_args,
    },
    {
        'type': LoadAnnotations3D,
        'with_bbox_3d': False,
        'with_label_3d': False,
        'with_mask_3d': False,
        'with_seg_3d': True,
        'backend_args': backend_args,
    },
    {'type': NormalizePointsColor, 'color_mean': None},
    {
        'type': TestTimeAug,
        'transforms': [
            [
                {
                    'type': RandomFlip3D,
                    'sync_2d': False,
                    'flip_ratio_bev_horizontal': 0.0,
                    'flip_ratio_bev_vertical': 0.0,
                },
            ],
            [{'type': Pack3DDetInputs, 'keys': ['points']}],
        ],
    },
]
# Training dataloader for ScanNet semantic segmentation.
train_dataloader = {
    'batch_size': 8,
    'num_workers': 4,
    'persistent_workers': True,
    'sampler': {'type': DefaultSampler, 'shuffle': True},
    'dataset': {
        'type': ScanNetSegDataset,
        'data_root': data_root,
        'ann_file': 'scannet_infos_train.pkl',
        'metainfo': metainfo,
        'data_prefix': data_prefix,
        'pipeline': train_pipeline,
        'modality': input_modality,
        # one past the last valid class index
        'ignore_index': len(class_names),
        # path to the resampled scene indices, built from data_root
        'scene_idxs': data_root + 'seg_info/train_resampled_scene_idxs.npy',
        'test_mode': False,
        'backend_args': backend_args,
    },
}
# Test dataloader: one sample per batch, no shuffling, nothing dropped.
test_dataloader = {
    'batch_size': 1,
    'num_workers': 1,
    'persistent_workers': True,
    'drop_last': False,
    'sampler': {'type': DefaultSampler, 'shuffle': False},
    'dataset': {
        'type': ScanNetSegDataset,
        'data_root': data_root,
        'ann_file': 'scannet_infos_val.pkl',
        'metainfo': metainfo,
        'data_prefix': data_prefix,
        'pipeline': test_pipeline,
        'modality': input_modality,
        'ignore_index': len(class_names),
        'test_mode': True,
        'backend_args': backend_args,
    },
}
# Validation reuses the very same dataloader config object.
val_dataloader = test_dataloader
# Validation and testing share one evaluator config object.
val_evaluator = {'type': SegMetric}
test_evaluator = val_evaluator
# Visualizer with a single local visualization backend.
vis_backends = [{'type': LocalVisBackend}]
visualizer = {
    'type': Det3DLocalVisualizer,
    'vis_backends': vis_backends,
    'name': 'visualizer',
}
# Model wrapper config for test-time augmentation.
tta_model = {'type': Seg3DTTAModel}
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.transforms.processing import TestTimeAug
from mmengine.dataset.sampler import DefaultSampler
from mmengine.visualization.vis_backend import LocalVisBackend
from mmdet3d.datasets.semantickitti_dataset import SemanticKittiDataset
from mmdet3d.datasets.transforms.formating import Pack3DDetInputs
from mmdet3d.datasets.transforms.loading import (LoadAnnotations3D,
LoadPointsFromFile,
PointSegClassMapping)
from mmdet3d.datasets.transforms.transforms_3d import (GlobalRotScaleTrans,
RandomFlip3D)
from mmdet3d.evaluation.metrics.seg_metric import SegMetric
from mmdet3d.models.segmentors.seg3d_tta import Seg3DTTAModel
from mmdet3d.visualization.local_visualizer import Det3DLocalVisualizer
# For SemanticKitti we usually do 19-class segmentation.
# For labels_map we follow the uniform format of MMDetection & MMSegmentation,
# i.e. the unlabeled class is the last one (index 19), which differs from the
# original implementation of some methods, e.g. Cylinder3D.
dataset_type = 'SemanticKittiDataset'
data_root = 'data/semantickitti/'
# NOTE(review): 'trunck' and 'terrian' look like misspellings of 'trunk' and
# 'terrain'; they are runtime strings and are kept as-is -- confirm before
# changing. Index 4 is named 'bus' here, while the labels_map comments below
# call target 4 "other-vehicle" -- confirm which name is intended.
class_names = [
    'car', 'bicycle', 'motorcycle', 'truck', 'bus', 'person', 'bicyclist',
    'motorcyclist', 'road', 'parking', 'sidewalk', 'other-ground', 'building',
    'fence', 'vegetation', 'trunck', 'terrian', 'pole', 'traffic-sign',
]
# Raw label id -> training class index (19 means "unlabeled").
# Entries marked "(mapped)" fold a raw label into another class.
labels_map = {
    0: 19,   # "unlabeled"
    1: 19,   # "outlier" -> "unlabeled" (mapped)
    10: 0,   # "car"
    11: 1,   # "bicycle"
    13: 4,   # "bus" -> "other-vehicle" (mapped)
    15: 2,   # "motorcycle"
    16: 4,   # "on-rails" -> "other-vehicle" (mapped)
    18: 3,   # "truck"
    20: 4,   # "other-vehicle"
    30: 5,   # "person"
    31: 6,   # "bicyclist"
    32: 7,   # "motorcyclist"
    40: 8,   # "road"
    44: 9,   # "parking"
    48: 10,  # "sidewalk"
    49: 11,  # "other-ground"
    50: 12,  # "building"
    51: 13,  # "fence"
    52: 19,  # "other-structure" -> "unlabeled" (mapped)
    60: 8,   # "lane-marking" -> "road" (mapped)
    70: 14,  # "vegetation"
    71: 15,  # "trunk"
    72: 16,  # "terrain"
    80: 17,  # "pole"
    81: 18,  # "traffic-sign"
    99: 19,  # "other-object" -> "unlabeled" (mapped)
    252: 0,  # "moving-car" -> "car" (mapped)
    253: 6,  # "moving-bicyclist" -> "bicyclist" (mapped)
    254: 5,  # "moving-person" -> "person" (mapped)
    255: 7,  # "moving-motorcyclist" -> "motorcyclist" (mapped)
    256: 4,  # "moving-on-rails" -> "other-vehicle" (mapped)
    257: 4,  # "moving-bus" -> "other-vehicle" (mapped)
    258: 3,  # "moving-truck" -> "truck" (mapped)
    259: 4,  # "moving-other-vehicle" -> "other-vehicle" (mapped)
}
# max_label equals the largest raw label id in labels_map.
metainfo = {
    'classes': class_names,
    'seg_label_mapping': labels_map,
    'max_label': 259,
}
input_modality = {'use_lidar': True, 'use_camera': False}

# Example of using a different file client.
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcache are
# not supported yet).
# data_root = 's3://openmmlab/datasets/detection3d/semantickitti/'
# Method 2: use `backend_args` (`file_client_args` in versions before 1.1.0).
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None
# Training pipeline: load points and semantic labels, remap labels, apply
# flip / rotation / scale / translation augmentation and pack.
train_pipeline = [
    {
        'type': LoadPointsFromFile,
        'coord_type': 'LIDAR',
        'load_dim': 4,
        'use_dim': 4,
        'backend_args': backend_args,
    },
    {
        'type': LoadAnnotations3D,
        'with_bbox_3d': False,
        'with_label_3d': False,
        'with_seg_3d': True,
        # the dtype is given as a string here
        'seg_3d_dtype': 'np.int32',
        'seg_offset': 2**16,
        'dataset_type': 'semantickitti',
        'backend_args': backend_args,
    },
    {'type': PointSegClassMapping},
    {
        'type': RandomFlip3D,
        'sync_2d': False,
        'flip_ratio_bev_horizontal': 0.5,
        'flip_ratio_bev_vertical': 0.5,
    },
    {
        'type': GlobalRotScaleTrans,
        # presumably radians (0.78539816 is about pi / 4) -- TODO confirm
        'rot_range': [-0.78539816, 0.78539816],
        'scale_ratio_range': [0.95, 1.05],
        'translation_std': [0.1, 0.1, 0.1],
    },
    {'type': Pack3DDetInputs, 'keys': ['points', 'pts_semantic_mask']},
]
# Test pipeline: load points and semantic labels, remap labels and pack
# only the points.
test_pipeline = [
    {
        'type': LoadPointsFromFile,
        'coord_type': 'LIDAR',
        'load_dim': 4,
        'use_dim': 4,
        'backend_args': backend_args,
    },
    {
        'type': LoadAnnotations3D,
        'with_bbox_3d': False,
        'with_label_3d': False,
        'with_seg_3d': True,
        'seg_3d_dtype': 'np.int32',
        'seg_offset': 2**16,
        'dataset_type': 'semantickitti',
        'backend_args': backend_args,
    },
    {'type': PointSegClassMapping},
    {'type': Pack3DDetInputs, 'keys': ['points']},
]

# Pipeline for data and GT loading in the show function.
# Keep its loading steps consistent with test_pipeline (e.g. the client).
eval_pipeline = [
    {
        'type': LoadPointsFromFile,
        'coord_type': 'LIDAR',
        'load_dim': 4,
        'use_dim': 4,
        'backend_args': backend_args,
    },
    {'type': Pack3DDetInputs, 'keys': ['points']},
]
# Test-time-augmentation pipeline. TestTimeAug receives three transform
# groups: 4 flip settings, 9 rotation/scale settings (3 angles x 3 scale
# factors) and the final packing step.
tta_pipeline = [
    {
        'type': LoadPointsFromFile,
        'coord_type': 'LIDAR',
        'load_dim': 4,
        'use_dim': 4,
        'backend_args': backend_args,
    },
    {
        'type': LoadAnnotations3D,
        'with_bbox_3d': False,
        'with_label_3d': False,
        'with_seg_3d': True,
        'seg_3d_dtype': 'np.int32',
        'seg_offset': 2**16,
        'dataset_type': 'semantickitti',
        'backend_args': backend_args,
    },
    {'type': PointSegClassMapping},
    {
        'type': TestTimeAug,
        'transforms': [
            # every combination of horizontal / vertical flip ratio in {0, 1}
            [
                {
                    'type': RandomFlip3D,
                    'sync_2d': False,
                    'flip_ratio_bev_horizontal': 0.0,
                    'flip_ratio_bev_vertical': 0.0,
                },
                {
                    'type': RandomFlip3D,
                    'sync_2d': False,
                    'flip_ratio_bev_horizontal': 0.0,
                    'flip_ratio_bev_vertical': 1.0,
                },
                {
                    'type': RandomFlip3D,
                    'sync_2d': False,
                    'flip_ratio_bev_horizontal': 1.0,
                    'flip_ratio_bev_vertical': 0.0,
                },
                {
                    'type': RandomFlip3D,
                    'sync_2d': False,
                    'flip_ratio_bev_horizontal': 1.0,
                    'flip_ratio_bev_vertical': 1.0,
                },
            ],
            # identical lower/upper bounds pin each transform to one fixed
            # rotation value and one fixed scale factor
            [
                {
                    'type': GlobalRotScaleTrans,
                    'rot_range': [angle, angle],
                    'scale_ratio_range': [ratio, ratio],
                    'translation_std': [0, 0, 0],
                }
                for angle in [-0.78539816, 0.0, 0.78539816]
                for ratio in [0.95, 1.0, 1.05]
            ],
            [{'type': Pack3DDetInputs, 'keys': ['points']}],
        ],
    },
]
# Training dataloader for SemanticKITTI.
train_dataloader = {
    'batch_size': 2,
    'num_workers': 4,
    'persistent_workers': True,
    'sampler': {'type': DefaultSampler, 'shuffle': True},
    'dataset': {
        'type': SemanticKittiDataset,
        'data_root': data_root,
        'ann_file': 'semantickitti_infos_train.pkl',
        'pipeline': train_pipeline,
        'metainfo': metainfo,
        'modality': input_modality,
        # 19 is the "unlabeled" target index used in labels_map
        'ignore_index': 19,
        'backend_args': backend_args,
    },
}
# Test dataloader: one sample per batch, no shuffling, nothing dropped.
test_dataloader = {
    'batch_size': 1,
    'num_workers': 1,
    'persistent_workers': True,
    'drop_last': False,
    'sampler': {'type': DefaultSampler, 'shuffle': False},
    'dataset': {
        'type': SemanticKittiDataset,
        'data_root': data_root,
        'ann_file': 'semantickitti_infos_val.pkl',
        'pipeline': test_pipeline,
        'metainfo': metainfo,
        'modality': input_modality,
        'ignore_index': 19,
        'test_mode': True,
        'backend_args': backend_args,
    },
}
# Validation reuses the very same dataloader config object.
val_dataloader = test_dataloader
# Validation and testing share one evaluator config object.
val_evaluator = {'type': SegMetric}
test_evaluator = val_evaluator
# Visualizer with a single local visualization backend.
vis_backends = [{'type': LocalVisBackend}]
visualizer = {
    'type': Det3DLocalVisualizer,
    'vis_backends': vis_backends,
    'name': 'visualizer',
}
# Model wrapper config for test-time augmentation.
tta_model = {'type': Seg3DTTAModel}
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.dataset.dataset_wrapper import RepeatDataset
from mmengine.dataset.sampler import DefaultSampler
from mmengine.visualization.vis_backend import LocalVisBackend
from mmdet3d.datasets.sunrgbd_dataset import SUNRGBDDataset
from mmdet3d.datasets.transforms.formating import Pack3DDetInputs
from mmdet3d.datasets.transforms.loading import (LoadAnnotations3D,
LoadPointsFromFile)
from mmdet3d.datasets.transforms.test_time_aug import MultiScaleFlipAug3D
from mmdet3d.datasets.transforms.transforms_3d import (GlobalRotScaleTrans,
PointSample,
RandomFlip3D)
from mmdet3d.evaluation.metrics.indoor_metric import IndoorMetric
from mmdet3d.visualization.local_visualizer import Det3DLocalVisualizer
# Dataset settings for SUN RGB-D 3D object detection.
dataset_type = 'SUNRGBDDataset'
data_root = 'data/sunrgbd/'
# The ten detection categories.
class_names = (
    'bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
    'night_stand', 'bookshelf', 'bathtub',
)
metainfo = {'classes': class_names}

# Example of using a different file client.
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcache are
# not supported yet).
# data_root = 's3://openmmlab/datasets/detection3d/sunrgbd/'
# Method 2: use `backend_args` (`file_client_args` in versions before 1.1.0).
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None
# Training pipeline: load points and annotations, apply horizontal flip and
# rotation / scale augmentation, sample points and pack.
train_pipeline = [
    {
        'type': LoadPointsFromFile,
        'coord_type': 'DEPTH',
        'shift_height': True,
        'load_dim': 6,
        'use_dim': [0, 1, 2],
        'backend_args': backend_args,
    },
    # annotation loader with its default arguments
    {'type': LoadAnnotations3D},
    {
        'type': RandomFlip3D,
        'sync_2d': False,
        'flip_ratio_bev_horizontal': 0.5,
    },
    {
        'type': GlobalRotScaleTrans,
        # presumably radians (0.523599 rad is about 30 degrees) -- TODO confirm
        'rot_range': [-0.523599, 0.523599],
        'scale_ratio_range': [0.85, 1.15],
        'shift_height': True,
    },
    {'type': PointSample, 'num_points': 20000},
    {
        'type': Pack3DDetInputs,
        'keys': ['points', 'gt_bboxes_3d', 'gt_labels_3d'],
    },
]
# Test pipeline: load points, run the transforms wrapped by
# MultiScaleFlipAug3D and pack only the points.
test_pipeline = [
    {
        'type': LoadPointsFromFile,
        'coord_type': 'DEPTH',
        'shift_height': True,
        'load_dim': 6,
        'use_dim': [0, 1, 2],
        'backend_args': backend_args,
    },
    {
        'type': MultiScaleFlipAug3D,
        'img_scale': (1333, 800),
        'pts_scale_ratio': 1,
        'flip': False,
        'transforms': [
            {
                # zero rotation, unit scale, zero translation std
                'type': GlobalRotScaleTrans,
                'rot_range': [0, 0],
                'scale_ratio_range': [1.0, 1.0],
                'translation_std': [0, 0, 0],
            },
            {
                'type': RandomFlip3D,
                'sync_2d': False,
                'flip_ratio_bev_horizontal': 0.5,
            },
            {'type': PointSample, 'num_points': 20000},
        ],
    },
    {'type': Pack3DDetInputs, 'keys': ['points']},
]
# Training dataloader: the SUN RGB-D training split wrapped in
# RepeatDataset (times=5) and shuffled by the default sampler.
train_dataloader = {
    'batch_size': 16,
    'num_workers': 4,
    'sampler': {'type': DefaultSampler, 'shuffle': True},
    'dataset': {
        'type': RepeatDataset,
        'times': 5,
        'dataset': {
            'type': SUNRGBDDataset,
            'data_root': data_root,
            'ann_file': 'sunrgbd_infos_train.pkl',
            'pipeline': train_pipeline,
            'filter_empty_gt': False,
            'metainfo': metainfo,
            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
            # and box_type_3d='Depth' in sunrgbd and scannet dataset.
            'box_type_3d': 'Depth',
            'backend_args': backend_args,
        },
    },
}
# Validation dataloader: one sample per batch, no shuffling.
val_dataloader = {
    'batch_size': 1,
    'num_workers': 1,
    'sampler': {'type': DefaultSampler, 'shuffle': False},
    'dataset': {
        'type': SUNRGBDDataset,
        'data_root': data_root,
        'ann_file': 'sunrgbd_infos_val.pkl',
        'pipeline': test_pipeline,
        'metainfo': metainfo,
        'test_mode': True,
        'box_type_3d': 'Depth',
        'backend_args': backend_args,
    },
}
# Test dataloader: same settings and annotation file as validation, but
# built as a separate dict.
test_dataloader = {
    'batch_size': 1,
    'num_workers': 1,
    'sampler': {'type': DefaultSampler, 'shuffle': False},
    'dataset': {
        'type': SUNRGBDDataset,
        'data_root': data_root,
        'ann_file': 'sunrgbd_infos_val.pkl',
        'pipeline': test_pipeline,
        'metainfo': metainfo,
        'test_mode': True,
        'box_type_3d': 'Depth',
        'backend_args': backend_args,
    },
}
# Validation and testing share one evaluator config object.
val_evaluator = {'type': IndoorMetric}
test_evaluator = val_evaluator
# Visualizer with a single local visualization backend.
vis_backends = [{'type': LocalVisBackend}]
visualizer = {
    'type': Det3DLocalVisualizer,
    'vis_backends': vis_backends,
    'name': 'visualizer',
}
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.dataset.dataset_wrapper import RepeatDataset
from mmengine.dataset.sampler import DefaultSampler
from mmengine.visualization.vis_backend import LocalVisBackend
from mmdet3d.datasets.transforms.formating import Pack3DDetInputs
from mmdet3d.datasets.transforms.loading import (LoadAnnotations3D,
LoadPointsFromFile)
from mmdet3d.datasets.transforms.test_time_aug import MultiScaleFlipAug3D
from mmdet3d.datasets.transforms.transforms_3d import ( # noqa
GlobalRotScaleTrans, ObjectRangeFilter, ObjectSample, PointShuffle,
PointsRangeFilter, RandomFlip3D)
from mmdet3d.datasets.waymo_dataset import WaymoDataset
from mmdet3d.evaluation.metrics.waymo_metric import WaymoMetric
from mmdet3d.visualization.local_visualizer import Det3DLocalVisualizer
# Dataset settings.
# D5 in the config name means the whole dataset is divided into 5 folds;
# only one fold is used for efficient experiments.
dataset_type = 'WaymoDataset'
# data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
data_root = 'data/waymo/kitti_format/'

# Example of using a different file client.
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcache are
# not supported yet).
# data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
# Method 2: use `backend_args` (`file_client_args` in versions before 1.1.0).
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

class_names = ['Car', 'Pedestrian', 'Cyclist']
metainfo = {'classes': class_names}
# Range used by the point / object range filters in the pipelines.
# presumably [x_min, y_min, z_min, x_max, y_max, z_max] -- TODO confirm
point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4]
input_modality = {'use_lidar': True, 'use_camera': False}
# Ground-truth database sampler settings (the ObjectSample step that would
# consume them is commented out in this file's train_pipeline).
db_sampler = {
    'data_root': data_root,
    'info_path': data_root + 'waymo_dbinfos_train.pkl',
    'rate': 1.0,
    'prepare': {
        'filter_by_difficulty': [-1],
        'filter_by_min_points': {'Car': 5, 'Pedestrian': 10, 'Cyclist': 10},
    },
    'classes': class_names,
    'sample_groups': {'Car': 15, 'Pedestrian': 10, 'Cyclist': 10},
    'points_loader': {
        'type': LoadPointsFromFile,
        'coord_type': 'LIDAR',
        'load_dim': 6,
        'use_dim': [0, 1, 2, 3, 4],
        'backend_args': backend_args,
    },
    'backend_args': backend_args,
}
# Training pipeline: load points and 3D boxes/labels, apply flip and
# rotation / scale augmentation, filter by range, shuffle and pack.
train_pipeline = [
    {
        'type': LoadPointsFromFile,
        'coord_type': 'LIDAR',
        'load_dim': 6,
        'use_dim': 5,
        'backend_args': backend_args,
    },
    {'type': LoadAnnotations3D, 'with_bbox_3d': True, 'with_label_3d': True},
    # Database sampling is disabled in this config:
    # dict(type=ObjectSample, db_sampler=db_sampler),
    {
        'type': RandomFlip3D,
        'sync_2d': False,
        'flip_ratio_bev_horizontal': 0.5,
        'flip_ratio_bev_vertical': 0.5,
    },
    {
        'type': GlobalRotScaleTrans,
        # presumably radians (0.78539816 is about pi / 4) -- TODO confirm
        'rot_range': [-0.78539816, 0.78539816],
        'scale_ratio_range': [0.95, 1.05],
    },
    {'type': PointsRangeFilter, 'point_cloud_range': point_cloud_range},
    {'type': ObjectRangeFilter, 'point_cloud_range': point_cloud_range},
    {'type': PointShuffle},
    {
        'type': Pack3DDetInputs,
        'keys': ['points', 'gt_bboxes_3d', 'gt_labels_3d'],
    },
]
# Test pipeline: load points, run the transforms wrapped by
# MultiScaleFlipAug3D and pack only the points.
test_pipeline = [
    {
        'type': LoadPointsFromFile,
        'coord_type': 'LIDAR',
        'load_dim': 6,
        'use_dim': 5,
        'backend_args': backend_args,
    },
    {
        'type': MultiScaleFlipAug3D,
        'img_scale': (1333, 800),
        'pts_scale_ratio': 1,
        'flip': False,
        'transforms': [
            {
                # zero rotation, unit scale, zero translation std
                'type': GlobalRotScaleTrans,
                'rot_range': [0, 0],
                'scale_ratio_range': [1.0, 1.0],
                'translation_std': [0, 0, 0],
            },
            {'type': RandomFlip3D},
            {
                'type': PointsRangeFilter,
                'point_cloud_range': point_cloud_range,
            },
        ],
    },
    {'type': Pack3DDetInputs, 'keys': ['points']},
]

# Pipeline for data and GT loading in the show function.
# Keep its loading steps consistent with test_pipeline (e.g. the client).
eval_pipeline = [
    {
        'type': LoadPointsFromFile,
        'coord_type': 'LIDAR',
        'load_dim': 6,
        'use_dim': 5,
        'backend_args': backend_args,
    },
    {'type': Pack3DDetInputs, 'keys': ['points']},
]
# Training dataloader: the Waymo training split wrapped in RepeatDataset
# (times=2) and shuffled by the default sampler.
train_dataloader = {
    'batch_size': 2,
    'num_workers': 2,
    'persistent_workers': True,
    'sampler': {'type': DefaultSampler, 'shuffle': True},
    'dataset': {
        'type': RepeatDataset,
        'times': 2,
        'dataset': {
            'type': WaymoDataset,
            'data_root': data_root,
            'ann_file': 'waymo_infos_train.pkl',
            'data_prefix': {
                'pts': 'training/velodyne',
                'sweeps': 'training/velodyne',
            },
            'pipeline': train_pipeline,
            'modality': input_modality,
            'test_mode': False,
            'metainfo': metainfo,
            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
            # and box_type_3d='Depth' in sunrgbd and scannet dataset.
            'box_type_3d': 'LiDAR',
            # load one frame every five frames
            'load_interval': 5,
            'backend_args': backend_args,
        },
    },
}
# Validation dataloader: one sample per batch, no shuffling.
# NOTE(review): both loaders below use eval_pipeline rather than
# test_pipeline -- confirm this is intended.
val_dataloader = {
    'batch_size': 1,
    'num_workers': 1,
    'persistent_workers': True,
    'drop_last': False,
    'sampler': {'type': DefaultSampler, 'shuffle': False},
    'dataset': {
        'type': WaymoDataset,
        'data_root': data_root,
        'data_prefix': {
            'pts': 'training/velodyne',
            'sweeps': 'training/velodyne',
        },
        'ann_file': 'waymo_infos_val.pkl',
        'pipeline': eval_pipeline,
        'modality': input_modality,
        'test_mode': True,
        'metainfo': metainfo,
        'box_type_3d': 'LiDAR',
        'backend_args': backend_args,
    },
}
# Test dataloader: same settings and annotation file as validation, but
# built as a separate dict.
test_dataloader = {
    'batch_size': 1,
    'num_workers': 1,
    'persistent_workers': True,
    'drop_last': False,
    'sampler': {'type': DefaultSampler, 'shuffle': False},
    'dataset': {
        'type': WaymoDataset,
        'data_root': data_root,
        'data_prefix': {
            'pts': 'training/velodyne',
            'sweeps': 'training/velodyne',
        },
        'ann_file': 'waymo_infos_val.pkl',
        'pipeline': eval_pipeline,
        'modality': input_modality,
        'test_mode': True,
        'metainfo': metainfo,
        'box_type_3d': 'LiDAR',
        'backend_args': backend_args,
    },
}
# Waymo evaluator; the paths are spelled out here and are not derived from
# `data_root`. Validation and testing share this config object.
val_evaluator = {
    'type': WaymoMetric,
    'ann_file': './data/waymo/kitti_format/waymo_infos_val.pkl',
    'waymo_bin_file': './data/waymo/waymo_format/gt.bin',
    'data_root': './data/waymo/waymo_format',
    'backend_args': backend_args,
    'convert_kitti_format': False,
}
test_evaluator = val_evaluator
# Visualizer with a single local visualization backend.
vis_backends = [{'type': LocalVisBackend}]
visualizer = {
    'type': Det3DLocalVisualizer,
    'vis_backends': vis_backends,
    'name': 'visualizer',
}
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.dataset.dataset_wrapper import RepeatDataset
from mmengine.dataset.sampler import DefaultSampler
from mmengine.visualization.vis_backend import LocalVisBackend
from mmdet3d.datasets.transforms.formating import Pack3DDetInputs
from mmdet3d.datasets.transforms.loading import (LoadAnnotations3D,
LoadPointsFromFile)
from mmdet3d.datasets.transforms.test_time_aug import MultiScaleFlipAug3D
from mmdet3d.datasets.transforms.transforms_3d import ( # noqa
GlobalRotScaleTrans, ObjectRangeFilter, ObjectSample, PointShuffle,
PointsRangeFilter, RandomFlip3D)
from mmdet3d.datasets.waymo_dataset import WaymoDataset
from mmdet3d.evaluation.metrics.waymo_metric import WaymoMetric
from mmdet3d.visualization.local_visualizer import Det3DLocalVisualizer
# Dataset settings.
# D5 in the config name means the whole dataset is divided into 5 folds;
# only one fold is used for efficient experiments.
dataset_type = 'WaymoDataset'
data_root = 'data/waymo/kitti_format/'

# Example of using a different file client.
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcache are
# not supported yet).
# data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
# Method 2: use `backend_args` (`file_client_args` in versions before 1.1.0).
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

# Single-class (car only) variant.
class_names = ['Car']
metainfo = {'classes': class_names}
# Range used by the point / object range filters in the pipelines.
# presumably [x_min, y_min, z_min, x_max, y_max, z_max] -- TODO confirm
point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4]
input_modality = {'use_lidar': True, 'use_camera': False}
# Ground-truth database sampler settings, consumed by the ObjectSample step
# of the training pipeline.
db_sampler = {
    'data_root': data_root,
    'info_path': data_root + 'waymo_dbinfos_train.pkl',
    'rate': 1.0,
    'prepare': {
        'filter_by_difficulty': [-1],
        'filter_by_min_points': {'Car': 5},
    },
    'classes': class_names,
    'sample_groups': {'Car': 15},
    'points_loader': {
        'type': LoadPointsFromFile,
        'coord_type': 'LIDAR',
        'load_dim': 6,
        'use_dim': [0, 1, 2, 3, 4],
        'backend_args': backend_args,
    },
    'backend_args': backend_args,
}
# Training pipeline: load points and 3D boxes/labels, sample database
# objects, apply flip and rotation / scale augmentation, filter by range,
# shuffle and pack.
train_pipeline = [
    {
        'type': LoadPointsFromFile,
        'coord_type': 'LIDAR',
        'load_dim': 6,
        'use_dim': 5,
        'backend_args': backend_args,
    },
    {'type': LoadAnnotations3D, 'with_bbox_3d': True, 'with_label_3d': True},
    {'type': ObjectSample, 'db_sampler': db_sampler},
    {
        'type': RandomFlip3D,
        'sync_2d': False,
        'flip_ratio_bev_horizontal': 0.5,
        'flip_ratio_bev_vertical': 0.5,
    },
    {
        'type': GlobalRotScaleTrans,
        # presumably radians (0.78539816 is about pi / 4) -- TODO confirm
        'rot_range': [-0.78539816, 0.78539816],
        'scale_ratio_range': [0.95, 1.05],
    },
    {'type': PointsRangeFilter, 'point_cloud_range': point_cloud_range},
    {'type': ObjectRangeFilter, 'point_cloud_range': point_cloud_range},
    {'type': PointShuffle},
    {
        'type': Pack3DDetInputs,
        'keys': ['points', 'gt_bboxes_3d', 'gt_labels_3d'],
    },
]
# Test pipeline: load points, run the transforms wrapped by
# MultiScaleFlipAug3D and pack only the points.
test_pipeline = [
    {
        'type': LoadPointsFromFile,
        'coord_type': 'LIDAR',
        'load_dim': 6,
        'use_dim': 5,
        'backend_args': backend_args,
    },
    {
        'type': MultiScaleFlipAug3D,
        'img_scale': (1333, 800),
        'pts_scale_ratio': 1,
        'flip': False,
        'transforms': [
            {
                # zero rotation, unit scale, zero translation std
                'type': GlobalRotScaleTrans,
                'rot_range': [0, 0],
                'scale_ratio_range': [1.0, 1.0],
                'translation_std': [0, 0, 0],
            },
            {'type': RandomFlip3D},
            {
                'type': PointsRangeFilter,
                'point_cloud_range': point_cloud_range,
            },
        ],
    },
    {'type': Pack3DDetInputs, 'keys': ['points']},
]

# Pipeline for data and GT loading in the show function.
# Keep its loading steps consistent with test_pipeline (e.g. the client).
eval_pipeline = [
    {
        'type': LoadPointsFromFile,
        'coord_type': 'LIDAR',
        'load_dim': 6,
        'use_dim': 5,
        'backend_args': backend_args,
    },
    {'type': Pack3DDetInputs, 'keys': ['points']},
]
# Training dataloader: the Waymo training split wrapped in RepeatDataset
# (times=2) and shuffled by the default sampler.
train_dataloader = {
    'batch_size': 2,
    'num_workers': 2,
    'persistent_workers': True,
    'sampler': {'type': DefaultSampler, 'shuffle': True},
    'dataset': {
        'type': RepeatDataset,
        'times': 2,
        'dataset': {
            'type': WaymoDataset,
            'data_root': data_root,
            'ann_file': 'waymo_infos_train.pkl',
            'data_prefix': {
                'pts': 'training/velodyne',
                'sweeps': 'training/velodyne',
            },
            'pipeline': train_pipeline,
            'modality': input_modality,
            'test_mode': False,
            'metainfo': metainfo,
            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
            # and box_type_3d='Depth' in sunrgbd and scannet dataset.
            'box_type_3d': 'LiDAR',
            # load one frame every five frames
            'load_interval': 5,
            'backend_args': backend_args,
        },
    },
}
# Validation dataloader: one sample per batch, no shuffling.
# NOTE(review): both loaders below use eval_pipeline rather than
# test_pipeline -- confirm this is intended.
val_dataloader = {
    'batch_size': 1,
    'num_workers': 1,
    'persistent_workers': True,
    'drop_last': False,
    'sampler': {'type': DefaultSampler, 'shuffle': False},
    'dataset': {
        'type': WaymoDataset,
        'data_root': data_root,
        'data_prefix': {
            'pts': 'training/velodyne',
            'sweeps': 'training/velodyne',
        },
        'ann_file': 'waymo_infos_val.pkl',
        'pipeline': eval_pipeline,
        'modality': input_modality,
        'test_mode': True,
        'metainfo': metainfo,
        'box_type_3d': 'LiDAR',
        'backend_args': backend_args,
    },
}
# Test dataloader: same settings and annotation file as validation, but
# built as a separate dict.
test_dataloader = {
    'batch_size': 1,
    'num_workers': 1,
    'persistent_workers': True,
    'drop_last': False,
    'sampler': {'type': DefaultSampler, 'shuffle': False},
    'dataset': {
        'type': WaymoDataset,
        'data_root': data_root,
        'data_prefix': {
            'pts': 'training/velodyne',
            'sweeps': 'training/velodyne',
        },
        'ann_file': 'waymo_infos_val.pkl',
        'pipeline': eval_pipeline,
        'modality': input_modality,
        'test_mode': True,
        'metainfo': metainfo,
        'box_type_3d': 'LiDAR',
        'backend_args': backend_args,
    },
}
# Waymo evaluator; the paths are spelled out here and are not derived from
# `data_root`. Validation and testing share this config object.
val_evaluator = {
    'type': WaymoMetric,
    'ann_file': './data/waymo/kitti_format/waymo_infos_val.pkl',
    'waymo_bin_file': './data/waymo/waymo_format/gt.bin',
    'data_root': './data/waymo/waymo_format',
    'convert_kitti_format': False,
    'backend_args': backend_args,
}
test_evaluator = val_evaluator
# Visualizer with a single local visualization backend.
vis_backends = [{'type': LocalVisBackend}]
visualizer = {
    'type': Det3DLocalVisualizer,
    'vis_backends': vis_backends,
    'name': 'visualizer',
}
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.dataset.sampler import DefaultSampler
from mmdet3d.datasets.transforms.formating import Pack3DDetInputs
from mmdet3d.datasets.transforms.loading import (LoadAnnotations3D,
LoadImageFromFileMono3D)
from mmdet3d.datasets.transforms.transforms_3d import (RandomFlip3D,
RandomResize3D)
from mmdet3d.datasets.waymo_dataset import WaymoDataset
from mmdet3d.evaluation.metrics.waymo_metric import WaymoMetric
# Dataset settings.
# "D3" in a config name means the whole dataset is divided into 3 folds, of
# which only one is used to keep experiments efficient.
# NOTE(review): the train loader in this config uses load_interval=5, so the
# "D3 / 3 folds" wording may be stale -- confirm against the config name.
dataset_type = 'WaymoDataset'
data_root = 'data/waymo/kitti_format/'
class_names = ['Car', 'Pedestrian', 'Cyclist']
# Camera-only setup: LiDAR input is switched off.
input_modality = dict(use_lidar=False, use_camera=True)

# Example of using a different file client.
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcache are
# not supported yet).
# data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
# Method 2: use backend_args (file_client_args in versions before 1.1.0).
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None
# Training pipeline: image loading, 2D/3D annotation loading, random resize,
# random BEV-horizontal flip, then packing of the listed keys.
train_pipeline = [
    dict(type=LoadImageFromFileMono3D, backend_args=backend_args),
    dict(
        type=LoadAnnotations3D,
        with_bbox=True,
        with_label=True,
        with_attr_label=False,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True),
    # base shape (1248, 832), scale (0.95, 1.05)
    # The base scale has to agree with the (1248, 832) used by the test and
    # eval pipelines; it had been mistyped as (1284, 832).
    dict(
        type=RandomResize3D,
        scale=(1248, 832),
        ratio_range=(0.95, 1.05),
        keep_ratio=True,
    ),
    dict(type=RandomFlip3D, flip_ratio_bev_horizontal=0.5),
    dict(
        type=Pack3DDetInputs,
        keys=[
            'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
            'gt_labels_3d', 'centers_2d', 'depths'
        ]),
]
# Test pipeline: load the image, resize to the base scale (ratio range is
# pinned to 1, so no random scaling), and pack only ``img``.
test_pipeline = [
    dict(type=LoadImageFromFileMono3D, backend_args=backend_args),
    dict(
        type=RandomResize3D,
        scale=(1248, 832),
        ratio_range=(1., 1.),
        keep_ratio=True),
    dict(type=Pack3DDetInputs, keys=['img']),
]
# Pipeline used for data and ground-truth loading in the show function.
# Keep its loading step consistent with test_pipeline (e.g. the client).
eval_pipeline = [
    dict(type=LoadImageFromFileMono3D, backend_args=backend_args),
    dict(
        type=RandomResize3D,
        scale=(1248, 832),
        ratio_range=(1., 1.),
        keep_ratio=True),
    dict(type=Pack3DDetInputs, keys=['img']),
]
# Dataset meta information. The key is the lowercase ``classes`` (as in the
# other Waymo camera configs); an uppercase ``CLASSES`` key is not the one
# the dataset looks up for its class names.
metainfo = dict(classes=class_names)
# Training loader: shuffled, three samples per batch, five camera prefixes.
train_dataloader = dict(
    batch_size=3,
    num_workers=3,
    persistent_workers=True,
    sampler=dict(type=DefaultSampler, shuffle=True),
    dataset=dict(
        type=WaymoDataset,
        data_root=data_root,
        ann_file='waymo_infos_train.pkl',
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        pipeline=train_pipeline,
        modality=input_modality,
        test_mode=False,
        metainfo=metainfo,
        # box_type_3d is 'LiDAR' for kitti and nuscenes datasets and
        # 'Depth' for sunrgbd and scannet datasets; this camera-only
        # config sets 'Camera'.
        box_type_3d='Camera',
        load_type='fov_image_based',
        # load one frame every five frames
        load_interval=5,
        backend_args=backend_args))
# Validation loader: unshuffled, one sample per batch, annotations read
# from ``waymo_infos_val.pkl``.
val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type=DefaultSampler, shuffle=False),
    dataset=dict(
        type=WaymoDataset,
        data_root=data_root,
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        # box_type_3d is 'LiDAR' for kitti and nuscenes datasets and
        # 'Depth' for sunrgbd and scannet datasets; this camera-only
        # config sets 'Camera'.
        box_type_3d='Camera',
        load_type='fov_image_based',
        backend_args=backend_args))
# Test loader: same settings and annotation file as the validation loader.
test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type=DefaultSampler, shuffle=False),
    dataset=dict(
        type=WaymoDataset,
        data_root=data_root,
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        # box_type_3d is 'LiDAR' for kitti and nuscenes datasets and
        # 'Depth' for sunrgbd and scannet datasets; this camera-only
        # config sets 'Camera'.
        box_type_3d='Camera',
        load_type='fov_image_based',
        backend_args=backend_args))
# Waymo metric with the 'LET_mAP' metric, evaluated against ``fov_gt.bin``.
val_evaluator = dict(
    type=WaymoMetric,
    ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl',
    waymo_bin_file='./data/waymo/waymo_format/fov_gt.bin',
    data_root='./data/waymo/waymo_format',
    metric='LET_mAP',
    load_type='fov_image_based',
    backend_args=backend_args)
# Testing reuses the very same evaluator object as validation.
test_evaluator = val_evaluator
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.dataset.sampler import DefaultSampler
from mmdet3d.datasets.transforms.formating import Pack3DDetInputs
from mmdet3d.datasets.transforms.loading import (LoadAnnotations3D,
LoadMultiViewImageFromFiles)
from mmdet3d.datasets.transforms.transforms_3d import ( # noqa
MultiViewWrapper, ObjectNameFilter, ObjectRangeFilter,
PhotoMetricDistortion3D, RandomCrop3D, RandomFlip3D, RandomResize3D)
from mmdet3d.datasets.waymo_dataset import WaymoDataset
from mmdet3d.evaluation.metrics.waymo_metric import WaymoMetric
# Dataset settings.
# "D3" in a config name means the whole dataset is divided into 3 folds, of
# which only one is used to keep experiments efficient.
# NOTE(review): the train loader in this config uses load_interval=5, so the
# "D3 / 3 folds" wording may be stale -- confirm against the config name.
dataset_type = 'WaymoDataset'
data_root = 'data/waymo/kitti_format/'

# Example of using a different file client.
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcache are
# not supported yet).
# data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
# Method 2: use backend_args (file_client_args in versions before 1.1.0).
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

class_names = ['Car', 'Pedestrian', 'Cyclist']
# Camera-only setup: LiDAR input is switched off.
input_modality = dict(use_lidar=False, use_camera=True)
# Passed to ObjectRangeFilter in the training pipeline.
point_cloud_range = [-35.0, -75.0, -2, 75.0, 75.0, 4]
# Image transforms handed to MultiViewWrapper in the training pipeline.
train_transforms = [
    dict(type=PhotoMetricDistortion3D),
    dict(
        type=RandomResize3D,
        scale=(1248, 832),
        ratio_range=(0.95, 1.05),
        keep_ratio=True),
    dict(type=RandomCrop3D, crop_size=(720, 1080)),
    dict(
        type=RandomFlip3D,
        flip_ratio_bev_horizontal=0.5,
        flip_box3d=False),
]
# Training pipeline: multi-view image loading, annotation loading, wrapped
# image transforms, range/name filtering of objects, then packing.
train_pipeline = [
    dict(
        type=LoadMultiViewImageFromFiles,
        to_float32=True,
        backend_args=backend_args),
    dict(
        type=LoadAnnotations3D,
        with_bbox=True,
        with_label=True,
        with_attr_label=False,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True),
    dict(type=MultiViewWrapper, transforms=train_transforms),
    dict(type=ObjectRangeFilter, point_cloud_range=point_cloud_range),
    dict(type=ObjectNameFilter, classes=class_names),
    dict(
        type=Pack3DDetInputs,
        keys=[
            'img',
            'gt_bboxes_3d',
            'gt_labels_3d',
        ]),
]
# Image transforms for test/eval: resize only (ratio range pinned to 1).
test_transforms = [
    dict(
        type=RandomResize3D,
        scale=(1248, 832),
        ratio_range=(1., 1.),
        keep_ratio=True),
]
# Test pipeline: multi-view loading, wrapped test transforms, pack ``img``.
test_pipeline = [
    dict(
        type=LoadMultiViewImageFromFiles,
        to_float32=True,
        backend_args=backend_args),
    dict(type=MultiViewWrapper, transforms=test_transforms),
    dict(type=Pack3DDetInputs, keys=['img']),
]
# Pipeline used for data and ground-truth loading in the show function.
# Keep its loading step consistent with test_pipeline (e.g. the client).
eval_pipeline = [
    dict(
        type=LoadMultiViewImageFromFiles,
        to_float32=True,
        backend_args=backend_args),
    dict(type=MultiViewWrapper, transforms=test_transforms),
    dict(type=Pack3DDetInputs, keys=['img']),
]
# Dataset meta information shared by all three loaders.
metainfo = dict(classes=class_names)

# Training loader: shuffled, two samples per batch, five camera prefixes.
train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type=DefaultSampler, shuffle=True),
    dataset=dict(
        type=WaymoDataset,
        data_root=data_root,
        ann_file='waymo_infos_train.pkl',
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        pipeline=train_pipeline,
        modality=input_modality,
        test_mode=False,
        metainfo=metainfo,
        box_type_3d='Lidar',
        # Frame subsampling interval.
        load_interval=5,
        backend_args=backend_args))
# Validation loader: unshuffled, one sample per batch, annotations read
# from ``waymo_infos_val.pkl``.
val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type=DefaultSampler, shuffle=False),
    dataset=dict(
        type=WaymoDataset,
        data_root=data_root,
        ann_file='waymo_infos_val.pkl',
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        box_type_3d='Lidar',
        backend_args=backend_args))
# Test loader: same settings and annotation file as the validation loader.
test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type=DefaultSampler, shuffle=False),
    dataset=dict(
        type=WaymoDataset,
        data_root=data_root,
        ann_file='waymo_infos_val.pkl',
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        box_type_3d='Lidar',
        backend_args=backend_args))
# Waymo metric with the 'LET_mAP' metric, evaluated against ``cam_gt.bin``.
val_evaluator = dict(
    type=WaymoMetric,
    ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl',
    waymo_bin_file='./data/waymo/waymo_format/cam_gt.bin',
    data_root='./data/waymo/waymo_format',
    metric='LET_mAP',
    backend_args=backend_args)
# Testing reuses the very same evaluator object as validation.
test_evaluator = val_evaluator
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.dataset.sampler import DefaultSampler
from mmdet3d.datasets.transforms.formating import Pack3DDetInputs
from mmdet3d.datasets.transforms.loading import (LoadAnnotations3D,
LoadImageFromFileMono3D)
from mmdet3d.datasets.transforms.transforms_3d import (RandomFlip3D,
RandomResize3D)
from mmdet3d.datasets.waymo_dataset import WaymoDataset
from mmdet3d.evaluation.metrics.waymo_metric import WaymoMetric
# Dataset settings.
# "D3" in a config name means the whole dataset is divided into 3 folds, of
# which only one is used to keep experiments efficient.
# NOTE(review): the train loader in this config uses load_interval=5, so the
# "D3 / 3 folds" wording may be stale -- confirm against the config name.
dataset_type = 'WaymoDataset'
data_root = 'data/waymo/kitti_format/'
class_names = ['Car', 'Pedestrian', 'Cyclist']
# Camera-only setup: LiDAR input is switched off.
input_modality = dict(use_lidar=False, use_camera=True)

# Example of using a different file client.
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcache are
# not supported yet).
# data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
# Method 2: use backend_args (file_client_args in versions before 1.1.0).
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None
# Training pipeline: image loading, 2D/3D annotation loading, random resize,
# random BEV-horizontal flip, then packing of the listed keys.
train_pipeline = [
    dict(type=LoadImageFromFileMono3D, backend_args=backend_args),
    dict(
        type=LoadAnnotations3D,
        with_bbox=True,
        with_label=True,
        with_attr_label=False,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True),
    # base shape (1248, 832), scale (0.95, 1.05)
    # The base scale has to agree with the (1248, 832) used by the test and
    # eval pipelines; it had been mistyped as (1284, 832).
    dict(
        type=RandomResize3D,
        scale=(1248, 832),
        ratio_range=(0.95, 1.05),
        keep_ratio=True,
    ),
    dict(type=RandomFlip3D, flip_ratio_bev_horizontal=0.5),
    dict(
        type=Pack3DDetInputs,
        keys=[
            'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
            'gt_labels_3d', 'centers_2d', 'depths'
        ]),
]
# Test pipeline: load the image, resize to the base scale (ratio range is
# pinned to 1, so no random scaling), and pack only ``img``.
test_pipeline = [
    dict(type=LoadImageFromFileMono3D, backend_args=backend_args),
    dict(
        type=RandomResize3D,
        scale=(1248, 832),
        ratio_range=(1., 1.),
        keep_ratio=True),
    dict(type=Pack3DDetInputs, keys=['img']),
]
# Pipeline used for data and ground-truth loading in the show function.
# Keep its loading step consistent with test_pipeline (e.g. the client).
eval_pipeline = [
    dict(type=LoadImageFromFileMono3D, backend_args=backend_args),
    dict(
        type=RandomResize3D,
        scale=(1248, 832),
        ratio_range=(1., 1.),
        keep_ratio=True),
    dict(type=Pack3DDetInputs, keys=['img']),
]
# Dataset meta information shared by all three loaders.
metainfo = dict(classes=class_names)

# Training loader: shuffled, three samples per batch, five camera prefixes.
train_dataloader = dict(
    batch_size=3,
    num_workers=3,
    persistent_workers=True,
    sampler=dict(type=DefaultSampler, shuffle=True),
    dataset=dict(
        type=WaymoDataset,
        data_root=data_root,
        ann_file='waymo_infos_train.pkl',
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        pipeline=train_pipeline,
        modality=input_modality,
        test_mode=False,
        metainfo=metainfo,
        # box_type_3d is 'LiDAR' for kitti and nuscenes datasets and
        # 'Depth' for sunrgbd and scannet datasets; this camera-only
        # config sets 'Camera'.
        box_type_3d='Camera',
        load_type='mv_image_based',
        # load one frame every five frames
        load_interval=5,
        backend_args=backend_args))
# Validation loader: unshuffled, one sample per batch, annotations read
# from ``waymo_infos_val.pkl``.
val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type=DefaultSampler, shuffle=False),
    dataset=dict(
        type=WaymoDataset,
        data_root=data_root,
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        # box_type_3d is 'LiDAR' for kitti and nuscenes datasets and
        # 'Depth' for sunrgbd and scannet datasets; this camera-only
        # config sets 'Camera'.
        box_type_3d='Camera',
        load_type='mv_image_based',
        backend_args=backend_args))
# Test loader: same settings and annotation file as the validation loader.
test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type=DefaultSampler, shuffle=False),
    dataset=dict(
        type=WaymoDataset,
        data_root=data_root,
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        # box_type_3d is 'LiDAR' for kitti and nuscenes datasets and
        # 'Depth' for sunrgbd and scannet datasets; this camera-only
        # config sets 'Camera'.
        box_type_3d='Camera',
        load_type='mv_image_based',
        backend_args=backend_args))
# Waymo metric with the 'LET_mAP' metric, evaluated against ``cam_gt.bin``.
val_evaluator = dict(
    type=WaymoMetric,
    ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl',
    waymo_bin_file='./data/waymo/waymo_format/cam_gt.bin',
    data_root='./data/waymo/waymo_format',
    metric='LET_mAP',
    load_type='mv_image_based',
    backend_args=backend_args)
# Testing reuses the very same evaluator object as validation.
test_evaluator = val_evaluator
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.hooks.iter_timer_hook import IterTimerHook
from mmengine.hooks.logger_hook import LoggerHook
from mmengine.hooks.param_scheduler_hook import ParamSchedulerHook
from mmengine.hooks.sampler_seed_hook import DistSamplerSeedHook
from mmengine.runner.log_processor import LogProcessor
from mmdet3d.engine.hooks.visualization_hook import Det3DVisualizationHook
# Registry scope used by default.
default_scope = 'mmdet3d'

# Default runtime hooks. Logging happens every 50 iterations; the
# checkpoint hook is configured with interval=-1.
default_hooks = dict(
    timer=dict(type=IterTimerHook),
    logger=dict(type=LoggerHook, interval=50),
    param_scheduler=dict(type=ParamSchedulerHook),
    checkpoint=dict(type=CheckpointHook, interval=-1),
    sampler_seed=dict(type=DistSamplerSeedHook),
    visualization=dict(type=Det3DVisualizationHook))
# Environment: cuDNN benchmark off, 'fork' multiprocessing start method,
# OpenCV threads set to 0, NCCL as the distributed backend.
env_cfg = dict(
    cudnn_benchmark=False,
    mp_cfg=dict(
        mp_start_method='fork',
        opencv_num_threads=0),
    dist_cfg=dict(backend='nccl'),
)
# Epoch-based log processing with a window of 50.
log_processor = dict(
    type=LogProcessor,
    window_size=50,
    by_epoch=True)

log_level = 'INFO'
# No checkpoint is loaded and no run is resumed by default.
load_from = None
resume = False

# TODO: support auto scaling lr
# Copyright (c) OpenMMLab. All rights reserved.
from torch.nn.modules.conv import Conv2d
from mmdet3d.models.backbones.second import SECOND
from mmdet3d.models.data_preprocessors.data_preprocessor import \
Det3DDataPreprocessor
from mmdet3d.models.dense_heads.centerpoint_head import (CenterHead,
SeparateHead)
from mmdet3d.models.detectors.centerpoint import CenterPoint
from mmdet3d.models.middle_encoders.pillar_scatter import PointPillarsScatter
from mmdet3d.models.necks.second_fpn import SECONDFPN
from mmdet3d.models.task_modules.coders.centerpoint_bbox_coders import \
CenterPointBBoxCoder
from mmdet3d.models.voxel_encoders.pillar_encoder import PillarFeatureNet
# Voxel size shared by the voxelization layer, the bbox coder and the
# train/test configs below.
voxel_size = [0.2, 0.2, 8]

# CenterPoint with a pillar voxel encoder, SECOND backbone and SECONDFPN neck.
model = dict(
    type=CenterPoint,
    data_preprocessor=dict(
        type=Det3DDataPreprocessor,
        voxel=True,
        voxel_layer=dict(
            max_num_points=20,
            voxel_size=voxel_size,
            max_voxels=(30000, 40000))),
    pts_voxel_encoder=dict(
        type=PillarFeatureNet,
        in_channels=5,
        feat_channels=[64],
        with_distance=False,
        # Same values as ``voxel_size`` above, spelled out as a tuple.
        voxel_size=(0.2, 0.2, 8),
        norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
        legacy=False),
    pts_middle_encoder=dict(
        type=PointPillarsScatter,
        in_channels=64,
        output_shape=(512, 512)),
    pts_backbone=dict(
        type=SECOND,
        in_channels=64,
        out_channels=[64, 128, 256],
        layer_nums=[3, 5, 5],
        layer_strides=[2, 2, 2],
        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
        conv_cfg=dict(type=Conv2d, bias=False)),
    pts_neck=dict(
        type=SECONDFPN,
        in_channels=[64, 128, 256],
        out_channels=[128, 128, 128],
        upsample_strides=[0.5, 1, 2],
        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
        upsample_cfg=dict(type='deconv', bias=False),
        use_conv_for_no_stride=True),
    pts_bbox_head=dict(
        type=CenterHead,
        # Equals the sum of the neck's out_channels.
        in_channels=sum([128, 128, 128]),
        # Six task groups covering ten class names in total.
        tasks=[
            dict(num_class=1, class_names=['car']),
            dict(num_class=2, class_names=['truck', 'construction_vehicle']),
            dict(num_class=2, class_names=['bus', 'trailer']),
            dict(num_class=1, class_names=['barrier']),
            dict(num_class=2, class_names=['motorcycle', 'bicycle']),
            dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),
        ],
        common_heads=dict(
            reg=(2, 2),
            height=(1, 2),
            dim=(3, 2),
            rot=(2, 2),
            vel=(2, 2)),
        share_conv_channel=64,
        bbox_coder=dict(
            type=CenterPointBBoxCoder,
            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
            max_num=500,
            score_threshold=0.1,
            out_size_factor=4,
            voxel_size=voxel_size[:2],
            code_size=9),
        separate_head=dict(
            type=SeparateHead,
            init_bias=-2.19,
            final_kernel=3),
        loss_cls=dict(type='mmdet.GaussianFocalLoss', reduction='mean'),
        loss_bbox=dict(
            type='mmdet.L1Loss',
            reduction='mean',
            loss_weight=0.25),
        norm_bbox=True),
    # model training and testing settings
    train_cfg=dict(
        pts=dict(
            grid_size=[512, 512, 1],
            voxel_size=voxel_size,
            out_size_factor=4,
            dense_reg=1,
            gaussian_overlap=0.1,
            max_objs=500,
            min_radius=2,
            code_weights=[
                1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2
            ])),
    test_cfg=dict(
        pts=dict(
            post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
            max_per_img=500,
            max_pool_nms=False,
            min_radius=[4, 12, 10, 1, 0.85, 0.175],
            score_threshold=0.1,
            pc_range=[-51.2, -51.2],
            out_size_factor=4,
            voxel_size=voxel_size[:2],
            nms_type='rotate',
            pre_max_size=1000,
            post_max_size=83,
            nms_thr=0.2)))
# Copyright (c) OpenMMLab. All rights reserved.
from torch.nn.modules.conv import Conv2d
from mmdet3d.models.backbones.second import SECOND
from mmdet3d.models.data_preprocessors.data_preprocessor import \
Det3DDataPreprocessor
from mmdet3d.models.dense_heads.centerpoint_head import (CenterHead,
SeparateHead)
from mmdet3d.models.detectors.centerpoint import CenterPoint
from mmdet3d.models.middle_encoders.sparse_encoder import SparseEncoder
from mmdet3d.models.necks.second_fpn import SECONDFPN
from mmdet3d.models.task_modules.coders.centerpoint_bbox_coders import \
CenterPointBBoxCoder
from mmdet3d.models.voxel_encoders.voxel_encoder import HardSimpleVFE
# Voxel size shared by the voxelization layer, the bbox coder and the
# train/test configs below.
voxel_size = [0.1, 0.1, 0.2]

# CenterPoint with a hard-voxel VFE, sparse-conv middle encoder, SECOND
# backbone and SECONDFPN neck.
model = dict(
    type=CenterPoint,
    data_preprocessor=dict(
        type=Det3DDataPreprocessor,
        voxel=True,
        voxel_layer=dict(
            max_num_points=10,
            voxel_size=voxel_size,
            max_voxels=(90000, 120000))),
    pts_voxel_encoder=dict(type=HardSimpleVFE, num_features=5),
    pts_middle_encoder=dict(
        type=SparseEncoder,
        in_channels=5,
        sparse_shape=[41, 1024, 1024],
        output_channels=128,
        order=('conv', 'norm', 'act'),
        encoder_channels=(
            (16, 16, 32),
            (32, 32, 64),
            (64, 64, 128),
            (128, 128),
        ),
        encoder_paddings=(
            (0, 0, 1),
            (0, 0, 1),
            (0, 0, [0, 1, 1]),
            (0, 0),
        ),
        block_type='basicblock'),
    pts_backbone=dict(
        type=SECOND,
        in_channels=256,
        out_channels=[128, 256],
        layer_nums=[5, 5],
        layer_strides=[1, 2],
        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
        conv_cfg=dict(type=Conv2d, bias=False)),
    pts_neck=dict(
        type=SECONDFPN,
        in_channels=[128, 256],
        out_channels=[256, 256],
        upsample_strides=[1, 2],
        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
        upsample_cfg=dict(type='deconv', bias=False),
        use_conv_for_no_stride=True),
    pts_bbox_head=dict(
        type=CenterHead,
        # Equals the sum of the neck's out_channels.
        in_channels=sum([256, 256]),
        # Six task groups covering ten class names in total.
        tasks=[
            dict(num_class=1, class_names=['car']),
            dict(num_class=2, class_names=['truck', 'construction_vehicle']),
            dict(num_class=2, class_names=['bus', 'trailer']),
            dict(num_class=1, class_names=['barrier']),
            dict(num_class=2, class_names=['motorcycle', 'bicycle']),
            dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),
        ],
        common_heads=dict(
            reg=(2, 2),
            height=(1, 2),
            dim=(3, 2),
            rot=(2, 2),
            vel=(2, 2)),
        share_conv_channel=64,
        bbox_coder=dict(
            type=CenterPointBBoxCoder,
            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
            max_num=500,
            score_threshold=0.1,
            out_size_factor=8,
            voxel_size=voxel_size[:2],
            code_size=9),
        separate_head=dict(
            type=SeparateHead,
            init_bias=-2.19,
            final_kernel=3),
        loss_cls=dict(type='mmdet.GaussianFocalLoss', reduction='mean'),
        loss_bbox=dict(
            type='mmdet.L1Loss',
            reduction='mean',
            loss_weight=0.25),
        norm_bbox=True),
    # model training and testing settings
    train_cfg=dict(
        pts=dict(
            grid_size=[1024, 1024, 40],
            voxel_size=voxel_size,
            out_size_factor=8,
            dense_reg=1,
            gaussian_overlap=0.1,
            max_objs=500,
            min_radius=2,
            code_weights=[
                1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2
            ])),
    test_cfg=dict(
        pts=dict(
            post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
            max_per_img=500,
            max_pool_nms=False,
            min_radius=[4, 12, 10, 1, 0.85, 0.175],
            score_threshold=0.1,
            out_size_factor=8,
            voxel_size=voxel_size[:2],
            nms_type='rotate',
            pre_max_size=1000,
            post_max_size=83,
            nms_thr=0.2)))
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet3d.models import Cylinder3D
from mmdet3d.models.backbones import Asymm3DSpconv
from mmdet3d.models.data_preprocessors import Det3DDataPreprocessor
from mmdet3d.models.decode_heads.cylinder3d_head import Cylinder3DHead
from mmdet3d.models.losses import LovaszLoss
from mmdet3d.models.voxel_encoders import SegVFE
# Grid shape shared by the voxelization layer and the backbone.
grid_shape = [480, 360, 32]

# Cylinder3D segmentor using cylindrical voxelization.
model = dict(
    type=Cylinder3D,
    data_preprocessor=dict(
        type=Det3DDataPreprocessor,
        voxel=True,
        voxel_type='cylindrical',
        voxel_layer=dict(
            grid_shape=grid_shape,
            # The second coordinate spans [-pi, pi].
            point_cloud_range=[0, -3.14159265359, -4, 50, 3.14159265359, 2],
            max_num_points=-1,
            max_voxels=-1,
        ),
    ),
    voxel_encoder=dict(
        type=SegVFE,
        feat_channels=[64, 128, 256, 256],
        in_channels=6,
        with_voxel_center=True,
        feat_compression=16,
        return_point_feats=False),
    backbone=dict(
        type=Asymm3DSpconv,
        grid_size=grid_shape,
        # Equals the voxel encoder's feat_compression.
        input_channels=16,
        base_channels=32,
        norm_cfg=dict(type='BN1d', eps=1e-5, momentum=0.1)),
    decode_head=dict(
        type=Cylinder3DHead,
        channels=128,
        num_classes=20,
        loss_ce=dict(
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=False,
            class_weight=None,
            loss_weight=1.0),
        loss_lovasz=dict(
            type=LovaszLoss,
            loss_weight=1.0,
            reduction='none'),
    ),
    train_cfg=None,
    test_cfg=dict(mode='whole'),
)
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet3d.models.data_preprocessors.data_preprocessor import \
Det3DDataPreprocessor
from mmdet3d.models.dense_heads.fcos_mono3d_head import FCOSMono3DHead
from mmdet3d.models.detectors.fcos_mono3d import FCOSMono3D
from mmdet3d.models.task_modules.coders.fcos3d_bbox_coder import \
FCOS3DBBoxCoder
# Model settings: FCOSMono3D with a ResNet-101 backbone and an FPN neck.
model = dict(
    type=FCOSMono3D,
    data_preprocessor=dict(
        type=Det3DDataPreprocessor,
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True,
        pad_size_divisor=32),
    backbone=dict(
        type='mmdet.ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        style='caffe',
        init_cfg=dict(
            type='Pretrained',
            checkpoint='open-mmlab://detectron2/resnet101_caffe')),
    neck=dict(
        type='mmdet.FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5,
        relu_before_extra_convs=True),
    bbox_head=dict(
        type=FCOSMono3DHead,
        num_classes=10,
        in_channels=256,
        stacked_convs=2,
        feat_channels=256,
        use_direction_classifier=True,
        diff_rad_by_sin=True,
        pred_attrs=True,
        pred_velo=True,
        dir_offset=0.7854,  # pi/4
        dir_limit_offset=0,
        strides=[8, 16, 32, 64, 128],
        # offset, depth, size, rot, velo
        group_reg_dims=(2, 1, 3, 1, 2),
        cls_branch=(256, ),
        # One entry per regression group, in the order listed above.
        reg_branch=(
            (256, ),  # offset
            (256, ),  # depth
            (256, ),  # size
            (256, ),  # rot
            ()  # velo
        ),
        dir_branch=(256, ),
        attr_branch=(256, ),
        loss_cls=dict(
            type='mmdet.FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(
            type='mmdet.SmoothL1Loss',
            beta=1.0 / 9.0,
            loss_weight=1.0),
        loss_dir=dict(
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=False,
            loss_weight=1.0),
        loss_attr=dict(
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=False,
            loss_weight=1.0),
        loss_centerness=dict(
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=True,
            loss_weight=1.0),
        bbox_coder=dict(type=FCOS3DBBoxCoder, code_size=9),
        norm_on_bbox=True,
        centerness_on_reg=True,
        center_sampling=True,
        conv_bias=True,
        dcn_on_last_conv=True),
    train_cfg=dict(
        allowed_border=0,
        code_weight=[1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.05, 0.05],
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        use_rotate_nms=True,
        nms_across_levels=False,
        nms_pre=1000,
        nms_thr=0.8,
        score_thr=0.05,
        min_bbox_size=0,
        max_per_img=200))
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet3d.models.backbones.minkunet_backbone import MinkUNetBackbone
from mmdet3d.models.data_preprocessors.data_preprocessor import \
Det3DDataPreprocessor
from mmdet3d.models.decode_heads.minkunet_head import MinkUNetHead
from mmdet3d.models.segmentors.minkunet import MinkUNet
# MinkUNet segmentor using the 'minkunet' voxelization type and the
# torchsparse sparse-convolution backend.
model = dict(
    type=MinkUNet,
    data_preprocessor=dict(
        type=Det3DDataPreprocessor,
        voxel=True,
        voxel_type='minkunet',
        batch_first=False,
        max_voxels=80000,
        voxel_layer=dict(
            max_num_points=-1,
            point_cloud_range=[-100, -100, -20, 100, 100, 20],
            voxel_size=[0.05, 0.05, 0.05],
            max_voxels=(-1, -1))),
    backbone=dict(
        type=MinkUNetBackbone,
        in_channels=4,
        num_stages=4,
        base_channels=32,
        encoder_channels=[32, 64, 128, 256],
        encoder_blocks=[2, 2, 2, 2],
        decoder_channels=[256, 128, 96, 96],
        decoder_blocks=[2, 2, 2, 2],
        block_type='basic',
        sparseconv_backend='torchsparse'),
    decode_head=dict(
        type=MinkUNetHead,
        # Equals the last entry of the backbone's decoder_channels.
        channels=96,
        num_classes=19,
        dropout_ratio=0,
        loss_decode=dict(
            type='mmdet.CrossEntropyLoss',
            avg_non_ignore=True),
        ignore_index=19),
    train_cfg=dict(),
    test_cfg=dict())
# Copyright (c) OpenMMLab. All rights reserved.
# Base-config import. The star import must be indented under the ``if``;
# left at column 0 the ``if`` has no body and the file does not parse.
# The ``model`` object modified further down is expected to come from here.
if '_base_':
    from .fcos3d import *
from mmdet3d.models.dense_heads.pgd_head import PGDHead
from mmdet3d.models.task_modules.coders.pgd_bbox_coder import PGDBBoxCoder
# Model settings: merge a PGD head and new test settings into the inherited
# ``model``. ``_delete_=True`` is set on the bbox_head entry.
model.merge(
    dict(
        bbox_head=dict(
            _delete_=True,
            type=PGDHead,
            num_classes=10,
            in_channels=256,
            stacked_convs=2,
            feat_channels=256,
            use_direction_classifier=True,
            diff_rad_by_sin=True,
            pred_attrs=True,
            pred_velo=True,
            pred_bbox2d=True,
            pred_keypoints=False,
            dir_offset=0.7854,  # pi/4
            strides=[8, 16, 32, 64, 128],
            # offset, depth, size, rot, velo
            group_reg_dims=(2, 1, 3, 1, 2),
            cls_branch=(256, ),
            # One entry per regression group, in the order listed above.
            reg_branch=(
                (256, ),  # offset
                (256, ),  # depth
                (256, ),  # size
                (256, ),  # rot
                ()  # velo
            ),
            dir_branch=(256, ),
            attr_branch=(256, ),
            loss_cls=dict(
                type='mmdet.FocalLoss',
                use_sigmoid=True,
                gamma=2.0,
                alpha=0.25,
                loss_weight=1.0),
            loss_bbox=dict(
                type='mmdet.SmoothL1Loss',
                beta=1.0 / 9.0,
                loss_weight=1.0),
            loss_dir=dict(
                type='mmdet.CrossEntropyLoss',
                use_sigmoid=False,
                loss_weight=1.0),
            loss_attr=dict(
                type='mmdet.CrossEntropyLoss',
                use_sigmoid=False,
                loss_weight=1.0),
            loss_centerness=dict(
                type='mmdet.CrossEntropyLoss',
                use_sigmoid=True,
                loss_weight=1.0),
            norm_on_bbox=True,
            centerness_on_reg=True,
            center_sampling=True,
            conv_bias=True,
            dcn_on_last_conv=True,
            # Depth-classification settings.
            use_depth_classifier=True,
            depth_branch=(256, ),
            depth_range=(0, 50),
            depth_unit=10,
            division='uniform',
            depth_bins=6,
            bbox_coder=dict(type=PGDBBoxCoder, code_size=9)),
        test_cfg=dict(
            nms_pre=1000,
            nms_thr=0.8,
            score_thr=0.01,
            max_per_img=200)))
# Copyright (c) OpenMMLab. All rights reserved.
from torch.nn.modules.conv import Conv1d
from mmdet3d.models.backbones.pointnet2_sa_ssg import PointNet2SASSG
from mmdet3d.models.data_preprocessors.data_preprocessor import \
Det3DDataPreprocessor
from mmdet3d.models.dense_heads.vote_head import VoteHead
from mmdet3d.models.detectors.votenet import VoteNet
from mmdet3d.models.losses.chamfer_distance import ChamferDistance
# VoteNet detector with a PointNet2SASSG backbone and a VoteHead.
model = dict(
    type=VoteNet,
    data_preprocessor=dict(type=Det3DDataPreprocessor),
    backbone=dict(
        type=PointNet2SASSG,
        in_channels=4,
        # Four entries each: one per set-abstraction level.
        num_points=(2048, 1024, 512, 256),
        radius=(0.2, 0.4, 0.8, 1.2),
        num_samples=(64, 32, 16, 16),
        sa_channels=(
            (64, 64, 128),
            (128, 128, 256),
            (128, 128, 256),
            (128, 128, 256),
        ),
        fp_channels=((256, 256), (256, 256)),
        norm_cfg=dict(type='BN2d'),
        sa_cfg=dict(
            type='PointSAModule',
            pool_mod='max',
            use_xyz=True,
            normalize_xyz=True)),
    bbox_head=dict(
        type=VoteHead,
        vote_module_cfg=dict(
            in_channels=256,
            vote_per_seed=1,
            gt_per_seed=3,
            conv_channels=(256, 256),
            conv_cfg=dict(type=Conv1d),
            norm_cfg=dict(type='BN1d'),
            norm_feats=True,
            vote_loss=dict(
                type=ChamferDistance,
                mode='l1',
                reduction='none',
                loss_dst_weight=10.0)),
        vote_aggregation_cfg=dict(
            type='PointSAModule',
            num_point=256,
            radius=0.3,
            num_sample=16,
            mlp_channels=[256, 128, 128, 128],
            use_xyz=True,
            normalize_xyz=True),
        pred_layer_cfg=dict(
            in_channels=128,
            shared_conv_channels=(128, 128),
            bias=True),
        objectness_loss=dict(
            type='mmdet.CrossEntropyLoss',
            class_weight=[0.2, 0.8],
            reduction='sum',
            loss_weight=5.0),
        center_loss=dict(
            type=ChamferDistance,
            mode='l2',
            reduction='sum',
            loss_src_weight=10.0,
            loss_dst_weight=10.0),
        dir_class_loss=dict(
            type='mmdet.CrossEntropyLoss',
            reduction='sum',
            loss_weight=1.0),
        dir_res_loss=dict(
            type='mmdet.SmoothL1Loss',
            reduction='sum',
            loss_weight=10.0),
        size_class_loss=dict(
            type='mmdet.CrossEntropyLoss',
            reduction='sum',
            loss_weight=1.0),
        size_res_loss=dict(
            type='mmdet.SmoothL1Loss',
            reduction='sum',
            loss_weight=10.0 / 3.0),
        semantic_loss=dict(
            type='mmdet.CrossEntropyLoss',
            reduction='sum',
            loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(
        pos_distance_thr=0.3,
        neg_distance_thr=0.6,
        sample_mode='vote'),
    test_cfg=dict(
        sample_mode='seed',
        nms_thr=0.25,
        score_thr=0.05,
        per_class_proposal=True))
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
from torch.optim.adamw import AdamW
# This schedule is mainly used by models with dynamic voxelization.
# Optimizer: AdamW with gradient clipping at an L2 norm of 10.
lr = 0.003  # max learning rate
optim_wrapper = dict(
    type=OptimWrapper,
    optimizer=dict(
        type=AdamW,
        lr=lr,
        weight_decay=0.001,
        betas=(0.95, 0.99)),
    clip_grad=dict(max_norm=10, norm_type=2),
)
# Learning-rate schedule: an iteration-based linear scheduler over the first
# 1000 iterations plus an epoch-based cosine scheduler over 40 epochs.
param_scheduler = [
    dict(
        type=LinearLR,
        start_factor=0.1,
        by_epoch=False,
        begin=0,
        end=1000),
    dict(
        type=CosineAnnealingLR,
        begin=0,
        T_max=40,
        end=40,
        by_epoch=True,
        eta_min=1e-5),
]
# Training schedule for 1x: 40 epochs, validating after every epoch.
train_cfg = dict(
    type=EpochBasedTrainLoop,
    max_epochs=40,
    val_interval=1)
val_cfg = dict(type=ValLoop)
test_cfg = dict(type=TestLoop)
# Default setting for scaling the learning rate automatically.
#   - `enable`: whether automatic LR scaling is on by default.
#   - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr = dict(
    enable=False,
    base_batch_size=16)
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR
from mmengine.optim.scheduler.momentum_scheduler import CosineAnnealingMomentum
from torch.optim.adamw import AdamW
# For the nuScenes dataset, we usually evaluate the model at the end of
# training. Since this schedule trains for 20 epochs by default, the
# evaluation interval is set to 20. Please change the interval accordingly
# if you do not use the default schedule.
# Optimizer settings. This schedule is mainly used by models trained on
# the nuScenes dataset.
lr = 1e-4
optim_wrapper = {
    'type': OptimWrapper,
    'optimizer': {'type': AdamW, 'lr': lr, 'weight_decay': 0.01},
    # NOTE: max_norm=10 is better for SECOND.
    'clip_grad': {'max_norm': 35, 'norm_type': 2},
}
# Cyclic schedule for learning rate and momentum. Every scheduler below is
# declared per epoch and converted to iteration-based updates.
param_scheduler = [
    # Learning rate, epochs 0-8: cosine annealing from the initial lr up
    # to lr * 10.
    {
        'type': CosineAnnealingLR,
        'T_max': 8,
        'eta_min': lr * 10,
        'begin': 0,
        'end': 8,
        'by_epoch': True,
        'convert_to_iter_based': True,
    },
    # Learning rate, epochs 8-20: cosine annealing from lr * 10 down to
    # lr * 1e-4.
    {
        'type': CosineAnnealingLR,
        'T_max': 12,
        'eta_min': lr * 1e-4,
        'begin': 8,
        'end': 20,
        'by_epoch': True,
        'convert_to_iter_based': True,
    },
    # Momentum, epochs 0-8: cosine annealing towards 0.85 / 0.95.
    # NOTE(review): presumably the starting point is the optimizer's own
    # momentum (beta1 for AdamW) rather than 0 -- confirm against mmengine.
    {
        'type': CosineAnnealingMomentum,
        'T_max': 8,
        'eta_min': 0.85 / 0.95,
        'begin': 0,
        'end': 8,
        'by_epoch': True,
        'convert_to_iter_based': True,
    },
    # Momentum, epochs 8-20: cosine annealing from 0.85 / 0.95 up to 1.
    {
        'type': CosineAnnealingMomentum,
        'T_max': 12,
        'eta_min': 1,
        'begin': 8,
        'end': 20,
        'by_epoch': True,
        'convert_to_iter_based': True,
    },
]
# Runtime settings: epoch-based training for 20 epochs, with a validation
# interval of 20 epochs.
train_cfg = {'by_epoch': True, 'max_epochs': 20, 'val_interval': 20}
val_cfg = {}
test_cfg = {}
# Automatic learning-rate scaling.
#   - `enable`: whether LR is scaled automatically; off by default.
#   - `base_batch_size`: (8 GPUs) x (4 samples per GPU) = 32.
auto_scale_lr = {'enable': False, 'base_batch_size': 32}
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR
from mmengine.optim.scheduler.momentum_scheduler import CosineAnnealingMomentum
from torch.optim.adamw import AdamW
# This schedule is usually used by models trained on the KITTI dataset.
# The learning rate set in the cyclic schedule is the initial learning rate
# rather than the max learning rate. Since the target_ratio is (10, 1e-4),
# the learning rate will change from 0.0018 to 0.018, then go to
# 0.0018 * 1e-4.
lr = 0.0018
# The optimizer hyper-parameters follow SECOND.Pytorch, but the optimizer
# itself is the official AdamW implementation shipped with PyTorch.
optim_wrapper = {
    'type': OptimWrapper,
    'optimizer': {
        'type': AdamW,
        'lr': lr,
        'betas': (0.95, 0.99),
        'weight_decay': 0.01,
    },
    # Gradient clipping configured with max_norm=10 and the L2 norm.
    'clip_grad': {'max_norm': 10, 'norm_type': 2},
}
# Cyclic schedule for learning rate and momentum. Every scheduler below is
# declared per epoch and converted to iteration-based updates.
param_scheduler = [
    # Learning rate, epochs 0-16: cosine annealing from the initial lr up
    # to lr * 10.
    {
        'type': CosineAnnealingLR,
        'T_max': 16,
        'eta_min': lr * 10,
        'begin': 0,
        'end': 16,
        'by_epoch': True,
        'convert_to_iter_based': True,
    },
    # Learning rate, epochs 16-40: cosine annealing from lr * 10 down to
    # lr * 1e-4.
    {
        'type': CosineAnnealingLR,
        'T_max': 24,
        'eta_min': lr * 1e-4,
        'begin': 16,
        'end': 40,
        'by_epoch': True,
        'convert_to_iter_based': True,
    },
    # Momentum, epochs 0-16: cosine annealing towards 0.85 / 0.95.
    # NOTE(review): presumably the starting point is the optimizer's own
    # momentum (beta1 for AdamW) rather than 0 -- confirm against mmengine.
    {
        'type': CosineAnnealingMomentum,
        'T_max': 16,
        'eta_min': 0.85 / 0.95,
        'begin': 0,
        'end': 16,
        'by_epoch': True,
        'convert_to_iter_based': True,
    },
    # Momentum, epochs 16-40: cosine annealing from 0.85 / 0.95 up to 1.
    {
        'type': CosineAnnealingMomentum,
        'T_max': 24,
        'eta_min': 1,
        'begin': 16,
        'end': 40,
        'by_epoch': True,
        'convert_to_iter_based': True,
    },
]
# Runtime settings: training schedule for 40 epochs.
# Although max_epochs is 40, this schedule is usually used with
# RepeatDataset at repeat ratio N, so the actual number of epochs
# could be N x 40.
train_cfg = {'by_epoch': True, 'max_epochs': 40, 'val_interval': 1}
val_cfg = {}
test_cfg = {}
# Automatic learning-rate scaling.
#   - `enable`: whether LR is scaled automatically; off by default.
#   - `base_batch_size`: (8 GPUs) x (6 samples per GPU) = 48.
auto_scale_lr = {'enable': False, 'base_batch_size': 48}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment