Commit aec41c7f authored by zhangwenwei's avatar zhangwenwei
Browse files

Merge master

parents 49f06039 4eca6606
...@@ -3,6 +3,6 @@ line_length = 79 ...@@ -3,6 +3,6 @@ line_length = 79
multi_line_output = 0 multi_line_output = 0
known_standard_library = setuptools known_standard_library = setuptools
known_first_party = mmdet,mmdet3d known_first_party = mmdet,mmdet3d
known_third_party = cv2,mmcv,numba,numpy,nuscenes,pycocotools,pyquaternion,pytest,shapely,six,skimage,torch,torchvision known_third_party = cv2,mmcv,numba,numpy,nuscenes,pycocotools,pyquaternion,pytest,scipy,shapely,six,skimage,torch,torchvision
no_lines_before = STDLIB,LOCALFOLDER no_lines_before = STDLIB,LOCALFOLDER
default_section = THIRDPARTY default_section = THIRDPARTY
...@@ -72,18 +72,21 @@ model = dict( ...@@ -72,18 +72,21 @@ model = dict(
feat_channels=512, feat_channels=512,
use_direction_classifier=True, use_direction_classifier=True,
encode_bg_as_zeros=True, encode_bg_as_zeros=True,
anchor_range=[ anchor_generator=dict(
[0, -40.0, -0.6, 70.4, 40.0, -0.6], type='Anchor3DRangeGenerator',
[0, -40.0, -0.6, 70.4, 40.0, -0.6], ranges=[
[0, -40.0, -1.78, 70.4, 40.0, -1.78], [0, -40.0, -0.6, 70.4, 40.0, -0.6],
], [0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -1.78, 70.4, 40.0, -1.78],
],
strides=[2],
sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
rotations=[0, 1.57],
reshape_out=False),
assigner_per_size=True, assigner_per_size=True,
anchor_strides=[2],
anchor_sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
anchor_rotations=[0, 1.57],
diff_rad_by_sin=True, diff_rad_by_sin=True,
assign_per_class=True, assign_per_class=True,
bbox_coder=dict(type='Residual3DBoxCoder', ), bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', ),
loss_cls=dict( loss_cls=dict(
type='FocalLoss', type='FocalLoss',
use_sigmoid=True, use_sigmoid=True,
......
...@@ -43,12 +43,15 @@ model = dict( ...@@ -43,12 +43,15 @@ model = dict(
feat_channels=384, feat_channels=384,
use_direction_classifier=True, use_direction_classifier=True,
encode_bg_as_zeros=True, encode_bg_as_zeros=True,
anchor_range=[0, -39.68, -1.78, 69.12, 39.68, -1.78], anchor_generator=dict(
anchor_strides=[2], type='Anchor3DRangeGenerator',
anchor_sizes=[[1.6, 3.9, 1.56]], ranges=[[0, -39.68, -1.78, 69.12, 39.68, -1.78]],
anchor_rotations=[0, 1.57], strides=[2],
sizes=[[1.6, 3.9, 1.56]],
rotations=[0, 1.57],
reshape_out=True),
diff_rad_by_sin=True, diff_rad_by_sin=True,
bbox_coder=dict(type='Residual3DBoxCoder', ), bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', ),
loss_cls=dict( loss_cls=dict(
type='FocalLoss', type='FocalLoss',
use_sigmoid=True, use_sigmoid=True,
...@@ -116,7 +119,7 @@ train_pipeline = [ ...@@ -116,7 +119,7 @@ train_pipeline = [
loc_noise_std=[0.25, 0.25, 0.25], loc_noise_std=[0.25, 0.25, 0.25],
global_rot_range=[0.0, 0.0], global_rot_range=[0.0, 0.0],
rot_uniform_noise=[-0.15707963267, 0.15707963267]), rot_uniform_noise=[-0.15707963267, 0.15707963267]),
dict(type='PointsRandomFlip', flip_ratio=0.5), dict(type='RandomFlip3D', flip_ratio=0.5),
dict( dict(
type='GlobalRotScale', type='GlobalRotScale',
rot_uniform_noise=[-0.78539816, 0.78539816], rot_uniform_noise=[-0.78539816, 0.78539816],
...@@ -125,7 +128,7 @@ train_pipeline = [ ...@@ -125,7 +128,7 @@ train_pipeline = [
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'), dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names), dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes', 'gt_labels']), dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
] ]
test_pipeline = [ test_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
...@@ -133,7 +136,7 @@ test_pipeline = [ ...@@ -133,7 +136,7 @@ test_pipeline = [
type='DefaultFormatBundle3D', type='DefaultFormatBundle3D',
class_names=class_names, class_names=class_names,
with_label=False), with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes']), dict(type='Collect3D', keys=['points', 'gt_bboxes_3d']),
] ]
data = dict( data = dict(
...@@ -174,13 +177,13 @@ optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) ...@@ -174,13 +177,13 @@ optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy # learning policy
lr_config = dict( lr_config = dict(
policy='cyclic', policy='cyclic',
target_ratio=[10, 1e-4], target_ratio=(10, 1e-4),
cyclic_times=1, cyclic_times=1,
step_ratio_up=0.4, step_ratio_up=0.4,
) )
momentum_config = dict( momentum_config = dict(
policy='cyclic', policy='cyclic',
target_ratio=[0.85 / 0.95, 1], target_ratio=(0.85 / 0.95, 1),
cyclic_times=1, cyclic_times=1,
step_ratio_up=0.4, step_ratio_up=0.4,
) )
......
...@@ -41,18 +41,21 @@ model = dict( ...@@ -41,18 +41,21 @@ model = dict(
feat_channels=512, feat_channels=512,
use_direction_classifier=True, use_direction_classifier=True,
encode_bg_as_zeros=True, encode_bg_as_zeros=True,
anchor_range=[ anchor_generator=dict(
[0, -40.0, -0.6, 70.4, 40.0, -0.6], type='Anchor3DRangeGenerator',
[0, -40.0, -0.6, 70.4, 40.0, -0.6], ranges=[
[0, -40.0, -1.78, 70.4, 40.0, -1.78], [0, -40.0, -0.6, 70.4, 40.0, -0.6],
], [0, -40.0, -0.6, 70.4, 40.0, -0.6],
anchor_strides=[2], [0, -40.0, -1.78, 70.4, 40.0, -1.78],
anchor_sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]], ],
anchor_rotations=[0, 1.57], strides=[2],
sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
rotations=[0, 1.57],
reshape_out=False),
diff_rad_by_sin=True, diff_rad_by_sin=True,
assigner_per_size=True, assigner_per_size=True,
assign_per_class=True, assign_per_class=True,
bbox_coder=dict(type='Residual3DBoxCoder', ), bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', ),
loss_cls=dict( loss_cls=dict(
type='FocalLoss', type='FocalLoss',
use_sigmoid=True, use_sigmoid=True,
......
...@@ -41,12 +41,15 @@ model = dict( ...@@ -41,12 +41,15 @@ model = dict(
feat_channels=512, feat_channels=512,
use_direction_classifier=True, use_direction_classifier=True,
encode_bg_as_zeros=True, encode_bg_as_zeros=True,
anchor_range=[0, -40.0, -1.78, 70.4, 40.0, -1.78], anchor_generator=dict(
anchor_strides=[2], type='Anchor3DRangeGenerator',
anchor_sizes=[[1.6, 3.9, 1.56]], ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
anchor_rotations=[0, 1.57], strides=[2],
sizes=[[1.6, 3.9, 1.56]],
rotations=[0, 1.57],
reshape_out=True),
diff_rad_by_sin=True, diff_rad_by_sin=True,
bbox_coder=dict(type='Residual3DBoxCoder', ), bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict( loss_cls=dict(
type='FocalLoss', type='FocalLoss',
use_sigmoid=True, use_sigmoid=True,
...@@ -91,7 +94,7 @@ input_modality = dict( ...@@ -91,7 +94,7 @@ input_modality = dict(
use_lidar=True, use_lidar=True,
use_depth=False, use_depth=False,
use_lidar_intensity=True, use_lidar_intensity=True,
use_camera=False, use_camera=True,
) )
db_sampler = dict( db_sampler = dict(
root_path=data_root, root_path=data_root,
...@@ -113,7 +116,7 @@ train_pipeline = [ ...@@ -113,7 +116,7 @@ train_pipeline = [
loc_noise_std=[1.0, 1.0, 0.5], loc_noise_std=[1.0, 1.0, 0.5],
global_rot_range=[0.0, 0.0], global_rot_range=[0.0, 0.0],
rot_uniform_noise=[-0.78539816, 0.78539816]), rot_uniform_noise=[-0.78539816, 0.78539816]),
dict(type='PointsRandomFlip', flip_ratio=0.5), dict(type='RandomFlip3D', flip_ratio=0.5),
dict( dict(
type='GlobalRotScale', type='GlobalRotScale',
rot_uniform_noise=[-0.78539816, 0.78539816], rot_uniform_noise=[-0.78539816, 0.78539816],
...@@ -122,7 +125,7 @@ train_pipeline = [ ...@@ -122,7 +125,7 @@ train_pipeline = [
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'), dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names), dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes', 'gt_labels']), dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
] ]
test_pipeline = [ test_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
...@@ -130,7 +133,7 @@ test_pipeline = [ ...@@ -130,7 +133,7 @@ test_pipeline = [
type='DefaultFormatBundle3D', type='DefaultFormatBundle3D',
class_names=class_names, class_names=class_names,
with_label=False), with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes']), dict(type='Collect3D', keys=['points', 'gt_bboxes_3d']),
] ]
data = dict( data = dict(
...@@ -170,13 +173,13 @@ optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01) ...@@ -170,13 +173,13 @@ optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict( lr_config = dict(
policy='cyclic', policy='cyclic',
target_ratio=[10, 1e-4], target_ratio=(10, 1e-4),
cyclic_times=1, cyclic_times=1,
step_ratio_up=0.4, step_ratio_up=0.4,
) )
momentum_config = dict( momentum_config = dict(
policy='cyclic', policy='cyclic',
target_ratio=[0.85 / 0.95, 1], target_ratio=(0.85 / 0.95, 1),
cyclic_times=1, cyclic_times=1,
step_ratio_up=0.4, step_ratio_up=0.4,
) )
......
...@@ -2,8 +2,7 @@ ...@@ -2,8 +2,7 @@
norm_cfg = dict(type='BN', requires_grad=False) norm_cfg = dict(type='BN', requires_grad=False)
model = dict( model = dict(
type='FasterRCNN', type='FasterRCNN',
pretrained=('./pretrain_detectron/' pretrained=('open-mmlab://resnet50_caffe_bgr'),
'ImageNetPretrained/MSRA/resnet50_msra.pth'),
backbone=dict( backbone=dict(
type='ResNet', type='ResNet',
depth=50, depth=50,
...@@ -22,11 +21,15 @@ model = dict( ...@@ -22,11 +21,15 @@ model = dict(
type='RPNHead', type='RPNHead',
in_channels=256, in_channels=256,
feat_channels=256, feat_channels=256,
anchor_scales=[8], anchor_generator=dict(
anchor_ratios=[1 / 3, 0.5, 1.0, 2.0, 3.0], type='AnchorGenerator',
anchor_strides=[4, 8, 16, 32, 64], scales=[8],
target_means=[.0, .0, .0, .0], ratios=[0.5, 1.0, 2.0],
target_stds=[1.0, 1.0, 1.0, 1.0], strides=[4, 8, 16, 32, 64]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict( loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)), loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
...@@ -43,8 +46,10 @@ model = dict( ...@@ -43,8 +46,10 @@ model = dict(
fc_out_channels=1024, fc_out_channels=1024,
roi_feat_size=7, roi_feat_size=7,
num_classes=80, num_classes=80,
target_means=[0., 0., 0., 0.], bbox_coder=dict(
target_stds=[0.1, 0.1, 0.2, 0.2], type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False, reg_class_agnostic=False,
loss_cls=dict( loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
......
...@@ -42,12 +42,15 @@ model = dict( ...@@ -42,12 +42,15 @@ model = dict(
feat_channels=384, feat_channels=384,
use_direction_classifier=True, use_direction_classifier=True,
encode_bg_as_zeros=True, encode_bg_as_zeros=True,
anchor_range=[0, -39.68, -1.78, 69.12, 39.68, -1.78], anchor_generator=dict(
anchor_strides=[2], type='Anchor3DRangeGenerator',
anchor_sizes=[[1.6, 3.9, 1.56]], ranges=[[0, -39.68, -1.78, 69.12, 39.68, -1.78]],
anchor_rotations=[0, 1.57], strides=[2],
sizes=[[1.6, 3.9, 1.56]],
rotations=[0, 1.57],
reshape_out=True),
diff_rad_by_sin=True, diff_rad_by_sin=True,
bbox_coder=dict(type='Residual3DBoxCoder', ), bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict( loss_cls=dict(
type='FocalLoss', type='FocalLoss',
use_sigmoid=True, use_sigmoid=True,
...@@ -113,7 +116,7 @@ train_pipeline = [ ...@@ -113,7 +116,7 @@ train_pipeline = [
loc_noise_std=[0.25, 0.25, 0.25], loc_noise_std=[0.25, 0.25, 0.25],
global_rot_range=[0.0, 0.0], global_rot_range=[0.0, 0.0],
rot_uniform_noise=[-0.15707963267, 0.15707963267]), rot_uniform_noise=[-0.15707963267, 0.15707963267]),
dict(type='PointsRandomFlip', flip_ratio=0.5), dict(type='RandomFlip3D', flip_ratio=0.5),
dict( dict(
type='GlobalRotScale', type='GlobalRotScale',
rot_uniform_noise=[-0.78539816, 0.78539816], rot_uniform_noise=[-0.78539816, 0.78539816],
...@@ -122,7 +125,7 @@ train_pipeline = [ ...@@ -122,7 +125,7 @@ train_pipeline = [
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'), dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names), dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes', 'gt_labels']), dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
] ]
test_pipeline = [ test_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
...@@ -130,7 +133,7 @@ test_pipeline = [ ...@@ -130,7 +133,7 @@ test_pipeline = [
type='DefaultFormatBundle3D', type='DefaultFormatBundle3D',
class_names=class_names, class_names=class_names,
with_label=False), with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes']), dict(type='Collect3D', keys=['points', 'gt_bboxes_3d']),
] ]
data = dict( data = dict(
...@@ -175,13 +178,13 @@ optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) ...@@ -175,13 +178,13 @@ optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy # learning policy
lr_config = dict( lr_config = dict(
policy='cyclic', policy='cyclic',
target_ratio=[10, 1e-4], target_ratio=(10, 1e-4),
cyclic_times=1, cyclic_times=1,
step_ratio_up=0.4, step_ratio_up=0.4,
) )
momentum_config = dict( momentum_config = dict(
policy='cyclic', policy='cyclic',
target_ratio=[0.85 / 0.95, 1], target_ratio=(0.85 / 0.95, 1),
cyclic_times=1, cyclic_times=1,
step_ratio_up=0.4, step_ratio_up=0.4,
) )
......
...@@ -41,12 +41,15 @@ model = dict( ...@@ -41,12 +41,15 @@ model = dict(
feat_channels=512, feat_channels=512,
use_direction_classifier=True, use_direction_classifier=True,
encode_bg_as_zeros=True, encode_bg_as_zeros=True,
anchor_range=[0, -40.0, -1.78, 70.4, 40.0, -1.78], anchor_generator=dict(
anchor_strides=[2], type='Anchor3DRangeGenerator',
anchor_sizes=[[1.6, 3.9, 1.56]], ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
anchor_rotations=[0, 1.57], strides=[2],
sizes=[[1.6, 3.9, 1.56]],
rotations=[0, 1.57],
reshape_out=True),
diff_rad_by_sin=True, diff_rad_by_sin=True,
bbox_coder=dict(type='Residual3DBoxCoder', ), bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict( loss_cls=dict(
type='FocalLoss', type='FocalLoss',
use_sigmoid=True, use_sigmoid=True,
...@@ -111,7 +114,7 @@ train_pipeline = [ ...@@ -111,7 +114,7 @@ train_pipeline = [
loc_noise_std=[1.0, 1.0, 0.5], loc_noise_std=[1.0, 1.0, 0.5],
global_rot_range=[0.0, 0.0], global_rot_range=[0.0, 0.0],
rot_uniform_noise=[-0.78539816, 0.78539816]), rot_uniform_noise=[-0.78539816, 0.78539816]),
dict(type='PointsRandomFlip', flip_ratio=0.5), dict(type='RandomFlip3D', flip_ratio=0.5),
dict( dict(
type='GlobalRotScale', type='GlobalRotScale',
rot_uniform_noise=[-0.78539816, 0.78539816], rot_uniform_noise=[-0.78539816, 0.78539816],
...@@ -120,7 +123,7 @@ train_pipeline = [ ...@@ -120,7 +123,7 @@ train_pipeline = [
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'), dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names), dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes', 'gt_labels']), dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
] ]
test_pipeline = [ test_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
...@@ -128,7 +131,7 @@ test_pipeline = [ ...@@ -128,7 +131,7 @@ test_pipeline = [
type='DefaultFormatBundle3D', type='DefaultFormatBundle3D',
class_names=class_names, class_names=class_names,
with_label=False), with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes']), dict(type='Collect3D', keys=['points', 'gt_bboxes_3d']),
] ]
data = dict( data = dict(
...@@ -168,13 +171,13 @@ optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01) ...@@ -168,13 +171,13 @@ optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict( lr_config = dict(
policy='cyclic', policy='cyclic',
target_ratio=[10, 1e-4], target_ratio=(10, 1e-4),
cyclic_times=1, cyclic_times=1,
step_ratio_up=0.4, step_ratio_up=0.4,
) )
momentum_config = dict( momentum_config = dict(
policy='cyclic', policy='cyclic',
target_ratio=[0.85 / 0.95, 1], target_ratio=(0.85 / 0.95, 1),
cyclic_times=1, cyclic_times=1,
step_ratio_up=0.4, step_ratio_up=0.4,
) )
......
...@@ -2,8 +2,7 @@ ...@@ -2,8 +2,7 @@
norm_cfg = dict(type='BN', requires_grad=False) norm_cfg = dict(type='BN', requires_grad=False)
model = dict( model = dict(
type='FasterRCNN', type='FasterRCNN',
pretrained=('./pretrain_detectron/' pretrained=('open-mmlab://resnet50_caffe_bgr'),
'ImageNetPretrained/MSRA/resnet50_msra.pth'),
backbone=dict( backbone=dict(
type='ResNet', type='ResNet',
depth=50, depth=50,
...@@ -22,11 +21,15 @@ model = dict( ...@@ -22,11 +21,15 @@ model = dict(
type='RPNHead', type='RPNHead',
in_channels=256, in_channels=256,
feat_channels=256, feat_channels=256,
anchor_scales=[8], anchor_generator=dict(
anchor_ratios=[0.5, 1.0, 2.0], type='AnchorGenerator',
anchor_strides=[4, 8, 16, 32, 64], scales=[8],
target_means=[.0, .0, .0, .0], ratios=[0.5, 1.0, 2.0],
target_stds=[1.0, 1.0, 1.0, 1.0], strides=[4, 8, 16, 32, 64]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict( loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)), loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
...@@ -42,9 +45,11 @@ model = dict( ...@@ -42,9 +45,11 @@ model = dict(
in_channels=256, in_channels=256,
fc_out_channels=1024, fc_out_channels=1024,
roi_feat_size=7, roi_feat_size=7,
num_classes=80, num_classes=10,
target_means=[0., 0., 0., 0.], bbox_coder=dict(
target_stds=[0.1, 0.1, 0.2, 0.2], type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False, reg_class_agnostic=False,
loss_cls=dict( loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
...@@ -105,12 +110,14 @@ test_cfg = dict( ...@@ -105,12 +110,14 @@ test_cfg = dict(
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
) )
# dataset settings # dataset settings
dataset_type = 'NuScenes2DDataset' dataset_type = 'CocoDataset'
data_root = 'data/nuscenes/' data_root = 'data/nuscenes/'
# Values to be used for image normalization (BGR order) # Values to be used for image normalization (BGR order)
# Default mean pixel values are from ImageNet: [103.53, 116.28, 123.675] # Default mean pixel values are from ImageNet: [103.53, 116.28, 123.675]
# When using pre-trained models in Detectron1 or any MSRA models, # When using pre-trained models in Detectron1 or any MSRA models,
# std has been absorbed into its conv1 weights, so the std needs to be set 1. # std has been absorbed into its conv1 weights, so the std needs to be set 1.
classes = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
'motorcycle', 'pedestrian', 'traffic_cone', 'barrier')
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
train_pipeline = [ train_pipeline = [
...@@ -147,14 +154,17 @@ data = dict( ...@@ -147,14 +154,17 @@ data = dict(
workers_per_gpu=2, workers_per_gpu=2,
train=dict( train=dict(
type=dataset_type, type=dataset_type,
classes=classes,
ann_file=data_root + 'nuscenes_infos_train.coco.json', ann_file=data_root + 'nuscenes_infos_train.coco.json',
pipeline=train_pipeline), pipeline=train_pipeline),
val=dict( val=dict(
type=dataset_type, type=dataset_type,
classes=classes,
ann_file=data_root + 'nuscenes_infos_val.coco.json', ann_file=data_root + 'nuscenes_infos_val.coco.json',
pipeline=test_pipeline), pipeline=test_pipeline),
test=dict( test=dict(
type=dataset_type, type=dataset_type,
classes=classes,
ann_file=data_root + 'nuscenes_infos_val.coco.json', ann_file=data_root + 'nuscenes_infos_val.coco.json',
pipeline=test_pipeline)) pipeline=test_pipeline))
# optimizer # optimizer
......
...@@ -51,33 +51,35 @@ model = dict( ...@@ -51,33 +51,35 @@ model = dict(
feat_channels=384, feat_channels=384,
use_direction_classifier=True, use_direction_classifier=True,
encode_bg_as_zeros=True, encode_bg_as_zeros=True,
anchor_range=[ anchor_generator=dict(
[-49.6, -49.6, -1.80032795, 49.6, 49.6, -1.80032795], # car type='Anchor3DRangeGenerator',
[-49.6, -49.6, -1.74440365, 49.6, 49.6, -1.74440365], # truck ranges=[
[-49.6, -49.6, -1.68526504, 49.6, 49.6, -1.68526504], # trailer [-49.6, -49.6, -1.80032795, 49.6, 49.6, -1.80032795],
[-49.6, -49.6, -1.67339111, 49.6, 49.6, -1.67339111], # bicycle [-49.6, -49.6, -1.74440365, 49.6, 49.6, -1.74440365],
[-49.6, -49.6, -1.61785072, 49.6, 49.6, -1.61785072], # pedestrian [-49.6, -49.6, -1.68526504, 49.6, 49.6, -1.68526504],
[-49.6, -49.6, -1.80984986, 49.6, 49.6, [-49.6, -49.6, -1.67339111, 49.6, 49.6, -1.67339111],
-1.80984986], # traffic_cone [-49.6, -49.6, -1.61785072, 49.6, 49.6, -1.61785072],
[-49.6, -49.6, -1.763965, 49.6, 49.6, -1.763965], # barrier [-49.6, -49.6, -1.80984986, 49.6, 49.6, -1.80984986],
], [-49.6, -49.6, -1.763965, 49.6, 49.6, -1.763965],
anchor_strides=[2], ],
anchor_sizes=[ strides=[2],
[1.95017717, 4.60718145, 1.72270761], # car sizes=[
[2.4560939, 6.73778078, 2.73004906], # truck [1.95017717, 4.60718145, 1.72270761], # car
[2.87427237, 12.01320693, 3.81509561], # trailer [2.4560939, 6.73778078, 2.73004906], # truck
[0.60058911, 1.68452161, 1.27192197], # bicycle [2.87427237, 12.01320693, 3.81509561], # trailer
[0.66344886, 0.7256437, 1.75748069], # pedestrian [0.60058911, 1.68452161, 1.27192197], # bicycle
[0.39694519, 0.40359262, 1.06232151], # traffic_cone [0.66344886, 0.7256437, 1.75748069], # pedestrian
[2.49008838, 0.48578221, 0.98297065], # barrier [0.39694519, 0.40359262, 1.06232151], # traffic_cone
], [2.49008838, 0.48578221, 0.98297065], # barrier
anchor_custom_values=[0, 0], ],
anchor_rotations=[0, 1.57], custom_values=[0, 0],
rotations=[0, 1.57],
reshape_out=True),
assigner_per_size=False, assigner_per_size=False,
diff_rad_by_sin=True, diff_rad_by_sin=True,
dir_offset=0.7854, # pi/4 dir_offset=0.7854, # pi/4
dir_limit_offset=0, dir_limit_offset=0,
bbox_coder=dict(type='Residual3DBoxCoder', ), bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', ),
loss_cls=dict( loss_cls=dict(
type='FocalLoss', type='FocalLoss',
use_sigmoid=True, use_sigmoid=True,
......
...@@ -2,8 +2,7 @@ ...@@ -2,8 +2,7 @@
norm_cfg = dict(type='BN', requires_grad=False) norm_cfg = dict(type='BN', requires_grad=False)
model = dict( model = dict(
type='RetinaNet', type='RetinaNet',
pretrained=('./pretrain_detectron/' pretrained=('open-mmlab://resnet50_caffe_bgr'),
'ImageNetPretrained/MSRA/resnet50_msra.pth'),
backbone=dict( backbone=dict(
type='ResNet', type='ResNet',
depth=50, depth=50,
...@@ -26,12 +25,16 @@ model = dict( ...@@ -26,12 +25,16 @@ model = dict(
in_channels=256, in_channels=256,
stacked_convs=4, stacked_convs=4,
feat_channels=256, feat_channels=256,
octave_base_scale=4, anchor_generator=dict(
scales_per_octave=3, type='AnchorGenerator',
anchor_ratios=[0.5, 1.0, 2.0], octave_base_scale=4,
anchor_strides=[8, 16, 32, 64, 128], scales_per_octave=3,
target_means=[.0, .0, .0, .0], ratios=[0.5, 1.0, 2.0],
target_stds=[1.0, 1.0, 1.0, 1.0], strides=[8, 16, 32, 64, 128]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict( loss_cls=dict(
type='FocalLoss', type='FocalLoss',
use_sigmoid=True, use_sigmoid=True,
......
from .train import train_detector from .train import batch_processor, train_detector
__all__ = [ __all__ = ['batch_processor', 'train_detector']
'train_detector',
]
import torch
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import DistSamplerSeedHook, Runner
from mmdet.apis.train import parse_losses from mmdet.apis.train import parse_losses
from mmdet.core import (DistEvalHook, DistOptimizerHook, EvalHook,
Fp16OptimizerHook, build_optimizer)
from mmdet.datasets import build_dataloader, build_dataset
from mmdet.utils import get_root_logger
def batch_processor(model, data, train_mode): def batch_processor(model, data, train_mode):
...@@ -27,3 +35,87 @@ def batch_processor(model, data, train_mode): ...@@ -27,3 +35,87 @@ def batch_processor(model, data, train_mode):
outputs = dict(loss=loss, log_vars=log_vars, num_samples=num_samples) outputs = dict(loss=loss, log_vars=log_vars, num_samples=num_samples)
return outputs return outputs
def train_detector(model,
                   dataset,
                   cfg,
                   distributed=False,
                   validate=False,
                   timestamp=None,
                   meta=None):
    """Wire up data loaders, the parallel model wrapper, a runner and its
    hooks, then launch the training workflow.

    Args:
        model: Detector to train; it is moved to GPU and wrapped in a
            (distributed) data-parallel container.
        dataset: A single dataset or a list/tuple of datasets, one per
            workflow stage.
        cfg: Config object; the fields read here are ``log_level``, ``data``,
            ``gpu_ids``, ``seed``, ``optimizer``, ``optimizer_config``,
            ``work_dir``, ``lr_config``, ``checkpoint_config``,
            ``log_config``, ``resume_from``, ``load_from``, ``workflow`` and
            ``total_epochs``, plus the optional keys
            ``find_unused_parameters``, ``fp16`` and ``evaluation``.
        distributed (bool): Use the distributed wrapper, optimizer hook,
            sampler-seed hook and eval hook.
        validate (bool): Register an evaluation hook on ``cfg.data.val``.
        timestamp: Stored on the runner as ``runner.timestamp``.
        meta: Passed through to the runner.
    """
    logger = get_root_logger(cfg.log_level)

    # One data loader per dataset; a bare dataset is treated as a 1-item list.
    if not isinstance(dataset, (list, tuple)):
        dataset = [dataset]
    data_loaders = []
    for single_dataset in dataset:
        loader = build_dataloader(
            single_dataset,
            cfg.data.samples_per_gpu,
            cfg.data.workers_per_gpu,
            # cfg.gpus will be ignored if distributed
            len(cfg.gpu_ids),
            dist=distributed,
            seed=cfg.seed)
        data_loaders.append(loader)

    # Move the model to GPU and wrap it for (distributed) data parallelism.
    if not distributed:
        model = MMDataParallel(
            model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)
    else:
        # Forwarded to torch.nn.parallel.DistributedDataParallel.
        unused_params = cfg.get('find_unused_parameters', False)
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False,
            find_unused_parameters=unused_params)

    # Build the runner around the wrapped model.
    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(
        model,
        batch_processor,
        optimizer,
        cfg.work_dir,
        logger=logger,
        meta=meta)
    # Workaround so that the .log and .log.json filenames stay identical.
    runner.timestamp = timestamp

    # Pick the optimizer hook: fp16 takes precedence, then distributed,
    # otherwise the raw config is handed to the runner.
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(
            **cfg.optimizer_config, **fp16_cfg, distributed=distributed)
    elif distributed:
        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
    else:
        optimizer_config = cfg.optimizer_config

    # Standard training hooks, plus the sampler-seed hook when distributed.
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    if distributed:
        runner.register_hook(DistSamplerSeedHook())

    # Optional evaluation hook on the validation split.
    if validate:
        val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
        val_dataloader = build_dataloader(
            val_dataset,
            samples_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=distributed,
            shuffle=False)
        eval_cfg = cfg.get('evaluation', {})
        if distributed:
            hook_cls = DistEvalHook
        else:
            hook_cls = EvalHook
        runner.register_hook(hook_cls(val_dataloader, **eval_cfg))

    # Resuming takes priority over loading a plain checkpoint.
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
from .anchor_3d_generator import (AlignedAnchorGeneratorRange, from mmdet.core.anchor import build_anchor_generator
AnchorGeneratorRange) from .anchor_3d_generator import (AlignedAnchor3DRangeGenerator,
Anchor3DRangeGenerator)
__all__ = [ __all__ = [
'AlignedAnchorGeneratorRange', 'AnchorGeneratorRange', 'AlignedAnchor3DRangeGenerator', 'Anchor3DRangeGenerator',
'build_anchor_generator' 'build_anchor_generator'
] ]
def build_anchor_generator(cfg, **kwargs):
    """Build an anchor generator from a config dict.

    The object is constructed by ``mmcv.runner.obj_from_dict``, using the
    sibling ``anchor_3d_generator`` module as the lookup parent.

    Args:
        cfg (dict): Config describing the anchor generator to build.
        **kwargs: Passed to ``obj_from_dict`` as ``default_args``.

    Returns:
        The object returned by ``mmcv.runner.obj_from_dict``.

    Raises:
        TypeError: If ``cfg`` is not a dict.
    """
    # Validate first so bad input fails fast, before any lazy import runs.
    if not isinstance(cfg, dict):
        raise TypeError(
            'Invalid type {} for building an anchor generator'.format(
                type(cfg)))

    # Imported lazily, as in the original, to keep module import light.
    import mmcv
    from . import anchor_3d_generator
    return mmcv.runner.obj_from_dict(
        cfg, anchor_3d_generator, default_args=kwargs)
import mmcv
import torch import torch
from mmdet.core.anchor import ANCHOR_GENERATORS
class AnchorGeneratorRange(object):
@ANCHOR_GENERATORS.register_module
class Anchor3DRangeGenerator(object):
"""3D Anchor Generator by range
This anchor generator generates anchors by the given range in different
feature levels.
Due to the convention in 3D detection, different anchor sizes are related to
different ranges for different categories. However, we find this setting
does not affect the performance much in some datasets, e.g., nuScenes.
Args:
ranges (list[list[float]]): Ranges of different anchors.
The ranges are the same across different feature levels. But may
vary for different anchor sizes if size_per_range is True.
sizes (list[list[float]]): 3D sizes of anchors.
strides (list[int]): Strides of anchors in different feature levels.
rotations (list(float)): Rotations of anchors in a feature grid.
custom_values (tuple(float)): Customized values of that anchor. For
example, in nuScenes the anchors have velocities.
reshape_out (bool): Whether to reshape the output into (Nx4)
size_per_range: Whether to use separate ranges for different sizes.
If size_per_range is True, the ranges should have the same length
as the sizes, if not, it will be duplicated.
"""
def __init__(self, def __init__(self,
anchor_ranges, ranges,
sizes=((1.6, 3.9, 1.56), ), sizes=[[1.6, 3.9, 1.56]],
stride=2, strides=[2],
rotations=(0, 3.1415926 / 2), rotations=[0, 1.5707963],
custom_values=(), custom_values=(),
cache_anchor=False): reshape_out=True,
size_per_range=True):
assert mmcv.is_list_of(ranges, list)
if size_per_range:
if len(sizes) != len(ranges):
assert len(ranges) == 1
ranges = ranges * len(sizes)
assert len(ranges) == len(sizes)
else:
assert len(ranges) == 1
assert mmcv.is_list_of(sizes, list)
assert isinstance(strides, list)
self.sizes = sizes self.sizes = sizes
self.stride = stride self.strides = strides
self.anchor_ranges = anchor_ranges self.ranges = ranges
if len(anchor_ranges) != len(sizes):
self.anchor_ranges = anchor_ranges * len(sizes)
self.rotations = rotations self.rotations = rotations
self.custom_values = custom_values self.custom_values = custom_values
self.cache_anchor = cache_anchor
self.cached_anchors = None self.cached_anchors = None
self.reshape_out = reshape_out
self.size_per_range = size_per_range
def __repr__(self): def __repr__(self):
s = self.__class__.__name__ + '(' s = self.__class__.__name__ + '('
s += 'anchor_range={}, '.format(self.anchor_ranges) s += f'anchor_range={self.ranges},\n'
s += 'stride={}, '.format(self.stride) s += f'strides={self.strides},\n'
s += 'sizes={}, '.format(self.sizes) s += f'sizes={self.sizes},\n'
s += 'rotations={})'.format(self.rotations) s += f'rotations={self.rotations},\n'
s += f'reshape_out={self.reshape_out},\n'
s += f'size_per_range={self.size_per_range})'
return s return s
@property @property
...@@ -34,40 +73,68 @@ class AnchorGeneratorRange(object): ...@@ -34,40 +73,68 @@ class AnchorGeneratorRange(object):
num_size = torch.tensor(self.sizes).reshape(-1, 3).size(0) num_size = torch.tensor(self.sizes).reshape(-1, 3).size(0)
return num_rot * num_size return num_rot * num_size
def grid_anchors(self, feature_map_size, device='cuda'): @property
def num_levels(self):
return len(self.strides)
def grid_anchors(self, featmap_sizes, device='cuda'):
"""Generate grid anchors in multiple feature levels
Args:
featmap_sizes (list[tuple]): List of feature map sizes in
multiple feature levels.
device (str): Device where the anchors will be put on.
Return:
list[torch.Tensor]: Anchors in multiple feature levels.
The sizes of each tensor should be [N, C] when `reshape_out` is
True, where C is the size of the last anchor dimension and
N = width * height * num_base_anchors, width and height
are the sizes of the corresponding feature level,
num_base_anchors is the number of anchors for that level.
"""
assert self.num_levels == len(featmap_sizes)
multi_level_anchors = []
for i in range(self.num_levels):
anchors = self.single_level_grid_anchors(
featmap_sizes[i], self.strides[i], device=device)
if self.reshape_out:
anchors = anchors.reshape(-1, anchors.size(-1))
multi_level_anchors.append(anchors)
return multi_level_anchors
def single_level_grid_anchors(self, featmap_size, stride, device='cuda'):
# We reimplement the anchor generator using torch in cuda # We reimplement the anchor generator using torch in cuda
# torch: 0.6975 s for 1000 times # torch: 0.6975 s for 1000 times
# numpy: 4.3345 s for 1000 times # numpy: 4.3345 s for 1000 times
# which is ~5 times faster than numpy implementation # which is ~5 times faster than the numpy implementation
if (self.cache_anchor and self.cached_anchors): if not self.size_per_range:
return self.cached_anchors
if not isinstance(self.anchor_ranges[0], list):
return self.anchors_single_range( return self.anchors_single_range(
feature_map_size, featmap_size,
self.anchor_ranges, self.ranges[0],
stride,
self.sizes, self.sizes,
self.rotations, self.rotations,
device=device) device=device)
assert len(self.sizes) == len(self.anchor_ranges)
mr_anchors = [] mr_anchors = []
for anchor_range, anchor_size in zip(self.anchor_ranges, self.sizes): for anchor_range, anchor_size in zip(self.ranges, self.sizes):
mr_anchors.append( mr_anchors.append(
self.anchors_single_range( self.anchors_single_range(
feature_map_size, featmap_size,
anchor_range, anchor_range,
stride,
anchor_size, anchor_size,
self.rotations, self.rotations,
device=device)) device=device))
mr_anchors = torch.cat(mr_anchors, dim=-3) mr_anchors = torch.cat(mr_anchors, dim=-3)
if self.cache_anchor and not self.cached_anchors:
self.cached_anchors = mr_anchors
return mr_anchors return mr_anchors
def anchors_single_range(self, def anchors_single_range(self,
feature_size, feature_size,
anchor_range, anchor_range,
sizes=((1.6, 3.9, 1.56), ), stride=1,
rotations=(0, 3.1415927 / 2), sizes=[[1.6, 3.9, 1.56]],
rotations=[0, 1.5707963],
device='cuda'): device='cuda'):
"""Generate anchors in a single range """Generate anchors in a single range
Args: Args:
...@@ -106,7 +173,6 @@ class AnchorGeneratorRange(object): ...@@ -106,7 +173,6 @@ class AnchorGeneratorRange(object):
ret = torch.cat(rets, dim=-1).permute([2, 1, 0, 3, 4, 5]) ret = torch.cat(rets, dim=-1).permute([2, 1, 0, 3, 4, 5])
# [1, 200, 176, N, 2, 7] for kitti after permute # [1, 200, 176, N, 2, 7] for kitti after permute
# ret = ret.reshape(-1, 7)
if len(self.custom_values) > 0: if len(self.custom_values) > 0:
custom_ndim = len(self.custom_values) custom_ndim = len(self.custom_values)
...@@ -117,17 +183,42 @@ class AnchorGeneratorRange(object): ...@@ -117,17 +183,42 @@ class AnchorGeneratorRange(object):
return ret return ret
class AlignedAnchorGeneratorRange(AnchorGeneratorRange): @ANCHOR_GENERATORS.register_module
class AlignedAnchor3DRangeGenerator(Anchor3DRangeGenerator):
"""Aligned 3D Anchor Generator by range
This anchor generator uses a different manner to generate the positions
of anchors' centers from `Anchor3DRangeGenerator`.
Note:
`align` means that the anchor's center is aligned with the voxel grid,
which is also the feature grid. The previous implementation,
`Anchor3DRangeGenerator`, does not generate the anchors' centers
according to the voxel grid. Rather, it generates the centers by
uniformly distributing the anchors between the minimum and maximum of
the anchor ranges according to the feature map sizes.
However, this means the anchors' centers do not match the feature grid.
`AlignedAnchor3DRangeGenerator` adds 1 to the feature map sizes
to obtain the corners of the voxel grid. It then either shifts the
coordinates to the centers of the voxel grid or uses the upper-left
corners to distribute the anchors.
Args:
align_corner (bool): Whether to align with the corner of the voxel
grid. By default it is False and the anchor's center will be
the same as the corresponding voxel's center, which is also the
center of the corresponding feature grid.
"""
def __init__(self, shift_center=True, **kwargs): def __init__(self, align_corner=False, **kwargs):
super(AlignedAnchorGeneratorRange, self).__init__(**kwargs) super(AlignedAnchor3DRangeGenerator, self).__init__(**kwargs)
self.shift_center = shift_center self.align_corner = align_corner
def anchors_single_range(self, def anchors_single_range(self,
feature_size, feature_size,
anchor_range, anchor_range,
sizes=((1.6, 3.9, 1.56), ), stride,
rotations=(0, 3.1415927 / 2), sizes=[[1.6, 3.9, 1.56]],
rotations=[0, 1.5707963],
device='cuda'): device='cuda'):
"""Generate anchors in a single range """Generate anchors in a single range
Args: Args:
...@@ -155,11 +246,11 @@ class AlignedAnchorGeneratorRange(AnchorGeneratorRange): ...@@ -155,11 +246,11 @@ class AlignedAnchorGeneratorRange(AnchorGeneratorRange):
anchor_range[3], anchor_range[3],
feature_size[2] + 1, feature_size[2] + 1,
device=device) device=device)
sizes = torch.tensor(sizes, device=device).reshape(-1, 3) * self.stride sizes = torch.tensor(sizes, device=device).reshape(-1, 3) * stride
rotations = torch.tensor(rotations, device=device) rotations = torch.tensor(rotations, device=device)
# shift the anchor center # shift the anchor center
if self.shift_center: if not self.align_corner:
z_shift = (z_centers[1] - z_centers[0]) / 2 z_shift = (z_centers[1] - z_centers[0]) / 2
y_shift = (y_centers[1] - y_centers[0]) / 2 y_shift = (y_centers[1] - y_centers[0]) / 2
x_shift = (x_centers[1] - x_centers[0]) / 2 x_shift = (x_centers[1] - x_centers[0]) / 2
...@@ -187,7 +278,6 @@ class AlignedAnchorGeneratorRange(AnchorGeneratorRange): ...@@ -187,7 +278,6 @@ class AlignedAnchorGeneratorRange(AnchorGeneratorRange):
ret = torch.cat(rets, dim=-1).permute([2, 1, 0, 3, 4, 5]) ret = torch.cat(rets, dim=-1).permute([2, 1, 0, 3, 4, 5])
# [1, 200, 176, N, 2, 7] for kitti after permute # [1, 200, 176, N, 2, 7] for kitti after permute
# ret = ret.reshape(-1, 7)
if len(self.custom_values) > 0: if len(self.custom_values) > 0:
custom_ndim = len(self.custom_values) custom_ndim = len(self.custom_values)
......
from . import box_torch_ops from . import box_torch_ops
from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner
from .coders import Residual3DBoxCoder from .coders import DeltaXYZWLHRBBoxCoder
# from .bbox_target import bbox_target # from .bbox_target import bbox_target
from .iou_calculators import (BboxOverlaps3D, BboxOverlapsNearest3D, from .iou_calculators import (BboxOverlaps3D, BboxOverlapsNearest3D,
bbox_overlaps_3d, bbox_overlaps_nearest_3d) bbox_overlaps_3d, bbox_overlaps_nearest_3d)
...@@ -18,7 +18,7 @@ __all__ = [ ...@@ -18,7 +18,7 @@ __all__ = [
'PseudoSampler', 'RandomSampler', 'InstanceBalancedPosSampler', 'PseudoSampler', 'RandomSampler', 'InstanceBalancedPosSampler',
'IoUBalancedNegSampler', 'CombinedSampler', 'SamplingResult', 'IoUBalancedNegSampler', 'CombinedSampler', 'SamplingResult',
'build_assigner', 'build_sampler', 'assign_and_sample', 'box_torch_ops', 'build_assigner', 'build_sampler', 'assign_and_sample', 'box_torch_ops',
'build_bbox_coder', 'Residual3DBoxCoder', 'boxes3d_to_bev_torch_lidar', 'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'boxes3d_to_bev_torch_lidar',
'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d', 'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d',
'bbox_overlaps_3d' 'bbox_overlaps_3d'
] ]
...@@ -14,7 +14,7 @@ def build_assigner(cfg, **kwargs): ...@@ -14,7 +14,7 @@ def build_assigner(cfg, **kwargs):
def build_bbox_coder(cfg, **kwargs): def build_bbox_coder(cfg, **kwargs):
if isinstance(cfg, coders.Residual3DBoxCoder): if isinstance(cfg, coders.DeltaXYZWLHRBBoxCoder):
return cfg return cfg
elif isinstance(cfg, dict): elif isinstance(cfg, dict):
return mmcv.runner.obj_from_dict(cfg, coders, default_args=kwargs) return mmcv.runner.obj_from_dict(cfg, coders, default_args=kwargs)
......
from .box_coder import Residual3DBoxCoder from mmdet.core.bbox import build_bbox_coder
from .delta_xywh_bbox_coder import DeltaXYZWLHRBBoxCoder
__all__ = ['Residual3DBoxCoder'] __all__ = ['build_bbox_coder', 'DeltaXYZWLHRBBoxCoder']
import numpy as np
import torch import torch
from mmdet.core.bbox import BaseBBoxCoder
from mmdet.core.bbox.builder import BBOX_CODERS
class Residual3DBoxCoder(object):
def __init__(self, code_size=7, mean=None, std=None): @BBOX_CODERS.register_module
super().__init__() class DeltaXYZWLHRBBoxCoder(BaseBBoxCoder):
self.code_size = code_size
self.mean = mean
self.std = std
@staticmethod
def encode_np(boxes, anchors):
"""
:param boxes: (N, 7) x, y, z, w, l, h, r
:param anchors: (N, 7)
:return:
"""
# need to convert boxes to z-center format
xa, ya, za, wa, la, ha, ra = np.split(anchors, 7, axis=-1)
xg, yg, zg, wg, lg, hg, rg = np.split(boxes, 7, axis=-1)
zg = zg + hg / 2
za = za + ha / 2
diagonal = np.sqrt(la**2 + wa**2) # 4.3
xt = (xg - xa) / diagonal
yt = (yg - ya) / diagonal
zt = (zg - za) / ha # 1.6
lt = np.log(lg / la)
wt = np.log(wg / wa)
ht = np.log(hg / ha)
rt = rg - ra
return np.concatenate([xt, yt, zt, wt, lt, ht, rt], axis=-1)
@staticmethod
def decode_np(box_encodings, anchors):
"""
:param box_encodings: (N, 7) x, y, z, w, l, h, r
:param anchors: (N, 7)
:return:
"""
# need to convert box_encodings to z-bottom format
xa, ya, za, wa, la, ha, ra = np.split(anchors, 7, axis=-1)
xt, yt, zt, wt, lt, ht, rt = np.split(box_encodings, 7, axis=-1)
za = za + ha / 2 def __init__(self, code_size=7):
diagonal = np.sqrt(la**2 + wa**2) super(DeltaXYZWLHRBBoxCoder, self).__init__()
xg = xt * diagonal + xa self.code_size = code_size
yg = yt * diagonal + ya
zg = zt * ha + za
lg = np.exp(lt) * la
wg = np.exp(wt) * wa
hg = np.exp(ht) * ha
rg = rt + ra
zg = zg - hg / 2
return np.concatenate([xg, yg, zg, wg, lg, hg, rg], axis=-1)
@staticmethod @staticmethod
def encode_torch(anchors, boxes, means, stds): def encode(anchors, boxes):
""" """
:param boxes: (N, 7+n) x, y, z, w, l, h, r, velo* :param boxes: (N, 7+n) x, y, z, w, l, h, r, velo*
:param anchors: (N, 7+n) :param anchors: (N, 7+n)
...@@ -85,7 +40,7 @@ class Residual3DBoxCoder(object): ...@@ -85,7 +40,7 @@ class Residual3DBoxCoder(object):
return torch.cat([xt, yt, zt, wt, lt, ht, rt, *cts], dim=-1) return torch.cat([xt, yt, zt, wt, lt, ht, rt, *cts], dim=-1)
@staticmethod @staticmethod
def decode_torch(anchors, box_encodings, means, stds): def decode(anchors, box_encodings):
""" """
:param box_encodings: (N, 7 + n) x, y, z, w, l, h, r :param box_encodings: (N, 7 + n) x, y, z, w, l, h, r
:param anchors: (N, 7) :param anchors: (N, 7)
......
import torch
from mmdet3d.ops.iou3d import boxes_iou3d_gpu from mmdet3d.ops.iou3d import boxes_iou3d_gpu
from mmdet.core.bbox import bbox_overlaps from mmdet.core.bbox import bbox_overlaps
from mmdet.core.bbox.iou_calculators.registry import IOU_CALCULATORS from mmdet.core.bbox.iou_calculators.builder import IOU_CALCULATORS
from .. import box_torch_ops from .. import box_torch_ops
...@@ -33,18 +35,21 @@ class BboxOverlaps3D(object): ...@@ -33,18 +35,21 @@ class BboxOverlaps3D(object):
def bbox_overlaps_nearest_3d(bboxes1, bboxes2, mode='iou', is_aligned=False): def bbox_overlaps_nearest_3d(bboxes1, bboxes2, mode='iou', is_aligned=False):
''' """Calculate nearest 3D IoU
:param bboxes1: Tensor, shape (N, 7) [x, y, z, h, w, l, ry]?
:param bboxes2: Tensor, shape (M, 7) [x, y, z, h, w, l, ry]? Args:
:param mode: mode (str): "iou" (intersection over union) or iof bboxes1: Tensor, shape (N, 7) [x, y, z, h, w, l, ry]?
bboxes2: Tensor, shape (M, 7) [x, y, z, h, w, l, ry]?
mode: mode (str): "iou" (intersection over union) or iof
(intersection over foreground). (intersection over foreground).
:return: iou: (M, N) not support aligned mode currently
rbboxes: [N, 5(x, y, xdim, ydim, rad)] rotated bboxes Return:
''' iou: (M, N) not support aligned mode currently
rbboxes1_bev = bboxes1.index_select( """
dim=-1, index=bboxes1.new_tensor([0, 1, 3, 4, 6]).long()) assert bboxes1.size(-1) == bboxes2.size(-1) == 7
rbboxes2_bev = bboxes2.index_select( column_index1 = bboxes1.new_tensor([0, 1, 3, 4, 6], dtype=torch.long)
dim=-1, index=bboxes1.new_tensor([0, 1, 3, 4, 6]).long()) rbboxes1_bev = bboxes1.index_select(dim=-1, index=column_index1)
rbboxes2_bev = bboxes2.index_select(dim=-1, index=column_index1)
# Change the bboxes to bev # Change the bboxes to bev
# box conversion and iou calculation in torch version on CUDA # box conversion and iou calculation in torch version on CUDA
...@@ -57,14 +62,18 @@ def bbox_overlaps_nearest_3d(bboxes1, bboxes2, mode='iou', is_aligned=False): ...@@ -57,14 +62,18 @@ def bbox_overlaps_nearest_3d(bboxes1, bboxes2, mode='iou', is_aligned=False):
def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou'): def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou'):
''' """Calculate 3D IoU using cuda implementation
:param bboxes1: Tensor, shape (N, 7) [x, y, z, h, w, l, ry] Args:
:param bboxes2: Tensor, shape (M, 7) [x, y, z, h, w, l, ry] bboxes1: Tensor, shape (N, 7) [x, y, z, h, w, l, ry]
:param mode: mode (str): "iou" (intersection over union) or bboxes2: Tensor, shape (M, 7) [x, y, z, h, w, l, ry]
mode: mode (str): "iou" (intersection over union) or
iof (intersection over foreground). iof (intersection over foreground).
:return: iou: (M, N) not support aligned mode currently
''' Return:
iou: (M, N) not support aligned mode currently
"""
# TODO: check the input dimension meanings, # TODO: check the input dimension meanings,
# this is inconsistent with that in bbox_overlaps_nearest_3d # this is inconsistent with that in bbox_overlaps_nearest_3d
assert bboxes1.size(-1) == bboxes2.size(-1) == 7
return boxes_iou3d_gpu(bboxes1, bboxes2, mode) return boxes_iou3d_gpu(bboxes1, bboxes2, mode)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment