"...en/git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "b88fef47851059ce32f161d17f00cd16d94af96a"
Commit f2b01720 authored by liyinhao

Merge branch 'master' into process_raw_data

parents 08c8adb6 47850641
# model settings
voxel_size = [0.25, 0.25, 8]
point_cloud_range = [-50, -50, -5, 50, 50, 3]
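# With this voxel size and range, the BEV grid is (50 - (-50)) / 0.25 = 400
# cells along x and y, matching the PointPillarsScatter output_shape below.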
class_names = [
'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
model = dict(
type='MVXFasterRCNNV2',
pts_voxel_layer=dict(
max_num_points=64, # max_points_per_voxel
point_cloud_range=point_cloud_range, # velodyne coordinates, x, y, z
voxel_size=voxel_size,
        max_voxels=(30000, 40000),  # (training, testing) max_voxels
),
pts_voxel_encoder=dict(
type='HardVFE',
num_input_features=4,
num_filters=[64, 64],
with_distance=False,
voxel_size=voxel_size,
with_cluster_center=True,
with_voxel_center=True,
point_cloud_range=point_cloud_range,
norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
pts_middle_encoder=dict(
type='PointPillarsScatter',
in_channels=64,
output_shape=[400, 400], # checked from PointCloud3D
),
pts_backbone=dict(
type='SECOND',
in_channels=64,
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
layer_nums=[3, 5, 5],
layer_strides=[2, 2, 2],
out_channels=[64, 128, 256],
),
pts_neck=dict(
type='SECONDFPN',
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
in_channels=[64, 128, 256],
upsample_strides=[1, 2, 4],
out_channels=[128, 128, 128],
),
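    # SECONDFPN concatenates the three upsampled 128-channel maps, so the
    # bbox head below receives 3 * 128 = 384 input channels.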
pts_bbox_head=dict(
type='Anchor3DHead',
num_classes=10,
in_channels=384,
feat_channels=384,
use_direction_classifier=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[
[-49.6, -49.6, -1.80032795, 49.6, 49.6, -1.80032795],
[-49.6, -49.6, -1.74440365, 49.6, 49.6, -1.74440365],
[-49.6, -49.6, -1.68526504, 49.6, 49.6, -1.68526504],
[-49.6, -49.6, -1.67339111, 49.6, 49.6, -1.67339111],
[-49.6, -49.6, -1.61785072, 49.6, 49.6, -1.61785072],
[-49.6, -49.6, -1.80984986, 49.6, 49.6, -1.80984986],
[-49.6, -49.6, -1.763965, 49.6, 49.6, -1.763965],
],
sizes=[
[1.95017717, 4.60718145, 1.72270761], # car
[2.4560939, 6.73778078, 2.73004906], # truck
[2.87427237, 12.01320693, 3.81509561], # trailer
[0.60058911, 1.68452161, 1.27192197], # bicycle
[0.66344886, 0.7256437, 1.75748069], # pedestrian
[0.39694519, 0.40359262, 1.06232151], # traffic_cone
[2.49008838, 0.48578221, 0.98297065], # barrier
],
custom_values=[0, 0],
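            # the two extra values are placeholders for the (vx, vy) velocity
            # targets used on nuScenes; they are why code_size=9 below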
rotations=[0, 1.57],
reshape_out=True),
assigner_per_size=False,
diff_rad_by_sin=True,
dir_offset=0.7854, # pi/4
dir_limit_offset=0,
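        # the direction classifier bins yaw into two classes; the pi/4 offset
        # (presumably) avoids boxes sitting exactly on the 0 / pi bin boundary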
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
# model training and testing settings
train_cfg = dict(
pts=dict(
assigner=dict( # for Car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
allowed_border=0,
code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
pos_weight=-1,
debug=False))
test_cfg = dict(
pts=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_pre=1000,
nms_thr=0.2,
score_thr=0.05,
min_bbox_size=0,
max_num=500
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
))
# dataset settings
dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
input_modality = dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False,
)
db_sampler = dict(
data_root=data_root,
info_path=data_root + 'nuscenes_dbinfos_train.pkl',
rate=1.0,
object_rot_range=[0.0, 0.0],
prepare=dict(),
classes=class_names,
sample_groups=dict(
bus=4,
trailer=4,
truck=4,
))
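# GT-sampling augmentation in the style of SECOND: annotated boxes cropped
# from the training set are pasted into each scene; sample_groups roughly
# sets how many instances of each class a sample should end up with.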
file_client_args = dict(
backend='petrel',
path_mapping=dict({
'./data/nuscenes/': 's3://nuscenes/nuscenes/',
'data/nuscenes/': 's3://nuscenes/nuscenes/'
}))
train_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.3925, 0.3925],
scaling_uniform_noise=[0.95, 1.05],
trans_normal_noise=[0, 0, 0]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='RandomFlip3D', flip_ratio=0),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points']),
]
data = dict(
samples_per_gpu=4,
workers_per_gpu=4,
train=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_train.pkl',
pipeline=train_pipeline,
modality=input_modality,
classes=class_names,
test_mode=False),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True))
# optimizer
optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 1000,
step=[20, 23])
momentum_config = None
checkpoint_config = dict(interval=1)
# yapf:disable
evaluation = dict(interval=24)
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 24
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d'
load_from = None
resume_from = None
workflow = [('train', 1)]
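The configs above and below are plain mmcv-style Python files; as a minimal
sketch (the config path here is hypothetical), they are typically loaded and
overridden like this:

    from mmcv import Config

    cfg = Config.fromfile('configs/hv_pointpillars_secfpn_nus.py')  # hypothetical path
    cfg.data.samples_per_gpu = 2  # any field can be overridden before training
    print(cfg.model.type)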
# model settings
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1] # velodyne coordinates, x, y, z
model = dict(
type='VoxelNet',
voxel_layer=dict(
max_num_points=5, # max_points_per_voxel
point_cloud_range=point_cloud_range,
voxel_size=voxel_size,
        max_voxels=(16000, 40000),  # (training, testing) max_voxels
),
voxel_encoder=dict(
type='VoxelFeatureExtractorV3',
num_input_features=4,
num_filters=[4],
with_distance=False),
middle_encoder=dict(
type='SparseEncoder',
in_channels=4,
sparse_shape=[41, 1600, 1408],
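        # sparse_shape is [z, y, x] in voxels: (1 - (-3)) / 0.1 = 40 (+1 row,
        # a SECOND convention), (40 - (-40)) / 0.05 = 1600 and
        # (70.4 - 0) / 0.05 = 1408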
order=('conv', 'norm', 'act')),
backbone=dict(
type='SECOND',
in_channels=256,
layer_nums=[5, 5],
layer_strides=[1, 2],
out_channels=[128, 256],
),
neck=dict(
type='SECONDFPN',
in_channels=[128, 256],
upsample_strides=[1, 2],
out_channels=[256, 256],
),
bbox_head=dict(
type='Anchor3DHead',
num_classes=1,
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
sizes=[[1.6, 3.9, 1.56]],
rotations=[0, 1.57],
reshape_out=True),
diff_rad_by_sin=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
),
)
# model training and testing settings
train_cfg = dict(
assigner=dict(
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
allowed_border=0,
pos_weight=-1,
debug=False)
test_cfg = dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_thr=0.01,
score_thr=0.3,
min_bbox_size=0,
nms_pre=100,
max_num=50)
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Car']
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict(
use_lidar=False,
use_lidar_reduced=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False,
)
db_sampler = dict(
data_root=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
object_rot_range=[0.0, 0.0],
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5),
),
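    # prepare filters the sample database: entries labelled difficulty -1 and
    # Car instances covered by fewer than 5 lidar points are dropped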
classes=class_names,
sample_groups=dict(Car=15),
)
file_client_args = dict(
backend='petrel', path_mapping=dict(data='s3://kitti_data/'))
train_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=4,
use_dim=4,
file_client_args=file_client_args),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
file_client_args=file_client_args),
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
num_try=100,
loc_noise_std=[1.0, 1.0, 0.5],
global_rot_range=[0.0, 0.0],
rot_uniform_noise=[-0.78539816, 0.78539816]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.78539816, 0.78539816],
scaling_uniform_noise=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=4,
use_dim=4,
file_client_args=file_client_args),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points']),
]
data = dict(
samples_per_gpu=6,
workers_per_gpu=4,
train=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=train_pipeline,
modality=input_modality,
classes=class_names,
test_mode=False),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True))
# optimizer
lr = 0.0018 # max learning rate
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(
policy='cyclic',
target_ratio=(10, 1e-4),
cyclic_times=1,
step_ratio_up=0.4,
)
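# one-cycle schedule: lr climbs from lr to 10 * lr over the first 40% of
# training (step_ratio_up), then anneals towards 1e-4 * lr; the momentum
# schedule below cycles inversely.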
momentum_config = dict(
policy='cyclic',
target_ratio=(0.85 / 0.95, 1),
cyclic_times=1,
step_ratio_up=0.4,
)
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 80
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/sec_secfpn_80e'
load_from = None
resume_from = None
workflow = [('train', 1)]
@@ -28,8 +28,8 @@ model = dict(
     ),
     pts_voxel_encoder=dict(
         type='DynamicVFE',
-        num_input_features=4,
-        num_filters=[64, 64],
+        in_channels=4,
+        feat_channels=[64, 64],
         with_distance=False,
         voxel_size=voxel_size,
         with_cluster_center=True,
...
@@ -12,8 +12,8 @@ model = dict(
     ),
     voxel_encoder=dict(
         type='DynamicPillarFeatureNet',
-        num_input_features=4,
-        num_filters=[64],
+        in_channels=4,
+        feat_channels=[64],
         with_distance=False,
         voxel_size=voxel_size,
         point_cloud_range=point_cloud_range,
@@ -189,6 +189,7 @@ momentum_config = dict(
     step_ratio_up=0.4,
 )
 checkpoint_config = dict(interval=1)
+evaluation = dict(interval=2)
 # yapf:disable
 log_config = dict(
     interval=50,
...
@@ -11,8 +11,7 @@ model = dict(
         max_voxels=(-1, -1),  # (training, testing) max_voxels
     ),
     voxel_encoder=dict(
-        type='DynamicVFEV3',
-        num_input_features=4,
+        type='DynamicSimpleVFE',
         voxel_size=voxel_size,
         point_cloud_range=point_cloud_range),
     middle_encoder=dict(
@@ -214,6 +213,7 @@ lr_config = dict(
     min_lr_ratio=1e-5)
 momentum_config = None
 checkpoint_config = dict(interval=1)
+evaluation = dict(interval=2)
 # yapf:disable
 log_config = dict(
     interval=50,
...
@@ -11,8 +11,7 @@ model = dict(
         max_voxels=(-1, -1),  # (training, testing) max_voxels
     ),
     voxel_encoder=dict(
-        type='DynamicVFEV3',
-        num_input_features=4,
+        type='DynamicSimpleVFE',
        voxel_size=voxel_size,
         point_cloud_range=point_cloud_range),
     middle_encoder=dict(
@@ -184,6 +183,7 @@ momentum_config = dict(
     step_ratio_up=0.4,
 )
 checkpoint_config = dict(interval=1)
+evaluation = dict(interval=2)
 # yapf:disable
 log_config = dict(
     interval=50,
...
@@ -2,7 +2,7 @@
 norm_cfg = dict(type='BN', requires_grad=False)
 model = dict(
     type='FasterRCNN',
-    pretrained=('open-mmlab://resnet50_caffe_bgr'),
+    pretrained=('open-mmlab://detectron2/resnet50_caffe'),
     backbone=dict(
         type='ResNet',
         depth=50,
...
@@ -10,11 +10,7 @@ model = dict(
         voxel_size=voxel_size,
         max_voxels=(16000, 40000)  # (training, testing) max_voxels
     ),
-    voxel_encoder=dict(
-        type='VoxelFeatureExtractorV3',
-        num_input_features=4,
-        num_filters=[4],
-        with_distance=False),
+    voxel_encoder=dict(type='HardSimpleVFE'),
     middle_encoder=dict(
         type='SparseUNet',
         in_channels=4,
@@ -306,6 +302,7 @@ momentum_config = dict(
     cyclic_times=1,
     step_ratio_up=0.4)
 checkpoint_config = dict(interval=1)
+evaluation = dict(interval=2)
 # yapf:disable
 log_config = dict(
     interval=50,
...
@@ -10,11 +10,7 @@ model = dict(
         voxel_size=voxel_size,
         max_voxels=(16000, 40000)  # (training, testing) max_voxels
     ),
-    voxel_encoder=dict(
-        type='VoxelFeatureExtractorV3',
-        num_input_features=4,
-        num_filters=[4],
-        with_distance=False),
+    voxel_encoder=dict(type='HardSimpleVFE'),
     middle_encoder=dict(
         type='SparseUNet',
         in_channels=4,
@@ -265,6 +261,7 @@ momentum_config = dict(
     cyclic_times=1,
     step_ratio_up=0.4)
 checkpoint_config = dict(interval=1)
+evaluation = dict(interval=2)
 # yapf:disable
 log_config = dict(
     interval=50,
...
@@ -10,8 +10,8 @@ model = dict(
     ),
     voxel_encoder=dict(
         type='PillarFeatureNet',
-        num_input_features=4,
-        num_filters=[64],
+        in_channels=4,
+        feat_channels=[64],
         with_distance=False,
         # these two arguments should be consistent with the voxel_generator
         voxel_size=[0.16, 0.16, 4],
@@ -192,6 +192,7 @@ momentum_config = dict(
     step_ratio_up=0.4,
 )
 checkpoint_config = dict(interval=1)
+evaluation = dict(interval=2)
 # yapf:disable
 log_config = dict(
     interval=50,
...
@@ -10,11 +10,7 @@ model = dict(
         voxel_size=voxel_size,
         max_voxels=(16000, 40000),  # (training, testing) max_voxels
     ),
-    voxel_encoder=dict(
-        type='VoxelFeatureExtractorV3',
-        num_input_features=4,
-        num_filters=[4],
-        with_distance=False),
+    voxel_encoder=dict(type='HardSimpleVFE'),
     middle_encoder=dict(
         type='SparseEncoder',
         in_channels=4,
@@ -104,9 +100,21 @@ db_sampler = dict(
     classes=class_names,
     sample_groups=dict(Car=15),
 )
+file_client_args = dict(backend='disk')
+# file_client_args = dict(
+#     backend='petrel', path_mapping=dict(data='s3://kitti_data/'))
 train_pipeline = [
-    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
-    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
+    dict(
+        type='LoadPointsFromFile',
+        load_dim=4,
+        use_dim=4,
+        file_client_args=file_client_args),
+    dict(
+        type='LoadAnnotations3D',
+        with_bbox_3d=True,
+        with_label_3d=True,
+        file_client_args=file_client_args),
     dict(type='ObjectSample', db_sampler=db_sampler),
     dict(
         type='ObjectNoise',
@@ -126,7 +134,11 @@ train_pipeline = [
     dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
 ]
 test_pipeline = [
-    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
+    dict(
+        type='LoadPointsFromFile',
+        load_dim=4,
+        use_dim=4,
+        file_client_args=file_client_args),
     dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
     dict(
         type='DefaultFormatBundle3D',
@@ -139,15 +151,18 @@ data = dict(
     samples_per_gpu=6,
     workers_per_gpu=4,
     train=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'kitti_infos_train.pkl',
-        split='training',
-        pts_prefix='velodyne_reduced',
-        pipeline=train_pipeline,
-        modality=input_modality,
-        classes=class_names,
-        test_mode=False),
+        type='RepeatDataset',
+        times=2,
+        dataset=dict(
+            type=dataset_type,
+            data_root=data_root,
+            ann_file=data_root + 'kitti_infos_train.pkl',
+            split='training',
+            pts_prefix='velodyne_reduced',
+            pipeline=train_pipeline,
+            modality=input_modality,
+            classes=class_names,
+            test_mode=False)),
     val=dict(
         type=dataset_type,
         data_root=data_root,
@@ -185,6 +200,7 @@ momentum_config = dict(
     step_ratio_up=0.4,
 )
 checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1)
 # yapf:disable
 log_config = dict(
     interval=50,
@@ -194,7 +210,7 @@ log_config = dict(
     ])
 # yapf:enable
 # runtime settings
-total_epochs = 80
+total_epochs = 40
 dist_params = dict(backend='nccl')
 log_level = 'INFO'
 work_dir = './work_dirs/sec_secfpn_80e'
...
@@ -2,7 +2,7 @@
 norm_cfg = dict(type='BN', requires_grad=False)
 model = dict(
     type='FasterRCNN',
-    pretrained=('open-mmlab://resnet50_caffe_bgr'),
+    pretrained=('open-mmlab://detectron2/resnet50_caffe'),
     backbone=dict(
         type='ResNet',
         depth=50,
@@ -120,13 +120,25 @@ classes = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
            'motorcycle', 'pedestrian', 'traffic_cone', 'barrier')
 img_norm_cfg = dict(
     mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
+# file_client_args = dict(backend='disk')
+file_client_args = dict(
+    backend='petrel',
+    path_mapping=dict({
+        './data/nuscenes/': 's3://nuscenes/nuscenes/',
+        'data/nuscenes/': 's3://nuscenes/nuscenes/'
+    }))
 train_pipeline = [
-    dict(type='LoadImageFromFile'),
-    dict(type='LoadAnnotations', with_bbox=True, with_mask=False),
+    dict(type='LoadImageFromFile', file_client_args=file_client_args),
+    dict(
+        type='LoadAnnotations',
+        with_bbox=True,
+        with_mask=False,
+        file_client_args=file_client_args),
     dict(
         type='Resize',
-        img_scale=[(1200, 720), (1920, 1080)],
-        multiscale_mode='range',
+        img_scale=(1280, 720),
+        ratio_range=(0.75, 1.25),
         keep_ratio=True),
     dict(type='RandomFlip', flip_ratio=0.5),
     dict(type='Normalize', **img_norm_cfg),
@@ -135,10 +147,10 @@ train_pipeline = [
     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
 ]
 test_pipeline = [
-    dict(type='LoadImageFromFile'),
+    dict(type='LoadImageFromFile', file_client_args=file_client_args),
     dict(
         type='MultiScaleFlipAug',
-        img_scale=(1600, 900),
+        img_scale=(1280, 720),
         flip=False,
         transforms=[
             dict(type='Resize', keep_ratio=True),
@@ -192,6 +204,6 @@ total_epochs = 12
 dist_params = dict(backend='nccl', port=29501)
 log_level = 'INFO'
 work_dir = './work_dirs/faster_rcnn_r50_fpn_1x'
-load_from = None
+load_from = './pretrain_mmdet/faster_r50_fpn_detectron2-caffe_freezeBN_l1-loss_roialign-v2_3x-4767dd8e.pth'  # noqa
 resume_from = None
 workflow = [('train', 1)]
# model settings
norm_cfg = dict(type='BN', requires_grad=False)
model = dict(
type='FasterRCNN',
pretrained='open-mmlab://regnetx_3.2gf',
backbone=dict(
type='RegNet',
arch='regnetx_3.2gf',
out_indices=(0, 1, 2, 3),
frozen_stages=1,
base_channels=32,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[96, 192, 432, 1008],
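        # the four stage output widths of regnetx_3.2gf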
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
ratios=[0.5, 1.0, 2.0],
strides=[4, 8, 16, 32, 64]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
roi_head=dict(
type='StandardRoIHead',
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=7, sample_num=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=10,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0))))
# model training and testing settings
train_cfg = dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=-1,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
# following the setting of detectron,
# which improves ~0.2 bbox mAP.
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False))
test_cfg = dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/nuscenes/'
classes = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
'motorcycle', 'pedestrian', 'traffic_cone', 'barrier')
img_norm_cfg = dict(
    # The mean and std are the ones used in PyCls when training RegNets
mean=[103.53, 116.28, 123.675],
std=[57.375, 57.12, 58.395],
to_rgb=False)
file_client_args = dict(
backend='petrel',
path_mapping=dict({
'./data/nuscenes/': 's3://nuscenes/nuscenes/',
'data/nuscenes/': 's3://nuscenes/nuscenes/'
}))
train_pipeline = [
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(
type='LoadAnnotations',
with_bbox=True,
with_mask=False,
file_client_args=file_client_args),
dict(
type='Resize',
img_scale=(1280, 720),
ratio_range=(0.75, 1.25),
keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(
type='MultiScaleFlipAug',
img_scale=(1280, 720),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
classes=classes,
ann_file=data_root + 'nuscenes_infos_train.coco.json',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
classes=classes,
ann_file=data_root + 'nuscenes_infos_val.coco.json',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
classes=classes,
ann_file=data_root + 'nuscenes_infos_val.coco.json',
pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 1000,
step=[8, 11])
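# the standard 1x schedule: decay at epochs 8 and 11 with total_epochs = 12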
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
evaluation = dict(interval=1)
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl', port=29501)
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_r50_fpn_1x'
load_from = './pretrain_mmdet/mask_rcnn_regnetx-3GF_fpn_mstrain_3x_coco_box-AP-43.1_mask-AP-38.7-e003695a.pth' # noqa
resume_from = None
workflow = [('train', 1)]
@@ -15,8 +15,8 @@ model = dict(
     ),
     pts_voxel_encoder=dict(
         type='HardVFE',
-        num_input_features=4,
-        num_filters=[64, 64],
+        in_channels=4,
+        feat_channels=[64, 64],
         with_distance=False,
         voxel_size=voxel_size,
         with_cluster_center=True,
@@ -85,9 +85,7 @@ model = dict(
             loss_weight=1.0),
         loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
         loss_dir=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
-    ),
-)
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
 # model training and testing settings
 train_cfg = dict(
     pts=dict(
@@ -138,10 +136,23 @@ db_sampler = dict(
         trailer=4,
         truck=4,
     ))
+file_client_args = dict(backend='disk')
+# file_client_args = dict(
+#     backend='petrel',
+#     path_mapping=dict({
+#         './data/nuscenes/': 's3://nuscenes/nuscenes/',
+#         'data/nuscenes/': 's3://nuscenes/nuscenes/'
+#     }))
 train_pipeline = [
-    dict(type='LoadPointsFromFile', load_dim=5, use_dim=5),
-    dict(type='LoadPointsFromMultiSweeps', sweeps_num=10),
+    dict(
+        type='LoadPointsFromFile',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=file_client_args),
+    dict(
+        type='LoadPointsFromMultiSweeps',
+        sweeps_num=10,
+        file_client_args=file_client_args),
     dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
     dict(
         type='GlobalRotScale',
@@ -156,8 +167,15 @@ train_pipeline = [
     dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
 ]
 test_pipeline = [
-    dict(type='LoadPointsFromFile', load_dim=5, use_dim=5),
-    dict(type='LoadPointsFromMultiSweeps', sweeps_num=10),
+    dict(
+        type='LoadPointsFromFile',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=file_client_args),
+    dict(
+        type='LoadPointsFromMultiSweeps',
+        sweeps_num=10,
+        file_client_args=file_client_args),
     dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
     dict(type='RandomFlip3D', flip_ratio=0),
     dict(
...
 import torch
 from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
-from mmcv.runner import DistSamplerSeedHook, Runner
+from mmcv.runner import DistSamplerSeedHook, Runner, build_optimizer
 from mmdet3d.utils import get_root_logger
 from mmdet.apis.train import parse_losses
 from mmdet.core import (DistEvalHook, DistOptimizerHook, EvalHook,
-                        Fp16OptimizerHook, build_optimizer)
+                        Fp16OptimizerHook)
 from mmdet.datasets import build_dataloader, build_dataset
...
@@ -7,7 +7,8 @@ from .iou_calculators import (BboxOverlaps3D, BboxOverlapsNearest3D,
 from .samplers import (BaseSampler, CombinedSampler,
                        InstanceBalancedPosSampler, IoUBalancedNegSampler,
                        PseudoSampler, RandomSampler, SamplingResult)
-from .structures import Box3DMode, CameraInstance3DBoxes, LiDARInstance3DBoxes
+from .structures import (BaseInstance3DBoxes, Box3DMode, CameraInstance3DBoxes,
+                         DepthInstance3DBoxes, LiDARInstance3DBoxes)
 from .transforms import (bbox3d2result, bbox3d2roi,
                          box3d_to_corner3d_upright_depth,
                          boxes3d_to_bev_torch_lidar)
@@ -25,5 +26,6 @@ __all__ = [
     'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d',
     'bbox_overlaps_3d', 'Box3DMode', 'LiDARInstance3DBoxes',
     'CameraInstance3DBoxes', 'bbox3d2roi', 'bbox3d2result',
-    'box3d_to_corner3d_upright_depth'
+    'box3d_to_corner3d_upright_depth', 'DepthInstance3DBoxes',
+    'BaseInstance3DBoxes'
 ]
@@ -466,8 +466,8 @@ def get_frustum(bbox_image, C, near_clip=0.001, far_clip=100):
 def surface_equ_3d(polygon_surfaces):
     # return [a, b, c], d in ax+by+cz+d=0
     # polygon_surfaces: [num_polygon, num_surfaces, num_points_of_polygon, 3]
-    surface_vec = polygon_surfaces[:, :, :2, :] - polygon_surfaces[:, :,
-                                                                   1:3, :]
+    surface_vec = polygon_surfaces[:, :, :2, :] - \
+        polygon_surfaces[:, :, 1:3, :]
     # normal_vec: [..., 3]
     normal_vec = np.cross(surface_vec[:, :, 0, :], surface_vec[:, :, 1, :])
     # print(normal_vec.shape, points[..., 0, :].shape)
...
+from .base_box3d import BaseInstance3DBoxes
 from .box_3d_mode import Box3DMode
 from .cam_box3d import CameraInstance3DBoxes
+from .depth_box3d import DepthInstance3DBoxes
 from .lidar_box3d import LiDARInstance3DBoxes
 
-__all__ = ['Box3DMode', 'LiDARInstance3DBoxes', 'CameraInstance3DBoxes']
+__all__ = [
+    'Box3DMode', 'BaseInstance3DBoxes', 'LiDARInstance3DBoxes',
+    'CameraInstance3DBoxes', 'DepthInstance3DBoxes'
+]
@@ -10,13 +10,24 @@ from .utils import limit_period, xywhr2xyxyr
 class BaseInstance3DBoxes(object):
     """Base class for 3D Boxes
 
+    Note:
+        The box is bottom centered, i.e. the relative position of origin in
+        the box is [0.5, 0.5, 0].
+
     Args:
-        tensor (torch.Tensor | np.ndarray): a Nxbox_dim matrix.
+        tensor (torch.Tensor | np.ndarray | list): a Nxbox_dim matrix.
         box_dim (int): number of the dimension of a box
             Each row is (x, y, z, x_size, y_size, z_size, yaw).
+            Default to 7.
+        with_yaw (bool): Whether the box is with yaw rotation.
+            If False, the value of yaw will be set to 0 as minmax boxes.
+            Default to True.
+        origin (tuple): The relative position of origin in the box.
+            Default to [0.5, 0.5, 0]. This will guide the box be converted to
+            [0.5, 0.5, 0] mode.
     """
 
-    def __init__(self, tensor, box_dim=7):
+    def __init__(self, tensor, box_dim=7, with_yaw=True, origin=[0.5, 0.5, 0]):
         if isinstance(tensor, torch.Tensor):
             device = tensor.device
         else:
@@ -28,9 +39,22 @@ class BaseInstance3DBoxes(object):
             tensor = tensor.reshape((0, box_dim)).to(
                 dtype=torch.float32, device=device)
         assert tensor.dim() == 2 and tensor.size(-1) == box_dim, tensor.size()
-        self.box_dim = box_dim
+
+        if not with_yaw and tensor.shape[-1] == 6:
+            assert box_dim == 6
+            fake_rot = tensor.new_zeros(tensor.shape[0], 1)
+            tensor = torch.cat((tensor, fake_rot), dim=-1)
+            self.box_dim = box_dim + 1
+        else:
+            self.box_dim = box_dim
+        self.with_yaw = with_yaw
         self.tensor = tensor
 
+        if origin != [0.5, 0.5, 0]:
+            dst = self.tensor.new_tensor([0.5, 0.5, 0])
+            src = self.tensor.new_tensor(origin)
+            self.tensor[:, :3] += self.tensor[:, 3:6] * (dst - src)
+
     @property
     def volume(self):
         """Computes the volume of all the boxes.
@@ -51,12 +75,21 @@ class BaseInstance3DBoxes(object):
         """
         return self.tensor[:, 3:6]
 
+    @property
+    def yaw(self):
+        """Obtain the rotation of all the boxes.
+
+        Returns:
+            torch.Tensor: a vector with yaw of each box.
+        """
+        return self.tensor[:, 6]
+
     @property
     def height(self):
         """Obtain the height of all the boxes.
 
         Returns:
-            torch.Tensor: a vector with volume of each box.
+            torch.Tensor: a vector with height of each box.
         """
         return self.tensor[:, 5]
@@ -135,8 +168,8 @@ class BaseInstance3DBoxes(object):
         pass
 
     @abstractmethod
-    def flip(self):
-        """Flip the boxes in horizontal direction
+    def flip(self, bev_direction='horizontal'):
+        """Flip the boxes in BEV along given BEV direction
         """
         pass
@@ -184,8 +217,26 @@ class BaseInstance3DBoxes(object):
                 (x_min, y_min, x_max, y_max)
 
         Returns:
-            a binary vector, indicating whether each box is inside
+            torch.Tensor: Indicating whether each box is inside
                 the reference range.
+        """
+        pass
+
+    @abstractmethod
+    def convert_to(self, dst, rt_mat=None):
+        """Convert self to `dst` mode.
+
+        Args:
+            dst (BoxMode): the target Box mode
+            rt_mat (np.ndarray | torch.Tensor): The rotation and translation
+                matrix between different coordinates. Defaults to None.
+                The conversion from `src` coordinates to `dst` coordinates
+                usually comes along the change of sensors, e.g., from camera
+                to LiDAR. This requires a transformation matrix.
+
+        Returns:
+            BaseInstance3DBoxes:
+                The converted box of the same type in the `dst` mode.
         """
         pass
@@ -193,8 +244,7 @@ class BaseInstance3DBoxes(object):
         """Scale the box with horizontal and vertical scaling factors
 
         Args:
-            scale_factors (float):
-                scale factors to scale the boxes.
+            scale_factors (float): scale factors to scale the boxes.
         """
         self.tensor[:, :6] *= scale_factor
         self.tensor[:, 7:] *= scale_factor
@@ -218,9 +268,8 @@ class BaseInstance3DBoxes(object):
             threshold (float): the threshold of minimal sizes
 
         Returns:
-            Tensor:
-                a binary vector which represents whether each box is empty
-                (False) or non-empty (True).
+            torch.Tensor: a binary vector which represents whether each
+                box is empty (False) or non-empty (True).
         """
         box = self.tensor
         size_x = box[..., 3]
@@ -245,15 +294,19 @@ class BaseInstance3DBoxes(object):
             subject to Pytorch's indexing semantics.
 
         Returns:
-            Boxes: Create a new :class:`Boxes` by indexing.
+            BaseInstance3DBoxes: Create a new :class:`BaseInstance3DBoxes`
+                by indexing.
         """
         original_type = type(self)
         if isinstance(item, int):
-            return original_type(self.tensor[item].view(1, -1))
+            return original_type(
+                self.tensor[item].view(1, -1),
+                box_dim=self.box_dim,
+                with_yaw=self.with_yaw)
         b = self.tensor[item]
         assert b.dim() == 2, \
             f'Indexing on Boxes with {item} failed to return a matrix!'
-        return original_type(b)
+        return original_type(b, box_dim=self.box_dim, with_yaw=self.with_yaw)
 
     def __len__(self):
         return self.tensor.shape[0]
@@ -283,24 +336,30 @@ class BaseInstance3DBoxes(object):
     def to(self, device):
         original_type = type(self)
-        return original_type(self.tensor.to(device))
+        return original_type(
+            self.tensor.to(device),
+            box_dim=self.box_dim,
+            with_yaw=self.with_yaw)
 
     def clone(self):
         """Clone the Boxes.
 
         Returns:
-            Boxes
+            BaseInstance3DBoxes: Box object with the same properties as self.
         """
         original_type = type(self)
-        return original_type(self.tensor.clone())
+        return original_type(
+            self.tensor.clone(), box_dim=self.box_dim, with_yaw=self.with_yaw)
 
     @property
     def device(self):
         return self.tensor.device
 
     def __iter__(self):
-        """
-        Yield a box as a Tensor of shape (4,) at a time.
+        """Yield a box as a Tensor of shape (4,) at a time.
+
+        Returns:
+            torch.Tensor: a box of shape (4,).
         """
         yield from self.tensor
@@ -387,3 +446,23 @@ class BaseInstance3DBoxes(object):
         iou3d = overlaps_3d / torch.clamp(volume1, min=1e-8)
         return iou3d
+
+    def new_box(self, data):
+        """Create a new box object with data.
+
+        The new box and its tensor has the similar properties
+        as self and self.tensor, respectively.
+
+        Args:
+            data (torch.Tensor | numpy.array | list): Data which the
+                returned Tensor copies.
+
+        Returns:
+            BaseInstance3DBoxes: A new bbox with data and other
+                properties are similar to self.
+        """
+        new_tensor = self.tensor.new_tensor(data) \
+            if not isinstance(data, torch.Tensor) else data.to(self.device)
+        original_type = type(self)
+        return original_type(
+            new_tensor, box_dim=self.box_dim, with_yaw=self.with_yaw)
@@ -5,6 +5,7 @@ import torch
 
 from .base_box3d import BaseInstance3DBoxes
 from .cam_box3d import CameraInstance3DBoxes
+from .depth_box3d import DepthInstance3DBoxes
 from .lidar_box3d import LiDARInstance3DBoxes
@@ -61,7 +62,8 @@ class Box3DMode(IntEnum):
         """Convert boxes from `src` mode to `dst` mode.
 
         Args:
-            box (tuple | list | np.ndarray | torch.Tensor):
+            box (tuple | list | np.ndarray |
+                torch.Tensor | BaseInstance3DBoxes):
                 can be a k-tuple, k-list or an Nxk array/tensor, where k = 7
             src (BoxMode): the src Box mode
             dst (BoxMode): the target Box mode
@@ -72,7 +74,7 @@ class Box3DMode(IntEnum):
                 to LiDAR. This requires a transformation matrix.
 
         Returns:
-            (tuple | list | np.ndarray | torch.Tensor):
+            (tuple | list | np.ndarray | torch.Tensor | BaseInstance3DBoxes):
                 The converted box of the same type.
         """
         if src == dst:
@@ -113,6 +115,14 @@ class Box3DMode(IntEnum):
             if rt_mat is None:
                 rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
             xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
+        elif src == Box3DMode.LIDAR and dst == Box3DMode.DEPTH:
+            if rt_mat is None:
+                rt_mat = arr.new_tensor([[0, -1, 0], [1, 0, 0], [0, 0, 1]])
+            xyz_size = torch.cat([y_size, x_size, z_size], dim=-1)
+        elif src == Box3DMode.DEPTH and dst == Box3DMode.LIDAR:
+            if rt_mat is None:
+                rt_mat = arr.new_tensor([[0, 1, 0], [-1, 0, 0], [0, 0, 1]])
+            xyz_size = torch.cat([y_size, x_size, z_size], dim=-1)
         else:
             raise NotImplementedError(
                 f'Conversion from Box3DMode {src} to {dst} '
@@ -141,10 +151,13 @@ class Box3DMode(IntEnum):
             target_type = CameraInstance3DBoxes
         elif dst == Box3DMode.LIDAR:
             target_type = LiDARInstance3DBoxes
+        elif dst == Box3DMode.DEPTH:
+            target_type = DepthInstance3DBoxes
         else:
             raise NotImplementedError(
                 f'Conversion to {dst} through {original_type}'
                 ' is not supported yet')
-        return target_type(arr, box_dim=arr.size(-1))
+        return target_type(
+            arr, box_dim=arr.size(-1), with_yaw=box.with_yaw)
     else:
         return arr