Commit f27d308f authored by yinchimaoliang

merge master

parents c66ae813 27ebcfac
@@ -3,6 +3,6 @@ line_length = 79
 multi_line_output = 0
 known_standard_library = setuptools
 known_first_party = mmdet,mmdet3d
-known_third_party = cv2,mmcv,numba,numpy,nuscenes,plyfile,pycocotools,pyquaternion,pytest,scipy,shapely,six,skimage,torch,torchvision
+known_third_party = cv2,mmcv,numba,numpy,nuscenes,plyfile,pycocotools,pyquaternion,pytest,scipy,shapely,six,skimage,terminaltables,torch,torchvision
 no_lines_before = STDLIB,LOCALFOLDER
 default_section = THIRDPARTY
@@ -15,6 +15,7 @@ repos:
     rev: v0.30.0
     hooks:
       - id: yapf
   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v2.5.0
     hooks:
......
# model settings
voxel_size = [0.25, 0.25, 8]
point_cloud_range = [-50, -50, -5, 50, 50, 3]
class_names = [
'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
model = dict(
type='MVXFasterRCNNV2',
pts_voxel_layer=dict(
max_num_points=64, # max_points_per_voxel
point_cloud_range=point_cloud_range, # velodyne coordinates, x, y, z
voxel_size=voxel_size,
        max_voxels=(30000, 40000),  # (training, testing) max_voxels
),
pts_voxel_encoder=dict(
type='HardVFE',
num_input_features=4,
num_filters=[64, 64],
with_distance=False,
voxel_size=voxel_size,
with_cluster_center=True,
with_voxel_center=True,
point_cloud_range=point_cloud_range,
norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
pts_middle_encoder=dict(
type='PointPillarsScatter',
in_channels=64,
output_shape=[400, 400], # checked from PointCloud3D
),
pts_backbone=dict(
type='SECOND',
in_channels=64,
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
layer_nums=[3, 5, 5],
layer_strides=[2, 2, 2],
out_channels=[64, 128, 256],
),
pts_neck=dict(
type='SECONDFPN',
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
in_channels=[64, 128, 256],
upsample_strides=[1, 2, 4],
out_channels=[128, 128, 128],
),
pts_bbox_head=dict(
type='Anchor3DHead',
num_classes=10,
in_channels=384,
feat_channels=384,
use_direction_classifier=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[
[-49.6, -49.6, -1.80032795, 49.6, 49.6, -1.80032795],
[-49.6, -49.6, -1.74440365, 49.6, 49.6, -1.74440365],
[-49.6, -49.6, -1.68526504, 49.6, 49.6, -1.68526504],
[-49.6, -49.6, -1.67339111, 49.6, 49.6, -1.67339111],
[-49.6, -49.6, -1.61785072, 49.6, 49.6, -1.61785072],
[-49.6, -49.6, -1.80984986, 49.6, 49.6, -1.80984986],
[-49.6, -49.6, -1.763965, 49.6, 49.6, -1.763965],
],
sizes=[
[1.95017717, 4.60718145, 1.72270761], # car
[2.4560939, 6.73778078, 2.73004906], # truck
[2.87427237, 12.01320693, 3.81509561], # trailer
[0.60058911, 1.68452161, 1.27192197], # bicycle
[0.66344886, 0.7256437, 1.75748069], # pedestrian
[0.39694519, 0.40359262, 1.06232151], # traffic_cone
[2.49008838, 0.48578221, 0.98297065], # barrier
],
custom_values=[0, 0],
rotations=[0, 1.57],
reshape_out=True),
assigner_per_size=False,
diff_rad_by_sin=True,
dir_offset=0.7854, # pi/4
dir_limit_offset=0,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
# model training and testing settings
train_cfg = dict(
pts=dict(
assigner=dict( # for Car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
allowed_border=0,
code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
pos_weight=-1,
debug=False))
test_cfg = dict(
pts=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_pre=1000,
nms_thr=0.2,
score_thr=0.05,
min_bbox_size=0,
max_num=500
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
))
# dataset settings
dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
input_modality = dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False,
)
db_sampler = dict(
data_root=data_root,
info_path=data_root + 'nuscenes_dbinfos_train.pkl',
rate=1.0,
object_rot_range=[0.0, 0.0],
prepare=dict(),
classes=class_names,
sample_groups=dict(
bus=4,
trailer=4,
truck=4,
))
file_client_args = dict(
backend='petrel',
path_mapping=dict({
'./data/nuscenes/': 's3://nuscenes/nuscenes/',
'data/nuscenes/': 's3://nuscenes/nuscenes/'
}))
train_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.3925, 0.3925],
scaling_uniform_noise=[0.95, 1.05],
trans_normal_noise=[0, 0, 0]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='RandomFlip3D', flip_ratio=0),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points']),
]
data = dict(
samples_per_gpu=4,
workers_per_gpu=4,
train=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_train.pkl',
pipeline=train_pipeline,
modality=input_modality,
classes=class_names,
test_mode=False),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True))
# optimizer
optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 1000,
step=[20, 23])
momentum_config = None
checkpoint_config = dict(interval=1)
# yapf:disable
evaluation = dict(interval=24)
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 24
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d'
load_from = None
resume_from = None
workflow = [('train', 1)]
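
For reference, the BEV grid consumed by PointPillarsScatter follows directly from point_cloud_range and voxel_size; a quick arithmetic check (illustrative, not part of the commit) of the output_shape=[400, 400] above:

# Illustrative check: BEV grid size = range extent / voxel size, per axis.
point_cloud_range = [-50, -50, -5, 50, 50, 3]
voxel_size = [0.25, 0.25, 8]
nx = int((point_cloud_range[3] - point_cloud_range[0]) / voxel_size[0])  # 400
ny = int((point_cloud_range[4] - point_cloud_range[1]) / voxel_size[1])  # 400
assert [ny, nx] == [400, 400]  # matches output_shape in pts_middle_encoder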
# model settings
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1] # velodyne coordinates, x, y, z
model = dict(
type='VoxelNet',
voxel_layer=dict(
max_num_points=5, # max_points_per_voxel
point_cloud_range=point_cloud_range,
voxel_size=voxel_size,
        max_voxels=(16000, 40000),  # (training, testing) max_voxels
),
voxel_encoder=dict(
type='VoxelFeatureExtractorV3',
num_input_features=4,
num_filters=[4],
with_distance=False),
middle_encoder=dict(
type='SparseEncoder',
in_channels=4,
sparse_shape=[41, 1600, 1408],
order=('conv', 'norm', 'act')),
backbone=dict(
type='SECOND',
in_channels=256,
layer_nums=[5, 5],
layer_strides=[1, 2],
out_channels=[128, 256],
),
neck=dict(
type='SECONDFPN',
in_channels=[128, 256],
upsample_strides=[1, 2],
out_channels=[256, 256],
),
bbox_head=dict(
type='Anchor3DHead',
num_classes=1,
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
sizes=[[1.6, 3.9, 1.56]],
rotations=[0, 1.57],
reshape_out=True),
diff_rad_by_sin=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
),
)
# model training and testing settings
train_cfg = dict(
assigner=dict(
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
allowed_border=0,
pos_weight=-1,
debug=False)
test_cfg = dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_thr=0.01,
score_thr=0.3,
min_bbox_size=0,
nms_pre=100,
max_num=50)
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Car']
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict(
use_lidar=False,
use_lidar_reduced=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False,
)
db_sampler = dict(
data_root=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
object_rot_range=[0.0, 0.0],
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5),
),
classes=class_names,
sample_groups=dict(Car=15),
)
file_client_args = dict(
backend='petrel', path_mapping=dict(data='s3://kitti_data/'))
train_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=4,
use_dim=4,
file_client_args=file_client_args),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
file_client_args=file_client_args),
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
num_try=100,
loc_noise_std=[1.0, 1.0, 0.5],
global_rot_range=[0.0, 0.0],
rot_uniform_noise=[-0.78539816, 0.78539816]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.78539816, 0.78539816],
scaling_uniform_noise=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=4,
use_dim=4,
file_client_args=file_client_args),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points']),
]
data = dict(
samples_per_gpu=6,
workers_per_gpu=4,
train=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=train_pipeline,
modality=input_modality,
classes=class_names,
test_mode=False),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True))
# optimizer
lr = 0.0018 # max learning rate
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(
policy='cyclic',
target_ratio=(10, 1e-4),
cyclic_times=1,
step_ratio_up=0.4,
)
momentum_config = dict(
policy='cyclic',
target_ratio=(0.85 / 0.95, 1),
cyclic_times=1,
step_ratio_up=0.4,
)
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 80
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/sec_secfpn_80e'
load_from = None
resume_from = None
workflow = [('train', 1)]
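
The SparseEncoder's sparse_shape follows from the same arithmetic; a quick check (illustrative, not part of the commit). The z dimension is one larger than the raw voxel count, which this sketch treats as a convention of SECOND-style sparse encoders rather than something stated in the config:

# Illustrative check: sparse_shape is the voxel grid in [z, y, x] order.
point_cloud_range = [0, -40, -3, 70.4, 40, 1]
voxel_size = [0.05, 0.05, 0.1]
nx = round((point_cloud_range[3] - point_cloud_range[0]) / voxel_size[0])  # 1408
ny = round((point_cloud_range[4] - point_cloud_range[1]) / voxel_size[1])  # 1600
nz = round((point_cloud_range[5] - point_cloud_range[2]) / voxel_size[2])  # 40
assert [nz + 1, ny, nx] == [41, 1600, 1408]  # the extra z slot is assumed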
@@ -255,13 +255,11 @@ optimizer = dict(type='AdamW', lr=0.003, betas=(0.95, 0.99), weight_decay=0.01)
 # max_norm=10 is better for SECOND
 optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
 lr_config = dict(
-    policy='cosine',
+    policy='CosineAnealing',
     warmup='linear',
     warmup_iters=1000,
     warmup_ratio=1.0 / 10,
-    target_lr=1e-5,
-    as_ratio=True,
-)
+    min_lr_ratio=1e-5)
 momentum_config = None
 checkpoint_config = dict(interval=1)
 # yapf:disable
......
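
Replacing target_lr/as_ratio with min_lr_ratio keeps the schedule's meaning: the LR floor is expressed as a ratio of the base LR. A minimal sketch, assuming mmcv's usual cosine annealing form:

import math

def cosine_lr(base_lr, min_lr_ratio, progress):
    # progress runs from 0 (start of decay) to 1 (end of training)
    end = base_lr * min_lr_ratio
    return end + 0.5 * (base_lr - end) * (1 + math.cos(math.pi * progress))

cosine_lr(0.003, 1e-5, 0.0)  # ~0.003, the base LR
cosine_lr(0.003, 1e-5, 1.0)  # ~3e-8, i.e. base_lr * min_lr_ratio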
@@ -207,13 +207,11 @@ optimizer = dict(
     weight_decay=0.001)
 optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
 lr_config = dict(
-    policy='cosine',
+    policy='CosineAnealing',
     warmup='linear',
     warmup_iters=1000,
     warmup_ratio=1.0 / 10,
-    target_lr=1e-5,
-    as_ratio=True,
-)
+    min_lr_ratio=1e-5)
 momentum_config = None
 checkpoint_config = dict(interval=1)
 # yapf:disable
......
# model settings
model = dict(
type='VoteNet',
backbone=dict(
type='PointNet2SASSG',
in_channels=4,
num_points=(2048, 1024, 512, 256),
radius=(0.2, 0.4, 0.8, 1.2),
num_samples=(64, 32, 16, 16),
sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
(128, 128, 256)),
fp_channels=((256, 256), (256, 256)),
norm_cfg=dict(type='BN2d'),
pool_mod='max'),
bbox_head=dict(
type='VoteHead',
num_classes=18,
bbox_coder=dict(
type='PartialBinBasedBBoxCoder',
num_sizes=18,
num_dir_bins=1,
with_rot=False,
mean_sizes=[[0.76966727, 0.8116021, 0.92573744],
[1.876858, 1.8425595, 1.1931566],
[0.61328, 0.6148609, 0.7182701],
[1.3955007, 1.5121545, 0.83443564],
[0.97949594, 1.0675149, 0.6329687],
[0.531663, 0.5955577, 1.7500148],
[0.9624706, 0.72462326, 1.1481868],
[0.83221924, 1.0490936, 1.6875663],
[0.21132214, 0.4206159, 0.5372846],
[1.4440073, 1.8970833, 0.26985747],
[1.0294262, 1.4040797, 0.87554324],
[1.3766412, 0.65521795, 1.6813129],
[0.6650819, 0.71111923, 1.298853],
[0.41999173, 0.37906948, 1.7513971],
[0.59359556, 0.5912492, 0.73919016],
[0.50867593, 0.50656086, 0.30136237],
[1.1511526, 1.0546296, 0.49706793],
[0.47535285, 0.49249494, 0.5802117]]),
vote_moudule_cfg=dict(
in_channels=256,
vote_per_seed=1,
gt_per_seed=3,
conv_channels=(256, 256),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
norm_feats=True,
vote_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='none',
loss_dst_weight=10.0)),
vote_aggregation_cfg=dict(
num_point=256,
radius=0.3,
num_sample=16,
mlp_channels=[256, 128, 128, 128],
use_xyz=True,
normalize_xyz=True),
feat_channels=(128, 128),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
objectness_loss=dict(
type='CrossEntropyLoss',
class_weight=[0.2, 0.8],
reduction='sum',
loss_weight=5.0),
center_loss=dict(
type='ChamferDistance',
mode='l2',
reduction='sum',
loss_src_weight=10.0,
loss_dst_weight=10.0),
dir_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
dir_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
size_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
size_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0),
semantic_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)))
# model training and testing settings
train_cfg = dict(pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote')
test_cfg = dict(
sample_mod='seed', nms_thr=0.25, score_thr=0.05, per_class_proposal=True)
# dataset settings
dataset_type = 'ScanNetDataset'
data_root = './data/scannet/'
class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
'bookshelf', 'picture', 'counter', 'desk', 'curtain',
'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
'garbagebin')
train_pipeline = [
dict(
type='LoadPointsFromFile',
shift_height=True,
load_dim=6,
use_dim=[0, 1, 2]),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
with_mask_3d=True,
with_seg_3d=True),
dict(
type='PointSegClassMapping',
valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34,
36, 39)),
dict(type='IndoorPointSample', num_points=40000),
dict(type='IndoorFlipData', flip_ratio_yz=0.5, flip_ratio_xz=0.5),
dict(
type='IndoorGlobalRotScale',
shift_height=True,
rot_range=[-1 / 36, 1 / 36],
scale_range=None),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
keys=[
'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
'pts_instance_mask'
])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
shift_height=True,
load_dim=6,
use_dim=[0, 1, 2]),
dict(type='IndoorPointSample', num_points=40000),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points'])
]
data = dict(
samples_per_gpu=8,
workers_per_gpu=4,
train=dict(
type='RepeatDataset',
times=5,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_train.pkl',
pipeline=train_pipeline,
filter_empty_gt=False,
classes=class_names)),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
test_mode=True),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
test_mode=True))
# optimizer
lr = 0.008 # max learning rate
optimizer = dict(type='Adam', lr=lr)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(policy='step', warmup=None, step=[24, 32])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=10,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 36
dist_params = dict(backend='nccl')
log_level = 'INFO'
find_unused_parameters = True
work_dir = './work_dirs/votenet_scannet'
load_from = None
resume_from = None
workflow = [('train', 1)]
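
The PointSegClassMapping step in the train pipeline above remaps raw ScanNet category ids to contiguous training labels. A conceptual sketch (the actual implementation lives in the point_seg_class_mapping.py module added by this commit; the ignore index used for unlisted categories is an assumption):

import numpy as np

valid_cat_ids = (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39)
cat_id2class = {cat_id: i for i, cat_id in enumerate(valid_cat_ids)}
neg_cls = len(valid_cat_ids)  # assumed ignore index for unlisted categories

raw_seg = np.array([3, 39, 1, 12])  # per-point raw category ids
mapped = np.array([cat_id2class.get(c, neg_cls) for c in raw_seg])
# mapped -> [0, 17, 18, 9]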
# model settings
model = dict(
type='VoteNet',
backbone=dict(
type='PointNet2SASSG',
in_channels=4,
num_points=(2048, 1024, 512, 256),
radius=(0.2, 0.4, 0.8, 1.2),
num_samples=(64, 32, 16, 16),
sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
(128, 128, 256)),
fp_channels=((256, 256), (256, 256)),
norm_cfg=dict(type='BN2d'),
pool_mod='max'),
bbox_head=dict(
type='VoteHead',
num_classes=10,
bbox_coder=dict(
type='PartialBinBasedBBoxCoder',
num_sizes=10,
num_dir_bins=12,
with_rot=True,
mean_sizes=[[2.114256, 1.620300, 0.927272],
[0.791118, 1.279516, 0.718182],
[0.923508, 1.867419, 0.845495],
[0.591958, 0.552978, 0.827272],
[0.699104, 0.454178, 0.75625],
[0.69519, 1.346299, 0.736364],
[0.528526, 1.002642, 1.172878],
[0.500618, 0.632163, 0.683424],
[0.404671, 1.071108, 1.688889],
[0.76584, 1.398258, 0.472728]]),
vote_moudule_cfg=dict(
in_channels=256,
vote_per_seed=1,
gt_per_seed=3,
conv_channels=(256, 256),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
norm_feats=True,
vote_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='none',
loss_dst_weight=10.0)),
vote_aggregation_cfg=dict(
num_point=256,
radius=0.3,
num_sample=16,
mlp_channels=[256, 128, 128, 128],
use_xyz=True,
normalize_xyz=True),
feat_channels=(128, 128),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
objectness_loss=dict(
type='CrossEntropyLoss',
class_weight=[0.2, 0.8],
reduction='sum',
loss_weight=5.0),
center_loss=dict(
type='ChamferDistance',
mode='l2',
reduction='sum',
loss_src_weight=10.0,
loss_dst_weight=10.0),
dir_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
dir_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
size_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
size_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0),
semantic_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)))
# model training and testing settings
train_cfg = dict(pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote')
test_cfg = dict(
sample_mod='seed', nms_thr=0.25, score_thr=0.05, per_class_proposal=True)
# dataset settings
dataset_type = 'SUNRGBDDataset'
data_root = 'data/sunrgbd/'
class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
'night_stand', 'bookshelf', 'bathtub')
train_pipeline = [
dict(
type='LoadPointsFromFile',
shift_height=True,
load_dim=6,
use_dim=[0, 1, 2]),
dict(type='LoadAnnotations3D'),
dict(type='IndoorFlipData', flip_ratio_yz=0.5),
dict(
type='IndoorGlobalRotScale',
shift_height=True,
rot_range=[-1 / 6, 1 / 6],
scale_range=[0.85, 1.15]),
dict(type='IndoorPointSample', num_points=20000),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
shift_height=True,
load_dim=6,
use_dim=[0, 1, 2]),
dict(type='IndoorPointSample', num_points=20000),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points'])
]
data = dict(
samples_per_gpu=16,
workers_per_gpu=4,
train=dict(
type='RepeatDataset',
times=5,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'sunrgbd_infos_train.pkl',
pipeline=train_pipeline,
classes=class_names,
filter_empty_gt=False)),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'sunrgbd_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
test_mode=True),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'sunrgbd_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
test_mode=True))
# optimizer
lr = 0.008 # max learning rate
optimizer = dict(type='Adam', lr=lr)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(policy='step', warmup=None, step=[24, 32])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=30,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 36
dist_params = dict(backend='nccl')
log_level = 'INFO'
find_unused_parameters = True
work_dir = './work_dirs/votenet_sunrgbd'
load_from = None
resume_from = None
workflow = [('train', 1)]
@@ -8,7 +8,9 @@ from .samplers import (BaseSampler, CombinedSampler,
                        InstanceBalancedPosSampler, IoUBalancedNegSampler,
                        PseudoSampler, RandomSampler, SamplingResult)
 from .structures import Box3DMode, CameraInstance3DBoxes, LiDARInstance3DBoxes
-from .transforms import bbox3d2result, bbox3d2roi, boxes3d_to_bev_torch_lidar
+from .transforms import (bbox3d2result, bbox3d2roi,
+                         box3d_to_corner3d_upright_depth,
+                         boxes3d_to_bev_torch_lidar)

 from .assign_sampling import (  # isort:skip, avoid recursive imports
     build_bbox_coder,  # temporally settings
@@ -22,5 +24,6 @@ __all__ = [
     'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'boxes3d_to_bev_torch_lidar',
     'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d',
     'bbox_overlaps_3d', 'Box3DMode', 'LiDARInstance3DBoxes',
-    'CameraInstance3DBoxes', 'bbox3d2roi', 'bbox3d2result'
+    'CameraInstance3DBoxes', 'bbox3d2roi', 'bbox3d2result',
+    'box3d_to_corner3d_upright_depth'
 ]
 from mmdet.core.bbox import build_bbox_coder
 from .delta_xyzwhlr_bbox_coder import DeltaXYZWLHRBBoxCoder
+from .partial_bin_based_bbox_coder import PartialBinBasedBBoxCoder

-__all__ = ['build_bbox_coder', 'DeltaXYZWLHRBBoxCoder']
+__all__ = [
+    'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'PartialBinBasedBBoxCoder'
+]
import numpy as np
import torch
from mmdet.core.bbox import BaseBBoxCoder
from mmdet.core.bbox.builder import BBOX_CODERS
@BBOX_CODERS.register_module()
class PartialBinBasedBBoxCoder(BaseBBoxCoder):
"""Partial bin based bbox coder
Args:
num_dir_bins (int): Number of bins to encode direction angle.
num_sizes (int): Number of size clusters.
mean_sizes (list[list[int]]): Mean size of bboxes in each class.
with_rot (bool): Whether the bbox is with rotation.
"""
def __init__(self, num_dir_bins, num_sizes, mean_sizes, with_rot=True):
super(PartialBinBasedBBoxCoder, self).__init__()
assert len(mean_sizes) == num_sizes
self.num_dir_bins = num_dir_bins
self.num_sizes = num_sizes
self.mean_sizes = mean_sizes
self.with_rot = with_rot
def encode(self, gt_bboxes_3d, gt_labels_3d):
"""Encode ground truth to prediction targets.
Args:
gt_bboxes_3d (Tensor): 3d gt bboxes with shape (n, 7).
gt_labels_3d (Tensor): Gt classes.
Returns:
tuple: Targets of center, size and direction.
"""
# generate center target
center_target = gt_bboxes_3d[..., 0:3]
# generate bbox size target
size_class_target = gt_labels_3d
size_res_target = gt_bboxes_3d[..., 3:6] - gt_bboxes_3d.new_tensor(
self.mean_sizes)[size_class_target]
# generate dir target
box_num = gt_bboxes_3d.shape[0]
if self.with_rot:
(dir_class_target,
dir_res_target) = self.angle2class(gt_bboxes_3d[..., 6])
else:
dir_class_target = gt_labels_3d.new_zeros(box_num)
dir_res_target = gt_bboxes_3d.new_zeros(box_num)
return (center_target, size_class_target, size_res_target,
dir_class_target, dir_res_target)
def decode(self, bbox_out):
"""Decode predicted parts to bbox3d.
Args:
bbox_out (dict): predictions from model, should contain keys below
- center: predicted bottom center of bboxes.
- dir_class: predicted bbox direction class.
- dir_res: predicted bbox direction residual.
- size_class: predicted bbox size class.
- size_res: predicted bbox size residual.
Returns:
Tensor: decoded bbox3d with shape (batch, n, 7)
"""
center = bbox_out['center']
batch_size, num_proposal = center.shape[:2]
# decode heading angle
if self.with_rot:
dir_class = torch.argmax(bbox_out['dir_class'], -1)
dir_res = torch.gather(bbox_out['dir_res'], 2,
dir_class.unsqueeze(-1))
dir_res.squeeze_(2)
dir_angle = self.class2angle(dir_class, dir_res).reshape(
batch_size, num_proposal, 1)
else:
dir_angle = center.new_zeros(batch_size, num_proposal, 1)
# decode bbox size
size_class = torch.argmax(bbox_out['size_class'], -1, keepdim=True)
size_res = torch.gather(bbox_out['size_res'], 2,
size_class.unsqueeze(-1).repeat(1, 1, 1, 3))
mean_sizes = center.new_tensor(self.mean_sizes)
size_base = torch.index_select(mean_sizes, 0, size_class.reshape(-1))
bbox_size = size_base.reshape(batch_size, num_proposal,
-1) + size_res.squeeze(2)
bbox3d = torch.cat([center, bbox_size, dir_angle], dim=-1)
return bbox3d
def split_pred(self, preds, base_xyz):
"""Split predicted features to specific parts.
Args:
preds (Tensor): predicted features to split.
base_xyz (Tensor): coordinates of points.
Returns:
dict: split results.
"""
results = {}
start, end = 0, 0
preds_trans = preds.transpose(2, 1)
# decode objectness score
end += 2
results['obj_scores'] = preds_trans[..., start:end]
start = end
# decode center
end += 3
# (batch_size, num_proposal, 3)
results['center'] = base_xyz + preds_trans[..., start:end]
start = end
# decode direction
end += self.num_dir_bins
results['dir_class'] = preds_trans[..., start:end]
start = end
end += self.num_dir_bins
dir_res_norm = preds_trans[..., start:end]
start = end
results['dir_res_norm'] = dir_res_norm
results['dir_res'] = dir_res_norm * (np.pi / self.num_dir_bins)
# decode size
end += self.num_sizes
results['size_class'] = preds_trans[..., start:end]
start = end
end += self.num_sizes * 3
size_res_norm = preds_trans[..., start:end]
batch_size, num_proposal = preds_trans.shape[:2]
size_res_norm = size_res_norm.view(
[batch_size, num_proposal, self.num_sizes, 3])
start = end
results['size_res_norm'] = size_res_norm
mean_sizes = preds.new_tensor(self.mean_sizes)
results['size_res'] = (
size_res_norm * mean_sizes.unsqueeze(0).unsqueeze(0))
# decode semantic score
results['sem_scores'] = preds_trans[..., start:]
return results
def angle2class(self, angle):
"""Convert continuous angle to a discrete class and a residual.
Convert continuous angle to a discrete class and a small
regression number from class center angle to current angle.
Args:
angle (Tensor): Angle is from 0-2pi (or -pi~pi), class center at
0, 1*(2pi/N), 2*(2pi/N) ... (N-1)*(2pi/N)
Returns:
tuple: Encoded discrete class and residual.
"""
angle = angle % (2 * np.pi)
angle_per_class = 2 * np.pi / float(self.num_dir_bins)
shifted_angle = (angle + angle_per_class / 2) % (2 * np.pi)
angle_cls = shifted_angle // angle_per_class
angle_res = shifted_angle - (
angle_cls * angle_per_class + angle_per_class / 2)
return angle_cls.long(), angle_res
def class2angle(self, angle_cls, angle_res, limit_period=True):
"""Inverse function to angle2class
Args:
angle_cls (Tensor): Angle class to decode.
angle_res (Tensor): Angle residual to decode.
limit_period (bool): Whether to limit angle to [-pi, pi].
Returns:
Tensor: angle decoded from angle_cls and angle_res.
"""
angle_per_class = 2 * np.pi / float(self.num_dir_bins)
angle_center = angle_cls.float() * angle_per_class
angle = angle_center + angle_res
if limit_period:
angle[angle > np.pi] -= 2 * np.pi
return angle
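
To make the direction binning concrete, a small round-trip through the angle2class/class2angle logic above (illustrative, using num_dir_bins=12 as in the SUN RGB-D config):

import numpy as np
import torch

num_dir_bins = 12
angle_per_class = 2 * np.pi / num_dir_bins  # bin width, ~0.5236

angle = torch.tensor([0.1, 3.0, 6.2])
shifted = (angle % (2 * np.pi) + angle_per_class / 2) % (2 * np.pi)
cls = (shifted // angle_per_class).long()  # angle2class: bin index
res = shifted - (cls.float() * angle_per_class + angle_per_class / 2)

decoded = cls.float() * angle_per_class + res  # class2angle
decoded[decoded > np.pi] -= 2 * np.pi  # limit period to (-pi, pi]
# decoded == [0.1, 3.0, 6.2 - 2*pi]: the inputs, up to the 2*pi wrap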
@@ -84,3 +84,87 @@ def bbox3d2result(bboxes, scores, labels):
     """
     return dict(
         boxes_3d=bboxes.cpu(), scores_3d=scores.cpu(), labels_3d=labels.cpu())
def upright_depth_to_lidar_torch(points=None,
bboxes=None,
to_bottom_center=False):
"""Convert points and boxes in upright depth coordinate to lidar.
Args:
points (None | Tensor): points in upright depth coordinate.
bboxes (None | Tensor): bboxes in upright depth coordinate.
        to_bottom_center (bool): convert bboxes to bottom center.
    Returns:
        tuple: points and bboxes in lidar coordinate.
    """
if points is not None:
points_lidar = points.clone()
points_lidar = points_lidar[..., [1, 0, 2]]
points_lidar[..., 1] *= -1
else:
points_lidar = None
if bboxes is not None:
bboxes_lidar = bboxes.clone()
bboxes_lidar = bboxes_lidar[..., [1, 0, 2, 4, 3, 5, 6]]
bboxes_lidar[..., 1] *= -1
if to_bottom_center:
bboxes_lidar[..., 2] -= 0.5 * bboxes_lidar[..., 5]
else:
bboxes_lidar = None
return points_lidar, bboxes_lidar
def box3d_to_corner3d_upright_depth(boxes3d):
    """Convert box3d to corner3d in upright depth coordinate.
    Args:
        boxes3d (Tensor): Boxes with shape [n, 7] in upright depth coordinate.
    Returns:
        Tensor: Corners with shape [n, 8, 3] in upright depth coordinate.
    """
boxes_num = boxes3d.shape[0]
ry = boxes3d[:, 6:7]
l, w, h = boxes3d[:, 3:4], boxes3d[:, 4:5], boxes3d[:, 5:6]
zeros = boxes3d.new_zeros((boxes_num, 1))
ones = boxes3d.new_ones((boxes_num, 1))
# zeros = torch.cuda.FloatTensor(boxes_num, 1).fill_(0)
# ones = torch.cuda.FloatTensor(boxes_num, 1).fill_(1)
x_corners = torch.cat(
[-l / 2., l / 2., l / 2., -l / 2., -l / 2., l / 2., l / 2., -l / 2.],
dim=1) # (N, 8)
y_corners = torch.cat(
[w / 2., w / 2., -w / 2., -w / 2., w / 2., w / 2., -w / 2., -w / 2.],
dim=1) # (N, 8)
z_corners = torch.cat(
[h / 2., h / 2., h / 2., h / 2., -h / 2., -h / 2., -h / 2., -h / 2.],
dim=1) # (N, 8)
temp_corners = torch.cat(
(x_corners.unsqueeze(dim=2), y_corners.unsqueeze(dim=2),
z_corners.unsqueeze(dim=2)),
dim=2) # (N, 8, 3)
cosa, sina = torch.cos(-ry), torch.sin(-ry)
raw_1 = torch.cat([cosa, -sina, zeros], dim=1) # (N, 3)
raw_2 = torch.cat([sina, cosa, zeros], dim=1) # (N, 3)
raw_3 = torch.cat([zeros, zeros, ones], dim=1) # (N, 3)
R = torch.cat((raw_1.unsqueeze(dim=1), raw_2.unsqueeze(dim=1),
raw_3.unsqueeze(dim=1)),
dim=1) # (N, 3, 3)
rotated_corners = torch.matmul(temp_corners, R) # (N, 8, 3)
x_corners = rotated_corners[:, :, 0]
y_corners = rotated_corners[:, :, 1]
z_corners = rotated_corners[:, :, 2]
x_loc, y_loc, z_loc = boxes3d[:, 0], boxes3d[:, 1], boxes3d[:, 2]
x = x_loc.view(-1, 1) + x_corners.view(-1, 8)
y = y_loc.view(-1, 1) + y_corners.view(-1, 8)
z = z_loc.view(-1, 1) + z_corners.view(-1, 8)
corners3d = torch.cat(
(x.view(-1, 8, 1), y.view(-1, 8, 1), z.view(-1, 8, 1)), dim=2)
return corners3d
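
A quick sanity check of the corner layout (illustrative, not part of the commit; it assumes the function defined above is importable): an axis-aligned unit cube centered at the origin yields corners at +/-0.5 on every axis.

import torch

box = torch.tensor([[0., 0., 0., 1., 1., 1., 0.]])  # (x, y, z, l, w, h, ry)
corners = box3d_to_corner3d_upright_depth(box)  # shape (1, 8, 3)
assert torch.allclose(corners.abs(), torch.full((1, 8, 3), 0.5))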
 import numpy as np
 import torch
+from mmcv.utils import print_log
+from terminaltables import AsciiTable

 from mmdet3d.core.bbox.iou_calculators.iou3d_calculator import bbox_overlaps_3d
@@ -263,14 +265,14 @@ def eval_map_recall(det_infos, gt_infos, ovthresh=None):
             recall[iou_idx][label], precision[iou_idx][label], ap[iou_idx][
                 label] = ret_values[i][iou_idx]
         else:
-            recall[iou_idx][label] = [0]
-            precision[iou_idx][label] = [0]
-            ap[iou_idx][label] = [0]
+            recall[iou_idx][label] = np.zeros(1)
+            precision[iou_idx][label] = np.zeros(1)
+            ap[iou_idx][label] = np.zeros(1)

     return recall, precision, ap


-def indoor_eval(gt_annos, dt_annos, metric, label2cat):
+def indoor_eval(gt_annos, dt_annos, metric, label2cat, logger=None):
     """Scannet Evaluation.

     Evaluate the result of the detection.
@@ -280,6 +282,8 @@ def indoor_eval(gt_annos, dt_annos, metric, label2cat):
         dt_annos (list[dict]): Detection annotations.
         metric (list[float]): AP IoU thresholds.
         label2cat (dict): {label: cat}.
+        logger (logging.Logger | str | None): The way to print the mAP
+            summary. See `mmdet.utils.print_log()` for details. Default: None.

     Return:
         dict: Dict of results.
@@ -301,20 +305,41 @@
                 boxes_3d=np.array([], dtype=np.float32),
                 labels_3d=np.array([], dtype=np.int64)))

-    result_str = str()
-    result_str += 'mAP'
     rec, prec, ap = eval_map_recall(dt_annos, gt_infos, metric)
-    ret_dict = {}
+    ret_dict = dict()
+    header = ['classes']
+    table_columns = [[label2cat[label]
+                      for label in ap[0].keys()] + ['Overall']]
     for i, iou_thresh in enumerate(metric):
+        header.append(f'AP_{iou_thresh:.2f}')
+        header.append(f'AR_{iou_thresh:.2f}')
         rec_list = []
         for label in ap[i].keys():
             ret_dict[f'{label2cat[label]}_AP_{iou_thresh:.2f}'] = float(
                 ap[i][label][0])
         ret_dict[f'mAP_{iou_thresh:.2f}'] = float(
             np.mean(list(ap[i].values())))
+        table_columns.append(list(map(float, list(ap[i].values()))))
+        table_columns[-1] += [ret_dict[f'mAP_{iou_thresh:.2f}']]
+        table_columns[-1] = [f'{x:.4f}' for x in table_columns[-1]]

         for label in rec[i].keys():
             ret_dict[f'{label2cat[label]}_rec_{iou_thresh:.2f}'] = float(
                 rec[i][label][-1])
             rec_list.append(rec[i][label][-1])
         ret_dict[f'mAR_{iou_thresh:.2f}'] = float(np.mean(rec_list))
+        table_columns.append(list(map(float, rec_list)))
+        table_columns[-1] += [ret_dict[f'mAR_{iou_thresh:.2f}']]
+        table_columns[-1] = [f'{x:.4f}' for x in table_columns[-1]]
+
+    table_data = [header]
+    table_rows = list(zip(*table_columns))
+    table_data += table_rows
+    table = AsciiTable(table_data)
+    table.inner_footing_row_border = True
+    print_log('\n' + table.table, logger=logger)

     return ret_dict
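
For reference, the AsciiTable produced by the new logging code lays out one row per class plus an Overall footer; a standalone terminaltables sketch with invented values:

from terminaltables import AsciiTable

table_data = [
    ['classes', 'AP_0.25', 'AR_0.25'],
    ['bed', '0.8472', '0.9561'],
    ['table', '0.6213', '0.8870'],
    ['Overall', '0.7342', '0.9215'],
]
table = AsciiTable(table_data)
table.inner_footing_row_border = True  # rules off the Overall footer row
print(table.table)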
 from mmdet.core.post_processing import (merge_aug_bboxes, merge_aug_masks,
                                         merge_aug_proposals, merge_aug_scores,
                                         multiclass_nms)
-from .box3d_nms import box3d_multiclass_nms
+from .box3d_nms import aligned_3d_nms, box3d_multiclass_nms

 __all__ = [
     'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes',
-    'merge_aug_scores', 'merge_aug_masks', 'box3d_multiclass_nms'
+    'merge_aug_scores', 'merge_aug_masks', 'box3d_multiclass_nms',
+    'aligned_3d_nms'
 ]
@@ -64,3 +64,52 @@ def box3d_multiclass_nms(mlvl_bboxes,
         labels = mlvl_scores.new_zeros((0, mlvl_scores.size(-1)))
         dir_scores = mlvl_scores.new_zeros((0, ))
         return bboxes, scores, labels, dir_scores
def aligned_3d_nms(boxes, scores, classes, thresh):
"""3d nms for aligned boxes.
Args:
boxes (Tensor): Aligned box with shape [n, 6].
scores (Tensor): Scores of each box.
classes (Tensor): Class of each box.
        thresh (float): IoU threshold for NMS.
Returns:
Tensor: Indices of selected boxes.
"""
x1 = boxes[:, 0]
y1 = boxes[:, 1]
z1 = boxes[:, 2]
x2 = boxes[:, 3]
y2 = boxes[:, 4]
z2 = boxes[:, 5]
area = (x2 - x1) * (y2 - y1) * (z2 - z1)
zero = boxes.new_zeros(1, )
score_sorted = torch.argsort(scores)
pick = []
while (score_sorted.shape[0] != 0):
last = score_sorted.shape[0]
i = score_sorted[-1]
pick.append(i)
xx1 = torch.max(x1[i], x1[score_sorted[:last - 1]])
yy1 = torch.max(y1[i], y1[score_sorted[:last - 1]])
zz1 = torch.max(z1[i], z1[score_sorted[:last - 1]])
xx2 = torch.min(x2[i], x2[score_sorted[:last - 1]])
yy2 = torch.min(y2[i], y2[score_sorted[:last - 1]])
zz2 = torch.min(z2[i], z2[score_sorted[:last - 1]])
classes1 = classes[i]
classes2 = classes[score_sorted[:last - 1]]
inter_l = torch.max(zero, xx2 - xx1)
inter_w = torch.max(zero, yy2 - yy1)
inter_h = torch.max(zero, zz2 - zz1)
inter = inter_l * inter_w * inter_h
iou = inter / (area[i] + area[score_sorted[:last - 1]] - inter)
iou = iou * (classes1 == classes2).float()
score_sorted = score_sorted[torch.nonzero(iou <= thresh).flatten()]
indices = boxes.new_tensor(pick, dtype=torch.long)
return indices
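
An illustrative call (not part of the commit; it assumes aligned_3d_nms as defined above): of two heavily overlapping boxes of the same class, only the higher-scoring one survives, while a distant box of another class is kept.

import torch

boxes = torch.tensor([[0.0, 0.0, 0.0, 1.0, 1.0, 1.0],
                      [0.1, 0.1, 0.1, 1.1, 1.1, 1.1],
                      [5.0, 5.0, 5.0, 6.0, 6.0, 6.0]])  # (x1, y1, z1, x2, y2, z2)
scores = torch.tensor([0.9, 0.6, 0.8])
classes = torch.tensor([0, 0, 1])
keep = aligned_3d_nms(boxes, scores, classes, thresh=0.25)
# keep -> tensor([0, 2]): box 1 is suppressed by box 0 (IoU ~0.57, same class)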
@@ -8,8 +8,8 @@ from .loader import DistributedGroupSampler, GroupSampler, build_dataloader
 from .nuscenes_dataset import NuScenesDataset
 from .pipelines import (GlobalRotScale, IndoorFlipData, IndoorGlobalRotScale,
                         IndoorPointSample, IndoorPointsColorJitter,
-                        IndoorPointsColorNormalize, LoadAnnotations3D,
-                        LoadPointsFromFile, ObjectNoise, ObjectRangeFilter,
+                        LoadAnnotations3D, LoadPointsFromFile,
+                        NormalizePointsColor, ObjectNoise, ObjectRangeFilter,
                         ObjectSample, PointShuffle, PointsRangeFilter,
                         RandomFlip3D)
 from .scannet_dataset import ScanNetDataset
@@ -21,7 +21,7 @@ __all__ = [
     'CocoDataset', 'Kitti2DDataset', 'NuScenesDataset', 'ObjectSample',
     'RandomFlip3D', 'ObjectNoise', 'GlobalRotScale', 'PointShuffle',
     'ObjectRangeFilter', 'PointsRangeFilter', 'Collect3D',
-    'LoadPointsFromFile', 'IndoorPointsColorNormalize', 'IndoorPointSample',
+    'LoadPointsFromFile', 'NormalizePointsColor', 'IndoorPointSample',
     'LoadAnnotations3D', 'IndoorPointsColorJitter', 'IndoorGlobalRotScale',
     'IndoorFlipData', 'SUNRGBDDataset', 'ScanNetDataset', 'Custom3DDataset'
 ]
@@ -3,7 +3,6 @@ import tempfile
 import mmcv
 import numpy as np
-from mmcv.utils import print_log
 from torch.utils.data import Dataset

 from mmdet.datasets import DATASETS
@@ -19,12 +18,14 @@ class Custom3DDataset(Dataset):
                  pipeline=None,
                  classes=None,
                  modality=None,
+                 filter_empty_gt=True,
                  test_mode=False):
         super().__init__()
         self.data_root = data_root
         self.ann_file = ann_file
         self.test_mode = test_mode
         self.modality = modality
+        self.filter_empty_gt = filter_empty_gt
         self.CLASSES = self.get_classes(classes)

         self.data_infos = self.load_annotations(self.ann_file)
@@ -52,7 +53,7 @@ class Custom3DDataset(Dataset):
         if not self.test_mode:
             annos = self.get_ann_info(index)
             input_dict['ann_info'] = annos
-            if len(annos['gt_bboxes_3d']) == 0:
+            if self.filter_empty_gt and len(annos['gt_bboxes_3d']) == 0:
                 return None
         return input_dict
@@ -67,7 +68,8 @@ class Custom3DDataset(Dataset):
             return None
         self.pre_pipeline(input_dict)
         example = self.pipeline(input_dict)
-        if example is None or len(example['gt_bboxes_3d']._data) == 0:
+        if self.filter_empty_gt and (example is None or len(
+                example['gt_bboxes_3d']._data) == 0):
             return None
         return example
@@ -124,23 +126,20 @@ class Custom3DDataset(Dataset):
             results (list[dict]): List of results.
             metric (str | list[str]): Metrics to be evaluated.
             iou_thr (list[float]): AP IoU thresholds.
         """
         from mmdet3d.core.evaluation import indoor_eval
         assert isinstance(
             results, list), f'Expect results to be list, got {type(results)}.'
+        assert len(results) > 0, f'Expect length of results > 0.'
+        assert len(results) == len(self.data_infos)
         assert isinstance(
             results[0], dict
         ), f'Expect elements in results to be dict, got {type(results[0])}.'
         gt_annos = [info['annos'] for info in self.data_infos]
         label2cat = {i: cat_id for i, cat_id in enumerate(self.CLASSES)}
-        ret_dict = indoor_eval(gt_annos, results, iou_thr, label2cat)
-
-        result_str = str()
-        for key, val in ret_dict.items():
-            result_str += f'{key} : {val} \n'
-        mAP_25, mAP_50 = ret_dict['mAP_0.25'], ret_dict['mAP_0.50']
-        result_str += f'mAP(0.25): {mAP_25} mAP(0.50): {mAP_50}'
-        print_log('\n' + result_str, logger=logger)
+        ret_dict = indoor_eval(
+            gt_annos, results, iou_thr, label2cat, logger=logger)
         return ret_dict
......
@@ -3,10 +3,11 @@ from .dbsampler import DataBaseSampler, MMDataBaseSampler
 from .formating import DefaultFormatBundle, DefaultFormatBundle3D
 from .indoor_augment import (IndoorFlipData, IndoorGlobalRotScale,
                              IndoorPointsColorJitter)
-from .indoor_loading import (IndoorPointsColorNormalize, LoadAnnotations3D,
-                             LoadPointsFromFile)
+from .indoor_loading import (LoadAnnotations3D, LoadPointsFromFile,
+                             NormalizePointsColor)
 from .indoor_sample import IndoorPointSample
 from .loading import LoadMultiViewImageFromFiles
+from .point_seg_class_mapping import PointSegClassMapping
 from .train_aug import (GlobalRotScale, ObjectNoise, ObjectRangeFilter,
                         ObjectSample, PointShuffle, PointsRangeFilter,
                         RandomFlip3D)
@@ -17,6 +18,6 @@ __all__ = [
     'Compose', 'LoadMultiViewImageFromFiles', 'LoadPointsFromFile',
     'DefaultFormatBundle', 'DefaultFormatBundle3D', 'DataBaseSampler',
     'IndoorGlobalRotScale', 'IndoorPointsColorJitter', 'IndoorFlipData',
-    'MMDataBaseSampler', 'IndoorPointsColorNormalize', 'LoadAnnotations3D',
-    'IndoorPointSample'
+    'MMDataBaseSampler', 'NormalizePointsColor', 'LoadAnnotations3D',
+    'IndoorPointSample', 'PointSegClassMapping'
 ]
@@ -224,7 +224,7 @@ class IndoorGlobalRotScale(object):
         results['scale_ratio'] = scale_ratio
         results['points'] = points
-        results['gt_bboxes_3d'] = gt_bboxes_3d
+        results['gt_bboxes_3d'] = gt_bboxes_3d.astype(np.float32)
         return results

     def __repr__(self):
......
@@ -6,8 +6,8 @@ from mmdet.datasets.pipelines import LoadAnnotations

 @PIPELINES.register_module()
-class IndoorPointsColorNormalize(object):
-    """Indoor points color normalize
+class NormalizePointsColor(object):
+    """Normalize color of points

     Normalize color of the points.
@@ -45,9 +45,16 @@ class LoadPointsFromFile(object):
         use_dim (list[int]): Which dimensions of the points to be used.
             Default: [0, 1, 2]. For KITTI dataset, set use_dim=4
             or use_dim=[0, 1, 2, 3] to use the intensity dimension
+        file_client_args (dict): Config dict of file clients, refer to
+            https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
+            for more details.
     """

-    def __init__(self, load_dim=6, use_dim=[0, 1, 2], shift_height=False):
+    def __init__(self,
+                 load_dim=6,
+                 use_dim=[0, 1, 2],
+                 shift_height=False,
+                 file_client_args=dict(backend='disk')):
         self.shift_height = shift_height
         if isinstance(use_dim, int):
             use_dim = list(range(use_dim))
@@ -56,8 +63,16 @@ class LoadPointsFromFile(object):
         self.load_dim = load_dim
         self.use_dim = use_dim
+        self.file_client_args = file_client_args.copy()
+        self.file_client = None

     def _load_points(self, pts_filename):
+        if self.file_client is None:
+            self.file_client = mmcv.FileClient(**self.file_client_args)
+        try:
+            pts_bytes = self.file_client.get(pts_filename)
+            points = np.frombuffer(pts_bytes, dtype=np.float32)
+        except ConnectionError:
             mmcv.check_file_exist(pts_filename)
             if pts_filename.endswith('.npy'):
                 points = np.load(pts_filename)
@@ -113,6 +128,9 @@ class LoadAnnotations3D(LoadAnnotations):
             Defaults to False.
         poly2mask (bool, optional): Whether to convert polygon annotations
             to bitmasks. Defaults to True.
+        file_client_args (dict): Config dict of file clients, refer to
+            https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
+            for more details.
     """

     def __init__(self,
@@ -124,8 +142,15 @@ class LoadAnnotations3D(LoadAnnotations):
                  with_label=False,
                  with_mask=False,
                  with_seg=False,
-                 poly2mask=True):
-        super().__init__(with_bbox, with_label, with_mask, with_seg, poly2mask)
+                 poly2mask=True,
+                 file_client_args=dict(backend='disk')):
+        super().__init__(
+            with_bbox,
+            with_label,
+            with_mask,
+            with_seg,
+            poly2mask,
+            file_client_args=file_client_args)
         self.with_bbox_3d = with_bbox_3d
         self.with_label_3d = with_label_3d
         self.with_mask_3d = with_mask_3d
@@ -142,16 +167,35 @@ class LoadAnnotations3D(LoadAnnotations):

     def _load_masks_3d(self, results):
         pts_instance_mask_path = results['ann_info']['pts_instance_mask_path']
+        if self.file_client is None:
+            self.file_client = mmcv.FileClient(**self.file_client_args)
+        try:
+            mask_bytes = self.file_client.get(pts_instance_mask_path)
+            pts_instance_mask = np.frombuffer(mask_bytes, dtype=np.int)
+        except ConnectionError:
             mmcv.check_file_exist(pts_instance_mask_path)
-        pts_instance_mask = np.fromfile(pts_instance_mask_path, dtype=np.long)
+            pts_instance_mask = np.fromfile(
+                pts_instance_mask_path, dtype=np.long)

         results['pts_instance_mask'] = pts_instance_mask
         results['pts_mask_fields'].append(results['pts_instance_mask'])
         return results

     def _load_semantic_seg_3d(self, results):
         pts_semantic_mask_path = results['ann_info']['pts_semantic_mask_path']
+        if self.file_client is None:
+            self.file_client = mmcv.FileClient(**self.file_client_args)
+        try:
+            mask_bytes = self.file_client.get(pts_semantic_mask_path)
+            # add .copy() to fix read-only bug
+            pts_semantic_mask = np.frombuffer(mask_bytes, dtype=np.int).copy()
+        except ConnectionError:
             mmcv.check_file_exist(pts_semantic_mask_path)
-        pts_semantic_mask = np.fromfile(pts_semantic_mask_path, dtype=np.long)
+            pts_semantic_mask = np.fromfile(
+                pts_semantic_mask_path, dtype=np.long)

         results['pts_semantic_mask'] = pts_semantic_mask
         results['pts_seg_fields'].append(results['pts_semantic_mask'])
         return results
......
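
The file_client_args pattern above mirrors the outdoor pipelines: with mmcv's FileClient, backend='disk' reads local files, while backends such as 'petrel' (used with path_mapping in the configs earlier in this commit) fetch the same relative paths from object storage. A minimal sketch (the path is illustrative):

import mmcv
import numpy as np

file_client = mmcv.FileClient(backend='disk')
pts_bytes = file_client.get('data/kitti/velodyne_reduced/000000.bin')
points = np.frombuffer(pts_bytes, dtype=np.float32).reshape(-1, 4)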