Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
mmdetection3d
Commits
19a56f6b
Commit
19a56f6b
authored
Jun 04, 2020
by
zhangwenwei
Browse files
Merge branch 'votenet' into 'master'
Votenet See merge request open-mmlab/mmdet.3d!46
parents
ac3590a1
f717eb62
Changes
34
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1376 additions
and
14 deletions
+1376
-14
configs/scannet/votenet_scannet-3d-18class.py
configs/scannet/votenet_scannet-3d-18class.py
+188
-0
configs/sunrgbd/votenet_sunrgbd-3d-10class.py
configs/sunrgbd/votenet_sunrgbd-3d-10class.py
+164
-0
mmdet3d/core/bbox/__init__.py
mmdet3d/core/bbox/__init__.py
+5
-2
mmdet3d/core/bbox/coders/__init__.py
mmdet3d/core/bbox/coders/__init__.py
+4
-1
mmdet3d/core/bbox/coders/partial_bin_based_bbox_coder.py
mmdet3d/core/bbox/coders/partial_bin_based_bbox_coder.py
+193
-0
mmdet3d/core/bbox/transforms.py
mmdet3d/core/bbox/transforms.py
+84
-0
mmdet3d/core/post_processing/__init__.py
mmdet3d/core/post_processing/__init__.py
+3
-2
mmdet3d/core/post_processing/box3d_nms.py
mmdet3d/core/post_processing/box3d_nms.py
+49
-0
mmdet3d/datasets/custom_3d.py
mmdet3d/datasets/custom_3d.py
+8
-2
mmdet3d/datasets/pipelines/__init__.py
mmdet3d/datasets/pipelines/__init__.py
+2
-1
mmdet3d/datasets/pipelines/indoor_augment.py
mmdet3d/datasets/pipelines/indoor_augment.py
+1
-1
mmdet3d/datasets/pipelines/indoor_loading.py
mmdet3d/datasets/pipelines/indoor_loading.py
+2
-1
mmdet3d/datasets/pipelines/point_seg_class_mapping.py
mmdet3d/datasets/pipelines/point_seg_class_mapping.py
+36
-0
mmdet3d/datasets/scannet_dataset.py
mmdet3d/datasets/scannet_dataset.py
+2
-1
mmdet3d/datasets/sunrgbd_dataset.py
mmdet3d/datasets/sunrgbd_dataset.py
+2
-1
mmdet3d/models/__init__.py
mmdet3d/models/__init__.py
+1
-0
mmdet3d/models/dense_heads/__init__.py
mmdet3d/models/dense_heads/__init__.py
+2
-1
mmdet3d/models/dense_heads/vote_head.py
mmdet3d/models/dense_heads/vote_head.py
+518
-0
mmdet3d/models/detectors/__init__.py
mmdet3d/models/detectors/__init__.py
+2
-1
mmdet3d/models/detectors/votenet.py
mmdet3d/models/detectors/votenet.py
+110
-0
No files found.
configs/scannet/votenet_scannet-3d-18class.py
0 → 100644
View file @
19a56f6b
# model settings
model
=
dict
(
type
=
'VoteNet'
,
backbone
=
dict
(
type
=
'PointNet2SASSG'
,
in_channels
=
4
,
num_points
=
(
2048
,
1024
,
512
,
256
),
radius
=
(
0.2
,
0.4
,
0.8
,
1.2
),
num_samples
=
(
64
,
32
,
16
,
16
),
sa_channels
=
((
64
,
64
,
128
),
(
128
,
128
,
256
),
(
128
,
128
,
256
),
(
128
,
128
,
256
)),
fp_channels
=
((
256
,
256
),
(
256
,
256
)),
norm_cfg
=
dict
(
type
=
'BN2d'
),
pool_mod
=
'max'
),
bbox_head
=
dict
(
type
=
'VoteHead'
,
num_classes
=
18
,
bbox_coder
=
dict
(
type
=
'PartialBinBasedBBoxCoder'
,
num_sizes
=
18
,
num_dir_bins
=
1
,
with_rot
=
False
,
mean_sizes
=
[[
0.76966727
,
0.8116021
,
0.92573744
],
[
1.876858
,
1.8425595
,
1.1931566
],
[
0.61328
,
0.6148609
,
0.7182701
],
[
1.3955007
,
1.5121545
,
0.83443564
],
[
0.97949594
,
1.0675149
,
0.6329687
],
[
0.531663
,
0.5955577
,
1.7500148
],
[
0.9624706
,
0.72462326
,
1.1481868
],
[
0.83221924
,
1.0490936
,
1.6875663
],
[
0.21132214
,
0.4206159
,
0.5372846
],
[
1.4440073
,
1.8970833
,
0.26985747
],
[
1.0294262
,
1.4040797
,
0.87554324
],
[
1.3766412
,
0.65521795
,
1.6813129
],
[
0.6650819
,
0.71111923
,
1.298853
],
[
0.41999173
,
0.37906948
,
1.7513971
],
[
0.59359556
,
0.5912492
,
0.73919016
],
[
0.50867593
,
0.50656086
,
0.30136237
],
[
1.1511526
,
1.0546296
,
0.49706793
],
[
0.47535285
,
0.49249494
,
0.5802117
]]),
vote_moudule_cfg
=
dict
(
in_channels
=
256
,
vote_per_seed
=
1
,
gt_per_seed
=
3
,
conv_channels
=
(
256
,
256
),
conv_cfg
=
dict
(
type
=
'Conv1d'
),
norm_cfg
=
dict
(
type
=
'BN1d'
),
norm_feats
=
True
,
vote_loss
=
dict
(
type
=
'ChamferDistance'
,
mode
=
'l1'
,
reduction
=
'none'
,
loss_dst_weight
=
10.0
)),
vote_aggregation_cfg
=
dict
(
num_point
=
256
,
radius
=
0.3
,
num_sample
=
16
,
mlp_channels
=
[
256
,
128
,
128
,
128
],
use_xyz
=
True
,
normalize_xyz
=
True
),
feat_channels
=
(
128
,
128
),
conv_cfg
=
dict
(
type
=
'Conv1d'
),
norm_cfg
=
dict
(
type
=
'BN1d'
),
objectness_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
class_weight
=
[
0.2
,
0.8
],
reduction
=
'sum'
,
loss_weight
=
5.0
),
center_loss
=
dict
(
type
=
'ChamferDistance'
,
mode
=
'l2'
,
reduction
=
'sum'
,
loss_src_weight
=
10.0
,
loss_dst_weight
=
10.0
),
dir_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
dir_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
size_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
size_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
/
3.0
),
semantic_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
)))
# model training and testing settings
train_cfg
=
dict
(
pos_distance_thr
=
0.3
,
neg_distance_thr
=
0.6
,
sample_mod
=
'vote'
)
test_cfg
=
dict
(
sample_mod
=
'seed'
,
nms_thr
=
0.25
,
score_thr
=
0.05
,
per_class_proposal
=
True
)
# dataset settings
dataset_type
=
'ScanNetDataset'
data_root
=
'./data/scannet/'
class_names
=
(
'cabinet'
,
'bed'
,
'chair'
,
'sofa'
,
'table'
,
'door'
,
'window'
,
'bookshelf'
,
'picture'
,
'counter'
,
'desk'
,
'curtain'
,
'refrigerator'
,
'showercurtrain'
,
'toilet'
,
'sink'
,
'bathtub'
,
'garbagebin'
)
train_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
shift_height
=
True
,
load_dim
=
6
,
use_dim
=
[
0
,
1
,
2
]),
dict
(
type
=
'LoadAnnotations3D'
,
with_bbox_3d
=
True
,
with_label_3d
=
True
,
with_mask_3d
=
True
,
with_seg_3d
=
True
),
dict
(
type
=
'PointSegClassMapping'
,
valid_cat_ids
=
(
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
14
,
16
,
24
,
28
,
33
,
34
,
36
,
39
)),
dict
(
type
=
'IndoorPointSample'
,
num_points
=
40000
),
dict
(
type
=
'IndoorFlipData'
,
flip_ratio_yz
=
0.5
,
flip_ratio_xz
=
0.5
),
dict
(
type
=
'IndoorGlobalRotScale'
,
shift_height
=
True
,
rot_range
=
[
-
1
/
36
,
1
/
36
],
scale_range
=
None
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
,
'pts_semantic_mask'
,
'pts_instance_mask'
])
]
test_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
shift_height
=
True
,
load_dim
=
6
,
use_dim
=
[
0
,
1
,
2
]),
dict
(
type
=
'IndoorPointSample'
,
num_points
=
40000
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
])
]
data
=
dict
(
samples_per_gpu
=
8
,
workers_per_gpu
=
4
,
train
=
dict
(
type
=
'RepeatDataset'
,
times
=
5
,
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_train.pkl'
,
pipeline
=
train_pipeline
,
filter_empty_gt
=
False
,
classes
=
class_names
)),
val
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_val.pkl'
,
pipeline
=
test_pipeline
,
classes
=
class_names
,
test_mode
=
True
),
test
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_val.pkl'
,
pipeline
=
test_pipeline
,
classes
=
class_names
,
test_mode
=
True
))
# optimizer
lr
=
0.008
# max learning rate
optimizer
=
dict
(
type
=
'Adam'
,
lr
=
lr
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
10
,
norm_type
=
2
))
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
None
,
step
=
[
24
,
32
])
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
log_config
=
dict
(
interval
=
10
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
dict
(
type
=
'TensorboardLoggerHook'
)
])
# yapf:enable
# runtime settings
total_epochs
=
36
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
find_unused_parameters
=
True
work_dir
=
'./work_dirs/votenet_scannet'
load_from
=
None
resume_from
=
None
workflow
=
[(
'train'
,
1
)]
configs/sunrgbd/votenet_sunrgbd-3d-10class.py
0 → 100644
View file @
19a56f6b
# model settings
model
=
dict
(
type
=
'VoteNet'
,
backbone
=
dict
(
type
=
'PointNet2SASSG'
,
in_channels
=
4
,
num_points
=
(
2048
,
1024
,
512
,
256
),
radius
=
(
0.2
,
0.4
,
0.8
,
1.2
),
num_samples
=
(
64
,
32
,
16
,
16
),
sa_channels
=
((
64
,
64
,
128
),
(
128
,
128
,
256
),
(
128
,
128
,
256
),
(
128
,
128
,
256
)),
fp_channels
=
((
256
,
256
),
(
256
,
256
)),
norm_cfg
=
dict
(
type
=
'BN2d'
),
pool_mod
=
'max'
),
bbox_head
=
dict
(
type
=
'VoteHead'
,
num_classes
=
10
,
bbox_coder
=
dict
(
type
=
'PartialBinBasedBBoxCoder'
,
num_sizes
=
10
,
num_dir_bins
=
12
,
with_rot
=
True
,
mean_sizes
=
[[
2.114256
,
1.620300
,
0.927272
],
[
0.791118
,
1.279516
,
0.718182
],
[
0.923508
,
1.867419
,
0.845495
],
[
0.591958
,
0.552978
,
0.827272
],
[
0.699104
,
0.454178
,
0.75625
],
[
0.69519
,
1.346299
,
0.736364
],
[
0.528526
,
1.002642
,
1.172878
],
[
0.500618
,
0.632163
,
0.683424
],
[
0.404671
,
1.071108
,
1.688889
],
[
0.76584
,
1.398258
,
0.472728
]]),
vote_moudule_cfg
=
dict
(
in_channels
=
256
,
vote_per_seed
=
1
,
gt_per_seed
=
3
,
conv_channels
=
(
256
,
256
),
conv_cfg
=
dict
(
type
=
'Conv1d'
),
norm_cfg
=
dict
(
type
=
'BN1d'
),
norm_feats
=
True
,
vote_loss
=
dict
(
type
=
'ChamferDistance'
,
mode
=
'l1'
,
reduction
=
'none'
,
loss_dst_weight
=
10.0
)),
vote_aggregation_cfg
=
dict
(
num_point
=
256
,
radius
=
0.3
,
num_sample
=
16
,
mlp_channels
=
[
256
,
128
,
128
,
128
],
use_xyz
=
True
,
normalize_xyz
=
True
),
feat_channels
=
(
128
,
128
),
conv_cfg
=
dict
(
type
=
'Conv1d'
),
norm_cfg
=
dict
(
type
=
'BN1d'
),
objectness_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
class_weight
=
[
0.2
,
0.8
],
reduction
=
'sum'
,
loss_weight
=
5.0
),
center_loss
=
dict
(
type
=
'ChamferDistance'
,
mode
=
'l2'
,
reduction
=
'sum'
,
loss_src_weight
=
10.0
,
loss_dst_weight
=
10.0
),
dir_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
dir_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
size_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
size_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
/
3.0
),
semantic_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
)))
# model training and testing settings
train_cfg
=
dict
(
pos_distance_thr
=
0.3
,
neg_distance_thr
=
0.6
,
sample_mod
=
'vote'
)
test_cfg
=
dict
(
sample_mod
=
'seed'
,
nms_thr
=
0.25
,
score_thr
=
0.05
,
per_class_proposal
=
True
)
# dataset settings
dataset_type
=
'SUNRGBDDataset'
data_root
=
'data/sunrgbd/'
class_names
=
(
'bed'
,
'table'
,
'sofa'
,
'chair'
,
'toilet'
,
'desk'
,
'dresser'
,
'night_stand'
,
'bookshelf'
,
'bathtub'
)
train_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
shift_height
=
True
,
load_dim
=
6
,
use_dim
=
[
0
,
1
,
2
]),
dict
(
type
=
'LoadAnnotations3D'
),
dict
(
type
=
'IndoorFlipData'
,
flip_ratio_yz
=
0.5
),
dict
(
type
=
'IndoorGlobalRotScale'
,
shift_height
=
True
,
rot_range
=
[
-
1
/
6
,
1
/
6
],
scale_range
=
[
0.85
,
1.15
]),
dict
(
type
=
'IndoorPointSample'
,
num_points
=
20000
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
])
]
test_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
shift_height
=
True
,
load_dim
=
6
,
use_dim
=
[
0
,
1
,
2
]),
dict
(
type
=
'IndoorPointSample'
,
num_points
=
20000
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
])
]
data
=
dict
(
samples_per_gpu
=
16
,
workers_per_gpu
=
4
,
train
=
dict
(
type
=
'RepeatDataset'
,
times
=
5
,
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'sunrgbd_infos_train.pkl'
,
pipeline
=
train_pipeline
,
classes
=
class_names
,
filter_empty_gt
=
False
)),
val
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'sunrgbd_infos_val.pkl'
,
pipeline
=
test_pipeline
,
classes
=
class_names
,
test_mode
=
True
),
test
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'sunrgbd_infos_val.pkl'
,
pipeline
=
test_pipeline
,
classes
=
class_names
,
test_mode
=
True
))
# optimizer
lr
=
0.008
# max learning rate
optimizer
=
dict
(
type
=
'Adam'
,
lr
=
lr
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
10
,
norm_type
=
2
))
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
None
,
step
=
[
24
,
32
])
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
log_config
=
dict
(
interval
=
30
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
dict
(
type
=
'TensorboardLoggerHook'
)
])
# yapf:enable
# runtime settings
total_epochs
=
36
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
find_unused_parameters
=
True
work_dir
=
'./work_dirs/votenet_sunrgbd'
load_from
=
None
resume_from
=
None
workflow
=
[(
'train'
,
1
)]
mmdet3d/core/bbox/__init__.py
View file @
19a56f6b
...
@@ -8,7 +8,9 @@ from .samplers import (BaseSampler, CombinedSampler,
...
@@ -8,7 +8,9 @@ from .samplers import (BaseSampler, CombinedSampler,
InstanceBalancedPosSampler
,
IoUBalancedNegSampler
,
InstanceBalancedPosSampler
,
IoUBalancedNegSampler
,
PseudoSampler
,
RandomSampler
,
SamplingResult
)
PseudoSampler
,
RandomSampler
,
SamplingResult
)
from
.structures
import
Box3DMode
,
CameraInstance3DBoxes
,
LiDARInstance3DBoxes
from
.structures
import
Box3DMode
,
CameraInstance3DBoxes
,
LiDARInstance3DBoxes
from
.transforms
import
bbox3d2result
,
bbox3d2roi
,
boxes3d_to_bev_torch_lidar
from
.transforms
import
(
bbox3d2result
,
bbox3d2roi
,
box3d_to_corner3d_upright_depth
,
boxes3d_to_bev_torch_lidar
)
from
.assign_sampling
import
(
# isort:skip, avoid recursive imports
from
.assign_sampling
import
(
# isort:skip, avoid recursive imports
build_bbox_coder
,
# temporally settings
build_bbox_coder
,
# temporally settings
...
@@ -22,5 +24,6 @@ __all__ = [
...
@@ -22,5 +24,6 @@ __all__ = [
'build_bbox_coder'
,
'DeltaXYZWLHRBBoxCoder'
,
'boxes3d_to_bev_torch_lidar'
,
'build_bbox_coder'
,
'DeltaXYZWLHRBBoxCoder'
,
'boxes3d_to_bev_torch_lidar'
,
'BboxOverlapsNearest3D'
,
'BboxOverlaps3D'
,
'bbox_overlaps_nearest_3d'
,
'BboxOverlapsNearest3D'
,
'BboxOverlaps3D'
,
'bbox_overlaps_nearest_3d'
,
'bbox_overlaps_3d'
,
'Box3DMode'
,
'LiDARInstance3DBoxes'
,
'bbox_overlaps_3d'
,
'Box3DMode'
,
'LiDARInstance3DBoxes'
,
'CameraInstance3DBoxes'
,
'bbox3d2roi'
,
'bbox3d2result'
'CameraInstance3DBoxes'
,
'bbox3d2roi'
,
'bbox3d2result'
,
'box3d_to_corner3d_upright_depth'
]
]
mmdet3d/core/bbox/coders/__init__.py
View file @
19a56f6b
from
mmdet.core.bbox
import
build_bbox_coder
from
mmdet.core.bbox
import
build_bbox_coder
from
.delta_xyzwhlr_bbox_coder
import
DeltaXYZWLHRBBoxCoder
from
.delta_xyzwhlr_bbox_coder
import
DeltaXYZWLHRBBoxCoder
from
.partial_bin_based_bbox_coder
import
PartialBinBasedBBoxCoder
__all__
=
[
'build_bbox_coder'
,
'DeltaXYZWLHRBBoxCoder'
]
__all__
=
[
'build_bbox_coder'
,
'DeltaXYZWLHRBBoxCoder'
,
'PartialBinBasedBBoxCoder'
]
mmdet3d/core/bbox/coders/partial_bin_based_bbox_coder.py
0 → 100644
View file @
19a56f6b
import
numpy
as
np
import
torch
from
mmdet.core.bbox
import
BaseBBoxCoder
from
mmdet.core.bbox.builder
import
BBOX_CODERS
@
BBOX_CODERS
.
register_module
()
class
PartialBinBasedBBoxCoder
(
BaseBBoxCoder
):
"""Partial bin based bbox coder
Args:
num_dir_bins (int): Number of bins to encode direction angle.
num_sizes (int): Number of size clusters.
mean_sizes (list[list[int]]): Mean size of bboxes in each class.
with_rot (bool): Whether the bbox is with rotation.
"""
def
__init__
(
self
,
num_dir_bins
,
num_sizes
,
mean_sizes
,
with_rot
=
True
):
super
(
PartialBinBasedBBoxCoder
,
self
).
__init__
()
assert
len
(
mean_sizes
)
==
num_sizes
self
.
num_dir_bins
=
num_dir_bins
self
.
num_sizes
=
num_sizes
self
.
mean_sizes
=
mean_sizes
self
.
with_rot
=
with_rot
def
encode
(
self
,
gt_bboxes_3d
,
gt_labels_3d
):
"""Encode ground truth to prediction targets.
Args:
gt_bboxes_3d (Tensor): 3d gt bboxes with shape (n, 7).
gt_labels_3d (Tensor): Gt classes.
Returns:
tuple: Targets of center, size and direction.
"""
# generate center target
center_target
=
gt_bboxes_3d
[...,
0
:
3
]
# generate bbox size target
size_class_target
=
gt_labels_3d
size_res_target
=
gt_bboxes_3d
[...,
3
:
6
]
-
gt_bboxes_3d
.
new_tensor
(
self
.
mean_sizes
)[
size_class_target
]
# generate dir target
box_num
=
gt_bboxes_3d
.
shape
[
0
]
if
self
.
with_rot
:
(
dir_class_target
,
dir_res_target
)
=
self
.
angle2class
(
gt_bboxes_3d
[...,
6
])
else
:
dir_class_target
=
gt_labels_3d
.
new_zeros
(
box_num
)
dir_res_target
=
gt_bboxes_3d
.
new_zeros
(
box_num
)
return
(
center_target
,
size_class_target
,
size_res_target
,
dir_class_target
,
dir_res_target
)
def
decode
(
self
,
bbox_out
):
"""Decode predicted parts to bbox3d.
Args:
bbox_out (dict): predictions from model, should contain keys below
- center: predicted bottom center of bboxes.
- dir_class: predicted bbox direction class.
- dir_res: predicted bbox direction residual.
- size_class: predicted bbox size class.
- size_res: predicted bbox size residual.
Returns:
Tensor: decoded bbox3d with shape (batch, n, 7)
"""
center
=
bbox_out
[
'center'
]
batch_size
,
num_proposal
=
center
.
shape
[:
2
]
# decode heading angle
if
self
.
with_rot
:
dir_class
=
torch
.
argmax
(
bbox_out
[
'dir_class'
],
-
1
)
dir_res
=
torch
.
gather
(
bbox_out
[
'dir_res'
],
2
,
dir_class
.
unsqueeze
(
-
1
))
dir_res
.
squeeze_
(
2
)
dir_angle
=
self
.
class2angle
(
dir_class
,
dir_res
).
reshape
(
batch_size
,
num_proposal
,
1
)
else
:
dir_angle
=
center
.
new_zeros
(
batch_size
,
num_proposal
,
1
)
# decode bbox size
size_class
=
torch
.
argmax
(
bbox_out
[
'size_class'
],
-
1
,
keepdim
=
True
)
size_res
=
torch
.
gather
(
bbox_out
[
'size_res'
],
2
,
size_class
.
unsqueeze
(
-
1
).
repeat
(
1
,
1
,
1
,
3
))
mean_sizes
=
center
.
new_tensor
(
self
.
mean_sizes
)
size_base
=
torch
.
index_select
(
mean_sizes
,
0
,
size_class
.
reshape
(
-
1
))
bbox_size
=
size_base
.
reshape
(
batch_size
,
num_proposal
,
-
1
)
+
size_res
.
squeeze
(
2
)
bbox3d
=
torch
.
cat
([
center
,
bbox_size
,
dir_angle
],
dim
=-
1
)
return
bbox3d
def
split_pred
(
self
,
preds
,
base_xyz
):
"""Split predicted features to specific parts.
Args:
preds (Tensor): predicted features to split.
base_xyz (Tensor): coordinates of points.
Returns:
dict: split results.
"""
results
=
{}
start
,
end
=
0
,
0
preds_trans
=
preds
.
transpose
(
2
,
1
)
# decode objectness score
end
+=
2
results
[
'obj_scores'
]
=
preds_trans
[...,
start
:
end
]
start
=
end
# decode center
end
+=
3
# (batch_size, num_proposal, 3)
results
[
'center'
]
=
base_xyz
+
preds_trans
[...,
start
:
end
]
start
=
end
# decode direction
end
+=
self
.
num_dir_bins
results
[
'dir_class'
]
=
preds_trans
[...,
start
:
end
]
start
=
end
end
+=
self
.
num_dir_bins
dir_res_norm
=
preds_trans
[...,
start
:
end
]
start
=
end
results
[
'dir_res_norm'
]
=
dir_res_norm
results
[
'dir_res'
]
=
dir_res_norm
*
(
np
.
pi
/
self
.
num_dir_bins
)
# decode size
end
+=
self
.
num_sizes
results
[
'size_class'
]
=
preds_trans
[...,
start
:
end
]
start
=
end
end
+=
self
.
num_sizes
*
3
size_res_norm
=
preds_trans
[...,
start
:
end
]
batch_size
,
num_proposal
=
preds_trans
.
shape
[:
2
]
size_res_norm
=
size_res_norm
.
view
(
[
batch_size
,
num_proposal
,
self
.
num_sizes
,
3
])
start
=
end
results
[
'size_res_norm'
]
=
size_res_norm
mean_sizes
=
preds
.
new_tensor
(
self
.
mean_sizes
)
results
[
'size_res'
]
=
(
size_res_norm
*
mean_sizes
.
unsqueeze
(
0
).
unsqueeze
(
0
))
# decode semantic score
results
[
'sem_scores'
]
=
preds_trans
[...,
start
:]
return
results
def
angle2class
(
self
,
angle
):
"""Convert continuous angle to a discrete class and a residual.
Convert continuous angle to a discrete class and a small
regression number from class center angle to current angle.
Args:
angle (Tensor): Angle is from 0-2pi (or -pi~pi), class center at
0, 1*(2pi/N), 2*(2pi/N) ... (N-1)*(2pi/N)
Returns:
tuple: Encoded discrete class and residual.
"""
angle
=
angle
%
(
2
*
np
.
pi
)
angle_per_class
=
2
*
np
.
pi
/
float
(
self
.
num_dir_bins
)
shifted_angle
=
(
angle
+
angle_per_class
/
2
)
%
(
2
*
np
.
pi
)
angle_cls
=
shifted_angle
//
angle_per_class
angle_res
=
shifted_angle
-
(
angle_cls
*
angle_per_class
+
angle_per_class
/
2
)
return
angle_cls
.
long
(),
angle_res
def
class2angle
(
self
,
angle_cls
,
angle_res
,
limit_period
=
True
):
"""Inverse function to angle2class
Args:
angle_cls (Tensor): Angle class to decode.
angle_res (Tensor): Angle residual to decode.
limit_period (bool): Whether to limit angle to [-pi, pi].
Returns:
Tensor: angle decoded from angle_cls and angle_res.
"""
angle_per_class
=
2
*
np
.
pi
/
float
(
self
.
num_dir_bins
)
angle_center
=
angle_cls
.
float
()
*
angle_per_class
angle
=
angle_center
+
angle_res
if
limit_period
:
angle
[
angle
>
np
.
pi
]
-=
2
*
np
.
pi
return
angle
mmdet3d/core/bbox/transforms.py
View file @
19a56f6b
...
@@ -84,3 +84,87 @@ def bbox3d2result(bboxes, scores, labels):
...
@@ -84,3 +84,87 @@ def bbox3d2result(bboxes, scores, labels):
"""
"""
return
dict
(
return
dict
(
boxes_3d
=
bboxes
.
cpu
(),
scores_3d
=
scores
.
cpu
(),
labels_3d
=
labels
.
cpu
())
boxes_3d
=
bboxes
.
cpu
(),
scores_3d
=
scores
.
cpu
(),
labels_3d
=
labels
.
cpu
())
def
upright_depth_to_lidar_torch
(
points
=
None
,
bboxes
=
None
,
to_bottom_center
=
False
):
"""Convert points and boxes in upright depth coordinate to lidar.
Args:
points (None | Tensor): points in upright depth coordinate.
bboxes (None | Tensor): bboxes in upright depth coordinate.
to_bottom_center (bool): covert bboxes to bottom center.
Returns:
tuple: points and bboxes in lidar coordinate.
"""
if
points
is
not
None
:
points_lidar
=
points
.
clone
()
points_lidar
=
points_lidar
[...,
[
1
,
0
,
2
]]
points_lidar
[...,
1
]
*=
-
1
else
:
points_lidar
=
None
if
bboxes
is
not
None
:
bboxes_lidar
=
bboxes
.
clone
()
bboxes_lidar
=
bboxes_lidar
[...,
[
1
,
0
,
2
,
4
,
3
,
5
,
6
]]
bboxes_lidar
[...,
1
]
*=
-
1
if
to_bottom_center
:
bboxes_lidar
[...,
2
]
-=
0.5
*
bboxes_lidar
[...,
5
]
else
:
bboxes_lidar
=
None
return
points_lidar
,
bboxes_lidar
def
box3d_to_corner3d_upright_depth
(
boxes3d
):
"""Convert box3d to corner3d in upright depth coordinate
Args:
boxes3d (Tensor): boxes with shape [n,7] in upright depth coordinate
Returns:
Tensor: boxes with [n, 8, 3] in upright depth coordinate
"""
boxes_num
=
boxes3d
.
shape
[
0
]
ry
=
boxes3d
[:,
6
:
7
]
l
,
w
,
h
=
boxes3d
[:,
3
:
4
],
boxes3d
[:,
4
:
5
],
boxes3d
[:,
5
:
6
]
zeros
=
boxes3d
.
new_zeros
((
boxes_num
,
1
))
ones
=
boxes3d
.
new_ones
((
boxes_num
,
1
))
# zeros = torch.cuda.FloatTensor(boxes_num, 1).fill_(0)
# ones = torch.cuda.FloatTensor(boxes_num, 1).fill_(1)
x_corners
=
torch
.
cat
(
[
-
l
/
2.
,
l
/
2.
,
l
/
2.
,
-
l
/
2.
,
-
l
/
2.
,
l
/
2.
,
l
/
2.
,
-
l
/
2.
],
dim
=
1
)
# (N, 8)
y_corners
=
torch
.
cat
(
[
w
/
2.
,
w
/
2.
,
-
w
/
2.
,
-
w
/
2.
,
w
/
2.
,
w
/
2.
,
-
w
/
2.
,
-
w
/
2.
],
dim
=
1
)
# (N, 8)
z_corners
=
torch
.
cat
(
[
h
/
2.
,
h
/
2.
,
h
/
2.
,
h
/
2.
,
-
h
/
2.
,
-
h
/
2.
,
-
h
/
2.
,
-
h
/
2.
],
dim
=
1
)
# (N, 8)
temp_corners
=
torch
.
cat
(
(
x_corners
.
unsqueeze
(
dim
=
2
),
y_corners
.
unsqueeze
(
dim
=
2
),
z_corners
.
unsqueeze
(
dim
=
2
)),
dim
=
2
)
# (N, 8, 3)
cosa
,
sina
=
torch
.
cos
(
-
ry
),
torch
.
sin
(
-
ry
)
raw_1
=
torch
.
cat
([
cosa
,
-
sina
,
zeros
],
dim
=
1
)
# (N, 3)
raw_2
=
torch
.
cat
([
sina
,
cosa
,
zeros
],
dim
=
1
)
# (N, 3)
raw_3
=
torch
.
cat
([
zeros
,
zeros
,
ones
],
dim
=
1
)
# (N, 3)
R
=
torch
.
cat
((
raw_1
.
unsqueeze
(
dim
=
1
),
raw_2
.
unsqueeze
(
dim
=
1
),
raw_3
.
unsqueeze
(
dim
=
1
)),
dim
=
1
)
# (N, 3, 3)
rotated_corners
=
torch
.
matmul
(
temp_corners
,
R
)
# (N, 8, 3)
x_corners
=
rotated_corners
[:,
:,
0
]
y_corners
=
rotated_corners
[:,
:,
1
]
z_corners
=
rotated_corners
[:,
:,
2
]
x_loc
,
y_loc
,
z_loc
=
boxes3d
[:,
0
],
boxes3d
[:,
1
],
boxes3d
[:,
2
]
x
=
x_loc
.
view
(
-
1
,
1
)
+
x_corners
.
view
(
-
1
,
8
)
y
=
y_loc
.
view
(
-
1
,
1
)
+
y_corners
.
view
(
-
1
,
8
)
z
=
z_loc
.
view
(
-
1
,
1
)
+
z_corners
.
view
(
-
1
,
8
)
corners3d
=
torch
.
cat
(
(
x
.
view
(
-
1
,
8
,
1
),
y
.
view
(
-
1
,
8
,
1
),
z
.
view
(
-
1
,
8
,
1
)),
dim
=
2
)
return
corners3d
mmdet3d/core/post_processing/__init__.py
View file @
19a56f6b
from
mmdet.core.post_processing
import
(
merge_aug_bboxes
,
merge_aug_masks
,
from
mmdet.core.post_processing
import
(
merge_aug_bboxes
,
merge_aug_masks
,
merge_aug_proposals
,
merge_aug_scores
,
merge_aug_proposals
,
merge_aug_scores
,
multiclass_nms
)
multiclass_nms
)
from
.box3d_nms
import
box3d_multiclass_nms
from
.box3d_nms
import
aligned_3d_nms
,
box3d_multiclass_nms
__all__
=
[
__all__
=
[
'multiclass_nms'
,
'merge_aug_proposals'
,
'merge_aug_bboxes'
,
'multiclass_nms'
,
'merge_aug_proposals'
,
'merge_aug_bboxes'
,
'merge_aug_scores'
,
'merge_aug_masks'
,
'box3d_multiclass_nms'
'merge_aug_scores'
,
'merge_aug_masks'
,
'box3d_multiclass_nms'
,
'aligned_3d_nms'
]
]
mmdet3d/core/post_processing/box3d_nms.py
View file @
19a56f6b
...
@@ -64,3 +64,52 @@ def box3d_multiclass_nms(mlvl_bboxes,
...
@@ -64,3 +64,52 @@ def box3d_multiclass_nms(mlvl_bboxes,
labels
=
mlvl_scores
.
new_zeros
((
0
,
mlvl_scores
.
size
(
-
1
)))
labels
=
mlvl_scores
.
new_zeros
((
0
,
mlvl_scores
.
size
(
-
1
)))
dir_scores
=
mlvl_scores
.
new_zeros
((
0
,
))
dir_scores
=
mlvl_scores
.
new_zeros
((
0
,
))
return
bboxes
,
scores
,
labels
,
dir_scores
return
bboxes
,
scores
,
labels
,
dir_scores
def
aligned_3d_nms
(
boxes
,
scores
,
classes
,
thresh
):
"""3d nms for aligned boxes.
Args:
boxes (Tensor): Aligned box with shape [n, 6].
scores (Tensor): Scores of each box.
classes (Tensor): Class of each box.
thresh (float): Iou threshold for nms.
Returns:
Tensor: Indices of selected boxes.
"""
x1
=
boxes
[:,
0
]
y1
=
boxes
[:,
1
]
z1
=
boxes
[:,
2
]
x2
=
boxes
[:,
3
]
y2
=
boxes
[:,
4
]
z2
=
boxes
[:,
5
]
area
=
(
x2
-
x1
)
*
(
y2
-
y1
)
*
(
z2
-
z1
)
zero
=
boxes
.
new_zeros
(
1
,
)
score_sorted
=
torch
.
argsort
(
scores
)
pick
=
[]
while
(
score_sorted
.
shape
[
0
]
!=
0
):
last
=
score_sorted
.
shape
[
0
]
i
=
score_sorted
[
-
1
]
pick
.
append
(
i
)
xx1
=
torch
.
max
(
x1
[
i
],
x1
[
score_sorted
[:
last
-
1
]])
yy1
=
torch
.
max
(
y1
[
i
],
y1
[
score_sorted
[:
last
-
1
]])
zz1
=
torch
.
max
(
z1
[
i
],
z1
[
score_sorted
[:
last
-
1
]])
xx2
=
torch
.
min
(
x2
[
i
],
x2
[
score_sorted
[:
last
-
1
]])
yy2
=
torch
.
min
(
y2
[
i
],
y2
[
score_sorted
[:
last
-
1
]])
zz2
=
torch
.
min
(
z2
[
i
],
z2
[
score_sorted
[:
last
-
1
]])
classes1
=
classes
[
i
]
classes2
=
classes
[
score_sorted
[:
last
-
1
]]
inter_l
=
torch
.
max
(
zero
,
xx2
-
xx1
)
inter_w
=
torch
.
max
(
zero
,
yy2
-
yy1
)
inter_h
=
torch
.
max
(
zero
,
zz2
-
zz1
)
inter
=
inter_l
*
inter_w
*
inter_h
iou
=
inter
/
(
area
[
i
]
+
area
[
score_sorted
[:
last
-
1
]]
-
inter
)
iou
=
iou
*
(
classes1
==
classes2
).
float
()
score_sorted
=
score_sorted
[
torch
.
nonzero
(
iou
<=
thresh
).
flatten
()]
indices
=
boxes
.
new_tensor
(
pick
,
dtype
=
torch
.
long
)
return
indices
mmdet3d/datasets/custom_3d.py
View file @
19a56f6b
...
@@ -19,12 +19,14 @@ class Custom3DDataset(Dataset):
...
@@ -19,12 +19,14 @@ class Custom3DDataset(Dataset):
pipeline
=
None
,
pipeline
=
None
,
classes
=
None
,
classes
=
None
,
modality
=
None
,
modality
=
None
,
filter_empty_gt
=
True
,
test_mode
=
False
):
test_mode
=
False
):
super
().
__init__
()
super
().
__init__
()
self
.
data_root
=
data_root
self
.
data_root
=
data_root
self
.
ann_file
=
ann_file
self
.
ann_file
=
ann_file
self
.
test_mode
=
test_mode
self
.
test_mode
=
test_mode
self
.
modality
=
modality
self
.
modality
=
modality
self
.
filter_empty_gt
=
filter_empty_gt
self
.
CLASSES
=
self
.
get_classes
(
classes
)
self
.
CLASSES
=
self
.
get_classes
(
classes
)
self
.
data_infos
=
self
.
load_annotations
(
self
.
ann_file
)
self
.
data_infos
=
self
.
load_annotations
(
self
.
ann_file
)
...
@@ -52,7 +54,7 @@ class Custom3DDataset(Dataset):
...
@@ -52,7 +54,7 @@ class Custom3DDataset(Dataset):
if
not
self
.
test_mode
:
if
not
self
.
test_mode
:
annos
=
self
.
get_ann_info
(
index
)
annos
=
self
.
get_ann_info
(
index
)
input_dict
[
'ann_info'
]
=
annos
input_dict
[
'ann_info'
]
=
annos
if
len
(
annos
[
'gt_bboxes_3d'
])
==
0
:
if
self
.
filter_empty_gt
and
len
(
annos
[
'gt_bboxes_3d'
])
==
0
:
return
None
return
None
return
input_dict
return
input_dict
...
@@ -67,7 +69,8 @@ class Custom3DDataset(Dataset):
...
@@ -67,7 +69,8 @@ class Custom3DDataset(Dataset):
return
None
return
None
self
.
pre_pipeline
(
input_dict
)
self
.
pre_pipeline
(
input_dict
)
example
=
self
.
pipeline
(
input_dict
)
example
=
self
.
pipeline
(
input_dict
)
if
example
is
None
or
len
(
example
[
'gt_bboxes_3d'
].
_data
)
==
0
:
if
self
.
filter_empty_gt
and
(
example
is
None
or
len
(
example
[
'gt_bboxes_3d'
].
_data
)
==
0
):
return
None
return
None
return
example
return
example
...
@@ -124,10 +127,13 @@ class Custom3DDataset(Dataset):
...
@@ -124,10 +127,13 @@ class Custom3DDataset(Dataset):
results (list[dict]): List of results.
results (list[dict]): List of results.
metric (str | list[str]): Metrics to be evaluated.
metric (str | list[str]): Metrics to be evaluated.
iou_thr (list[float]): AP IoU thresholds.
iou_thr (list[float]): AP IoU thresholds.
"""
"""
from
mmdet3d.core.evaluation
import
indoor_eval
from
mmdet3d.core.evaluation
import
indoor_eval
assert
isinstance
(
assert
isinstance
(
results
,
list
),
f
'Expect results to be list, got
{
type
(
results
)
}
.'
results
,
list
),
f
'Expect results to be list, got
{
type
(
results
)
}
.'
assert
len
(
results
)
>
0
,
f
'Expect length of results > 0.'
assert
len
(
results
)
==
len
(
self
.
data_infos
)
assert
isinstance
(
assert
isinstance
(
results
[
0
],
dict
results
[
0
],
dict
),
f
'Expect elements in results to be dict, got
{
type
(
results
[
0
])
}
.'
),
f
'Expect elements in results to be dict, got
{
type
(
results
[
0
])
}
.'
...
...
mmdet3d/datasets/pipelines/__init__.py
View file @
19a56f6b
...
@@ -7,6 +7,7 @@ from .indoor_loading import (LoadAnnotations3D, LoadPointsFromFile,
...
@@ -7,6 +7,7 @@ from .indoor_loading import (LoadAnnotations3D, LoadPointsFromFile,
NormalizePointsColor
)
NormalizePointsColor
)
from
.indoor_sample
import
IndoorPointSample
from
.indoor_sample
import
IndoorPointSample
from
.loading
import
LoadMultiViewImageFromFiles
from
.loading
import
LoadMultiViewImageFromFiles
from
.point_seg_class_mapping
import
PointSegClassMapping
from
.train_aug
import
(
GlobalRotScale
,
ObjectNoise
,
ObjectRangeFilter
,
from
.train_aug
import
(
GlobalRotScale
,
ObjectNoise
,
ObjectRangeFilter
,
ObjectSample
,
PointShuffle
,
PointsRangeFilter
,
ObjectSample
,
PointShuffle
,
PointsRangeFilter
,
RandomFlip3D
)
RandomFlip3D
)
...
@@ -18,5 +19,5 @@ __all__ = [
...
@@ -18,5 +19,5 @@ __all__ = [
'DefaultFormatBundle'
,
'DefaultFormatBundle3D'
,
'DataBaseSampler'
,
'DefaultFormatBundle'
,
'DefaultFormatBundle3D'
,
'DataBaseSampler'
,
'IndoorGlobalRotScale'
,
'IndoorPointsColorJitter'
,
'IndoorFlipData'
,
'IndoorGlobalRotScale'
,
'IndoorPointsColorJitter'
,
'IndoorFlipData'
,
'MMDataBaseSampler'
,
'NormalizePointsColor'
,
'LoadAnnotations3D'
,
'MMDataBaseSampler'
,
'NormalizePointsColor'
,
'LoadAnnotations3D'
,
'IndoorPointSample'
'IndoorPointSample'
,
'PointSegClassMapping'
]
]
mmdet3d/datasets/pipelines/indoor_augment.py
View file @
19a56f6b
...
@@ -224,7 +224,7 @@ class IndoorGlobalRotScale(object):
...
@@ -224,7 +224,7 @@ class IndoorGlobalRotScale(object):
results
[
'scale_ratio'
]
=
scale_ratio
results
[
'scale_ratio'
]
=
scale_ratio
results
[
'points'
]
=
points
results
[
'points'
]
=
points
results
[
'gt_bboxes_3d'
]
=
gt_bboxes_3d
results
[
'gt_bboxes_3d'
]
=
gt_bboxes_3d
.
astype
(
np
.
float32
)
return
results
return
results
def
__repr__
(
self
):
def
__repr__
(
self
):
...
...
mmdet3d/datasets/pipelines/indoor_loading.py
View file @
19a56f6b
...
@@ -189,7 +189,8 @@ class LoadAnnotations3D(LoadAnnotations):
...
@@ -189,7 +189,8 @@ class LoadAnnotations3D(LoadAnnotations):
self
.
file_client
=
mmcv
.
FileClient
(
**
self
.
file_client_args
)
self
.
file_client
=
mmcv
.
FileClient
(
**
self
.
file_client_args
)
try
:
try
:
mask_bytes
=
self
.
file_client
.
get
(
pts_semantic_mask_path
)
mask_bytes
=
self
.
file_client
.
get
(
pts_semantic_mask_path
)
pts_semantic_mask
=
np
.
frombuffer
(
mask_bytes
,
dtype
=
np
.
int
)
# add .copy() to fix read-only bug
pts_semantic_mask
=
np
.
frombuffer
(
mask_bytes
,
dtype
=
np
.
int
).
copy
()
except
ConnectionError
:
except
ConnectionError
:
mmcv
.
check_file_exist
(
pts_semantic_mask_path
)
mmcv
.
check_file_exist
(
pts_semantic_mask_path
)
pts_semantic_mask
=
np
.
fromfile
(
pts_semantic_mask
=
np
.
fromfile
(
...
...
mmdet3d/datasets/pipelines/point_seg_class_mapping.py
0 → 100644
View file @
19a56f6b
from
mmdet.datasets.builder
import
PIPELINES
@PIPELINES.register_module()
class PointSegClassMapping(object):
    """Map original semantic class ids to valid category ids.

    Points whose semantic label is in ``valid_cat_ids`` are mapped to the
    index of that label inside ``valid_cat_ids`` (i.e. 0 ~
    ``len(valid_cat_ids) - 1``); every other point is mapped to the
    negative/ignore class ``len(valid_cat_ids)``.

    Args:
        valid_cat_ids (tuple[int]): A tuple of valid category ids.
    """

    def __init__(self, valid_cat_ids):
        self.valid_cat_ids = valid_cat_ids
        # Precompute the lookup table once so that __call__ costs
        # O(num_points) instead of O(num_points * num_valid_classes)
        # from repeated `in` / tuple.index scans per point.
        self._cat_id2class = {
            cat_id: class_id
            for class_id, cat_id in enumerate(valid_cat_ids)
        }

    def __call__(self, results):
        """Remap ``results['pts_semantic_mask']`` in place.

        Args:
            results (dict): Result dict containing 'pts_semantic_mask'
                (expected to be an integer array of per-point labels).

        Returns:
            dict: The same result dict with the mask remapped.
        """
        assert 'pts_semantic_mask' in results
        pts_semantic_mask = results['pts_semantic_mask']
        neg_cls = len(self.valid_cat_ids)

        for i in range(pts_semantic_mask.shape[0]):
            # Each element is read once before being overwritten, so the
            # in-place update cannot corrupt later lookups.
            pts_semantic_mask[i] = self._cat_id2class.get(
                int(pts_semantic_mask[i]), neg_cls)

        results['pts_semantic_mask'] = pts_semantic_mask
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += '(valid_cat_ids={})'.format(self.valid_cat_ids)
        return repr_str
mmdet3d/datasets/scannet_dataset.py
View file @
19a56f6b
...
@@ -20,9 +20,10 @@ class ScanNetDataset(Custom3DDataset):
...
@@ -20,9 +20,10 @@ class ScanNetDataset(Custom3DDataset):
pipeline
=
None
,
pipeline
=
None
,
classes
=
None
,
classes
=
None
,
modality
=
None
,
modality
=
None
,
filter_empty_gt
=
True
,
test_mode
=
False
):
test_mode
=
False
):
super
().
__init__
(
data_root
,
ann_file
,
pipeline
,
classes
,
modality
,
super
().
__init__
(
data_root
,
ann_file
,
pipeline
,
classes
,
modality
,
test_mode
)
filter_empty_gt
,
test_mode
)
def
get_ann_info
(
self
,
index
):
def
get_ann_info
(
self
,
index
):
# Use index to get the annos, thus the evalhook could also use this api
# Use index to get the annos, thus the evalhook could also use this api
...
...
mmdet3d/datasets/sunrgbd_dataset.py
View file @
19a56f6b
...
@@ -16,9 +16,10 @@ class SUNRGBDDataset(Custom3DDataset):
...
@@ -16,9 +16,10 @@ class SUNRGBDDataset(Custom3DDataset):
pipeline
=
None
,
pipeline
=
None
,
classes
=
None
,
classes
=
None
,
modality
=
None
,
modality
=
None
,
filter_empty_gt
=
True
,
test_mode
=
False
):
test_mode
=
False
):
super
().
__init__
(
data_root
,
ann_file
,
pipeline
,
classes
,
modality
,
super
().
__init__
(
data_root
,
ann_file
,
pipeline
,
classes
,
modality
,
test_mode
)
filter_empty_gt
,
test_mode
)
def
get_ann_info
(
self
,
index
):
def
get_ann_info
(
self
,
index
):
# Use index to get the annos, thus the evalhook could also use this api
# Use index to get the annos, thus the evalhook could also use this api
...
...
mmdet3d/models/__init__.py
View file @
19a56f6b
...
@@ -8,6 +8,7 @@ from .detectors import * # noqa: F401,F403
...
@@ -8,6 +8,7 @@ from .detectors import * # noqa: F401,F403
from
.fusion_layers
import
*
# noqa: F401,F403
from
.fusion_layers
import
*
# noqa: F401,F403
from
.losses
import
*
# noqa: F401,F403
from
.losses
import
*
# noqa: F401,F403
from
.middle_encoders
import
*
# noqa: F401,F403
from
.middle_encoders
import
*
# noqa: F401,F403
from
.model_utils
import
*
# noqa: F401,F403
from
.necks
import
*
# noqa: F401,F403
from
.necks
import
*
# noqa: F401,F403
from
.registry
import
FUSION_LAYERS
,
MIDDLE_ENCODERS
,
VOXEL_ENCODERS
from
.registry
import
FUSION_LAYERS
,
MIDDLE_ENCODERS
,
VOXEL_ENCODERS
from
.roi_heads
import
*
# noqa: F401,F403
from
.roi_heads
import
*
# noqa: F401,F403
...
...
mmdet3d/models/dense_heads/__init__.py
View file @
19a56f6b
from
.anchor3d_head
import
Anchor3DHead
from
.anchor3d_head
import
Anchor3DHead
from
.parta2_rpn_head
import
PartA2RPNHead
from
.parta2_rpn_head
import
PartA2RPNHead
from
.vote_head
import
VoteHead
__all__
=
[
'Anchor3DHead'
,
'PartA2RPNHead'
]
__all__
=
[
'Anchor3DHead'
,
'PartA2RPNHead'
,
'VoteHead'
]
mmdet3d/models/dense_heads/vote_head.py
0 → 100644
View file @
19a56f6b
import
numpy
as
np
import
torch
import
torch.nn
as
nn
import
torch.nn.functional
as
F
from
mmcv.cnn
import
ConvModule
from
mmdet3d.core
import
build_bbox_coder
,
multi_apply
from
mmdet3d.core.bbox.box_torch_ops
import
boxes3d_to_corners3d_lidar_torch
from
mmdet3d.core.bbox.transforms
import
upright_depth_to_lidar_torch
from
mmdet3d.core.post_processing
import
aligned_3d_nms
from
mmdet3d.models.builder
import
build_loss
from
mmdet3d.models.losses
import
chamfer_distance
from
mmdet3d.models.model_utils
import
VoteModule
from
mmdet3d.ops
import
(
PointSAModule
,
furthest_point_sample
,
points_in_boxes_batch
)
from
mmdet.models
import
HEADS
@HEADS.register_module()
class VoteHead(nn.Module):
    """Bbox head of Votenet.

    https://arxiv.org/pdf/1904.09664.pdf

    Args:
        num_classes (int): The number of class.
        bbox_coder (BaseBBoxCoder): Bbox coder for encoding and
            decoding boxes.
        train_cfg (dict): Config for training.
        test_cfg (dict): Config for testing.
        vote_moudule_cfg (dict): Config of VoteModule for point-wise votes.
            NOTE(review): parameter name carries a "moudule" typo, but it is
            part of the public interface (configs pass it by keyword), so it
            must not be renamed here.
        vote_aggregation_cfg (dict): Config of vote aggregation layer.
        feat_channels (tuple[int]): Convolution channels of
            prediction layer.
        conv_cfg (dict): Config of convolution in prediction layer.
        norm_cfg (dict): Config of BN in prediction layer.
        objectness_loss (dict): Config of objectness loss.
        center_loss (dict): Config of center loss.
        dir_class_loss (dict): Config of direction classification loss.
        dir_res_loss (dict): Config of direction residual regression loss.
        size_class_loss (dict): Config of size classification loss.
        size_res_loss (dict): Config of size residual regression loss.
        semantic_loss (dict): Config of point-wise semantic segmentation loss.
    """

    def __init__(self,
                 num_classes,
                 bbox_coder,
                 train_cfg=None,
                 test_cfg=None,
                 vote_moudule_cfg=None,
                 vote_aggregation_cfg=None,
                 feat_channels=(128, 128),
                 conv_cfg=dict(type='Conv1d'),
                 norm_cfg=dict(type='BN1d'),
                 objectness_loss=None,
                 center_loss=None,
                 dir_class_loss=None,
                 dir_res_loss=None,
                 size_class_loss=None,
                 size_res_loss=None,
                 semantic_loss=None):
        super(VoteHead, self).__init__()
        self.num_classes = num_classes
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        # How many ground-truth boxes a single seed point may vote for.
        self.gt_per_seed = vote_moudule_cfg['gt_per_seed']
        # Number of proposals produced by the aggregation SA layer.
        self.num_proposal = vote_aggregation_cfg['num_point']

        self.objectness_loss = build_loss(objectness_loss)
        self.center_loss = build_loss(center_loss)
        self.dir_class_loss = build_loss(dir_class_loss)
        self.dir_res_loss = build_loss(dir_res_loss)
        self.size_class_loss = build_loss(size_class_loss)
        self.size_res_loss = build_loss(size_res_loss)
        self.semantic_loss = build_loss(semantic_loss)

        # The vote features must match the input width of the aggregation MLP.
        assert vote_aggregation_cfg['mlp_channels'][0] == \
            vote_moudule_cfg['in_channels']

        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.num_sizes = self.bbox_coder.num_sizes
        self.num_dir_bins = self.bbox_coder.num_dir_bins

        self.vote_module = VoteModule(**vote_moudule_cfg)
        self.vote_aggregation = PointSAModule(**vote_aggregation_cfg)

        # Shared prediction trunk: a stack of 1x1 Conv1d+BN blocks.
        prev_channel = vote_aggregation_cfg['mlp_channels'][-1]
        conv_pred_list = list()
        for k in range(len(feat_channels)):
            conv_pred_list.append(
                ConvModule(
                    prev_channel,
                    feat_channels[k],
                    1,
                    padding=0,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    bias=True,
                    inplace=True))
            prev_channel = feat_channels[k]
        self.conv_pred = nn.Sequential(*conv_pred_list)

        # Objectness scores (2), center residual (3),
        # heading class+residual (num_dir_bins*2),
        # size class+residual(num_sizes*4)
        conv_out_channel = (2 + 3 + self.num_dir_bins * 2 +
                            self.num_sizes * 4 + num_classes)
        self.conv_pred.add_module('conv_out',
                                  nn.Conv1d(prev_channel, conv_out_channel, 1))

    def init_weights(self):
        # Intentionally a no-op: submodules rely on their own default
        # initialization.
        pass

    def forward(self, feat_dict, sample_mod):
        """Forward pass.

        The forward of VoteHead is devided into 4 steps:
            1. Generate vote_points from seed_points.
            2. Aggregate vote_points.
            3. Predict bbox and score.
            4. Decode predictions.

        Args:
            feat_dict (dict): feature dict from backbone. Must contain
                'fp_xyz', 'fp_features' and 'fp_indices' lists; only the
                last (finest) FP level is used.
            sample_mod (str): sample mode for vote aggregation layer.
                valid modes are "vote", "seed" and "random".

        Returns:
            dict: Seed/vote/aggregation tensors plus the decoded
                predictions from ``bbox_coder.split_pred``.
        """
        assert sample_mod in ['vote', 'seed', 'random']

        seed_points = feat_dict['fp_xyz'][-1]
        seed_features = feat_dict['fp_features'][-1]
        seed_indices = feat_dict['fp_indices'][-1]

        # 1. generate vote_points from seed_points
        vote_points, vote_features = self.vote_module(seed_points,
                                                      seed_features)
        results = dict(
            seed_points=seed_points,
            seed_indices=seed_indices,
            vote_points=vote_points,
            vote_features=vote_features)

        # 2. aggregate vote_points
        if sample_mod == 'vote':
            # use fps in vote_aggregation
            sample_indices = None
        elif sample_mod == 'seed':
            # FPS on seed and choose the votes corresponding to the seeds
            sample_indices = furthest_point_sample(seed_points,
                                                   self.num_proposal)
        elif sample_mod == 'random':
            # Random sampling from the votes
            batch_size, num_seed = seed_points.shape[:2]
            sample_indices = seed_points.new_tensor(
                torch.randint(0, num_seed, (batch_size, self.num_proposal)),
                dtype=torch.int32)
        else:
            raise NotImplementedError
        vote_aggregation_ret = self.vote_aggregation(vote_points,
                                                     vote_features,
                                                     sample_indices)
        aggregated_points, features, aggregated_indices = vote_aggregation_ret
        results['aggregated_points'] = aggregated_points
        results['aggregated_indices'] = aggregated_indices

        # 3. predict bbox and score
        predictions = self.conv_pred(features)

        # 4. decode predictions
        decode_res = self.bbox_coder.split_pred(predictions,
                                                aggregated_points)
        results.update(decode_res)

        return results

    def loss(self,
             bbox_preds,
             points,
             gt_bboxes_3d,
             gt_labels_3d,
             pts_semantic_mask=None,
             pts_instance_mask=None,
             img_meta=None,
             gt_bboxes_ignore=None):
        """Compute all VoteNet losses from predictions and ground truth.

        Args:
            bbox_preds (dict): Output of ``forward``.
            points (list[Tensor]): Input points of each sample.
            gt_bboxes_3d (list[Tensor]): GT boxes of each sample.
            gt_labels_3d (list[Tensor]): GT labels of each sample.
            pts_semantic_mask (None | list[Tensor]): Per-point semantic
                labels (optional).
            pts_instance_mask (None | list[Tensor]): Per-point instance
                labels (optional).
            img_meta (list | None): Image metas (unused here).
            gt_bboxes_ignore (None | list[Tensor]): Ignored boxes
                (unused here).

        Returns:
            dict: All loss terms, keyed by loss name.
        """
        targets = self.get_targets(points, gt_bboxes_3d, gt_labels_3d,
                                   pts_semantic_mask, pts_instance_mask,
                                   bbox_preds)
        (vote_targets, vote_target_masks, size_class_targets,
         size_res_targets, dir_class_targets, dir_res_targets,
         center_targets, mask_targets, valid_gt_masks, objectness_targets,
         objectness_weights, box_loss_weights, valid_gt_weights) = targets

        # calculate vote loss
        vote_loss = self.vote_module.get_loss(bbox_preds['seed_points'],
                                              bbox_preds['vote_points'],
                                              bbox_preds['seed_indices'],
                                              vote_target_masks, vote_targets)

        # calculate objectness loss
        objectness_loss = self.objectness_loss(
            bbox_preds['obj_scores'].transpose(2, 1),
            objectness_targets,
            weight=objectness_weights)

        # calculate center loss
        # (bidirectional Chamfer-style loss: prediction->target and
        # target->prediction terms are weighted separately)
        source2target_loss, target2source_loss = self.center_loss(
            bbox_preds['center'],
            center_targets,
            src_weight=box_loss_weights,
            dst_weight=valid_gt_weights)
        center_loss = source2target_loss + target2source_loss

        # calculate direction class loss
        dir_class_loss = self.dir_class_loss(
            bbox_preds['dir_class'].transpose(2, 1),
            dir_class_targets,
            weight=box_loss_weights)

        # calculate direction residual loss
        # only the residual of the ground-truth direction bin is penalized,
        # selected via a one-hot mask over bins.
        batch_size, proposal_num = size_class_targets.shape[:2]
        heading_label_one_hot = vote_targets.new_zeros(
            (batch_size, proposal_num, self.num_dir_bins))
        heading_label_one_hot.scatter_(2, dir_class_targets.unsqueeze(-1), 1)
        dir_res_norm = torch.sum(
            bbox_preds['dir_res_norm'] * heading_label_one_hot, -1)
        dir_res_loss = self.dir_res_loss(
            dir_res_norm, dir_res_targets, weight=box_loss_weights)

        # calculate size class loss
        size_class_loss = self.size_class_loss(
            bbox_preds['size_class'].transpose(2, 1),
            size_class_targets,
            weight=box_loss_weights)

        # calculate size residual loss
        # likewise, only the residual of the ground-truth size cluster is
        # penalized (one-hot over size classes, broadcast to xyz dims).
        one_hot_size_targets = vote_targets.new_zeros(
            (batch_size, proposal_num, self.num_sizes))
        one_hot_size_targets.scatter_(2, size_class_targets.unsqueeze(-1), 1)
        one_hot_size_targets_expand = one_hot_size_targets.unsqueeze(
            -1).repeat(1, 1, 1, 3)
        size_residual_norm = torch.sum(
            bbox_preds['size_res_norm'] * one_hot_size_targets_expand, 2)
        box_loss_weights_expand = box_loss_weights.unsqueeze(-1).repeat(
            1, 1, 3)
        size_res_loss = self.size_res_loss(
            size_residual_norm,
            size_res_targets,
            weight=box_loss_weights_expand)

        # calculate semantic loss
        semantic_loss = self.semantic_loss(
            bbox_preds['sem_scores'].transpose(2, 1),
            mask_targets,
            weight=box_loss_weights)

        losses = dict(
            vote_loss=vote_loss,
            objectness_loss=objectness_loss,
            semantic_loss=semantic_loss,
            center_loss=center_loss,
            dir_class_loss=dir_class_loss,
            dir_res_loss=dir_res_loss,
            size_class_loss=size_class_loss,
            size_res_loss=size_res_loss)
        return losses

    def get_targets(self,
                    points,
                    gt_bboxes_3d,
                    gt_labels_3d,
                    pts_semantic_mask=None,
                    pts_instance_mask=None,
                    bbox_preds=None):
        """Get targets of vote head.

        Args:
            points (list[Tensor]): Points of each batch.
            gt_bboxes_3d (list[Tensor]): gt bboxes of each batch.
            gt_labels_3d (list[Tensor]): gt class labels of each batch.
            pts_semantic_mask (None | list[Tensor]): point-wise semantic
                label of each batch.
            pts_instance_mask (None | list[Tensor]): point-wise instance
                label of each batch.
            bbox_preds (Tensor): Bbox predictions of vote head.

        Returns:
            tuple: Targets of vote head.
        """
        # find empty example
        # Samples without any GT get a single dummy (all-zero) box/label so
        # per-sample target computation never sees an empty tensor; the
        # dummy is masked out via valid_gt_masks.
        valid_gt_masks = list()
        gt_num = list()
        for index in range(len(gt_labels_3d)):
            if len(gt_labels_3d[index]) == 0:
                gt_bboxes_3d[index] = gt_bboxes_3d[index].new_zeros(
                    1, gt_bboxes_3d[index].shape[-1])
                gt_labels_3d[index] = gt_labels_3d[index].new_zeros(1)
                valid_gt_masks.append(gt_labels_3d[index].new_zeros(1))
                gt_num.append(1)
            else:
                valid_gt_masks.append(gt_labels_3d[index].new_ones(
                    gt_labels_3d[index].shape))
                gt_num.append(gt_labels_3d[index].shape[0])
        max_gt_num = max(gt_num)

        if pts_semantic_mask is None:
            pts_semantic_mask = [None for i in range(len(gt_labels_3d))]
            pts_instance_mask = [None for i in range(len(gt_labels_3d))]

        aggregated_points = [
            bbox_preds['aggregated_points'][i]
            for i in range(len(gt_labels_3d))
        ]

        (vote_targets, vote_target_masks, size_class_targets,
         size_res_targets, dir_class_targets, dir_res_targets,
         center_targets, mask_targets, objectness_targets,
         objectness_masks) = multi_apply(self.get_targets_single, points,
                                         gt_bboxes_3d, gt_labels_3d,
                                         pts_semantic_mask,
                                         pts_instance_mask,
                                         aggregated_points)

        # pad targets as original code of votenet.
        # GT counts differ per sample; pad each to max_gt_num so the
        # per-sample tensors can be stacked into a batch.
        for index in range(len(gt_labels_3d)):
            pad_num = max_gt_num - gt_labels_3d[index].shape[0]
            center_targets[index] = F.pad(center_targets[index],
                                          (0, 0, 0, pad_num))
            valid_gt_masks[index] = F.pad(valid_gt_masks[index],
                                          (0, pad_num))

        vote_targets = torch.stack(vote_targets)
        vote_target_masks = torch.stack(vote_target_masks)
        center_targets = torch.stack(center_targets)
        valid_gt_masks = torch.stack(valid_gt_masks)

        objectness_targets = torch.stack(objectness_targets)
        objectness_weights = torch.stack(objectness_masks)
        # Normalize the weights so each loss term is an average rather than
        # a sum (1e-6 guards against division by zero).
        objectness_weights /= (torch.sum(objectness_weights) + 1e-6)
        box_loss_weights = objectness_targets.float() / (
            torch.sum(objectness_targets).float() + 1e-6)
        valid_gt_weights = valid_gt_masks.float() / (
            torch.sum(valid_gt_masks.float()) + 1e-6)
        dir_class_targets = torch.stack(dir_class_targets)
        dir_res_targets = torch.stack(dir_res_targets)
        size_class_targets = torch.stack(size_class_targets)
        size_res_targets = torch.stack(size_res_targets)
        mask_targets = torch.stack(mask_targets)

        return (vote_targets, vote_target_masks, size_class_targets,
                size_res_targets, dir_class_targets, dir_res_targets,
                center_targets, mask_targets, valid_gt_masks,
                objectness_targets, objectness_weights, box_loss_weights,
                valid_gt_weights)

    def get_targets_single(self,
                           points,
                           gt_bboxes_3d,
                           gt_labels_3d,
                           pts_semantic_mask=None,
                           pts_instance_mask=None,
                           aggregated_points=None):
        """Compute targets for a single sample.

        Vote targets are derived either from box membership (rotated-box
        datasets, ``bbox_coder.with_rot``) or from instance masks
        (axis-aligned datasets providing semantic/instance masks).

        Args:
            points (Tensor): Points of this sample.
            gt_bboxes_3d (Tensor): GT boxes of this sample.
            gt_labels_3d (Tensor): GT labels of this sample.
            pts_semantic_mask (None | Tensor): Per-point semantic labels.
            pts_instance_mask (None | Tensor): Per-point instance labels.
            aggregated_points (Tensor): Proposal centers from ``forward``.

        Returns:
            tuple: Per-sample targets (votes, vote masks, size/dir
                class+residual targets, center targets, semantic targets,
                objectness targets and masks).
        """
        assert self.bbox_coder.with_rot or pts_semantic_mask is not None

        # generate votes target
        num_points = points.shape[0]
        if self.bbox_coder.with_rot:
            # Box-membership path: every point inside a GT box votes for
            # that box center; a point may collect up to gt_per_seed votes.
            points_lidar, gt_bboxes_3d_lidar = upright_depth_to_lidar_torch(
                points, gt_bboxes_3d, to_bottom_center=True)
            vote_targets = points.new_zeros([num_points, 3 * self.gt_per_seed])
            vote_target_masks = points.new_zeros([num_points],
                                                 dtype=torch.long)
            # Per-point count of votes already assigned (clamped below).
            vote_target_idx = points.new_zeros([num_points], dtype=torch.long)
            box_indices_all = points_in_boxes_batch(
                points_lidar.unsqueeze(0), gt_bboxes_3d_lidar.unsqueeze(0))[0]
            for i in range(gt_bboxes_3d.shape[0]):
                box_indices = box_indices_all[:, i]
                indices = torch.nonzero(box_indices).squeeze(-1)
                selected_points = points[indices]
                vote_target_masks[indices] = 1
                vote_targets_tmp = vote_targets[indices]
                # Vote = offset from the point to the GT box center.
                votes = gt_bboxes_3d[i][:3].unsqueeze(
                    0) - selected_points[:, :3]
                for j in range(self.gt_per_seed):
                    column_indices = torch.nonzero(
                        vote_target_idx[indices] == j).squeeze(-1)
                    vote_targets_tmp[column_indices,
                                     int(j * 3):int(j * 3 +
                                                    3)] = votes[column_indices]
                    if j == 0:
                        # First vote is replicated into all slots so unused
                        # slots still carry a sensible target.
                        vote_targets_tmp[column_indices] = votes[
                            column_indices].repeat(1, self.gt_per_seed)
                vote_targets[indices] = vote_targets_tmp
                vote_target_idx[indices] = torch.clamp(
                    vote_target_idx[indices] + 1, max=2)
        elif pts_semantic_mask is not None:
            # Instance-mask path: vote toward the axis-aligned center of
            # each instance's point cloud.
            vote_targets = points.new_zeros([num_points, 3])
            vote_target_masks = points.new_zeros([num_points],
                                                 dtype=torch.long)
            for i in torch.unique(pts_instance_mask):
                indices = torch.nonzero(pts_instance_mask == i).squeeze(-1)
                # Foreground instances only (background/ignored semantic ids
                # are >= num_classes by dataset convention).
                if pts_semantic_mask[indices[0]] < self.num_classes:
                    selected_points = points[indices, :3]
                    center = 0.5 * (
                        selected_points.min(0)[0] + selected_points.max(0)[0])
                    vote_targets[indices, :] = center - selected_points
                    vote_target_masks[indices] = 1
            vote_targets = vote_targets.repeat((1, self.gt_per_seed))
        else:
            raise NotImplementedError

        (center_targets, size_class_targets, size_res_targets,
         dir_class_targets,
         dir_res_targets) = self.bbox_coder.encode(gt_bboxes_3d, gt_labels_3d)

        # Assign each proposal to its nearest GT center via Chamfer distance.
        proposal_num = aggregated_points.shape[0]
        distance1, _, assignment, _ = chamfer_distance(
            aggregated_points.unsqueeze(0),
            center_targets.unsqueeze(0),
            reduction='none')
        assignment = assignment.squeeze(0)
        euclidean_distance1 = torch.sqrt(distance1.squeeze(0) + 1e-6)

        objectness_targets = points.new_zeros((proposal_num),
                                              dtype=torch.long)
        objectness_targets[
            euclidean_distance1 < self.train_cfg['pos_distance_thr']] = 1

        # Proposals between the two thresholds stay masked out (ignored).
        objectness_masks = points.new_zeros((proposal_num))
        objectness_masks[
            euclidean_distance1 < self.train_cfg['pos_distance_thr']] = 1.0
        objectness_masks[
            euclidean_distance1 > self.train_cfg['neg_distance_thr']] = 1.0

        dir_class_targets = dir_class_targets[assignment]
        dir_res_targets = dir_res_targets[assignment]
        # Normalize the angle residual by the bin width.
        dir_res_targets /= (np.pi / self.num_dir_bins)
        size_class_targets = size_class_targets[assignment]
        size_res_targets = size_res_targets[assignment]

        one_hot_size_targets = gt_bboxes_3d.new_zeros(
            (proposal_num, self.num_sizes))
        one_hot_size_targets.scatter_(1, size_class_targets.unsqueeze(-1), 1)
        one_hot_size_targets = one_hot_size_targets.unsqueeze(-1).repeat(
            1, 1, 3)
        mean_sizes = size_res_targets.new_tensor(
            self.bbox_coder.mean_sizes).unsqueeze(0)
        pos_mean_sizes = torch.sum(one_hot_size_targets * mean_sizes, 1)
        # Normalize the size residual by the assigned cluster's mean size.
        size_res_targets /= pos_mean_sizes

        mask_targets = gt_labels_3d[assignment]

        return (vote_targets, vote_target_masks, size_class_targets,
                size_res_targets, dir_class_targets, dir_res_targets,
                center_targets, mask_targets.long(), objectness_targets,
                objectness_masks)

    def get_bboxes(self, points, bbox_preds, img_meta, rescale=False):
        """Decode predictions into boxes and run per-sample NMS.

        Args:
            points (Tensor): Input points (batched).
            bbox_preds (dict): Output of ``forward``.
            img_meta (list): Image metas (unused here).
            rescale (bool): Unused; kept for interface compatibility.

        Returns:
            list[tuple]: Per-sample (bboxes, scores, labels).
        """
        # decode boxes
        # obj_scores: probability of the "object" channel (last channel of
        # the 2-way objectness softmax).
        obj_scores = F.softmax(bbox_preds['obj_scores'], dim=-1)[..., -1]
        sem_scores = F.softmax(bbox_preds['sem_scores'], dim=-1)
        bbox_depth = self.bbox_coder.decode(bbox_preds)
        points_lidar, bbox_lidar = upright_depth_to_lidar_torch(
            points[..., :3], bbox_depth, to_bottom_center=True)

        batch_size = bbox_depth.shape[0]
        results = list()
        for b in range(batch_size):
            bbox_selected, score_selected, labels = self.multiclass_nms_single(
                obj_scores[b], sem_scores[b], bbox_lidar[b], points_lidar[b])
            results.append((bbox_selected, score_selected, labels))

        return results

    def multiclass_nms_single(self, obj_scores, sem_scores, bbox,
                              points_lidar):
        """Multi-class NMS for a single sample.

        Args:
            obj_scores (Tensor): Objectness score per proposal.
            sem_scores (Tensor): Semantic class scores per proposal.
            bbox (Tensor): Decoded boxes (LiDAR coords) per proposal.
            points_lidar (Tensor): Sample points (LiDAR coords), used to
                discard boxes containing too few points.

        Returns:
            tuple: (bbox_selected, score_selected, labels) after NMS and
                score filtering.
        """
        box_indices = points_in_boxes_batch(
            points_lidar.unsqueeze(0), bbox.unsqueeze(0))[0]
        # Keep only boxes enclosing more than 5 points.
        nonempty_box_mask = box_indices.T.sum(1) > 5

        bbox_classes = torch.argmax(sem_scores, -1)

        # boxes3d to aligned boxes
        # NMS operates on the axis-aligned min/max envelope of each box's
        # eight corners.
        corner3d = boxes3d_to_corners3d_lidar_torch(bbox)
        minmax_box3d = corner3d.new(torch.Size((corner3d.shape[0], 6)))
        minmax_box3d[:, :3] = torch.min(corner3d, dim=1)[0]
        minmax_box3d[:, 3:] = torch.max(corner3d, dim=1)[0]
        nms_selected = aligned_3d_nms(minmax_box3d[nonempty_box_mask],
                                      obj_scores[nonempty_box_mask],
                                      bbox_classes[nonempty_box_mask],
                                      self.test_cfg.nms_thr)

        # filter empty boxes and boxes with low score
        scores_mask = (obj_scores > self.test_cfg.score_thr)
        nonempty_box_inds = torch.nonzero(nonempty_box_mask).flatten()
        # Map NMS-kept indices (which are relative to the nonempty subset)
        # back into a full-length boolean mask.
        nonempty_mask = torch.zeros_like(bbox_classes).scatter(
            0, nonempty_box_inds[nms_selected], 1)
        selected = (nonempty_mask.bool() & scores_mask.bool())

        if self.test_cfg.per_class_proposal:
            # Emit every surviving box once per class, scored by
            # objectness * per-class semantic probability.
            bbox_selected, score_selected, labels = [], [], []
            for k in range(sem_scores.shape[-1]):
                bbox_selected.append(bbox[selected])
                score_selected.append(obj_scores[selected] *
                                      sem_scores[selected][:, k])
                labels.append(
                    torch.zeros_like(bbox_classes[selected]).fill_(k))
            bbox_selected = torch.cat(bbox_selected, 0)
            score_selected = torch.cat(score_selected, 0)
            labels = torch.cat(labels, 0)
        else:
            bbox_selected = bbox[selected]
            score_selected = obj_scores[selected]
            labels = bbox_classes[selected]

        return bbox_selected, score_selected, labels
mmdet3d/models/detectors/__init__.py
View file @
19a56f6b
...
@@ -4,10 +4,11 @@ from .mvx_faster_rcnn import (DynamicMVXFasterRCNN, DynamicMVXFasterRCNNV2,
...
@@ -4,10 +4,11 @@ from .mvx_faster_rcnn import (DynamicMVXFasterRCNN, DynamicMVXFasterRCNNV2,
from
.mvx_single_stage
import
MVXSingleStageDetector
from
.mvx_single_stage
import
MVXSingleStageDetector
from
.mvx_two_stage
import
MVXTwoStageDetector
from
.mvx_two_stage
import
MVXTwoStageDetector
from
.parta2
import
PartA2
from
.parta2
import
PartA2
from
.votenet
import
VoteNet
from
.voxelnet
import
DynamicVoxelNet
,
VoxelNet
from
.voxelnet
import
DynamicVoxelNet
,
VoxelNet
__all__
=
[
__all__
=
[
'BaseDetector'
,
'VoxelNet'
,
'DynamicVoxelNet'
,
'MVXSingleStageDetector'
,
'BaseDetector'
,
'VoxelNet'
,
'DynamicVoxelNet'
,
'MVXSingleStageDetector'
,
'MVXTwoStageDetector'
,
'DynamicMVXFasterRCNN'
,
'DynamicMVXFasterRCNNV2'
,
'MVXTwoStageDetector'
,
'DynamicMVXFasterRCNN'
,
'DynamicMVXFasterRCNNV2'
,
'DynamicMVXFasterRCNNV3'
,
'PartA2'
'DynamicMVXFasterRCNNV3'
,
'PartA2'
,
'VoteNet'
]
]
mmdet3d/models/detectors/votenet.py
0 → 100644
View file @
19a56f6b
import
torch
from
mmdet3d.core
import
bbox3d2result
from
mmdet.models
import
DETECTORS
,
SingleStageDetector
@DETECTORS.register_module()
class VoteNet(SingleStageDetector):
    """VoteNet model.

    https://arxiv.org/pdf/1904.09664.pdf

    A single-stage 3D detector: a point-cloud backbone (optionally followed
    by a neck) feeds a VoteHead-style ``bbox_head`` that votes for object
    centers and predicts boxes.
    """

    def __init__(self,
                 backbone,
                 bbox_head=None,
                 train_cfg=None,
                 test_cfg=None,
                 pretrained=None):
        super(VoteNet, self).__init__(
            backbone=backbone,
            bbox_head=bbox_head,
            train_cfg=train_cfg,
            test_cfg=test_cfg,
            pretrained=pretrained)

    def extract_feat(self, points):
        """Extract features from points through backbone (and neck if any)."""
        x = self.backbone(points)
        if self.with_neck:
            x = self.neck(x)
        return x

    def forward_train(self,
                      points,
                      img_meta,
                      gt_bboxes_3d,
                      gt_labels_3d,
                      pts_semantic_mask=None,
                      pts_instance_mask=None,
                      gt_bboxes_ignore=None):
        """Forward of training.

        Args:
            points (list[Tensor]): Points of each batch.
            img_meta (list): Image metas.
            gt_bboxes_3d (list[Tensor]): gt bboxes of each batch.
            gt_labels_3d (list[Tensor]): gt class labels of each batch.
            pts_semantic_mask (None | list[Tensor]): point-wise semantic
                label of each batch.
            pts_instance_mask (None | list[Tensor]): point-wise instance
                label of each batch.
            gt_bboxes_ignore (None | list[Tensor]): Specify which bounding.

        Returns:
            dict: Losses.
        """
        # Stacking assumes every sample has the same number of points
        # (NOTE(review): the original "# tmp" marks this as provisional).
        points_cat = torch.stack(points)  # tmp

        x = self.extract_feat(points_cat)
        # sample_mod selects the vote-aggregation sampling strategy.
        bbox_preds = self.bbox_head(x, self.train_cfg.sample_mod)
        loss_inputs = (points, gt_bboxes_3d, gt_labels_3d, pts_semantic_mask,
                       pts_instance_mask, img_meta)
        losses = self.bbox_head.loss(
            bbox_preds, *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
        return losses

    def forward_test(self, **kwargs):
        # Only single-sample (non-augmented) testing is supported.
        return self.simple_test(**kwargs)

    def forward(self, return_loss=True, **kwargs):
        """Dispatch to train or test forward based on ``return_loss``."""
        if return_loss:
            return self.forward_train(**kwargs)
        else:
            return self.forward_test(**kwargs)

    def simple_test(self,
                    points,
                    img_meta,
                    gt_bboxes_3d=None,
                    gt_labels_3d=None,
                    pts_semantic_mask=None,
                    pts_instance_mask=None,
                    rescale=False):
        """Forward of testing.

        Args:
            points (list[Tensor]): Points of each sample.
            img_meta (list): Image metas.
            gt_bboxes_3d (list[Tensor]): gt bboxes of each sample.
            gt_labels_3d (list[Tensor]): gt class labels of each sample.
            pts_semantic_mask (None | list[Tensor]): point-wise semantic
                label of each sample.
            pts_instance_mask (None | list[Tensor]): point-wise instance
                label of each sample.
            rescale (bool): Whether to rescale results.

        Returns:
            list: Predicted 3d boxes.
        """
        # See forward_train: stacking assumes equal point counts per sample.
        points_cat = torch.stack(points)  # tmp

        x = self.extract_feat(points_cat)
        bbox_preds = self.bbox_head(x, self.test_cfg.sample_mod)
        bbox_list = self.bbox_head.get_bboxes(
            points_cat, bbox_preds, img_meta, rescale=rescale)
        bbox_results = [
            bbox3d2result(bboxes, scores, labels)
            for bboxes, scores, labels in bbox_list
        ]
        # Only the first sample's result is returned (batch size 1 assumed
        # at test time).
        return bbox_results[0]
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment