Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
mmdetection3d
Commits
9ebb75da
Commit
9ebb75da
authored
Jul 18, 2022
by
jshilong
Committed by
ChaimZhu
Jul 20, 2022
Browse files
[refactor]Groupfree3d
parent
b496f579
Changes
17
Show whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
807 additions
and
586 deletions
+807
-586
configs/_base_/models/groupfree3d.py
configs/_base_/models/groupfree3d.py
+14
-10
configs/groupfree3d/groupfree3d_8x4_scannet-3d-18class-L12-O256.py
...roupfree3d/groupfree3d_8x4_scannet-3d-18class-L12-O256.py
+64
-39
configs/groupfree3d/groupfree3d_8x4_scannet-3d-18class-L6-O256.py
...groupfree3d/groupfree3d_8x4_scannet-3d-18class-L6-O256.py
+64
-39
configs/groupfree3d/groupfree3d_8x4_scannet-3d-18class-w2x-L12-O256.py
...free3d/groupfree3d_8x4_scannet-3d-18class-w2x-L12-O256.py
+64
-39
configs/groupfree3d/groupfree3d_8x4_scannet-3d-18class-w2x-L12-O512.py
...free3d/groupfree3d_8x4_scannet-3d-18class-w2x-L12-O512.py
+64
-39
configs/h3dnet/debug.py
configs/h3dnet/debug.py
+69
-0
mmdet3d/datasets/det3d_dataset.py
mmdet3d/datasets/det3d_dataset.py
+2
-0
mmdet3d/datasets/kitti_dataset.py
mmdet3d/datasets/kitti_dataset.py
+1
-0
mmdet3d/datasets/pipelines/formating.py
mmdet3d/datasets/pipelines/formating.py
+4
-4
mmdet3d/datasets/scannet_dataset.py
mmdet3d/datasets/scannet_dataset.py
+1
-0
mmdet3d/datasets/sunrgbd_dataset.py
mmdet3d/datasets/sunrgbd_dataset.py
+8
-124
mmdet3d/models/data_preprocessors/data_preprocessor.py
mmdet3d/models/data_preprocessors/data_preprocessor.py
+1
-1
mmdet3d/models/dense_heads/groupfree3d_head.py
mmdet3d/models/dense_heads/groupfree3d_head.py
+316
-202
mmdet3d/models/dense_heads/vote_head.py
mmdet3d/models/dense_heads/vote_head.py
+4
-4
mmdet3d/models/detectors/groupfree3dnet.py
mmdet3d/models/detectors/groupfree3dnet.py
+52
-71
tests/test_models/test_detectors/test_groupfree3d.py
tests/test_models/test_detectors/test_groupfree3d.py
+52
-0
tests/utils/model_utils.py
tests/utils/model_utils.py
+27
-14
No files found.
configs/_base_/models/groupfree3d.py
View file @
9ebb75da
model
=
dict
(
model
=
dict
(
type
=
'GroupFree3DNet'
,
type
=
'GroupFree3DNet'
,
data_preprocessor
=
dict
(
type
=
'Det3DDataPreprocessor'
),
backbone
=
dict
(
backbone
=
dict
(
type
=
'PointNet2SASSG'
,
type
=
'PointNet2SASSG'
,
in_channels
=
3
,
in_channels
=
3
,
...
@@ -38,33 +39,36 @@ model = dict(
...
@@ -38,33 +39,36 @@ model = dict(
pred_layer_cfg
=
dict
(
pred_layer_cfg
=
dict
(
in_channels
=
288
,
shared_conv_channels
=
(
288
,
288
),
bias
=
True
),
in_channels
=
288
,
shared_conv_channels
=
(
288
,
288
),
bias
=
True
),
sampling_objectness_loss
=
dict
(
sampling_objectness_loss
=
dict
(
type
=
'FocalLoss'
,
type
=
'
mmdet.
FocalLoss'
,
use_sigmoid
=
True
,
use_sigmoid
=
True
,
gamma
=
2.0
,
gamma
=
2.0
,
alpha
=
0.25
,
alpha
=
0.25
,
loss_weight
=
8.0
),
loss_weight
=
8.0
),
objectness_loss
=
dict
(
objectness_loss
=
dict
(
type
=
'FocalLoss'
,
type
=
'
mmdet.
FocalLoss'
,
use_sigmoid
=
True
,
use_sigmoid
=
True
,
gamma
=
2.0
,
gamma
=
2.0
,
alpha
=
0.25
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_weight
=
1.0
),
center_loss
=
dict
(
center_loss
=
dict
(
type
=
'SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
type
=
'
mmdet.
SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
dir_class_loss
=
dict
(
dir_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
dir_res_loss
=
dict
(
dir_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
type
=
'
mmdet.
SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
size_class_loss
=
dict
(
size_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
size_res_loss
=
dict
(
size_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
type
=
'mmdet.SmoothL1Loss'
,
beta
=
1.0
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
semantic_loss
=
dict
(
semantic_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
)),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
)),
# model training and testing settings
# model training and testing settings
train_cfg
=
dict
(
sample_mod
=
'kps'
),
train_cfg
=
dict
(
sample_mod
e
=
'kps'
),
test_cfg
=
dict
(
test_cfg
=
dict
(
sample_mod
=
'kps'
,
sample_mod
e
=
'kps'
,
nms_thr
=
0.25
,
nms_thr
=
0.25
,
score_thr
=
0.0
,
score_thr
=
0.0
,
per_class_proposal
=
True
,
per_class_proposal
=
True
,
...
...
configs/groupfree3d/groupfree3d_8x4_scannet-3d-18class-L12-O256.py
View file @
9ebb75da
...
@@ -35,34 +35,37 @@ model = dict(
...
@@ -35,34 +35,37 @@ model = dict(
[
1.1511526
,
1.0546296
,
0.49706793
],
[
1.1511526
,
1.0546296
,
0.49706793
],
[
0.47535285
,
0.49249494
,
0.5802117
]]),
[
0.47535285
,
0.49249494
,
0.5802117
]]),
sampling_objectness_loss
=
dict
(
sampling_objectness_loss
=
dict
(
type
=
'FocalLoss'
,
type
=
'
mmdet.
FocalLoss'
,
use_sigmoid
=
True
,
use_sigmoid
=
True
,
gamma
=
2.0
,
gamma
=
2.0
,
alpha
=
0.25
,
alpha
=
0.25
,
loss_weight
=
8.0
),
loss_weight
=
8.0
),
objectness_loss
=
dict
(
objectness_loss
=
dict
(
type
=
'FocalLoss'
,
type
=
'
mmdet.
FocalLoss'
,
use_sigmoid
=
True
,
use_sigmoid
=
True
,
gamma
=
2.0
,
gamma
=
2.0
,
alpha
=
0.25
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_weight
=
1.0
),
center_loss
=
dict
(
center_loss
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.04
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
type
=
'mmdet.SmoothL1Loss'
,
beta
=
0.04
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
dir_class_loss
=
dict
(
dir_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
dir_res_loss
=
dict
(
dir_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
type
=
'
mmdet.
SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
size_class_loss
=
dict
(
size_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
size_res_loss
=
dict
(
size_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
type
=
'
mmdet.
SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
beta
=
1.0
/
9.0
,
reduction
=
'sum'
,
reduction
=
'sum'
,
loss_weight
=
10.0
/
9.0
),
loss_weight
=
10.0
/
9.0
),
semantic_loss
=
dict
(
semantic_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
)),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
)),
test_cfg
=
dict
(
test_cfg
=
dict
(
sample_mod
=
'kps'
,
sample_mod
e
=
'kps'
,
nms_thr
=
0.25
,
nms_thr
=
0.25
,
score_thr
=
0.0
,
score_thr
=
0.0
,
per_class_proposal
=
True
,
per_class_proposal
=
True
,
...
@@ -75,6 +78,9 @@ class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
...
@@ -75,6 +78,9 @@ class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
'bookshelf'
,
'picture'
,
'counter'
,
'desk'
,
'curtain'
,
'bookshelf'
,
'picture'
,
'counter'
,
'desk'
,
'curtain'
,
'refrigerator'
,
'showercurtrain'
,
'toilet'
,
'sink'
,
'bathtub'
,
'refrigerator'
,
'showercurtrain'
,
'toilet'
,
'sink'
,
'bathtub'
,
'garbagebin'
)
'garbagebin'
)
metainfo
=
dict
(
CLASSES
=
class_names
)
train_pipeline
=
[
train_pipeline
=
[
dict
(
dict
(
type
=
'LoadPointsFromFile'
,
type
=
'LoadPointsFromFile'
,
...
@@ -102,9 +108,8 @@ train_pipeline = [
...
@@ -102,9 +108,8 @@ train_pipeline = [
type
=
'GlobalRotScaleTrans'
,
type
=
'GlobalRotScaleTrans'
,
rot_range
=
[
-
0.087266
,
0.087266
],
rot_range
=
[
-
0.087266
,
0.087266
],
scale_ratio_range
=
[
1.0
,
1.0
]),
scale_ratio_range
=
[
1.0
,
1.0
]),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
dict
(
type
=
'
Collect3D
'
,
type
=
'
Pack3DDetInputs
'
,
keys
=
[
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
,
'pts_semantic_mask'
,
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
,
'pts_semantic_mask'
,
'pts_instance_mask'
'pts_instance_mask'
...
@@ -134,52 +139,60 @@ test_pipeline = [
...
@@ -134,52 +139,60 @@ test_pipeline = [
flip_ratio_bev_horizontal
=
0.5
,
flip_ratio_bev_horizontal
=
0.5
,
flip_ratio_bev_vertical
=
0.5
),
flip_ratio_bev_vertical
=
0.5
),
dict
(
type
=
'PointSample'
,
num_points
=
50000
),
dict
(
type
=
'PointSample'
,
num_points
=
50000
),
dict
(
]),
type
=
'DefaultFormatBundle3D'
,
dict
(
type
=
'Pack3DDetInputs'
,
keys
=
[
'points'
])
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
])
])
]
]
data
=
dict
(
train_dataloader
=
dict
(
samples_per_gpu
=
8
,
batch_size
=
8
,
workers_per_gpu
=
4
,
num_workers
=
4
,
train
=
dict
(
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
True
),
dataset
=
dict
(
type
=
'RepeatDataset'
,
type
=
'RepeatDataset'
,
times
=
5
,
times
=
5
,
dataset
=
dict
(
dataset
=
dict
(
type
=
dataset_type
,
type
=
dataset_type
,
data_root
=
data_root
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_train.pkl'
,
ann_file
=
'scannet_infos_train.pkl'
,
pipeline
=
train_pipeline
,
pipeline
=
train_pipeline
,
filter_empty_gt
=
False
,
filter_empty_gt
=
False
,
classes
=
class_names
,
metainfo
=
metainfo
,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d
=
'Depth'
)),
box_type_3d
=
'Depth'
)))
val
=
dict
(
val_dataloader
=
dict
(
batch_size
=
1
,
num_workers
=
1
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
False
),
dataset
=
dict
(
type
=
dataset_type
,
type
=
dataset_type
,
data_root
=
data_root
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_val.pkl'
,
ann_file
=
'scannet_infos_val.pkl'
,
pipeline
=
test_pipeline
,
pipeline
=
test_pipeline
,
classes
=
class_names
,
metainfo
=
metainfo
,
test_mode
=
True
,
test_mode
=
True
,
box_type_3d
=
'Depth'
),
box_type_3d
=
'Depth'
))
test
=
dict
(
test_dataloader
=
dict
(
batch_size
=
1
,
num_workers
=
1
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
False
),
dataset
=
dict
(
type
=
dataset_type
,
type
=
dataset_type
,
data_root
=
data_root
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_val.pkl'
,
ann_file
=
'scannet_infos_val.pkl'
,
pipeline
=
test_pipeline
,
pipeline
=
test_pipeline
,
classes
=
class_names
,
metainfo
=
metainfo
,
test_mode
=
True
,
test_mode
=
True
,
box_type_3d
=
'Depth'
))
box_type_3d
=
'Depth'
))
val_evaluator
=
dict
(
type
=
'IndoorMetric'
)
test_evaluator
=
val_evaluator
# optimizer
# optimizer
lr
=
0.006
lr
=
0.006
optimizer
=
dict
(
optim_wrapper
=
dict
(
lr
=
lr
,
type
=
'OptimWrapper'
,
weight_decay
=
0.0005
,
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
lr
,
weight_decay
=
0.0005
),
clip_grad
=
dict
(
max_norm
=
0.1
,
norm_type
=
2
),
paramwise_cfg
=
dict
(
paramwise_cfg
=
dict
(
custom_keys
=
{
custom_keys
=
{
'bbox_head.decoder_layers'
:
dict
(
lr_mult
=
0.1
,
decay_mult
=
1.0
),
'bbox_head.decoder_layers'
:
dict
(
lr_mult
=
0.1
,
decay_mult
=
1.0
),
...
@@ -191,9 +204,21 @@ optimizer = dict(
...
@@ -191,9 +204,21 @@ optimizer = dict(
'bbox_head.decoder_key_proj'
:
dict
(
lr_mult
=
0.1
,
decay_mult
=
1.0
)
'bbox_head.decoder_key_proj'
:
dict
(
lr_mult
=
0.1
,
decay_mult
=
1.0
)
}))
}))
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
0.1
,
norm_type
=
2
))
# learning rate
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
None
,
step
=
[
56
,
68
])
param_scheduler
=
[
dict
(
type
=
'MultiStepLR'
,
begin
=
0
,
end
=
80
,
by_epoch
=
True
,
milestones
=
[
56
,
68
],
gamma
=
0.1
)
]
# training schedule for 1x
train_cfg
=
dict
(
type
=
'EpochBasedTrainLoop'
,
max_epochs
=
80
,
val_interval
=
1
)
val_cfg
=
dict
(
type
=
'ValLoop'
)
test_cfg
=
dict
(
type
=
'TestLoop'
)
# runtime settings
default_hooks
=
dict
(
runner
=
dict
(
type
=
'EpochBasedRunner'
,
max_epochs
=
80
)
checkpoint
=
dict
(
type
=
'CheckpointHook'
,
interval
=
1
,
max_keep_ckpts
=
10
))
checkpoint_config
=
dict
(
interval
=
1
,
max_keep_ckpts
=
10
)
configs/groupfree3d/groupfree3d_8x4_scannet-3d-18class-L6-O256.py
View file @
9ebb75da
...
@@ -34,34 +34,37 @@ model = dict(
...
@@ -34,34 +34,37 @@ model = dict(
[
1.1511526
,
1.0546296
,
0.49706793
],
[
1.1511526
,
1.0546296
,
0.49706793
],
[
0.47535285
,
0.49249494
,
0.5802117
]]),
[
0.47535285
,
0.49249494
,
0.5802117
]]),
sampling_objectness_loss
=
dict
(
sampling_objectness_loss
=
dict
(
type
=
'FocalLoss'
,
type
=
'
mmdet.
FocalLoss'
,
use_sigmoid
=
True
,
use_sigmoid
=
True
,
gamma
=
2.0
,
gamma
=
2.0
,
alpha
=
0.25
,
alpha
=
0.25
,
loss_weight
=
8.0
),
loss_weight
=
8.0
),
objectness_loss
=
dict
(
objectness_loss
=
dict
(
type
=
'FocalLoss'
,
type
=
'
mmdet.
FocalLoss'
,
use_sigmoid
=
True
,
use_sigmoid
=
True
,
gamma
=
2.0
,
gamma
=
2.0
,
alpha
=
0.25
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_weight
=
1.0
),
center_loss
=
dict
(
center_loss
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.04
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
type
=
'mmdet.SmoothL1Loss'
,
beta
=
0.04
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
dir_class_loss
=
dict
(
dir_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
dir_res_loss
=
dict
(
dir_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
type
=
'
mmdet.
SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
size_class_loss
=
dict
(
size_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
size_res_loss
=
dict
(
size_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
type
=
'
mmdet.
SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
beta
=
1.0
/
9.0
,
reduction
=
'sum'
,
reduction
=
'sum'
,
loss_weight
=
10.0
/
9.0
),
loss_weight
=
10.0
/
9.0
),
semantic_loss
=
dict
(
semantic_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
)),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
)),
test_cfg
=
dict
(
test_cfg
=
dict
(
sample_mod
=
'kps'
,
sample_mod
e
=
'kps'
,
nms_thr
=
0.25
,
nms_thr
=
0.25
,
score_thr
=
0.0
,
score_thr
=
0.0
,
per_class_proposal
=
True
,
per_class_proposal
=
True
,
...
@@ -74,6 +77,9 @@ class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
...
@@ -74,6 +77,9 @@ class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
'bookshelf'
,
'picture'
,
'counter'
,
'desk'
,
'curtain'
,
'bookshelf'
,
'picture'
,
'counter'
,
'desk'
,
'curtain'
,
'refrigerator'
,
'showercurtrain'
,
'toilet'
,
'sink'
,
'bathtub'
,
'refrigerator'
,
'showercurtrain'
,
'toilet'
,
'sink'
,
'bathtub'
,
'garbagebin'
)
'garbagebin'
)
metainfo
=
dict
(
CLASSES
=
class_names
)
train_pipeline
=
[
train_pipeline
=
[
dict
(
dict
(
type
=
'LoadPointsFromFile'
,
type
=
'LoadPointsFromFile'
,
...
@@ -101,9 +107,8 @@ train_pipeline = [
...
@@ -101,9 +107,8 @@ train_pipeline = [
type
=
'GlobalRotScaleTrans'
,
type
=
'GlobalRotScaleTrans'
,
rot_range
=
[
-
0.087266
,
0.087266
],
rot_range
=
[
-
0.087266
,
0.087266
],
scale_ratio_range
=
[
1.0
,
1.0
]),
scale_ratio_range
=
[
1.0
,
1.0
]),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
dict
(
type
=
'
Collect3D
'
,
type
=
'
Pack3DDetInputs
'
,
keys
=
[
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
,
'pts_semantic_mask'
,
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
,
'pts_semantic_mask'
,
'pts_instance_mask'
'pts_instance_mask'
...
@@ -133,52 +138,60 @@ test_pipeline = [
...
@@ -133,52 +138,60 @@ test_pipeline = [
flip_ratio_bev_horizontal
=
0.5
,
flip_ratio_bev_horizontal
=
0.5
,
flip_ratio_bev_vertical
=
0.5
),
flip_ratio_bev_vertical
=
0.5
),
dict
(
type
=
'PointSample'
,
num_points
=
50000
),
dict
(
type
=
'PointSample'
,
num_points
=
50000
),
dict
(
]),
type
=
'DefaultFormatBundle3D'
,
dict
(
type
=
'Pack3DDetInputs'
,
keys
=
[
'points'
])
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
])
])
]
]
data
=
dict
(
train_dataloader
=
dict
(
samples_per_gpu
=
8
,
batch_size
=
8
,
workers_per_gpu
=
4
,
num_workers
=
4
,
train
=
dict
(
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
True
),
dataset
=
dict
(
type
=
'RepeatDataset'
,
type
=
'RepeatDataset'
,
times
=
5
,
times
=
5
,
dataset
=
dict
(
dataset
=
dict
(
type
=
dataset_type
,
type
=
dataset_type
,
data_root
=
data_root
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_train.pkl'
,
ann_file
=
'scannet_infos_train.pkl'
,
pipeline
=
train_pipeline
,
pipeline
=
train_pipeline
,
filter_empty_gt
=
False
,
filter_empty_gt
=
False
,
classes
=
class_names
,
metainfo
=
metainfo
,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d
=
'Depth'
)),
box_type_3d
=
'Depth'
)))
val
=
dict
(
val_dataloader
=
dict
(
batch_size
=
1
,
num_workers
=
1
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
False
),
dataset
=
dict
(
type
=
dataset_type
,
type
=
dataset_type
,
data_root
=
data_root
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_val.pkl'
,
ann_file
=
'scannet_infos_val.pkl'
,
pipeline
=
test_pipeline
,
pipeline
=
test_pipeline
,
classes
=
class_names
,
metainfo
=
metainfo
,
test_mode
=
True
,
test_mode
=
True
,
box_type_3d
=
'Depth'
),
box_type_3d
=
'Depth'
))
test
=
dict
(
test_dataloader
=
dict
(
batch_size
=
1
,
num_workers
=
1
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
False
),
dataset
=
dict
(
type
=
dataset_type
,
type
=
dataset_type
,
data_root
=
data_root
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_val.pkl'
,
ann_file
=
'scannet_infos_val.pkl'
,
pipeline
=
test_pipeline
,
pipeline
=
test_pipeline
,
classes
=
class_names
,
metainfo
=
metainfo
,
test_mode
=
True
,
test_mode
=
True
,
box_type_3d
=
'Depth'
))
box_type_3d
=
'Depth'
))
val_evaluator
=
dict
(
type
=
'IndoorMetric'
)
test_evaluator
=
val_evaluator
# optimizer
# optimizer
lr
=
0.006
lr
=
0.006
optimizer
=
dict
(
optim_wrapper
=
dict
(
lr
=
lr
,
type
=
'OptimWrapper'
,
weight_decay
=
0.0005
,
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
lr
,
weight_decay
=
0.0005
),
clip_grad
=
dict
(
max_norm
=
0.1
,
norm_type
=
2
),
paramwise_cfg
=
dict
(
paramwise_cfg
=
dict
(
custom_keys
=
{
custom_keys
=
{
'bbox_head.decoder_layers'
:
dict
(
lr_mult
=
0.1
,
decay_mult
=
1.0
),
'bbox_head.decoder_layers'
:
dict
(
lr_mult
=
0.1
,
decay_mult
=
1.0
),
...
@@ -190,9 +203,21 @@ optimizer = dict(
...
@@ -190,9 +203,21 @@ optimizer = dict(
'bbox_head.decoder_key_proj'
:
dict
(
lr_mult
=
0.1
,
decay_mult
=
1.0
)
'bbox_head.decoder_key_proj'
:
dict
(
lr_mult
=
0.1
,
decay_mult
=
1.0
)
}))
}))
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
0.1
,
norm_type
=
2
))
# learning rate
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
None
,
step
=
[
56
,
68
])
param_scheduler
=
[
dict
(
type
=
'MultiStepLR'
,
begin
=
0
,
end
=
80
,
by_epoch
=
True
,
milestones
=
[
56
,
68
],
gamma
=
0.1
)
]
# training schedule for 1x
train_cfg
=
dict
(
type
=
'EpochBasedTrainLoop'
,
max_epochs
=
80
,
val_interval
=
1
)
val_cfg
=
dict
(
type
=
'ValLoop'
)
test_cfg
=
dict
(
type
=
'TestLoop'
)
# runtime settings
default_hooks
=
dict
(
runner
=
dict
(
type
=
'EpochBasedRunner'
,
max_epochs
=
80
)
checkpoint
=
dict
(
type
=
'CheckpointHook'
,
interval
=
1
,
max_keep_ckpts
=
10
))
checkpoint_config
=
dict
(
interval
=
1
,
max_keep_ckpts
=
10
)
configs/groupfree3d/groupfree3d_8x4_scannet-3d-18class-w2x-L12-O256.py
View file @
9ebb75da
...
@@ -50,34 +50,37 @@ model = dict(
...
@@ -50,34 +50,37 @@ model = dict(
[
1.1511526
,
1.0546296
,
0.49706793
],
[
1.1511526
,
1.0546296
,
0.49706793
],
[
0.47535285
,
0.49249494
,
0.5802117
]]),
[
0.47535285
,
0.49249494
,
0.5802117
]]),
sampling_objectness_loss
=
dict
(
sampling_objectness_loss
=
dict
(
type
=
'FocalLoss'
,
type
=
'
mmdet.
FocalLoss'
,
use_sigmoid
=
True
,
use_sigmoid
=
True
,
gamma
=
2.0
,
gamma
=
2.0
,
alpha
=
0.25
,
alpha
=
0.25
,
loss_weight
=
8.0
),
loss_weight
=
8.0
),
objectness_loss
=
dict
(
objectness_loss
=
dict
(
type
=
'FocalLoss'
,
type
=
'
mmdet.
FocalLoss'
,
use_sigmoid
=
True
,
use_sigmoid
=
True
,
gamma
=
2.0
,
gamma
=
2.0
,
alpha
=
0.25
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_weight
=
1.0
),
center_loss
=
dict
(
center_loss
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.04
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
type
=
'mmdet.SmoothL1Loss'
,
beta
=
0.04
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
dir_class_loss
=
dict
(
dir_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
dir_res_loss
=
dict
(
dir_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
type
=
'
mmdet.
SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
size_class_loss
=
dict
(
size_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
size_res_loss
=
dict
(
size_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
type
=
'
mmdet.
SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
beta
=
1.0
/
9.0
,
reduction
=
'sum'
,
reduction
=
'sum'
,
loss_weight
=
10.0
/
9.0
),
loss_weight
=
10.0
/
9.0
),
semantic_loss
=
dict
(
semantic_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
)),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
)),
test_cfg
=
dict
(
test_cfg
=
dict
(
sample_mod
=
'kps'
,
sample_mod
e
=
'kps'
,
nms_thr
=
0.25
,
nms_thr
=
0.25
,
score_thr
=
0.0
,
score_thr
=
0.0
,
per_class_proposal
=
True
,
per_class_proposal
=
True
,
...
@@ -90,6 +93,9 @@ class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
...
@@ -90,6 +93,9 @@ class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
'bookshelf'
,
'picture'
,
'counter'
,
'desk'
,
'curtain'
,
'bookshelf'
,
'picture'
,
'counter'
,
'desk'
,
'curtain'
,
'refrigerator'
,
'showercurtrain'
,
'toilet'
,
'sink'
,
'bathtub'
,
'refrigerator'
,
'showercurtrain'
,
'toilet'
,
'sink'
,
'bathtub'
,
'garbagebin'
)
'garbagebin'
)
metainfo
=
dict
(
CLASSES
=
class_names
)
train_pipeline
=
[
train_pipeline
=
[
dict
(
dict
(
type
=
'LoadPointsFromFile'
,
type
=
'LoadPointsFromFile'
,
...
@@ -117,9 +123,8 @@ train_pipeline = [
...
@@ -117,9 +123,8 @@ train_pipeline = [
type
=
'GlobalRotScaleTrans'
,
type
=
'GlobalRotScaleTrans'
,
rot_range
=
[
-
0.087266
,
0.087266
],
rot_range
=
[
-
0.087266
,
0.087266
],
scale_ratio_range
=
[
1.0
,
1.0
]),
scale_ratio_range
=
[
1.0
,
1.0
]),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
dict
(
type
=
'
Collect3D
'
,
type
=
'
Pack3DDetInputs
'
,
keys
=
[
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
,
'pts_semantic_mask'
,
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
,
'pts_semantic_mask'
,
'pts_instance_mask'
'pts_instance_mask'
...
@@ -149,52 +154,60 @@ test_pipeline = [
...
@@ -149,52 +154,60 @@ test_pipeline = [
flip_ratio_bev_horizontal
=
0.5
,
flip_ratio_bev_horizontal
=
0.5
,
flip_ratio_bev_vertical
=
0.5
),
flip_ratio_bev_vertical
=
0.5
),
dict
(
type
=
'PointSample'
,
num_points
=
50000
),
dict
(
type
=
'PointSample'
,
num_points
=
50000
),
dict
(
]),
type
=
'DefaultFormatBundle3D'
,
dict
(
type
=
'Pack3DDetInputs'
,
keys
=
[
'points'
])
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
])
])
]
]
data
=
dict
(
train_dataloader
=
dict
(
samples_per_gpu
=
8
,
batch_size
=
8
,
workers_per_gpu
=
4
,
num_workers
=
4
,
train
=
dict
(
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
True
),
dataset
=
dict
(
type
=
'RepeatDataset'
,
type
=
'RepeatDataset'
,
times
=
5
,
times
=
5
,
dataset
=
dict
(
dataset
=
dict
(
type
=
dataset_type
,
type
=
dataset_type
,
data_root
=
data_root
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_train.pkl'
,
ann_file
=
'scannet_infos_train.pkl'
,
pipeline
=
train_pipeline
,
pipeline
=
train_pipeline
,
filter_empty_gt
=
False
,
filter_empty_gt
=
False
,
classes
=
class_names
,
metainfo
=
metainfo
,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d
=
'Depth'
)),
box_type_3d
=
'Depth'
)))
val
=
dict
(
val_dataloader
=
dict
(
batch_size
=
1
,
num_workers
=
1
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
False
),
dataset
=
dict
(
type
=
dataset_type
,
type
=
dataset_type
,
data_root
=
data_root
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_val.pkl'
,
ann_file
=
'scannet_infos_val.pkl'
,
pipeline
=
test_pipeline
,
pipeline
=
test_pipeline
,
classes
=
class_names
,
metainfo
=
metainfo
,
test_mode
=
True
,
test_mode
=
True
,
box_type_3d
=
'Depth'
),
box_type_3d
=
'Depth'
))
test
=
dict
(
test_dataloader
=
dict
(
batch_size
=
1
,
num_workers
=
1
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
False
),
dataset
=
dict
(
type
=
dataset_type
,
type
=
dataset_type
,
data_root
=
data_root
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_val.pkl'
,
ann_file
=
'scannet_infos_val.pkl'
,
pipeline
=
test_pipeline
,
pipeline
=
test_pipeline
,
classes
=
class_names
,
metainfo
=
metainfo
,
test_mode
=
True
,
test_mode
=
True
,
box_type_3d
=
'Depth'
))
box_type_3d
=
'Depth'
))
val_evaluator
=
dict
(
type
=
'IndoorMetric'
)
test_evaluator
=
val_evaluator
# optimizer
# optimizer
lr
=
0.006
lr
=
0.006
optimizer
=
dict
(
optim_wrapper
=
dict
(
lr
=
lr
,
type
=
'OptimWrapper'
,
weight_decay
=
0.0005
,
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
lr
,
weight_decay
=
0.0005
),
clip_grad
=
dict
(
max_norm
=
0.1
,
norm_type
=
2
),
paramwise_cfg
=
dict
(
paramwise_cfg
=
dict
(
custom_keys
=
{
custom_keys
=
{
'bbox_head.decoder_layers'
:
dict
(
lr_mult
=
0.1
,
decay_mult
=
1.0
),
'bbox_head.decoder_layers'
:
dict
(
lr_mult
=
0.1
,
decay_mult
=
1.0
),
...
@@ -206,9 +219,21 @@ optimizer = dict(
...
@@ -206,9 +219,21 @@ optimizer = dict(
'bbox_head.decoder_key_proj'
:
dict
(
lr_mult
=
0.1
,
decay_mult
=
1.0
)
'bbox_head.decoder_key_proj'
:
dict
(
lr_mult
=
0.1
,
decay_mult
=
1.0
)
}))
}))
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
0.1
,
norm_type
=
2
))
# learning rate
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
None
,
step
=
[
56
,
68
])
param_scheduler
=
[
dict
(
type
=
'MultiStepLR'
,
begin
=
0
,
end
=
80
,
by_epoch
=
True
,
milestones
=
[
56
,
68
],
gamma
=
0.1
)
]
# training schedule for 1x
train_cfg
=
dict
(
type
=
'EpochBasedTrainLoop'
,
max_epochs
=
80
,
val_interval
=
1
)
val_cfg
=
dict
(
type
=
'ValLoop'
)
test_cfg
=
dict
(
type
=
'TestLoop'
)
# runtime settings
default_hooks
=
dict
(
runner
=
dict
(
type
=
'EpochBasedRunner'
,
max_epochs
=
80
)
checkpoint
=
dict
(
type
=
'CheckpointHook'
,
interval
=
1
,
max_keep_ckpts
=
10
))
checkpoint_config
=
dict
(
interval
=
1
,
max_keep_ckpts
=
10
)
configs/groupfree3d/groupfree3d_8x4_scannet-3d-18class-w2x-L12-O512.py
View file @
9ebb75da
...
@@ -51,34 +51,37 @@ model = dict(
...
@@ -51,34 +51,37 @@ model = dict(
[
1.1511526
,
1.0546296
,
0.49706793
],
[
1.1511526
,
1.0546296
,
0.49706793
],
[
0.47535285
,
0.49249494
,
0.5802117
]]),
[
0.47535285
,
0.49249494
,
0.5802117
]]),
sampling_objectness_loss
=
dict
(
sampling_objectness_loss
=
dict
(
type
=
'FocalLoss'
,
type
=
'
mmdet.
FocalLoss'
,
use_sigmoid
=
True
,
use_sigmoid
=
True
,
gamma
=
2.0
,
gamma
=
2.0
,
alpha
=
0.25
,
alpha
=
0.25
,
loss_weight
=
8.0
),
loss_weight
=
8.0
),
objectness_loss
=
dict
(
objectness_loss
=
dict
(
type
=
'FocalLoss'
,
type
=
'
mmdet.
FocalLoss'
,
use_sigmoid
=
True
,
use_sigmoid
=
True
,
gamma
=
2.0
,
gamma
=
2.0
,
alpha
=
0.25
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_weight
=
1.0
),
center_loss
=
dict
(
center_loss
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.04
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
type
=
'mmdet.SmoothL1Loss'
,
beta
=
0.04
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
dir_class_loss
=
dict
(
dir_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
dir_res_loss
=
dict
(
dir_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
type
=
'
mmdet.
SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
size_class_loss
=
dict
(
size_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
size_res_loss
=
dict
(
size_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
type
=
'
mmdet.
SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
beta
=
1.0
/
9.0
,
reduction
=
'sum'
,
reduction
=
'sum'
,
loss_weight
=
10.0
/
9.0
),
loss_weight
=
10.0
/
9.0
),
semantic_loss
=
dict
(
semantic_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
)),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
)),
test_cfg
=
dict
(
test_cfg
=
dict
(
sample_mod
=
'kps'
,
sample_mod
e
=
'kps'
,
nms_thr
=
0.25
,
nms_thr
=
0.25
,
score_thr
=
0.0
,
score_thr
=
0.0
,
per_class_proposal
=
True
,
per_class_proposal
=
True
,
...
@@ -91,6 +94,9 @@ class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
...
@@ -91,6 +94,9 @@ class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
'bookshelf'
,
'picture'
,
'counter'
,
'desk'
,
'curtain'
,
'bookshelf'
,
'picture'
,
'counter'
,
'desk'
,
'curtain'
,
'refrigerator'
,
'showercurtrain'
,
'toilet'
,
'sink'
,
'bathtub'
,
'refrigerator'
,
'showercurtrain'
,
'toilet'
,
'sink'
,
'bathtub'
,
'garbagebin'
)
'garbagebin'
)
metainfo
=
dict
(
CLASSES
=
class_names
)
train_pipeline
=
[
train_pipeline
=
[
dict
(
dict
(
type
=
'LoadPointsFromFile'
,
type
=
'LoadPointsFromFile'
,
...
@@ -118,9 +124,8 @@ train_pipeline = [
...
@@ -118,9 +124,8 @@ train_pipeline = [
type
=
'GlobalRotScaleTrans'
,
type
=
'GlobalRotScaleTrans'
,
rot_range
=
[
-
0.087266
,
0.087266
],
rot_range
=
[
-
0.087266
,
0.087266
],
scale_ratio_range
=
[
1.0
,
1.0
]),
scale_ratio_range
=
[
1.0
,
1.0
]),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
dict
(
type
=
'
Collect3D
'
,
type
=
'
Pack3DDetInputs
'
,
keys
=
[
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
,
'pts_semantic_mask'
,
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
,
'pts_semantic_mask'
,
'pts_instance_mask'
'pts_instance_mask'
...
@@ -150,52 +155,60 @@ test_pipeline = [
...
@@ -150,52 +155,60 @@ test_pipeline = [
flip_ratio_bev_horizontal
=
0.5
,
flip_ratio_bev_horizontal
=
0.5
,
flip_ratio_bev_vertical
=
0.5
),
flip_ratio_bev_vertical
=
0.5
),
dict
(
type
=
'PointSample'
,
num_points
=
50000
),
dict
(
type
=
'PointSample'
,
num_points
=
50000
),
dict
(
]),
type
=
'DefaultFormatBundle3D'
,
dict
(
type
=
'Pack3DDetInputs'
,
keys
=
[
'points'
])
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
])
])
]
]
data
=
dict
(
train_dataloader
=
dict
(
samples_per_gpu
=
8
,
batch_size
=
8
,
workers_per_gpu
=
4
,
num_workers
=
4
,
train
=
dict
(
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
True
),
dataset
=
dict
(
type
=
'RepeatDataset'
,
type
=
'RepeatDataset'
,
times
=
5
,
times
=
5
,
dataset
=
dict
(
dataset
=
dict
(
type
=
dataset_type
,
type
=
dataset_type
,
data_root
=
data_root
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_train.pkl'
,
ann_file
=
'scannet_infos_train.pkl'
,
pipeline
=
train_pipeline
,
pipeline
=
train_pipeline
,
filter_empty_gt
=
False
,
filter_empty_gt
=
False
,
classes
=
class_names
,
metainfo
=
metainfo
,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d
=
'Depth'
)),
box_type_3d
=
'Depth'
)))
val
=
dict
(
val_dataloader
=
dict
(
batch_size
=
1
,
num_workers
=
1
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
False
),
dataset
=
dict
(
type
=
dataset_type
,
type
=
dataset_type
,
data_root
=
data_root
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_val.pkl'
,
ann_file
=
'scannet_infos_val.pkl'
,
pipeline
=
test_pipeline
,
pipeline
=
test_pipeline
,
classes
=
class_names
,
metainfo
=
metainfo
,
test_mode
=
True
,
test_mode
=
True
,
box_type_3d
=
'Depth'
),
box_type_3d
=
'Depth'
))
test
=
dict
(
test_dataloader
=
dict
(
batch_size
=
1
,
num_workers
=
1
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
False
),
dataset
=
dict
(
type
=
dataset_type
,
type
=
dataset_type
,
data_root
=
data_root
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_val.pkl'
,
ann_file
=
'scannet_infos_val.pkl'
,
pipeline
=
test_pipeline
,
pipeline
=
test_pipeline
,
classes
=
class_names
,
metainfo
=
metainfo
,
test_mode
=
True
,
test_mode
=
True
,
box_type_3d
=
'Depth'
))
box_type_3d
=
'Depth'
))
val_evaluator
=
dict
(
type
=
'IndoorMetric'
)
test_evaluator
=
val_evaluator
# optimizer
# optimizer
lr
=
0.006
lr
=
0.006
optimizer
=
dict
(
optim_wrapper
=
dict
(
lr
=
lr
,
type
=
'OptimWrapper'
,
weight_decay
=
0.0005
,
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
lr
,
weight_decay
=
0.0005
),
clip_grad
=
dict
(
max_norm
=
0.1
,
norm_type
=
2
),
paramwise_cfg
=
dict
(
paramwise_cfg
=
dict
(
custom_keys
=
{
custom_keys
=
{
'bbox_head.decoder_layers'
:
dict
(
lr_mult
=
0.1
,
decay_mult
=
1.0
),
'bbox_head.decoder_layers'
:
dict
(
lr_mult
=
0.1
,
decay_mult
=
1.0
),
...
@@ -207,9 +220,21 @@ optimizer = dict(
...
@@ -207,9 +220,21 @@ optimizer = dict(
'bbox_head.decoder_key_proj'
:
dict
(
lr_mult
=
0.1
,
decay_mult
=
1.0
)
'bbox_head.decoder_key_proj'
:
dict
(
lr_mult
=
0.1
,
decay_mult
=
1.0
)
}))
}))
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
0.1
,
norm_type
=
2
))
# learning rate
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
None
,
step
=
[
56
,
68
])
param_scheduler
=
[
dict
(
type
=
'MultiStepLR'
,
begin
=
0
,
end
=
80
,
by_epoch
=
True
,
milestones
=
[
56
,
68
],
gamma
=
0.1
)
]
# training schedule for 1x
train_cfg
=
dict
(
type
=
'EpochBasedTrainLoop'
,
max_epochs
=
80
,
val_interval
=
1
)
val_cfg
=
dict
(
type
=
'ValLoop'
)
test_cfg
=
dict
(
type
=
'TestLoop'
)
# runtime settings
default_hooks
=
dict
(
runner
=
dict
(
type
=
'EpochBasedRunner'
,
max_epochs
=
80
)
checkpoint
=
dict
(
type
=
'CheckpointHook'
,
interval
=
1
,
max_keep_ckpts
=
10
))
checkpoint_config
=
dict
(
interval
=
1
,
max_keep_ckpts
=
10
)
configs/h3dnet/debug.py
0 → 100644
View file @
9ebb75da
_base_
=
[
'../_base_/datasets/scannet-3d-18class.py'
,
'../_base_/models/h3dnet.py'
,
'../_base_/schedules/schedule_3x.py'
,
'../_base_/default_runtime.py'
]
# model settings
model
=
dict
(
rpn_head
=
dict
(
num_classes
=
18
,
bbox_coder
=
dict
(
type
=
'PartialBinBasedBBoxCoder'
,
num_sizes
=
18
,
num_dir_bins
=
24
,
with_rot
=
False
,
mean_sizes
=
[[
0.76966727
,
0.8116021
,
0.92573744
],
[
1.876858
,
1.8425595
,
1.1931566
],
[
0.61328
,
0.6148609
,
0.7182701
],
[
1.3955007
,
1.5121545
,
0.83443564
],
[
0.97949594
,
1.0675149
,
0.6329687
],
[
0.531663
,
0.5955577
,
1.7500148
],
[
0.9624706
,
0.72462326
,
1.1481868
],
[
0.83221924
,
1.0490936
,
1.6875663
],
[
0.21132214
,
0.4206159
,
0.5372846
],
[
1.4440073
,
1.8970833
,
0.26985747
],
[
1.0294262
,
1.4040797
,
0.87554324
],
[
1.3766412
,
0.65521795
,
1.6813129
],
[
0.6650819
,
0.71111923
,
1.298853
],
[
0.41999173
,
0.37906948
,
1.7513971
],
[
0.59359556
,
0.5912492
,
0.73919016
],
[
0.50867593
,
0.50656086
,
0.30136237
],
[
1.1511526
,
1.0546296
,
0.49706793
],
[
0.47535285
,
0.49249494
,
0.5802117
]])),
roi_head
=
dict
(
bbox_head
=
dict
(
num_classes
=
18
,
bbox_coder
=
dict
(
type
=
'PartialBinBasedBBoxCoder'
,
num_sizes
=
18
,
num_dir_bins
=
24
,
with_rot
=
False
,
mean_sizes
=
[[
0.76966727
,
0.8116021
,
0.92573744
],
[
1.876858
,
1.8425595
,
1.1931566
],
[
0.61328
,
0.6148609
,
0.7182701
],
[
1.3955007
,
1.5121545
,
0.83443564
],
[
0.97949594
,
1.0675149
,
0.6329687
],
[
0.531663
,
0.5955577
,
1.7500148
],
[
0.9624706
,
0.72462326
,
1.1481868
],
[
0.83221924
,
1.0490936
,
1.6875663
],
[
0.21132214
,
0.4206159
,
0.5372846
],
[
1.4440073
,
1.8970833
,
0.26985747
],
[
1.0294262
,
1.4040797
,
0.87554324
],
[
1.3766412
,
0.65521795
,
1.6813129
],
[
0.6650819
,
0.71111923
,
1.298853
],
[
0.41999173
,
0.37906948
,
1.7513971
],
[
0.59359556
,
0.5912492
,
0.73919016
],
[
0.50867593
,
0.50656086
,
0.30136237
],
[
1.1511526
,
1.0546296
,
0.49706793
],
[
0.47535285
,
0.49249494
,
0.5802117
]]))))
train_dataloader
=
dict
(
batch_size
=
3
,
num_workers
=
2
,
)
# yapf:disable
default_hooks
=
dict
(
logger
=
dict
(
type
=
'LoggerHook'
,
interval
=
30
)
)
# yapf:enable
mmdet3d/datasets/det3d_dataset.py
View file @
9ebb75da
...
@@ -229,6 +229,8 @@ class Det3DDataset(BaseDataset):
...
@@ -229,6 +229,8 @@ class Det3DDataset(BaseDataset):
self
.
data_prefix
.
get
(
'pts'
,
''
),
self
.
data_prefix
.
get
(
'pts'
,
''
),
info
[
'lidar_points'
][
'lidar_path'
])
info
[
'lidar_points'
][
'lidar_path'
])
info
[
'lidar_path'
]
=
info
[
'lidar_points'
][
'lidar_path'
]
if
self
.
modality
[
'use_camera'
]:
if
self
.
modality
[
'use_camera'
]:
for
cam_id
,
img_info
in
info
[
'images'
].
items
():
for
cam_id
,
img_info
in
info
[
'images'
].
items
():
if
'img_path'
in
img_info
:
if
'img_path'
in
img_info
:
...
...
mmdet3d/datasets/kitti_dataset.py
View file @
9ebb75da
...
@@ -128,6 +128,7 @@ class KittiDataset(Det3DDataset):
...
@@ -128,6 +128,7 @@ class KittiDataset(Det3DDataset):
"""
"""
ann_info
=
super
().
parse_ann_info
(
info
)
ann_info
=
super
().
parse_ann_info
(
info
)
if
ann_info
is
None
:
if
ann_info
is
None
:
ann_info
=
dict
()
# empty instance
# empty instance
ann_info
[
'gt_bboxes_3d'
]
=
np
.
zeros
((
0
,
7
),
dtype
=
np
.
float32
)
ann_info
[
'gt_bboxes_3d'
]
=
np
.
zeros
((
0
,
7
),
dtype
=
np
.
float32
)
ann_info
[
'gt_labels_3d'
]
=
np
.
zeros
(
0
,
dtype
=
np
.
int64
)
ann_info
[
'gt_labels_3d'
]
=
np
.
zeros
(
0
,
dtype
=
np
.
int64
)
...
...
mmdet3d/datasets/pipelines/formating.py
View file @
9ebb75da
...
@@ -31,14 +31,14 @@ class Pack3DDetInputs(BaseTransform):
...
@@ -31,14 +31,14 @@ class Pack3DDetInputs(BaseTransform):
def
__init__
(
def
__init__
(
self
,
self
,
keys
:
dict
,
keys
:
dict
,
meta_keys
:
dict
=
(
'
filename
'
,
'ori_shape'
,
'img_shape'
,
'lidar2img'
,
meta_keys
:
dict
=
(
'
img_path
'
,
'ori_shape'
,
'img_shape'
,
'lidar2img'
,
'depth2img'
,
'cam2img'
,
'pad_shape'
,
'scale_factor'
,
'depth2img'
,
'cam2img'
,
'pad_shape'
,
'scale_factor'
,
'flip'
,
'pcd_horizontal_flip'
,
'pcd_vertical_flip'
,
'flip'
,
'pcd_horizontal_flip'
,
'pcd_vertical_flip'
,
'box_mode_3d'
,
'box_type_3d'
,
'img_norm_cfg'
,
'box_mode_3d'
,
'box_type_3d'
,
'img_norm_cfg'
,
'pcd_trans'
,
'sample_idx'
,
'pcd_scale_factor'
,
'pcd_trans'
,
'sample_idx'
,
'pcd_scale_factor'
,
'pcd_rotation'
,
'pcd_rotation_angle'
,
'pcd_rotation'
,
'pcd_rotation_angle'
,
'lidar_path'
,
'pts_filename'
,
'transformation_3d_flow'
,
'transformation_3d_flow'
,
'trans_mat'
,
'trans_mat'
,
'affine_aug'
)):
'affine_aug'
)):
self
.
keys
=
keys
self
.
keys
=
keys
self
.
meta_keys
=
meta_keys
self
.
meta_keys
=
meta_keys
...
...
mmdet3d/datasets/scannet_dataset.py
View file @
9ebb75da
...
@@ -138,6 +138,7 @@ class ScanNetDataset(Det3DDataset):
...
@@ -138,6 +138,7 @@ class ScanNetDataset(Det3DDataset):
ann_info
=
super
().
parse_ann_info
(
info
)
ann_info
=
super
().
parse_ann_info
(
info
)
# empty gt
# empty gt
if
ann_info
is
None
:
if
ann_info
is
None
:
ann_info
=
dict
()
ann_info
[
'gt_bboxes_3d'
]
=
np
.
zeros
((
0
,
6
),
dtype
=
np
.
float32
)
ann_info
[
'gt_bboxes_3d'
]
=
np
.
zeros
((
0
,
6
),
dtype
=
np
.
float32
)
ann_info
[
'gt_labels_3d'
]
=
np
.
zeros
((
0
,
),
dtype
=
np
.
int64
)
ann_info
[
'gt_labels_3d'
]
=
np
.
zeros
((
0
,
),
dtype
=
np
.
int64
)
# to target box structure
# to target box structure
...
...
mmdet3d/datasets/sunrgbd_dataset.py
View file @
9ebb75da
# Copyright (c) OpenMMLab. All rights reserved.
# Copyright (c) OpenMMLab. All rights reserved.
from
collections
import
OrderedDict
from
os
import
path
as
osp
from
typing
import
Callable
,
List
,
Optional
,
Union
from
typing
import
Callable
,
List
,
Optional
,
Union
from
mmdet3d.core
import
show_multi_modality_result
,
show_result
import
numpy
as
np
from
mmdet3d.core.bbox
import
DepthInstance3DBoxes
from
mmdet3d.core.bbox
import
DepthInstance3DBoxes
from
mmdet3d.registry
import
DATASETS
from
mmdet3d.registry
import
DATASETS
from
mmdet.core
import
eval_map
from
.det3d_dataset
import
Det3DDataset
from
.det3d_dataset
import
Det3DDataset
from
.pipelines
import
Compose
@
DATASETS
.
register_module
()
@
DATASETS
.
register_module
()
...
@@ -86,128 +83,15 @@ class SUNRGBDDataset(Det3DDataset):
...
@@ -86,128 +83,15 @@ class SUNRGBDDataset(Det3DDataset):
dict: Processed `ann_info`
dict: Processed `ann_info`
"""
"""
ann_info
=
super
().
parse_ann_info
(
info
)
ann_info
=
super
().
parse_ann_info
(
info
)
# empty gt
if
ann_info
is
None
:
ann_info
=
dict
()
ann_info
[
'gt_bboxes_3d'
]
=
np
.
zeros
((
0
,
6
),
dtype
=
np
.
float32
)
ann_info
[
'gt_labels_3d'
]
=
np
.
zeros
((
0
,
),
dtype
=
np
.
int64
)
# to target box structure
# to target box structure
ann_info
[
'gt_bboxes_3d'
]
=
DepthInstance3DBoxes
(
ann_info
[
'gt_bboxes_3d'
]
=
DepthInstance3DBoxes
(
ann_info
[
'gt_bboxes_3d'
],
ann_info
[
'gt_bboxes_3d'
],
origin
=
(
0.5
,
0.5
,
0.5
)).
convert_to
(
self
.
box_mode_3d
)
origin
=
(
0.5
,
0.5
,
0.5
)).
convert_to
(
self
.
box_mode_3d
)
return
ann_info
return
ann_info
def
_build_default_pipeline
(
self
):
"""Build the default pipeline for this dataset."""
pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
coord_type
=
'DEPTH'
,
shift_height
=
False
,
load_dim
=
6
,
use_dim
=
[
0
,
1
,
2
]),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
self
.
CLASSES
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
])
]
if
self
.
modality
[
'use_camera'
]:
pipeline
.
insert
(
0
,
dict
(
type
=
'LoadImageFromFile'
))
return
Compose
(
pipeline
)
# TODO fix this
def
show
(
self
,
results
,
out_dir
,
show
=
True
,
pipeline
=
None
):
"""Results visualization.
Args:
results (list[dict]): List of bounding boxes results.
out_dir (str): Output directory of visualization result.
show (bool): Visualize the results online.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
"""
assert
out_dir
is
not
None
,
'Expect out_dir, got none.'
pipeline
=
self
.
_get_pipeline
(
pipeline
)
for
i
,
result
in
enumerate
(
results
):
data_info
=
self
.
data_infos
[
i
]
pts_path
=
data_info
[
'pts_path'
]
file_name
=
osp
.
split
(
pts_path
)[
-
1
].
split
(
'.'
)[
0
]
points
,
img_metas
,
img
=
self
.
_extract_data
(
i
,
pipeline
,
[
'points'
,
'img_metas'
,
'img'
])
# scale colors to [0, 255]
points
=
points
.
numpy
()
points
[:,
3
:]
*=
255
gt_bboxes
=
self
.
get_ann_info
(
i
)[
'gt_bboxes_3d'
].
tensor
.
numpy
()
pred_bboxes
=
result
[
'boxes_3d'
].
tensor
.
numpy
()
show_result
(
points
,
gt_bboxes
.
copy
(),
pred_bboxes
.
copy
(),
out_dir
,
file_name
,
show
)
# multi-modality visualization
if
self
.
modality
[
'use_camera'
]:
img
=
img
.
numpy
()
# need to transpose channel to first dim
img
=
img
.
transpose
(
1
,
2
,
0
)
pred_bboxes
=
DepthInstance3DBoxes
(
pred_bboxes
,
origin
=
(
0.5
,
0.5
,
0
))
gt_bboxes
=
DepthInstance3DBoxes
(
gt_bboxes
,
origin
=
(
0.5
,
0.5
,
0
))
show_multi_modality_result
(
img
,
gt_bboxes
,
pred_bboxes
,
None
,
out_dir
,
file_name
,
box_mode
=
'depth'
,
img_metas
=
img_metas
,
show
=
show
)
def
evaluate
(
self
,
results
,
metric
=
None
,
iou_thr
=
(
0.25
,
0.5
),
iou_thr_2d
=
(
0.5
,
),
logger
=
None
,
show
=
False
,
out_dir
=
None
,
pipeline
=
None
):
"""Evaluate.
Evaluation in indoor protocol.
Args:
results (list[dict]): List of results.
metric (str | list[str], optional): Metrics to be evaluated.
Default: None.
iou_thr (list[float], optional): AP IoU thresholds for 3D
evaluation. Default: (0.25, 0.5).
iou_thr_2d (list[float], optional): AP IoU thresholds for 2D
evaluation. Default: (0.5, ).
show (bool, optional): Whether to visualize.
Default: False.
out_dir (str, optional): Path to save the visualization results.
Default: None.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
Returns:
dict: Evaluation results.
"""
# evaluate 3D detection performance
if
isinstance
(
results
[
0
],
dict
):
return
super
().
evaluate
(
results
,
metric
,
iou_thr
,
logger
,
show
,
out_dir
,
pipeline
)
# evaluate 2D detection performance
else
:
eval_results
=
OrderedDict
()
annotations
=
[
self
.
get_ann_info
(
i
)
for
i
in
range
(
len
(
self
))]
iou_thr_2d
=
(
iou_thr_2d
)
if
isinstance
(
iou_thr_2d
,
float
)
else
iou_thr_2d
for
iou_thr_2d_single
in
iou_thr_2d
:
mean_ap
,
_
=
eval_map
(
results
,
annotations
,
scale_ranges
=
None
,
iou_thr
=
iou_thr_2d_single
,
dataset
=
self
.
CLASSES
,
logger
=
logger
)
eval_results
[
'mAP_'
+
str
(
iou_thr_2d_single
)]
=
mean_ap
return
eval_results
mmdet3d/models/data_preprocessors/data_preprocessor.py
View file @
9ebb75da
...
@@ -172,7 +172,7 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
...
@@ -172,7 +172,7 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
# image tensor.
# image tensor.
inputs_dict
=
[{
inputs_dict
=
[{
k
:
v
.
to
(
self
.
_device
)
k
:
v
.
to
(
self
.
_device
)
for
k
,
v
in
_data
[
'inputs'
].
items
()
for
k
,
v
in
_data
[
'inputs'
].
items
()
if
v
is
not
None
}
for
_data
in
data
]
}
for
_data
in
data
]
batch_data_samples
:
List
[
BaseDataElement
]
=
[]
batch_data_samples
:
List
[
BaseDataElement
]
=
[]
...
...
mmdet3d/models/dense_heads/groupfree3d_head.py
View file @
9ebb75da
# Copyright (c) OpenMMLab. All rights reserved.
# Copyright (c) OpenMMLab. All rights reserved.
import
copy
import
copy
from
typing
import
Dict
,
List
,
Optional
,
Tuple
import
numpy
as
np
import
numpy
as
np
import
torch
import
torch
from
mmcv
import
ConfigDict
from
mmcv.cnn
import
ConvModule
,
xavier_init
from
mmcv.cnn
import
ConvModule
,
xavier_init
from
mmcv.cnn.bricks.transformer
import
(
build_positional_encoding
,
from
mmcv.cnn.bricks.transformer
import
(
build_positional_encoding
,
build_transformer_layer
)
build_transformer_layer
)
from
mmcv.ops
import
PointsSampler
as
Points_Sampler
from
mmcv.ops
import
PointsSampler
as
Points_Sampler
from
mmcv.ops
import
gather_points
from
mmcv.ops
import
gather_points
from
mmcv.runner
import
BaseModule
,
force_fp32
from
mmcv.runner
import
BaseModule
from
mmengine
import
InstanceData
from
torch
import
Tensor
from
torch
import
nn
as
nn
from
torch
import
nn
as
nn
from
torch.nn
import
functional
as
F
from
torch.nn
import
functional
as
F
from
mmdet3d.core.post_processing
import
aligned_3d_nms
from
mmdet3d.core.post_processing
import
aligned_3d_nms
from
mmdet3d.registry
import
MODELS
from
mmdet3d.registry
import
MODELS
from
mmdet.core
import
build_bbox_coder
,
multi_apply
from
mmdet.core
import
build_bbox_coder
,
multi_apply
from
..
builder
import
build_loss
from
..
.core
import
BaseInstance3DBoxes
,
Det3DDataSample
,
SampleList
from
.base_conv_bbox_head
import
BaseConvBboxHead
from
.base_conv_bbox_head
import
BaseConvBboxHead
EPS
=
1e-6
EPS
=
1e-6
...
@@ -38,12 +40,12 @@ class PointsObjClsModule(BaseModule):
...
@@ -38,12 +40,12 @@ class PointsObjClsModule(BaseModule):
"""
"""
def
__init__
(
self
,
def
__init__
(
self
,
in_channel
,
in_channel
:
int
,
num_convs
=
3
,
num_convs
:
int
=
3
,
conv_cfg
=
dict
(
type
=
'Conv1d'
),
conv_cfg
:
dict
=
dict
(
type
=
'Conv1d'
),
norm_cfg
=
dict
(
type
=
'BN1d'
),
norm_cfg
:
dict
=
dict
(
type
=
'BN1d'
),
act_cfg
=
dict
(
type
=
'ReLU'
),
act_cfg
:
dict
=
dict
(
type
=
'ReLU'
),
init_cfg
=
None
):
init_cfg
:
Optional
[
dict
]
=
None
):
super
().
__init__
(
init_cfg
=
init_cfg
)
super
().
__init__
(
init_cfg
=
init_cfg
)
conv_channels
=
[
in_channel
for
_
in
range
(
num_convs
-
1
)]
conv_channels
=
[
in_channel
for
_
in
range
(
num_convs
-
1
)]
conv_channels
.
append
(
1
)
conv_channels
.
append
(
1
)
...
@@ -85,11 +87,12 @@ class GeneralSamplingModule(nn.Module):
...
@@ -85,11 +87,12 @@ class GeneralSamplingModule(nn.Module):
Sampling points with given index.
Sampling points with given index.
"""
"""
def
forward
(
self
,
xyz
,
features
,
sample_inds
):
def
forward
(
self
,
xyz
:
Tensor
,
features
:
Tensor
,
sample_inds
:
Tensor
)
->
Tuple
[
Tensor
]:
"""Forward pass.
"""Forward pass.
Args:
Args:
xyz: (B, N, 3) the coordinates of the features.
xyz
(Tensor)
: (B, N, 3) the coordinates of the features.
features (Tensor): (B, C, N) features to sample.
features (Tensor): (B, C, N) features to sample.
sample_inds (Tensor): (B, M) the given index,
sample_inds (Tensor): (B, M) the given index,
where M is the number of points.
where M is the number of points.
...
@@ -118,56 +121,61 @@ class GroupFree3DHead(BaseModule):
...
@@ -118,56 +121,61 @@ class GroupFree3DHead(BaseModule):
decoding boxes.
decoding boxes.
num_decoder_layers (int): The number of transformer decoder layers.
num_decoder_layers (int): The number of transformer decoder layers.
transformerlayers (dict): Config for transformer decoder.
transformerlayers (dict): Config for transformer decoder.
train_cfg (dict): Config for training.
train_cfg (dict
, optional
): Config for training.
test_cfg (dict): Config for testing.
test_cfg (dict
, optional
): Config for testing.
num_proposal (int): The number of initial sampling candidates.
num_proposal (int): The number of initial sampling candidates.
pred_layer_cfg (dict): Config of classfication and regression
pred_layer_cfg (dict
, optional
): Config of classfication and regression
prediction layers.
prediction layers.
size_cls_agnostic (bool): Whether the predicted size is class-agnostic.
size_cls_agnostic (bool): Whether the predicted size is class-agnostic.
gt_per_seed (int): the number of candidate instance each point belongs
gt_per_seed (int): the number of candidate instance each point belongs
to.
to.
sampling_objectness_loss (dict): Config of initial sampling
sampling_objectness_loss (dict
, optional
): Config of initial sampling
objectness loss.
objectness loss.
objectness_loss (dict): Config of objectness loss.
objectness_loss (dict, optional): Config of objectness loss.
center_loss (dict): Config of center loss.
center_loss (dict, optional): Config of center loss.
dir_class_loss (dict): Config of direction classification loss.
dir_class_loss (dict, optional): Config of direction classification
dir_res_loss (dict): Config of direction residual regression loss.
loss.
size_class_loss (dict): Config of size classification loss.
dir_res_loss (dict, optional): Config of direction residual
size_res_loss (dict): Config of size residual regression loss.
regression loss.
size_reg_loss (dict): Config of class-agnostic size regression loss.
size_class_loss (dict, optional): Config of size classification loss.
semantic_loss (dict): Config of point-wise semantic segmentation loss.
size_res_loss (dict, optional): Config of size residual
regression loss.
size_reg_loss (dict, optional): Config of class-agnostic size
regression loss.
semantic_loss (dict, optional): Config of point-wise semantic
segmentation loss.
"""
"""
def
__init__
(
self
,
def
__init__
(
self
,
num_classes
,
num_classes
:
int
,
in_channels
,
in_channels
:
int
,
bbox_coder
,
bbox_coder
:
dict
,
num_decoder_layers
,
num_decoder_layers
:
int
,
transformerlayers
,
transformerlayers
:
dict
,
decoder_self_posembeds
=
dict
(
decoder_self_posembeds
:
dict
=
dict
(
type
=
'ConvBNPositionalEncoding'
,
type
=
'ConvBNPositionalEncoding'
,
input_channel
=
6
,
input_channel
=
6
,
num_pos_feats
=
288
),
num_pos_feats
=
288
),
decoder_cross_posembeds
=
dict
(
decoder_cross_posembeds
:
dict
=
dict
(
type
=
'ConvBNPositionalEncoding'
,
type
=
'ConvBNPositionalEncoding'
,
input_channel
=
3
,
input_channel
=
3
,
num_pos_feats
=
288
),
num_pos_feats
=
288
),
train_cfg
=
None
,
train_cfg
:
Optional
[
dict
]
=
None
,
test_cfg
=
None
,
test_cfg
:
Optional
[
dict
]
=
None
,
num_proposal
=
128
,
num_proposal
:
int
=
128
,
pred_layer_cfg
=
None
,
pred_layer_cfg
:
Optional
[
dict
]
=
None
,
size_cls_agnostic
=
True
,
size_cls_agnostic
:
bool
=
True
,
gt_per_seed
=
3
,
gt_per_seed
:
int
=
3
,
sampling_objectness_loss
=
None
,
sampling_objectness_loss
:
Optional
[
dict
]
=
None
,
objectness_loss
=
None
,
objectness_loss
:
Optional
[
dict
]
=
None
,
center_loss
=
None
,
center_loss
:
Optional
[
dict
]
=
None
,
dir_class_loss
=
None
,
dir_class_loss
:
Optional
[
dict
]
=
None
,
dir_res_loss
=
None
,
dir_res_loss
:
Optional
[
dict
]
=
None
,
size_class_loss
=
None
,
size_class_loss
:
Optional
[
dict
]
=
None
,
size_res_loss
=
None
,
size_res_loss
:
Optional
[
dict
]
=
None
,
size_reg_loss
=
None
,
size_reg_loss
:
Optional
[
dict
]
=
None
,
semantic_loss
=
None
,
semantic_loss
:
Optional
[
dict
]
=
None
,
init_cfg
=
None
):
init_cfg
:
Optional
[
dict
]
=
None
):
super
(
GroupFree3DHead
,
self
).
__init__
(
init_cfg
=
init_cfg
)
super
(
GroupFree3DHead
,
self
).
__init__
(
init_cfg
=
init_cfg
)
self
.
num_classes
=
num_classes
self
.
num_classes
=
num_classes
self
.
train_cfg
=
train_cfg
self
.
train_cfg
=
train_cfg
...
@@ -179,7 +187,7 @@ class GroupFree3DHead(BaseModule):
...
@@ -179,7 +187,7 @@ class GroupFree3DHead(BaseModule):
self
.
gt_per_seed
=
gt_per_seed
self
.
gt_per_seed
=
gt_per_seed
# Transformer decoder layers
# Transformer decoder layers
if
isinstance
(
transformerlayers
,
ConfigD
ict
):
if
isinstance
(
transformerlayers
,
d
ict
):
transformerlayers
=
[
transformerlayers
=
[
copy
.
deepcopy
(
transformerlayers
)
copy
.
deepcopy
(
transformerlayers
)
for
_
in
range
(
num_decoder_layers
)
for
_
in
range
(
num_decoder_layers
)
...
@@ -239,17 +247,17 @@ class GroupFree3DHead(BaseModule):
...
@@ -239,17 +247,17 @@ class GroupFree3DHead(BaseModule):
num_cls_out_channels
=
self
.
_get_cls_out_channels
(),
num_cls_out_channels
=
self
.
_get_cls_out_channels
(),
num_reg_out_channels
=
self
.
_get_reg_out_channels
()))
num_reg_out_channels
=
self
.
_get_reg_out_channels
()))
self
.
sampling_objectness
_loss
=
build_loss
(
sampling_objectness_loss
)
self
.
loss_
sampling_objectness
=
MODELS
.
build
(
sampling_objectness_loss
)
self
.
objectness
_loss
=
build_loss
(
objectness_loss
)
self
.
loss_
objectness
=
MODELS
.
build
(
objectness_loss
)
self
.
center
_loss
=
build_loss
(
center_loss
)
self
.
loss_
center
=
MODELS
.
build
(
center_loss
)
self
.
dir_res
_loss
=
build_loss
(
dir_res_loss
)
self
.
loss_
dir_res
=
MODELS
.
build
(
dir_res_loss
)
self
.
dir_class
_loss
=
build_loss
(
dir_class_loss
)
self
.
loss_
dir_class
=
MODELS
.
build
(
dir_class_loss
)
self
.
semantic
_loss
=
build_loss
(
semantic_loss
)
self
.
loss_
semantic
=
MODELS
.
build
(
semantic_loss
)
if
self
.
size_cls_agnostic
:
if
self
.
size_cls_agnostic
:
self
.
size_reg
_loss
=
build_loss
(
size_reg_loss
)
self
.
loss_
size_reg
=
MODELS
.
build
(
size_reg_loss
)
else
:
else
:
self
.
size_res
_loss
=
build_loss
(
size_res_loss
)
self
.
loss_
size_res
=
MODELS
.
build
(
size_res_loss
)
self
.
size_class
_loss
=
build_loss
(
size_class_loss
)
self
.
loss_
size_class
=
MODELS
.
build
(
size_class_loss
)
def
init_weights
(
self
):
def
init_weights
(
self
):
"""Initialize weights of transformer decoder in GroupFree3DHead."""
"""Initialize weights of transformer decoder in GroupFree3DHead."""
...
@@ -279,16 +287,18 @@ class GroupFree3DHead(BaseModule):
...
@@ -279,16 +287,18 @@ class GroupFree3DHead(BaseModule):
else
:
else
:
return
3
+
self
.
num_dir_bins
*
2
+
self
.
num_sizes
*
4
return
3
+
self
.
num_dir_bins
*
2
+
self
.
num_sizes
*
4
def
_extract_input
(
self
,
feat_dict
)
:
def
_extract_input
(
self
,
feat_dict
:
dict
)
->
Tuple
[
Tensor
]
:
"""Extract inputs from features dictionary.
"""Extract inputs from features dictionary.
Args:
Args:
feat_dict (dict): Feature dict from backbone.
feat_dict (dict): Feature dict from backbone.
Returns:
Returns:
torch.Tensor: Coordinates of input points.
Tuple[Tensor]:
torch.Tensor: Features of input points.
torch.Tensor: Indices of input points.
- seed_points (Tensor): Coordinates of input points.
- seed_features (Tensor): Features of input points.
- seed_indices (Tensor): Indices of input points.
"""
"""
seed_points
=
feat_dict
[
'fp_xyz'
][
-
1
]
seed_points
=
feat_dict
[
'fp_xyz'
][
-
1
]
...
@@ -297,7 +307,20 @@ class GroupFree3DHead(BaseModule):
...
@@ -297,7 +307,20 @@ class GroupFree3DHead(BaseModule):
return
seed_points
,
seed_features
,
seed_indices
return
seed_points
,
seed_features
,
seed_indices
def
forward
(
self
,
feat_dict
,
sample_mod
):
@
property
def
sample_mode
(
self
):
"""
Returns:
str: Sample mode for initial candidates sampling.
"""
if
self
.
training
:
sample_mode
=
self
.
train_cfg
.
sample_mode
else
:
sample_mode
=
self
.
test_cfg
.
sample_mode
assert
sample_mode
in
[
'fps'
,
'kps'
]
return
sample_mode
def
forward
(
self
,
feat_dict
:
dict
)
->
dict
:
"""Forward pass.
"""Forward pass.
Note:
Note:
...
@@ -308,12 +331,12 @@ class GroupFree3DHead(BaseModule):
...
@@ -308,12 +331,12 @@ class GroupFree3DHead(BaseModule):
Args:
Args:
feat_dict (dict): Feature dict from backbone.
feat_dict (dict): Feature dict from backbone.
sample_mod (str): sample mode for initial candidates sampling.
Returns:
Returns:
results (dict): Predictions of GroupFree3D head.
results (dict): Predictions of GroupFree3D head.
"""
"""
assert
sample_mod
in
[
'fps'
,
'kps'
]
sample_mod
e
=
self
.
sample_mode
seed_xyz
,
seed_features
,
seed_indices
=
self
.
_extract_input
(
feat_dict
)
seed_xyz
,
seed_features
,
seed_indices
=
self
.
_extract_input
(
feat_dict
)
...
@@ -323,9 +346,9 @@ class GroupFree3DHead(BaseModule):
...
@@ -323,9 +346,9 @@ class GroupFree3DHead(BaseModule):
seed_indices
=
seed_indices
)
seed_indices
=
seed_indices
)
# 1. Initial object candidates sampling.
# 1. Initial object candidates sampling.
if
sample_mod
==
'fps'
:
if
sample_mod
e
==
'fps'
:
sample_inds
=
self
.
fps_module
(
seed_xyz
,
seed_features
)
sample_inds
=
self
.
fps_module
(
seed_xyz
,
seed_features
)
elif
sample_mod
==
'kps'
:
elif
sample_mod
e
==
'kps'
:
points_obj_cls_logits
=
self
.
points_obj_cls
(
points_obj_cls_logits
=
self
.
points_obj_cls
(
seed_features
)
# (batch_size, 1, num_seed)
seed_features
)
# (batch_size, 1, num_seed)
points_obj_cls_scores
=
points_obj_cls_logits
.
sigmoid
().
squeeze
(
1
)
points_obj_cls_scores
=
points_obj_cls_logits
.
sigmoid
().
squeeze
(
1
)
...
@@ -334,7 +357,7 @@ class GroupFree3DHead(BaseModule):
...
@@ -334,7 +357,7 @@ class GroupFree3DHead(BaseModule):
results
[
'seeds_obj_cls_logits'
]
=
points_obj_cls_logits
results
[
'seeds_obj_cls_logits'
]
=
points_obj_cls_logits
else
:
else
:
raise
NotImplementedError
(
raise
NotImplementedError
(
f
'Sample mode
{
sample_mod
}
is not supported!'
)
f
'Sample mode
{
sample_mod
e
}
is not supported!'
)
candidate_xyz
,
candidate_features
,
sample_inds
=
self
.
gsample_module
(
candidate_xyz
,
candidate_features
,
sample_inds
=
self
.
gsample_module
(
seed_xyz
,
seed_features
,
sample_inds
)
seed_xyz
,
seed_features
,
sample_inds
)
...
@@ -391,40 +414,74 @@ class GroupFree3DHead(BaseModule):
...
@@ -391,40 +414,74 @@ class GroupFree3DHead(BaseModule):
return
results
return
results
@
force_fp32
(
apply_to
=
(
'bbox_preds'
,
))
def
loss
(
self
,
points
:
List
[
torch
.
Tensor
],
feats_dict
:
Dict
[
str
,
def
loss
(
self
,
torch
.
Tensor
],
bbox_preds
,
batch_data_samples
:
SampleList
,
**
kwargs
)
->
dict
:
points
,
"""
gt_bboxes_3d
,
Args:
gt_labels_3d
,
points (list[tensor]): Points cloud of multiple samples.
pts_semantic_mask
=
None
,
feats_dict (dict): Predictions from backbone or FPN.
pts_instance_mask
=
None
,
batch_data_samples (list[:obj:`Det3DDataSample`]): Each item
img_metas
=
None
,
contains the meta information of each sample and
gt_bboxes_ignore
=
None
,
corresponding annotations.
ret_target
=
False
):
Returns:
dict: A dictionary of loss components.
"""
preds_dict
=
self
.
forward
(
feats_dict
)
batch_gt_instance_3d
=
[]
batch_gt_instances_ignore
=
[]
batch_input_metas
=
[]
batch_pts_semantic_mask
=
[]
batch_pts_instance_mask
=
[]
for
data_sample
in
batch_data_samples
:
batch_input_metas
.
append
(
data_sample
.
metainfo
)
batch_gt_instance_3d
.
append
(
data_sample
.
gt_instances_3d
)
batch_gt_instances_ignore
.
append
(
data_sample
.
get
(
'ignored_instances'
,
None
))
batch_pts_semantic_mask
.
append
(
data_sample
.
gt_pts_seg
.
get
(
'pts_semantic_mask'
,
None
))
batch_pts_instance_mask
.
append
(
data_sample
.
gt_pts_seg
.
get
(
'pts_instance_mask'
,
None
))
loss_inputs
=
(
points
,
preds_dict
,
batch_gt_instance_3d
)
losses
=
self
.
loss_by_feat
(
*
loss_inputs
,
batch_pts_semantic_mask
=
batch_pts_semantic_mask
,
batch_pts_instance_mask
=
batch_pts_instance_mask
,
batch_input_metas
=
batch_input_metas
,
batch_gt_instances_ignore
=
batch_gt_instances_ignore
)
return
losses
def
loss_by_feat
(
self
,
points
:
List
[
torch
.
Tensor
],
feats_dict
:
dict
,
batch_gt_instances_3d
:
List
[
InstanceData
],
batch_pts_semantic_mask
:
Optional
[
List
[
torch
.
Tensor
]]
=
None
,
batch_pts_instance_mask
:
Optional
[
List
[
torch
.
Tensor
]]
=
None
,
ret_target
:
bool
=
False
,
**
kwargs
)
->
dict
:
"""Compute loss.
"""Compute loss.
Args:
Args:
bbox_preds (dict): Predictions from forward of vote head.
points (list[torch.Tensor]): Input points.
points (list[torch.Tensor]): Input points.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
feats_dict (dict): Predictions from previous component.
bboxes of each sample.
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_labels_3d (list[torch.Tensor]): Labels of each sample.
gt_instances. It usually includes ``bboxes_3d`` and
pts_semantic_mask (list[torch.Tensor]): Point-wise
``labels_3d`` attributes.
semantic mask.
batch_pts_semantic_mask (list[tensor]): Semantic mask
pts_instance_mask (list[torch.Tensor]): Point-wise
of points cloud. Defaults to None.
instance mask.
batch_pts_semantic_mask (list[tensor]): Instance mask
img_metas (list[dict]): Contain pcd and img's meta info.
of points cloud. Defaults to None.
gt_bboxes_ignore (list[torch.Tensor]): Specify
ret_target (bool): Return targets or not. Defaults to False.
which bounding.
ret_target (Bool): Return targets or not.
Returns:
Returns:
dict: Losses of GroupFree3D.
dict: Losses of
`
GroupFree3D
`
.
"""
"""
targets
=
self
.
get_targets
(
points
,
gt_bboxes_3d
,
gt_label
s_3d
,
targets
=
self
.
get_targets
(
points
,
feats_dict
,
batch_gt_instance
s_3d
,
pts_semantic_mask
,
pts_instance_mask
,
batch_
pts_semantic_mask
,
b
box_preds
)
b
atch_pts_instance_mask
)
(
sampling_targets
,
sampling_weights
,
assigned_size_targets
,
(
sampling_targets
,
sampling_weights
,
assigned_size_targets
,
size_class_targets
,
size_res_targets
,
dir_class_targets
,
size_class_targets
,
size_res_targets
,
dir_class_targets
,
dir_res_targets
,
center_targets
,
assigned_center_targets
,
dir_res_targets
,
center_targets
,
assigned_center_targets
,
...
@@ -436,8 +493,8 @@ class GroupFree3DHead(BaseModule):
...
@@ -436,8 +493,8 @@ class GroupFree3DHead(BaseModule):
losses
=
dict
()
losses
=
dict
()
# calculate objectness classification loss
# calculate objectness classification loss
sampling_obj_score
=
bbox_preds
[
'seeds_obj_cls_logits'
].
reshape
(
-
1
,
1
)
sampling_obj_score
=
feats_dict
[
'seeds_obj_cls_logits'
].
reshape
(
-
1
,
1
)
sampling_objectness_loss
=
self
.
sampling_objectness
_loss
(
sampling_objectness_loss
=
self
.
loss_
sampling_objectness
(
sampling_obj_score
,
sampling_obj_score
,
1
-
sampling_targets
.
reshape
(
-
1
),
1
-
sampling_targets
.
reshape
(
-
1
),
sampling_weights
.
reshape
(
-
1
),
sampling_weights
.
reshape
(
-
1
),
...
@@ -445,14 +502,14 @@ class GroupFree3DHead(BaseModule):
...
@@ -445,14 +502,14 @@ class GroupFree3DHead(BaseModule):
losses
[
'sampling_objectness_loss'
]
=
sampling_objectness_loss
losses
[
'sampling_objectness_loss'
]
=
sampling_objectness_loss
prefixes
=
[
'proposal.'
]
+
[
prefixes
=
[
'proposal.'
]
+
[
f
's
{
i
}
.'
for
i
in
range
(
bbox_preds
[
'num_decoder_layers'
])
f
's
{
i
}
.'
for
i
in
range
(
feats_dict
[
'num_decoder_layers'
])
]
]
num_stages
=
len
(
prefixes
)
num_stages
=
len
(
prefixes
)
for
prefix
in
prefixes
:
for
prefix
in
prefixes
:
# calculate objectness loss
# calculate objectness loss
obj_score
=
bbox_preds
[
f
'
{
prefix
}
obj_scores'
].
transpose
(
2
,
1
)
obj_score
=
feats_dict
[
f
'
{
prefix
}
obj_scores'
].
transpose
(
2
,
1
)
objectness_loss
=
self
.
objectness
_loss
(
objectness_loss
=
self
.
loss_
objectness
(
obj_score
.
reshape
(
-
1
,
1
),
obj_score
.
reshape
(
-
1
,
1
),
1
-
objectness_targets
.
reshape
(
-
1
),
1
-
objectness_targets
.
reshape
(
-
1
),
objectness_weights
.
reshape
(
-
1
),
objectness_weights
.
reshape
(
-
1
),
...
@@ -462,15 +519,15 @@ class GroupFree3DHead(BaseModule):
...
@@ -462,15 +519,15 @@ class GroupFree3DHead(BaseModule):
# calculate center loss
# calculate center loss
box_loss_weights_expand
=
box_loss_weights
.
unsqueeze
(
-
1
).
expand
(
box_loss_weights_expand
=
box_loss_weights
.
unsqueeze
(
-
1
).
expand
(
-
1
,
-
1
,
3
)
-
1
,
-
1
,
3
)
center_loss
=
self
.
center
_loss
(
center_loss
=
self
.
loss_
center
(
bbox_preds
[
f
'
{
prefix
}
center'
],
feats_dict
[
f
'
{
prefix
}
center'
],
assigned_center_targets
,
assigned_center_targets
,
weight
=
box_loss_weights_expand
)
weight
=
box_loss_weights_expand
)
losses
[
f
'
{
prefix
}
center_loss'
]
=
center_loss
/
num_stages
losses
[
f
'
{
prefix
}
center_loss'
]
=
center_loss
/
num_stages
# calculate direction class loss
# calculate direction class loss
dir_class_loss
=
self
.
dir_class
_loss
(
dir_class_loss
=
self
.
loss_
dir_class
(
bbox_preds
[
f
'
{
prefix
}
dir_class'
].
transpose
(
2
,
1
),
feats_dict
[
f
'
{
prefix
}
dir_class'
].
transpose
(
2
,
1
),
dir_class_targets
,
dir_class_targets
,
weight
=
box_loss_weights
)
weight
=
box_loss_weights
)
losses
[
f
'
{
prefix
}
dir_class_loss'
]
=
dir_class_loss
/
num_stages
losses
[
f
'
{
prefix
}
dir_class_loss'
]
=
dir_class_loss
/
num_stages
...
@@ -481,24 +538,24 @@ class GroupFree3DHead(BaseModule):
...
@@ -481,24 +538,24 @@ class GroupFree3DHead(BaseModule):
heading_label_one_hot
.
scatter_
(
2
,
dir_class_targets
.
unsqueeze
(
-
1
),
heading_label_one_hot
.
scatter_
(
2
,
dir_class_targets
.
unsqueeze
(
-
1
),
1
)
1
)
dir_res_norm
=
torch
.
sum
(
dir_res_norm
=
torch
.
sum
(
bbox_preds
[
f
'
{
prefix
}
dir_res_norm'
]
*
heading_label_one_hot
,
feats_dict
[
f
'
{
prefix
}
dir_res_norm'
]
*
heading_label_one_hot
,
-
1
)
-
1
)
dir_res_loss
=
self
.
dir_res
_loss
(
dir_res_loss
=
self
.
loss_
dir_res
(
dir_res_norm
,
dir_res_targets
,
weight
=
box_loss_weights
)
dir_res_norm
,
dir_res_targets
,
weight
=
box_loss_weights
)
losses
[
f
'
{
prefix
}
dir_res_loss'
]
=
dir_res_loss
/
num_stages
losses
[
f
'
{
prefix
}
dir_res_loss'
]
=
dir_res_loss
/
num_stages
if
self
.
size_cls_agnostic
:
if
self
.
size_cls_agnostic
:
# calculate class-agnostic size loss
# calculate class-agnostic size loss
size_reg_loss
=
self
.
size_reg
_loss
(
size_reg_loss
=
self
.
loss_
size_reg
(
bbox_preds
[
f
'
{
prefix
}
size'
],
feats_dict
[
f
'
{
prefix
}
size'
],
assigned_size_targets
,
assigned_size_targets
,
weight
=
box_loss_weights_expand
)
weight
=
box_loss_weights_expand
)
losses
[
f
'
{
prefix
}
size_reg_loss'
]
=
size_reg_loss
/
num_stages
losses
[
f
'
{
prefix
}
size_reg_loss'
]
=
size_reg_loss
/
num_stages
else
:
else
:
# calculate size class loss
# calculate size class loss
size_class_loss
=
self
.
size_class
_loss
(
size_class_loss
=
self
.
loss_
size_class
(
bbox_preds
[
f
'
{
prefix
}
size_class'
].
transpose
(
2
,
1
),
feats_dict
[
f
'
{
prefix
}
size_class'
].
transpose
(
2
,
1
),
size_class_targets
,
size_class_targets
,
weight
=
box_loss_weights
)
weight
=
box_loss_weights
)
losses
[
losses
[
...
@@ -513,19 +570,19 @@ class GroupFree3DHead(BaseModule):
...
@@ -513,19 +570,19 @@ class GroupFree3DHead(BaseModule):
one_hot_size_targets_expand
=
one_hot_size_targets
.
unsqueeze
(
one_hot_size_targets_expand
=
one_hot_size_targets
.
unsqueeze
(
-
1
).
expand
(
-
1
,
-
1
,
-
1
,
3
).
contiguous
()
-
1
).
expand
(
-
1
,
-
1
,
-
1
,
3
).
contiguous
()
size_residual_norm
=
torch
.
sum
(
size_residual_norm
=
torch
.
sum
(
bbox_preds
[
f
'
{
prefix
}
size_res_norm'
]
*
feats_dict
[
f
'
{
prefix
}
size_res_norm'
]
*
one_hot_size_targets_expand
,
2
)
one_hot_size_targets_expand
,
2
)
box_loss_weights_expand
=
box_loss_weights
.
unsqueeze
(
box_loss_weights_expand
=
box_loss_weights
.
unsqueeze
(
-
1
).
expand
(
-
1
,
-
1
,
3
)
-
1
).
expand
(
-
1
,
-
1
,
3
)
size_res_loss
=
self
.
size_res
_loss
(
size_res_loss
=
self
.
loss_
size_res
(
size_residual_norm
,
size_residual_norm
,
size_res_targets
,
size_res_targets
,
weight
=
box_loss_weights_expand
)
weight
=
box_loss_weights_expand
)
losses
[
f
'
{
prefix
}
size_res_loss'
]
=
size_res_loss
/
num_stages
losses
[
f
'
{
prefix
}
size_res_loss'
]
=
size_res_loss
/
num_stages
# calculate semantic loss
# calculate semantic loss
semantic_loss
=
self
.
semantic
_loss
(
semantic_loss
=
self
.
loss_
semantic
(
bbox_preds
[
f
'
{
prefix
}
sem_scores'
].
transpose
(
2
,
1
),
feats_dict
[
f
'
{
prefix
}
sem_scores'
].
transpose
(
2
,
1
),
mask_targets
,
mask_targets
,
weight
=
box_loss_weights
)
weight
=
box_loss_weights
)
losses
[
f
'
{
prefix
}
semantic_loss'
]
=
semantic_loss
/
num_stages
losses
[
f
'
{
prefix
}
semantic_loss'
]
=
semantic_loss
/
num_stages
...
@@ -535,27 +592,29 @@ class GroupFree3DHead(BaseModule):
...
@@ -535,27 +592,29 @@ class GroupFree3DHead(BaseModule):
return
losses
return
losses
def
get_targets
(
self
,
def
get_targets
(
points
,
self
,
gt_bboxes_3d
,
points
:
List
[
Tensor
],
gt_labels_3d
,
feats_dict
:
dict
=
None
,
pts_semantic_mask
=
None
,
batch_gt_instances_3d
:
List
[
InstanceData
]
=
None
,
pts_instance_mask
=
None
,
batch_pts_semantic_mask
:
List
[
torch
.
Tensor
]
=
None
,
bbox_preds
=
None
,
batch_pts_instance_mask
:
List
[
torch
.
Tensor
]
=
None
,
max_gt_num
=
64
):
max_gt_num
:
int
=
64
,
):
"""Generate targets of GroupFree3D head.
"""Generate targets of GroupFree3D head.
Args:
Args:
points (list[torch.Tensor]): Points of each batch.
points (list[torch.Tensor]): Points of each batch.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
feats_dict (torch.Tensor): Predictions of previous component.
bboxes of each batch.
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_labels_3d (list[torch.Tensor]): Labels of each batch.
gt_instances. It usually includes ``bboxes_3d`` and
pts_semantic_mask (list[torch.Tensor]): Point-wise semantic
``labels_3d`` attributes.
label of each batch.
batch_pts_semantic_mask (list[tensor]): Semantic gt mask for
pts_instance_mask (list[torch.Tensor]): Point-wise instance
point clouds. Defaults to None.
label of each batch.
batch_pts_instance_mask (list[tensor]): Instance gt mask for
bbox_preds (torch.Tensor): Bounding box predictions of vote head.
point clouds. Defaults to None.
max_gt_num (int): Max number of GTs for single batch.
max_gt_num (int): Max number of GTs for single batch. Defaults
to 64.
Returns:
Returns:
tuple[torch.Tensor]: Targets of GroupFree3D head.
tuple[torch.Tensor]: Targets of GroupFree3D head.
...
@@ -563,51 +622,67 @@ class GroupFree3DHead(BaseModule):
...
@@ -563,51 +622,67 @@ class GroupFree3DHead(BaseModule):
# find empty example
# find empty example
valid_gt_masks
=
list
()
valid_gt_masks
=
list
()
gt_num
=
list
()
gt_num
=
list
()
for
index
in
range
(
len
(
gt_labels_3d
)):
batch_gt_labels_3d
=
[
if
len
(
gt_labels_3d
[
index
])
==
0
:
gt_instances_3d
.
labels_3d
fake_box
=
gt_bboxes_3d
[
index
].
tensor
.
new_zeros
(
for
gt_instances_3d
in
batch_gt_instances_3d
1
,
gt_bboxes_3d
[
index
].
tensor
.
shape
[
-
1
])
]
gt_bboxes_3d
[
index
]
=
gt_bboxes_3d
[
index
].
new_box
(
fake_box
)
batch_gt_bboxes_3d
=
[
gt_labels_3d
[
index
]
=
gt_labels_3d
[
index
].
new_zeros
(
1
)
gt_instances_3d
.
bboxes_3d
valid_gt_masks
.
append
(
gt_labels_3d
[
index
].
new_zeros
(
1
))
for
gt_instances_3d
in
batch_gt_instances_3d
]
for
index
in
range
(
len
(
batch_gt_labels_3d
)):
if
len
(
batch_gt_labels_3d
[
index
])
==
0
:
fake_box
=
batch_gt_bboxes_3d
[
index
].
tensor
.
new_zeros
(
1
,
batch_gt_bboxes_3d
[
index
].
tensor
.
shape
[
-
1
])
batch_gt_bboxes_3d
[
index
]
=
batch_gt_bboxes_3d
[
index
].
new_box
(
fake_box
)
batch_gt_labels_3d
[
index
]
=
batch_gt_labels_3d
[
index
].
new_zeros
(
1
)
valid_gt_masks
.
append
(
batch_gt_labels_3d
[
index
].
new_zeros
(
1
))
gt_num
.
append
(
1
)
gt_num
.
append
(
1
)
else
:
else
:
valid_gt_masks
.
append
(
gt_labels_3d
[
index
].
new_ones
(
valid_gt_masks
.
append
(
batch_gt_labels_3d
[
index
].
new_ones
(
gt_labels_3d
[
index
].
shape
))
batch_gt_labels_3d
[
index
].
shape
))
gt_num
.
append
(
gt_labels_3d
[
index
].
shape
[
0
])
gt_num
.
append
(
batch_gt_labels_3d
[
index
].
shape
[
0
])
# max_gt_num = max(gt_num)
max_gt_nums
=
[
max_gt_num
for
_
in
range
(
len
(
gt_labels_3d
))]
max_gt_nums
=
[
max_gt_num
for
_
in
range
(
len
(
batch_
gt_labels_3d
))]
if
pts_semantic_mask
is
None
:
if
batch_pts_semantic_mask
is
None
:
pts_semantic_mask
=
[
None
for
i
in
range
(
len
(
gt_labels_3d
))]
batch_pts_semantic_mask
=
[
pts_instance_mask
=
[
None
for
i
in
range
(
len
(
gt_labels_3d
))]
None
for
i
in
range
(
len
(
batch_gt_labels_3d
))
]
batch_pts_instance_mask
=
[
None
for
i
in
range
(
len
(
batch_gt_labels_3d
))
]
seed_points
=
[
seed_points
=
[
bbox_preds
[
'seed_points'
][
i
]
for
i
in
range
(
len
(
gt_labels_3d
))
feats_dict
[
'seed_points'
][
i
]
for
i
in
range
(
len
(
batch_gt_labels_3d
))
]
]
seed_indices
=
[
seed_indices
=
[
bbox_preds
[
'seed_indices'
][
i
]
for
i
in
range
(
len
(
gt_labels_3d
))
feats_dict
[
'seed_indices'
][
i
]
for
i
in
range
(
len
(
batch_gt_labels_3d
))
]
]
candidate_indices
=
[
candidate_indices
=
[
bbox_preds
[
'query_points_sample_inds'
][
i
]
feats_dict
[
'query_points_sample_inds'
][
i
]
for
i
in
range
(
len
(
gt_labels_3d
))
for
i
in
range
(
len
(
batch_
gt_labels_3d
))
]
]
(
sampling_targets
,
assigned_size_targets
,
size_class_targets
,
(
sampling_targets
,
assigned_size_targets
,
size_class_targets
,
size_res_targets
,
dir_class_targets
,
dir_res_targets
,
center_targets
,
size_res_targets
,
dir_class_targets
,
dir_res_targets
,
center_targets
,
assigned_center_targets
,
mask_targets
,
objectness_targets
,
assigned_center_targets
,
mask_targets
,
objectness_masks
)
=
multi_apply
(
self
.
get_targets_single
,
points
,
objectness_targets
,
objectness_masks
)
=
multi_apply
(
gt_bboxes_3d
,
gt_label
s_3d
,
self
.
_get_targets_single
,
points
,
batch_gt_bboxe
s_3d
,
pts_semantic_mask
,
pts_instance
_mask
,
batch_gt_labels_3d
,
batch_pts_semantic
_mask
,
max_gt_nums
,
seed_points
,
batch_pts_instance_mask
,
max_gt_nums
,
seed_points
,
seed_indices
,
seed_indices
,
candidate_indices
)
candidate_indices
)
# pad targets as original code of GroupFree3D.
# pad targets as original code of GroupFree3D.
for
index
in
range
(
len
(
gt_labels_3d
)):
for
index
in
range
(
len
(
batch_
gt_labels_3d
)):
pad_num
=
max_gt_num
-
gt_labels_3d
[
index
].
shape
[
0
]
pad_num
=
max_gt_num
-
batch_
gt_labels_3d
[
index
].
shape
[
0
]
valid_gt_masks
[
index
]
=
F
.
pad
(
valid_gt_masks
[
index
],
(
0
,
pad_num
))
valid_gt_masks
[
index
]
=
F
.
pad
(
valid_gt_masks
[
index
],
(
0
,
pad_num
))
sampling_targets
=
torch
.
stack
(
sampling_targets
)
sampling_targets
=
torch
.
stack
(
sampling_targets
)
...
@@ -644,17 +719,17 @@ class GroupFree3DHead(BaseModule):
...
@@ -644,17 +719,17 @@ class GroupFree3DHead(BaseModule):
mask_targets
,
valid_gt_masks
,
objectness_targets
,
mask_targets
,
valid_gt_masks
,
objectness_targets
,
objectness_weights
,
box_loss_weights
,
valid_gt_weights
)
objectness_weights
,
box_loss_weights
,
valid_gt_weights
)
def
get_targets_single
(
self
,
def
_
get_targets_single
(
self
,
points
,
points
:
Tensor
,
gt_bboxes_3d
,
gt_bboxes_3d
:
BaseInstance3DBoxes
,
gt_labels_3d
,
gt_labels_3d
:
Tensor
,
pts_semantic_mask
=
None
,
pts_semantic_mask
:
Optional
[
Tensor
]
=
None
,
pts_instance_mask
=
None
,
pts_instance_mask
:
Optional
[
Tensor
]
=
None
,
max_gt_nums
=
None
,
max_gt_nums
:
Optional
[
int
]
=
None
,
seed_points
=
None
,
seed_points
:
Optional
[
Tensor
]
=
None
,
seed_indices
=
None
,
seed_indices
:
Optional
[
Tensor
]
=
None
,
candidate_indices
=
None
,
candidate_indices
:
Optional
[
Tensor
]
=
None
,
seed_points_obj_topk
=
4
):
seed_points_obj_topk
:
int
=
4
):
"""Generate targets of GroupFree3D head for single batch.
"""Generate targets of GroupFree3D head for single batch.
Args:
Args:
...
@@ -662,15 +737,20 @@ class GroupFree3DHead(BaseModule):
...
@@ -662,15 +737,20 @@ class GroupFree3DHead(BaseModule):
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth
boxes of each batch.
boxes of each batch.
gt_labels_3d (torch.Tensor): Labels of each batch.
gt_labels_3d (torch.Tensor): Labels of each batch.
pts_semantic_mask (torch.Tensor): Point-wise semantic
pts_semantic_mask (torch.Tensor, optional): Point-wise semantic
label of each batch.
label of each batch. Defaults to None.
pts_instance_mask (torch.Tensor): Point-wise instance
pts_instance_mask (torch.Tensor, optional): Point-wise instance
label of each batch.
label of each batch. Defaults to None.
max_gt_nums (int): Max number of GTs for single batch.
max_gt_nums (int, optional): Max number of GTs for single batch.
seed_points (torch.Tensor): Coordinates of seed points.
Defaults to None.
seed_indices (torch.Tensor): Indices of seed points.
seed_points (torch.Tensor,optional): Coordinates of seed points.
candidate_indices (torch.Tensor): Indices of object candidates.
Defaults to None.
seed_indices (torch.Tensor,optional): Indices of seed points.
Defaults to None.
candidate_indices (torch.Tensor,optional): Indices of object
candidates. Defaults to None.
seed_points_obj_topk (int): k value of k-Closest Points Sampling.
seed_points_obj_topk (int): k value of k-Closest Points Sampling.
Defaults to 4.
Returns:
Returns:
tuple[torch.Tensor]: Targets of GroupFree3D head.
tuple[torch.Tensor]: Targets of GroupFree3D head.
...
@@ -755,7 +835,7 @@ class GroupFree3DHead(BaseModule):
...
@@ -755,7 +835,7 @@ class GroupFree3DHead(BaseModule):
pts_instance_label
=
instance_lable
.
long
()
pts_instance_label
=
instance_lable
.
long
()
pts_instance_label
[
pts_obj_mask
==
0
]
=
-
1
pts_instance_label
[
pts_obj_mask
==
0
]
=
-
1
elif
pts_semantic_mask
is
not
None
:
elif
pts_instance_mask
is
not
None
and
pts_semantic_mask
is
not
None
:
for
i
in
torch
.
unique
(
pts_instance_mask
):
for
i
in
torch
.
unique
(
pts_instance_mask
):
indices
=
torch
.
nonzero
(
indices
=
torch
.
nonzero
(
pts_instance_mask
==
i
,
as_tuple
=
False
).
squeeze
(
-
1
)
pts_instance_mask
==
i
,
as_tuple
=
False
).
squeeze
(
-
1
)
...
@@ -863,30 +943,58 @@ class GroupFree3DHead(BaseModule):
...
@@ -863,30 +943,58 @@ class GroupFree3DHead(BaseModule):
center_targets
,
assigned_center_targets
,
mask_targets
,
center_targets
,
assigned_center_targets
,
mask_targets
,
objectness_targets
,
objectness_masks
)
objectness_targets
,
objectness_masks
)
def
get_bboxes
(
self
,
def
predict
(
self
,
points
:
List
[
torch
.
Tensor
],
points
,
feats_dict
:
Dict
[
str
,
torch
.
Tensor
],
bbox_preds
,
batch_data_samples
:
List
[
Det3DDataSample
],
input_metas
,
**
kwargs
)
->
List
[
InstanceData
]:
rescale
=
False
,
"""
use_nms
=
True
):
Args:
"""Generate bboxes from GroupFree3D head predictions.
points (list[tensor]): Point clouds of multiple samples.
feats_dict (dict): Features from FPN or backbone.
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
Samples. It usually includes meta information of data.
Returns:
list[:obj:`InstanceData`]: List of processed predictions. Each
InstanceData contains 3d Bounding boxes and corresponding
scores and labels.
"""
preds_dict
=
self
(
feats_dict
)
batch_size
=
len
(
batch_data_samples
)
batch_input_metas
=
[]
for
batch_index
in
range
(
batch_size
):
metainfo
=
batch_data_samples
[
batch_index
].
metainfo
batch_input_metas
.
append
(
metainfo
)
results_list
=
self
.
predict_by_feat
(
points
,
preds_dict
,
batch_input_metas
,
**
kwargs
)
return
results_list
def
predict_by_feat
(
self
,
points
:
List
[
torch
.
Tensor
],
bbox_preds_dict
:
dict
,
batch_input_metas
:
List
[
dict
],
use_nms
:
bool
=
True
,
**
kwargs
)
->
List
[
InstanceData
]:
"""Generate bboxes from vote head predictions.
Args:
Args:
points (torch.Tensor): Input points.
points (
List[
torch.Tensor
]
): Input points
of multiple samples
.
bbox_preds (dict): Predictions from
G
roup
F
ree3
D
head.
bbox_preds
_dict
(dict): Predictions from
g
roup
f
ree3
d
head.
input_metas (list[dict]):
Point cloud and image's meta info.
batch_
input_metas (list[dict]):
Each item
rescale (bool): Whether to rescale bboxes
.
contains the meta information of each sample
.
use_nms (bool): Whether to apply NMS, skip nms postprocessing
use_nms (bool): Whether to apply NMS, skip nms postprocessing
while using
GroupFree3D
head in rpn stage.
while using
vote
head in rpn stage.
Returns:
Returns:
list[tuple[torch.Tensor]]: Bounding boxes, scores and labels.
list[:obj:`InstanceData`]: List of processed predictions. Each
InstanceData cantains 3d Bounding boxes and corresponding
scores and labels.
"""
"""
# support multi-stage predictions
# support multi-stage predictions
assert
self
.
test_cfg
[
'prediction_stages'
]
in
\
assert
self
.
test_cfg
[
'prediction_stages'
]
in
\
[
'last'
,
'all'
,
'last_three'
]
[
'last'
,
'all'
,
'last_three'
]
prefixes
=
list
()
if
self
.
test_cfg
[
'prediction_stages'
]
==
'last'
:
if
self
.
test_cfg
[
'prediction_stages'
]
==
'last'
:
prefixes
=
[
f
's
{
self
.
num_decoder_layers
-
1
}
.'
]
prefixes
=
[
f
's
{
self
.
num_decoder_layers
-
1
}
.'
]
elif
self
.
test_cfg
[
'prediction_stages'
]
==
'all'
:
elif
self
.
test_cfg
[
'prediction_stages'
]
==
'all'
:
...
@@ -905,9 +1013,10 @@ class GroupFree3DHead(BaseModule):
...
@@ -905,9 +1013,10 @@ class GroupFree3DHead(BaseModule):
bbox3d
=
list
()
bbox3d
=
list
()
for
prefix
in
prefixes
:
for
prefix
in
prefixes
:
# decode boxes
# decode boxes
obj_score
=
bbox_preds
[
f
'
{
prefix
}
obj_scores'
][...,
-
1
].
sigmoid
()
obj_score
=
bbox_preds_dict
[
f
'
{
prefix
}
obj_scores'
][...,
sem_score
=
bbox_preds
[
f
'
{
prefix
}
sem_scores'
].
softmax
(
-
1
)
-
1
].
sigmoid
()
bbox
=
self
.
bbox_coder
.
decode
(
bbox_preds
,
prefix
)
sem_score
=
bbox_preds_dict
[
f
'
{
prefix
}
sem_scores'
].
softmax
(
-
1
)
bbox
=
self
.
bbox_coder
.
decode
(
bbox_preds_dict
,
prefix
)
obj_scores
.
append
(
obj_score
)
obj_scores
.
append
(
obj_score
)
sem_scores
.
append
(
sem_score
)
sem_scores
.
append
(
sem_score
)
bbox3d
.
append
(
bbox
)
bbox3d
.
append
(
bbox
)
...
@@ -915,22 +1024,27 @@ class GroupFree3DHead(BaseModule):
...
@@ -915,22 +1024,27 @@ class GroupFree3DHead(BaseModule):
obj_scores
=
torch
.
cat
(
obj_scores
,
dim
=
1
)
obj_scores
=
torch
.
cat
(
obj_scores
,
dim
=
1
)
sem_scores
=
torch
.
cat
(
sem_scores
,
dim
=
1
)
sem_scores
=
torch
.
cat
(
sem_scores
,
dim
=
1
)
bbox3d
=
torch
.
cat
(
bbox3d
,
dim
=
1
)
bbox3d
=
torch
.
cat
(
bbox3d
,
dim
=
1
)
stack_points
=
torch
.
stack
(
points
)
results_list
=
list
()
if
use_nms
:
if
use_nms
:
batch_size
=
bbox3d
.
shape
[
0
]
batch_size
=
bbox3d
.
shape
[
0
]
results
=
list
()
temp_
results
=
InstanceData
()
for
b
in
range
(
batch_size
):
for
b
in
range
(
batch_size
):
bbox_selected
,
score_selected
,
labels
=
\
bbox_selected
,
score_selected
,
labels
=
\
self
.
multiclass_nms_single
(
obj_scores
[
b
],
sem_scores
[
b
],
self
.
multiclass_nms_single
(
obj_scores
[
b
],
bbox3d
[
b
],
points
[
b
,
...,
:
3
],
sem_scores
[
b
],
input_metas
[
b
])
bbox3d
[
b
],
bbox
=
input_metas
[
b
][
'box_type_3d'
](
stack_points
[
b
,
...,
:
3
],
batch_input_metas
[
b
])
bbox
=
batch_input_metas
[
b
][
'box_type_3d'
](
bbox_selected
,
bbox_selected
,
box_dim
=
bbox_selected
.
shape
[
-
1
],
box_dim
=
bbox_selected
.
shape
[
-
1
],
with_yaw
=
self
.
bbox_coder
.
with_rot
)
with_yaw
=
self
.
bbox_coder
.
with_rot
)
results
.
append
((
bbox
,
score_selected
,
labels
))
temp_results
.
bboxes_3d
=
bbox
temp_results
.
scores_3d
=
score_selected
return
results
temp_results
.
labels_3d
=
labels
results_list
.
append
(
temp_results
)
return
results_list
else
:
else
:
return
bbox3d
return
bbox3d
...
...
mmdet3d/models/dense_heads/vote_head.py
View file @
9ebb75da
...
@@ -214,9 +214,9 @@ class VoteHead(BaseModule):
...
@@ -214,9 +214,9 @@ class VoteHead(BaseModule):
batch_gt_instances_ignore
.
append
(
batch_gt_instances_ignore
.
append
(
data_sample
.
get
(
'ignored_instances'
,
None
))
data_sample
.
get
(
'ignored_instances'
,
None
))
batch_pts_semantic_mask
.
append
(
batch_pts_semantic_mask
.
append
(
data_sample
.
seg_data
.
get
(
'pts_semantic_mask'
,
None
))
data_sample
.
gt_pts_seg
.
get
(
'pts_semantic_mask'
,
None
))
batch_pts_instance_mask
.
append
(
batch_pts_instance_mask
.
append
(
data_sample
.
seg_data
.
get
(
'pts_instance_mask'
,
None
))
data_sample
.
gt_pts_seg
.
get
(
'pts_instance_mask'
,
None
))
loss_inputs
=
(
points
,
preds_dict
,
batch_gt_instance_3d
)
loss_inputs
=
(
points
,
preds_dict
,
batch_gt_instance_3d
)
losses
=
self
.
loss_by_feat
(
losses
=
self
.
loss_by_feat
(
...
@@ -452,9 +452,9 @@ class VoteHead(BaseModule):
...
@@ -452,9 +452,9 @@ class VoteHead(BaseModule):
gt_instances. It usually includes ``bboxes`` and ``labels``
gt_instances. It usually includes ``bboxes`` and ``labels``
attributes.
attributes.
batch_pts_semantic_mask (list[tensor]): Semantic gt mask for
batch_pts_semantic_mask (list[tensor]): Semantic gt mask for
multiple images
.
point clouds. Defaults to None
.
batch_pts_instance_mask (list[tensor]): Instance gt mask for
batch_pts_instance_mask (list[tensor]): Instance gt mask for
multiple images
.
point clouds. Defaults to None
.
Returns:
Returns:
tuple[torch.Tensor]: Targets of vote head.
tuple[torch.Tensor]: Targets of vote head.
...
...
mmdet3d/models/detectors/groupfree3dnet.py
View file @
9ebb75da
# Copyright (c) OpenMMLab. All rights reserved.
# Copyright (c) OpenMMLab. All rights reserved.
import
torch
from
mmdet3d.core
import
bbox3d2result
,
merge_aug_bboxes_3d
from
mmdet3d.registry
import
MODELS
from
mmdet3d.registry
import
MODELS
from
...core
import
SampleList
from
.single_stage
import
SingleStage3DDetector
from
.single_stage
import
SingleStage3DDetector
...
@@ -15,91 +14,73 @@ class GroupFree3DNet(SingleStage3DDetector):
...
@@ -15,91 +14,73 @@ class GroupFree3DNet(SingleStage3DDetector):
bbox_head
=
None
,
bbox_head
=
None
,
train_cfg
=
None
,
train_cfg
=
None
,
test_cfg
=
None
,
test_cfg
=
None
,
pretrained
=
None
):
init_cfg
=
None
,
**
kwargs
):
super
(
GroupFree3DNet
,
self
).
__init__
(
super
(
GroupFree3DNet
,
self
).
__init__
(
backbone
=
backbone
,
backbone
=
backbone
,
bbox_head
=
bbox_head
,
bbox_head
=
bbox_head
,
train_cfg
=
train_cfg
,
train_cfg
=
train_cfg
,
test_cfg
=
test_cfg
,
test_cfg
=
test_cfg
,
pretrained
=
pretrained
)
init_cfg
=
init_cfg
,
**
kwargs
)
def
forward_train
(
self
,
def
loss
(
self
,
batch_inputs_dict
:
dict
,
batch_data_samples
:
SampleList
,
points
,
**
kwargs
)
->
dict
:
img_metas
,
"""Calculate losses from a batch of inputs dict and data samples.
gt_bboxes_3d
,
gt_labels_3d
,
pts_semantic_mask
=
None
,
pts_instance_mask
=
None
,
gt_bboxes_ignore
=
None
):
"""Forward of training.
Args:
Args:
points (list[torch.Tensor]): Points of each batch.
batch_inputs_dict (dict): The model input dict which include
img_metas (list): Image metas.
'points', 'imgs' keys.
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes of each batch.
gt_labels_3d (list[torch.Tensor]): gt class labels of each batch.
- points (list[torch.Tensor]): Point cloud of each sample.
pts_semantic_mask (list[torch.Tensor]): point-wise semantic
- imgs (torch.Tensor, optional): Image of each sample.
label of each batch.
pts_instance_mask (list[torch.Tensor]): point-wise instance
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
label of each batch.
Samples. It usually includes information such as
gt_bboxes_ignore (list[torch.Tensor]): Specify
`gt_instance_3d`, `gt_pts_seg`.
which bounding.
Returns:
Returns:
dict
[str: torch.Tensor]: Losse
s.
dict
: A dictionary of loss component
s.
"""
"""
# TODO: refactor votenet series to reduce redundant codes.
x
=
self
.
extract_feat
(
batch_inputs_dict
)
points_cat
=
torch
.
stack
(
points
)
points
=
batch_inputs_dict
[
'points'
]
losses
=
self
.
bbox_head
.
loss
(
points
,
x
,
batch_data_samples
,
**
kwargs
)
x
=
self
.
extract_feat
(
points_cat
)
bbox_preds
=
self
.
bbox_head
(
x
,
self
.
train_cfg
.
sample_mod
)
loss_inputs
=
(
points
,
gt_bboxes_3d
,
gt_labels_3d
,
pts_semantic_mask
,
pts_instance_mask
,
img_metas
)
losses
=
self
.
bbox_head
.
loss
(
bbox_preds
,
*
loss_inputs
,
gt_bboxes_ignore
=
gt_bboxes_ignore
)
return
losses
return
losses
def
simple_test
(
self
,
points
,
img_metas
,
imgs
=
None
,
rescale
=
False
):
def
predict
(
self
,
batch_inputs_dict
:
dict
,
batch_data_samples
:
SampleList
,
"""Forward of testing.
**
kwargs
)
->
SampleList
:
"""Predict results from a batch of inputs and data samples with post-
processing.
Args:
Args:
points (list[torch.Tensor]): Points of each sample.
batch_inputs_dict (dict): The model input dict which include
img_metas (list): Image metas.
'points', 'imgs' keys.
rescale (bool): Whether to rescale results.
Returns:
list: Predicted 3d boxes.
"""
points_cat
=
torch
.
stack
(
points
)
x
=
self
.
extract_feat
(
points_cat
)
bbox_preds
=
self
.
bbox_head
(
x
,
self
.
test_cfg
.
sample_mod
)
bbox_list
=
self
.
bbox_head
.
get_bboxes
(
points_cat
,
bbox_preds
,
img_metas
,
rescale
=
rescale
)
bbox_results
=
[
bbox3d2result
(
bboxes
,
scores
,
labels
)
for
bboxes
,
scores
,
labels
in
bbox_list
]
return
bbox_results
def
aug_test
(
self
,
points
,
img_metas
,
imgs
=
None
,
rescale
=
False
):
- points (list[torch.Tensor]): Point cloud of each sample.
"""Test with augmentation."""
- imgs (torch.Tensor, optional): Image of each sample.
points_cat
=
[
torch
.
stack
(
pts
)
for
pts
in
points
]
feats
=
self
.
extract_feats
(
points_cat
,
img_metas
)
# only support aug_test for one sample
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
aug_bboxes
=
[]
Samples. It usually includes information such as
for
x
,
pts_cat
,
img_meta
in
zip
(
feats
,
points_cat
,
img_metas
):
`gt_instance_3d`, `gt_pts_seg`.
bbox_preds
=
self
.
bbox_head
(
x
,
self
.
test_cfg
.
sample_mod
)
rescale (bool): Whether to rescale the results.
bbox_list
=
self
.
bbox_head
.
get_bboxes
(
Defaults to True.
pts_cat
,
bbox_preds
,
img_meta
,
rescale
=
rescale
)
bbox_list
=
[
dict
(
boxes_3d
=
bboxes
,
scores_3d
=
scores
,
labels_3d
=
labels
)
for
bboxes
,
scores
,
labels
in
bbox_list
]
aug_bboxes
.
append
(
bbox_list
[
0
])
# after merging, bboxes will be rescaled to the original image size
Returns:
merged_bboxes
=
merge_aug_bboxes_3d
(
aug_bboxes
,
img_metas
,
list[:obj:`Det3DDataSample`]: Detection results of the
self
.
bbox_head
.
test_cfg
)
input images. Each Det3DDataSample usually contain
'pred_instances_3d'. And the ``pred_instances_3d`` usually
contains following keys.
return
[
merged_bboxes
]
- scores_3d (Tensor): Classification scores, has a shape
(num_instance, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (Tensor): Contains a tensor with shape
(num_instances, C) where C >=7.
"""
x
=
self
.
extract_feat
(
batch_inputs_dict
)
points
=
batch_inputs_dict
[
'points'
]
results_list
=
self
.
bbox_head
.
predict
(
points
,
x
,
batch_data_samples
,
**
kwargs
)
predictions
=
self
.
convert_to_datasample
(
results_list
)
return
predictions
tests/test_models/test_detectors/test_groupfree3d.py
0 → 100644
View file @
9ebb75da
import
unittest
import
torch
from
mmengine
import
DefaultScope
from
mmdet3d.registry
import
MODELS
from
tests.utils.model_utils
import
(
_create_detector_inputs
,
_get_detector_cfg
,
_setup_seed
)
class
TestGroupfree3d
(
unittest
.
TestCase
):
def
test_groupfree3d
(
self
):
import
mmdet3d.models
assert
hasattr
(
mmdet3d
.
models
,
'GroupFree3DNet'
)
DefaultScope
.
get_instance
(
'test_groupfree3d'
,
scope_name
=
'mmdet3d'
)
_setup_seed
(
0
)
voxel_net_cfg
=
_get_detector_cfg
(
'groupfree3d/groupfree3d_8x4_scannet-3d-18class-L6-O256.py'
)
model
=
MODELS
.
build
(
voxel_net_cfg
)
num_gt_instance
=
5
data
=
[
_create_detector_inputs
(
num_gt_instance
=
num_gt_instance
,
points_feat_dim
=
3
,
with_pts_semantic_mask
=
True
,
with_pts_instance_mask
=
True
)
]
if
torch
.
cuda
.
is_available
():
model
=
model
.
cuda
()
# test simple_test
with
torch
.
no_grad
():
batch_inputs
,
data_samples
=
model
.
data_preprocessor
(
data
,
True
)
results
=
model
.
forward
(
batch_inputs
,
data_samples
,
mode
=
'predict'
)
self
.
assertEqual
(
len
(
results
),
len
(
data
))
self
.
assertIn
(
'bboxes_3d'
,
results
[
0
].
pred_instances_3d
)
self
.
assertIn
(
'scores_3d'
,
results
[
0
].
pred_instances_3d
)
self
.
assertIn
(
'labels_3d'
,
results
[
0
].
pred_instances_3d
)
# save the memory
with
torch
.
no_grad
():
losses
=
model
.
forward
(
batch_inputs
,
data_samples
,
mode
=
'loss'
)
self
.
assertGreater
(
losses
[
'sampling_objectness_loss'
],
0
)
self
.
assertGreater
(
losses
[
'proposal.objectness_loss'
],
0
)
self
.
assertGreater
(
losses
[
's0.objectness_loss'
],
0
)
self
.
assertGreater
(
losses
[
's1.size_res_loss'
],
0
)
self
.
assertGreater
(
losses
[
's4.size_class_loss'
],
0
)
tests/utils/model_utils.py
View file @
9ebb75da
...
@@ -7,7 +7,7 @@ import numpy as np
...
@@ -7,7 +7,7 @@ import numpy as np
import
torch
import
torch
from
mmengine
import
InstanceData
from
mmengine
import
InstanceData
from
mmdet3d.core
import
Det3DDataSample
,
LiDARInstance3DBoxes
from
mmdet3d.core
import
Det3DDataSample
,
LiDARInstance3DBoxes
,
PointData
def
_setup_seed
(
seed
):
def
_setup_seed
(
seed
):
...
@@ -71,22 +71,28 @@ def _get_detector_cfg(fname):
...
@@ -71,22 +71,28 @@ def _get_detector_cfg(fname):
return
model
return
model
def
_create_detector_inputs
(
seed
=
0
,
def
_create_detector_inputs
(
seed
=
0
,
with_points
=
True
,
with_points
=
True
,
with_img
=
False
,
with_img
=
False
,
num_gt_instance
=
20
,
num_gt_instance
=
20
,
num_points
=
10
,
points_feat_dim
=
4
,
points_feat_dim
=
4
,
num_classes
=
3
,
gt_bboxes_dim
=
7
,
gt_bboxes_dim
=
7
,
num_classes
=
3
):
with_pts_semantic_mask
=
False
,
with_pts_instance_mask
=
False
,
):
_setup_seed
(
seed
)
_setup_seed
(
seed
)
inputs_dict
=
dict
()
if
with_points
:
if
with_points
:
points
=
torch
.
rand
([
3
,
points_feat_dim
])
points
=
torch
.
rand
([
num_points
,
points_feat_dim
])
inputs_dict
[
'points'
]
=
points
else
:
points
=
None
if
with_img
:
if
with_img
:
img
=
torch
.
rand
(
3
,
10
,
10
)
img
=
torch
.
rand
(
3
,
10
,
10
)
inputs_dict
[
'img'
]
=
img
else
:
img
=
None
inputs_dict
=
dict
(
img
=
img
,
points
=
points
)
gt_instance_3d
=
InstanceData
()
gt_instance_3d
=
InstanceData
()
gt_instance_3d
.
bboxes_3d
=
LiDARInstance3DBoxes
(
gt_instance_3d
.
bboxes_3d
=
LiDARInstance3DBoxes
(
torch
.
rand
([
num_gt_instance
,
gt_bboxes_dim
]),
box_dim
=
gt_bboxes_dim
)
torch
.
rand
([
num_gt_instance
,
gt_bboxes_dim
]),
box_dim
=
gt_bboxes_dim
)
...
@@ -94,5 +100,12 @@ def _create_detector_inputs(seed=0,
...
@@ -94,5 +100,12 @@ def _create_detector_inputs(seed=0,
data_sample
=
Det3DDataSample
(
data_sample
=
Det3DDataSample
(
metainfo
=
dict
(
box_type_3d
=
LiDARInstance3DBoxes
))
metainfo
=
dict
(
box_type_3d
=
LiDARInstance3DBoxes
))
data_sample
.
gt_instances_3d
=
gt_instance_3d
data_sample
.
gt_instances_3d
=
gt_instance_3d
data_sample
.
seg_data
=
dict
()
data_sample
.
gt_pts_seg
=
PointData
()
if
with_pts_instance_mask
:
pts_instance_mask
=
torch
.
randint
(
0
,
num_gt_instance
,
[
num_points
])
data_sample
.
gt_pts_seg
[
'pts_instance_mask'
]
=
pts_instance_mask
if
with_pts_semantic_mask
:
pts_semantic_mask
=
torch
.
randint
(
0
,
num_classes
,
[
num_points
])
data_sample
.
gt_pts_seg
[
'pts_semantic_mask'
]
=
pts_semantic_mask
return
dict
(
inputs
=
inputs_dict
,
data_sample
=
data_sample
)
return
dict
(
inputs
=
inputs_dict
,
data_sample
=
data_sample
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment