Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
mmdetection3d
Commits
9ebb75da
Commit
9ebb75da
authored
Jul 18, 2022
by
jshilong
Committed by
ChaimZhu
Jul 20, 2022
Browse files
[refactor]Groupfree3d
parent
b496f579
Changes
17
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
807 additions
and
586 deletions
+807
-586
configs/_base_/models/groupfree3d.py
configs/_base_/models/groupfree3d.py
+14
-10
configs/groupfree3d/groupfree3d_8x4_scannet-3d-18class-L12-O256.py
...roupfree3d/groupfree3d_8x4_scannet-3d-18class-L12-O256.py
+64
-39
configs/groupfree3d/groupfree3d_8x4_scannet-3d-18class-L6-O256.py
...groupfree3d/groupfree3d_8x4_scannet-3d-18class-L6-O256.py
+64
-39
configs/groupfree3d/groupfree3d_8x4_scannet-3d-18class-w2x-L12-O256.py
...free3d/groupfree3d_8x4_scannet-3d-18class-w2x-L12-O256.py
+64
-39
configs/groupfree3d/groupfree3d_8x4_scannet-3d-18class-w2x-L12-O512.py
...free3d/groupfree3d_8x4_scannet-3d-18class-w2x-L12-O512.py
+64
-39
configs/h3dnet/debug.py
configs/h3dnet/debug.py
+69
-0
mmdet3d/datasets/det3d_dataset.py
mmdet3d/datasets/det3d_dataset.py
+2
-0
mmdet3d/datasets/kitti_dataset.py
mmdet3d/datasets/kitti_dataset.py
+1
-0
mmdet3d/datasets/pipelines/formating.py
mmdet3d/datasets/pipelines/formating.py
+4
-4
mmdet3d/datasets/scannet_dataset.py
mmdet3d/datasets/scannet_dataset.py
+1
-0
mmdet3d/datasets/sunrgbd_dataset.py
mmdet3d/datasets/sunrgbd_dataset.py
+8
-124
mmdet3d/models/data_preprocessors/data_preprocessor.py
mmdet3d/models/data_preprocessors/data_preprocessor.py
+1
-1
mmdet3d/models/dense_heads/groupfree3d_head.py
mmdet3d/models/dense_heads/groupfree3d_head.py
+316
-202
mmdet3d/models/dense_heads/vote_head.py
mmdet3d/models/dense_heads/vote_head.py
+4
-4
mmdet3d/models/detectors/groupfree3dnet.py
mmdet3d/models/detectors/groupfree3dnet.py
+52
-71
tests/test_models/test_detectors/test_groupfree3d.py
tests/test_models/test_detectors/test_groupfree3d.py
+52
-0
tests/utils/model_utils.py
tests/utils/model_utils.py
+27
-14
No files found.
configs/_base_/models/groupfree3d.py
View file @
9ebb75da
model
=
dict
(
type
=
'GroupFree3DNet'
,
data_preprocessor
=
dict
(
type
=
'Det3DDataPreprocessor'
),
backbone
=
dict
(
type
=
'PointNet2SASSG'
,
in_channels
=
3
,
...
...
@@ -38,33 +39,36 @@ model = dict(
pred_layer_cfg
=
dict
(
in_channels
=
288
,
shared_conv_channels
=
(
288
,
288
),
bias
=
True
),
sampling_objectness_loss
=
dict
(
type
=
'FocalLoss'
,
type
=
'
mmdet.
FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
8.0
),
objectness_loss
=
dict
(
type
=
'FocalLoss'
,
type
=
'
mmdet.
FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
center_loss
=
dict
(
type
=
'SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
type
=
'
mmdet.
SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
dir_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
dir_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
type
=
'
mmdet.
SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
size_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
size_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
type
=
'mmdet.SmoothL1Loss'
,
beta
=
1.0
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
semantic_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
)),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
)),
# model training and testing settings
train_cfg
=
dict
(
sample_mod
=
'kps'
),
train_cfg
=
dict
(
sample_mod
e
=
'kps'
),
test_cfg
=
dict
(
sample_mod
=
'kps'
,
sample_mod
e
=
'kps'
,
nms_thr
=
0.25
,
score_thr
=
0.0
,
per_class_proposal
=
True
,
...
...
configs/groupfree3d/groupfree3d_8x4_scannet-3d-18class-L12-O256.py
View file @
9ebb75da
...
...
@@ -35,34 +35,37 @@ model = dict(
[
1.1511526
,
1.0546296
,
0.49706793
],
[
0.47535285
,
0.49249494
,
0.5802117
]]),
sampling_objectness_loss
=
dict
(
type
=
'FocalLoss'
,
type
=
'
mmdet.
FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
8.0
),
objectness_loss
=
dict
(
type
=
'FocalLoss'
,
type
=
'
mmdet.
FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
center_loss
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.04
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
type
=
'mmdet.SmoothL1Loss'
,
beta
=
0.04
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
dir_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
dir_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
type
=
'
mmdet.
SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
size_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
size_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
type
=
'
mmdet.
SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
reduction
=
'sum'
,
loss_weight
=
10.0
/
9.0
),
semantic_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
)),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
)),
test_cfg
=
dict
(
sample_mod
=
'kps'
,
sample_mod
e
=
'kps'
,
nms_thr
=
0.25
,
score_thr
=
0.0
,
per_class_proposal
=
True
,
...
...
@@ -75,6 +78,9 @@ class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
'bookshelf'
,
'picture'
,
'counter'
,
'desk'
,
'curtain'
,
'refrigerator'
,
'showercurtrain'
,
'toilet'
,
'sink'
,
'bathtub'
,
'garbagebin'
)
metainfo
=
dict
(
CLASSES
=
class_names
)
train_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
...
...
@@ -102,9 +108,8 @@ train_pipeline = [
type
=
'GlobalRotScaleTrans'
,
rot_range
=
[
-
0.087266
,
0.087266
],
scale_ratio_range
=
[
1.0
,
1.0
]),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'
Collect3D
'
,
type
=
'
Pack3DDetInputs
'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
,
'pts_semantic_mask'
,
'pts_instance_mask'
...
...
@@ -134,52 +139,60 @@ test_pipeline = [
flip_ratio_bev_horizontal
=
0.5
,
flip_ratio_bev_vertical
=
0.5
),
dict
(
type
=
'PointSample'
,
num_points
=
50000
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
])
])
]),
dict
(
type
=
'Pack3DDetInputs'
,
keys
=
[
'points'
])
]
data
=
dict
(
samples_per_gpu
=
8
,
workers_per_gpu
=
4
,
train
=
dict
(
train_dataloader
=
dict
(
batch_size
=
8
,
num_workers
=
4
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
True
),
dataset
=
dict
(
type
=
'RepeatDataset'
,
times
=
5
,
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_train.pkl'
,
ann_file
=
'scannet_infos_train.pkl'
,
pipeline
=
train_pipeline
,
filter_empty_gt
=
False
,
classes
=
class_names
,
metainfo
=
metainfo
,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d
=
'Depth'
)),
val
=
dict
(
box_type_3d
=
'Depth'
)))
val_dataloader
=
dict
(
batch_size
=
1
,
num_workers
=
1
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
False
),
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_val.pkl'
,
ann_file
=
'scannet_infos_val.pkl'
,
pipeline
=
test_pipeline
,
classes
=
class_names
,
metainfo
=
metainfo
,
test_mode
=
True
,
box_type_3d
=
'Depth'
),
test
=
dict
(
box_type_3d
=
'Depth'
))
test_dataloader
=
dict
(
batch_size
=
1
,
num_workers
=
1
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
False
),
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_val.pkl'
,
ann_file
=
'scannet_infos_val.pkl'
,
pipeline
=
test_pipeline
,
classes
=
class_names
,
metainfo
=
metainfo
,
test_mode
=
True
,
box_type_3d
=
'Depth'
))
val_evaluator
=
dict
(
type
=
'IndoorMetric'
)
test_evaluator
=
val_evaluator
# optimizer
lr
=
0.006
optimizer
=
dict
(
lr
=
lr
,
weight_decay
=
0.0005
,
optim_wrapper
=
dict
(
type
=
'OptimWrapper'
,
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
lr
,
weight_decay
=
0.0005
),
clip_grad
=
dict
(
max_norm
=
0.1
,
norm_type
=
2
),
paramwise_cfg
=
dict
(
custom_keys
=
{
'bbox_head.decoder_layers'
:
dict
(
lr_mult
=
0.1
,
decay_mult
=
1.0
),
...
...
@@ -191,9 +204,21 @@ optimizer = dict(
'bbox_head.decoder_key_proj'
:
dict
(
lr_mult
=
0.1
,
decay_mult
=
1.0
)
}))
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
0.1
,
norm_type
=
2
))
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
None
,
step
=
[
56
,
68
])
# learning rate
param_scheduler
=
[
dict
(
type
=
'MultiStepLR'
,
begin
=
0
,
end
=
80
,
by_epoch
=
True
,
milestones
=
[
56
,
68
],
gamma
=
0.1
)
]
# training schedule for 1x
train_cfg
=
dict
(
type
=
'EpochBasedTrainLoop'
,
max_epochs
=
80
,
val_interval
=
1
)
val_cfg
=
dict
(
type
=
'ValLoop'
)
test_cfg
=
dict
(
type
=
'TestLoop'
)
# runtime settings
runner
=
dict
(
type
=
'EpochBasedRunner'
,
max_epochs
=
80
)
checkpoint_config
=
dict
(
interval
=
1
,
max_keep_ckpts
=
10
)
default_hooks
=
dict
(
checkpoint
=
dict
(
type
=
'CheckpointHook'
,
interval
=
1
,
max_keep_ckpts
=
10
))
configs/groupfree3d/groupfree3d_8x4_scannet-3d-18class-L6-O256.py
View file @
9ebb75da
...
...
@@ -34,34 +34,37 @@ model = dict(
[
1.1511526
,
1.0546296
,
0.49706793
],
[
0.47535285
,
0.49249494
,
0.5802117
]]),
sampling_objectness_loss
=
dict
(
type
=
'FocalLoss'
,
type
=
'
mmdet.
FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
8.0
),
objectness_loss
=
dict
(
type
=
'FocalLoss'
,
type
=
'
mmdet.
FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
center_loss
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.04
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
type
=
'mmdet.SmoothL1Loss'
,
beta
=
0.04
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
dir_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
dir_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
type
=
'
mmdet.
SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
size_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
size_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
type
=
'
mmdet.
SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
reduction
=
'sum'
,
loss_weight
=
10.0
/
9.0
),
semantic_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
)),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
)),
test_cfg
=
dict
(
sample_mod
=
'kps'
,
sample_mod
e
=
'kps'
,
nms_thr
=
0.25
,
score_thr
=
0.0
,
per_class_proposal
=
True
,
...
...
@@ -74,6 +77,9 @@ class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
'bookshelf'
,
'picture'
,
'counter'
,
'desk'
,
'curtain'
,
'refrigerator'
,
'showercurtrain'
,
'toilet'
,
'sink'
,
'bathtub'
,
'garbagebin'
)
metainfo
=
dict
(
CLASSES
=
class_names
)
train_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
...
...
@@ -101,9 +107,8 @@ train_pipeline = [
type
=
'GlobalRotScaleTrans'
,
rot_range
=
[
-
0.087266
,
0.087266
],
scale_ratio_range
=
[
1.0
,
1.0
]),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'
Collect3D
'
,
type
=
'
Pack3DDetInputs
'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
,
'pts_semantic_mask'
,
'pts_instance_mask'
...
...
@@ -133,52 +138,60 @@ test_pipeline = [
flip_ratio_bev_horizontal
=
0.5
,
flip_ratio_bev_vertical
=
0.5
),
dict
(
type
=
'PointSample'
,
num_points
=
50000
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
])
])
]),
dict
(
type
=
'Pack3DDetInputs'
,
keys
=
[
'points'
])
]
data
=
dict
(
samples_per_gpu
=
8
,
workers_per_gpu
=
4
,
train
=
dict
(
train_dataloader
=
dict
(
batch_size
=
8
,
num_workers
=
4
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
True
),
dataset
=
dict
(
type
=
'RepeatDataset'
,
times
=
5
,
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_train.pkl'
,
ann_file
=
'scannet_infos_train.pkl'
,
pipeline
=
train_pipeline
,
filter_empty_gt
=
False
,
classes
=
class_names
,
metainfo
=
metainfo
,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d
=
'Depth'
)),
val
=
dict
(
box_type_3d
=
'Depth'
)))
val_dataloader
=
dict
(
batch_size
=
1
,
num_workers
=
1
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
False
),
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_val.pkl'
,
ann_file
=
'scannet_infos_val.pkl'
,
pipeline
=
test_pipeline
,
classes
=
class_names
,
metainfo
=
metainfo
,
test_mode
=
True
,
box_type_3d
=
'Depth'
),
test
=
dict
(
box_type_3d
=
'Depth'
))
test_dataloader
=
dict
(
batch_size
=
1
,
num_workers
=
1
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
False
),
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_val.pkl'
,
ann_file
=
'scannet_infos_val.pkl'
,
pipeline
=
test_pipeline
,
classes
=
class_names
,
metainfo
=
metainfo
,
test_mode
=
True
,
box_type_3d
=
'Depth'
))
val_evaluator
=
dict
(
type
=
'IndoorMetric'
)
test_evaluator
=
val_evaluator
# optimizer
lr
=
0.006
optimizer
=
dict
(
lr
=
lr
,
weight_decay
=
0.0005
,
optim_wrapper
=
dict
(
type
=
'OptimWrapper'
,
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
lr
,
weight_decay
=
0.0005
),
clip_grad
=
dict
(
max_norm
=
0.1
,
norm_type
=
2
),
paramwise_cfg
=
dict
(
custom_keys
=
{
'bbox_head.decoder_layers'
:
dict
(
lr_mult
=
0.1
,
decay_mult
=
1.0
),
...
...
@@ -190,9 +203,21 @@ optimizer = dict(
'bbox_head.decoder_key_proj'
:
dict
(
lr_mult
=
0.1
,
decay_mult
=
1.0
)
}))
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
0.1
,
norm_type
=
2
))
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
None
,
step
=
[
56
,
68
])
# learning rate
param_scheduler
=
[
dict
(
type
=
'MultiStepLR'
,
begin
=
0
,
end
=
80
,
by_epoch
=
True
,
milestones
=
[
56
,
68
],
gamma
=
0.1
)
]
# training schedule for 1x
train_cfg
=
dict
(
type
=
'EpochBasedTrainLoop'
,
max_epochs
=
80
,
val_interval
=
1
)
val_cfg
=
dict
(
type
=
'ValLoop'
)
test_cfg
=
dict
(
type
=
'TestLoop'
)
# runtime settings
runner
=
dict
(
type
=
'EpochBasedRunner'
,
max_epochs
=
80
)
checkpoint_config
=
dict
(
interval
=
1
,
max_keep_ckpts
=
10
)
default_hooks
=
dict
(
checkpoint
=
dict
(
type
=
'CheckpointHook'
,
interval
=
1
,
max_keep_ckpts
=
10
))
configs/groupfree3d/groupfree3d_8x4_scannet-3d-18class-w2x-L12-O256.py
View file @
9ebb75da
...
...
@@ -50,34 +50,37 @@ model = dict(
[
1.1511526
,
1.0546296
,
0.49706793
],
[
0.47535285
,
0.49249494
,
0.5802117
]]),
sampling_objectness_loss
=
dict
(
type
=
'FocalLoss'
,
type
=
'
mmdet.
FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
8.0
),
objectness_loss
=
dict
(
type
=
'FocalLoss'
,
type
=
'
mmdet.
FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
center_loss
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.04
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
type
=
'mmdet.SmoothL1Loss'
,
beta
=
0.04
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
dir_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
dir_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
type
=
'
mmdet.
SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
size_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
size_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
type
=
'
mmdet.
SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
reduction
=
'sum'
,
loss_weight
=
10.0
/
9.0
),
semantic_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
)),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
)),
test_cfg
=
dict
(
sample_mod
=
'kps'
,
sample_mod
e
=
'kps'
,
nms_thr
=
0.25
,
score_thr
=
0.0
,
per_class_proposal
=
True
,
...
...
@@ -90,6 +93,9 @@ class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
'bookshelf'
,
'picture'
,
'counter'
,
'desk'
,
'curtain'
,
'refrigerator'
,
'showercurtrain'
,
'toilet'
,
'sink'
,
'bathtub'
,
'garbagebin'
)
metainfo
=
dict
(
CLASSES
=
class_names
)
train_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
...
...
@@ -117,9 +123,8 @@ train_pipeline = [
type
=
'GlobalRotScaleTrans'
,
rot_range
=
[
-
0.087266
,
0.087266
],
scale_ratio_range
=
[
1.0
,
1.0
]),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'
Collect3D
'
,
type
=
'
Pack3DDetInputs
'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
,
'pts_semantic_mask'
,
'pts_instance_mask'
...
...
@@ -149,52 +154,60 @@ test_pipeline = [
flip_ratio_bev_horizontal
=
0.5
,
flip_ratio_bev_vertical
=
0.5
),
dict
(
type
=
'PointSample'
,
num_points
=
50000
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
])
])
]),
dict
(
type
=
'Pack3DDetInputs'
,
keys
=
[
'points'
])
]
data
=
dict
(
samples_per_gpu
=
8
,
workers_per_gpu
=
4
,
train
=
dict
(
train_dataloader
=
dict
(
batch_size
=
8
,
num_workers
=
4
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
True
),
dataset
=
dict
(
type
=
'RepeatDataset'
,
times
=
5
,
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_train.pkl'
,
ann_file
=
'scannet_infos_train.pkl'
,
pipeline
=
train_pipeline
,
filter_empty_gt
=
False
,
classes
=
class_names
,
metainfo
=
metainfo
,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d
=
'Depth'
)),
val
=
dict
(
box_type_3d
=
'Depth'
)))
val_dataloader
=
dict
(
batch_size
=
1
,
num_workers
=
1
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
False
),
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_val.pkl'
,
ann_file
=
'scannet_infos_val.pkl'
,
pipeline
=
test_pipeline
,
classes
=
class_names
,
metainfo
=
metainfo
,
test_mode
=
True
,
box_type_3d
=
'Depth'
),
test
=
dict
(
box_type_3d
=
'Depth'
))
test_dataloader
=
dict
(
batch_size
=
1
,
num_workers
=
1
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
False
),
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_val.pkl'
,
ann_file
=
'scannet_infos_val.pkl'
,
pipeline
=
test_pipeline
,
classes
=
class_names
,
metainfo
=
metainfo
,
test_mode
=
True
,
box_type_3d
=
'Depth'
))
val_evaluator
=
dict
(
type
=
'IndoorMetric'
)
test_evaluator
=
val_evaluator
# optimizer
lr
=
0.006
optimizer
=
dict
(
lr
=
lr
,
weight_decay
=
0.0005
,
optim_wrapper
=
dict
(
type
=
'OptimWrapper'
,
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
lr
,
weight_decay
=
0.0005
),
clip_grad
=
dict
(
max_norm
=
0.1
,
norm_type
=
2
),
paramwise_cfg
=
dict
(
custom_keys
=
{
'bbox_head.decoder_layers'
:
dict
(
lr_mult
=
0.1
,
decay_mult
=
1.0
),
...
...
@@ -206,9 +219,21 @@ optimizer = dict(
'bbox_head.decoder_key_proj'
:
dict
(
lr_mult
=
0.1
,
decay_mult
=
1.0
)
}))
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
0.1
,
norm_type
=
2
))
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
None
,
step
=
[
56
,
68
])
# learning rate
param_scheduler
=
[
dict
(
type
=
'MultiStepLR'
,
begin
=
0
,
end
=
80
,
by_epoch
=
True
,
milestones
=
[
56
,
68
],
gamma
=
0.1
)
]
# training schedule for 1x
train_cfg
=
dict
(
type
=
'EpochBasedTrainLoop'
,
max_epochs
=
80
,
val_interval
=
1
)
val_cfg
=
dict
(
type
=
'ValLoop'
)
test_cfg
=
dict
(
type
=
'TestLoop'
)
# runtime settings
runner
=
dict
(
type
=
'EpochBasedRunner'
,
max_epochs
=
80
)
checkpoint_config
=
dict
(
interval
=
1
,
max_keep_ckpts
=
10
)
default_hooks
=
dict
(
checkpoint
=
dict
(
type
=
'CheckpointHook'
,
interval
=
1
,
max_keep_ckpts
=
10
))
configs/groupfree3d/groupfree3d_8x4_scannet-3d-18class-w2x-L12-O512.py
View file @
9ebb75da
...
...
@@ -51,34 +51,37 @@ model = dict(
[
1.1511526
,
1.0546296
,
0.49706793
],
[
0.47535285
,
0.49249494
,
0.5802117
]]),
sampling_objectness_loss
=
dict
(
type
=
'FocalLoss'
,
type
=
'
mmdet.
FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
8.0
),
objectness_loss
=
dict
(
type
=
'FocalLoss'
,
type
=
'
mmdet.
FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
center_loss
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.04
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
type
=
'mmdet.SmoothL1Loss'
,
beta
=
0.04
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
dir_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
dir_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
type
=
'
mmdet.
SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
size_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
size_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
type
=
'
mmdet.
SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
reduction
=
'sum'
,
loss_weight
=
10.0
/
9.0
),
semantic_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
)),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
)),
test_cfg
=
dict
(
sample_mod
=
'kps'
,
sample_mod
e
=
'kps'
,
nms_thr
=
0.25
,
score_thr
=
0.0
,
per_class_proposal
=
True
,
...
...
@@ -91,6 +94,9 @@ class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
'bookshelf'
,
'picture'
,
'counter'
,
'desk'
,
'curtain'
,
'refrigerator'
,
'showercurtrain'
,
'toilet'
,
'sink'
,
'bathtub'
,
'garbagebin'
)
metainfo
=
dict
(
CLASSES
=
class_names
)
train_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
...
...
@@ -118,9 +124,8 @@ train_pipeline = [
type
=
'GlobalRotScaleTrans'
,
rot_range
=
[
-
0.087266
,
0.087266
],
scale_ratio_range
=
[
1.0
,
1.0
]),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'
Collect3D
'
,
type
=
'
Pack3DDetInputs
'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
,
'pts_semantic_mask'
,
'pts_instance_mask'
...
...
@@ -150,52 +155,60 @@ test_pipeline = [
flip_ratio_bev_horizontal
=
0.5
,
flip_ratio_bev_vertical
=
0.5
),
dict
(
type
=
'PointSample'
,
num_points
=
50000
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
])
])
]),
dict
(
type
=
'Pack3DDetInputs'
,
keys
=
[
'points'
])
]
data
=
dict
(
samples_per_gpu
=
8
,
workers_per_gpu
=
4
,
train
=
dict
(
train_dataloader
=
dict
(
batch_size
=
8
,
num_workers
=
4
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
True
),
dataset
=
dict
(
type
=
'RepeatDataset'
,
times
=
5
,
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_train.pkl'
,
ann_file
=
'scannet_infos_train.pkl'
,
pipeline
=
train_pipeline
,
filter_empty_gt
=
False
,
classes
=
class_names
,
metainfo
=
metainfo
,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d
=
'Depth'
)),
val
=
dict
(
box_type_3d
=
'Depth'
)))
val_dataloader
=
dict
(
batch_size
=
1
,
num_workers
=
1
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
False
),
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_val.pkl'
,
ann_file
=
'scannet_infos_val.pkl'
,
pipeline
=
test_pipeline
,
classes
=
class_names
,
metainfo
=
metainfo
,
test_mode
=
True
,
box_type_3d
=
'Depth'
),
test
=
dict
(
box_type_3d
=
'Depth'
))
test_dataloader
=
dict
(
batch_size
=
1
,
num_workers
=
1
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
False
),
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_val.pkl'
,
ann_file
=
'scannet_infos_val.pkl'
,
pipeline
=
test_pipeline
,
classes
=
class_names
,
metainfo
=
metainfo
,
test_mode
=
True
,
box_type_3d
=
'Depth'
))
val_evaluator
=
dict
(
type
=
'IndoorMetric'
)
test_evaluator
=
val_evaluator
# optimizer
lr
=
0.006
optimizer
=
dict
(
lr
=
lr
,
weight_decay
=
0.0005
,
optim_wrapper
=
dict
(
type
=
'OptimWrapper'
,
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
lr
,
weight_decay
=
0.0005
),
clip_grad
=
dict
(
max_norm
=
0.1
,
norm_type
=
2
),
paramwise_cfg
=
dict
(
custom_keys
=
{
'bbox_head.decoder_layers'
:
dict
(
lr_mult
=
0.1
,
decay_mult
=
1.0
),
...
...
@@ -207,9 +220,21 @@ optimizer = dict(
'bbox_head.decoder_key_proj'
:
dict
(
lr_mult
=
0.1
,
decay_mult
=
1.0
)
}))
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
0.1
,
norm_type
=
2
))
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
None
,
step
=
[
56
,
68
])
# learning rate
param_scheduler
=
[
dict
(
type
=
'MultiStepLR'
,
begin
=
0
,
end
=
80
,
by_epoch
=
True
,
milestones
=
[
56
,
68
],
gamma
=
0.1
)
]
# training schedule for 1x
train_cfg
=
dict
(
type
=
'EpochBasedTrainLoop'
,
max_epochs
=
80
,
val_interval
=
1
)
val_cfg
=
dict
(
type
=
'ValLoop'
)
test_cfg
=
dict
(
type
=
'TestLoop'
)
# runtime settings
runner
=
dict
(
type
=
'EpochBasedRunner'
,
max_epochs
=
80
)
checkpoint_config
=
dict
(
interval
=
1
,
max_keep_ckpts
=
10
)
default_hooks
=
dict
(
checkpoint
=
dict
(
type
=
'CheckpointHook'
,
interval
=
1
,
max_keep_ckpts
=
10
))
configs/h3dnet/debug.py
0 → 100644
View file @
9ebb75da
_base_
=
[
'../_base_/datasets/scannet-3d-18class.py'
,
'../_base_/models/h3dnet.py'
,
'../_base_/schedules/schedule_3x.py'
,
'../_base_/default_runtime.py'
]
# model settings
model
=
dict
(
rpn_head
=
dict
(
num_classes
=
18
,
bbox_coder
=
dict
(
type
=
'PartialBinBasedBBoxCoder'
,
num_sizes
=
18
,
num_dir_bins
=
24
,
with_rot
=
False
,
mean_sizes
=
[[
0.76966727
,
0.8116021
,
0.92573744
],
[
1.876858
,
1.8425595
,
1.1931566
],
[
0.61328
,
0.6148609
,
0.7182701
],
[
1.3955007
,
1.5121545
,
0.83443564
],
[
0.97949594
,
1.0675149
,
0.6329687
],
[
0.531663
,
0.5955577
,
1.7500148
],
[
0.9624706
,
0.72462326
,
1.1481868
],
[
0.83221924
,
1.0490936
,
1.6875663
],
[
0.21132214
,
0.4206159
,
0.5372846
],
[
1.4440073
,
1.8970833
,
0.26985747
],
[
1.0294262
,
1.4040797
,
0.87554324
],
[
1.3766412
,
0.65521795
,
1.6813129
],
[
0.6650819
,
0.71111923
,
1.298853
],
[
0.41999173
,
0.37906948
,
1.7513971
],
[
0.59359556
,
0.5912492
,
0.73919016
],
[
0.50867593
,
0.50656086
,
0.30136237
],
[
1.1511526
,
1.0546296
,
0.49706793
],
[
0.47535285
,
0.49249494
,
0.5802117
]])),
roi_head
=
dict
(
bbox_head
=
dict
(
num_classes
=
18
,
bbox_coder
=
dict
(
type
=
'PartialBinBasedBBoxCoder'
,
num_sizes
=
18
,
num_dir_bins
=
24
,
with_rot
=
False
,
mean_sizes
=
[[
0.76966727
,
0.8116021
,
0.92573744
],
[
1.876858
,
1.8425595
,
1.1931566
],
[
0.61328
,
0.6148609
,
0.7182701
],
[
1.3955007
,
1.5121545
,
0.83443564
],
[
0.97949594
,
1.0675149
,
0.6329687
],
[
0.531663
,
0.5955577
,
1.7500148
],
[
0.9624706
,
0.72462326
,
1.1481868
],
[
0.83221924
,
1.0490936
,
1.6875663
],
[
0.21132214
,
0.4206159
,
0.5372846
],
[
1.4440073
,
1.8970833
,
0.26985747
],
[
1.0294262
,
1.4040797
,
0.87554324
],
[
1.3766412
,
0.65521795
,
1.6813129
],
[
0.6650819
,
0.71111923
,
1.298853
],
[
0.41999173
,
0.37906948
,
1.7513971
],
[
0.59359556
,
0.5912492
,
0.73919016
],
[
0.50867593
,
0.50656086
,
0.30136237
],
[
1.1511526
,
1.0546296
,
0.49706793
],
[
0.47535285
,
0.49249494
,
0.5802117
]]))))
train_dataloader
=
dict
(
batch_size
=
3
,
num_workers
=
2
,
)
# yapf:disable
default_hooks
=
dict
(
logger
=
dict
(
type
=
'LoggerHook'
,
interval
=
30
)
)
# yapf:enable
mmdet3d/datasets/det3d_dataset.py
View file @
9ebb75da
...
...
@@ -229,6 +229,8 @@ class Det3DDataset(BaseDataset):
self
.
data_prefix
.
get
(
'pts'
,
''
),
info
[
'lidar_points'
][
'lidar_path'
])
info
[
'lidar_path'
]
=
info
[
'lidar_points'
][
'lidar_path'
]
if
self
.
modality
[
'use_camera'
]:
for
cam_id
,
img_info
in
info
[
'images'
].
items
():
if
'img_path'
in
img_info
:
...
...
mmdet3d/datasets/kitti_dataset.py
View file @
9ebb75da
...
...
@@ -128,6 +128,7 @@ class KittiDataset(Det3DDataset):
"""
ann_info
=
super
().
parse_ann_info
(
info
)
if
ann_info
is
None
:
ann_info
=
dict
()
# empty instance
ann_info
[
'gt_bboxes_3d'
]
=
np
.
zeros
((
0
,
7
),
dtype
=
np
.
float32
)
ann_info
[
'gt_labels_3d'
]
=
np
.
zeros
(
0
,
dtype
=
np
.
int64
)
...
...
mmdet3d/datasets/pipelines/formating.py
View file @
9ebb75da
...
...
@@ -31,14 +31,14 @@ class Pack3DDetInputs(BaseTransform):
def
__init__
(
self
,
keys
:
dict
,
meta_keys
:
dict
=
(
'
filename
'
,
'ori_shape'
,
'img_shape'
,
'lidar2img'
,
meta_keys
:
dict
=
(
'
img_path
'
,
'ori_shape'
,
'img_shape'
,
'lidar2img'
,
'depth2img'
,
'cam2img'
,
'pad_shape'
,
'scale_factor'
,
'flip'
,
'pcd_horizontal_flip'
,
'pcd_vertical_flip'
,
'box_mode_3d'
,
'box_type_3d'
,
'img_norm_cfg'
,
'pcd_trans'
,
'sample_idx'
,
'pcd_scale_factor'
,
'pcd_rotation'
,
'pcd_rotation_angle'
,
'pts_filename'
,
'transformation_3d_flow'
,
'trans_mat'
,
'affine_aug'
)):
'pcd_rotation'
,
'pcd_rotation_angle'
,
'lidar_path'
,
'transformation_3d_flow'
,
'trans_mat'
,
'affine_aug'
)):
self
.
keys
=
keys
self
.
meta_keys
=
meta_keys
...
...
mmdet3d/datasets/scannet_dataset.py
View file @
9ebb75da
...
...
@@ -138,6 +138,7 @@ class ScanNetDataset(Det3DDataset):
ann_info
=
super
().
parse_ann_info
(
info
)
# empty gt
if
ann_info
is
None
:
ann_info
=
dict
()
ann_info
[
'gt_bboxes_3d'
]
=
np
.
zeros
((
0
,
6
),
dtype
=
np
.
float32
)
ann_info
[
'gt_labels_3d'
]
=
np
.
zeros
((
0
,
),
dtype
=
np
.
int64
)
# to target box structure
...
...
mmdet3d/datasets/sunrgbd_dataset.py
View file @
9ebb75da
# Copyright (c) OpenMMLab. All rights reserved.
from
collections
import
OrderedDict
from
os
import
path
as
osp
from
typing
import
Callable
,
List
,
Optional
,
Union
from
mmdet3d.core
import
show_multi_modality_result
,
show_result
import
numpy
as
np
from
mmdet3d.core.bbox
import
DepthInstance3DBoxes
from
mmdet3d.registry
import
DATASETS
from
mmdet.core
import
eval_map
from
.det3d_dataset
import
Det3DDataset
from
.pipelines
import
Compose
@
DATASETS
.
register_module
()
...
...
@@ -86,128 +83,15 @@ class SUNRGBDDataset(Det3DDataset):
dict: Processed `ann_info`
"""
ann_info
=
super
().
parse_ann_info
(
info
)
# empty gt
if
ann_info
is
None
:
ann_info
=
dict
()
ann_info
[
'gt_bboxes_3d'
]
=
np
.
zeros
((
0
,
6
),
dtype
=
np
.
float32
)
ann_info
[
'gt_labels_3d'
]
=
np
.
zeros
((
0
,
),
dtype
=
np
.
int64
)
# to target box structure
ann_info
[
'gt_bboxes_3d'
]
=
DepthInstance3DBoxes
(
ann_info
[
'gt_bboxes_3d'
],
origin
=
(
0.5
,
0.5
,
0.5
)).
convert_to
(
self
.
box_mode_3d
)
return
ann_info
def _build_default_pipeline(self):
    """Build the default pipeline for this dataset.

    Returns:
        Compose: A minimal loading pipeline producing only 'points'
        (plus the raw image when the camera modality is enabled).
    """
    load_points = dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=False,
        load_dim=6,
        use_dim=[0, 1, 2])
    format_bundle = dict(
        type='DefaultFormatBundle3D',
        class_names=self.CLASSES,
        with_label=False)
    collect = dict(type='Collect3D', keys=['points'])
    steps = [load_points, format_bundle, collect]
    # Prepend image loading only when this dataset actually uses images.
    if self.modality['use_camera']:
        steps.insert(0, dict(type='LoadImageFromFile'))
    return Compose(steps)
# TODO fix this
def show(self, results, out_dir, show=True, pipeline=None):
    """Results visualization.

    Args:
        results (list[dict]): List of bounding boxes results.
        out_dir (str): Output directory of visualization result.
        show (bool): Visualize the results online.
        pipeline (list[dict], optional): raw data loading for showing.
            Default: None.
    """
    assert out_dir is not None, 'Expect out_dir, got none.'
    pipeline = self._get_pipeline(pipeline)
    for idx, result in enumerate(results):
        info = self.data_infos[idx]
        pts_path = info['pts_path']
        file_name = osp.split(pts_path)[-1].split('.')[0]
        points, img_metas, img = self._extract_data(
            idx, pipeline, ['points', 'img_metas', 'img'])
        points = points.numpy()
        # Point colors are stored in [0, 1]; scale to [0, 255] for display.
        points[:, 3:] *= 255
        gt_bboxes = self.get_ann_info(idx)['gt_bboxes_3d'].tensor.numpy()
        pred_bboxes = result['boxes_3d'].tensor.numpy()
        show_result(points, gt_bboxes.copy(), pred_bboxes.copy(), out_dir,
                    file_name, show)
        # Multi-modality visualization (projected boxes on the image).
        if self.modality['use_camera']:
            img = img.numpy()
            # Convert CHW tensor layout back to HWC for image drawing.
            img = img.transpose(1, 2, 0)
            pred_bboxes = DepthInstance3DBoxes(
                pred_bboxes, origin=(0.5, 0.5, 0))
            gt_bboxes = DepthInstance3DBoxes(gt_bboxes, origin=(0.5, 0.5, 0))
            show_multi_modality_result(
                img,
                gt_bboxes,
                pred_bboxes,
                None,
                out_dir,
                file_name,
                box_mode='depth',
                img_metas=img_metas,
                show=show)
def evaluate(self,
             results,
             metric=None,
             iou_thr=(0.25, 0.5),
             iou_thr_2d=(0.5, ),
             logger=None,
             show=False,
             out_dir=None,
             pipeline=None):
    """Evaluate.

    Evaluation in indoor protocol.

    Args:
        results (list[dict]): List of results.
        metric (str | list[str], optional): Metrics to be evaluated.
            Default: None.
        iou_thr (list[float], optional): AP IoU thresholds for 3D
            evaluation. Default: (0.25, 0.5).
        iou_thr_2d (list[float], optional): AP IoU thresholds for 2D
            evaluation. Default: (0.5, ).
        show (bool, optional): Whether to visualize.
            Default: False.
        out_dir (str, optional): Path to save the visualization results.
            Default: None.
        pipeline (list[dict], optional): raw data loading for showing.
            Default: None.

    Returns:
        dict: Evaluation results.
    """
    # evaluate 3D detection performance
    if isinstance(results[0], dict):
        return super().evaluate(results, metric, iou_thr, logger, show,
                                out_dir, pipeline)
    # evaluate 2D detection performance
    else:
        eval_results = OrderedDict()
        annotations = [self.get_ann_info(i) for i in range(len(self))]
        # BUG FIX: the original `(iou_thr_2d)` is just a parenthesized
        # expression, not a one-element tuple, so a float threshold would
        # be iterated directly and raise TypeError. A single-element tuple
        # requires a trailing comma.
        if isinstance(iou_thr_2d, float):
            iou_thr_2d = (iou_thr_2d, )
        for iou_thr_2d_single in iou_thr_2d:
            mean_ap, _ = eval_map(
                results,
                annotations,
                scale_ranges=None,
                iou_thr=iou_thr_2d_single,
                dataset=self.CLASSES,
                logger=logger)
            eval_results['mAP_' + str(iou_thr_2d_single)] = mean_ap
        return eval_results
mmdet3d/models/data_preprocessors/data_preprocessor.py
View file @
9ebb75da
...
...
@@ -172,7 +172,7 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
# image tensor.
inputs_dict
=
[{
k
:
v
.
to
(
self
.
_device
)
for
k
,
v
in
_data
[
'inputs'
].
items
()
for
k
,
v
in
_data
[
'inputs'
].
items
()
if
v
is
not
None
}
for
_data
in
data
]
batch_data_samples
:
List
[
BaseDataElement
]
=
[]
...
...
mmdet3d/models/dense_heads/groupfree3d_head.py
View file @
9ebb75da
This diff is collapsed.
Click to expand it.
mmdet3d/models/dense_heads/vote_head.py
View file @
9ebb75da
...
...
@@ -214,9 +214,9 @@ class VoteHead(BaseModule):
batch_gt_instances_ignore
.
append
(
data_sample
.
get
(
'ignored_instances'
,
None
))
batch_pts_semantic_mask
.
append
(
data_sample
.
seg_data
.
get
(
'pts_semantic_mask'
,
None
))
data_sample
.
gt_pts_seg
.
get
(
'pts_semantic_mask'
,
None
))
batch_pts_instance_mask
.
append
(
data_sample
.
seg_data
.
get
(
'pts_instance_mask'
,
None
))
data_sample
.
gt_pts_seg
.
get
(
'pts_instance_mask'
,
None
))
loss_inputs
=
(
points
,
preds_dict
,
batch_gt_instance_3d
)
losses
=
self
.
loss_by_feat
(
...
...
@@ -452,9 +452,9 @@ class VoteHead(BaseModule):
gt_instances. It usually includes ``bboxes`` and ``labels``
attributes.
batch_pts_semantic_mask (list[tensor]): Semantic gt mask for
multiple images
.
point clouds. Defaults to None
.
batch_pts_instance_mask (list[tensor]): Instance gt mask for
multiple images
.
point clouds. Defaults to None
.
Returns:
tuple[torch.Tensor]: Targets of vote head.
...
...
mmdet3d/models/detectors/groupfree3dnet.py
View file @
9ebb75da
# Copyright (c) OpenMMLab. All rights reserved.
import
torch
from
mmdet3d.core
import
bbox3d2result
,
merge_aug_bboxes_3d
from
mmdet3d.registry
import
MODELS
from
...core
import
SampleList
from
.single_stage
import
SingleStage3DDetector
...
...
@@ -15,91 +14,73 @@ class GroupFree3DNet(SingleStage3DDetector):
bbox_head
=
None
,
train_cfg
=
None
,
test_cfg
=
None
,
pretrained
=
None
):
init_cfg
=
None
,
**
kwargs
):
super
(
GroupFree3DNet
,
self
).
__init__
(
backbone
=
backbone
,
bbox_head
=
bbox_head
,
train_cfg
=
train_cfg
,
test_cfg
=
test_cfg
,
pretrained
=
pretrained
)
init_cfg
=
init_cfg
,
**
kwargs
)
def loss(self, batch_inputs_dict: dict, batch_data_samples: SampleList,
         **kwargs) -> dict:
    """Calculate losses from a batch of inputs dict and data samples.

    Args:
        batch_inputs_dict (dict): The model input dict which include
            'points', 'imgs' keys.

            - points (list[torch.Tensor]): Point cloud of each sample.
            - imgs (torch.Tensor, optional): Image of each sample.
        batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
            Samples. It usually includes information such as
            `gt_instance_3d`, `gt_pts_seg`.

    Returns:
        dict: A dictionary of loss components.
    """
    # TODO: refactor votenet series to reduce redundant codes.
    feats = self.extract_feat(batch_inputs_dict)
    points = batch_inputs_dict['points']
    return self.bbox_head.loss(points, feats, batch_data_samples, **kwargs)
def predict(self, batch_inputs_dict: dict, batch_data_samples: SampleList,
            **kwargs) -> SampleList:
    """Predict results from a batch of inputs and data samples with post-
    processing.

    Args:
        batch_inputs_dict (dict): The model input dict which include
            'points', 'imgs' keys.

            - points (list[torch.Tensor]): Point cloud of each sample.
            - imgs (torch.Tensor, optional): Image of each sample.
        batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
            Samples. It usually includes information such as
            `gt_instance_3d`, `gt_pts_seg`.

    Returns:
        list[:obj:`Det3DDataSample`]: Detection results of the
        input images. Each Det3DDataSample usually contain
        'pred_instances_3d'. And the ``pred_instances_3d`` usually
        contains following keys.

        - scores_3d (Tensor): Classification scores, has a shape
          (num_instance, )
        - labels_3d (Tensor): Labels of bboxes, has a shape
          (num_instances, ).
        - bboxes_3d (Tensor): Contains a tensor with shape
          (num_instances, C) where C >= 7.
    """
    feats = self.extract_feat(batch_inputs_dict)
    points = batch_inputs_dict['points']
    results_list = self.bbox_head.predict(points, feats, batch_data_samples,
                                          **kwargs)
    # Pack the per-sample InstanceData results back into Det3DDataSamples.
    return self.convert_to_datasample(results_list)
tests/test_models/test_detectors/test_groupfree3d.py
0 → 100644
View file @
9ebb75da
import unittest

import torch
from mmengine import DefaultScope

from mmdet3d.registry import MODELS
from tests.utils.model_utils import (_create_detector_inputs,
                                     _get_detector_cfg, _setup_seed)


class TestGroupfree3d(unittest.TestCase):
    """Smoke tests for the refactored GroupFree3D detector."""

    def test_groupfree3d(self):
        import mmdet3d.models
        assert hasattr(mmdet3d.models, 'GroupFree3DNet')
        DefaultScope.get_instance('test_groupfree3d', scope_name='mmdet3d')
        _setup_seed(0)
        voxel_net_cfg = _get_detector_cfg(
            'groupfree3d/groupfree3d_8x4_scannet-3d-18class-L6-O256.py')
        model = MODELS.build(voxel_net_cfg)
        num_gt_instance = 5
        data = [
            _create_detector_inputs(
                num_gt_instance=num_gt_instance,
                points_feat_dim=3,
                with_pts_semantic_mask=True,
                with_pts_instance_mask=True)
        ]
        # NOTE(review): assumed the forward checks run only on CUDA, as is
        # the upstream convention for these detector smoke tests — confirm.
        if torch.cuda.is_available():
            model = model.cuda()
            # test simple_test
            with torch.no_grad():
                batch_inputs, data_samples = model.data_preprocessor(
                    data, True)
                results = model.forward(
                    batch_inputs, data_samples, mode='predict')
            self.assertEqual(len(results), len(data))
            self.assertIn('bboxes_3d', results[0].pred_instances_3d)
            self.assertIn('scores_3d', results[0].pred_instances_3d)
            self.assertIn('labels_3d', results[0].pred_instances_3d)
            # save the memory
            with torch.no_grad():
                losses = model.forward(batch_inputs, data_samples, mode='loss')
            self.assertGreater(losses['sampling_objectness_loss'], 0)
            self.assertGreater(losses['proposal.objectness_loss'], 0)
            self.assertGreater(losses['s0.objectness_loss'], 0)
            self.assertGreater(losses['s1.size_res_loss'], 0)
            self.assertGreater(losses['s4.size_class_loss'], 0)
tests/utils/model_utils.py
View file @
9ebb75da
...
...
@@ -7,7 +7,7 @@ import numpy as np
import
torch
from
mmengine
import
InstanceData
from
mmdet3d.core
import
Det3DDataSample
,
LiDARInstance3DBoxes
from
mmdet3d.core
import
Det3DDataSample
,
LiDARInstance3DBoxes
,
PointData
def
_setup_seed
(
seed
):
...
...
@@ -71,22 +71,28 @@ def _get_detector_cfg(fname):
return
model
def
_create_detector_inputs
(
seed
=
0
,
def
_create_detector_inputs
(
seed
=
0
,
with_points
=
True
,
with_img
=
False
,
num_gt_instance
=
20
,
num_points
=
10
,
points_feat_dim
=
4
,
num_classes
=
3
,
gt_bboxes_dim
=
7
,
num_classes
=
3
):
with_pts_semantic_mask
=
False
,
with_pts_instance_mask
=
False
,
):
_setup_seed
(
seed
)
inputs_dict
=
dict
()
if
with_points
:
points
=
torch
.
rand
([
3
,
points_feat_dim
])
inputs_dict
[
'points'
]
=
points
points
=
torch
.
rand
([
num_points
,
points_feat_dim
])
else
:
points
=
None
if
with_img
:
img
=
torch
.
rand
(
3
,
10
,
10
)
inputs_dict
[
'img'
]
=
img
else
:
img
=
None
inputs_dict
=
dict
(
img
=
img
,
points
=
points
)
gt_instance_3d
=
InstanceData
()
gt_instance_3d
.
bboxes_3d
=
LiDARInstance3DBoxes
(
torch
.
rand
([
num_gt_instance
,
gt_bboxes_dim
]),
box_dim
=
gt_bboxes_dim
)
...
...
@@ -94,5 +100,12 @@ def _create_detector_inputs(seed=0,
data_sample
=
Det3DDataSample
(
metainfo
=
dict
(
box_type_3d
=
LiDARInstance3DBoxes
))
data_sample
.
gt_instances_3d
=
gt_instance_3d
data_sample
.
seg_data
=
dict
()
data_sample
.
gt_pts_seg
=
PointData
()
if
with_pts_instance_mask
:
pts_instance_mask
=
torch
.
randint
(
0
,
num_gt_instance
,
[
num_points
])
data_sample
.
gt_pts_seg
[
'pts_instance_mask'
]
=
pts_instance_mask
if
with_pts_semantic_mask
:
pts_semantic_mask
=
torch
.
randint
(
0
,
num_classes
,
[
num_points
])
data_sample
.
gt_pts_seg
[
'pts_semantic_mask'
]
=
pts_semantic_mask
return
dict
(
inputs
=
inputs_dict
,
data_sample
=
data_sample
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment