Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
mmdetection3d
Commits
36f658a5
Unverified
Commit
36f658a5
authored
Oct 10, 2021
by
ChaimZhu
Committed by
GitHub
Oct 10, 2021
Browse files
[Feature] Add SMOKE detector and Configs (#975)
* add smoke detector and cfgs * fix init * fix input_modality
parent
80f372e1
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
299 additions
and
1 deletion
+299
-1
configs/_base_/datasets/kitti-mono3d.py
configs/_base_/datasets/kitti-mono3d.py
+92
-0
configs/_base_/models/smoke.py
configs/_base_/models/smoke.py
+53
-0
configs/smoke/smoke_dla34_pytorch_dlaneck_gn-all_8x4_6x_kitti-mono3d.py
...smoke_dla34_pytorch_dlaneck_gn-all_8x4_6x_kitti-mono3d.py
+64
-0
mmdet3d/models/detectors/__init__.py
mmdet3d/models/detectors/__init__.py
+2
-1
mmdet3d/models/detectors/smoke_mono3d.py
mmdet3d/models/detectors/smoke_mono3d.py
+20
-0
tests/test_models/test_detectors.py
tests/test_models/test_detectors.py
+68
-0
No files found.
configs/_base_/datasets/kitti-mono3d.py
0 → 100644
View file @
36f658a5
# Base dataset config: KITTI, monocular (camera-only) 3D detection.
dataset_type = 'KittiMonoDataset'
data_root = 'data/kitti/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
# Camera images only; lidar input is switched off.
input_modality = dict(use_lidar=False, use_camera=True)
# Per-channel statistics, unpacked into every `Normalize` step below.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

# Training: load image + 2D/3D annotations (including depth, no attribute
# labels), resize, random horizontal flip, normalize, pad, then collect.
train_pipeline = [
    dict(type='LoadImageFromFileMono3D'),
    dict(
        type='LoadAnnotations3D',
        with_bbox=True,
        with_label=True,
        with_attr_label=False,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True),
    dict(type='Resize', img_scale=(1242, 375), keep_ratio=True),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(
        type='Collect3D',
        keys=[
            'img', 'gt_bboxes', 'gt_labels', 'gt_bboxes_3d', 'gt_labels_3d',
            'centers2d', 'depths'
        ]),
]

# Testing: a single scale with flipping disabled; only the image is collected.
test_pipeline = [
    dict(type='LoadImageFromFileMono3D'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1242, 375),
        flip=False,
        transforms=[
            dict(type='RandomFlip3D'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(
                type='DefaultFormatBundle3D',
                class_names=class_names,
                with_label=False),
            dict(type='Collect3D', keys=['img']),
        ])
]

# Pipeline that loads data and ground truth for the show function.
# Its loading step must stay consistent with test_pipeline (e.g. client).
eval_pipeline = [
    dict(type='LoadImageFromFileMono3D'),
    dict(
        type='DefaultFormatBundle3D',
        class_names=class_names,
        with_label=False),
    dict(type='Collect3D', keys=['img'])
]

data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'kitti_infos_train_mono3d.coco.json',
        info_file=data_root + 'kitti_infos_train.pkl',
        img_prefix=data_root,
        classes=class_names,
        pipeline=train_pipeline,
        modality=input_modality,
        test_mode=False,
        box_type_3d='Camera'),
    # `val` and `test` read the same val-split annotation and info files.
    val=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'kitti_infos_val_mono3d.coco.json',
        info_file=data_root + 'kitti_infos_val.pkl',
        img_prefix=data_root,
        classes=class_names,
        pipeline=test_pipeline,
        modality=input_modality,
        test_mode=True,
        box_type_3d='Camera'),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'kitti_infos_val_mono3d.coco.json',
        info_file=data_root + 'kitti_infos_val.pkl',
        img_prefix=data_root,
        classes=class_names,
        pipeline=test_pipeline,
        modality=input_modality,
        test_mode=True,
        box_type_3d='Camera'))
evaluation = dict(interval=2)
configs/_base_/models/smoke.py
0 → 100644
View file @
36f658a5
# Base model config for the SMOKE monocular 3D detector:
# DLANet backbone -> DLANeck -> SMOKEMono3DHead, GroupNorm throughout.
model = dict(
    type='SMOKEMono3D',
    backbone=dict(
        type='DLANet',
        depth=34,
        in_channels=3,
        norm_cfg=dict(type='GN', num_groups=32),
        # Backbone weights are initialised from this pretrained checkpoint.
        init_cfg=dict(
            type='Pretrained',
            checkpoint='http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth'
        )),
    neck=dict(
        type='DLANeck',
        in_channels=[16, 32, 64, 128, 256, 512],
        start_level=2,
        end_level=5,
        norm_cfg=dict(type='GN', num_groups=32)),
    bbox_head=dict(
        type='SMOKEMono3DHead',
        num_classes=3,
        in_channels=64,
        dim_channel=[3, 4, 5],
        ori_channel=[6, 7],
        stacked_convs=0,
        feat_channels=64,
        # Direction classifier, attribute and velocity predictions are all
        # disabled for this head.
        use_direction_classifier=False,
        diff_rad_by_sin=False,
        pred_attrs=False,
        pred_velo=False,
        dir_offset=0,
        strides=None,
        group_reg_dims=(8, ),
        cls_branch=(256, ),
        reg_branch=((256, ), ),
        num_attrs=0,
        bbox_code_size=7,
        dir_branch=(),
        attr_branch=(),
        bbox_coder=dict(
            type='SMOKECoder',
            base_depth=(28.01, 16.32),
            # Three triples for num_classes=3; presumably one per class in
            # dataset class order -- confirm against SMOKECoder.
            base_dims=((0.88, 1.73, 0.67), (1.78, 1.70, 0.58), (3.88, 1.63,
                                                                1.53)),
            code_size=7),
        loss_cls=dict(type='GaussianFocalLoss', loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', reduction='sum', loss_weight=1 / 300),
        loss_dir=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
        loss_attr=None,
        conv_bias=True,
        dcn_on_last_conv=False),
    train_cfg=None,
    test_cfg=dict(topK=100, local_maximum_kernel=3, max_per_img=100))
configs/smoke/smoke_dla34_pytorch_dlaneck_gn-all_8x4_6x_kitti-mono3d.py
0 → 100644
View file @
36f658a5
# SMOKE (DLA-34 backbone, DLANeck, GroupNorm) on KITTI mono3d.
# Base configs this experiment builds on.
_base_ = [
    '../_base_/datasets/kitti-mono3d.py', '../_base_/models/smoke.py',
    '../_base_/default_runtime.py'
]

# optimizer: Adam without gradient clipping
optimizer = dict(type='Adam', lr=2.5e-4)
optimizer_config = dict(grad_clip=None)
# step schedule with a single milestone at 50 and no warmup
lr_config = dict(policy='step', warmup=None, step=[50])

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=72)
log_config = dict(interval=10)

find_unused_parameters = True

# Re-declared locally because the pipelines below reference these names.
class_names = ['Pedestrian', 'Cyclist', 'Car']
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

# Training: flip, random shift/scale, then an affine resize to 1280x384
# (down_ratio=4) before normalisation and padding.
train_pipeline = [
    dict(type='LoadImageFromFileMono3D'),
    dict(
        type='LoadAnnotations3D',
        with_bbox=True,
        with_label=True,
        with_attr_label=False,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(type='RandomShiftScale', shift_scale=(0.2, 0.4), aug_prob=0.3),
    dict(type='AffineResize', img_scale=(1280, 384), down_ratio=4),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(
        type='Collect3D',
        keys=[
            'img', 'gt_bboxes', 'gt_labels', 'gt_bboxes_3d', 'gt_labels_3d',
            'centers2d', 'depths'
        ]),
]

# Testing: same affine resize, a single scale, flipping disabled.
test_pipeline = [
    dict(type='LoadImageFromFileMono3D'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1280, 384),
        flip=False,
        transforms=[
            dict(type='AffineResize', img_scale=(1280, 384), down_ratio=4),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(
                type='DefaultFormatBundle3D',
                class_names=class_names,
                with_label=False),
            dict(type='Collect3D', keys=['img']),
        ])
]

# Only the batch size, worker count and pipelines are set here.
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=4,
    train=dict(pipeline=train_pipeline),
    val=dict(pipeline=test_pipeline),
    test=dict(pipeline=test_pipeline))
mmdet3d/models/detectors/__init__.py
View file @
36f658a5
...
@@ -11,6 +11,7 @@ from .mvx_faster_rcnn import DynamicMVXFasterRCNN, MVXFasterRCNN
...
@@ -11,6 +11,7 @@ from .mvx_faster_rcnn import DynamicMVXFasterRCNN, MVXFasterRCNN
from
.mvx_two_stage
import
MVXTwoStageDetector
from
.mvx_two_stage
import
MVXTwoStageDetector
from
.parta2
import
PartA2
from
.parta2
import
PartA2
from
.single_stage_mono3d
import
SingleStageMono3DDetector
from
.single_stage_mono3d
import
SingleStageMono3DDetector
from
.smoke_mono3d
import
SMOKEMono3D
from
.ssd3dnet
import
SSD3DNet
from
.ssd3dnet
import
SSD3DNet
from
.votenet
import
VoteNet
from
.votenet
import
VoteNet
from
.voxelnet
import
VoxelNet
from
.voxelnet
import
VoxelNet
...
@@ -19,5 +20,5 @@ __all__ = [
...
@@ -19,5 +20,5 @@ __all__ = [
'Base3DDetector'
,
'VoxelNet'
,
'DynamicVoxelNet'
,
'MVXTwoStageDetector'
,
'Base3DDetector'
,
'VoxelNet'
,
'DynamicVoxelNet'
,
'MVXTwoStageDetector'
,
'DynamicMVXFasterRCNN'
,
'MVXFasterRCNN'
,
'PartA2'
,
'VoteNet'
,
'H3DNet'
,
'DynamicMVXFasterRCNN'
,
'MVXFasterRCNN'
,
'PartA2'
,
'VoteNet'
,
'H3DNet'
,
'CenterPoint'
,
'SSD3DNet'
,
'ImVoteNet'
,
'SingleStageMono3DDetector'
,
'CenterPoint'
,
'SSD3DNet'
,
'ImVoteNet'
,
'SingleStageMono3DDetector'
,
'FCOSMono3D'
,
'ImVoxelNet'
,
'GroupFree3DNet'
'FCOSMono3D'
,
'ImVoxelNet'
,
'GroupFree3DNet'
,
'SMOKEMono3D'
]
]
mmdet3d/models/detectors/smoke_mono3d.py
0 → 100644
View file @
36f658a5
from
mmdet.models.builder
import
DETECTORS
from
.single_stage_mono3d
import
SingleStageMono3DDetector
@DETECTORS.register_module()
class SMOKEMono3D(SingleStageMono3DDetector):
    r"""`SMOKE <https://arxiv.org/abs/2002.10111>`_ for monocular 3D object
    detection.

    This subclass adds no behaviour of its own: every constructor argument
    is passed positionally, unchanged, to
    :class:`SingleStageMono3DDetector`. It is registered in ``DETECTORS``
    so that a config can select it by name.

    Args:
        backbone: Backbone setting forwarded to the parent constructor.
        neck: Neck setting forwarded to the parent constructor.
        bbox_head: Detection head setting forwarded to the parent
            constructor.
        train_cfg: Training setting forwarded to the parent constructor.
            Defaults to None.
        test_cfg: Testing setting forwarded to the parent constructor.
            Defaults to None.
        pretrained: Forwarded to the parent constructor; its meaning is
            defined there. Defaults to None.
    """

    def __init__(self,
                 backbone,
                 neck,
                 bbox_head,
                 train_cfg=None,
                 test_cfg=None,
                 pretrained=None):
        super().__init__(backbone, neck, bbox_head, train_cfg, test_cfg,
                         pretrained)
tests/test_models/test_detectors.py
View file @
36f658a5
...
@@ -470,3 +470,71 @@ def test_imvoxelnet():
...
@@ -470,3 +470,71 @@ def test_imvoxelnet():
assert
boxes_3d
.
tensor
.
shape
[
1
]
==
7
assert
boxes_3d
.
tensor
.
shape
[
1
]
==
7
assert
scores_3d
.
shape
[
0
]
>=
0
assert
scores_3d
.
shape
[
0
]
>=
0
assert
labels_3d
.
shape
[
0
]
>=
0
assert
labels_3d
.
shape
[
0
]
>=
0
def test_smoke():
    """Smoke-test the SMOKE detector: one training forward and one inference.

    Builds the detector from the SMOKE KITTI config, runs ``forward_train``
    on a single random image with four hand-written ground-truth objects,
    then runs ``simple_test`` and checks the layout of the returned result.
    Skipped when CUDA is unavailable.
    """
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')

    _setup_seed(0)
    smoke_cfg = _get_detector_cfg(
        'smoke/smoke_dla34_pytorch_dlaneck_gn-all_8x4_6x_kitti-mono3d.py')
    self = build_detector(smoke_cfg).cuda()
    imgs = torch.rand([1, 3, 384, 1280], dtype=torch.float32).cuda()
    # Ground truth for one image containing four objects.
    gt_bboxes = [
        torch.Tensor([[563.63122442, 175.02195182, 614.81298184, 224.97763099],
                      [480.89676358, 179.86272635, 511.53017463, 202.54645962],
                      [541.48322272, 175.73767011, 564.55208966, 193.95009791],
                      [329.51448848, 176.14566789, 354.24670848,
                       213.82599081]]).cuda()
    ]
    gt_bboxes_3d = [
        CameraInstance3DBoxes(
            torch.Tensor([[-0.69, 1.69, 25.01, 3.20, 1.61, 1.66, -1.59],
                          [-7.43, 1.88, 47.55, 3.70, 1.40, 1.51, 1.55],
                          [-4.71, 1.71, 60.52, 4.05, 1.46, 1.66, 1.56],
                          [-12.63, 1.88, 34.09, 1.95, 1.72, 0.50,
                           1.54]]).cuda(),
            box_dim=7)
    ]
    gt_labels = [torch.tensor([0, 0, 0, 1]).cuda()]
    gt_labels_3d = gt_labels
    centers2d = [
        torch.Tensor([[589.6528477, 198.3862263], [496.8143155, 190.75967182],
                      [553.40528354, 184.53785991],
                      [342.23690317, 194.44298819]]).cuda()
    ]
    # depths is actually not used in smoke head loss computation; it is still
    # sized to the number of objects so the fixture stays self-consistent
    # (previously 3 depths were supplied for 4 objects).
    depths = [
        torch.rand([centers2d[0].shape[0]], dtype=torch.float32).cuda()
    ]
    attr_labels = None
    img_metas = [
        dict(
            cam_intrinsic=[[721.5377, 0., 609.5593, 0.],
                           [0., 721.5377, 172.854, 0.], [0., 0., 1., 0.],
                           [0., 0., 0., 1.]],
            scale_factor=np.array([1., 1., 1., 1.], dtype=np.float32),
            pad_shape=[384, 1280],
            trans_mat=np.array(
                [[0.25, 0., 0.], [0., 0.25, 0], [0., 0., 1.]],
                dtype=np.float32),
            affine_aug=False,
            box_type_3d=CameraInstance3DBoxes)
    ]

    # test forward_train
    losses = self.forward_train(imgs, img_metas, gt_bboxes, gt_labels,
                                gt_bboxes_3d, gt_labels_3d, centers2d, depths,
                                attr_labels)

    assert losses['loss_cls'] >= 0
    assert losses['loss_bbox'] >= 0

    # test simple_test
    with torch.no_grad():
        results = self.simple_test(imgs, img_metas)
    boxes_3d = results[0]['img_bbox']['boxes_3d']
    scores_3d = results[0]['img_bbox']['scores_3d']
    labels_3d = results[0]['img_bbox']['labels_3d']
    assert boxes_3d.tensor.shape[0] >= 0
    assert boxes_3d.tensor.shape[1] == 7
    assert scores_3d.shape[0] >= 0
    assert labels_3d.shape[0] >= 0
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment