Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
mmdetection3d
Commits
b496f579
Commit
b496f579
authored
Jul 18, 2022
by
ZCMax
Committed by
ChaimZhu
Jul 20, 2022
Browse files
[Refactor] Refactor Mono3D models
parent
35667791
Changes
36
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
776 additions
and
679 deletions
+776
-679
mmdet3d/models/dense_heads/base_mono3d_dense_head.py
mmdet3d/models/dense_heads/base_mono3d_dense_head.py
+131
-44
mmdet3d/models/dense_heads/fcos_mono3d_head.py
mmdet3d/models/dense_heads/fcos_mono3d_head.py
+195
-176
mmdet3d/models/dense_heads/monoflex_head.py
mmdet3d/models/dense_heads/monoflex_head.py
+37
-72
mmdet3d/models/dense_heads/pgd_head.py
mmdet3d/models/dense_heads/pgd_head.py
+106
-84
mmdet3d/models/dense_heads/smoke_mono3d_head.py
mmdet3d/models/dense_heads/smoke_mono3d_head.py
+86
-63
mmdet3d/models/detectors/fcos_mono3d.py
mmdet3d/models/detectors/fcos_mono3d.py
+31
-8
mmdet3d/models/detectors/single_stage_mono3d.py
mmdet3d/models/detectors/single_stage_mono3d.py
+48
-202
mmdet3d/models/detectors/smoke_mono3d.py
mmdet3d/models/detectors/smoke_mono3d.py
+30
-8
tests/data/kitti/kitti_infos_train.pkl
tests/data/kitti/kitti_infos_train.pkl
+0
-0
tests/test_data/test_datasets/test_kitti_dataset.py
tests/test_data/test_datasets/test_kitti_dataset.py
+4
-0
tests/test_data/test_datasets/test_nuscenes_dataset.py
tests/test_data/test_datasets/test_nuscenes_dataset.py
+2
-2
tests/test_data/test_transforms/utils.py
tests/test_data/test_transforms/utils.py
+22
-0
tests/test_models/test_dense_heads/test_fcos_mono3d_head.py
tests/test_models/test_dense_heads/test_fcos_mono3d_head.py
+6
-5
tests/test_models/test_dense_heads/test_pgd_head.py
tests/test_models/test_dense_heads/test_pgd_head.py
+10
-8
tests/test_models/test_dense_heads/test_smoke_mono3d_head.py
tests/test_models/test_dense_heads/test_smoke_mono3d_head.py
+7
-5
tools/data_converter/update_infos_to_v2.py
tools/data_converter/update_infos_to_v2.py
+61
-2
No files found.
mmdet3d/models/dense_heads/base_mono3d_dense_head.py
View file @
b496f579
# Copyright (c) OpenMMLab. All rights reserved.
import
warnings
from
abc
import
ABCMeta
,
abstractmethod
from
typing
import
List
,
Optional
from
typing
import
Optional
,
Tuple
from
mmcv.runner
import
BaseModule
from
mmengine.config
import
ConfigDict
from
torch
import
Tensor
from
mmdet3d.core
import
Det3DData
Sample
from
mmdet3d.core
.utils
import
InstanceList
,
OptMultiConfig
,
Sample
List
class
BaseMono3DDenseHead
(
BaseModule
,
metaclass
=
ABCMeta
):
"""Base class for Monocular 3D DenseHeads.
"""
"""Base class for Monocular 3D DenseHeads.
def
__init__
(
self
,
init_cfg
:
Optional
[
dict
]
=
None
)
->
None
:
super
(
BaseMono3DDenseHead
,
self
).
__init__
(
init_cfg
=
init_cfg
)
1. The ``loss`` method is used to calculate the loss of densehead,
which includes two steps: (1) the densehead model performs forward
propagation to obtain the feature maps (2) The ``loss_by_feat`` method
is called based on the feature maps to calculate the loss.
@
abstractmethod
def
loss
(
self
,
**
kwargs
):
"""Compute losses of the head."""
pass
.. code:: text
def
get_bboxes
(
self
,
*
args
,
**
kwargs
):
warnings
.
warn
(
'`get_bboxes` is deprecated and will be removed in '
'the future. Please use `get_results` instead.'
)
return
self
.
get_results
(
*
args
,
**
kwargs
)
loss(): forward() -> loss_by_feat()
@
abstractmethod
def
get_results
(
self
,
*
args
,
**
kwargs
):
"""Transform network outputs of a batch into 3D bbox results."""
pass
2. The ``predict`` method is used to predict detection results,
which includes two steps: (1) the densehead model performs forward
propagation to obtain the feature maps (2) The ``predict_by_feat`` method
is called based on the feature maps to predict detection results including
post-processing.
.. code:: text
predict(): forward() -> predict_by_feat()
def
forward_train
(
self
,
x
:
List
[
Tensor
],
batch_data_samples
:
List
[
Det3DDataSample
],
proposal_cfg
:
Optional
[
ConfigDict
]
=
None
,
**
kwargs
):
3. The ``loss_and_predict`` method is used to return loss and detection
results at the same time. It will call densehead's ``forward``,
``loss_by_feat`` and ``predict_by_feat`` methods in order. If one-stage is
used as RPN, the densehead needs to return both losses and predictions.
These predictions are used as the proposals of roihead.
.. code:: text
loss_and_predict(): forward() -> loss_by_feat() -> predict_by_feat()
"""
def
__init__
(
self
,
init_cfg
:
OptMultiConfig
=
None
)
->
None
:
super
(
BaseMono3DDenseHead
,
self
).
__init__
(
init_cfg
=
init_cfg
)
def
loss
(
self
,
x
:
Tuple
[
Tensor
],
batch_data_samples
:
SampleList
,
**
kwargs
)
->
dict
:
"""
Args:
x (list[Tensor]): Features from FPN.
batch_data_samples (list[:obj:`Det3DDataSample`]): Each item
contains the meta information of each image and corresponding
annotations.
proposal_cfg (mmengine.Config, optional): Test / postprocessing
configuration, if None, test_cfg would be used.
Defaults to None.
Returns:
tuple or Tensor: When `proposal_cfg` is None, the detector is a
\
...
...
@@ -73,26 +81,105 @@ class BaseMono3DDenseHead(BaseModule, metaclass=ABCMeta):
outs
=
self
(
x
)
batch_gt_instances_3d
=
[]
batch_gt_instances
=
[]
batch_gt_instances_ignore
=
[]
batch_img_metas
=
[]
for
data_sample
in
batch_data_samples
:
batch_img_metas
.
append
(
data_sample
.
metainfo
)
batch_gt_instances_3d
.
append
(
data_sample
.
gt_instances_3d
)
batch_gt_instances
.
append
(
data_sample
.
gt_instances
)
batch_gt_instances_ignore
.
append
(
data_sample
.
get
(
'ignored_instances'
,
None
))
loss_inputs
=
outs
+
(
batch_gt_instances_3d
,
batch_gt_instances
,
batch_img_metas
,
batch_gt_instances_ignore
)
losses
=
self
.
loss_by_feat
(
*
loss_inputs
)
return
losses
@
abstractmethod
def
loss_by_feat
(
self
,
**
kwargs
)
->
dict
:
"""Calculate the loss based on the features extracted by the detection
head."""
pass
def
loss_and_predict
(
self
,
x
:
Tuple
[
Tensor
],
batch_data_samples
:
SampleList
,
proposal_cfg
:
Optional
[
ConfigDict
]
=
None
,
**
kwargs
)
->
Tuple
[
dict
,
InstanceList
]:
"""Perform forward propagation of the head, then calculate loss and
predictions from the features and data samples.
Args:
x (tuple[Tensor]): Features from FPN.
batch_data_samples (list[:obj:`Det3DDataSample`]): Each item
contains the meta information of each image and
corresponding annotations.
proposal_cfg (ConfigDict, optional): Test / postprocessing
configuration, if None, test_cfg would be used.
Defaults to None.
Returns:
tuple: The return value is a tuple containing:
- losses (dict[str, Tensor]): A dictionary of loss components.
- predictions (list[:obj:`InstanceData`]): Detection
results of each image after the post process.
"""
batch_gt_instances_3d
=
[]
batch_gt_instances
=
[]
batch_gt_instances_ignore
=
[]
batch_img_metas
=
[]
for
data_sample
in
batch_data_samples
:
batch_img_metas
.
append
(
data_sample
.
metainfo
)
batch_gt_instances_3d
.
append
(
data_sample
.
gt_instances_3d
)
if
'ignored_instances'
in
data_sample
:
batch_gt_instances_ignore
.
append
(
data_sample
.
ignored_instances
)
else
:
batch_gt_instances_ignore
.
append
(
None
)
loss_inputs
=
outs
+
(
batch_gt_instances_3d
,
batch_img_metas
,
batch_gt_instances_ignore
)
losses
=
self
.
loss
(
*
loss_inputs
)
if
proposal_cfg
is
None
:
return
losses
else
:
batch_img_metas
=
[
data_sample
.
metainfo
for
data_sample
in
batch_data_samples
]
results_list
=
self
.
get_results
(
*
outs
,
batch_img_metas
=
batch_img_metas
,
cfg
=
proposal_cfg
)
return
losses
,
results_list
batch_gt_instances
.
append
(
data_sample
.
gt_instances
)
batch_gt_instances_ignore
.
append
(
data_sample
.
get
(
'ignored_instances'
,
None
))
outs
=
self
(
x
)
loss_inputs
=
outs
+
(
batch_gt_instances_3d
,
batch_gt_instances
,
batch_img_metas
,
batch_gt_instances_ignore
)
losses
=
self
.
loss_by_feat
(
*
loss_inputs
)
predictions
=
self
.
predict_by_feat
(
*
outs
,
batch_img_metas
=
batch_img_metas
,
cfg
=
proposal_cfg
)
return
losses
,
predictions
def
predict
(
self
,
x
:
Tuple
[
Tensor
],
batch_data_samples
:
SampleList
,
rescale
:
bool
=
False
)
->
InstanceList
:
"""Perform forward propagation of the detection head and predict
detection results on the features of the upstream network.
Args:
x (tuple[Tensor]): Multi-level features from the
upstream network, each is a 4D-tensor.
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
Samples. It usually includes information such as
`gt_instance_3d`, `gt_pts_panoptic_seg` and `gt_pts_sem_seg`.
rescale (bool, optional): Whether to rescale the results.
Defaults to False.
Returns:
list[:obj:`InstanceData`]: Detection results of each image
after the post process.
"""
batch_img_metas
=
[
data_samples
.
metainfo
for
data_samples
in
batch_data_samples
]
outs
=
self
(
x
)
predictions
=
self
.
predict_by_feat
(
*
outs
,
batch_img_metas
=
batch_img_metas
,
rescale
=
rescale
)
return
predictions
@
abstractmethod
def
predict_by_feat
(
self
,
**
kwargs
)
->
InstanceList
:
"""Transform a batch of output features extracted from the head into
bbox results."""
pass
mmdet3d/models/dense_heads/fcos_mono3d_head.py
View file @
b496f579
This diff is collapsed.
Click to expand it.
mmdet3d/models/dense_heads/monoflex_head.py
View file @
b496f579
...
...
@@ -3,7 +3,6 @@ from typing import List, Optional, Tuple, Union
import
torch
from
mmcv.cnn
import
xavier_init
from
mmcv.runner
import
force_fp32
from
mmengine.config
import
ConfigDict
from
mmengine.data
import
InstanceData
from
torch
import
Tensor
...
...
@@ -197,39 +196,8 @@ class MonoFlexHead(AnchorFreeMono3DHead):
if
self
.
use_edge_fusion
:
self
.
_init_edge_module
()
def
forward_train
(
self
,
x
:
List
[
Tensor
],
batch_data_samples
:
List
[
Det3DDataSample
],
proposal_cfg
:
Optional
[
ConfigDict
]
=
None
,
**
kwargs
):
"""
Args:
x (list[Tensor]): Features from FPN.
batch_img_metas (list[dict]): Meta information of each image, e.g.,
image size, scaling factor, etc.
gt_bboxes (list[Tensor]): Ground truth bboxes of the image,
shape (num_gts, 4).
gt_labels (list[Tensor]): Ground truth labels of each box,
shape (num_gts,).
gt_bboxes_3d (list[Tensor]): 3D ground truth bboxes of the image,
shape (num_gts, self.bbox_code_size).
gt_labels_3d (list[Tensor]): 3D ground truth labels of each box,
shape (num_gts,).
centers_2d (list[Tensor]): Projected 3D center of each box,
shape (num_gts, 2).
depths (list[Tensor]): Depth of projected 3D center of each box,
shape (num_gts,).
attr_labels (list[Tensor]): Attribute labels of each box,
shape (num_gts,).
gt_bboxes_ignore (list[Tensor]): Ground truth bboxes to be
ignored, shape (num_ignored_gts, 4).
proposal_cfg (mmcv.Config): Test / postprocessing configuration,
if None, test_cfg would be used
Returns:
tuple:
losses: (dict[str, Tensor]): A dictionary of loss components.
proposal_list (list[Tensor]): Proposals of each image.
"""
def
loss
(
self
,
x
:
List
[
Tensor
],
batch_data_samples
:
List
[
Det3DDataSample
],
**
kwargs
):
"""
Args:
x (list[Tensor]): Features from FPN.
...
...
@@ -266,15 +234,15 @@ class MonoFlexHead(AnchorFreeMono3DHead):
"""
batch_gt_instances_3d
=
[]
batch_gt_instances
=
[]
batch_gt_instances_ignore
=
[]
batch_img_metas
=
[]
for
data_sample
in
batch_data_samples
:
batch_img_metas
.
append
(
data_sample
.
metainfo
)
batch_gt_instances_3d
.
append
(
data_sample
.
gt_instances_3d
)
if
'ignored_instances'
in
data_sample
:
batch_gt_instances_ignore
.
append
(
data_sample
.
ignored_instances
)
else
:
batch_gt_instances_ignore
.
append
(
None
)
batch_gt_instances
.
append
(
data_sample
.
gt_instances
)
batch_gt_instances_ignore
.
append
(
data_sample
.
get
(
'ignored_instances'
,
None
))
# monoflex head needs img_metas for feature extraction
outs
=
self
(
x
,
batch_img_metas
)
...
...
@@ -282,15 +250,7 @@ class MonoFlexHead(AnchorFreeMono3DHead):
batch_gt_instances_ignore
)
losses
=
self
.
loss
(
*
loss_inputs
)
if
proposal_cfg
is
None
:
return
losses
else
:
batch_img_metas
=
[
data_sample
.
metainfo
for
data_sample
in
batch_data_samples
]
results_list
=
self
.
get_results
(
*
outs
,
batch_img_metas
=
batch_img_metas
,
cfg
=
proposal_cfg
)
return
losses
,
results_list
return
losses
def
forward
(
self
,
feats
:
List
[
Tensor
],
batch_img_metas
:
List
[
dict
]):
"""Forward features from the upstream network.
...
...
@@ -373,9 +333,8 @@ class MonoFlexHead(AnchorFreeMono3DHead):
return
cls_score
,
bbox_pred
@
force_fp32
(
apply_to
=
(
'cls_scores'
,
'bbox_preds'
))
def
get_results
(
self
,
cls_scores
:
List
[
Tensor
],
bbox_preds
:
List
[
Tensor
],
batch_img_metas
:
List
[
dict
]):
def
predict_by_feat
(
self
,
cls_scores
:
List
[
Tensor
],
bbox_preds
:
List
[
Tensor
],
batch_img_metas
:
List
[
dict
]):
"""Generate bboxes from bbox head predictions.
Args:
...
...
@@ -393,7 +352,7 @@ class MonoFlexHead(AnchorFreeMono3DHead):
cls_scores
[
0
].
new_tensor
(
input_meta
[
'cam2img'
])
for
input_meta
in
batch_img_metas
])
batch_bboxes
,
batch_scores
,
batch_topk_labels
=
self
.
decode_heatmap
(
batch_bboxes
,
batch_scores
,
batch_topk_labels
=
self
.
_
decode_heatmap
(
cls_scores
[
0
],
bbox_preds
[
0
],
batch_img_metas
,
...
...
@@ -429,13 +388,13 @@ class MonoFlexHead(AnchorFreeMono3DHead):
return
result_list
def
decode_heatmap
(
self
,
cls_score
:
Tensor
,
reg_pred
:
Tensor
,
batch_img_metas
:
List
[
dict
],
cam2imgs
:
Tensor
,
topk
:
int
=
100
,
kernel
:
int
=
3
):
def
_
decode_heatmap
(
self
,
cls_score
:
Tensor
,
reg_pred
:
Tensor
,
batch_img_metas
:
List
[
dict
],
cam2imgs
:
Tensor
,
topk
:
int
=
100
,
kernel
:
int
=
3
):
"""Transform outputs into detections raw bbox predictions.
Args:
...
...
@@ -530,14 +489,16 @@ class MonoFlexHead(AnchorFreeMono3DHead):
return
preds
def
get_targets
(
self
,
batch_gt_instances_3d
:
List
[
InstanceData
],
batch_gt_instances
:
List
[
InstanceData
],
feat_shape
:
Tuple
[
int
],
batch_img_metas
:
List
[
dict
]):
"""Get training targets for batch images.
``
Args:
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instance_3d. It usually includes ``bboxes``、``labels``
、``bboxes_3d``、``labels_3d``、``depths``、``centers_2d`` and
attributes.
gt_instance_3d. It usually includes ``bboxes_3d``,
``labels_3d``, ``depths``, ``centers_2d`` and attributes.
batch_gt_instances (list[:obj:`InstanceData`]): Batch of
gt_instance. It usually includes ``bboxes`` and ``labels``.
feat_shape (tuple[int]): Feature map shape with value,
shape (B, _, H, W).
batch_img_metas (list[dict]): Meta information of each image, e.g.,
...
...
@@ -574,10 +535,10 @@ class MonoFlexHead(AnchorFreeMono3DHead):
"""
gt_bboxes_list
=
[
gt_instances
_3d
.
bboxes
for
gt_instances
_3d
in
batch_gt_instances
_3d
gt_instances
.
bboxes
for
gt_instances
in
batch_gt_instances
]
gt_labels_list
=
[
gt_instances
_3d
.
labels
for
gt_instances
_3d
in
batch_gt_instances
_3d
gt_instances
.
labels
for
gt_instances
in
batch_gt_instances
]
gt_bboxes_3d_list
=
[
gt_instances_3d
.
bboxes_3d
...
...
@@ -721,12 +682,14 @@ class MonoFlexHead(AnchorFreeMono3DHead):
return
center_heatmap_target
,
avg_factor
,
target_labels
def
loss
(
self
,
cls_scores
:
List
[
Tensor
],
bbox_preds
:
List
[
Tensor
],
batch_gt_instances_3d
:
List
[
InstanceData
],
batch_img_metas
:
List
[
dict
],
batch_gt_instances_ignore
:
Optional
[
List
[
InstanceData
]]
=
None
):
def
loss_by_feat
(
self
,
cls_scores
:
List
[
Tensor
],
bbox_preds
:
List
[
Tensor
],
batch_gt_instances_3d
:
List
[
InstanceData
],
batch_gt_instances
:
List
[
InstanceData
],
batch_img_metas
:
List
[
dict
],
batch_gt_instances_ignore
:
Optional
[
List
[
InstanceData
]]
=
None
):
"""Compute loss of the head.
Args:
...
...
@@ -736,9 +699,10 @@ class MonoFlexHead(AnchorFreeMono3DHead):
number is bbox_code_size.
shape (B, 7, H, W).
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instance_3d. It usually includes ``bboxes``、``labels``
、``bboxes_3d``、``labels_3d``、``depths``、``centers_2d`` and
attributes.
gt_instance_3d. It usually includes ``bboxes_3d``,
``labels_3d``, ``depths``, ``centers_2d`` and attributes.
batch_gt_instances (list[:obj:`InstanceData`]): Batch of
gt_instance. It usually includes ``bboxes`` and ``labels``.
batch_img_metas (list[dict]): Meta information of each image, e.g.,
image size, scaling factor, etc.
batch_gt_instances_ignore (list[:obj:`InstanceData`], Optional):
...
...
@@ -756,6 +720,7 @@ class MonoFlexHead(AnchorFreeMono3DHead):
center2d_heatmap_target
,
avg_factor
,
target_labels
=
\
self
.
get_targets
(
batch_gt_instances_3d
,
batch_gt_instances
,
center2d_heatmap
.
shape
,
batch_img_metas
)
...
...
mmdet3d/models/dense_heads/pgd_head.py
View file @
b496f579
# Copyright (c) OpenMMLab. All rights reserved.
from
typing
import
Tuple
from
typing
import
List
,
Optional
,
Tuple
import
numpy
as
np
import
torch
from
mmcv.cnn
import
Scale
,
bias_init_with_prob
,
normal_init
from
mmcv.runner
import
force_fp32
from
mmengine.data
import
InstanceData
from
torch
import
Tensor
from
torch
import
nn
as
nn
from
torch.nn
import
functional
as
F
from
mmdet3d.core
import
box3d_multiclass_nms
,
xywhr2xyxyr
from
mmdet3d.core.bbox
import
points_cam2img
,
points_img2cam
from
mmdet3d.core.utils
import
(
ConfigType
,
InstanceList
,
OptConfigType
,
OptInstanceList
)
from
mmdet3d.registry
import
MODELS
from
mmdet.core
import
distance2bbox
,
multi_apply
from
.fcos_mono3d_head
import
FCOSMono3DHead
...
...
@@ -86,7 +88,7 @@ class PGDHead(FCOSMono3DHead):
base_dims
=
((
0.8
,
1.73
,
0.6
),
(
1.76
,
1.73
,
0.6
),
(
3.9
,
1.56
,
1.6
)),
code_size
=
7
),
**
kwargs
):
**
kwargs
)
->
None
:
self
.
use_depth_classifier
=
use_depth_classifier
self
.
use_onlyreg_proj
=
use_onlyreg_proj
self
.
depth_branch
=
depth_branch
...
...
@@ -190,11 +192,11 @@ class PGDHead(FCOSMono3DHead):
for
conv_weight
in
self
.
conv_weights
:
normal_init
(
conv_weight
,
std
=
0.01
)
def
forward
(
self
,
feats
)
:
def
forward
(
self
,
x
:
Tuple
[
Tensor
])
->
Tuple
[
Tensor
,
...]
:
"""Forward features from the upstream network.
Args:
feats
(tuple[Tensor]): Features from the upstream network, each is
x
(tuple[Tensor]): Features from the upstream network, each is
a 4D-tensor.
Returns:
...
...
@@ -220,10 +222,10 @@ class PGDHead(FCOSMono3DHead):
centernesses (list[Tensor]): Centerness for each scale level,
each is a 4D-tensor, the channel number is num_points * 1.
"""
return
multi_apply
(
self
.
forward_single
,
feats
,
self
.
scales
,
self
.
strides
)
return
multi_apply
(
self
.
forward_single
,
x
,
self
.
scales
,
self
.
strides
)
def
forward_single
(
self
,
x
,
scale
,
stride
):
def
forward_single
(
self
,
x
:
Tensor
,
scale
:
Scale
,
stride
:
int
)
->
Tuple
[
Tensor
,
...]:
"""Forward features of a single scale level.
Args:
...
...
@@ -271,17 +273,17 @@ class PGDHead(FCOSMono3DHead):
attr_pred
,
centerness
def
get_proj_bbox2d
(
self
,
bbox_preds
,
pos_dir_cls_preds
,
labels_3d
,
bbox_targets_3d
,
pos_points
,
pos_inds
,
batch_img_metas
,
pos_depth_cls_preds
=
None
,
pos_weights
=
None
,
pos_cls_scores
=
None
,
with_kpts
=
False
)
:
bbox_preds
:
List
[
Tensor
]
,
pos_dir_cls_preds
:
List
[
Tensor
]
,
labels_3d
:
List
[
Tensor
]
,
bbox_targets_3d
:
List
[
Tensor
]
,
pos_points
:
Tensor
,
pos_inds
:
Tensor
,
batch_img_metas
:
List
[
dict
]
,
pos_depth_cls_preds
:
Optional
[
Tensor
]
=
None
,
pos_weights
:
Optional
[
Tensor
]
=
None
,
pos_cls_scores
:
Optional
[
Tensor
]
=
None
,
with_kpts
:
bool
=
False
)
->
Tuple
[
Tensor
]
:
"""Decode box predictions and get projected 2D attributes.
Args:
...
...
@@ -448,9 +450,12 @@ class PGDHead(FCOSMono3DHead):
return
outputs
def
get_pos_predictions
(
self
,
bbox_preds
,
dir_cls_preds
,
depth_cls_preds
,
weights
,
attr_preds
,
centernesses
,
pos_inds
,
batch_img_metas
):
def
get_pos_predictions
(
self
,
bbox_preds
:
List
[
Tensor
],
dir_cls_preds
:
List
[
Tensor
],
depth_cls_preds
:
List
[
Tensor
],
weights
:
List
[
Tensor
],
attr_preds
:
List
[
Tensor
],
centernesses
:
List
[
Tensor
],
pos_inds
:
Tensor
,
batch_img_metas
:
List
[
dict
])
->
Tuple
[
Tensor
]:
"""Flatten predictions and get positive ones.
Args:
...
...
@@ -528,20 +533,19 @@ class PGDHead(FCOSMono3DHead):
return
pos_bbox_preds
,
pos_dir_cls_preds
,
pos_depth_cls_preds
,
\
pos_weights
,
pos_attr_preds
,
pos_centerness
@
force_fp32
(
apply_to
=
(
'cls_scores'
,
'bbox_preds'
,
'dir_cls_preds'
,
'depth_cls_preds'
,
'weights'
,
'attr_preds'
,
'centernesses'
))
def
loss
(
self
,
cls_scores
,
bbox_preds
,
dir_cls_preds
,
depth_cls_preds
,
weights
,
attr_preds
,
centernesses
,
batch_gt_instances_3d
,
batch_img_metas
,
batch_gt_instances_ignore
=
None
):
def
loss_by_feat
(
self
,
cls_scores
:
List
[
Tensor
],
bbox_preds
:
List
[
Tensor
],
dir_cls_preds
:
List
[
Tensor
],
depth_cls_preds
:
List
[
Tensor
],
weights
:
List
[
Tensor
],
attr_preds
:
List
[
Tensor
],
centernesses
:
List
[
Tensor
],
batch_gt_instances_3d
:
InstanceList
,
batch_gt_instances
:
InstanceList
,
batch_img_metas
:
List
[
dict
],
batch_gt_instances_ignore
:
OptInstanceList
=
None
)
->
dict
:
"""Compute loss of the head.
Args:
...
...
@@ -591,7 +595,7 @@ class PGDHead(FCOSMono3DHead):
bbox_preds
[
0
].
device
)
labels_3d
,
bbox_targets_3d
,
centerness_targets
,
attr_targets
=
\
self
.
get_targets
(
all_level_points
,
batch_gt_instances_3d
)
all_level_points
,
batch_gt_instances_3d
,
batch_gt_instances
)
num_imgs
=
cls_scores
[
0
].
size
(
0
)
# flatten cls_scores and targets
...
...
@@ -785,20 +789,17 @@ class PGDHead(FCOSMono3DHead):
return
loss_dict
@
force_fp32
(
apply_to
=
(
'cls_scores'
,
'bbox_preds'
,
'dir_cls_preds'
,
'depth_cls_preds'
,
'weights'
,
'attr_preds'
,
'centernesses'
))
def
get_results
(
self
,
cls_scores
,
bbox_preds
,
dir_cls_preds
,
depth_cls_preds
,
weights
,
attr_preds
,
centernesses
,
batch_img_metas
,
cfg
=
None
,
rescale
=
None
):
def
predict_by_feat
(
self
,
cls_scores
:
List
[
Tensor
],
bbox_preds
:
List
[
Tensor
],
dir_cls_preds
:
List
[
Tensor
],
depth_cls_preds
:
List
[
Tensor
],
weights
:
List
[
Tensor
],
attr_preds
:
List
[
Tensor
],
centernesses
:
List
[
Tensor
],
batch_img_metas
:
Optional
[
List
[
dict
]]
=
None
,
cfg
:
OptConfigType
=
None
,
rescale
:
bool
=
False
)
->
InstanceList
:
"""Transform network output for a batch into bbox predictions.
Args:
...
...
@@ -824,7 +825,7 @@ class PGDHead(FCOSMono3DHead):
cfg (mmcv.Config, optional): Test / postprocessing configuration,
if None, test_cfg would be used. Defaults to None.
rescale (bool, optional): If True, return boxes in original image
space. Defaults to
Non
e.
space. Defaults to
Fals
e.
Returns:
list[tuple[Tensor]]: Each item in result_list is a tuple, which
...
...
@@ -898,25 +899,33 @@ class PGDHead(FCOSMono3DHead):
centernesses
[
i
][
img_id
].
detach
()
for
i
in
range
(
num_levels
)
]
img_meta
=
batch_img_metas
[
img_id
]
results
=
self
.
_get_results_single
(
cls_score_list
,
bbox_pred_list
,
dir_cls_pred_list
,
depth_cls_pred_list
,
weight_list
,
attr_pred_list
,
centerness_pred_list
,
mlvl_points
,
img_meta
,
cfg
,
rescale
)
results
=
self
.
_predict_by_feat_single
(
cls_score_list
=
cls_score_list
,
bbox_pred_list
=
bbox_pred_list
,
dir_cls_pred_list
=
dir_cls_pred_list
,
depth_cls_pred_list
=
depth_cls_pred_list
,
weight_list
=
weight_list
,
attr_pred_list
=
attr_pred_list
,
centerness_pred_list
=
centerness_pred_list
,
mlvl_points
=
mlvl_points
,
img_meta
=
img_meta
,
cfg
=
cfg
,
rescale
=
rescale
)
result_list
.
append
(
results
)
return
result_list
def
_
get_results
_single
(
self
,
cls_score
s
,
bbox_pred
s
,
dir_cls_pred
s
,
depth_cls_pred
s
,
weight
s
,
attr_pred
s
,
centerness
es
,
mlvl_points
,
img_meta
,
cfg
,
rescale
=
False
)
:
def
_
predict_by_feat
_single
(
self
,
cls_score
_list
:
List
[
Tensor
]
,
bbox_pred
_list
:
List
[
Tensor
]
,
dir_cls_pred
_list
:
List
[
Tensor
]
,
depth_cls_pred
_list
:
List
[
Tensor
]
,
weight
_list
:
List
[
Tensor
]
,
attr_pred
_list
:
List
[
Tensor
]
,
centerness
_pred_list
:
List
[
Tensor
]
,
mlvl_points
:
Tensor
,
img_meta
:
dict
,
cfg
:
ConfigType
,
rescale
:
bool
=
False
)
->
InstanceData
:
"""Transform outputs for a single batch item into bbox predictions.
Args:
...
...
@@ -951,7 +960,7 @@ class PGDHead(FCOSMono3DHead):
view
=
np
.
array
(
img_meta
[
'cam2img'
])
scale_factor
=
img_meta
[
'scale_factor'
]
cfg
=
self
.
test_cfg
if
cfg
is
None
else
cfg
assert
len
(
cls_score
s
)
==
len
(
bbox_pred
s
)
==
len
(
mlvl_points
)
assert
len
(
cls_score
_list
)
==
len
(
bbox_pred
_list
)
==
len
(
mlvl_points
)
mlvl_centers2d
=
[]
mlvl_bboxes
=
[]
mlvl_scores
=
[]
...
...
@@ -966,8 +975,9 @@ class PGDHead(FCOSMono3DHead):
for
cls_score
,
bbox_pred
,
dir_cls_pred
,
depth_cls_pred
,
weight
,
\
attr_pred
,
centerness
,
points
in
zip
(
cls_scores
,
bbox_preds
,
dir_cls_preds
,
depth_cls_preds
,
weights
,
attr_preds
,
centernesses
,
mlvl_points
):
cls_score_list
,
bbox_pred_list
,
dir_cls_pred_list
,
depth_cls_pred_list
,
weight_list
,
attr_pred_list
,
centerness_pred_list
,
mlvl_points
):
assert
cls_score
.
size
()[
-
2
:]
==
bbox_pred
.
size
()[
-
2
:]
scores
=
cls_score
.
permute
(
1
,
2
,
0
).
reshape
(
-
1
,
self
.
cls_out_channels
).
sigmoid
()
...
...
@@ -1018,9 +1028,9 @@ class PGDHead(FCOSMono3DHead):
bbox_pred3d
[:,
:
2
]
=
points
-
bbox_pred3d
[:,
:
2
]
if
rescale
:
bbox_pred3d
[:,
:
2
]
/=
bbox_pred3d
[:,
:
2
].
new_tensor
(
scale_factor
)
scale_factor
[
0
]
)
if
self
.
pred_bbox2d
:
bbox_pred2d
/=
bbox_pred2d
.
new_tensor
(
scale_factor
)
bbox_pred2d
/=
bbox_pred2d
.
new_tensor
(
scale_factor
[
0
]
)
if
self
.
use_depth_classifier
:
prob_depth_pred
=
self
.
bbox_coder
.
decode_prob_depth
(
depth_cls_pred
,
self
.
depth_range
,
self
.
depth_unit
,
...
...
@@ -1106,13 +1116,21 @@ class PGDHead(FCOSMono3DHead):
results
.
attr_labels
=
attrs
if
self
.
pred_bbox2d
:
results_2d
=
InstanceData
()
bboxes2d
=
nms_results
[
-
1
]
bboxes2d
=
torch
.
cat
([
bboxes2d
,
scores
[:,
None
]],
dim
=
1
)
results
.
bboxes
=
bboxes2d
return
results
def
get_targets
(
self
,
points
,
batch_gt_instances_3d
):
results_2d
.
bboxes
=
bboxes2d
results_2d
.
scores
=
scores
results_2d
.
labels
=
labels
return
results
,
results_2d
else
:
return
results
def
get_targets
(
self
,
points
:
List
[
Tensor
],
batch_gt_instances_3d
:
InstanceList
,
batch_gt_instances
:
InstanceList
,
)
->
Tuple
[
List
[
Tensor
],
List
[
Tensor
],
List
[
Tensor
],
List
[
Tensor
]]:
"""Compute regression, classification and centerness targets for points
in multiple images.
...
...
@@ -1120,9 +1138,10 @@ class PGDHead(FCOSMono3DHead):
points (list[Tensor]): Points of each fpn level, each has shape
(num_points, 2).
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instance_3d. It usually includes ``bboxes``、``labels``
、``bboxes_3d``、``labels_3d``、``depths``、``centers_2d`` and
attributes.
gt_instance_3d. It usually includes ``bboxes_3d``,
``labels_3d``, ``depths``, ``centers_2d`` and attributes.
batch_gt_instances (list[:obj:`InstanceData`]): Batch of
gt_instance. It usually includes ``bboxes`` and ``labels``.
Returns:
tuple:
...
...
@@ -1146,14 +1165,17 @@ class PGDHead(FCOSMono3DHead):
if
'attr_labels'
not
in
batch_gt_instances_3d
[
0
]:
for
gt_instances_3d
in
batch_gt_instances_3d
:
gt_instances_3d
.
attr_labels
=
gt_instances_3d
.
labels
.
new_full
(
gt_instances_3d
.
labels
.
shape
,
self
.
attr_background_label
)
gt_instances_3d
.
attr_labels
=
\
gt_instances_3d
.
labels_3d
.
new_full
(
gt_instances_3d
.
labels_3d
.
shape
,
self
.
attr_background_label
)
# get labels and bbox_targets of each image
_
,
bbox_targets_list
,
labels_3d_list
,
bbox_targets_3d_list
,
\
centerness_targets_list
,
attr_targets_list
=
multi_apply
(
self
.
_get_target_single
,
batch_gt_instances_3d
,
batch_gt_instances
,
points
=
concat_points
,
regress_ranges
=
concat_regress_ranges
,
num_points_per_lvl
=
num_points
)
...
...
mmdet3d/models/dense_heads/smoke_mono3d_head.py
View file @
b496f579
# Copyright (c) OpenMMLab. All rights reserved.
from
typing
import
List
,
Optional
,
Tuple
,
Union
from
typing
import
List
,
Optional
,
Tuple
import
torch
from
mmcv.runner
import
force_fp32
from
mmengine.config
import
ConfigDict
from
mmengine.data
import
InstanceData
from
torch
import
Tensor
from
torch.nn
import
functional
as
F
from
mmdet3d.core.utils
import
(
ConfigType
,
InstanceList
,
OptConfigType
,
OptInstanceList
,
OptMultiConfig
)
from
mmdet3d.registry
import
MODELS
,
TASK_UTILS
from
mmdet.core
import
multi_apply
from
mmdet.models.utils
import
gaussian_radius
,
gen_gaussian_target
...
...
@@ -35,19 +35,20 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
regression heatmap channels.
ori_channel (list[int]): indices of orientation offset pred in
regression heatmap channels.
bbox_coder (dict): Bbox coder for encoding and decoding boxes.
loss_cls (dict, optional): Config of classification loss.
bbox_coder (:obj:`ConfigDict` or dict): Bbox coder for encoding
and decoding boxes.
loss_cls (:obj:`ConfigDict` or dict): Config of classification loss.
Default: loss_cls=dict(type='GaussionFocalLoss', loss_weight=1.0).
loss_bbox (
d
ict
,
o
ptional
): Config of localization loss.
loss_bbox (
:obj:`ConfigD
ict
`
o
r dict
): Config of localization loss.
Default: loss_bbox=dict(type='L1Loss', loss_weight=10.0).
loss_dir (dict, optional): Config of direction classification loss.
In SMOKE, Default: None.
loss_attr (dict, optional): Config of attribute classification loss.
In SMOKE, Default: None.
loss_centerness (dict): Config of centerness loss.
norm_cfg (dict): Dictionary to construct and config norm layer.
loss_dir (:obj:`ConfigDict` or dict, Optional): Config of direction
classification loss. In SMOKE, Default: None.
loss_attr (:obj:`ConfigDict` or dict, Optional): Config of attribute
classification loss. In SMOKE, Default: None.
norm_cfg (:obj:`ConfigDict` or dict): Dictionary to construct and config norm layer.
Default: norm_cfg=dict(type='GN', num_groups=32, requires_grad=True).
init_cfg (dict): Initialization config dict. Default: None.
init_cfg (:obj:`ConfigDict` or dict or list[:obj:`ConfigDict` or \
dict]): Initialization config dict. Defaults to None.
"""
# noqa: E501
def
__init__
(
self
,
...
...
@@ -55,15 +56,16 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
in_channels
:
int
,
dim_channel
:
List
[
int
],
ori_channel
:
List
[
int
],
bbox_coder
:
dict
,
loss_cls
:
dict
=
dict
(
type
=
'GaussionFocalLoss'
,
loss_weight
=
1.0
),
loss_bbox
:
dict
=
dict
(
type
=
'L1Loss'
,
loss_weight
=
0.1
),
loss_dir
:
Optional
[
dict
]
=
None
,
loss_attr
:
Optional
[
dict
]
=
None
,
norm_cfg
:
dict
=
dict
(
bbox_coder
:
ConfigType
,
loss_cls
:
ConfigType
=
dict
(
type
=
'mmdet.GaussionFocalLoss'
,
loss_weight
=
1.0
),
loss_bbox
:
ConfigType
=
dict
(
type
=
'mmdet.L1Loss'
,
loss_weight
=
0.1
),
loss_dir
:
OptConfigType
=
None
,
loss_attr
:
OptConfigType
=
None
,
norm_cfg
:
OptConfigType
=
dict
(
type
=
'GN'
,
num_groups
=
32
,
requires_grad
=
True
),
init_cfg
:
Opt
ional
[
Union
[
ConfigDict
,
dict
]]
=
None
,
init_cfg
:
Opt
MultiConfig
=
None
,
**
kwargs
)
->
None
:
super
().
__init__
(
num_classes
,
...
...
@@ -79,11 +81,11 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
self
.
ori_channel
=
ori_channel
self
.
bbox_coder
=
TASK_UTILS
.
build
(
bbox_coder
)
def
forward
(
self
,
feats
:
Tuple
[
Tensor
]
)
:
def
forward
(
self
,
x
:
Tuple
[
Tensor
])
->
Tuple
[
List
[
Tensor
]
]
:
"""Forward features from the upstream network.
Args:
feats
(tuple[Tensor]): Features from the upstream network, each is
x
(tuple[Tensor]): Features from the upstream network, each is
a 4D-tensor.
Returns:
...
...
@@ -95,9 +97,9 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
level, each is a 4D-tensor, the channel number is
num_points * bbox_code_size.
"""
return
multi_apply
(
self
.
forward_single
,
feats
)
return
multi_apply
(
self
.
forward_single
,
x
)
def
forward_single
(
self
,
x
:
Tensor
)
->
Union
[
Tensor
,
Tensor
]:
def
forward_single
(
self
,
x
:
Tensor
)
->
Tuple
[
Tensor
,
Tensor
]:
"""Forward features of a single scale level.
Args:
...
...
@@ -118,12 +120,11 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
bbox_pred
[:,
self
.
ori_channel
,
...]
=
F
.
normalize
(
vector_ori
)
return
cls_score
,
bbox_pred
@
force_fp32
(
apply_to
=
(
'cls_scores'
,
'bbox_preds'
))
def
get_results
(
self
,
cls_scores
,
bbox_preds
,
batch_img_metas
,
rescale
=
None
):
def
predict_by_feat
(
self
,
cls_scores
:
List
[
Tensor
],
bbox_preds
:
List
[
Tensor
],
batch_img_metas
:
Optional
[
List
[
dict
]]
=
None
,
rescale
:
bool
=
None
)
->
InstanceList
:
"""Generate bboxes from bbox head predictions.
Args:
...
...
@@ -134,8 +135,16 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
rescale (bool): If True, return boxes in original image space.
Returns:
list[tuple[:obj:`CameraInstance3DBoxes`, Tensor, Tensor, None]]:
Each item in result_list is 4-tuple.
list[:obj:`InstanceData`]: 3D Detection results of each image
after the post process.
Each item usually contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instance, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (Tensor): Contains a tensor with shape
(num_instances, 7).
"""
assert
len
(
cls_scores
)
==
len
(
bbox_preds
)
==
1
cam2imgs
=
torch
.
stack
([
...
...
@@ -146,7 +155,7 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
cls_scores
[
0
].
new_tensor
(
img_meta
[
'trans_mat'
])
for
img_meta
in
batch_img_metas
])
batch_bboxes
,
batch_scores
,
batch_topk_labels
=
self
.
decode_heatmap
(
batch_bboxes
,
batch_scores
,
batch_topk_labels
=
self
.
_
decode_heatmap
(
cls_scores
[
0
],
bbox_preds
[
0
],
batch_img_metas
,
...
...
@@ -183,14 +192,14 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
return
result_list
def
decode_heatmap
(
self
,
cls_score
,
reg_pred
,
batch_img_metas
,
cam2imgs
,
trans_mats
,
topk
=
100
,
kernel
=
3
)
:
def
_
decode_heatmap
(
self
,
cls_score
:
Tensor
,
reg_pred
:
Tensor
,
batch_img_metas
:
List
[
dict
]
,
cam2imgs
:
Tensor
,
trans_mats
:
Tensor
,
topk
:
int
=
100
,
kernel
:
int
=
3
)
->
Tuple
[
Tensor
,
Tensor
,
Tensor
]
:
"""Transform outputs into detections raw bbox predictions.
Args:
...
...
@@ -212,6 +221,7 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
Returns:
tuple[torch.Tensor]: Decoded output of SMOKEHead, containing
the following Tensors:
- batch_bboxes (Tensor): Coords of each 3D box.
shape (B, k, 7)
- batch_scores (Tensor): Scores of each 3D box.
...
...
@@ -241,9 +251,10 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
batch_bboxes
=
batch_bboxes
.
view
(
bs
,
-
1
,
self
.
bbox_code_size
)
return
batch_bboxes
,
batch_scores
,
batch_topk_labels
def
get_predictions
(
self
,
labels_3d
,
centers_2d
,
gt_locations
,
gt_dimensions
,
gt_orientations
,
indices
,
batch_img_metas
,
pred_reg
):
def
get_predictions
(
self
,
labels_3d
:
Tensor
,
centers_2d
:
Tensor
,
gt_locations
:
Tensor
,
gt_dimensions
:
Tensor
,
gt_orientations
:
Tensor
,
indices
:
Tensor
,
batch_img_metas
:
List
[
dict
],
pred_reg
:
Tensor
)
->
dict
:
"""Prepare predictions for computing loss.
Args:
...
...
@@ -266,6 +277,7 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
Returns:
dict: the dict has components below:
- bbox3d_yaws (:obj:`CameraInstance3DBoxes`):
bbox calculated using pred orientations.
- bbox3d_dims (:obj:`CameraInstance3DBoxes`):
...
...
@@ -312,22 +324,26 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
return
pred_bboxes
def
get_targets
(
self
,
batch_gt_instances_3d
,
feat_shape
,
batch_img_metas
):
def
get_targets
(
self
,
batch_gt_instances_3d
:
InstanceList
,
batch_gt_instances
:
InstanceList
,
feat_shape
:
Tuple
[
int
],
batch_img_metas
:
List
[
dict
])
->
Tuple
[
Tensor
,
int
,
dict
]:
"""Get training targets for batch images.
Args:
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instance_3d. It usually includes ``bboxes``、``labels``
、``bboxes_3d``、``labels_3d``、``depths``、``centers_2d`` and
attributes.
gt_instance_3d. It usually includes ``bboxes_3d``,
``labels_3d``, ``depths``, ``centers_2d`` and attributes.
batch_gt_instances (list[:obj:`InstanceData`]): Batch of
gt_instance. It usually includes ``bboxes`` and ``labels``.
feat_shape (tuple[int]): Feature map shape with value,
shape (B, _, H, W).
batch_img_metas (list[dict]): Meta information of each image, e.g.,
image size, scaling factor, etc.
Returns:
tuple[Tensor, dict]: The Tensor value is the targets of
tuple[Tensor,
int,
dict]: The Tensor value is the targets of
center heatmap, the dict has components below:
- gt_centers_2d (Tensor): Coords of each projected 3D box
center on image. shape (B * max_objs, 2)
- gt_labels_3d (Tensor): Labels of each 3D box.
...
...
@@ -347,10 +363,10 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
"""
gt_bboxes
=
[
gt_instances
_3d
.
bboxes
for
gt_instances
_3d
in
batch_gt_instances
_3d
gt_instances
.
bboxes
for
gt_instances
in
batch_gt_instances
]
gt_labels
=
[
gt_instances
_3d
.
labels
for
gt_instances
_3d
in
batch_gt_instances
_3d
gt_instances
.
labels
for
gt_instances
in
batch_gt_instances
]
gt_bboxes_3d
=
[
gt_instances_3d
.
bboxes_3d
...
...
@@ -459,12 +475,14 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
return
center_heatmap_target
,
avg_factor
,
target_labels
def
loss
(
self
,
cls_scores
,
bbox_preds
,
batch_gt_instances_3d
,
batch_img_metas
,
batch_gt_instances_ignore
=
None
):
def
loss_by_feat
(
self
,
cls_scores
:
List
[
Tensor
],
bbox_preds
:
List
[
Tensor
],
batch_gt_instances_3d
:
InstanceList
,
batch_gt_instances
:
InstanceList
,
batch_img_metas
:
List
[
dict
],
batch_gt_instances_ignore
:
OptInstanceList
=
None
)
->
dict
:
"""Compute loss of the head.
Args:
...
...
@@ -474,9 +492,10 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
number is bbox_code_size.
shape (B, 7, H, W).
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instance_3d. It usually includes ``bboxes``、``labels``
、``bboxes_3d``、``labels_3d``、``depths``、``centers_2d`` and
attributes.
gt_instance_3d. It usually includes ``bboxes_3d``,
``labels_3d``, ``depths``, ``centers_2d`` and attributes.
batch_gt_instances (list[:obj:`InstanceData`]): Batch of
gt_instance. It usually includes ``bboxes`` and ``labels``.
batch_img_metas (list[dict]): Meta information of each image, e.g.,
image size, scaling factor, etc.
batch_gt_instances_ignore (list[:obj:`InstanceData`], Optional):
...
...
@@ -485,15 +504,19 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
Defaults to None.
Returns:
dict[str, Tensor]: A dictionary of loss components.
dict[str, Tensor]: A dictionary of loss components, which has
components below:
- loss_cls (Tensor): loss of cls heatmap.
- loss_bbox (Tensor): loss of bbox heatmap.
"""
assert
len
(
cls_scores
)
==
len
(
bbox_preds
)
==
1
assert
batch_gt_instances_ignore
is
None
center_2d_heatmap
=
cls_scores
[
0
]
pred_reg
=
bbox_preds
[
0
]
center_2d_heatmap_target
,
avg_factor
,
target_labels
=
\
self
.
get_targets
(
batch_gt_instances_3d
,
batch_gt_instances
,
center_2d_heatmap
.
shape
,
batch_img_metas
)
...
...
mmdet3d/models/detectors/fcos_mono3d.py
View file @
b496f579
# Copyright (c) OpenMMLab. All rights reserved.
from
mmdet3d.core
import
ConfigType
,
OptConfigType
,
OptMultiConfig
from
mmdet3d.registry
import
MODELS
from
.single_stage_mono3d
import
SingleStageMono3DDetector
...
...
@@ -9,14 +10,36 @@ class FCOSMono3D(SingleStageMono3DDetector):
Currently please refer to our entry on the
`leaderboard <https://www.nuscenes.org/object-detection?externalData=all&mapData=all&modalities=Camera>`_.
Args:
backbone (:obj:`ConfigDict` or dict): The backbone config.
neck (:obj:`ConfigDict` or dict): The neck config.
bbox_head (:obj:`ConfigDict` or dict): The bbox head config.
train_cfg (:obj:`ConfigDict` or dict, optional): The training config
of FCOS. Defaults to None.
test_cfg (:obj:`ConfigDict` or dict, optional): The testing config
of FCOS. Defaults to None.
data_preprocessor (:obj:`ConfigDict` or dict, optional): Config of
:class:`DetDataPreprocessor` to process the input data.
Defaults to None.
init_cfg (:obj:`ConfigDict` or list[:obj:`ConfigDict`] or dict or
list[dict], optional): Initialization config dict.
Defaults to None.
"""
# noqa: E501
def
__init__
(
self
,
backbone
,
neck
,
bbox_head
,
train_cfg
=
None
,
test_cfg
=
None
,
pretrained
=
None
):
super
(
FCOSMono3D
,
self
).
__init__
(
backbone
,
neck
,
bbox_head
,
train_cfg
,
test_cfg
,
pretrained
)
backbone
:
ConfigType
,
neck
:
ConfigType
,
bbox_head
:
ConfigType
,
train_cfg
:
OptConfigType
=
None
,
test_cfg
:
OptConfigType
=
None
,
data_preprocessor
:
OptConfigType
=
None
,
init_cfg
:
OptMultiConfig
=
None
)
->
None
:
super
().
__init__
(
backbone
=
backbone
,
neck
=
neck
,
bbox_head
=
bbox_head
,
train_cfg
=
train_cfg
,
test_cfg
=
test_cfg
,
data_preprocessor
=
data_preprocessor
,
init_cfg
=
init_cfg
)
mmdet3d/models/detectors/single_stage_mono3d.py
View file @
b496f579
# Copyright (c) OpenMMLab. All rights reserved.
from
os
import
path
as
osp
from
typing
import
Tuple
import
mmcv
import
numpy
as
np
import
torch
from
mmcv.parallel
import
DataContainer
as
DC
from
torch
import
Tensor
from
mmdet3d.core
import
(
CameraInstance3DBoxes
,
bbox3d2result
,
show_multi_modality_result
)
from
mmdet3d.core
import
Det3DDataSample
,
InstanceList
from
mmdet3d.core.utils
import
SampleList
from
mmdet3d.registry
import
MODELS
from
mmdet.models.detectors.single_stage
import
SingleStageDetector
...
...
@@ -16,212 +13,61 @@ from mmdet.models.detectors.single_stage import SingleStageDetector
class
SingleStageMono3DDetector
(
SingleStageDetector
):
"""Base class for monocular 3D single-stage detectors.
S
ingle-stage detectors directly and densely predict bounding
boxes on the
output features of the backbone+neck.
Monocular 3D s
ingle-stage detectors directly and densely predict bounding
boxes on the
output features of the backbone+neck.
"""
def
extract_feats
(
self
,
imgs
):
"""Directly extract features from the backbone+neck."""
assert
isinstance
(
imgs
,
list
)
return
[
self
.
extract_feat
(
img
)
for
img
in
imgs
]
def
forward_train
(
self
,
img
,
img_metas
,
gt_bboxes
,
gt_labels
,
gt_bboxes_3d
,
gt_labels_3d
,
centers2d
,
depths
,
attr_labels
=
None
,
gt_bboxes_ignore
=
None
):
"""
def
convert_to_datasample
(
self
,
results_list
:
InstanceList
)
->
SampleList
:
""" Convert results list to `Det3DDataSample`.
Args:
img (Tensor): Input images of shape (N, C, H, W).
Typically these should be mean centered and std scaled.
img_metas (list[dict]): A List of image info dict where each dict
has: 'img_shape', 'scale_factor', 'flip', and may also contain
'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
For details on the values of these keys see
:class:`mmdet.datasets.pipelines.Collect`.
gt_bboxes (list[Tensor]): Each item are the truth boxes for each
image in [tl_x, tl_y, br_x, br_y] format.
gt_labels (list[Tensor]): Class indices corresponding to each box
gt_bboxes_3d (list[Tensor]): Each item are the 3D truth boxes for
each image in [x, y, z, x_size, y_size, z_size, yaw, vx, vy]
format.
gt_labels_3d (list[Tensor]): 3D class indices corresponding to
each box.
centers2d (list[Tensor]): Projected 3D centers onto 2D images.
depths (list[Tensor]): Depth of projected centers on 2D images.
attr_labels (list[Tensor], optional): Attribute indices
corresponding to each box
gt_bboxes_ignore (list[Tensor]): Specify which bounding
boxes can be ignored when computing the loss.
results_list (list[:obj:`InstanceData`]): Detection results
of each image. For each image, the result can take one of two
formats:
1. pred_instances_3d
2. (pred_instances_3d, pred_instances)
Returns:
dict[str, Tensor]: A dictionary of loss components.
"""
x
=
self
.
extract_feat
(
img
)
losses
=
self
.
bbox_head
.
forward_train
(
x
,
img_metas
,
gt_bboxes
,
gt_labels
,
gt_bboxes_3d
,
gt_labels_3d
,
centers2d
,
depths
,
attr_labels
,
gt_bboxes_ignore
)
return
losses
list[:obj:`Det3DDataSample`]: 3D detection results of the
input images. Each Det3DDataSample usually contains
``pred_instances_3d``, and the ``pred_instances_3d`` usually
contains the following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instance, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (Tensor): Contains a tensor with shape
(num_instances, C) where C >=7.
"""
out_results_list
=
[]
for
i
in
range
(
len
(
results_list
)):
result
=
Det3DDataSample
()
if
len
(
results_list
[
i
])
==
2
:
result
.
pred_instances_3d
=
results_list
[
i
][
0
]
result
.
pred_instances
=
results_list
[
i
][
1
]
else
:
result
.
pred_instances_3d
=
results_list
[
i
]
out_results_list
.
append
(
result
)
return
out_results_list
def
simple_test
(
self
,
img
,
img_metas
,
rescale
=
False
)
:
"""
Test function without test time augmentation
.
def
extract_feat
(
self
,
batch_inputs_dict
:
dict
)
->
Tuple
[
Tensor
]
:
"""
Extract features
.
Args:
imgs (list[torch.Tensor]): List of multiple images
img_metas (list[dict]): List of image information.
rescale (bool, optional): Whether to rescale the results.
Defaults to False.
batch_inputs_dict (dict): Contains 'imgs' key
with image tensor with shape (N, C, H, W).
Returns:
list[list[np.ndarray]]: BBox results of each image and classes.
The outer list corresponds to each image. The inner list
corresponds to each class.
tuple[Tensor]: Multi-level features that may have
different resolutions.
"""
x
=
self
.
extract_feat
(
img
)
outs
=
self
.
bbox_head
(
x
)
bbox_outputs
=
self
.
bbox_head
.
get_bboxes
(
*
outs
,
img_metas
,
rescale
=
rescale
)
if
self
.
bbox_head
.
pred_bbox2d
:
from
mmdet.core
import
bbox2result
bbox2d_img
=
[
bbox2result
(
bboxes2d
,
labels
,
self
.
bbox_head
.
num_classes
)
for
bboxes
,
scores
,
labels
,
attrs
,
bboxes2d
in
bbox_outputs
]
bbox_outputs
=
[
bbox_outputs
[
0
][:
-
1
]]
bbox_img
=
[
bbox3d2result
(
bboxes
,
scores
,
labels
,
attrs
)
for
bboxes
,
scores
,
labels
,
attrs
in
bbox_outputs
]
bbox_list
=
[
dict
()
for
i
in
range
(
len
(
img_metas
))]
for
result_dict
,
img_bbox
in
zip
(
bbox_list
,
bbox_img
):
result_dict
[
'img_bbox'
]
=
img_bbox
if
self
.
bbox_head
.
pred_bbox2d
:
for
result_dict
,
img_bbox2d
in
zip
(
bbox_list
,
bbox2d_img
):
result_dict
[
'img_bbox2d'
]
=
img_bbox2d
return
bbox_list
batch_imgs
=
batch_inputs_dict
[
'imgs'
]
x
=
self
.
backbone
(
batch_imgs
)
if
self
.
with_neck
:
x
=
self
.
neck
(
x
)
return
x
# TODO: Support test time augmentation
def
aug_test
(
self
,
imgs
,
img_metas
,
rescale
=
False
):
"""Test function with test time augmentation."""
feats
=
self
.
extract_feats
(
imgs
)
# only support aug_test for one sample
outs_list
=
[
self
.
bbox_head
(
x
)
for
x
in
feats
]
for
i
,
img_meta
in
enumerate
(
img_metas
):
if
img_meta
[
0
][
'pcd_horizontal_flip'
]:
for
j
in
range
(
len
(
outs_list
[
i
])):
# for each prediction
if
outs_list
[
i
][
j
][
0
]
is
None
:
continue
for
k
in
range
(
len
(
outs_list
[
i
][
j
])):
# every stride of featmap
outs_list
[
i
][
j
][
k
]
=
torch
.
flip
(
outs_list
[
i
][
j
][
k
],
dims
=
[
3
])
reg
=
outs_list
[
i
][
1
]
for
reg_feat
in
reg
:
# offset_x
reg_feat
[:,
0
,
:,
:]
=
1
-
reg_feat
[:,
0
,
:,
:]
# velo_x
if
self
.
bbox_head
.
pred_velo
:
reg_feat
[:,
7
,
:,
:]
=
-
reg_feat
[:,
7
,
:,
:]
# rotation
reg_feat
[:,
6
,
:,
:]
=
-
reg_feat
[:,
6
,
:,
:]
+
np
.
pi
merged_outs
=
[]
for
i
in
range
(
len
(
outs_list
[
0
])):
# for each prediction
merged_feats
=
[]
for
j
in
range
(
len
(
outs_list
[
0
][
i
])):
if
outs_list
[
0
][
i
][
0
]
is
None
:
merged_feats
.
append
(
None
)
continue
# for each stride of featmap
avg_feats
=
torch
.
mean
(
torch
.
cat
([
x
[
i
][
j
]
for
x
in
outs_list
]),
dim
=
0
,
keepdim
=
True
)
if
i
==
1
:
# regression predictions
# rot/velo/2d det keeps the original
avg_feats
[:,
6
:,
:,
:]
=
\
outs_list
[
0
][
i
][
j
][:,
6
:,
:,
:]
if
i
==
2
:
# dir_cls keeps the original
avg_feats
=
outs_list
[
0
][
i
][
j
]
merged_feats
.
append
(
avg_feats
)
merged_outs
.
append
(
merged_feats
)
merged_outs
=
tuple
(
merged_outs
)
bbox_outputs
=
self
.
bbox_head
.
get_bboxes
(
*
merged_outs
,
img_metas
[
0
],
rescale
=
rescale
)
if
self
.
bbox_head
.
pred_bbox2d
:
from
mmdet.core
import
bbox2result
bbox2d_img
=
[
bbox2result
(
bboxes2d
,
labels
,
self
.
bbox_head
.
num_classes
)
for
bboxes
,
scores
,
labels
,
attrs
,
bboxes2d
in
bbox_outputs
]
bbox_outputs
=
[
bbox_outputs
[
0
][:
-
1
]]
bbox_img
=
[
bbox3d2result
(
bboxes
,
scores
,
labels
,
attrs
)
for
bboxes
,
scores
,
labels
,
attrs
in
bbox_outputs
]
bbox_list
=
dict
()
bbox_list
.
update
(
img_bbox
=
bbox_img
[
0
])
if
self
.
bbox_head
.
pred_bbox2d
:
bbox_list
.
update
(
img_bbox2d
=
bbox2d_img
[
0
])
return
[
bbox_list
]
def
show_results
(
self
,
data
,
result
,
out_dir
,
show
=
False
,
score_thr
=
None
):
"""Results visualization.
Args:
data (list[dict]): Input images and the information of the sample.
result (list[dict]): Prediction results.
out_dir (str): Output directory of visualization result.
show (bool, optional): Determines whether you are
going to show result by open3d.
Defaults to False.
TODO: implement score_thr of single_stage_mono3d.
score_thr (float, optional): Score threshold of bounding boxes.
Default to None.
Not implemented yet, but it is here for unification.
"""
for
batch_id
in
range
(
len
(
result
)):
if
isinstance
(
data
[
'img_metas'
][
0
],
DC
):
img_filename
=
data
[
'img_metas'
][
0
].
_data
[
0
][
batch_id
][
'filename'
]
cam2img
=
data
[
'img_metas'
][
0
].
_data
[
0
][
batch_id
][
'cam2img'
]
elif
mmcv
.
is_list_of
(
data
[
'img_metas'
][
0
],
dict
):
img_filename
=
data
[
'img_metas'
][
0
][
batch_id
][
'filename'
]
cam2img
=
data
[
'img_metas'
][
0
][
batch_id
][
'cam2img'
]
else
:
ValueError
(
f
"Unsupported data type
{
type
(
data
[
'img_metas'
][
0
])
}
"
f
'for visualization!'
)
img
=
mmcv
.
imread
(
img_filename
)
file_name
=
osp
.
split
(
img_filename
)[
-
1
].
split
(
'.'
)[
0
]
assert
out_dir
is
not
None
,
'Expect out_dir, got none.'
pred_bboxes
=
result
[
batch_id
][
'img_bbox'
][
'boxes_3d'
]
assert
isinstance
(
pred_bboxes
,
CameraInstance3DBoxes
),
\
f
'unsupported predicted bbox type
{
type
(
pred_bboxes
)
}
'
show_multi_modality_result
(
img
,
None
,
pred_bboxes
,
cam2img
,
out_dir
,
file_name
,
'camera'
,
show
=
show
)
pass
mmdet3d/models/detectors/smoke_mono3d.py
View file @
b496f579
# Copyright (c) OpenMMLab. All rights reserved.
from
mmdet3d.core
import
ConfigType
,
OptConfigType
,
OptMultiConfig
from
mmdet3d.registry
import
MODELS
from
.single_stage_mono3d
import
SingleStageMono3DDetector
...
...
@@ -8,14 +9,35 @@ class SMOKEMono3D(SingleStageMono3DDetector):
r
"""`SMOKE <https://arxiv.org/abs/2002.10111>`_ for monocular 3D object
detection.
Args:
backbone (:obj:`ConfigDict` or dict): The backbone config.
neck (:obj:`ConfigDict` or dict): The neck config.
bbox_head (:obj:`ConfigDict` or dict): The bbox head config.
train_cfg (:obj:`ConfigDict` or dict, optional): The training config
of SMOKE. Defaults to None.
test_cfg (:obj:`ConfigDict` or dict, optional): The testing config
of SMOKE. Defaults to None.
data_preprocessor (:obj:`ConfigDict` or dict, optional): Config of
:class:`DetDataPreprocessor` to process the input data.
Defaults to None.
init_cfg (:obj:`ConfigDict` or list[:obj:`ConfigDict`] or dict or
list[dict], optional): Initialization config dict.
Defaults to None.
"""
def
__init__
(
self
,
backbone
,
neck
,
bbox_head
,
train_cfg
=
None
,
test_cfg
=
None
,
pretrained
=
None
):
super
(
SMOKEMono3D
,
self
).
__init__
(
backbone
,
neck
,
bbox_head
,
train_cfg
,
test_cfg
,
pretrained
)
backbone
:
ConfigType
,
neck
:
ConfigType
,
bbox_head
:
ConfigType
,
train_cfg
:
OptConfigType
=
None
,
test_cfg
:
OptConfigType
=
None
,
data_preprocessor
:
OptConfigType
=
None
,
init_cfg
:
OptMultiConfig
=
None
)
->
None
:
super
().
__init__
(
backbone
=
backbone
,
neck
=
neck
,
bbox_head
=
bbox_head
,
train_cfg
=
train_cfg
,
test_cfg
=
test_cfg
,
data_preprocessor
=
data_preprocessor
,
init_cfg
=
init_cfg
)
tests/data/kitti/kitti_infos_train.pkl
View file @
b496f579
No preview for this file type
tests/test_data/test_datasets/test_kitti_dataset.py
View file @
b496f579
...
...
@@ -85,6 +85,10 @@ def test_getitem():
assert
isinstance
(
ann_info
[
'gt_bboxes_3d'
],
LiDARInstance3DBoxes
)
assert
torch
.
allclose
(
ann_info
[
'gt_bboxes_3d'
].
tensor
.
sum
(),
torch
.
tensor
(
7.2650
))
assert
'centers_2d'
in
ann_info
assert
ann_info
[
'centers_2d'
].
dtype
==
np
.
float64
assert
'depths'
in
ann_info
assert
ann_info
[
'depths'
].
dtype
==
np
.
float64
assert
'group_id'
in
ann_info
assert
ann_info
[
'group_id'
].
dtype
==
np
.
int64
assert
'occluded'
in
ann_info
...
...
tests/test_data/test_datasets/test_nuscenes_dataset.py
View file @
b496f579
...
...
@@ -45,8 +45,8 @@ def test_getitem():
_generate_nus_dataset_config
()
nus_dataset
=
NuScenesDataset
(
data_root
,
ann_file
,
data_root
=
data_root
,
ann_file
=
ann_file
,
data_prefix
=
data_prefix
,
pipeline
=
pipeline
,
metainfo
=
dict
(
CLASSES
=
classes
),
...
...
tests/test_data/test_transforms/utils.py
View file @
b496f579
...
...
@@ -4,6 +4,7 @@ import numpy as np
from
mmdet3d.core
import
LiDARInstance3DBoxes
# create a dummy `results` to test the pipeline
from
mmdet3d.datasets
import
LoadAnnotations3D
,
LoadPointsFromFile
from
mmdet3d.datasets.pipelines.loading
import
LoadImageFromFileMono3D
def
create_dummy_data_info
(
with_ann
=
True
):
...
...
@@ -20,6 +21,10 @@ def create_dummy_data_info(with_ann=True):
-
1.5808
]])),
'gt_labels_3d'
:
np
.
array
([
1
]),
'centers_2d'
:
np
.
array
([[
765.04
,
214.56
]]),
'depths'
:
np
.
array
([
8.410
]),
'num_lidar_pts'
:
np
.
array
([
377
]),
'difficulty'
:
...
...
@@ -134,6 +139,9 @@ def create_dummy_data_info(with_ann=True):
],
'bbox_label_3d'
:
-
1
,
'center_2d'
:
[
765.04
,
214.56
],
'depth'
:
8.410
,
'num_lidar_pts'
:
377
,
'difficulty'
:
...
...
@@ -168,3 +176,17 @@ def create_data_info_after_loading():
data_info
=
load_points_transform
(
data_info
)
data_info_after_loading
=
load_anns_transform
(
data_info
)
return
data_info_after_loading
def create_mono3d_data_info_after_loading():
    """Build a dummy mono3D sample with its image and annotations loaded.

    The dummy info dict from ``create_dummy_data_info`` is passed through
    ``LoadImageFromFileMono3D`` and then through ``LoadAnnotations3D``
    configured to load 2D boxes/labels, 3D boxes/labels and box depths.

    Returns:
        The data info produced by the annotation-loading transform.
    """
    annotation_loader = LoadAnnotations3D(
        with_bbox=True,
        with_label=True,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True)
    image_loader = LoadImageFromFileMono3D()
    # Image loading runs first, annotation loading second.
    loaded_info = image_loader(create_dummy_data_info())
    return annotation_loader(loaded_info)
tests/test_models/test_dense_heads/test_fcos_mono3d_head.py
View file @
b496f579
...
...
@@ -117,6 +117,7 @@ class TestFCOSMono3DHead(TestCase):
# When truth is non-empty then all losses
# should be nonzero for random inputs
gt_instances_3d
=
InstanceData
()
gt_instances
=
InstanceData
()
gt_bboxes
=
torch
.
rand
([
3
,
4
],
dtype
=
torch
.
float32
)
gt_bboxes_3d
=
CameraInstance3DBoxes
(
torch
.
rand
([
3
,
9
]),
box_dim
=
9
)
...
...
@@ -129,14 +130,14 @@ class TestFCOSMono3DHead(TestCase):
gt_instances_3d
.
bboxes_3d
=
gt_bboxes_3d
gt_instances_3d
.
labels_3d
=
gt_labels_3d
gt_instances
_3d
.
bboxes
=
gt_bboxes
gt_instances
_3d
.
labels
=
gt_labels
gt_instances
.
bboxes
=
gt_bboxes
gt_instances
.
labels
=
gt_labels
gt_instances_3d
.
centers_2d
=
centers_2d
gt_instances_3d
.
depths
=
depths
gt_instances_3d
.
attr_labels
=
attr_labels
gt_losses
=
fcos_mono3d_head
.
loss
(
*
ret_dict
,
[
gt_instances_3d
],
img_metas
)
gt_losses
=
fcos_mono3d_head
.
loss
_by_feat
(
*
ret_dict
,
[
gt_instances_3d
],
[
gt_instances
],
img_metas
)
gt_cls_loss
=
gt_losses
[
'loss_cls'
].
item
()
gt_siz_loss
=
gt_losses
[
'loss_size'
].
item
()
...
...
@@ -160,7 +161,7 @@ class TestFCOSMono3DHead(TestCase):
self
.
assertGreater
(
gt_atr_loss
,
0
,
'attribue loss should be positive'
)
# test get_results
results_list
=
fcos_mono3d_head
.
get_results
(
*
ret_dict
,
img_metas
)
results_list
=
fcos_mono3d_head
.
predict_by_feat
(
*
ret_dict
,
img_metas
)
self
.
assertEqual
(
len
(
results_list
),
1
,
'there should be no centerness loss when there are no true boxes'
)
...
...
tests/test_models/test_dense_heads/test_pgd_head.py
View file @
b496f579
...
...
@@ -142,6 +142,7 @@ class TestFGDHead(TestCase):
# When truth is non-empty then all losses
# should be nonzero for random inputs
gt_instances_3d
=
InstanceData
()
gt_instances
=
InstanceData
()
gt_bboxes
=
torch
.
rand
([
3
,
4
],
dtype
=
torch
.
float32
)
gt_bboxes_3d
=
CameraInstance3DBoxes
(
torch
.
rand
([
3
,
7
]),
box_dim
=
7
)
...
...
@@ -152,12 +153,13 @@ class TestFGDHead(TestCase):
gt_instances_3d
.
bboxes_3d
=
gt_bboxes_3d
gt_instances_3d
.
labels_3d
=
gt_labels_3d
gt_instances
_3d
.
bboxes
=
gt_bboxes
gt_instances
_3d
.
labels
=
gt_labels
gt_instances
.
bboxes
=
gt_bboxes
gt_instances
.
labels
=
gt_labels
gt_instances_3d
.
centers_2d
=
centers_2d
gt_instances_3d
.
depths
=
depths
gt_losses
=
pgd_head
.
loss
(
*
ret_dict
,
[
gt_instances_3d
],
img_metas
)
gt_losses
=
pgd_head
.
loss_by_feat
(
*
ret_dict
,
[
gt_instances_3d
],
[
gt_instances
],
img_metas
)
gt_cls_loss
=
gt_losses
[
'loss_cls'
].
item
()
gt_siz_loss
=
gt_losses
[
'loss_size'
].
item
()
...
...
@@ -184,15 +186,15 @@ class TestFGDHead(TestCase):
'consistency loss should be positive'
)
# test get_results
results_list
=
pgd_head
.
get_results
(
*
ret_dict
,
img_metas
)
results_list
=
pgd_head
.
predict_by_feat
(
*
ret_dict
,
img_metas
)
self
.
assertEqual
(
len
(
results_list
),
1
,
'there should be no centerness loss when there are no true boxes'
)
results
=
results_list
[
0
]
results
,
results_2d
=
results_list
[
0
]
pred_bboxes_3d
=
results
.
bboxes_3d
pred_scores_3d
=
results
.
scores_3d
pred_labels_3d
=
results
.
labels_3d
pred_bboxes_2d
=
results
.
bboxes
pred_bboxes_2d
=
results
_2d
.
bboxes
self
.
assertEqual
(
pred_bboxes_3d
.
tensor
.
shape
,
torch
.
Size
([
20
,
7
]),
'the shape of predicted 3d bboxes should be [20, 7]'
)
self
.
assertEqual
(
...
...
@@ -202,6 +204,6 @@ class TestFGDHead(TestCase):
pred_labels_3d
.
shape
,
torch
.
Size
([
20
]),
'the shape of predicted 3d bbox labels should be [20]'
)
self
.
assertEqual
(
pred_bboxes_2d
.
shape
,
torch
.
Size
([
20
,
5
]),
'the shape of predicted 2d bbox attribute labels should be [20,
5
]'
pred_bboxes_2d
.
shape
,
torch
.
Size
([
20
,
4
]),
'the shape of predicted 2d bbox attribute labels should be [20,
4
]'
)
tests/test_models/test_dense_heads/test_smoke_mono3d_head.py
View file @
b496f579
...
...
@@ -82,6 +82,7 @@ class TestSMOKEMono3DHead(TestCase):
# When truth is non-empty then all losses
# should be nonzero for random inputs
gt_instances_3d
=
InstanceData
()
gt_instances
=
InstanceData
()
gt_bboxes
=
torch
.
Tensor
([[
1.0
,
2.0
,
20.0
,
40.0
],
[
45.0
,
50.0
,
80.0
,
70.1
],
...
...
@@ -94,13 +95,14 @@ class TestSMOKEMono3DHead(TestCase):
gt_instances_3d
.
bboxes_3d
=
gt_bboxes_3d
gt_instances_3d
.
labels_3d
=
gt_labels_3d
gt_instances
_3d
.
bboxes
=
gt_bboxes
gt_instances
_3d
.
labels
=
gt_labels
gt_instances
.
bboxes
=
gt_bboxes
gt_instances
.
labels
=
gt_labels
gt_instances_3d
.
centers_2d
=
centers_2d
gt_instances_3d
.
depths
=
depths
gt_losses
=
smoke_mono3d_head
.
loss
(
*
ret_dict
,
[
gt_instances_3d
],
img_metas
)
gt_losses
=
smoke_mono3d_head
.
loss_by_feat
(
*
ret_dict
,
[
gt_instances_3d
],
[
gt_instances
],
img_metas
)
gt_cls_loss
=
gt_losses
[
'loss_cls'
].
item
()
gt_box_loss
=
gt_losses
[
'loss_bbox'
].
item
()
...
...
@@ -109,7 +111,7 @@ class TestSMOKEMono3DHead(TestCase):
self
.
assertGreater
(
gt_box_loss
,
0
,
'bbox loss should be positive'
)
# test get_results
results_list
=
smoke_mono3d_head
.
get_results
(
*
ret_dict
,
img_metas
)
results_list
=
smoke_mono3d_head
.
predict_by_feat
(
*
ret_dict
,
img_metas
)
self
.
assertEqual
(
len
(
results_list
),
1
,
'there should be one image results'
)
results
=
results_list
[
0
]
...
...
tools/data_converter/update_infos_to_v2.py
View file @
b496f579
...
...
@@ -14,7 +14,10 @@ from os import path as osp
import
mmcv
import
numpy
as
np
from
nuscenes.nuscenes
import
NuScenes
from
mmdet3d.core.bbox
import
points_cam2img
from
mmdet3d.datasets.convert_utils
import
get_2d_boxes
from
mmdet3d.datasets.utils
import
convert_quaternion_to_matrix
...
...
@@ -60,6 +63,19 @@ def get_empty_instance():
return
instance
def get_empty_multicamera_instances():
    """Return a fresh placeholder dict with one entry per camera.

    Returns:
        dict: Maps ``CAM_FRONT``, ``CAM_FRONT_RIGHT``, ``CAM_FRONT_LEFT``,
        ``CAM_BACK``, ``CAM_BACK_RIGHT`` and ``CAM_BACK_LEFT`` to ``None``.
        A new dict is created on every call.
    """
    # The front-camera key was previously misspelled ``CAM_FONT``, which
    # left a stray ``CAM_FONT: None`` entry next to the real ``CAM_FRONT``
    # key once callers filled the dict in by camera name.
    cam_instance = dict(
        CAM_FRONT=None,
        CAM_FRONT_RIGHT=None,
        CAM_FRONT_LEFT=None,
        CAM_BACK=None,
        CAM_BACK_RIGHT=None,
        CAM_BACK_LEFT=None)
    return cam_instance
def
get_empty_lidar_points
():
lidar_points
=
dict
(
# (int, optional) : Number of features for each point.
...
...
@@ -206,6 +222,32 @@ def clear_data_info_unused_keys(data_info):
return
data_info
,
empty_flag
def generate_camera_instances(info, nusc):
    """Collect the camera box annotations of one sample, keyed by camera.

    Args:
        info (dict): Original info dict of the sample. For every camera,
            ``info['cams'][cam]['sample_data_token']`` is read.
        nusc: Handle that is forwarded unchanged to ``get_2d_boxes``.

    Returns:
        dict: The dict from ``get_empty_multicamera_instances`` with each
        listed camera's entry replaced by the ``get_2d_boxes`` result
        for that camera.
    """
    cam_instances = get_empty_multicamera_instances()
    for cam_name in (
            'CAM_FRONT',
            'CAM_FRONT_RIGHT',
            'CAM_FRONT_LEFT',
            'CAM_BACK',
            'CAM_BACK_LEFT',
            'CAM_BACK_RIGHT',
    ):
        sample_data_token = info['cams'][cam_name]['sample_data_token']
        # get_2d_boxes yields the annotations of this camera (list[dict]).
        cam_instances[cam_name] = get_2d_boxes(
            nusc, sample_data_token, visibilities=['', '1', '2', '3', '4'])
    return cam_instances
def
update_nuscenes_infos
(
pkl_path
,
out_dir
):
print
(
f
'
{
pkl_path
}
will be modified.'
)
if
out_dir
in
pkl_path
:
...
...
@@ -222,6 +264,11 @@ def update_nuscenes_infos(pkl_path, out_dir):
'version'
:
data_list
[
'metadata'
][
'version'
]
}
nusc
=
NuScenes
(
version
=
data_list
[
'metadata'
][
'version'
],
dataroot
=
'./data/nuscenes'
,
verbose
=
True
)
print
(
'Start updating:'
)
converted_list
=
[]
for
i
,
ori_info_dict
in
enumerate
(
...
...
@@ -304,6 +351,8 @@ def update_nuscenes_infos(pkl_path, out_dir):
empty_instance
[
'bbox_3d_isvalid'
]
=
ori_info_dict
[
'valid_flag'
][
i
]
empty_instance
=
clear_instance_unused_keys
(
empty_instance
)
temp_data_info
[
'instances'
].
append
(
empty_instance
)
temp_data_info
[
'cam_instances'
]
=
generate_camera_instances
(
ori_info_dict
,
nusc
)
temp_data_info
,
_
=
clear_data_info_unused_keys
(
temp_data_info
)
converted_list
.
append
(
temp_data_info
)
pkl_name
=
pkl_path
.
split
(
'/'
)[
-
1
]
...
...
@@ -313,7 +362,6 @@ def update_nuscenes_infos(pkl_path, out_dir):
converted_data_info
=
dict
(
metainfo
=
METAINFO
,
data_list
=
converted_list
)
mmcv
.
dump
(
converted_data_info
,
out_path
,
'pkl'
)
return
temp_lidar_sweep
def
update_kitti_infos
(
pkl_path
,
out_dir
):
...
...
@@ -382,6 +430,7 @@ def update_kitti_infos(pkl_path, out_dir):
anns
=
ori_info_dict
[
'annos'
]
num_instances
=
len
(
anns
[
'name'
])
cam2img
=
ori_info_dict
[
'calib'
][
'P2'
]
ignore_class_name
=
set
()
instance_list
=
[]
...
...
@@ -401,6 +450,17 @@ def update_kitti_infos(pkl_path, out_dir):
loc
=
anns
[
'location'
][
instance_id
]
dims
=
anns
[
'dimensions'
][
instance_id
]
rots
=
anns
[
'rotation_y'
][:,
None
][
instance_id
]
dst
=
np
.
array
([
0.5
,
0.5
,
0.5
])
src
=
np
.
array
([
0.5
,
1.0
,
0.5
])
center_3d
=
loc
+
dims
*
(
dst
-
src
)
center_2d
=
points_cam2img
(
center_3d
.
reshape
([
1
,
3
]),
cam2img
,
with_depth
=
True
)
center_2d
=
center_2d
.
squeeze
().
tolist
()
empty_instance
[
'center_2d'
]
=
center_2d
[:
2
]
empty_instance
[
'depth'
]
=
center_2d
[
2
]
gt_bboxes_3d
=
np
.
concatenate
([
loc
,
dims
,
rots
]).
tolist
()
empty_instance
[
'bbox_3d'
]
=
gt_bboxes_3d
empty_instance
[
'bbox_label_3d'
]
=
copy
.
deepcopy
(
...
...
@@ -734,7 +794,6 @@ def parse_args():
type
=
str
,
default
=
'./data/kitti/kitti_infos_train.pkl '
,
help
=
'specify the root dir of dataset'
)
parser
.
add_argument
(
'--out-dir'
,
type
=
str
,
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment