Commit b496f579 authored by ZCMax, committed by ChaimZhu

[Refactor] Refactor Mono3D models

parent 35667791
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from abc import ABCMeta, abstractmethod
from typing import List, Optional
from typing import Optional, Tuple

from mmcv.runner import BaseModule
from mmengine.config import ConfigDict
from torch import Tensor

from mmdet3d.core import Det3DDataSample
from mmdet3d.core.utils import InstanceList, OptMultiConfig, SampleList


class BaseMono3DDenseHead(BaseModule, metaclass=ABCMeta):
    """Base class for Monocular 3D DenseHeads."""

    def __init__(self, init_cfg: Optional[dict] = None) -> None:
        super(BaseMono3DDenseHead, self).__init__(init_cfg=init_cfg)

    @abstractmethod
    def loss(self, **kwargs):
        """Compute losses of the head."""
        pass

    def get_bboxes(self, *args, **kwargs):
        warnings.warn('`get_bboxes` is deprecated and will be removed in '
                      'the future. Please use `get_results` instead.')
        return self.get_results(*args, **kwargs)

    @abstractmethod
    def get_results(self, *args, **kwargs):
        """Transform network outputs of a batch into 3D bbox results."""
        pass


class BaseMono3DDenseHead(BaseModule, metaclass=ABCMeta):
    """Base class for Monocular 3D DenseHeads.

    1. The ``loss`` method is used to calculate the loss of the dense head,
    which includes two steps: (1) the dense head performs forward propagation
    to obtain the feature maps; (2) the ``loss_by_feat`` method is called on
    the feature maps to calculate the loss.

    .. code:: text

        loss(): forward() -> loss_by_feat()

    2. The ``predict`` method is used to predict detection results, which
    includes two steps: (1) the dense head performs forward propagation to
    obtain the feature maps; (2) the ``predict_by_feat`` method is called on
    the feature maps to predict detection results, including post-processing.

    .. code:: text

        predict(): forward() -> predict_by_feat()

    3. The ``loss_and_predict`` method is used to return losses and detection
    results at the same time. It calls the dense head's ``forward``,
    ``loss_by_feat`` and ``predict_by_feat`` methods in order. When a
    one-stage head is used as an RPN, it needs to return both losses and
    predictions; the predictions are then used as proposals for the RoI head.

    .. code:: text

        loss_and_predict(): forward() -> loss_by_feat() -> predict_by_feat()
    """

    def __init__(self, init_cfg: OptMultiConfig = None) -> None:
        super(BaseMono3DDenseHead, self).__init__(init_cfg=init_cfg)

    def forward_train(self,
                      x: List[Tensor],
                      batch_data_samples: List[Det3DDataSample],
                      proposal_cfg: Optional[ConfigDict] = None,
                      **kwargs):
def loss(self, x: Tuple[Tensor], batch_data_samples: SampleList,
**kwargs) -> dict:
"""
Args:
x (list[Tensor]): Features from FPN.
batch_data_samples (list[:obj:`Det3DDataSample`]): Each item
contains the meta information of each image and corresponding
annotations.
proposal_cfg (mmengine.Config, optional): Test / postprocessing
configuration, if None, test_cfg would be used.
Defaults to None.
Returns:
tuple or Tensor: When `proposal_cfg` is None, the detector is a \
......@@ -73,26 +81,105 @@ class BaseMono3DDenseHead(BaseModule, metaclass=ABCMeta):
outs = self(x)
batch_gt_instances_3d = []
batch_gt_instances = []
batch_gt_instances_ignore = []
batch_img_metas = []
for data_sample in batch_data_samples:
batch_img_metas.append(data_sample.metainfo)
batch_gt_instances_3d.append(data_sample.gt_instances_3d)
batch_gt_instances.append(data_sample.gt_instances)
batch_gt_instances_ignore.append(
data_sample.get('ignored_instances', None))
loss_inputs = outs + (batch_gt_instances_3d, batch_gt_instances,
batch_img_metas, batch_gt_instances_ignore)
losses = self.loss_by_feat(*loss_inputs)
return losses
@abstractmethod
def loss_by_feat(self, **kwargs) -> dict:
"""Calculate the loss based on the features extracted by the detection
head."""
pass
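Under this contract a concrete head only implements ``forward``, ``loss_by_feat`` and ``predict_by_feat``; the base class wires them together. A minimal sketch of a conforming subclass (``ToyMono3DHead`` and its channel sizes are made up for illustration, not part of this commit):

from torch import nn


class ToyMono3DHead(BaseMono3DDenseHead):
    """Hypothetical head that only illustrates the refactored interface."""

    def __init__(self, in_channels=64, num_classes=3):
        super().__init__()
        self.cls_conv = nn.Conv2d(in_channels, num_classes, 1)
        self.reg_conv = nn.Conv2d(in_channels, 7, 1)

    def forward(self, x):
        # one classification map and one bbox map per feature level
        return ([self.cls_conv(f) for f in x], [self.reg_conv(f) for f in x])

    def loss_by_feat(self, cls_scores, bbox_preds, batch_gt_instances_3d,
                     batch_gt_instances, batch_img_metas,
                     batch_gt_instances_ignore=None):
        # toy losses; a real head matches targets built from the gt instances
        return dict(
            loss_cls=sum(s.sigmoid().mean() for s in cls_scores),
            loss_bbox=sum(p.abs().mean() for p in bbox_preds))

    def predict_by_feat(self, cls_scores, bbox_preds, batch_img_metas=None,
                        cfg=None, rescale=False):
        # a real head decodes boxes into InstanceData here
        return [None] * cls_scores[0].size(0)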
def loss_and_predict(self,
x: Tuple[Tensor],
batch_data_samples: SampleList,
proposal_cfg: Optional[ConfigDict] = None,
**kwargs) -> Tuple[dict, InstanceList]:
"""Perform forward propagation of the head, then calculate loss and
predictions from the features and data samples.
Args:
x (tuple[Tensor]): Features from FPN.
batch_data_samples (list[:obj:`Det3DDataSample`]): Each item
contains the meta information of each image and
corresponding annotations.
proposal_cfg (ConfigDict, optional): Test / postprocessing
configuration, if None, test_cfg would be used.
Defaults to None.
Returns:
            tuple: The return value is a tuple that contains:
- losses: (dict[str, Tensor]): A dictionary of loss components.
- predictions (list[:obj:`InstanceData`]): Detection
results of each image after the post process.
"""
        batch_gt_instances_3d = []
        batch_gt_instances = []
        batch_gt_instances_ignore = []
        batch_img_metas = []
        for data_sample in batch_data_samples:
            batch_img_metas.append(data_sample.metainfo)
            batch_gt_instances_3d.append(data_sample.gt_instances_3d)
            if 'ignored_instances' in data_sample:
                batch_gt_instances_ignore.append(data_sample.ignored_instances)
            else:
                batch_gt_instances_ignore.append(None)
            batch_gt_instances.append(data_sample.gt_instances)
            batch_gt_instances_ignore.append(
                data_sample.get('ignored_instances', None))

        outs = self(x)

        loss_inputs = outs + (batch_gt_instances_3d, batch_img_metas,
                              batch_gt_instances_ignore)
        losses = self.loss(*loss_inputs)
        if proposal_cfg is None:
            return losses
        else:
            batch_img_metas = [
                data_sample.metainfo for data_sample in batch_data_samples
            ]
            results_list = self.get_results(
                *outs, batch_img_metas=batch_img_metas, cfg=proposal_cfg)
            return losses, results_list

        loss_inputs = outs + (batch_gt_instances_3d, batch_gt_instances,
                              batch_img_metas, batch_gt_instances_ignore)
        losses = self.loss_by_feat(*loss_inputs)

        predictions = self.predict_by_feat(
            *outs, batch_img_metas=batch_img_metas, cfg=proposal_cfg)
        return losses, predictions
def predict(self,
x: Tuple[Tensor],
batch_data_samples: SampleList,
rescale: bool = False) -> InstanceList:
"""Perform forward propagation of the detection head and predict
detection results on the features of the upstream network.
Args:
x (tuple[Tensor]): Multi-level features from the
upstream network, each is a 4D-tensor.
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
Samples. It usually includes information such as
`gt_instance_3d`, `gt_pts_panoptic_seg` and `gt_pts_sem_seg`.
rescale (bool, optional): Whether to rescale the results.
Defaults to False.
Returns:
            list[:obj:`InstanceData`]: Detection results of each image
after the post process.
"""
batch_img_metas = [
data_samples.metainfo for data_samples in batch_data_samples
]
outs = self(x)
predictions = self.predict_by_feat(
*outs, batch_img_metas=batch_img_metas, rescale=rescale)
return predictions
@abstractmethod
def predict_by_feat(self, **kwargs) -> InstanceList:
"""Transform a batch of output features extracted from the head into
bbox results."""
pass
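Taken together, the entry points of the refactored head compose as follows (schematic; the variable names are illustrative):

# feats = detector.extract_feat(batch_inputs_dict)          # tuple[Tensor]
# losses = head.loss(feats, batch_data_samples)              # training
# results = head.predict(feats, batch_data_samples)          # inference
# losses, preds = head.loss_and_predict(
#     feats, batch_data_samples, proposal_cfg=proposal_cfg)  # RPN-style use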
......@@ -3,7 +3,6 @@ from typing import List, Optional, Tuple, Union
import torch
from mmcv.cnn import xavier_init
from mmcv.runner import force_fp32
from mmengine.config import ConfigDict
from mmengine.data import InstanceData
from torch import Tensor
......@@ -197,39 +196,8 @@ class MonoFlexHead(AnchorFreeMono3DHead):
if self.use_edge_fusion:
self._init_edge_module()
def forward_train(self,
x: List[Tensor],
batch_data_samples: List[Det3DDataSample],
proposal_cfg: Optional[ConfigDict] = None,
**kwargs):
"""
Args:
x (list[Tensor]): Features from FPN.
batch_img_metas (list[dict]): Meta information of each image, e.g.,
image size, scaling factor, etc.
gt_bboxes (list[Tensor]): Ground truth bboxes of the image,
shape (num_gts, 4).
gt_labels (list[Tensor]): Ground truth labels of each box,
shape (num_gts,).
gt_bboxes_3d (list[Tensor]): 3D ground truth bboxes of the image,
shape (num_gts, self.bbox_code_size).
gt_labels_3d (list[Tensor]): 3D ground truth labels of each box,
shape (num_gts,).
centers_2d (list[Tensor]): Projected 3D center of each box,
shape (num_gts, 2).
depths (list[Tensor]): Depth of projected 3D center of each box,
shape (num_gts,).
attr_labels (list[Tensor]): Attribute labels of each box,
shape (num_gts,).
gt_bboxes_ignore (list[Tensor]): Ground truth bboxes to be
ignored, shape (num_ignored_gts, 4).
proposal_cfg (mmcv.Config): Test / postprocessing configuration,
if None, test_cfg would be used
Returns:
tuple:
losses: (dict[str, Tensor]): A dictionary of loss components.
proposal_list (list[Tensor]): Proposals of each image.
"""
def loss(self, x: List[Tensor], batch_data_samples: List[Det3DDataSample],
**kwargs):
"""
Args:
x (list[Tensor]): Features from FPN.
......@@ -266,15 +234,15 @@ class MonoFlexHead(AnchorFreeMono3DHead):
"""
batch_gt_instances_3d = []
batch_gt_instances = []
batch_gt_instances_ignore = []
batch_img_metas = []
for data_sample in batch_data_samples:
batch_img_metas.append(data_sample.metainfo)
batch_gt_instances_3d.append(data_sample.gt_instances_3d)
if 'ignored_instances' in data_sample:
batch_gt_instances_ignore.append(data_sample.ignored_instances)
else:
batch_gt_instances_ignore.append(None)
batch_gt_instances.append(data_sample.gt_instances)
batch_gt_instances_ignore.append(
data_sample.get('ignored_instances', None))
# monoflex head needs img_metas for feature extraction
outs = self(x, batch_img_metas)
......@@ -282,15 +250,7 @@ class MonoFlexHead(AnchorFreeMono3DHead):
batch_gt_instances_ignore)
losses = self.loss(*loss_inputs)
if proposal_cfg is None:
return losses
else:
batch_img_metas = [
data_sample.metainfo for data_sample in batch_data_samples
]
results_list = self.get_results(
*outs, batch_img_metas=batch_img_metas, cfg=proposal_cfg)
return losses, results_list
return losses
def forward(self, feats: List[Tensor], batch_img_metas: List[dict]):
"""Forward features from the upstream network.
......@@ -373,9 +333,8 @@ class MonoFlexHead(AnchorFreeMono3DHead):
return cls_score, bbox_pred
@force_fp32(apply_to=('cls_scores', 'bbox_preds'))
def get_results(self, cls_scores: List[Tensor], bbox_preds: List[Tensor],
batch_img_metas: List[dict]):
def predict_by_feat(self, cls_scores: List[Tensor],
bbox_preds: List[Tensor], batch_img_metas: List[dict]):
"""Generate bboxes from bbox head predictions.
Args:
......@@ -393,7 +352,7 @@ class MonoFlexHead(AnchorFreeMono3DHead):
cls_scores[0].new_tensor(input_meta['cam2img'])
for input_meta in batch_img_metas
])
batch_bboxes, batch_scores, batch_topk_labels = self.decode_heatmap(
batch_bboxes, batch_scores, batch_topk_labels = self._decode_heatmap(
cls_scores[0],
bbox_preds[0],
batch_img_metas,
......@@ -429,13 +388,13 @@ class MonoFlexHead(AnchorFreeMono3DHead):
return result_list
def decode_heatmap(self,
cls_score: Tensor,
reg_pred: Tensor,
batch_img_metas: List[dict],
cam2imgs: Tensor,
topk: int = 100,
kernel: int = 3):
def _decode_heatmap(self,
cls_score: Tensor,
reg_pred: Tensor,
batch_img_metas: List[dict],
cam2imgs: Tensor,
topk: int = 100,
kernel: int = 3):
"""Transform outputs into detections raw bbox predictions.
Args:
......@@ -530,14 +489,16 @@ class MonoFlexHead(AnchorFreeMono3DHead):
return preds
def get_targets(self, batch_gt_instances_3d: List[InstanceData],
batch_gt_instances: List[InstanceData],
feat_shape: Tuple[int], batch_img_metas: List[dict]):
"""Get training targets for batch images.
Args:
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instance_3d. It usually includes ``bboxes``、``labels``
、``bboxes_3d``、``labels_3d``、``depths``、``centers_2d`` and
attributes.
gt_instance_3d. It usually includes ``bboxes_3d``、
``labels_3d``、``depths``、``centers_2d`` and attributes.
batch_gt_instances (list[:obj:`InstanceData`]): Batch of
gt_instance. It usually includes ``bboxes``、``labels``.
feat_shape (tuple[int]): Feature map shape with value,
shape (B, _, H, W).
batch_img_metas (list[dict]): Meta information of each image, e.g.,
......@@ -574,10 +535,10 @@ class MonoFlexHead(AnchorFreeMono3DHead):
"""
gt_bboxes_list = [
gt_instances_3d.bboxes for gt_instances_3d in batch_gt_instances_3d
gt_instances.bboxes for gt_instances in batch_gt_instances
]
gt_labels_list = [
gt_instances_3d.labels for gt_instances_3d in batch_gt_instances_3d
gt_instances.labels for gt_instances in batch_gt_instances
]
gt_bboxes_3d_list = [
gt_instances_3d.bboxes_3d
......@@ -721,12 +682,14 @@ class MonoFlexHead(AnchorFreeMono3DHead):
return center_heatmap_target, avg_factor, target_labels
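For context, the center-heatmap target assembled above follows the CenterNet recipe: every ground-truth center is splatted as a 2D Gaussian whose radius scales with the box size, and overlapping peaks are merged with an element-wise maximum. A simplified, self-contained stand-in (the function names here are illustrative, not the mmdet ``gaussian_radius``/``gen_gaussian_target`` helpers):

import torch


def gaussian_2d(radius, sigma):
    # (2 * radius + 1) x (2 * radius + 1) Gaussian kernel centered at 0
    x = torch.arange(-radius, radius + 1, dtype=torch.float32)
    y = x[:, None]
    return torch.exp(-(x * x + y * y) / (2 * sigma * sigma))


def draw_center(heatmap, cx, cy, radius):
    # splat one gt center; clip the kernel at the feature-map borders
    g = gaussian_2d(radius, sigma=(2 * radius + 1) / 6)
    h, w = heatmap.shape
    left, right = min(cx, radius), min(w - cx, radius + 1)
    top, bottom = min(cy, radius), min(h - cy, radius + 1)
    patch = g[radius - top:radius + bottom, radius - left:radius + right]
    region = heatmap[cy - top:cy + bottom, cx - left:cx + right]
    region.copy_(torch.maximum(region, patch))  # keep the stronger peak
    return heatmap


heatmap = torch.zeros(96, 312)  # one class channel at feature resolution
draw_center(heatmap, cx=100, cy=40, radius=6)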
def loss(self,
cls_scores: List[Tensor],
bbox_preds: List[Tensor],
batch_gt_instances_3d: List[InstanceData],
batch_img_metas: List[dict],
batch_gt_instances_ignore: Optional[List[InstanceData]] = None):
def loss_by_feat(
self,
cls_scores: List[Tensor],
bbox_preds: List[Tensor],
batch_gt_instances_3d: List[InstanceData],
batch_gt_instances: List[InstanceData],
batch_img_metas: List[dict],
batch_gt_instances_ignore: Optional[List[InstanceData]] = None):
"""Compute loss of the head.
Args:
......@@ -736,9 +699,10 @@ class MonoFlexHead(AnchorFreeMono3DHead):
number is bbox_code_size.
shape (B, 7, H, W).
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instance_3d. It usually includes ``bboxes``、``labels``
、``bboxes_3d``、``labels_3d``、``depths``、``centers_2d`` and
attributes.
gt_instance_3d. It usually includes ``bboxes_3d``、
``labels_3d``、``depths``、``centers_2d`` and attributes.
batch_gt_instances (list[:obj:`InstanceData`]): Batch of
gt_instance. It usually includes ``bboxes``、``labels``.
batch_img_metas (list[dict]): Meta information of each image, e.g.,
image size, scaling factor, etc.
batch_gt_instances_ignore (list[:obj:`InstanceData`], Optional):
......@@ -756,6 +720,7 @@ class MonoFlexHead(AnchorFreeMono3DHead):
center2d_heatmap_target, avg_factor, target_labels = \
self.get_targets(batch_gt_instances_3d,
batch_gt_instances,
center2d_heatmap.shape,
batch_img_metas)
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Tuple
from typing import List, Optional, Tuple
import numpy as np
import torch
from mmcv.cnn import Scale, bias_init_with_prob, normal_init
from mmcv.runner import force_fp32
from mmengine.data import InstanceData
from torch import Tensor
from torch import nn as nn
from torch.nn import functional as F
from mmdet3d.core import box3d_multiclass_nms, xywhr2xyxyr
from mmdet3d.core.bbox import points_cam2img, points_img2cam
from mmdet3d.core.utils import (ConfigType, InstanceList, OptConfigType,
OptInstanceList)
from mmdet3d.registry import MODELS
from mmdet.core import distance2bbox, multi_apply
from .fcos_mono3d_head import FCOSMono3DHead
......@@ -86,7 +88,7 @@ class PGDHead(FCOSMono3DHead):
base_dims=((0.8, 1.73, 0.6), (1.76, 1.73, 0.6),
(3.9, 1.56, 1.6)),
code_size=7),
**kwargs):
**kwargs) -> None:
self.use_depth_classifier = use_depth_classifier
self.use_onlyreg_proj = use_onlyreg_proj
self.depth_branch = depth_branch
......@@ -190,11 +192,11 @@ class PGDHead(FCOSMono3DHead):
for conv_weight in self.conv_weights:
normal_init(conv_weight, std=0.01)
def forward(self, feats):
def forward(self, x: Tuple[Tensor]) -> Tuple[Tensor, ...]:
"""Forward features from the upstream network.
Args:
feats (tuple[Tensor]): Features from the upstream network, each is
x (tuple[Tensor]): Features from the upstream network, each is
a 4D-tensor.
Returns:
......@@ -220,10 +222,10 @@ class PGDHead(FCOSMono3DHead):
centernesses (list[Tensor]): Centerness for each scale level,
each is a 4D-tensor, the channel number is num_points * 1.
"""
return multi_apply(self.forward_single, feats, self.scales,
self.strides)
return multi_apply(self.forward_single, x, self.scales, self.strides)
def forward_single(self, x, scale, stride):
def forward_single(self, x: Tensor, scale: Scale,
stride: int) -> Tuple[Tensor, ...]:
"""Forward features of a single scale level.
Args:
......@@ -271,17 +273,17 @@ class PGDHead(FCOSMono3DHead):
attr_pred, centerness
def get_proj_bbox2d(self,
bbox_preds,
pos_dir_cls_preds,
labels_3d,
bbox_targets_3d,
pos_points,
pos_inds,
batch_img_metas,
pos_depth_cls_preds=None,
pos_weights=None,
pos_cls_scores=None,
with_kpts=False):
bbox_preds: List[Tensor],
pos_dir_cls_preds: List[Tensor],
labels_3d: List[Tensor],
bbox_targets_3d: List[Tensor],
pos_points: Tensor,
pos_inds: Tensor,
batch_img_metas: List[dict],
pos_depth_cls_preds: Optional[Tensor] = None,
pos_weights: Optional[Tensor] = None,
pos_cls_scores: Optional[Tensor] = None,
with_kpts: bool = False) -> Tuple[Tensor]:
"""Decode box predictions and get projected 2D attributes.
Args:
......@@ -448,9 +450,12 @@ class PGDHead(FCOSMono3DHead):
return outputs
def get_pos_predictions(self, bbox_preds, dir_cls_preds, depth_cls_preds,
weights, attr_preds, centernesses, pos_inds,
batch_img_metas):
def get_pos_predictions(self, bbox_preds: List[Tensor],
dir_cls_preds: List[Tensor],
depth_cls_preds: List[Tensor],
weights: List[Tensor], attr_preds: List[Tensor],
centernesses: List[Tensor], pos_inds: Tensor,
batch_img_metas: List[dict]) -> Tuple[Tensor]:
"""Flatten predictions and get positive ones.
Args:
......@@ -528,20 +533,19 @@ class PGDHead(FCOSMono3DHead):
return pos_bbox_preds, pos_dir_cls_preds, pos_depth_cls_preds, \
pos_weights, pos_attr_preds, pos_centerness
@force_fp32(
apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds',
'depth_cls_preds', 'weights', 'attr_preds', 'centernesses'))
def loss(self,
cls_scores,
bbox_preds,
dir_cls_preds,
depth_cls_preds,
weights,
attr_preds,
centernesses,
batch_gt_instances_3d,
batch_img_metas,
batch_gt_instances_ignore=None):
def loss_by_feat(
self,
cls_scores: List[Tensor],
bbox_preds: List[Tensor],
dir_cls_preds: List[Tensor],
depth_cls_preds: List[Tensor],
weights: List[Tensor],
attr_preds: List[Tensor],
centernesses: List[Tensor],
batch_gt_instances_3d: InstanceList,
batch_gt_instances: InstanceList,
batch_img_metas: List[dict],
batch_gt_instances_ignore: OptInstanceList = None) -> dict:
"""Compute loss of the head.
Args:
......@@ -591,7 +595,7 @@ class PGDHead(FCOSMono3DHead):
bbox_preds[0].device)
labels_3d, bbox_targets_3d, centerness_targets, attr_targets = \
self.get_targets(
all_level_points, batch_gt_instances_3d)
all_level_points, batch_gt_instances_3d, batch_gt_instances)
num_imgs = cls_scores[0].size(0)
# flatten cls_scores and targets
......@@ -785,20 +789,17 @@ class PGDHead(FCOSMono3DHead):
return loss_dict
@force_fp32(
apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds',
'depth_cls_preds', 'weights', 'attr_preds', 'centernesses'))
def get_results(self,
cls_scores,
bbox_preds,
dir_cls_preds,
depth_cls_preds,
weights,
attr_preds,
centernesses,
batch_img_metas,
cfg=None,
rescale=None):
def predict_by_feat(self,
cls_scores: List[Tensor],
bbox_preds: List[Tensor],
dir_cls_preds: List[Tensor],
depth_cls_preds: List[Tensor],
weights: List[Tensor],
attr_preds: List[Tensor],
centernesses: List[Tensor],
batch_img_metas: Optional[List[dict]] = None,
cfg: OptConfigType = None,
rescale: bool = False) -> InstanceList:
"""Transform network output for a batch into bbox predictions.
Args:
......@@ -824,7 +825,7 @@ class PGDHead(FCOSMono3DHead):
cfg (mmcv.Config, optional): Test / postprocessing configuration,
if None, test_cfg would be used. Defaults to None.
rescale (bool, optional): If True, return boxes in original image
space. Defaults to None.
space. Defaults to False.
Returns:
list[tuple[Tensor]]: Each item in result_list is a tuple, which
......@@ -898,25 +899,33 @@ class PGDHead(FCOSMono3DHead):
centernesses[i][img_id].detach() for i in range(num_levels)
]
img_meta = batch_img_metas[img_id]
results = self._get_results_single(
cls_score_list, bbox_pred_list, dir_cls_pred_list,
depth_cls_pred_list, weight_list, attr_pred_list,
centerness_pred_list, mlvl_points, img_meta, cfg, rescale)
results = self._predict_by_feat_single(
cls_score_list=cls_score_list,
bbox_pred_list=bbox_pred_list,
dir_cls_pred_list=dir_cls_pred_list,
depth_cls_pred_list=depth_cls_pred_list,
weight_list=weight_list,
attr_pred_list=attr_pred_list,
centerness_pred_list=centerness_pred_list,
mlvl_points=mlvl_points,
img_meta=img_meta,
cfg=cfg,
rescale=rescale)
result_list.append(results)
return result_list
def _get_results_single(self,
cls_scores,
bbox_preds,
dir_cls_preds,
depth_cls_preds,
weights,
attr_preds,
centernesses,
mlvl_points,
img_meta,
cfg,
rescale=False):
def _predict_by_feat_single(self,
cls_score_list: List[Tensor],
bbox_pred_list: List[Tensor],
dir_cls_pred_list: List[Tensor],
depth_cls_pred_list: List[Tensor],
weight_list: List[Tensor],
attr_pred_list: List[Tensor],
centerness_pred_list: List[Tensor],
mlvl_points: Tensor,
img_meta: dict,
cfg: ConfigType,
rescale: bool = False) -> InstanceData:
"""Transform outputs for a single batch item into bbox predictions.
Args:
......@@ -951,7 +960,7 @@ class PGDHead(FCOSMono3DHead):
view = np.array(img_meta['cam2img'])
scale_factor = img_meta['scale_factor']
cfg = self.test_cfg if cfg is None else cfg
assert len(cls_scores) == len(bbox_preds) == len(mlvl_points)
assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_points)
mlvl_centers2d = []
mlvl_bboxes = []
mlvl_scores = []
......@@ -966,8 +975,9 @@ class PGDHead(FCOSMono3DHead):
for cls_score, bbox_pred, dir_cls_pred, depth_cls_pred, weight, \
attr_pred, centerness, points in zip(
cls_scores, bbox_preds, dir_cls_preds, depth_cls_preds,
weights, attr_preds, centernesses, mlvl_points):
cls_score_list, bbox_pred_list, dir_cls_pred_list,
depth_cls_pred_list, weight_list, attr_pred_list,
centerness_pred_list, mlvl_points):
assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
scores = cls_score.permute(1, 2, 0).reshape(
-1, self.cls_out_channels).sigmoid()
......@@ -1018,9 +1028,9 @@ class PGDHead(FCOSMono3DHead):
bbox_pred3d[:, :2] = points - bbox_pred3d[:, :2]
if rescale:
bbox_pred3d[:, :2] /= bbox_pred3d[:, :2].new_tensor(
scale_factor)
scale_factor[0])
if self.pred_bbox2d:
bbox_pred2d /= bbox_pred2d.new_tensor(scale_factor)
bbox_pred2d /= bbox_pred2d.new_tensor(scale_factor[0])
if self.use_depth_classifier:
prob_depth_pred = self.bbox_coder.decode_prob_depth(
depth_cls_pred, self.depth_range, self.depth_unit,
......@@ -1106,13 +1116,21 @@ class PGDHead(FCOSMono3DHead):
results.attr_labels = attrs
if self.pred_bbox2d:
results_2d = InstanceData()
bboxes2d = nms_results[-1]
bboxes2d = torch.cat([bboxes2d, scores[:, None]], dim=1)
results.bboxes = bboxes2d
return results
def get_targets(self, points, batch_gt_instances_3d):
results_2d.bboxes = bboxes2d
results_2d.scores = scores
results_2d.labels = labels
return results, results_2d
else:
return results
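``decode_prob_depth`` used above converts the depth-classifier logits into a continuous depth as a probability-weighted expectation over discretized depth bins. A minimal sketch assuming uniform bin spacing (PGD's coder also supports other discretizations, omitted here; ``decode_prob_depth_uniform`` is a made-up name):

import torch


def decode_prob_depth_uniform(depth_cls_pred, depth_range, num_bins):
    # bin centers for a uniform discretization of [depth_min, depth_max]
    depth_min, depth_max = depth_range
    centers = torch.linspace(depth_min, depth_max, num_bins)
    prob = depth_cls_pred.softmax(dim=-1)  # (N, num_bins)
    return (prob * centers).sum(dim=-1)    # expected depth, shape (N,)


logits = torch.randn(5, 8)  # 5 positive predictions, 8 depth bins
depth = decode_prob_depth_uniform(logits, depth_range=(1.0, 70.0), num_bins=8)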
def get_targets(
self,
points: List[Tensor],
batch_gt_instances_3d: InstanceList,
batch_gt_instances: InstanceList,
) -> Tuple[List[Tensor], List[Tensor], List[Tensor], List[Tensor]]:
"""Compute regression, classification and centerss targets for points
in multiple images.
......@@ -1120,9 +1138,10 @@ class PGDHead(FCOSMono3DHead):
points (list[Tensor]): Points of each fpn level, each has shape
(num_points, 2).
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instance_3d. It usually includes ``bboxes``、``labels``
、``bboxes_3d``、``labels_3d``、``depths``、``centers_2d`` and
attributes.
gt_instance_3d. It usually includes ``bboxes_3d``、
``labels_3d``、``depths``、``centers_2d`` and attributes.
batch_gt_instances (list[:obj:`InstanceData`]): Batch of
gt_instance. It usually includes ``bboxes``、``labels``.
Returns:
tuple:
......@@ -1146,14 +1165,17 @@ class PGDHead(FCOSMono3DHead):
if 'attr_labels' not in batch_gt_instances_3d[0]:
for gt_instances_3d in batch_gt_instances_3d:
gt_instances_3d.attr_labels = gt_instances_3d.labels.new_full(
gt_instances_3d.labels.shape, self.attr_background_label)
gt_instances_3d.attr_labels = \
gt_instances_3d.labels_3d.new_full(
gt_instances_3d.labels_3d.shape,
self.attr_background_label)
# get labels and bbox_targets of each image
_, bbox_targets_list, labels_3d_list, bbox_targets_3d_list, \
centerness_targets_list, attr_targets_list = multi_apply(
self._get_target_single,
batch_gt_instances_3d,
batch_gt_instances,
points=concat_points,
regress_ranges=concat_regress_ranges,
num_points_per_lvl=num_points)
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Optional, Tuple, Union
from typing import List, Optional, Tuple
import torch
from mmcv.runner import force_fp32
from mmengine.config import ConfigDict
from mmengine.data import InstanceData
from torch import Tensor
from torch.nn import functional as F
from mmdet3d.core.utils import (ConfigType, InstanceList, OptConfigType,
OptInstanceList, OptMultiConfig)
from mmdet3d.registry import MODELS, TASK_UTILS
from mmdet.core import multi_apply
from mmdet.models.utils import gaussian_radius, gen_gaussian_target
......@@ -35,19 +35,20 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
regression heatmap channels.
ori_channel (list[int]): indices of orientation offset pred in
regression heatmap channels.
bbox_coder (dict): Bbox coder for encoding and decoding boxes.
loss_cls (dict, optional): Config of classification loss.
bbox_coder (:obj:`ConfigDict` or dict): Bbox coder for encoding
and decoding boxes.
loss_cls (:obj:`ConfigDict` or dict): Config of classification loss.
            Default: loss_cls=dict(type='GaussianFocalLoss', loss_weight=1.0).
loss_bbox (dict, optional): Config of localization loss.
loss_bbox (:obj:`ConfigDict` or dict): Config of localization loss.
Default: loss_bbox=dict(type='L1Loss', loss_weight=10.0).
loss_dir (dict, optional): Config of direction classification loss.
In SMOKE, Default: None.
loss_attr (dict, optional): Config of attribute classification loss.
In SMOKE, Default: None.
loss_centerness (dict): Config of centerness loss.
norm_cfg (dict): Dictionary to construct and config norm layer.
loss_dir (:obj:`ConfigDict` or dict, Optional): Config of direction
classification loss. In SMOKE, Default: None.
loss_attr (:obj:`ConfigDict` or dict, Optional): Config of attribute
classification loss. In SMOKE, Default: None.
        norm_cfg (:obj:`ConfigDict` or dict): Dictionary to construct and
            config norm layer.
Default: norm_cfg=dict(type='GN', num_groups=32, requires_grad=True).
init_cfg (dict): Initialization config dict. Default: None.
init_cfg (:obj:`ConfigDict` or dict or list[:obj:`ConfigDict` or \
dict]): Initialization config dict. Defaults to None.
""" # noqa: E501
def __init__(self,
......@@ -55,15 +56,16 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
in_channels: int,
dim_channel: List[int],
ori_channel: List[int],
bbox_coder: dict,
loss_cls: dict = dict(
                     type='GaussianFocalLoss', loss_weight=1.0),
loss_bbox: dict = dict(type='L1Loss', loss_weight=0.1),
loss_dir: Optional[dict] = None,
loss_attr: Optional[dict] = None,
norm_cfg: dict = dict(
bbox_coder: ConfigType,
loss_cls: ConfigType = dict(
                     type='mmdet.GaussianFocalLoss', loss_weight=1.0),
loss_bbox: ConfigType = dict(
type='mmdet.L1Loss', loss_weight=0.1),
loss_dir: OptConfigType = None,
loss_attr: OptConfigType = None,
norm_cfg: OptConfigType = dict(
type='GN', num_groups=32, requires_grad=True),
init_cfg: Optional[Union[ConfigDict, dict]] = None,
init_cfg: OptMultiConfig = None,
**kwargs) -> None:
super().__init__(
num_classes,
......@@ -79,11 +81,11 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
self.ori_channel = ori_channel
self.bbox_coder = TASK_UTILS.build(bbox_coder)
def forward(self, feats: Tuple[Tensor]):
def forward(self, x: Tuple[Tensor]) -> Tuple[List[Tensor]]:
"""Forward features from the upstream network.
Args:
feats (tuple[Tensor]): Features from the upstream network, each is
x (tuple[Tensor]): Features from the upstream network, each is
a 4D-tensor.
Returns:
......@@ -95,9 +97,9 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
level, each is a 4D-tensor, the channel number is
num_points * bbox_code_size.
"""
return multi_apply(self.forward_single, feats)
return multi_apply(self.forward_single, x)
def forward_single(self, x: Tensor) -> Union[Tensor, Tensor]:
def forward_single(self, x: Tensor) -> Tuple[Tensor, Tensor]:
"""Forward features of a single scale level.
Args:
......@@ -118,12 +120,11 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
bbox_pred[:, self.ori_channel, ...] = F.normalize(vector_ori)
return cls_score, bbox_pred
@force_fp32(apply_to=('cls_scores', 'bbox_preds'))
def get_results(self,
cls_scores,
bbox_preds,
batch_img_metas,
rescale=None):
def predict_by_feat(self,
cls_scores: List[Tensor],
bbox_preds: List[Tensor],
batch_img_metas: Optional[List[dict]] = None,
rescale: bool = None) -> InstanceList:
"""Generate bboxes from bbox head predictions.
Args:
......@@ -134,8 +135,16 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
rescale (bool): If True, return boxes in original image space.
Returns:
list[tuple[:obj:`CameraInstance3DBoxes`, Tensor, Tensor, None]]:
Each item in result_list is 4-tuple.
list[:obj:`InstanceData`]: 3D Detection results of each image
after the post process.
Each item usually contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instance, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (Tensor): Contains a tensor with shape
(num_instances, 7).
"""
assert len(cls_scores) == len(bbox_preds) == 1
cam2imgs = torch.stack([
......@@ -146,7 +155,7 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
cls_scores[0].new_tensor(img_meta['trans_mat'])
for img_meta in batch_img_metas
])
batch_bboxes, batch_scores, batch_topk_labels = self.decode_heatmap(
batch_bboxes, batch_scores, batch_topk_labels = self._decode_heatmap(
cls_scores[0],
bbox_preds[0],
batch_img_metas,
......@@ -183,14 +192,14 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
return result_list
def decode_heatmap(self,
cls_score,
reg_pred,
batch_img_metas,
cam2imgs,
trans_mats,
topk=100,
kernel=3):
def _decode_heatmap(self,
cls_score: Tensor,
reg_pred: Tensor,
batch_img_metas: List[dict],
cam2imgs: Tensor,
trans_mats: Tensor,
topk: int = 100,
kernel: int = 3) -> Tuple[Tensor, Tensor, Tensor]:
"""Transform outputs into detections raw bbox predictions.
Args:
......@@ -212,6 +221,7 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
Returns:
tuple[torch.Tensor]: Decoded output of SMOKEHead, containing
the following Tensors:
- batch_bboxes (Tensor): Coords of each 3D box.
shape (B, k, 7)
- batch_scores (Tensor): Scores of each 3D box.
......@@ -241,9 +251,10 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
batch_bboxes = batch_bboxes.view(bs, -1, self.bbox_code_size)
return batch_bboxes, batch_scores, batch_topk_labels
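The decode above hinges on the standard max-pool trick: a ``kernel``-sized max-pool acts as NMS that keeps only local maxima on the class heatmap, and a global top-k then picks candidate centers. A pure-PyTorch sketch of that step (a simplified stand-in for the mmdet gaussian-target helpers used internally):

import torch
import torch.nn.functional as F


def topk_from_heatmap(heatmap, k=100, kernel=3):
    # suppress non-maxima: keep only scores equal to their local maximum
    pad = (kernel - 1) // 2
    hmax = F.max_pool2d(heatmap, kernel, stride=1, padding=pad)
    heatmap = heatmap * (hmax == heatmap)
    bs, num_classes, h, w = heatmap.shape
    scores, inds = heatmap.view(bs, -1).topk(k)
    labels = inds // (h * w)          # class channel of each peak
    pixel = inds % (h * w)
    ys, xs = pixel // w, pixel % w    # peak coordinates on the feature map
    return scores, labels, xs, ys


scores, labels, xs, ys = topk_from_heatmap(torch.rand(2, 3, 96, 312))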
def get_predictions(self, labels_3d, centers_2d, gt_locations,
gt_dimensions, gt_orientations, indices,
batch_img_metas, pred_reg):
def get_predictions(self, labels_3d: Tensor, centers_2d: Tensor,
gt_locations: Tensor, gt_dimensions: Tensor,
gt_orientations: Tensor, indices: Tensor,
batch_img_metas: List[dict], pred_reg: Tensor) -> dict:
"""Prepare predictions for computing loss.
Args:
......@@ -266,6 +277,7 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
Returns:
dict: the dict has components below:
- bbox3d_yaws (:obj:`CameraInstance3DBoxes`):
bbox calculated using pred orientations.
- bbox3d_dims (:obj:`CameraInstance3DBoxes`):
......@@ -312,22 +324,26 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
return pred_bboxes
def get_targets(self, batch_gt_instances_3d, feat_shape, batch_img_metas):
def get_targets(self, batch_gt_instances_3d: InstanceList,
batch_gt_instances: InstanceList, feat_shape: Tuple[int],
batch_img_metas: List[dict]) -> Tuple[Tensor, int, dict]:
"""Get training targets for batch images.
Args:
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instance_3d. It usually includes ``bboxes``、``labels``
、``bboxes_3d``、``labels_3d``、``depths``、``centers_2d`` and
attributes.
gt_instance_3d. It usually includes ``bboxes_3d``、
``labels_3d``、``depths``、``centers_2d`` and attributes.
batch_gt_instances (list[:obj:`InstanceData`]): Batch of
gt_instance. It usually includes ``bboxes``、``labels``.
feat_shape (tuple[int]): Feature map shape with value,
shape (B, _, H, W).
batch_img_metas (list[dict]): Meta information of each image, e.g.,
image size, scaling factor, etc.
Returns:
tuple[Tensor, dict]: The Tensor value is the targets of
tuple[Tensor, int, dict]: The Tensor value is the targets of
center heatmap, the dict has components below:
- gt_centers_2d (Tensor): Coords of each projected 3D box
center on image. shape (B * max_objs, 2)
- gt_labels_3d (Tensor): Labels of each 3D box.
......@@ -347,10 +363,10 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
"""
gt_bboxes = [
gt_instances_3d.bboxes for gt_instances_3d in batch_gt_instances_3d
gt_instances.bboxes for gt_instances in batch_gt_instances
]
gt_labels = [
gt_instances_3d.labels for gt_instances_3d in batch_gt_instances_3d
gt_instances.labels for gt_instances in batch_gt_instances
]
gt_bboxes_3d = [
gt_instances_3d.bboxes_3d
......@@ -459,12 +475,14 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
return center_heatmap_target, avg_factor, target_labels
def loss(self,
cls_scores,
bbox_preds,
batch_gt_instances_3d,
batch_img_metas,
batch_gt_instances_ignore=None):
def loss_by_feat(
self,
cls_scores: List[Tensor],
bbox_preds: List[Tensor],
batch_gt_instances_3d: InstanceList,
batch_gt_instances: InstanceList,
batch_img_metas: List[dict],
batch_gt_instances_ignore: OptInstanceList = None) -> dict:
"""Compute loss of the head.
Args:
......@@ -474,9 +492,10 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
number is bbox_code_size.
shape (B, 7, H, W).
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instance_3d. It usually includes ``bboxes``、``labels``
、``bboxes_3d``、``labels_3d``、``depths``、``centers_2d`` and
attributes.
gt_instance_3d. It usually includes ``bboxes_3d``、
``labels_3d``、``depths``、``centers_2d`` and attributes.
batch_gt_instances (list[:obj:`InstanceData`]): Batch of
gt_instance. It usually includes ``bboxes``、``labels``.
batch_img_metas (list[dict]): Meta information of each image, e.g.,
image size, scaling factor, etc.
batch_gt_instances_ignore (list[:obj:`InstanceData`], Optional):
......@@ -485,15 +504,19 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
Defaults to None.
Returns:
dict[str, Tensor]: A dictionary of loss components.
dict[str, Tensor]: A dictionary of loss components, which has
components below:
- loss_cls (Tensor): loss of cls heatmap.
- loss_bbox (Tensor): loss of bbox heatmap.
"""
assert len(cls_scores) == len(bbox_preds) == 1
assert batch_gt_instances_ignore is None
center_2d_heatmap = cls_scores[0]
pred_reg = bbox_preds[0]
center_2d_heatmap_target, avg_factor, target_labels = \
self.get_targets(batch_gt_instances_3d,
batch_gt_instances,
center_2d_heatmap.shape,
batch_img_metas)
......
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet3d.core import ConfigType, OptConfigType, OptMultiConfig
from mmdet3d.registry import MODELS
from .single_stage_mono3d import SingleStageMono3DDetector
......@@ -9,14 +10,36 @@ class FCOSMono3D(SingleStageMono3DDetector):
Currently please refer to our entry on the
`leaderboard <https://www.nuscenes.org/object-detection?externalData=all&mapData=all&modalities=Camera>`_.
Args:
backbone (:obj:`ConfigDict` or dict): The backbone config.
neck (:obj:`ConfigDict` or dict): The neck config.
bbox_head (:obj:`ConfigDict` or dict): The bbox head config.
train_cfg (:obj:`ConfigDict` or dict, optional): The training config
of FCOS. Defaults to None.
test_cfg (:obj:`ConfigDict` or dict, optional): The testing config
of FCOS. Defaults to None.
data_preprocessor (:obj:`ConfigDict` or dict, optional): Config of
:class:`DetDataPreprocessor` to process the input data.
Defaults to None.
init_cfg (:obj:`ConfigDict` or list[:obj:`ConfigDict`] or dict or
list[dict], optional): Initialization config dict.
Defaults to None.
""" # noqa: E501
def __init__(self,
backbone,
neck,
bbox_head,
train_cfg=None,
test_cfg=None,
pretrained=None):
super(FCOSMono3D, self).__init__(backbone, neck, bbox_head, train_cfg,
test_cfg, pretrained)
backbone: ConfigType,
neck: ConfigType,
bbox_head: ConfigType,
train_cfg: OptConfigType = None,
test_cfg: OptConfigType = None,
data_preprocessor: OptConfigType = None,
init_cfg: OptMultiConfig = None) -> None:
super().__init__(
backbone=backbone,
neck=neck,
bbox_head=bbox_head,
train_cfg=train_cfg,
test_cfg=test_cfg,
data_preprocessor=data_preprocessor,
init_cfg=init_cfg)
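With the refactor, detectors are built from config dicts through the registry. A schematic example (the component settings below are placeholders for illustration, not the shipped FCOS3D config):

from mmdet3d.registry import MODELS

model_cfg = dict(
    type='FCOSMono3D',
    data_preprocessor=dict(type='Det3DDataPreprocessor'),
    backbone=dict(
        type='mmdet.ResNet', depth=50, num_stages=4,
        out_indices=(0, 1, 2, 3)),
    neck=dict(
        type='mmdet.FPN', in_channels=[256, 512, 1024, 2048],
        out_channels=256, num_outs=5),
    bbox_head=dict(
        type='FCOSMono3DHead', num_classes=10, in_channels=256),
    test_cfg=dict(nms_pre=1000, score_thr=0.05, max_per_img=200))

model = MODELS.build(model_cfg)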
# Copyright (c) OpenMMLab. All rights reserved.
from os import path as osp
from typing import Tuple
import mmcv
import numpy as np
import torch
from mmcv.parallel import DataContainer as DC
from torch import Tensor
from mmdet3d.core import (CameraInstance3DBoxes, bbox3d2result,
show_multi_modality_result)
from mmdet3d.core import Det3DDataSample, InstanceList
from mmdet3d.core.utils import SampleList
from mmdet3d.registry import MODELS
from mmdet.models.detectors.single_stage import SingleStageDetector
......@@ -16,212 +13,61 @@ from mmdet.models.detectors.single_stage import SingleStageDetector
class SingleStageMono3DDetector(SingleStageDetector):
"""Base class for monocular 3D single-stage detectors.
Single-stage detectors directly and densely predict bounding boxes on the
output features of the backbone+neck.
Monocular 3D single-stage detectors directly and densely predict bounding
boxes on the output features of the backbone+neck.
"""
    def extract_feats(self, imgs):
        """Directly extract features from the backbone+neck."""
        assert isinstance(imgs, list)
        return [self.extract_feat(img) for img in imgs]

    def forward_train(self,
                      img,
                      img_metas,
                      gt_bboxes,
                      gt_labels,
                      gt_bboxes_3d,
                      gt_labels_3d,
                      centers2d,
                      depths,
                      attr_labels=None,
                      gt_bboxes_ignore=None):
        """
        Args:
            img (Tensor): Input images of shape (N, C, H, W).
                Typically these should be mean centered and std scaled.
            img_metas (list[dict]): A List of image info dict where each dict
                has: 'img_shape', 'scale_factor', 'flip', and may also contain
                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
                For details on the values of these keys see
                :class:`mmdet.datasets.pipelines.Collect`.
            gt_bboxes (list[Tensor]): Each item are the truth boxes for each
                image in [tl_x, tl_y, br_x, br_y] format.
            gt_labels (list[Tensor]): Class indices corresponding to each box.
            gt_bboxes_3d (list[Tensor]): Each item are the 3D truth boxes for
                each image in [x, y, z, x_size, y_size, z_size, yaw, vx, vy]
                format.
            gt_labels_3d (list[Tensor]): 3D class indices corresponding to
                each box.
            centers2d (list[Tensor]): Projected 3D centers onto 2D images.
            depths (list[Tensor]): Depth of projected centers on 2D images.
            attr_labels (list[Tensor], optional): Attribute indices
                corresponding to each box.
            gt_bboxes_ignore (list[Tensor]): Specify which bounding
                boxes can be ignored when computing the loss.

        Returns:
            dict[str, Tensor]: A dictionary of loss components.
        """
        x = self.extract_feat(img)
        losses = self.bbox_head.forward_train(x, img_metas, gt_bboxes,
                                              gt_labels, gt_bboxes_3d,
                                              gt_labels_3d, centers2d, depths,
                                              attr_labels, gt_bboxes_ignore)
        return losses

    def simple_test(self, img, img_metas, rescale=False):
        """Test function without test time augmentation.

        Args:
            imgs (list[torch.Tensor]): List of multiple images.
            img_metas (list[dict]): List of image information.
            rescale (bool, optional): Whether to rescale the results.
                Defaults to False.

        Returns:
            list[list[np.ndarray]]: BBox results of each image and classes.
                The outer list corresponds to each image. The inner list
                corresponds to each class.
        """
        x = self.extract_feat(img)
        outs = self.bbox_head(x)
        bbox_outputs = self.bbox_head.get_bboxes(
            *outs, img_metas, rescale=rescale)

        if self.bbox_head.pred_bbox2d:
            from mmdet.core import bbox2result
            bbox2d_img = [
                bbox2result(bboxes2d, labels, self.bbox_head.num_classes)
                for bboxes, scores, labels, attrs, bboxes2d in bbox_outputs
            ]
            bbox_outputs = [bbox_outputs[0][:-1]]

        bbox_img = [
            bbox3d2result(bboxes, scores, labels, attrs)
            for bboxes, scores, labels, attrs in bbox_outputs
        ]

        bbox_list = [dict() for i in range(len(img_metas))]
        for result_dict, img_bbox in zip(bbox_list, bbox_img):
            result_dict['img_bbox'] = img_bbox
        if self.bbox_head.pred_bbox2d:
            for result_dict, img_bbox2d in zip(bbox_list, bbox2d_img):
                result_dict['img_bbox2d'] = img_bbox2d
        return bbox_list

    def aug_test(self, imgs, img_metas, rescale=False):
        """Test function with test time augmentation."""
        feats = self.extract_feats(imgs)

        # only support aug_test for one sample
        outs_list = [self.bbox_head(x) for x in feats]
        for i, img_meta in enumerate(img_metas):
            if img_meta[0]['pcd_horizontal_flip']:
                for j in range(len(outs_list[i])):  # for each prediction
                    if outs_list[i][j][0] is None:
                        continue
                    for k in range(len(outs_list[i][j])):
                        # every stride of featmap
                        outs_list[i][j][k] = torch.flip(
                            outs_list[i][j][k], dims=[3])
                reg = outs_list[i][1]
                for reg_feat in reg:
                    # offset_x
                    reg_feat[:, 0, :, :] = 1 - reg_feat[:, 0, :, :]
                    # velo_x
                    if self.bbox_head.pred_velo:
                        reg_feat[:, 7, :, :] = -reg_feat[:, 7, :, :]
                    # rotation
                    reg_feat[:, 6, :, :] = -reg_feat[:, 6, :, :] + np.pi

        merged_outs = []
        for i in range(len(outs_list[0])):  # for each prediction
            merged_feats = []
            for j in range(len(outs_list[0][i])):
                if outs_list[0][i][0] is None:
                    merged_feats.append(None)
                    continue
                # for each stride of featmap
                avg_feats = torch.mean(
                    torch.cat([x[i][j] for x in outs_list]),
                    dim=0,
                    keepdim=True)
                if i == 1:  # regression predictions
                    # rot/velo/2d det keeps the original
                    avg_feats[:, 6:, :, :] = \
                        outs_list[0][i][j][:, 6:, :, :]
                if i == 2:
                    # dir_cls keeps the original
                    avg_feats = outs_list[0][i][j]
                merged_feats.append(avg_feats)
            merged_outs.append(merged_feats)
        merged_outs = tuple(merged_outs)

        bbox_outputs = self.bbox_head.get_bboxes(
            *merged_outs, img_metas[0], rescale=rescale)
        if self.bbox_head.pred_bbox2d:
            from mmdet.core import bbox2result
            bbox2d_img = [
                bbox2result(bboxes2d, labels, self.bbox_head.num_classes)
                for bboxes, scores, labels, attrs, bboxes2d in bbox_outputs
            ]
            bbox_outputs = [bbox_outputs[0][:-1]]

        bbox_img = [
            bbox3d2result(bboxes, scores, labels, attrs)
            for bboxes, scores, labels, attrs in bbox_outputs
        ]

        bbox_list = dict()
        bbox_list.update(img_bbox=bbox_img[0])
        if self.bbox_head.pred_bbox2d:
            bbox_list.update(img_bbox2d=bbox2d_img[0])

        return [bbox_list]

    def show_results(self, data, result, out_dir, show=False, score_thr=None):
        """Results visualization.

        Args:
            data (list[dict]): Input images and the information of the sample.
            result (list[dict]): Prediction results.
            out_dir (str): Output directory of visualization result.
            show (bool, optional): Determines whether you are
                going to show result by open3d.
                Defaults to False.
            TODO: implement score_thr of single_stage_mono3d.
            score_thr (float, optional): Score threshold of bounding boxes.
                Default to None.
                Not implemented yet, but it is here for unification.
        """
        for batch_id in range(len(result)):
            if isinstance(data['img_metas'][0], DC):
                img_filename = data['img_metas'][0]._data[0][batch_id][
                    'filename']
                cam2img = data['img_metas'][0]._data[0][batch_id]['cam2img']
            elif mmcv.is_list_of(data['img_metas'][0], dict):
                img_filename = data['img_metas'][0][batch_id]['filename']
                cam2img = data['img_metas'][0][batch_id]['cam2img']
            else:
                raise ValueError(
                    f"Unsupported data type {type(data['img_metas'][0])} "
                    f'for visualization!')
            img = mmcv.imread(img_filename)
            file_name = osp.split(img_filename)[-1].split('.')[0]

            assert out_dir is not None, 'Expect out_dir, got none.'

            pred_bboxes = result[batch_id]['img_bbox']['boxes_3d']
            assert isinstance(pred_bboxes, CameraInstance3DBoxes), \
                f'unsupported predicted bbox type {type(pred_bboxes)}'

            show_multi_modality_result(
                img,
                None,
                pred_bboxes,
                cam2img,
                out_dir,
                file_name,
                'camera',
                show=show)

    def convert_to_datasample(self, results_list: InstanceList) -> SampleList:
        """Convert results list to `Det3DDataSample`.

        Args:
            results_list (list[:obj:`InstanceData`]): Detection results
                of each image. For each image, it could contain two result
                formats:

                1. pred_instances_3d
                2. (pred_instances_3d, pred_instances)

        Returns:
            list[:obj:`Det3DDataSample`]: 3D detection results of the
            input images. Each Det3DDataSample usually contains
            'pred_instances_3d', and ``pred_instances_3d`` usually
            contains the following keys.

            - scores_3d (Tensor): Classification scores, has a shape
              (num_instance, )
            - labels_3d (Tensor): Labels of bboxes, has a shape
              (num_instances, ).
            - bboxes_3d (Tensor): Contains a tensor with shape
              (num_instances, C) where C >= 7.
        """
        out_results_list = []
        for i in range(len(results_list)):
            result = Det3DDataSample()
            if len(results_list[i]) == 2:
                result.pred_instances_3d = results_list[i][0]
                result.pred_instances = results_list[i][1]
            else:
                result.pred_instances_3d = results_list[i]
            out_results_list.append(result)
        return out_results_list

    def extract_feat(self, batch_inputs_dict: dict) -> Tuple[Tensor]:
        """Extract features.

        Args:
            batch_inputs_dict (dict): Contains an 'imgs' key
                with an image tensor of shape (N, C, H, W).

        Returns:
            tuple[Tensor]: Multi-level features that may have
            different resolutions.
        """
        batch_imgs = batch_inputs_dict['imgs']
        x = self.backbone(batch_imgs)
        if self.with_neck:
            x = self.neck(x)
        return x

    # TODO: Support test time augmentation
    def aug_test(self, imgs, img_metas, rescale=False):
        """Test function with test time augmentation."""
        pass
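The refactored single-stage inference path then reads, end to end (shapes and keys shown are illustrative):

# batch_inputs_dict = {'imgs': Tensor of shape (N, 3, H, W)}
# feats = detector.extract_feat(batch_inputs_dict)    # tuple of FPN levels
# results_list = detector.bbox_head.predict(feats, batch_data_samples)
# data_samples = detector.convert_to_datasample(results_list)
# data_samples[0].pred_instances_3d.bboxes_3d         # (num_instances, >=7)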
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet3d.core import ConfigType, OptConfigType, OptMultiConfig
from mmdet3d.registry import MODELS
from .single_stage_mono3d import SingleStageMono3DDetector
......@@ -8,14 +9,35 @@ class SMOKEMono3D(SingleStageMono3DDetector):
r"""SMOKE <https://arxiv.org/abs/2002.10111>`_ for monocular 3D object
detection.
Args:
backbone (:obj:`ConfigDict` or dict): The backbone config.
neck (:obj:`ConfigDict` or dict): The neck config.
bbox_head (:obj:`ConfigDict` or dict): The bbox head config.
        train_cfg (:obj:`ConfigDict` or dict, optional): The training config
            of SMOKE. Defaults to None.
        test_cfg (:obj:`ConfigDict` or dict, optional): The testing config
            of SMOKE. Defaults to None.
data_preprocessor (:obj:`ConfigDict` or dict, optional): Config of
:class:`DetDataPreprocessor` to process the input data.
Defaults to None.
init_cfg (:obj:`ConfigDict` or list[:obj:`ConfigDict`] or dict or
list[dict], optional): Initialization config dict.
Defaults to None.
"""
def __init__(self,
backbone,
neck,
bbox_head,
train_cfg=None,
test_cfg=None,
pretrained=None):
super(SMOKEMono3D, self).__init__(backbone, neck, bbox_head, train_cfg,
test_cfg, pretrained)
backbone: ConfigType,
neck: ConfigType,
bbox_head: ConfigType,
train_cfg: OptConfigType = None,
test_cfg: OptConfigType = None,
data_preprocessor: OptConfigType = None,
init_cfg: OptMultiConfig = None) -> None:
super().__init__(
backbone=backbone,
neck=neck,
bbox_head=bbox_head,
train_cfg=train_cfg,
test_cfg=test_cfg,
data_preprocessor=data_preprocessor,
init_cfg=init_cfg)
......@@ -85,6 +85,10 @@ def test_getitem():
assert isinstance(ann_info['gt_bboxes_3d'], LiDARInstance3DBoxes)
assert torch.allclose(ann_info['gt_bboxes_3d'].tensor.sum(),
torch.tensor(7.2650))
assert 'centers_2d' in ann_info
assert ann_info['centers_2d'].dtype == np.float64
assert 'depths' in ann_info
assert ann_info['depths'].dtype == np.float64
assert 'group_id' in ann_info
assert ann_info['group_id'].dtype == np.int64
assert 'occluded' in ann_info
......
......@@ -45,8 +45,8 @@ def test_getitem():
_generate_nus_dataset_config()
nus_dataset = NuScenesDataset(
data_root,
ann_file,
data_root=data_root,
ann_file=ann_file,
data_prefix=data_prefix,
pipeline=pipeline,
metainfo=dict(CLASSES=classes),
......
......@@ -4,6 +4,7 @@ import numpy as np
from mmdet3d.core import LiDARInstance3DBoxes
# create a dummy `results` to test the pipeline
from mmdet3d.datasets import LoadAnnotations3D, LoadPointsFromFile
from mmdet3d.datasets.pipelines.loading import LoadImageFromFileMono3D
def create_dummy_data_info(with_ann=True):
......@@ -20,6 +21,10 @@ def create_dummy_data_info(with_ann=True):
-1.5808]])),
'gt_labels_3d':
np.array([1]),
'centers_2d':
np.array([[765.04, 214.56]]),
'depths':
np.array([8.410]),
'num_lidar_pts':
np.array([377]),
'difficulty':
......@@ -134,6 +139,9 @@ def create_dummy_data_info(with_ann=True):
],
'bbox_label_3d':
-1,
'center_2d': [765.04, 214.56],
'depth':
8.410,
'num_lidar_pts':
377,
'difficulty':
......@@ -168,3 +176,17 @@ def create_data_info_after_loading():
data_info = load_points_transform(data_info)
data_info_after_loading = load_anns_transform(data_info)
return data_info_after_loading
def create_mono3d_data_info_after_loading():
load_anns_transform = LoadAnnotations3D(
with_bbox=True,
with_label=True,
with_bbox_3d=True,
with_label_3d=True,
with_bbox_depth=True)
load_img_transform = LoadImageFromFileMono3D()
data_info = create_dummy_data_info()
data_info = load_img_transform(data_info)
data_info_after_loading = load_anns_transform(data_info)
return data_info_after_loading
......@@ -117,6 +117,7 @@ class TestFCOSMono3DHead(TestCase):
# When truth is non-empty then all losses
# should be nonzero for random inputs
gt_instances_3d = InstanceData()
gt_instances = InstanceData()
gt_bboxes = torch.rand([3, 4], dtype=torch.float32)
gt_bboxes_3d = CameraInstance3DBoxes(torch.rand([3, 9]), box_dim=9)
......@@ -129,14 +130,14 @@ class TestFCOSMono3DHead(TestCase):
gt_instances_3d.bboxes_3d = gt_bboxes_3d
gt_instances_3d.labels_3d = gt_labels_3d
gt_instances_3d.bboxes = gt_bboxes
gt_instances_3d.labels = gt_labels
gt_instances.bboxes = gt_bboxes
gt_instances.labels = gt_labels
gt_instances_3d.centers_2d = centers_2d
gt_instances_3d.depths = depths
gt_instances_3d.attr_labels = attr_labels
gt_losses = fcos_mono3d_head.loss(*ret_dict, [gt_instances_3d],
img_metas)
gt_losses = fcos_mono3d_head.loss_by_feat(*ret_dict, [gt_instances_3d],
[gt_instances], img_metas)
gt_cls_loss = gt_losses['loss_cls'].item()
gt_siz_loss = gt_losses['loss_size'].item()
......@@ -160,7 +161,7 @@ class TestFCOSMono3DHead(TestCase):
        self.assertGreater(gt_atr_loss, 0, 'attribute loss should be positive')
# test get_results
results_list = fcos_mono3d_head.get_results(*ret_dict, img_metas)
results_list = fcos_mono3d_head.predict_by_feat(*ret_dict, img_metas)
self.assertEqual(
len(results_list), 1,
            'there should be results for exactly one image')
......
......@@ -142,6 +142,7 @@ class TestFGDHead(TestCase):
# When truth is non-empty then all losses
# should be nonzero for random inputs
gt_instances_3d = InstanceData()
gt_instances = InstanceData()
gt_bboxes = torch.rand([3, 4], dtype=torch.float32)
gt_bboxes_3d = CameraInstance3DBoxes(torch.rand([3, 7]), box_dim=7)
......@@ -152,12 +153,13 @@ class TestFGDHead(TestCase):
gt_instances_3d.bboxes_3d = gt_bboxes_3d
gt_instances_3d.labels_3d = gt_labels_3d
gt_instances_3d.bboxes = gt_bboxes
gt_instances_3d.labels = gt_labels
gt_instances.bboxes = gt_bboxes
gt_instances.labels = gt_labels
gt_instances_3d.centers_2d = centers_2d
gt_instances_3d.depths = depths
gt_losses = pgd_head.loss(*ret_dict, [gt_instances_3d], img_metas)
gt_losses = pgd_head.loss_by_feat(*ret_dict, [gt_instances_3d],
[gt_instances], img_metas)
gt_cls_loss = gt_losses['loss_cls'].item()
gt_siz_loss = gt_losses['loss_size'].item()
......@@ -184,15 +186,15 @@ class TestFGDHead(TestCase):
'consistency loss should be positive')
# test get_results
results_list = pgd_head.get_results(*ret_dict, img_metas)
results_list = pgd_head.predict_by_feat(*ret_dict, img_metas)
self.assertEqual(
len(results_list), 1,
            'there should be results for exactly one image')
results = results_list[0]
results, results_2d = results_list[0]
pred_bboxes_3d = results.bboxes_3d
pred_scores_3d = results.scores_3d
pred_labels_3d = results.labels_3d
pred_bboxes_2d = results.bboxes
pred_bboxes_2d = results_2d.bboxes
self.assertEqual(pred_bboxes_3d.tensor.shape, torch.Size([20, 7]),
'the shape of predicted 3d bboxes should be [20, 7]')
self.assertEqual(
......@@ -202,6 +204,6 @@ class TestFGDHead(TestCase):
pred_labels_3d.shape, torch.Size([20]),
'the shape of predicted 3d bbox labels should be [20]')
self.assertEqual(
pred_bboxes_2d.shape, torch.Size([20, 5]),
'the shape of predicted 2d bbox attribute labels should be [20, 5]'
            pred_bboxes_2d.shape, torch.Size([20, 4]),
            'the shape of predicted 2d bboxes should be [20, 4]'
)
......@@ -82,6 +82,7 @@ class TestSMOKEMono3DHead(TestCase):
# When truth is non-empty then all losses
# should be nonzero for random inputs
gt_instances_3d = InstanceData()
gt_instances = InstanceData()
gt_bboxes = torch.Tensor([[1.0, 2.0, 20.0, 40.0],
[45.0, 50.0, 80.0, 70.1],
......@@ -94,13 +95,14 @@ class TestSMOKEMono3DHead(TestCase):
gt_instances_3d.bboxes_3d = gt_bboxes_3d
gt_instances_3d.labels_3d = gt_labels_3d
gt_instances_3d.bboxes = gt_bboxes
gt_instances_3d.labels = gt_labels
gt_instances.bboxes = gt_bboxes
gt_instances.labels = gt_labels
gt_instances_3d.centers_2d = centers_2d
gt_instances_3d.depths = depths
gt_losses = smoke_mono3d_head.loss(*ret_dict, [gt_instances_3d],
img_metas)
gt_losses = smoke_mono3d_head.loss_by_feat(*ret_dict,
[gt_instances_3d],
[gt_instances], img_metas)
gt_cls_loss = gt_losses['loss_cls'].item()
gt_box_loss = gt_losses['loss_bbox'].item()
......@@ -109,7 +111,7 @@ class TestSMOKEMono3DHead(TestCase):
self.assertGreater(gt_box_loss, 0, 'bbox loss should be positive')
# test get_results
results_list = smoke_mono3d_head.get_results(*ret_dict, img_metas)
results_list = smoke_mono3d_head.predict_by_feat(*ret_dict, img_metas)
self.assertEqual(
len(results_list), 1, 'there should be one image results')
results = results_list[0]
......
......@@ -14,7 +14,10 @@ from os import path as osp
import mmcv
import numpy as np
from nuscenes.nuscenes import NuScenes
from mmdet3d.core.bbox import points_cam2img
from mmdet3d.datasets.convert_utils import get_2d_boxes
from mmdet3d.datasets.utils import convert_quaternion_to_matrix
......@@ -60,6 +63,19 @@ def get_empty_instance():
return instance
def get_empty_multicamera_instances():
cam_instance = dict(
        CAM_FRONT=None,
CAM_FRONT_RIGHT=None,
CAM_FRONT_LEFT=None,
CAM_BACK=None,
CAM_BACK_RIGHT=None,
CAM_BACK_LEFT=None)
return cam_instance
def get_empty_lidar_points():
lidar_points = dict(
# (int, optional) : Number of features for each point.
......@@ -206,6 +222,32 @@ def clear_data_info_unused_keys(data_info):
return data_info, empty_flag
def generate_camera_instances(info, nusc):
# get bbox annotations for camera
camera_types = [
'CAM_FRONT',
'CAM_FRONT_RIGHT',
'CAM_FRONT_LEFT',
'CAM_BACK',
'CAM_BACK_LEFT',
'CAM_BACK_RIGHT',
]
empty_multicamera_instance = get_empty_multicamera_instances()
for cam in camera_types:
cam_info = info['cams'][cam]
# list[dict]
ann_infos = get_2d_boxes(
nusc,
cam_info['sample_data_token'],
visibilities=['', '1', '2', '3', '4'])
empty_multicamera_instance[cam] = ann_infos
return empty_multicamera_instance
def update_nuscenes_infos(pkl_path, out_dir):
print(f'{pkl_path} will be modified.')
if out_dir in pkl_path:
......@@ -222,6 +264,11 @@ def update_nuscenes_infos(pkl_path, out_dir):
'version':
data_list['metadata']['version']
}
nusc = NuScenes(
version=data_list['metadata']['version'],
dataroot='./data/nuscenes',
verbose=True)
print('Start updating:')
converted_list = []
for i, ori_info_dict in enumerate(
......@@ -304,6 +351,8 @@ def update_nuscenes_infos(pkl_path, out_dir):
empty_instance['bbox_3d_isvalid'] = ori_info_dict['valid_flag'][i]
empty_instance = clear_instance_unused_keys(empty_instance)
temp_data_info['instances'].append(empty_instance)
temp_data_info['cam_instances'] = generate_camera_instances(
ori_info_dict, nusc)
temp_data_info, _ = clear_data_info_unused_keys(temp_data_info)
converted_list.append(temp_data_info)
pkl_name = pkl_path.split('/')[-1]
......@@ -313,7 +362,6 @@ def update_nuscenes_infos(pkl_path, out_dir):
converted_data_info = dict(metainfo=METAINFO, data_list=converted_list)
mmcv.dump(converted_data_info, out_path, 'pkl')
return temp_lidar_sweep
def update_kitti_infos(pkl_path, out_dir):
......@@ -382,6 +430,7 @@ def update_kitti_infos(pkl_path, out_dir):
anns = ori_info_dict['annos']
num_instances = len(anns['name'])
cam2img = ori_info_dict['calib']['P2']
ignore_class_name = set()
instance_list = []
......@@ -401,6 +450,17 @@ def update_kitti_infos(pkl_path, out_dir):
loc = anns['location'][instance_id]
dims = anns['dimensions'][instance_id]
rots = anns['rotation_y'][:, None][instance_id]
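        # shift the box origin from the KITTI camera convention
        # (0.5, 1.0, 0.5), i.e. the bottom center, to the gravity center
        # (0.5, 0.5, 0.5) before projecting it onto the image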
dst = np.array([0.5, 0.5, 0.5])
src = np.array([0.5, 1.0, 0.5])
center_3d = loc + dims * (dst - src)
center_2d = points_cam2img(
center_3d.reshape([1, 3]), cam2img, with_depth=True)
center_2d = center_2d.squeeze().tolist()
empty_instance['center_2d'] = center_2d[:2]
empty_instance['depth'] = center_2d[2]
gt_bboxes_3d = np.concatenate([loc, dims, rots]).tolist()
empty_instance['bbox_3d'] = gt_bboxes_3d
empty_instance['bbox_label_3d'] = copy.deepcopy(
......@@ -734,7 +794,6 @@ def parse_args():
type=str,
        default='./data/kitti/kitti_infos_train.pkl',
help='specify the root dir of dataset')
parser.add_argument(
'--out-dir',
type=str,
......