"torchvision/vscode:/vscode.git/clone" did not exist on "1ae38297fa127ebe13157fb293c6a1367b420433"
Commit b496f579 authored by ZCMax, committed by ChaimZhu

[Refactor] Refactor Mono3D models

parent 35667791
 # Copyright (c) OpenMMLab. All rights reserved.
-import warnings
 from abc import ABCMeta, abstractmethod
-from typing import List, Optional
+from typing import Optional, Tuple

 from mmcv.runner import BaseModule
 from mmengine.config import ConfigDict
 from torch import Tensor

-from mmdet3d.core import Det3DDataSample
+from mmdet3d.core.utils import InstanceList, OptMultiConfig, SampleList


 class BaseMono3DDenseHead(BaseModule, metaclass=ABCMeta):
-    """Base class for Monocular 3D DenseHeads."""
+    """Base class for Monocular 3D DenseHeads.
+
+    1. The ``loss`` method is used to calculate the loss of the dense head,
+    which includes two steps: (1) the dense head model performs forward
+    propagation to obtain the feature maps (2) the ``loss_by_feat`` method
+    is called based on the feature maps to calculate the loss.
+
+    .. code:: text
+
+    loss(): forward() -> loss_by_feat()
+
+    2. The ``predict`` method is used to predict detection results,
+    which includes two steps: (1) the dense head model performs forward
+    propagation to obtain the feature maps (2) the ``predict_by_feat``
+    method is called based on the feature maps to predict detection
+    results including post-processing.
+
+    .. code:: text
+
+    predict(): forward() -> predict_by_feat()
+
+    3. The ``loss_and_predict`` method is used to return loss and detection
+    results at the same time. It will call the dense head's ``forward``,
+    ``loss_by_feat`` and ``predict_by_feat`` methods in order. If a
+    one-stage head is used as an RPN, the dense head needs to return both
+    losses and predictions; these predictions are used as the proposals of
+    the roi head.
+
+    .. code:: text
+
+    loss_and_predict(): forward() -> loss_by_feat() -> predict_by_feat()
+    """

-    def __init__(self, init_cfg: Optional[dict] = None) -> None:
+    def __init__(self, init_cfg: OptMultiConfig = None) -> None:
         super(BaseMono3DDenseHead, self).__init__(init_cfg=init_cfg)

-    @abstractmethod
-    def loss(self, **kwargs):
-        """Compute losses of the head."""
-        pass
-
-    def get_bboxes(self, *args, **kwargs):
-        warnings.warn('`get_bboxes` is deprecated and will be removed in '
-                      'the future. Please use `get_results` instead.')
-        return self.get_results(*args, **kwargs)
-
-    @abstractmethod
-    def get_results(self, *args, **kwargs):
-        """Transform network outputs of a batch into 3D bbox results."""
-        pass
-
-    def forward_train(self,
-                      x: List[Tensor],
-                      batch_data_samples: List[Det3DDataSample],
-                      proposal_cfg: Optional[ConfigDict] = None,
-                      **kwargs):
+    def loss(self, x: Tuple[Tensor], batch_data_samples: SampleList,
+             **kwargs) -> dict:
         """
         Args:
             x (list[Tensor]): Features from FPN.
             batch_data_samples (list[:obj:`Det3DDataSample`]): Each item
                 contains the meta information of each image and corresponding
                 annotations.
-            proposal_cfg (mmengine.Config, optional): Test / postprocessing
-                configuration, if None, test_cfg would be used.
-                Defaults to None.

         Returns:
-            tuple or Tensor: When `proposal_cfg` is None, the detector is a \
...

@@ -73,26 +81,105 @@ class BaseMono3DDenseHead(BaseModule, metaclass=ABCMeta):
         outs = self(x)
         batch_gt_instances_3d = []
+        batch_gt_instances = []
         batch_gt_instances_ignore = []
         batch_img_metas = []
         for data_sample in batch_data_samples:
             batch_img_metas.append(data_sample.metainfo)
             batch_gt_instances_3d.append(data_sample.gt_instances_3d)
-            if 'ignored_instances' in data_sample:
-                batch_gt_instances_ignore.append(data_sample.ignored_instances)
-            else:
-                batch_gt_instances_ignore.append(None)
+            batch_gt_instances.append(data_sample.gt_instances)
+            batch_gt_instances_ignore.append(
+                data_sample.get('ignored_instances', None))

-        loss_inputs = outs + (batch_gt_instances_3d, batch_img_metas,
-                              batch_gt_instances_ignore)
-        losses = self.loss(*loss_inputs)
+        loss_inputs = outs + (batch_gt_instances_3d, batch_gt_instances,
+                              batch_img_metas, batch_gt_instances_ignore)
+        losses = self.loss_by_feat(*loss_inputs)

-        if proposal_cfg is None:
-            return losses
-        else:
-            batch_img_metas = [
-                data_sample.metainfo for data_sample in batch_data_samples
-            ]
-            results_list = self.get_results(
-                *outs, batch_img_metas=batch_img_metas, cfg=proposal_cfg)
-            return losses, results_list
+        return losses
+
+    @abstractmethod
+    def loss_by_feat(self, **kwargs) -> dict:
+        """Calculate the loss based on the features extracted by the
+        detection head."""
+        pass
+
+    def loss_and_predict(self,
+                         x: Tuple[Tensor],
+                         batch_data_samples: SampleList,
+                         proposal_cfg: Optional[ConfigDict] = None,
+                         **kwargs) -> Tuple[dict, InstanceList]:
+        """Perform forward propagation of the head, then calculate loss and
+        predictions from the features and data samples.
+
+        Args:
+            x (tuple[Tensor]): Features from FPN.
+            batch_data_samples (list[:obj:`Det3DDataSample`]): Each item
+                contains the meta information of each image and
+                corresponding annotations.
+            proposal_cfg (ConfigDict, optional): Test / postprocessing
+                configuration, if None, test_cfg would be used.
+                Defaults to None.
+
+        Returns:
+            tuple: The return value is a tuple that contains:
+
+                - losses (dict[str, Tensor]): A dictionary of loss
+                  components.
+                - predictions (list[:obj:`InstanceData`]): Detection
+                  results of each image after the post process.
+        """
+        batch_gt_instances_3d = []
+        batch_gt_instances = []
+        batch_gt_instances_ignore = []
+        batch_img_metas = []
+        for data_sample in batch_data_samples:
+            batch_img_metas.append(data_sample.metainfo)
+            batch_gt_instances_3d.append(data_sample.gt_instances_3d)
+            batch_gt_instances.append(data_sample.gt_instances)
+            batch_gt_instances_ignore.append(
+                data_sample.get('ignored_instances', None))
+
+        outs = self(x)
+
+        loss_inputs = outs + (batch_gt_instances_3d, batch_gt_instances,
+                              batch_img_metas, batch_gt_instances_ignore)
+        losses = self.loss_by_feat(*loss_inputs)
+
+        predictions = self.predict_by_feat(
+            *outs, batch_img_metas=batch_img_metas, cfg=proposal_cfg)
+        return losses, predictions
+
+    def predict(self,
+                x: Tuple[Tensor],
+                batch_data_samples: SampleList,
+                rescale: bool = False) -> InstanceList:
+        """Perform forward propagation of the detection head and predict
+        detection results on the features of the upstream network.
+
+        Args:
+            x (tuple[Tensor]): Multi-level features from the upstream
+                network, each is a 4D-tensor.
+            batch_data_samples (list[:obj:`Det3DDataSample`]): The data
+                samples. It usually includes information such as
+                `gt_instance_3d`, `gt_pts_panoptic_seg` and `gt_pts_sem_seg`.
+            rescale (bool, optional): Whether to rescale the results.
+                Defaults to False.
+
+        Returns:
+            list[:obj:`InstanceData`]: Detection results of each image
+            after the post process.
+        """
+        batch_img_metas = [
+            data_samples.metainfo for data_samples in batch_data_samples
+        ]
+
+        outs = self(x)
+        predictions = self.predict_by_feat(
+            *outs, batch_img_metas=batch_img_metas, rescale=rescale)
+        return predictions
+
+    @abstractmethod
+    def predict_by_feat(self, **kwargs) -> InstanceList:
+        """Transform a batch of output features extracted from the head
+        into bbox results."""
+        pass
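The three entry points in the docstring above all route through ``forward`` and the two ``*_by_feat`` hooks. The sketch below reduces that orchestration to plain Python so it runs without mmcv or mmengine; ``DummyHead``, its toy tensors, and its placeholder losses are hypothetical stand-ins, not code from this commit.

```python
from typing import List, Tuple

import torch
from torch import Tensor


class DummyHead:
    """Mimics the refactored entry points of BaseMono3DDenseHead."""

    def __call__(self, x: Tuple[Tensor]) -> Tuple[List[Tensor], List[Tensor]]:
        # stands in for forward(): per-level score maps and box-energy maps
        return ([lvl.mean(1, keepdim=True) for lvl in x],
                [lvl[:, :7] for lvl in x])

    def loss_by_feat(self, cls_scores, bbox_preds) -> dict:
        # placeholder losses; a real head matches feature maps to targets
        return {'loss_cls': cls_scores[0].abs().mean(),
                'loss_bbox': bbox_preds[0].abs().mean()}

    def predict_by_feat(self, cls_scores, bbox_preds) -> list:
        # placeholder decoding; a real head decodes boxes and runs NMS
        return [{'scores_3d': s.flatten()} for s in cls_scores]

    def loss(self, x):  # loss(): forward() -> loss_by_feat()
        return self.loss_by_feat(*self(x))

    def predict(self, x):  # predict(): forward() -> predict_by_feat()
        return self.predict_by_feat(*self(x))

    def loss_and_predict(self, x):
        outs = self(x)  # forward() runs once and feeds both branches
        return self.loss_by_feat(*outs), self.predict_by_feat(*outs)


feats = (torch.rand(2, 16, 32, 32),)  # one FPN level, batch of 2
losses, preds = DummyHead().loss_and_predict(feats)
print(sorted(losses), len(preds))  # ['loss_bbox', 'loss_cls'] 1
```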
@@ -3,7 +3,6 @@ from typing import List, Optional, Tuple, Union

 import torch
 from mmcv.cnn import xavier_init
-from mmcv.runner import force_fp32
 from mmengine.config import ConfigDict
 from mmengine.data import InstanceData
 from torch import Tensor

@@ -197,40 +196,9 @@ class MonoFlexHead(AnchorFreeMono3DHead):
         if self.use_edge_fusion:
             self._init_edge_module()

-    def forward_train(self,
-                      x: List[Tensor],
-                      batch_data_samples: List[Det3DDataSample],
-                      proposal_cfg: Optional[ConfigDict] = None,
-                      **kwargs):
+    def loss(self, x: List[Tensor], batch_data_samples: List[Det3DDataSample],
+             **kwargs):
         """
-        Args:
-            x (list[Tensor]): Features from FPN.
-            batch_img_metas (list[dict]): Meta information of each image,
-                e.g., image size, scaling factor, etc.
-            gt_bboxes (list[Tensor]): Ground truth bboxes of the image,
-                shape (num_gts, 4).
-            gt_labels (list[Tensor]): Ground truth labels of each box,
-                shape (num_gts,).
-            gt_bboxes_3d (list[Tensor]): 3D ground truth bboxes of the image,
-                shape (num_gts, self.bbox_code_size).
-            gt_labels_3d (list[Tensor]): 3D ground truth labels of each box,
-                shape (num_gts,).
-            centers_2d (list[Tensor]): Projected 3D center of each box,
-                shape (num_gts, 2).
-            depths (list[Tensor]): Depth of projected 3D center of each box,
-                shape (num_gts,).
-            attr_labels (list[Tensor]): Attribute labels of each box,
-                shape (num_gts,).
-            gt_bboxes_ignore (list[Tensor]): Ground truth bboxes to be
-                ignored, shape (num_ignored_gts, 4).
-            proposal_cfg (mmcv.Config): Test / postprocessing configuration,
-                if None, test_cfg would be used.
-
-        Returns:
-            tuple:
-                losses: (dict[str, Tensor]): A dictionary of loss components.
-                proposal_list (list[Tensor]): Proposals of each image.
-        """
-        """
         Args:
             x (list[Tensor]): Features from FPN.
             batch_data_samples (list[:obj:`Det3DDataSample`]): Each item
...

@@ -266,15 +234,15 @@ class MonoFlexHead(AnchorFreeMono3DHead):
         """
         batch_gt_instances_3d = []
+        batch_gt_instances = []
         batch_gt_instances_ignore = []
         batch_img_metas = []
         for data_sample in batch_data_samples:
             batch_img_metas.append(data_sample.metainfo)
             batch_gt_instances_3d.append(data_sample.gt_instances_3d)
-            if 'ignored_instances' in data_sample:
-                batch_gt_instances_ignore.append(data_sample.ignored_instances)
-            else:
-                batch_gt_instances_ignore.append(None)
+            batch_gt_instances.append(data_sample.gt_instances)
+            batch_gt_instances_ignore.append(
+                data_sample.get('ignored_instances', None))

         # monoflex head needs img_metas for feature extraction
         outs = self(x, batch_img_metas)

@@ -282,15 +250,7 @@ class MonoFlexHead(AnchorFreeMono3DHead):
                        batch_gt_instances_ignore)
-        losses = self.loss(*loss_inputs)
+        losses = self.loss_by_feat(*loss_inputs)

-        if proposal_cfg is None:
-            return losses
-        else:
-            batch_img_metas = [
-                data_sample.metainfo for data_sample in batch_data_samples
-            ]
-            results_list = self.get_results(
-                *outs, batch_img_metas=batch_img_metas, cfg=proposal_cfg)
-            return losses, results_list
+        return losses

     def forward(self, feats: List[Tensor], batch_img_metas: List[dict]):
         """Forward features from the upstream network.

@@ -373,9 +333,8 @@ class MonoFlexHead(AnchorFreeMono3DHead):
         return cls_score, bbox_pred

-    @force_fp32(apply_to=('cls_scores', 'bbox_preds'))
-    def get_results(self, cls_scores: List[Tensor], bbox_preds: List[Tensor],
-                    batch_img_metas: List[dict]):
+    def predict_by_feat(self, cls_scores: List[Tensor],
+                        bbox_preds: List[Tensor],
+                        batch_img_metas: List[dict]):
         """Generate bboxes from bbox head predictions.

         Args:

@@ -393,7 +352,7 @@ class MonoFlexHead(AnchorFreeMono3DHead):
             cls_scores[0].new_tensor(input_meta['cam2img'])
             for input_meta in batch_img_metas
         ])
-        batch_bboxes, batch_scores, batch_topk_labels = self.decode_heatmap(
+        batch_bboxes, batch_scores, batch_topk_labels = self._decode_heatmap(
             cls_scores[0],
             bbox_preds[0],
             batch_img_metas,

@@ -429,7 +388,7 @@ class MonoFlexHead(AnchorFreeMono3DHead):
         return result_list

-    def decode_heatmap(self,
-                       cls_score: Tensor,
-                       reg_pred: Tensor,
-                       batch_img_metas: List[dict],
+    def _decode_heatmap(self,
+                        cls_score: Tensor,
+                        reg_pred: Tensor,
+                        batch_img_metas: List[dict],

@@ -530,14 +489,16 @@ class MonoFlexHead(AnchorFreeMono3DHead):
         return preds

     def get_targets(self, batch_gt_instances_3d: List[InstanceData],
+                    batch_gt_instances: List[InstanceData],
                     feat_shape: Tuple[int], batch_img_metas: List[dict]):
         """Get training targets for batch images.

         Args:
             batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
-                gt_instance_3d. It usually includes ``bboxes``、``labels``
-                、``bboxes_3d``、``labels_3d``、``depths``、``centers_2d`` and
-                attributes.
+                gt_instance_3d. It usually includes ``bboxes_3d``、
+                ``labels_3d``、``depths``、``centers_2d`` and attributes.
+            batch_gt_instances (list[:obj:`InstanceData`]): Batch of
+                gt_instance. It usually includes ``bboxes``、``labels``.
             feat_shape (tuple[int]): Feature map shape with value,
                 shape (B, _, H, W).
             batch_img_metas (list[dict]): Meta information of each image,
                 e.g., image size, scaling factor, etc.

@@ -574,10 +535,10 @@ class MonoFlexHead(AnchorFreeMono3DHead):
         """
         gt_bboxes_list = [
-            gt_instances_3d.bboxes for gt_instances_3d in batch_gt_instances_3d
+            gt_instances.bboxes for gt_instances in batch_gt_instances
         ]
         gt_labels_list = [
-            gt_instances_3d.labels for gt_instances_3d in batch_gt_instances_3d
+            gt_instances.labels for gt_instances in batch_gt_instances
         ]
         gt_bboxes_3d_list = [
             gt_instances_3d.bboxes_3d

@@ -721,10 +682,12 @@ class MonoFlexHead(AnchorFreeMono3DHead):
         return center_heatmap_target, avg_factor, target_labels

-    def loss(self,
-             cls_scores: List[Tensor],
-             bbox_preds: List[Tensor],
-             batch_gt_instances_3d: List[InstanceData],
-             batch_img_metas: List[dict],
-             batch_gt_instances_ignore: Optional[List[InstanceData]] = None):
+    def loss_by_feat(
+            self,
+            cls_scores: List[Tensor],
+            bbox_preds: List[Tensor],
+            batch_gt_instances_3d: List[InstanceData],
+            batch_gt_instances: List[InstanceData],
+            batch_img_metas: List[dict],
+            batch_gt_instances_ignore: Optional[List[InstanceData]] = None):
         """Compute loss of the head.

@@ -736,9 +699,10 @@ class MonoFlexHead(AnchorFreeMono3DHead):
                 number is bbox_code_size.
                 shape (B, 7, H, W).
             batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
-                gt_instance_3d. It usually includes ``bboxes``、``labels``
-                、``bboxes_3d``、``labels_3d``、``depths``、``centers_2d`` and
-                attributes.
+                gt_instance_3d. It usually includes ``bboxes_3d``、
+                ``labels_3d``、``depths``、``centers_2d`` and attributes.
+            batch_gt_instances (list[:obj:`InstanceData`]): Batch of
+                gt_instance. It usually includes ``bboxes``、``labels``.
             batch_img_metas (list[dict]): Meta information of each image, e.g.,
                 image size, scaling factor, etc.
             batch_gt_instances_ignore (list[:obj:`InstanceData`], Optional):

@@ -756,6 +720,7 @@ class MonoFlexHead(AnchorFreeMono3DHead):
         center2d_heatmap_target, avg_factor, target_labels = \
             self.get_targets(batch_gt_instances_3d,
+                             batch_gt_instances,
                              center2d_heatmap.shape,
                              batch_img_metas)
...
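Both the base head and MonoFlexHead replace a four-line ``if/else`` over ``ignored_instances`` with a single ``.get`` call, which works because ``Det3DDataSample`` supports dict-style access with a default. A toy illustration of the gathering loop, using plain dicts as stand-ins for data samples (an assumption for brevity, not the mmengine class):

```python
# Plain dicts stand in for Det3DDataSample objects here (assumption).
samples = [
    {'metainfo': {'img_shape': (370, 1224)}, 'gt_instances_3d': 'gt_a'},
    {'metainfo': {'img_shape': (370, 1224)}, 'gt_instances_3d': 'gt_b',
     'ignored_instances': 'ign_b'},
]

batch_img_metas = [s['metainfo'] for s in samples]
batch_gt_instances_3d = [s['gt_instances_3d'] for s in samples]
# one .get() with a None default replaces the old membership test + branch
batch_gt_instances_ignore = [s.get('ignored_instances', None) for s in samples]
assert batch_gt_instances_ignore == [None, 'ign_b']
```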
 # Copyright (c) OpenMMLab. All rights reserved.
-from typing import Tuple
+from typing import List, Optional, Tuple

 import numpy as np
 import torch
 from mmcv.cnn import Scale, bias_init_with_prob, normal_init
-from mmcv.runner import force_fp32
 from mmengine.data import InstanceData
+from torch import Tensor
 from torch import nn as nn
 from torch.nn import functional as F

 from mmdet3d.core import box3d_multiclass_nms, xywhr2xyxyr
 from mmdet3d.core.bbox import points_cam2img, points_img2cam
+from mmdet3d.core.utils import (ConfigType, InstanceList, OptConfigType,
+                                OptInstanceList)
 from mmdet3d.registry import MODELS
 from mmdet.core import distance2bbox, multi_apply
 from .fcos_mono3d_head import FCOSMono3DHead

@@ -86,7 +88,7 @@ class PGDHead(FCOSMono3DHead):
                      base_dims=((0.8, 1.73, 0.6), (1.76, 1.73, 0.6),
                                 (3.9, 1.56, 1.6)),
                      code_size=7),
-                 **kwargs):
+                 **kwargs) -> None:
         self.use_depth_classifier = use_depth_classifier
         self.use_onlyreg_proj = use_onlyreg_proj
         self.depth_branch = depth_branch

@@ -190,11 +192,11 @@ class PGDHead(FCOSMono3DHead):
         for conv_weight in self.conv_weights:
             normal_init(conv_weight, std=0.01)

-    def forward(self, feats):
+    def forward(self, x: Tuple[Tensor]) -> Tuple[Tensor, ...]:
         """Forward features from the upstream network.

         Args:
-            feats (tuple[Tensor]): Features from the upstream network, each is
+            x (tuple[Tensor]): Features from the upstream network, each is
                 a 4D-tensor.

         Returns:

@@ -220,10 +222,10 @@ class PGDHead(FCOSMono3DHead):
             centernesses (list[Tensor]): Centerness for each scale level,
                 each is a 4D-tensor, the channel number is num_points * 1.
         """
-        return multi_apply(self.forward_single, feats, self.scales,
-                           self.strides)
+        return multi_apply(self.forward_single, x, self.scales, self.strides)

-    def forward_single(self, x, scale, stride):
+    def forward_single(self, x: Tensor, scale: Scale,
+                       stride: int) -> Tuple[Tensor, ...]:
         """Forward features of a single scale level.

         Args:

@@ -271,17 +273,17 @@ class PGDHead(FCOSMono3DHead):
             attr_pred, centerness

     def get_proj_bbox2d(self,
-                        bbox_preds,
-                        pos_dir_cls_preds,
-                        labels_3d,
-                        bbox_targets_3d,
-                        pos_points,
-                        pos_inds,
-                        batch_img_metas,
-                        pos_depth_cls_preds=None,
-                        pos_weights=None,
-                        pos_cls_scores=None,
-                        with_kpts=False):
+                        bbox_preds: List[Tensor],
+                        pos_dir_cls_preds: List[Tensor],
+                        labels_3d: List[Tensor],
+                        bbox_targets_3d: List[Tensor],
+                        pos_points: Tensor,
+                        pos_inds: Tensor,
+                        batch_img_metas: List[dict],
+                        pos_depth_cls_preds: Optional[Tensor] = None,
+                        pos_weights: Optional[Tensor] = None,
+                        pos_cls_scores: Optional[Tensor] = None,
+                        with_kpts: bool = False) -> Tuple[Tensor]:
         """Decode box predictions and get projected 2D attributes.

         Args:

@@ -448,9 +450,12 @@ class PGDHead(FCOSMono3DHead):
         return outputs

-    def get_pos_predictions(self, bbox_preds, dir_cls_preds, depth_cls_preds,
-                            weights, attr_preds, centernesses, pos_inds,
-                            batch_img_metas):
+    def get_pos_predictions(self, bbox_preds: List[Tensor],
+                            dir_cls_preds: List[Tensor],
+                            depth_cls_preds: List[Tensor],
+                            weights: List[Tensor], attr_preds: List[Tensor],
+                            centernesses: List[Tensor], pos_inds: Tensor,
+                            batch_img_metas: List[dict]) -> Tuple[Tensor]:
         """Flatten predictions and get positive ones.

         Args:

@@ -528,20 +533,19 @@ class PGDHead(FCOSMono3DHead):
         return pos_bbox_preds, pos_dir_cls_preds, pos_depth_cls_preds, \
             pos_weights, pos_attr_preds, pos_centerness

-    @force_fp32(
-        apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds',
-                  'depth_cls_preds', 'weights', 'attr_preds', 'centernesses'))
-    def loss(self,
-             cls_scores,
-             bbox_preds,
-             dir_cls_preds,
-             depth_cls_preds,
-             weights,
-             attr_preds,
-             centernesses,
-             batch_gt_instances_3d,
-             batch_img_metas,
-             batch_gt_instances_ignore=None):
+    def loss_by_feat(
+            self,
+            cls_scores: List[Tensor],
+            bbox_preds: List[Tensor],
+            dir_cls_preds: List[Tensor],
+            depth_cls_preds: List[Tensor],
+            weights: List[Tensor],
+            attr_preds: List[Tensor],
+            centernesses: List[Tensor],
+            batch_gt_instances_3d: InstanceList,
+            batch_gt_instances: InstanceList,
+            batch_img_metas: List[dict],
+            batch_gt_instances_ignore: OptInstanceList = None) -> dict:
         """Compute loss of the head.

         Args:

@@ -591,7 +595,7 @@ class PGDHead(FCOSMono3DHead):
                 bbox_preds[0].device)
         labels_3d, bbox_targets_3d, centerness_targets, attr_targets = \
             self.get_targets(
-                all_level_points, batch_gt_instances_3d)
+                all_level_points, batch_gt_instances_3d, batch_gt_instances)

         num_imgs = cls_scores[0].size(0)
         # flatten cls_scores and targets

@@ -785,20 +789,17 @@ class PGDHead(FCOSMono3DHead):
         return loss_dict

-    @force_fp32(
-        apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds',
-                  'depth_cls_preds', 'weights', 'attr_preds', 'centernesses'))
-    def get_results(self,
-                    cls_scores,
-                    bbox_preds,
-                    dir_cls_preds,
-                    depth_cls_preds,
-                    weights,
-                    attr_preds,
-                    centernesses,
-                    batch_img_metas,
-                    cfg=None,
-                    rescale=None):
+    def predict_by_feat(self,
+                        cls_scores: List[Tensor],
+                        bbox_preds: List[Tensor],
+                        dir_cls_preds: List[Tensor],
+                        depth_cls_preds: List[Tensor],
+                        weights: List[Tensor],
+                        attr_preds: List[Tensor],
+                        centernesses: List[Tensor],
+                        batch_img_metas: Optional[List[dict]] = None,
+                        cfg: OptConfigType = None,
+                        rescale: bool = False) -> InstanceList:
         """Transform network output for a batch into bbox predictions.

         Args:

@@ -824,7 +825,7 @@ class PGDHead(FCOSMono3DHead):
             cfg (mmcv.Config, optional): Test / postprocessing configuration,
                 if None, test_cfg would be used. Defaults to None.
             rescale (bool, optional): If True, return boxes in original image
-                space. Defaults to None.
+                space. Defaults to False.

         Returns:
             list[tuple[Tensor]]: Each item in result_list is a tuple, which

@@ -898,25 +899,33 @@ class PGDHead(FCOSMono3DHead):
                 centernesses[i][img_id].detach() for i in range(num_levels)
             ]
             img_meta = batch_img_metas[img_id]
-            results = self._get_results_single(
-                cls_score_list, bbox_pred_list, dir_cls_pred_list,
-                depth_cls_pred_list, weight_list, attr_pred_list,
-                centerness_pred_list, mlvl_points, img_meta, cfg, rescale)
+            results = self._predict_by_feat_single(
+                cls_score_list=cls_score_list,
+                bbox_pred_list=bbox_pred_list,
+                dir_cls_pred_list=dir_cls_pred_list,
+                depth_cls_pred_list=depth_cls_pred_list,
+                weight_list=weight_list,
+                attr_pred_list=attr_pred_list,
+                centerness_pred_list=centerness_pred_list,
+                mlvl_points=mlvl_points,
+                img_meta=img_meta,
+                cfg=cfg,
+                rescale=rescale)
             result_list.append(results)
         return result_list

-    def _get_results_single(self,
-                            cls_scores,
-                            bbox_preds,
-                            dir_cls_preds,
-                            depth_cls_preds,
-                            weights,
-                            attr_preds,
-                            centernesses,
-                            mlvl_points,
-                            img_meta,
-                            cfg,
-                            rescale=False):
+    def _predict_by_feat_single(self,
+                                cls_score_list: List[Tensor],
+                                bbox_pred_list: List[Tensor],
+                                dir_cls_pred_list: List[Tensor],
+                                depth_cls_pred_list: List[Tensor],
+                                weight_list: List[Tensor],
+                                attr_pred_list: List[Tensor],
+                                centerness_pred_list: List[Tensor],
+                                mlvl_points: Tensor,
+                                img_meta: dict,
+                                cfg: ConfigType,
+                                rescale: bool = False) -> InstanceData:
         """Transform outputs for a single batch item into bbox predictions.

         Args:

@@ -951,7 +960,7 @@ class PGDHead(FCOSMono3DHead):
         view = np.array(img_meta['cam2img'])
         scale_factor = img_meta['scale_factor']
         cfg = self.test_cfg if cfg is None else cfg
-        assert len(cls_scores) == len(bbox_preds) == len(mlvl_points)
+        assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_points)
         mlvl_centers2d = []
         mlvl_bboxes = []
         mlvl_scores = []

@@ -966,8 +975,9 @@ class PGDHead(FCOSMono3DHead):
         for cls_score, bbox_pred, dir_cls_pred, depth_cls_pred, weight, \
                 attr_pred, centerness, points in zip(
-                    cls_scores, bbox_preds, dir_cls_preds, depth_cls_preds,
-                    weights, attr_preds, centernesses, mlvl_points):
+                    cls_score_list, bbox_pred_list, dir_cls_pred_list,
+                    depth_cls_pred_list, weight_list, attr_pred_list,
+                    centerness_pred_list, mlvl_points):
             assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
             scores = cls_score.permute(1, 2, 0).reshape(
                 -1, self.cls_out_channels).sigmoid()

@@ -1018,9 +1028,9 @@ class PGDHead(FCOSMono3DHead):
             bbox_pred3d[:, :2] = points - bbox_pred3d[:, :2]
             if rescale:
                 bbox_pred3d[:, :2] /= bbox_pred3d[:, :2].new_tensor(
-                    scale_factor)
+                    scale_factor[0])
                 if self.pred_bbox2d:
-                    bbox_pred2d /= bbox_pred2d.new_tensor(scale_factor)
+                    bbox_pred2d /= bbox_pred2d.new_tensor(scale_factor[0])
             if self.use_depth_classifier:
                 prob_depth_pred = self.bbox_coder.decode_prob_depth(
                     depth_cls_pred, self.depth_range, self.depth_unit,

@@ -1106,13 +1116,21 @@ class PGDHead(FCOSMono3DHead):
             results.attr_labels = attrs

         if self.pred_bbox2d:
+            results_2d = InstanceData()
             bboxes2d = nms_results[-1]
-            bboxes2d = torch.cat([bboxes2d, scores[:, None]], dim=1)
-            results.bboxes = bboxes2d
-
-        return results
+            results_2d.bboxes = bboxes2d
+            results_2d.scores = scores
+            results_2d.labels = labels
+            return results, results_2d
+        else:
+            return results

-    def get_targets(self, points, batch_gt_instances_3d):
+    def get_targets(
+        self,
+        points: List[Tensor],
+        batch_gt_instances_3d: InstanceList,
+        batch_gt_instances: InstanceList,
+    ) -> Tuple[List[Tensor], List[Tensor], List[Tensor], List[Tensor]]:
         """Compute regression, classification and centerness targets for
         points in multiple images.

@@ -1120,9 +1138,10 @@ class PGDHead(FCOSMono3DHead):
             points (list[Tensor]): Points of each fpn level, each has shape
                 (num_points, 2).
             batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
-                gt_instance_3d. It usually includes ``bboxes``、``labels``
-                、``bboxes_3d``、``labels_3d``、``depths``、``centers_2d`` and
-                attributes.
+                gt_instance_3d. It usually includes ``bboxes_3d``、
+                ``labels_3d``、``depths``、``centers_2d`` and attributes.
+            batch_gt_instances (list[:obj:`InstanceData`]): Batch of
+                gt_instance. It usually includes ``bboxes``、``labels``.

         Returns:
             tuple:

@@ -1146,14 +1165,17 @@ class PGDHead(FCOSMono3DHead):
         if 'attr_labels' not in batch_gt_instances_3d[0]:
             for gt_instances_3d in batch_gt_instances_3d:
-                gt_instances_3d.attr_labels = gt_instances_3d.labels.new_full(
-                    gt_instances_3d.labels.shape, self.attr_background_label)
+                gt_instances_3d.attr_labels = \
+                    gt_instances_3d.labels_3d.new_full(
+                        gt_instances_3d.labels_3d.shape,
+                        self.attr_background_label)

         # get labels and bbox_targets of each image
         _, bbox_targets_list, labels_3d_list, bbox_targets_3d_list, \
             centerness_targets_list, attr_targets_list = multi_apply(
                 self._get_target_single,
                 batch_gt_instances_3d,
+                batch_gt_instances,
                 points=concat_points,
                 regress_ranges=concat_regress_ranges,
                 num_points_per_lvl=num_points)
...
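The switch from ``scale_factor`` to ``scale_factor[0]`` suggests the meta key now holds a per-axis sequence rather than a scalar, so a uniform resize is undone by dividing by its first entry. A quick check of that rescaling arithmetic with made-up numbers (the (w, h, w, h) layout is an assumption here):

```python
import torch

# Assumed layout: one scale per box coordinate, uniform resize to half size.
scale_factor = (0.5, 0.5, 0.5, 0.5)
bbox_pred2d = torch.tensor([[100., 60., 200., 120.]])  # box in network scale
restored = bbox_pred2d / bbox_pred2d.new_tensor(scale_factor[0])
print(restored)  # tensor([[200., 120., 400., 240.]]) -- original image scale
```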
 # Copyright (c) OpenMMLab. All rights reserved.
-from typing import List, Optional, Tuple, Union
+from typing import List, Optional, Tuple

 import torch
-from mmcv.runner import force_fp32
-from mmengine.config import ConfigDict
 from mmengine.data import InstanceData
 from torch import Tensor
 from torch.nn import functional as F

+from mmdet3d.core.utils import (ConfigType, InstanceList, OptConfigType,
+                                OptInstanceList, OptMultiConfig)
 from mmdet3d.registry import MODELS, TASK_UTILS
 from mmdet.core import multi_apply
 from mmdet.models.utils import gaussian_radius, gen_gaussian_target

@@ -35,19 +35,20 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
             regression heatmap channels.
         ori_channel (list[int]): indices of orientation offset pred in
             regression heatmap channels.
-        bbox_coder (dict): Bbox coder for encoding and decoding boxes.
-        loss_cls (dict, optional): Config of classification loss.
+        bbox_coder (:obj:`ConfigDict` or dict): Bbox coder for encoding
+            and decoding boxes.
+        loss_cls (:obj:`ConfigDict` or dict): Config of classification loss.
             Default: loss_cls=dict(type='GaussionFocalLoss', loss_weight=1.0).
-        loss_bbox (dict, optional): Config of localization loss.
+        loss_bbox (:obj:`ConfigDict` or dict): Config of localization loss.
             Default: loss_bbox=dict(type='L1Loss', loss_weight=10.0).
-        loss_dir (dict, optional): Config of direction classification loss.
-            In SMOKE, Default: None.
-        loss_attr (dict, optional): Config of attribute classification loss.
-            In SMOKE, Default: None.
-        loss_centerness (dict): Config of centerness loss.
-        norm_cfg (dict): Dictionary to construct and config norm layer.
+        loss_dir (:obj:`ConfigDict` or dict, optional): Config of direction
+            classification loss. In SMOKE, Default: None.
+        loss_attr (:obj:`ConfigDict` or dict, optional): Config of attribute
+            classification loss. In SMOKE, Default: None.
+        norm_cfg (:obj:`ConfigDict` or dict): Dictionary to construct and
+            config norm layer.
             Default: norm_cfg=dict(type='GN', num_groups=32, requires_grad=True).
-        init_cfg (dict): Initialization config dict. Default: None.
+        init_cfg (:obj:`ConfigDict` or dict or list[:obj:`ConfigDict` or
+            dict], optional): Initialization config dict. Defaults to None.
     """  # noqa: E501

     def __init__(self,
@@ -55,15 +56,16 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
                  in_channels: int,
                  dim_channel: List[int],
                  ori_channel: List[int],
-                 bbox_coder: dict,
-                 loss_cls: dict = dict(
-                     type='GaussionFocalLoss', loss_weight=1.0),
-                 loss_bbox: dict = dict(type='L1Loss', loss_weight=0.1),
-                 loss_dir: Optional[dict] = None,
-                 loss_attr: Optional[dict] = None,
-                 norm_cfg: dict = dict(
+                 bbox_coder: ConfigType,
+                 loss_cls: ConfigType = dict(
+                     type='mmdet.GaussionFocalLoss', loss_weight=1.0),
+                 loss_bbox: ConfigType = dict(
+                     type='mmdet.L1Loss', loss_weight=0.1),
+                 loss_dir: OptConfigType = None,
+                 loss_attr: OptConfigType = None,
+                 norm_cfg: OptConfigType = dict(
                      type='GN', num_groups=32, requires_grad=True),
-                 init_cfg: Optional[Union[ConfigDict, dict]] = None,
+                 init_cfg: OptMultiConfig = None,
                  **kwargs) -> None:
         super().__init__(
             num_classes,

@@ -79,11 +81,11 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
         self.ori_channel = ori_channel
         self.bbox_coder = TASK_UTILS.build(bbox_coder)

-    def forward(self, feats: Tuple[Tensor]):
+    def forward(self, x: Tuple[Tensor]) -> Tuple[List[Tensor]]:
         """Forward features from the upstream network.

         Args:
-            feats (tuple[Tensor]): Features from the upstream network, each is
+            x (tuple[Tensor]): Features from the upstream network, each is
                 a 4D-tensor.

         Returns:
@@ -95,9 +97,9 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
                 level, each is a 4D-tensor, the channel number is
                 num_points * bbox_code_size.
         """
-        return multi_apply(self.forward_single, feats)
+        return multi_apply(self.forward_single, x)

-    def forward_single(self, x: Tensor) -> Union[Tensor, Tensor]:
+    def forward_single(self, x: Tensor) -> Tuple[Tensor, Tensor]:
         """Forward features of a single scale level.

         Args:
@@ -118,12 +120,11 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
         bbox_pred[:, self.ori_channel, ...] = F.normalize(vector_ori)
         return cls_score, bbox_pred

-    @force_fp32(apply_to=('cls_scores', 'bbox_preds'))
-    def get_results(self,
-                    cls_scores,
-                    bbox_preds,
-                    batch_img_metas,
-                    rescale=None):
+    def predict_by_feat(self,
+                        cls_scores: List[Tensor],
+                        bbox_preds: List[Tensor],
+                        batch_img_metas: Optional[List[dict]] = None,
+                        rescale: bool = None) -> InstanceList:
         """Generate bboxes from bbox head predictions.

         Args:
@@ -134,8 +135,16 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
             rescale (bool): If True, return boxes in original image space.

         Returns:
-            list[tuple[:obj:`CameraInstance3DBoxes`, Tensor, Tensor, None]]:
-                Each item in result_list is 4-tuple.
+            list[:obj:`InstanceData`]: 3D detection results of each image
+            after the post process.
+            Each item usually contains the following keys.
+
+            - scores_3d (Tensor): Classification scores, has a shape
+              (num_instance, )
+            - labels_3d (Tensor): Labels of bboxes, has a shape
+              (num_instances, ).
+            - bboxes_3d (Tensor): Contains a tensor with shape
+              (num_instances, 7).
         """
         assert len(cls_scores) == len(bbox_preds) == 1
         cam2imgs = torch.stack([
@@ -146,7 +155,7 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
             cls_scores[0].new_tensor(img_meta['trans_mat'])
             for img_meta in batch_img_metas
         ])
-        batch_bboxes, batch_scores, batch_topk_labels = self.decode_heatmap(
+        batch_bboxes, batch_scores, batch_topk_labels = self._decode_heatmap(
             cls_scores[0],
             bbox_preds[0],
             batch_img_metas,

@@ -183,14 +192,14 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
         return result_list

-    def decode_heatmap(self,
-                       cls_score,
-                       reg_pred,
-                       batch_img_metas,
-                       cam2imgs,
-                       trans_mats,
-                       topk=100,
-                       kernel=3):
+    def _decode_heatmap(self,
+                        cls_score: Tensor,
+                        reg_pred: Tensor,
+                        batch_img_metas: List[dict],
+                        cam2imgs: Tensor,
+                        trans_mats: Tensor,
+                        topk: int = 100,
+                        kernel: int = 3) -> Tuple[Tensor, Tensor, Tensor]:
         """Transform outputs into detections raw bbox predictions.

         Args:
@@ -212,6 +221,7 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
         Returns:
             tuple[torch.Tensor]: Decoded output of SMOKEHead, containing
                the following Tensors:
+
               - batch_bboxes (Tensor): Coords of each 3D box.
                     shape (B, k, 7)
               - batch_scores (Tensor): Scores of each 3D box.

@@ -241,9 +251,10 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
         batch_bboxes = batch_bboxes.view(bs, -1, self.bbox_code_size)
         return batch_bboxes, batch_scores, batch_topk_labels

-    def get_predictions(self, labels_3d, centers_2d, gt_locations,
-                        gt_dimensions, gt_orientations, indices,
-                        batch_img_metas, pred_reg):
+    def get_predictions(self, labels_3d: Tensor, centers_2d: Tensor,
+                        gt_locations: Tensor, gt_dimensions: Tensor,
+                        gt_orientations: Tensor, indices: Tensor,
+                        batch_img_metas: List[dict],
+                        pred_reg: Tensor) -> dict:
         """Prepare predictions for computing loss.

         Args:
@@ -266,6 +277,7 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
         Returns:
             dict: the dict has components below:
+
             - bbox3d_yaws (:obj:`CameraInstance3DBoxes`):
                   bbox calculated using pred orientations.
             - bbox3d_dims (:obj:`CameraInstance3DBoxes`):

@@ -312,22 +324,26 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
         return pred_bboxes

-    def get_targets(self, batch_gt_instances_3d, feat_shape, batch_img_metas):
+    def get_targets(self, batch_gt_instances_3d: InstanceList,
+                    batch_gt_instances: InstanceList, feat_shape: Tuple[int],
+                    batch_img_metas: List[dict]) -> Tuple[Tensor, int, dict]:
         """Get training targets for batch images.

         Args:
             batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
-                gt_instance_3d. It usually includes ``bboxes``、``labels``
-                、``bboxes_3d``、``labels_3d``、``depths``、``centers_2d`` and
-                attributes.
+                gt_instance_3d. It usually includes ``bboxes_3d``、
+                ``labels_3d``、``depths``、``centers_2d`` and attributes.
+            batch_gt_instances (list[:obj:`InstanceData`]): Batch of
+                gt_instance. It usually includes ``bboxes``、``labels``.
             feat_shape (tuple[int]): Feature map shape with value,
                 shape (B, _, H, W).
             batch_img_metas (list[dict]): Meta information of each image, e.g.,
                 image size, scaling factor, etc.

         Returns:
-            tuple[Tensor, dict]: The Tensor value is the targets of
+            tuple[Tensor, int, dict]: The Tensor value is the targets of
                 center heatmap, the dict has components below:
+
               - gt_centers_2d (Tensor): Coords of each projected 3D box
                     center on image. shape (B * max_objs, 2)
               - gt_labels_3d (Tensor): Labels of each 3D box.

@@ -347,10 +363,10 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
         """
         gt_bboxes = [
-            gt_instances_3d.bboxes for gt_instances_3d in batch_gt_instances_3d
+            gt_instances.bboxes for gt_instances in batch_gt_instances
         ]
         gt_labels = [
-            gt_instances_3d.labels for gt_instances_3d in batch_gt_instances_3d
+            gt_instances.labels for gt_instances in batch_gt_instances
        ]
         gt_bboxes_3d = [
             gt_instances_3d.bboxes_3d

@@ -459,12 +475,14 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
         return center_heatmap_target, avg_factor, target_labels

-    def loss(self,
-             cls_scores,
-             bbox_preds,
-             batch_gt_instances_3d,
-             batch_img_metas,
-             batch_gt_instances_ignore=None):
+    def loss_by_feat(
+            self,
+            cls_scores: List[Tensor],
+            bbox_preds: List[Tensor],
+            batch_gt_instances_3d: InstanceList,
+            batch_gt_instances: InstanceList,
+            batch_img_metas: List[dict],
+            batch_gt_instances_ignore: OptInstanceList = None) -> dict:
         """Compute loss of the head.

         Args:
@@ -474,9 +492,10 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
                 number is bbox_code_size.
                 shape (B, 7, H, W).
             batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
-                gt_instance_3d. It usually includes ``bboxes``、``labels``
-                、``bboxes_3d``、``labels_3d``、``depths``、``centers_2d`` and
-                attributes.
+                gt_instance_3d. It usually includes ``bboxes_3d``、
+                ``labels_3d``、``depths``、``centers_2d`` and attributes.
+            batch_gt_instances (list[:obj:`InstanceData`]): Batch of
+                gt_instance. It usually includes ``bboxes``、``labels``.
             batch_img_metas (list[dict]): Meta information of each image, e.g.,
                 image size, scaling factor, etc.
             batch_gt_instances_ignore (list[:obj:`InstanceData`], Optional):
@@ -485,15 +504,19 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
                 Defaults to None.

         Returns:
-            dict[str, Tensor]: A dictionary of loss components.
+            dict[str, Tensor]: A dictionary of loss components, which has
+                components below:
+
+                - loss_cls (Tensor): loss of cls heatmap.
+                - loss_bbox (Tensor): loss of bbox heatmap.
         """
         assert len(cls_scores) == len(bbox_preds) == 1
+        assert batch_gt_instances_ignore is None
         center_2d_heatmap = cls_scores[0]
         pred_reg = bbox_preds[0]

         center_2d_heatmap_target, avg_factor, target_labels = \
             self.get_targets(batch_gt_instances_3d,
+                             batch_gt_instances,
                              center_2d_heatmap.shape,
                              batch_img_metas)
...
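The renamed ``_decode_heatmap`` follows the CenterNet recipe: suppress non-peak heatmap cells with a kernel-by-kernel max-pool, then rank the survivors with a global top-k. A self-contained sketch of that idea (generic tensors and a hypothetical ``decode_peaks`` helper, not the exact mmdet utilities):

```python
import torch
import torch.nn.functional as F


def decode_peaks(heatmap: torch.Tensor, topk: int = 100, kernel: int = 3):
    """CenterNet-style peak extraction; hypothetical helper, not mmdet code."""
    pad = (kernel - 1) // 2
    hmax = F.max_pool2d(heatmap, kernel, stride=1, padding=pad)
    peaks = heatmap * (hmax == heatmap)  # keep only local maxima
    bs, num_classes, h, w = peaks.shape
    scores, inds = peaks.view(bs, -1).topk(topk)  # rank classes and cells
    labels = inds // (h * w)  # recover the class index
    xy = inds % (h * w)
    ys, xs = xy // w, xy % w  # recover the grid coordinates
    return scores, labels, xs, ys


hm = torch.rand(2, 3, 96, 320)  # (B, num_classes, H, W) toy heatmap
scores, labels, xs, ys = decode_peaks(hm, topk=5)
print(scores.shape, labels.shape)  # torch.Size([2, 5]) torch.Size([2, 5])
```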
 # Copyright (c) OpenMMLab. All rights reserved.
+from mmdet3d.core import ConfigType, OptConfigType, OptMultiConfig
 from mmdet3d.registry import MODELS
 from .single_stage_mono3d import SingleStageMono3DDetector

@@ -9,14 +10,36 @@ class FCOSMono3D(SingleStageMono3DDetector):

     Currently please refer to our entry on the
     `leaderboard <https://www.nuscenes.org/object-detection?externalData=all&mapData=all&modalities=Camera>`_.

+    Args:
+        backbone (:obj:`ConfigDict` or dict): The backbone config.
+        neck (:obj:`ConfigDict` or dict): The neck config.
+        bbox_head (:obj:`ConfigDict` or dict): The bbox head config.
+        train_cfg (:obj:`ConfigDict` or dict, optional): The training config
+            of FCOS. Defaults to None.
+        test_cfg (:obj:`ConfigDict` or dict, optional): The testing config
+            of FCOS. Defaults to None.
+        data_preprocessor (:obj:`ConfigDict` or dict, optional): Config of
+            :class:`DetDataPreprocessor` to process the input data.
+            Defaults to None.
+        init_cfg (:obj:`ConfigDict` or list[:obj:`ConfigDict`] or dict or
+            list[dict], optional): Initialization config dict.
+            Defaults to None.
     """  # noqa: E501

     def __init__(self,
-                 backbone,
-                 neck,
-                 bbox_head,
-                 train_cfg=None,
-                 test_cfg=None,
-                 pretrained=None):
-        super(FCOSMono3D, self).__init__(backbone, neck, bbox_head, train_cfg,
-                                         test_cfg, pretrained)
+                 backbone: ConfigType,
+                 neck: ConfigType,
+                 bbox_head: ConfigType,
+                 train_cfg: OptConfigType = None,
+                 test_cfg: OptConfigType = None,
+                 data_preprocessor: OptConfigType = None,
+                 init_cfg: OptMultiConfig = None) -> None:
+        super().__init__(
+            backbone=backbone,
+            neck=neck,
+            bbox_head=bbox_head,
+            train_cfg=train_cfg,
+            test_cfg=test_cfg,
+            data_preprocessor=data_preprocessor,
+            init_cfg=init_cfg)
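Both refactored detectors now forward every argument to ``SingleStageDetector`` by keyword, with ``data_preprocessor`` replacing the old positional ``pretrained``. A minimal, dependency-free sketch of why keyword forwarding is the safer pattern (the ``*Stub`` classes are stand-ins, not mmdet classes):

```python
from typing import Optional


class SingleStageDetectorStub:
    """Stand-in for SingleStageDetector's keyword-friendly constructor."""

    def __init__(self,
                 backbone: dict,
                 neck: Optional[dict] = None,
                 bbox_head: Optional[dict] = None,
                 train_cfg: Optional[dict] = None,
                 test_cfg: Optional[dict] = None,
                 data_preprocessor: Optional[dict] = None,
                 init_cfg: Optional[dict] = None) -> None:
        self.cfg = dict(backbone=backbone, neck=neck, bbox_head=bbox_head,
                        train_cfg=train_cfg, test_cfg=test_cfg,
                        data_preprocessor=data_preprocessor,
                        init_cfg=init_cfg)


class FCOSMono3DStub(SingleStageDetectorStub):

    def __init__(self, backbone, neck, bbox_head, train_cfg=None,
                 test_cfg=None, data_preprocessor=None, init_cfg=None):
        # keyword forwarding stays correct even if the base signature gains
        # or reorders parameters; positional forwarding (the old style)
        # would silently pass data_preprocessor where pretrained used to be
        super().__init__(
            backbone=backbone, neck=neck, bbox_head=bbox_head,
            train_cfg=train_cfg, test_cfg=test_cfg,
            data_preprocessor=data_preprocessor, init_cfg=init_cfg)


model = FCOSMono3DStub(backbone=dict(type='ResNet'), neck=dict(type='FPN'),
                       bbox_head=dict(type='FCOSMono3DHead'))
assert model.cfg['data_preprocessor'] is None
```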
# Copyright (c) OpenMMLab. All rights reserved. # Copyright (c) OpenMMLab. All rights reserved.
from os import path as osp from typing import Tuple
import mmcv from torch import Tensor
import numpy as np
import torch
from mmcv.parallel import DataContainer as DC
from mmdet3d.core import (CameraInstance3DBoxes, bbox3d2result, from mmdet3d.core import Det3DDataSample, InstanceList
show_multi_modality_result) from mmdet3d.core.utils import SampleList
from mmdet3d.registry import MODELS from mmdet3d.registry import MODELS
from mmdet.models.detectors.single_stage import SingleStageDetector from mmdet.models.detectors.single_stage import SingleStageDetector
...@@ -16,212 +13,61 @@ from mmdet.models.detectors.single_stage import SingleStageDetector ...@@ -16,212 +13,61 @@ from mmdet.models.detectors.single_stage import SingleStageDetector
class SingleStageMono3DDetector(SingleStageDetector): class SingleStageMono3DDetector(SingleStageDetector):
"""Base class for monocular 3D single-stage detectors. """Base class for monocular 3D single-stage detectors.
Single-stage detectors directly and densely predict bounding boxes on the Monocular 3D single-stage detectors directly and densely predict bounding
output features of the backbone+neck. boxes on the output features of the backbone+neck.
""" """
def extract_feats(self, imgs): def convert_to_datasample(self, results_list: InstanceList) -> SampleList:
"""Directly extract features from the backbone+neck.""" """ Convert results list to `Det3DDataSample`.
assert isinstance(imgs, list)
return [self.extract_feat(img) for img in imgs]
def forward_train(self,
img,
img_metas,
gt_bboxes,
gt_labels,
gt_bboxes_3d,
gt_labels_3d,
centers2d,
depths,
attr_labels=None,
gt_bboxes_ignore=None):
"""
Args: Args:
img (Tensor): Input images of shape (N, C, H, W). results_list (list[:obj:`InstanceData`]):Detection results
Typically these should be mean centered and std scaled. of each image. For each image, it could contains two results
img_metas (list[dict]): A List of image info dict where each dict format:
has: 'img_shape', 'scale_factor', 'flip', and may also contain 1. pred_instances_3d
'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. 2. (pred_instances_3d, pred_instances)
For details on the values of these keys see
:class:`mmdet.datasets.pipelines.Collect`.
gt_bboxes (list[Tensor]): Each item are the truth boxes for each
image in [tl_x, tl_y, br_x, br_y] format.
gt_labels (list[Tensor]): Class indices corresponding to each box
gt_bboxes_3d (list[Tensor]): Each item are the 3D truth boxes for
each image in [x, y, z, x_size, y_size, z_size, yaw, vx, vy]
format.
gt_labels_3d (list[Tensor]): 3D class indices corresponding to
each box.
centers2d (list[Tensor]): Projected 3D centers onto 2D images.
depths (list[Tensor]): Depth of projected centers on 2D images.
attr_labels (list[Tensor], optional): Attribute indices
corresponding to each box
gt_bboxes_ignore (list[Tensor]): Specify which bounding
boxes can be ignored when computing the loss.
Returns: Returns:
dict[str, Tensor]: A dictionary of loss components. list[:obj:`Det3DDataSample`]: 3D Detection results of the
input images. Each Det3DDataSample usually contain
'pred_instances_3d'. And the ``pred_instances_3d`` usually
contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instance, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (Tensor): Contains a tensor with shape
(num_instances, C) where C >=7.
""" """
x = self.extract_feat(img) out_results_list = []
losses = self.bbox_head.forward_train(x, img_metas, gt_bboxes, for i in range(len(results_list)):
gt_labels, gt_bboxes_3d, result = Det3DDataSample()
gt_labels_3d, centers2d, depths, if len(results_list[i]) == 2:
attr_labels, gt_bboxes_ignore) result.pred_instances_3d = results_list[i][0]
return losses result.pred_instances = results_list[i][1]
else:
result.pred_instances_3d = results_list[i]
out_results_list.append(result)
return out_results_list
def simple_test(self, img, img_metas, rescale=False): def extract_feat(self, batch_inputs_dict: dict) -> Tuple[Tensor]:
"""Test function without test time augmentation. """Extract features.
Args: Args:
imgs (list[torch.Tensor]): List of multiple images batch_inputs_dict (dict): Contains 'img' key
img_metas (list[dict]): List of image information. with image tensor with shape (N, C, H ,W).
rescale (bool, optional): Whether to rescale the results.
Defaults to False.
Returns: Returns:
list[list[np.ndarray]]: BBox results of each image and classes. tuple[Tensor]: Multi-level features that may have
The outer list corresponds to each image. The inner list different resolutions.
corresponds to each class.
""" """
x = self.extract_feat(img) batch_imgs = batch_inputs_dict['imgs']
outs = self.bbox_head(x) x = self.backbone(batch_imgs)
bbox_outputs = self.bbox_head.get_bboxes( if self.with_neck:
*outs, img_metas, rescale=rescale) x = self.neck(x)
return x
if self.bbox_head.pred_bbox2d:
from mmdet.core import bbox2result
bbox2d_img = [
bbox2result(bboxes2d, labels, self.bbox_head.num_classes)
for bboxes, scores, labels, attrs, bboxes2d in bbox_outputs
]
bbox_outputs = [bbox_outputs[0][:-1]]
bbox_img = [
bbox3d2result(bboxes, scores, labels, attrs)
for bboxes, scores, labels, attrs in bbox_outputs
]
bbox_list = [dict() for i in range(len(img_metas))]
for result_dict, img_bbox in zip(bbox_list, bbox_img):
result_dict['img_bbox'] = img_bbox
if self.bbox_head.pred_bbox2d:
for result_dict, img_bbox2d in zip(bbox_list, bbox2d_img):
result_dict['img_bbox2d'] = img_bbox2d
return bbox_list
# TODO: Support test time augmentation
def aug_test(self, imgs, img_metas, rescale=False): def aug_test(self, imgs, img_metas, rescale=False):
"""Test function with test time augmentation.""" """Test function with test time augmentation."""
feats = self.extract_feats(imgs) pass
# only support aug_test for one sample
outs_list = [self.bbox_head(x) for x in feats]
for i, img_meta in enumerate(img_metas):
if img_meta[0]['pcd_horizontal_flip']:
for j in range(len(outs_list[i])): # for each prediction
if outs_list[i][j][0] is None:
continue
for k in range(len(outs_list[i][j])):
# every stride of featmap
outs_list[i][j][k] = torch.flip(
outs_list[i][j][k], dims=[3])
reg = outs_list[i][1]
for reg_feat in reg:
# offset_x
reg_feat[:, 0, :, :] = 1 - reg_feat[:, 0, :, :]
# velo_x
if self.bbox_head.pred_velo:
reg_feat[:, 7, :, :] = -reg_feat[:, 7, :, :]
# rotation
reg_feat[:, 6, :, :] = -reg_feat[:, 6, :, :] + np.pi
merged_outs = []
for i in range(len(outs_list[0])): # for each prediction
merged_feats = []
for j in range(len(outs_list[0][i])):
if outs_list[0][i][0] is None:
merged_feats.append(None)
continue
# for each stride of featmap
avg_feats = torch.mean(
torch.cat([x[i][j] for x in outs_list]),
dim=0,
keepdim=True)
if i == 1: # regression predictions
# rot/velo/2d det keeps the original
avg_feats[:, 6:, :, :] = \
outs_list[0][i][j][:, 6:, :, :]
if i == 2:
# dir_cls keeps the original
avg_feats = outs_list[0][i][j]
merged_feats.append(avg_feats)
merged_outs.append(merged_feats)
merged_outs = tuple(merged_outs)
bbox_outputs = self.bbox_head.get_bboxes(
*merged_outs, img_metas[0], rescale=rescale)
if self.bbox_head.pred_bbox2d:
from mmdet.core import bbox2result
bbox2d_img = [
bbox2result(bboxes2d, labels, self.bbox_head.num_classes)
for bboxes, scores, labels, attrs, bboxes2d in bbox_outputs
]
bbox_outputs = [bbox_outputs[0][:-1]]
bbox_img = [
bbox3d2result(bboxes, scores, labels, attrs)
for bboxes, scores, labels, attrs in bbox_outputs
]
bbox_list = dict()
bbox_list.update(img_bbox=bbox_img[0])
if self.bbox_head.pred_bbox2d:
bbox_list.update(img_bbox2d=bbox2d_img[0])
return [bbox_list]
    def show_results(self, data, result, out_dir, show=False, score_thr=None):
        """Results visualization.

        Args:
            data (list[dict]): Input images and the information of the sample.
            result (list[dict]): Prediction results.
            out_dir (str): Output directory of the visualization results.
            show (bool, optional): Whether to show the results online with
                open3d. Defaults to False.
            score_thr (float, optional): Score threshold of bounding boxes.
                Not implemented yet; kept for interface unification.
                TODO: implement score_thr for single_stage_mono3d.
                Defaults to None.
        """
        for batch_id in range(len(result)):
            if isinstance(data['img_metas'][0], DC):
                img_filename = data['img_metas'][0]._data[0][batch_id][
                    'filename']
                cam2img = data['img_metas'][0]._data[0][batch_id]['cam2img']
            elif mmcv.is_list_of(data['img_metas'][0], dict):
                img_filename = data['img_metas'][0][batch_id]['filename']
                cam2img = data['img_metas'][0][batch_id]['cam2img']
            else:
                raise ValueError(
                    f"Unsupported data type {type(data['img_metas'][0])} "
                    f'for visualization!')
            img = mmcv.imread(img_filename)
            file_name = osp.split(img_filename)[-1].split('.')[0]
            assert out_dir is not None, 'Expect out_dir, got none.'

            pred_bboxes = result[batch_id]['img_bbox']['boxes_3d']
            assert isinstance(pred_bboxes, CameraInstance3DBoxes), \
                f'unsupported predicted bbox type {type(pred_bboxes)}'

            show_multi_modality_result(
                img,
                None,
                pred_bboxes,
                cam2img,
                out_dir,
                file_name,
                'camera',
                show=show)
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet3d.core import ConfigType, OptConfigType, OptMultiConfig
from mmdet3d.registry import MODELS
from .single_stage_mono3d import SingleStageMono3DDetector

...@@ -8,14 +9,35 @@ class SMOKEMono3D(SingleStageMono3DDetector):

    r"""`SMOKE <https://arxiv.org/abs/2002.10111>`_ for monocular 3D object
    detection.

    Args:
        backbone (:obj:`ConfigDict` or dict): The backbone config.
        neck (:obj:`ConfigDict` or dict): The neck config.
        bbox_head (:obj:`ConfigDict` or dict): The bbox head config.
        train_cfg (:obj:`ConfigDict` or dict, optional): The training config
            of SMOKE. Defaults to None.
        test_cfg (:obj:`ConfigDict` or dict, optional): The testing config
            of SMOKE. Defaults to None.
        data_preprocessor (:obj:`ConfigDict` or dict, optional): Config of
            :class:`DetDataPreprocessor` to process the input data.
            Defaults to None.
        init_cfg (:obj:`ConfigDict` or list[:obj:`ConfigDict`] or dict or
            list[dict], optional): Initialization config dict.
            Defaults to None.
    """

    # Old signature (removed by this commit):
    def __init__(self,
                 backbone,
                 neck,
                 bbox_head,
                 train_cfg=None,
                 test_cfg=None,
                 pretrained=None):
        super(SMOKEMono3D, self).__init__(backbone, neck, bbox_head,
                                          train_cfg, test_cfg, pretrained)

    # New signature:
    def __init__(self,
                 backbone: ConfigType,
                 neck: ConfigType,
                 bbox_head: ConfigType,
                 train_cfg: OptConfigType = None,
                 test_cfg: OptConfigType = None,
                 data_preprocessor: OptConfigType = None,
                 init_cfg: OptMultiConfig = None) -> None:
        super().__init__(
            backbone=backbone,
            neck=neck,
            bbox_head=bbox_head,
            train_cfg=train_cfg,
            test_cfg=test_cfg,
            data_preprocessor=data_preprocessor,
            init_cfg=init_cfg)
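
# A minimal sketch (an assumption, not part of this commit) of how the
# refactored, config-driven constructor is typically exercised through the
# registry; the nested configs below are abridged placeholders, not the
# shipped SMOKE config:
#
#   from mmdet3d.registry import MODELS
#   model = MODELS.build(
#       dict(
#           type='SMOKEMono3D',
#           backbone=dict(type='DLANet', depth=34),      # abridged
#           neck=dict(type='DLANeck'),                   # abridged
#           bbox_head=dict(type='SMOKEMono3DHead'),      # abridged
#           data_preprocessor=dict(type='Det3DDataPreprocessor')))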
...@@ -85,6 +85,10 @@ def test_getitem():

    assert isinstance(ann_info['gt_bboxes_3d'], LiDARInstance3DBoxes)
    assert torch.allclose(ann_info['gt_bboxes_3d'].tensor.sum(),
                          torch.tensor(7.2650))
    assert 'centers_2d' in ann_info
    assert ann_info['centers_2d'].dtype == np.float64
    assert 'depths' in ann_info
    assert ann_info['depths'].dtype == np.float64
    assert 'group_id' in ann_info
    assert ann_info['group_id'].dtype == np.int64
    assert 'occluded' in ann_info
...
...@@ -45,8 +45,8 @@ def test_getitem():

    _generate_nus_dataset_config()
    nus_dataset = NuScenesDataset(
        data_root=data_root,
        ann_file=ann_file,
        data_prefix=data_prefix,
        pipeline=pipeline,
        metainfo=dict(CLASSES=classes),
...
...@@ -4,6 +4,7 @@ import numpy as np

from mmdet3d.core import LiDARInstance3DBoxes
# create a dummy `results` to test the pipeline
from mmdet3d.datasets import LoadAnnotations3D, LoadPointsFromFile
from mmdet3d.datasets.pipelines.loading import LoadImageFromFileMono3D


def create_dummy_data_info(with_ann=True):

...@@ -20,6 +21,10 @@ def create_dummy_data_info(with_ann=True):

                          -1.5808]])),
        'gt_labels_3d':
        np.array([1]),
        'centers_2d':
        np.array([[765.04, 214.56]]),
        'depths':
        np.array([8.410]),
        'num_lidar_pts':
        np.array([377]),
        'difficulty':

...@@ -134,6 +139,9 @@ def create_dummy_data_info(with_ann=True):

        ],
        'bbox_label_3d':
        -1,
        'center_2d': [765.04, 214.56],
        'depth':
        8.410,
        'num_lidar_pts':
        377,
        'difficulty':

...@@ -168,3 +176,17 @@ def create_data_info_after_loading():

    data_info = load_points_transform(data_info)
    data_info_after_loading = load_anns_transform(data_info)
    return data_info_after_loading


def create_mono3d_data_info_after_loading():
    load_anns_transform = LoadAnnotations3D(
        with_bbox=True,
        with_label=True,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True)
    load_img_transform = LoadImageFromFileMono3D()
    data_info = create_dummy_data_info()
    data_info = load_img_transform(data_info)
    data_info_after_loading = load_anns_transform(data_info)
    return data_info_after_loading
...@@ -117,6 +117,7 @@ class TestFCOSMono3DHead(TestCase):

        # When truth is non-empty then all losses
        # should be nonzero for random inputs
        gt_instances_3d = InstanceData()
        gt_instances = InstanceData()

        gt_bboxes = torch.rand([3, 4], dtype=torch.float32)
        gt_bboxes_3d = CameraInstance3DBoxes(torch.rand([3, 9]), box_dim=9)

...@@ -129,14 +130,14 @@ class TestFCOSMono3DHead(TestCase):

        gt_instances_3d.bboxes_3d = gt_bboxes_3d
        gt_instances_3d.labels_3d = gt_labels_3d
        gt_instances.bboxes = gt_bboxes
        gt_instances.labels = gt_labels
        gt_instances_3d.centers_2d = centers_2d
        gt_instances_3d.depths = depths
        gt_instances_3d.attr_labels = attr_labels

        gt_losses = fcos_mono3d_head.loss_by_feat(*ret_dict, [gt_instances_3d],
                                                  [gt_instances], img_metas)

        gt_cls_loss = gt_losses['loss_cls'].item()
        gt_siz_loss = gt_losses['loss_size'].item()

...@@ -160,7 +161,7 @@ class TestFCOSMono3DHead(TestCase):

        self.assertGreater(gt_atr_loss, 0, 'attribute loss should be positive')

        # test predict_by_feat
        results_list = fcos_mono3d_head.predict_by_feat(*ret_dict, img_metas)
        self.assertEqual(
            len(results_list), 1,
            'there should be results for exactly one image')
...
...@@ -142,6 +142,7 @@ class TestFGDHead(TestCase):

        # When truth is non-empty then all losses
        # should be nonzero for random inputs
        gt_instances_3d = InstanceData()
        gt_instances = InstanceData()

        gt_bboxes = torch.rand([3, 4], dtype=torch.float32)
        gt_bboxes_3d = CameraInstance3DBoxes(torch.rand([3, 7]), box_dim=7)

...@@ -152,12 +153,13 @@ class TestFGDHead(TestCase):

        gt_instances_3d.bboxes_3d = gt_bboxes_3d
        gt_instances_3d.labels_3d = gt_labels_3d
        gt_instances.bboxes = gt_bboxes
        gt_instances.labels = gt_labels
        gt_instances_3d.centers_2d = centers_2d
        gt_instances_3d.depths = depths

        gt_losses = pgd_head.loss_by_feat(*ret_dict, [gt_instances_3d],
                                          [gt_instances], img_metas)

        gt_cls_loss = gt_losses['loss_cls'].item()
        gt_siz_loss = gt_losses['loss_size'].item()

...@@ -184,15 +186,15 @@ class TestFGDHead(TestCase):

            'consistency loss should be positive')

        # test predict_by_feat
        results_list = pgd_head.predict_by_feat(*ret_dict, img_metas)
        self.assertEqual(
            len(results_list), 1,
            'there should be results for exactly one image')
        results, results_2d = results_list[0]
        pred_bboxes_3d = results.bboxes_3d
        pred_scores_3d = results.scores_3d
        pred_labels_3d = results.labels_3d
        pred_bboxes_2d = results_2d.bboxes
        self.assertEqual(pred_bboxes_3d.tensor.shape, torch.Size([20, 7]),
                         'the shape of predicted 3d bboxes should be [20, 7]')
        self.assertEqual(

...@@ -202,6 +204,6 @@ class TestFGDHead(TestCase):

            pred_labels_3d.shape, torch.Size([20]),
            'the shape of predicted 3d bbox labels should be [20]')
        self.assertEqual(
            pred_bboxes_2d.shape, torch.Size([20, 4]),
            'the shape of predicted 2d bboxes should be [20, 4]')
...@@ -82,6 +82,7 @@ class TestSMOKEMono3DHead(TestCase):

        # When truth is non-empty then all losses
        # should be nonzero for random inputs
        gt_instances_3d = InstanceData()
        gt_instances = InstanceData()

        gt_bboxes = torch.Tensor([[1.0, 2.0, 20.0, 40.0],
                                  [45.0, 50.0, 80.0, 70.1],

...@@ -94,13 +95,14 @@ class TestSMOKEMono3DHead(TestCase):

        gt_instances_3d.bboxes_3d = gt_bboxes_3d
        gt_instances_3d.labels_3d = gt_labels_3d
        gt_instances.bboxes = gt_bboxes
        gt_instances.labels = gt_labels
        gt_instances_3d.centers_2d = centers_2d
        gt_instances_3d.depths = depths

        gt_losses = smoke_mono3d_head.loss_by_feat(*ret_dict,
                                                   [gt_instances_3d],
                                                   [gt_instances], img_metas)

        gt_cls_loss = gt_losses['loss_cls'].item()
        gt_box_loss = gt_losses['loss_bbox'].item()

...@@ -109,7 +111,7 @@ class TestSMOKEMono3DHead(TestCase):

        self.assertGreater(gt_box_loss, 0, 'bbox loss should be positive')

        # test predict_by_feat
        results_list = smoke_mono3d_head.predict_by_feat(*ret_dict, img_metas)
        self.assertEqual(
            len(results_list), 1, 'there should be results for one image')
        results = results_list[0]
...
...@@ -14,7 +14,10 @@ from os import path as osp

import mmcv
import numpy as np
from nuscenes.nuscenes import NuScenes

from mmdet3d.core.bbox import points_cam2img
from mmdet3d.datasets.convert_utils import get_2d_boxes
from mmdet3d.datasets.utils import convert_quaternion_to_matrix

...@@ -60,6 +63,19 @@ def get_empty_instance():

    return instance

def get_empty_multicamera_instances():
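    """Return a placeholder dict with one (initially empty) entry per
    nuScenes camera."""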
    cam_instance = dict(
        CAM_FRONT=None,
        CAM_FRONT_RIGHT=None,
        CAM_FRONT_LEFT=None,
        CAM_BACK=None,
        CAM_BACK_RIGHT=None,
        CAM_BACK_LEFT=None)
    return cam_instance

def get_empty_lidar_points():
    lidar_points = dict(
        # (int, optional) : Number of features for each point.

...@@ -206,6 +222,32 @@ def clear_data_info_unused_keys(data_info):

    return data_info, empty_flag

def generate_camera_instances(info, nusc):
    # get bbox annotations for camera
    camera_types = [
        'CAM_FRONT',
        'CAM_FRONT_RIGHT',
        'CAM_FRONT_LEFT',
        'CAM_BACK',
        'CAM_BACK_LEFT',
        'CAM_BACK_RIGHT',
    ]
    empty_multicamera_instance = get_empty_multicamera_instances()
    for cam in camera_types:
        cam_info = info['cams'][cam]
        # list[dict]
        ann_infos = get_2d_boxes(
            nusc,
            cam_info['sample_data_token'],
            visibilities=['', '1', '2', '3', '4'])
        empty_multicamera_instance[cam] = ann_infos
    return empty_multicamera_instance
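

# Illustrative usage: update_nuscenes_infos() below attaches the per-camera
# 2D annotations to each raw info dict via
#   info['cam_instances'] = generate_camera_instances(info, nusc)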
def update_nuscenes_infos(pkl_path, out_dir):
    print(f'{pkl_path} will be modified.')
    if out_dir in pkl_path:

...@@ -222,6 +264,11 @@ def update_nuscenes_infos(pkl_path, out_dir):

        'version':
        data_list['metadata']['version']
    }
    nusc = NuScenes(
        version=data_list['metadata']['version'],
        dataroot='./data/nuscenes',
        verbose=True)
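    # NOTE: the nuScenes dataroot is hard-coded to './data/nuscenes' above;
    # adjust it if the dataset lives elsewhere.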
    print('Start updating:')
    converted_list = []
    for i, ori_info_dict in enumerate(

...@@ -304,6 +351,8 @@ def update_nuscenes_infos(pkl_path, out_dir):

            empty_instance['bbox_3d_isvalid'] = ori_info_dict['valid_flag'][i]
            empty_instance = clear_instance_unused_keys(empty_instance)
            temp_data_info['instances'].append(empty_instance)
        temp_data_info['cam_instances'] = generate_camera_instances(
            ori_info_dict, nusc)
        temp_data_info, _ = clear_data_info_unused_keys(temp_data_info)
        converted_list.append(temp_data_info)

    pkl_name = pkl_path.split('/')[-1]

...@@ -313,7 +362,6 @@ def update_nuscenes_infos(pkl_path, out_dir):

    converted_data_info = dict(metainfo=METAINFO, data_list=converted_list)
    mmcv.dump(converted_data_info, out_path, 'pkl')
    return temp_lidar_sweep

def update_kitti_infos(pkl_path, out_dir):

...@@ -382,6 +430,7 @@ def update_kitti_infos(pkl_path, out_dir):

        anns = ori_info_dict['annos']
        num_instances = len(anns['name'])
        cam2img = ori_info_dict['calib']['P2']

        ignore_class_name = set()
        instance_list = []

...@@ -401,6 +450,17 @@ def update_kitti_infos(pkl_path, out_dir):

            loc = anns['location'][instance_id]
            dims = anns['dimensions'][instance_id]
            rots = anns['rotation_y'][:, None][instance_id]
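            # KITTI 3D boxes are bottom-centered: the box origin sits at
            # (0.5, 1.0, 0.5) in camera coordinates (y pointing down), so
            # shift to the gravity center (0.5, 0.5, 0.5) before projecting
            # the 3D center into the image to obtain center_2d and depth.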
            dst = np.array([0.5, 0.5, 0.5])
            src = np.array([0.5, 1.0, 0.5])
            center_3d = loc + dims * (dst - src)
            center_2d = points_cam2img(
                center_3d.reshape([1, 3]), cam2img, with_depth=True)
            center_2d = center_2d.squeeze().tolist()
            empty_instance['center_2d'] = center_2d[:2]
            empty_instance['depth'] = center_2d[2]

            gt_bboxes_3d = np.concatenate([loc, dims, rots]).tolist()
            empty_instance['bbox_3d'] = gt_bboxes_3d
            empty_instance['bbox_label_3d'] = copy.deepcopy(
...@@ -734,7 +794,6 @@ def parse_args():

        type=str,
        default='./data/kitti/kitti_infos_train.pkl',
        help='specify the root dir of dataset')
    parser.add_argument(
        '--out-dir',
        type=str,
...