Commit db44cc50 authored by ZCMax, committed by ChaimZhu

[Refactor] Refactor the model of VoxelNet and DynamicVoxelNet

parent 7fda1f66
......@@ -7,6 +7,7 @@ from .builder import (BACKBONES, DETECTORS, FUSION_LAYERS, HEADS, LOSSES,
build_loss, build_middle_encoder, build_model,
build_neck, build_roi_extractor, build_shared_head,
build_voxel_encoder)
from .data_preprocessors import * # noqa: F401,F403
from .decode_heads import * # noqa: F401,F403
from .dense_heads import * # noqa: F401,F403
from .detectors import * # noqa: F401,F403
......
# Copyright (c) OpenMMLab. All rights reserved.
from .anchor3d_head import Anchor3DHead
from .anchor_free_mono3d_head import AnchorFreeMono3DHead
from .base_3d_dense_head import Base3DDenseHead
from .base_conv_bbox_head import BaseConvBboxHead
from .base_mono3d_dense_head import BaseMono3DDenseHead
from .centerpoint_head import CenterHead
......@@ -21,5 +22,5 @@ __all__ = [
'SSD3DHead', 'BaseConvBboxHead', 'CenterHead', 'ShapeAwareHead',
'BaseMono3DDenseHead', 'AnchorFreeMono3DHead', 'FCOSMono3DHead',
'GroupFree3DHead', 'PointRPNHead', 'SMOKEMono3DHead', 'PGDHead',
'MonoFlexHead'
'MonoFlexHead', 'Base3DDenseHead'
]
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from typing import List, Optional, Tuple
from typing import List, Tuple
import numpy as np
import torch
from mmcv import ConfigDict
from mmcv.runner import BaseModule, force_fp32
from mmengine.data import InstanceData
from torch import Tensor
from torch import nn as nn
from mmdet3d.core import (Det3DDataSample, PseudoSampler, box3d_multiclass_nms,
limit_period, merge_aug_bboxes_3d, xywhr2xyxyr)
from mmdet3d.core import PseudoSampler, merge_aug_bboxes_3d
from mmdet3d.core.utils import ConfigType, InstanceList, OptConfigType
from mmdet3d.core.utils.typing import OptInstanceList
from mmdet3d.registry import MODELS, TASK_UTILS
from mmdet.core import multi_apply
from .base_3d_dense_head import Base3DDenseHead
from .train_mixins import AnchorTrainMixin
@MODELS.register_module()
class Anchor3DHead(BaseModule, AnchorTrainMixin):
"""Anchor head for SECOND/PointPillars/MVXNet/PartA2.
class Anchor3DHead(Base3DDenseHead, AnchorTrainMixin):
"""Anchor-based head for SECOND/PointPillars/MVXNet/PartA2.
Args:
num_classes (int): Number of classes.
in_channels (int): Number of channels in the input feature map.
train_cfg (dict): Train configs.
test_cfg (dict): Test configs.
feat_channels (int): Number of channels of the feature map.
use_direction_classifier (bool): Whether to add a direction classifier.
anchor_generator (dict): Config dict of anchor generator.
......@@ -42,16 +39,17 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
loss_cls (dict): Config of classification loss.
loss_bbox (dict): Config of localization loss.
loss_dir (dict): Config of direction classifier loss.
train_cfg (dict): Train configs.
test_cfg (dict): Test configs.
init_cfg (dict or list[dict], optional): Initialization config dict.
"""
def __init__(self,
num_classes: int,
in_channels: int,
train_cfg: dict,
test_cfg: dict,
feat_channels: int = 256,
use_direction_classifier: bool = True,
anchor_generator: dict = dict(
anchor_generator: ConfigType = dict(
type='Anchor3DRangeGenerator',
range=[0, -39.68, -1.78, 69.12, 39.68, -1.78],
strides=[2],
......@@ -64,16 +62,20 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
diff_rad_by_sin: bool = True,
dir_offset: float = -np.pi / 2,
dir_limit_offset: int = 0,
bbox_coder: dict = dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls: dict = dict(
type='CrossEntropyLoss',
bbox_coder: ConfigType = dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls: ConfigType = dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=True,
loss_weight=1.0),
loss_bbox: dict = dict(
type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir: dict = dict(
type='CrossEntropyLoss', loss_weight=0.2),
init_cfg: Optional[dict] = None) -> None:
loss_bbox: ConfigType = dict(
type='mmdet.SmoothL1Loss',
beta=1.0 / 9.0,
loss_weight=2.0),
loss_dir: ConfigType = dict(
type='mmdet.CrossEntropyLoss', loss_weight=0.2),
train_cfg: OptConfigType = None,
test_cfg: OptConfigType = None,
init_cfg: OptConfigType = None) -> None:
super().__init__(init_cfg=init_cfg)
self.in_channels = in_channels
self.num_classes = num_classes
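The losses above now carry an explicit `mmdet.` prefix so the cross-repo registry can resolve them from mmdet rather than mmdet3d. A minimal sketch of the resulting config style, assuming the usual `MODELS.build` wiring (the exact build calls live in the elided part of `__init__`):

# Scoped loss configs as used in the refactored head; the 'mmdet.' prefix
# tells the shared registry to look the class up in mmdet's scope.
loss_cls = dict(
    type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)
loss_bbox = dict(
    type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0)

# Presumably built inside __init__ via the shared registry, e.g.:
# self.loss_cls = MODELS.build(loss_cls)
# self.loss_bbox = MODELS.build(loss_bbox)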
......@@ -148,128 +150,53 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
self.conv_dir_cls = nn.Conv2d(self.feat_channels,
self.num_anchors * 2, 1)
def forward_single(self, x: Tensor) -> Tuple[Tensor, Tensor]:
def forward_single(self, x: Tensor) -> Tuple[Tensor, Tensor, Tensor]:
"""Forward function on a single-scale feature map.
Args:
x (torch.Tensor): Input features.
x (Tensor): Features of a single scale level.
Returns:
tuple[torch.Tensor]: Contain score of each class, bbox
regression and direction classification predictions.
tuple:
cls_score (Tensor): Cls scores for a single scale level,
the channels number is num_base_priors * num_classes.
bbox_pred (Tensor): Box energies / deltas for a single scale
level, the channels number is num_base_priors * C.
dir_cls_pred (Tensor | None): Direction classification
prediction for a single scale level, the channels
number is num_base_priors * 2.
"""
cls_score = self.conv_cls(x)
bbox_pred = self.conv_reg(x)
dir_cls_preds = None
dir_cls_pred = None
if self.use_direction_classifier:
dir_cls_preds = self.conv_dir_cls(x)
return cls_score, bbox_pred, dir_cls_preds
dir_cls_pred = self.conv_dir_cls(x)
return cls_score, bbox_pred, dir_cls_pred
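The output channels of the three conv branches follow directly from the anchor layout. A small arithmetic sketch, using the configuration of the unit test at the end of this commit (3 classes, 3 anchor sizes x 2 rotations, 7-dim box code):

num_classes = 3
num_anchors = 3 * 2            # anchor sizes x rotations per location
box_code_size = 7              # DeltaXYZWLHRBBoxCoder: (x, y, z, w, l, h, yaw)

cls_channels = num_anchors * num_classes    # 18 -> conv_cls / cls_score
reg_channels = num_anchors * box_code_size  # 42 -> conv_reg / bbox_pred
dir_channels = num_anchors * 2              # 12 -> conv_dir_cls / dir_cls_pred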
def forward(self, feats: List[Tensor]) -> Tuple[list]:
def forward(self, x: Tuple[Tensor]) -> Tuple[List[Tensor]]:
"""Forward pass.
Args:
feats (list[torch.Tensor]): Multi-level features, e.g.,
features produced by FPN.
Returns:
tuple[list[torch.Tensor]]: Multi-level class score, bbox
and direction predictions.
"""
return multi_apply(self.forward_single, feats)
def forward_train(self,
feats: List[Tensor],
batch_data_samples: List[Det3DDataSample],
proposal_cfg: Optional[ConfigDict] = None,
**kwargs):
"""
Args:
feats (list[Tensor]): Features from FPN.
batch_data_samples (list[:obj:`Det3DDataSample`]): Each item
contains the meta information of each sample and
corresponding annotations.
proposal_cfg (ConfigDict, optional): Test / postprocessing
configuration, if None, test_cfg would be used.
Defaults to None.
Returns:
tuple or Tensor: When `proposal_cfg` is None, the detector is a
normal one-stage detector, and the return value is the losses.
- losses: (dict[str, Tensor]): A dictionary of loss components.
When the `proposal_cfg` is not None, the head is used as a
`rpn_head`, the return value is a tuple contains:
- losses: (dict[str, Tensor]): A dictionary of loss components.
- results_list (list[:obj:`InstanceData`]): Detection
results of each input after the post process.
Each item usually contains the following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instances, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (:obj:`BaseInstance3DBoxes`): Prediction of bboxes,
contains a tensor with shape (num_instances, 7).
"""
outs = self.forward(feats)
batch_gt_instance_3d = []
batch_gt_instances_ignore = []
batch_input_metas = []
for data_sample in batch_data_samples:
batch_input_metas.append(data_sample.metainfo)
batch_gt_instance_3d.append(data_sample.gt_instances_3d)
if 'ignored_instances' in data_sample:
batch_gt_instances_ignore.append(data_sample.ignored_instances)
else:
batch_gt_instances_ignore.append(None)
loss_inputs = outs + (batch_gt_instance_3d, batch_input_metas)
losses = self.loss(
*loss_inputs, batch_gt_instances_ignore=batch_gt_instances_ignore)
if proposal_cfg is None:
return losses
else:
batch_img_metas = [
data_sample.metainfo for data_sample in batch_data_samples
]
results_list = self.get_results(
*outs, batch_img_metas=batch_img_metas, cfg=proposal_cfg)
return losses, results_list
def simple_test(self,
feats: Tuple[Tensor],
batch_input_metas: List[dict],
rescale: bool = False) -> List[InstanceData]:
"""Test function without test-time augmentation.
Args:
feats (tuple[torch.Tensor]): Multi-level features from the
upstream network, each is a 4D-tensor.
batch_input_metas (list[dict]): List of image information.
rescale (bool, optional): Whether to rescale the results.
Defaults to False.
x (tuple[Tensor]): Features from the upstream network,
each is a 4D-tensor.
Returns:
list[:obj:`InstanceData`]: Detection results of each input
after the post process.
Each item usually contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instances, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (BaseInstance3DBoxes): Prediction of bboxes,
contains a tensor with shape (num_instances, 7).
tuple: A tuple of classification scores, bbox predictions and
direction classification predictions.
- cls_scores (list[Tensor]): Classification scores for all
scale levels, each is a 4D-tensor, the channels number
is num_base_priors * num_classes.
- bbox_preds (list[Tensor]): Box energies / deltas for all
scale levels, each is a 4D-tensor, the channels number
is num_base_priors * C.
- dir_cls_preds (list[Tensor|None]): Direction classification
predictions for all scale levels, each is a 4D-tensor,
the channels number is num_base_priors * 2.
"""
outs = self.forward(feats)
results_list = self.get_results(
*outs, input_metas=batch_input_metas, rescale=rescale)
return results_list
return multi_apply(self.forward_single, x)
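`multi_apply` is the small mmdet helper that runs `forward_single` on every feature level and transposes the per-level result tuples into one list per output. A sketch of its behavior (reproduced here from `mmdet.core` for illustration):

from functools import partial

def multi_apply(func, *args, **kwargs):
    # Apply `func` to each level, then transpose:
    # [(cls0, bbox0, dir0), (cls1, bbox1, dir1)] ->
    # ([cls0, cls1], [bbox0, bbox1], [dir0, dir1])
    pfunc = partial(func, **kwargs) if kwargs else func
    map_results = map(pfunc, *args)
    return tuple(map(list, zip(*map_results)))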
# TODO: Support augmentation test
def aug_test(self,
aug_batch_feats,
aug_batch_input_metas,
......@@ -313,22 +240,24 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
anchor_list = [multi_level_anchors for _ in range(num_imgs)]
return anchor_list
def loss_single(self, cls_score, bbox_pred, dir_cls_preds, labels,
label_weights, bbox_targets, bbox_weights, dir_targets,
dir_weights, num_total_samples):
def _loss_by_feat_single(self, cls_score: Tensor, bbox_pred: Tensor,
dir_cls_pred: Tensor, labels: Tensor,
label_weights: Tensor, bbox_targets: Tensor,
bbox_weights: Tensor, dir_targets: Tensor,
dir_weights: Tensor, num_total_samples: int):
"""Calculate loss of Single-level results.
Args:
cls_score (torch.Tensor): Class score in single-level.
bbox_pred (torch.Tensor): Bbox prediction in single-level.
dir_cls_preds (torch.Tensor): Predictions of direction class
cls_score (Tensor): Class score in single-level.
bbox_pred (Tensor): Bbox prediction in single-level.
dir_cls_pred (Tensor): Predictions of direction class
in single-level.
labels (torch.Tensor): Labels of class.
label_weights (torch.Tensor): Weights of class loss.
bbox_targets (torch.Tensor): Targets of bbox predictions.
bbox_weights (torch.Tensor): Weights of bbox loss.
dir_targets (torch.Tensor): Targets of direction predictions.
dir_weights (torch.Tensor): Weights of direction loss.
labels (Tensor): Labels of class.
label_weights (Tensor): Weights of class loss.
bbox_targets (Tensor): Targets of bbox predictions.
bbox_weights (Tensor): Weights of bbox loss.
dir_targets (Tensor): Targets of direction predictions.
dir_weights (Tensor): Weights of direction loss.
num_total_samples (int): The number of valid samples.
Returns:
......@@ -363,10 +292,10 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
# dir loss
if self.use_direction_classifier:
dir_cls_preds = dir_cls_preds.permute(0, 2, 3, 1).reshape(-1, 2)
dir_cls_pred = dir_cls_pred.permute(0, 2, 3, 1).reshape(-1, 2)
dir_targets = dir_targets.reshape(-1)
dir_weights = dir_weights.reshape(-1)
pos_dir_cls_preds = dir_cls_preds[pos_inds]
pos_dir_cls_pred = dir_cls_pred[pos_inds]
pos_dir_targets = dir_targets[pos_inds]
pos_dir_weights = dir_weights[pos_inds]
......@@ -388,14 +317,14 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
loss_dir = None
if self.use_direction_classifier:
loss_dir = self.loss_dir(
pos_dir_cls_preds,
pos_dir_cls_pred,
pos_dir_targets,
pos_dir_weights,
avg_factor=num_total_samples)
else:
loss_bbox = pos_bbox_pred.sum()
if self.use_direction_classifier:
loss_dir = pos_dir_cls_preds.sum()
loss_dir = pos_dir_cls_pred.sum()
return loss_cls, loss_bbox, loss_dir
......@@ -423,15 +352,16 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
dim=-1)
return boxes1, boxes2
@force_fp32(apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds'))
def loss(self,
cls_scores: List[Tensor],
bbox_preds: List[Tensor],
dir_cls_preds: List[Tensor],
batch_gt_instances_3d: List[InstanceData],
batch_input_metas: List[dict],
batch_gt_instances_ignore: List[InstanceData] = None) -> dict:
"""Calculate losses.
def loss_by_feat(
self,
cls_scores: List[Tensor],
bbox_preds: List[Tensor],
dir_cls_preds: List[Tensor],
batch_gt_instances_3d: InstanceList,
batch_input_metas: List[dict],
batch_gt_instances_ignore: OptInstanceList = None) -> dict:
"""Calculate the loss based on the features extracted by the detection
head.
Args:
cls_scores (list[torch.Tensor]): Multi-level class scores.
......@@ -481,7 +411,7 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
# num_total_samples = None
losses_cls, losses_bbox, losses_dir = multi_apply(
self.loss_single,
self._loss_by_feat_single,
cls_scores,
bbox_preds,
dir_cls_preds,
......@@ -494,165 +424,3 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
num_total_samples=num_total_samples)
return dict(
loss_cls=losses_cls, loss_bbox=losses_bbox, loss_dir=losses_dir)
def get_results(self,
cls_scores: List[Tensor],
bbox_preds: List[Tensor],
dir_cls_preds: List[Tensor],
input_metas: List[dict],
cfg: ConfigDict = None,
rescale: list = False) -> List[InstanceData]:
"""Get results of anchor head.
Args:
cls_scores (list[torch.Tensor]): Multi-level class scores.
bbox_preds (list[torch.Tensor]): Multi-level bbox predictions.
dir_cls_preds (list[torch.Tensor]): Multi-level direction
class predictions.
input_metas (list[dict]): Contain pcd and img's meta info.
cfg (:obj:`ConfigDict`): Training or testing config.
rescale (bool): Whether to rescale bboxes.
Returns:
list[:obj:`InstanceData`]: Instance prediction
results of each sample after the post process.
Each item usually contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instance, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (:obj:`BaseInstance3DBoxes`): Prediction of bboxes,
contains a tensor with shape (num_instances, 7).
"""
assert len(cls_scores) == len(bbox_preds)
assert len(cls_scores) == len(dir_cls_preds)
num_levels = len(cls_scores)
featmap_sizes = [cls_scores[i].shape[-2:] for i in range(num_levels)]
device = cls_scores[0].device
mlvl_anchors = self.prior_generator.grid_anchors(
featmap_sizes, device=device)
mlvl_anchors = [
anchor.reshape(-1, self.box_code_size) for anchor in mlvl_anchors
]
result_list = []
for img_id in range(len(input_metas)):
cls_score_list = [
cls_scores[i][img_id].detach() for i in range(num_levels)
]
bbox_pred_list = [
bbox_preds[i][img_id].detach() for i in range(num_levels)
]
dir_cls_pred_list = [
dir_cls_preds[i][img_id].detach() for i in range(num_levels)
]
input_meta = input_metas[img_id]
proposals = self._get_results_single(cls_score_list,
bbox_pred_list,
dir_cls_pred_list,
mlvl_anchors, input_meta, cfg,
rescale)
result_list.append(proposals)
return result_list
def _get_results_single(self,
cls_scores: Tensor,
bbox_preds: Tensor,
dir_cls_preds: Tensor,
mlvl_anchors: List[Tensor],
input_meta: List[dict],
cfg: ConfigDict = None,
rescale: bool = False) -> InstanceData:
"""Get results of single branch.
Args:
cls_scores (torch.Tensor): Class score in single batch.
bbox_preds (torch.Tensor): Bbox prediction in single batch.
dir_cls_preds (torch.Tensor): Predictions of direction class
in single batch.
mlvl_anchors (List[torch.Tensor]): Multi-level anchors
in single batch.
input_meta (list[dict]): Contain pcd and img's meta info.
cfg (:obj:`ConfigDict`): Training or testing config.
rescale (bool): Whether to rescale bboxes.
Returns:
:obj:`InstanceData`: Detection results of each sample
after the post process.
Each item usually contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instance, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (:obj:`BaseInstance3DBoxes`): Prediction of bboxes,
contains a tensor with shape (num_instances, 7).
"""
cfg = self.test_cfg if cfg is None else cfg
assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
mlvl_bboxes = []
mlvl_scores = []
mlvl_dir_scores = []
for cls_score, bbox_pred, dir_cls_pred, anchors in zip(
cls_scores, bbox_preds, dir_cls_preds, mlvl_anchors):
assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
assert cls_score.size()[-2:] == dir_cls_pred.size()[-2:]
dir_cls_pred = dir_cls_pred.permute(1, 2, 0).reshape(-1, 2)
dir_cls_score = torch.max(dir_cls_pred, dim=-1)[1]
cls_score = cls_score.permute(1, 2,
0).reshape(-1, self.num_classes)
if self.use_sigmoid_cls:
scores = cls_score.sigmoid()
else:
scores = cls_score.softmax(-1)
bbox_pred = bbox_pred.permute(1, 2,
0).reshape(-1, self.box_code_size)
nms_pre = cfg.get('nms_pre', -1)
if nms_pre > 0 and scores.shape[0] > nms_pre:
if self.use_sigmoid_cls:
max_scores, _ = scores.max(dim=1)
else:
max_scores, _ = scores[:, :-1].max(dim=1)
_, topk_inds = max_scores.topk(nms_pre)
anchors = anchors[topk_inds, :]
bbox_pred = bbox_pred[topk_inds, :]
scores = scores[topk_inds, :]
dir_cls_score = dir_cls_score[topk_inds]
bboxes = self.bbox_coder.decode(anchors, bbox_pred)
mlvl_bboxes.append(bboxes)
mlvl_scores.append(scores)
mlvl_dir_scores.append(dir_cls_score)
mlvl_bboxes = torch.cat(mlvl_bboxes)
mlvl_bboxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](
mlvl_bboxes, box_dim=self.box_code_size).bev)
mlvl_scores = torch.cat(mlvl_scores)
mlvl_dir_scores = torch.cat(mlvl_dir_scores)
if self.use_sigmoid_cls:
# Append a dummy background class at the end when using sigmoid
padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)
score_thr = cfg.get('score_thr', 0)
results = box3d_multiclass_nms(mlvl_bboxes, mlvl_bboxes_for_nms,
mlvl_scores, score_thr, cfg.max_num,
cfg, mlvl_dir_scores)
bboxes, scores, labels, dir_scores = results
if bboxes.shape[0] > 0:
dir_rot = limit_period(bboxes[..., 6] - self.dir_offset,
self.dir_limit_offset, np.pi)
bboxes[..., 6] = (
dir_rot + self.dir_offset +
np.pi * dir_scores.to(bboxes.dtype))
bboxes = input_meta['box_type_3d'](bboxes, box_dim=self.box_code_size)
results = InstanceData()
results.bboxes_3d = bboxes
results.scores_3d = scores
results.labels_3d = labels
return results
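The final yaw decoding above wraps the regressed angle into a pi-long window with `limit_period` and then uses the 2-way direction classifier to restore the pi ambiguity that the sin-difference encoding discards. A numeric sketch, assuming mmdet3d's definition of `limit_period`:

import numpy as np
import torch

def limit_period(val, offset=0.5, period=np.pi):
    # mmdet3d.core: wrap `val` into a window of length `period`.
    return val - torch.floor(val / period + offset) * period

dir_offset, dir_limit_offset = -np.pi / 2, 0
yaw = torch.tensor([2.5])            # decoded box yaw before direction fix
dir_score = torch.tensor([1.0])      # argmax of the direction classifier

dir_rot = limit_period(yaw - dir_offset, dir_limit_offset, np.pi)
decoded_yaw = dir_rot + dir_offset + np.pi * dir_score
# decoded_yaw is ~2.5 here: the pi-periodic wrap plus the direction bit
# reconstruct the original heading.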
# Copyright (c) OpenMMLab. All rights reserved.
from abc import ABCMeta, abstractmethod
from typing import List, Optional, Tuple
import numpy as np
import torch
from mmcv.cnn.utils.weight_init import constant_init
from mmengine.config import ConfigDict
from mmengine.data import InstanceData
from mmengine.model import BaseModule
from torch import Tensor
from mmdet3d.core import box3d_multiclass_nms, limit_period, xywhr2xyxyr
from mmdet3d.core.utils import InstanceList, OptMultiConfig, SampleList
from mmdet.core.utils import select_single_mlvl
class Base3DDenseHead(BaseModule, metaclass=ABCMeta):
"""Base class for 3D DenseHeads.
1. The ``init_weights`` method is used to initialize densehead's
model parameters. After detector initialization, ``init_weights``
is triggered when ``detector.init_weights()`` is called externally.
2. The ``loss`` method is used to calculate the loss of densehead,
which includes two steps: (1) the densehead model performs forward
propagation to obtain the feature maps (2) The ``loss_by_feat`` method
is called based on the feature maps to calculate the loss.
.. code:: text
loss(): forward() -> loss_by_feat()
3. The ``predict`` method is used to predict detection results,
which includes two steps: (1) the densehead model performs forward
propagation to obtain the feature maps (2) The ``predict_by_feat`` method
is called based on the feature maps to predict detection results including
post-processing.
.. code:: text
predict(): forward() -> predict_by_feat()
4. The ``loss_and_predict`` method is used to return loss and detection
results at the same time. It will call densehead's ``forward``,
``loss_by_feat`` and ``predict_by_feat`` methods in order. If a one-stage
head is used as an RPN, the densehead needs to return both losses and
predictions; these predictions are used as the proposals of the roihead.
.. code:: text
loss_and_predict(): forward() -> loss_by_feat() -> predict_by_feat()
"""
def __init__(self, init_cfg: OptMultiConfig = None) -> None:
super().__init__(init_cfg=init_cfg)
def init_weights(self) -> None:
"""Initialize the weights."""
super().init_weights()
# avoid init_cfg overwrite the initialization of `conv_offset`
for m in self.modules():
# DeformConv2dPack, ModulatedDeformConv2dPack
if hasattr(m, 'conv_offset'):
constant_init(m.conv_offset, 0)
def loss(self, x: Tuple[Tensor], batch_data_samples: SampleList,
**kwargs) -> dict:
"""Perform forward propagation and loss calculation of the detection
head on the features of the upstream network.
Args:
x (tuple[Tensor]): Features from the upstream network, each is
a 4D-tensor.
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
Samples. It usually includes information such as
`gt_instance`, `gt_panoptic_seg` and `gt_sem_seg`.
Returns:
dict: A dictionary of loss components.
"""
outs = self(x)
batch_gt_instances_3d = []
batch_gt_instances_ignore = []
batch_input_metas = []
for data_sample in batch_data_samples:
batch_input_metas.append(data_sample.metainfo)
batch_gt_instances_3d.append(data_sample.gt_instances_3d)
batch_gt_instances_ignore.append(
data_sample.get('ignored_instances', None))
loss_inputs = outs + (batch_gt_instances_3d, batch_input_metas,
batch_gt_instances_ignore)
losses = self.loss_by_feat(*loss_inputs)
return losses
@abstractmethod
def loss_by_feat(self, **kwargs) -> dict:
"""Calculate the loss based on the features extracted by the detection
head."""
pass
def loss_and_predict(self,
x: Tuple[Tensor],
batch_data_samples: SampleList,
proposal_cfg: Optional[ConfigDict] = None,
**kwargs) -> Tuple[dict, InstanceList]:
"""Perform forward propagation of the head, then calculate loss and
predictions from the features and data samples.
Args:
x (tuple[Tensor]): Features from FPN.
batch_data_samples (list[:obj:`Det3DDataSample`]): Each item contains
the meta information of each sample and corresponding
annotations.
proposal_cfg (ConfigDict, optional): Test / postprocessing
configuration, if None, test_cfg would be used.
Defaults to None.
Returns:
tuple: the return value is a tuple contains:
- losses: (dict[str, Tensor]): A dictionary of loss components.
- predictions (list[:obj:`InstanceData`]): Detection
results of each image after the post process.
"""
batch_gt_instances = []
batch_gt_instances_ignore = []
batch_input_metas = []
for data_sample in batch_data_samples:
batch_input_metas.append(data_sample.metainfo)
batch_gt_instances.append(data_sample.gt_instances_3d)
batch_gt_instances_ignore.append(
data_sample.get('ignored_instances', None))
outs = self(x)
loss_inputs = outs + (batch_gt_instances, batch_input_metas,
batch_gt_instances_ignore)
losses = self.loss_by_feat(*loss_inputs)
predictions = self.predict_by_feat(
*outs, batch_input_metas=batch_input_metas, cfg=proposal_cfg)
return losses, predictions
def predict(self,
x: Tuple[Tensor],
batch_data_samples: SampleList,
rescale: bool = False) -> InstanceList:
"""Perform forward propagation of the 3D detection head and predict
detection results on the features of the upstream network.
Args:
x (tuple[Tensor]): Multi-level features from the
upstream network, each is a 4D-tensor.
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
Samples. It usually includes information such as
`gt_instance_3d`, `gt_pts_panoptic_seg` and
`gt_pts_sem_seg`.
rescale (bool, optional): Whether to rescale the results.
Defaults to False.
Returns:
list[:obj:`InstanceData`]: Detection results of each sample
after the post process.
Each item usually contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instances, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (BaseInstance3DBoxes): Prediction of bboxes,
contains a tensor with shape (num_instances, C), where
C >= 7.
"""
batch_input_metas = [
data_samples.metainfo for data_samples in batch_data_samples
]
outs = self(x)
predictions = self.predict_by_feat(
*outs, batch_input_metas=batch_input_metas, rescale=rescale)
return predictions
def predict_by_feat(self,
cls_scores: List[Tensor],
bbox_preds: List[Tensor],
dir_cls_preds: List[Tensor],
batch_input_metas: Optional[List[dict]] = None,
cfg: Optional[ConfigDict] = None,
rescale: bool = False,
**kwargs) -> InstanceList:
"""Transform a batch of output features extracted from the head into
bbox results.
Args:
cls_scores (list[Tensor]): Classification scores for all
scale levels, each is a 4D-tensor, has shape
(batch_size, num_priors * num_classes, H, W).
bbox_preds (list[Tensor]): Box energies / deltas for all
scale levels, each is a 4D-tensor, has shape
(batch_size, num_priors * 4, H, W).
dir_cls_preds (list[Tensor]): Direction classification predictions
for all scale levels, each is a 4D-tensor, has shape
(batch_size, num_priors * 2, H, W).
batch_input_metas (list[dict], Optional): Batch image meta info.
Defaults to None.
cfg (ConfigDict, optional): Test / postprocessing
configuration, if None, test_cfg would be used.
Defaults to None.
rescale (bool): If True, return boxes in original image space.
Defaults to False.
Returns:
list[:obj:`InstanceData`]: Detection results of each sample
after the post process.
Each item usually contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instances, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (BaseInstance3DBoxes): Prediction of bboxes,
contains a tensor with shape (num_instances, C), where
C >= 7.
"""
assert len(cls_scores) == len(bbox_preds)
assert len(cls_scores) == len(dir_cls_preds)
num_levels = len(cls_scores)
featmap_sizes = [cls_scores[i].shape[-2:] for i in range(num_levels)]
mlvl_priors = self.prior_generator.grid_anchors(
featmap_sizes, device=cls_scores[0].device)
mlvl_priors = [
prior.reshape(-1, self.box_code_size) for prior in mlvl_priors
]
result_list = []
for input_id in range(len(batch_input_metas)):
input_meta = batch_input_metas[input_id]
cls_score_list = select_single_mlvl(cls_scores, input_id)
bbox_pred_list = select_single_mlvl(bbox_preds, input_id)
dir_cls_pred_list = select_single_mlvl(dir_cls_preds, input_id)
results = self._predict_by_feat_single(
cls_score_list=cls_score_list,
bbox_pred_list=bbox_pred_list,
dir_cls_pred_list=dir_cls_pred_list,
mlvl_priors=mlvl_priors,
input_meta=input_meta,
cfg=cfg,
rescale=rescale,
**kwargs)
result_list.append(results)
return result_list
def _predict_by_feat_single(self,
cls_score_list: List[Tensor],
bbox_pred_list: List[Tensor],
dir_cls_pred_list: List[Tensor],
mlvl_priors: List[Tensor],
input_meta: dict,
cfg: ConfigDict,
rescale: bool = False,
**kwargs) -> InstanceData:
"""Transform a single image's features extracted from the head into
bbox results.
Args:
cls_score_list (list[Tensor]): Box scores from all scale
levels of a single point cloud sample, each item has shape
(num_priors * num_classes, H, W).
bbox_pred_list (list[Tensor]): Box energies / deltas from
all scale levels of a single point cloud sample, each item
has shape (num_priors * C, H, W).
dir_cls_pred_list (list[Tensor]): Predictions of direction class
from all scale levels of a single point cloud sample, each
item has shape (num_priors * 2, H, W).
mlvl_priors (list[Tensor]): Each element in the list is the priors
of a single level in feature pyramid. For the anchor-based 3D
heads here, each has shape (num_priors, box_code_size).
input_meta (dict): Contain point clouds and image meta info.
cfg (:obj:`ConfigDict`): Test / postprocessing configuration,
if None, test_cfg would be used.
rescale (bool): If True, return boxes in original image space.
Defaults to False.
Returns:
:obj:`InstanceData`: Detection results of each image
after the post process.
Each item usually contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instances, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (:obj:`BaseInstance3DBoxes`): Prediction of bboxes,
contains a tensor with shape (num_instances, C), where C >= 7.
"""
cfg = self.test_cfg if cfg is None else cfg
assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_priors)
mlvl_bboxes = []
mlvl_scores = []
mlvl_dir_scores = []
for cls_score, bbox_pred, dir_cls_pred, priors in zip(
cls_score_list, bbox_pred_list, dir_cls_pred_list,
mlvl_priors):
assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
assert cls_score.size()[-2:] == dir_cls_pred.size()[-2:]
dir_cls_pred = dir_cls_pred.permute(1, 2, 0).reshape(-1, 2)
dir_cls_score = torch.max(dir_cls_pred, dim=-1)[1]
cls_score = cls_score.permute(1, 2,
0).reshape(-1, self.num_classes)
if self.use_sigmoid_cls:
scores = cls_score.sigmoid()
else:
scores = cls_score.softmax(-1)
bbox_pred = bbox_pred.permute(1, 2,
0).reshape(-1, self.box_code_size)
nms_pre = cfg.get('nms_pre', -1)
if nms_pre > 0 and scores.shape[0] > nms_pre:
if self.use_sigmoid_cls:
max_scores, _ = scores.max(dim=1)
else:
max_scores, _ = scores[:, :-1].max(dim=1)
_, topk_inds = max_scores.topk(nms_pre)
priors = priors[topk_inds, :]
bbox_pred = bbox_pred[topk_inds, :]
scores = scores[topk_inds, :]
dir_cls_score = dir_cls_score[topk_inds]
bboxes = self.bbox_coder.decode(priors, bbox_pred)
mlvl_bboxes.append(bboxes)
mlvl_scores.append(scores)
mlvl_dir_scores.append(dir_cls_score)
mlvl_bboxes = torch.cat(mlvl_bboxes)
mlvl_bboxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](
mlvl_bboxes, box_dim=self.box_code_size).bev)
mlvl_scores = torch.cat(mlvl_scores)
mlvl_dir_scores = torch.cat(mlvl_dir_scores)
if self.use_sigmoid_cls:
# Append a dummy background class at the end when using sigmoid
padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)
score_thr = cfg.get('score_thr', 0)
results = box3d_multiclass_nms(mlvl_bboxes, mlvl_bboxes_for_nms,
mlvl_scores, score_thr, cfg.max_num,
cfg, mlvl_dir_scores)
bboxes, scores, labels, dir_scores = results
if bboxes.shape[0] > 0:
dir_rot = limit_period(bboxes[..., 6] - self.dir_offset,
self.dir_limit_offset, np.pi)
bboxes[..., 6] = (
dir_rot + self.dir_offset +
np.pi * dir_scores.to(bboxes.dtype))
bboxes = input_meta['box_type_3d'](bboxes, box_dim=self.box_code_size)
results = InstanceData()
results.bboxes_3d = bboxes
results.scores_3d = scores
results.labels_3d = labels
return results
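Why the zero padding above: sigmoid heads predict no explicit background column, but `box3d_multiclass_nms` expects scores of shape (N, C + 1) with the last column reserved for background. A tiny sketch:

import torch

mlvl_scores = torch.rand(4, 3)  # sigmoid scores: (num_boxes, num_classes)
padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)
# Now (num_boxes, num_classes + 1); the NMS treats the appended last
# column as background and only iterates over the foreground classes.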
# TODO: Support augmentation test
def aug_test(self,
aug_batch_feats,
aug_batch_input_metas,
rescale=False,
with_ori_nms=False,
**kwargs):
pass
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Tuple
import torch
from mmcv.runner import force_fp32
from torch import Tensor
from torch.nn import functional as F
from mmdet3d.core.utils import ConfigType, OptConfigType, OptMultiConfig
from mmdet3d.registry import MODELS
from .voxelnet import VoxelNet
......@@ -13,17 +17,17 @@ class DynamicVoxelNet(VoxelNet):
"""
def __init__(self,
voxel_layer,
voxel_encoder,
middle_encoder,
backbone,
neck=None,
bbox_head=None,
train_cfg=None,
test_cfg=None,
pretrained=None,
init_cfg=None):
super(DynamicVoxelNet, self).__init__(
voxel_layer: ConfigType,
voxel_encoder: ConfigType,
middle_encoder: ConfigType,
backbone: ConfigType,
neck: OptConfigType = None,
bbox_head: OptConfigType = None,
train_cfg: OptConfigType = None,
test_cfg: OptConfigType = None,
data_preprocessor: OptConfigType = None,
init_cfg: OptMultiConfig = None) -> None:
super().__init__(
voxel_layer=voxel_layer,
voxel_encoder=voxel_encoder,
middle_encoder=middle_encoder,
......@@ -32,30 +36,19 @@ class DynamicVoxelNet(VoxelNet):
bbox_head=bbox_head,
train_cfg=train_cfg,
test_cfg=test_cfg,
pretrained=pretrained,
data_preprocessor=data_preprocessor,
init_cfg=init_cfg)
def extract_feat(self, points, img_metas):
"""Extract features from points."""
voxels, coors = self.voxelize(points)
voxel_features, feature_coors = self.voxel_encoder(voxels, coors)
batch_size = coors[-1, 0].item() + 1
x = self.middle_encoder(voxel_features, feature_coors, batch_size)
x = self.backbone(x)
if self.with_neck:
x = self.neck(x)
return x
@torch.no_grad()
@force_fp32()
def voxelize(self, points):
def voxelize(self, points: List[torch.Tensor]) -> tuple:
"""Apply dynamic voxelization to points.
Args:
points (list[torch.Tensor]): Points of each sample.
points (list[Tensor]): Points of each sample.
Returns:
tuple[torch.Tensor]: Concatenated points and coordinates.
tuple[Tensor]: Concatenated points and coordinates.
"""
coors = []
# dynamic voxelization only provides a coors mapping
......@@ -69,3 +62,16 @@ class DynamicVoxelNet(VoxelNet):
coors_batch.append(coor_pad)
coors_batch = torch.cat(coors_batch, dim=0)
return points, coors_batch
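Dynamic voxelization yields only per-point voxel coordinates; the loop above (partly elided by the diff) prepends each sample's batch index before concatenating. A sketch of that `F.pad` idiom with illustrative values:

import torch
from torch.nn import functional as F

# Per-sample voxel coordinates (z, y, x) from the dynamic voxel layer.
coors = [torch.tensor([[0, 5, 7], [1, 5, 8]]),
         torch.tensor([[0, 2, 3]])]

coors_batch = torch.cat(
    [F.pad(coor, (1, 0), mode='constant', value=i)  # prepend batch index i
     for i, coor in enumerate(coors)], dim=0)
# tensor([[0, 0, 5, 7],
#         [0, 1, 5, 8],
#         [1, 0, 2, 3]])
# `coors_batch[-1, 0] + 1` later recovers the batch size.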
def extract_feat(self, batch_inputs_dict: dict) -> Tuple[Tensor]:
"""Extract features from points."""
# TODO: Remove voxelization to datapreprocessor
points = batch_inputs_dict['points']
voxels, coors = self.voxelize(points)
voxel_features, feature_coors = self.voxel_encoder(voxels, coors)
batch_size = coors[-1, 0].item() + 1
x = self.middle_encoder(voxel_features, feature_coors, batch_size)
x = self.backbone(x)
if self.with_neck:
x = self.neck(x)
return x
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Tuple, Union
from typing import Dict, List, Tuple, Union
import torch
from torch import Tensor
from mmdet3d.core.utils import (ConfigType, OptConfigType, OptMultiConfig,
OptSampleList, SampleList)
......@@ -134,12 +135,19 @@ class SingleStage3DDetector(Base3DDetector):
results = self.bbox_head.forward(x)
return results
def extract_feat(self,
batch_inputs_dict: torch.Tensor) -> Tuple[torch.Tensor]:
def extract_feat(
self, batch_inputs_dict: torch.Tensor
) -> Union[Tuple[torch.Tensor], Dict[str, Tensor]]:
"""Directly extract features from the backbone+neck.
Args:
batch_inputs_dict (dict): The model input dict, which should
contain the ``points`` key.
Returns:
tuple[Tensor] | dict: For outdoor 3D object detection, we
typically obtain a tuple of features from the backbone + neck,
and for indoor 3D object detection, usually a dict containing
features will be obtained.
"""
points = batch_inputs_dict['points']
stack_points = torch.stack(points)
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List, Optional
from typing import List, Tuple
import torch
from mmcv.ops import Voxelization
from mmcv.runner import force_fp32
from torch import Tensor
from torch.nn import functional as F
from mmdet3d.core import Det3DDataSample
from mmdet3d.core.utils import ConfigType, OptConfigType, OptMultiConfig
from mmdet3d.registry import MODELS
from .single_stage import SingleStage3DDetector
......@@ -16,39 +17,28 @@ class VoxelNet(SingleStage3DDetector):
r"""`VoxelNet <https://arxiv.org/abs/1711.06396>`_ for 3D detection."""
def __init__(self,
voxel_layer: dict,
voxel_encoder: dict,
middle_encoder: dict,
backbone: dict,
neck: Optional[dict] = None,
bbox_head: Optional[dict] = None,
train_cfg: Optional[dict] = None,
test_cfg: Optional[dict] = None,
init_cfg: Optional[dict] = None,
pretrained: Optional[str] = None) -> None:
super(VoxelNet, self).__init__(
voxel_layer: ConfigType,
voxel_encoder: ConfigType,
middle_encoder: ConfigType,
backbone: ConfigType,
neck: OptConfigType = None,
bbox_head: OptConfigType = None,
train_cfg: OptConfigType = None,
test_cfg: OptConfigType = None,
data_preprocessor: OptConfigType = None,
init_cfg: OptMultiConfig = None) -> None:
super().__init__(
backbone=backbone,
neck=neck,
bbox_head=bbox_head,
train_cfg=train_cfg,
test_cfg=test_cfg,
init_cfg=init_cfg,
pretrained=pretrained)
data_preprocessor=data_preprocessor,
init_cfg=init_cfg)
self.voxel_layer = Voxelization(**voxel_layer)
self.voxel_encoder = MODELS.build(voxel_encoder)
self.middle_encoder = MODELS.build(middle_encoder)
def extract_feat(self, points: List[torch.Tensor]) -> list:
"""Extract features from points."""
voxels, num_points, coors = self.voxelize(points)
voxel_features = self.voxel_encoder(voxels, num_points, coors)
batch_size = coors[-1, 0].item() + 1
x = self.middle_encoder(voxel_features, coors, batch_size)
x = self.backbone(x)
if self.with_neck:
x = self.neck(x)
return x
@torch.no_grad()
@force_fp32()
def voxelize(self, points: List[torch.Tensor]) -> tuple:
......@@ -68,75 +58,15 @@ class VoxelNet(SingleStage3DDetector):
coors_batch = torch.cat(coors_batch, dim=0)
return voxels, num_points, coors_batch
def forward_train(self, batch_inputs_dict: Dict[list, torch.Tensor],
batch_data_samples: List[Det3DDataSample],
**kwargs) -> dict:
"""
Args:
batch_inputs_dict (dict): The model input dict. It should contain
``points`` and ``img`` keys.
- points (list[torch.Tensor]): Point cloud of each sample.
- imgs (torch.Tensor, optional): Image of each sample.
batch_data_samples (list[:obj:`Det3DDataSample`]): The batch
data samples. It usually includes information such
as `gt_instance_3d` or `gt_panoptic_seg_3d` or `gt_sem_seg_3d`.
Returns:
dict[str, Tensor]: A dictionary of loss components.
"""
x = self.extract_feat(batch_inputs_dict['points'])
losses = self.bbox_head.forward_train(x, batch_data_samples, **kwargs)
return losses
def simple_test(self,
batch_inputs_dict: Dict[list, torch.Tensor],
batch_input_metas: List[dict],
rescale: bool = False) -> list:
"""Test function without test-time augmentation.
Args:
batch_inputs_dict (dict): The model input dict. It should contain
``points`` and ``img`` keys.
- points (list[torch.Tensor]): Point cloud of single
sample.
- imgs (torch.Tensor, optional): Image of single sample.
batch_input_metas (list[dict]): List of input information.
rescale (bool, optional): Whether to rescale the results.
Defaults to False.
Returns:
list[:obj:`Det3DDataSample`]: Detection results of the \
inputs. Each Det3DDataSample usually contain \
'pred_instances_3d'. And the ``pred_instances_3d`` usually \
contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instances, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (:obj:`BaseInstance3DBoxes`): Prediction of bboxes,
contains a tensor with shape (num_instances, 7).
"""
x = self.extract_feat(batch_inputs_dict['points'])
bboxes_list = self.bbox_head.simple_test(
x, batch_input_metas, rescale=rescale)
# convert to Det3DDataSample
results_list = self.postprocess_result(bboxes_list)
return results_list
def aug_test(self,
aug_batch_inputs_dict: Dict[list, torch.Tensor],
aug_batch_input_metas: List[dict],
rescale: bool = False) -> list:
"""Test function with augmentaiton."""
# TODO Refactor this after mmdet update
feats = self.extract_feats(aug_batch_inputs_dict)
aug_bboxes = self.bbox_head.aug_test(
feats, aug_batch_input_metas, rescale=rescale)
return aug_bboxes
def extract_feat(self, batch_inputs_dict: dict) -> Tuple[Tensor]:
"""Extract features from points."""
# TODO: Remove voxelization to datapreprocessor
points = batch_inputs_dict['points']
voxels, num_points, coors = self.voxelize(points)
voxel_features = self.voxel_encoder(voxels, num_points, coors)
batch_size = coors[-1, 0].item() + 1
x = self.middle_encoder(voxel_features, coors, batch_size)
x = self.backbone(x)
if self.with_neck:
x = self.neck(x)
return x
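For contrast with the dynamic path, VoxelNet's hard `Voxelization` layer caps the points per voxel and returns dense tensors that `extract_feat` consumes directly. A shape sketch under the usual mmcv conventions (M non-empty voxels, T points per voxel, `model` a built VoxelNet):

voxels, num_points, coors = model.voxelize(points)
# voxels:     (M, T, C)  zero-padded points per voxel
# num_points: (M,)       valid point count per voxel
# coors:      (M, 4)     (batch_idx, z, y, x) after batch padding

voxel_features = model.voxel_encoder(voxels, num_points, coors)
batch_size = coors[-1, 0].item() + 1  # batch indices appended in order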
# Copyright (c) OpenMMLab. All rights reserved.
from unittest import TestCase
import torch
from mmengine import Config
from mmengine.data import InstanceData
from mmdet3d import * # noqa
from mmdet3d.core.bbox import Box3DMode, LiDARInstance3DBoxes
from mmdet3d.models.dense_heads import Anchor3DHead
class TestAnchor3DHead(TestCase):
def test_anchor3d_head_loss(self):
"""Test anchor head loss when truth is empty and non-empty."""
cfg = Config(
dict(
assigner=[
dict( # for Pedestrian
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.35,
neg_iou_thr=0.2,
min_pos_iou=0.2,
ignore_iof_thr=-1),
dict( # for Cyclist
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.35,
neg_iou_thr=0.2,
min_pos_iou=0.2,
ignore_iof_thr=-1),
dict( # for Car
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
],
allowed_border=0,
pos_weight=-1,
debug=False))
anchor3d_head = Anchor3DHead(
num_classes=3,
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -1.78, 70.4, 40.0, -1.78],
],
sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]],
rotations=[0, 1.57],
reshape_out=False),
diff_rad_by_sin=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='mmdet.FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(
type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=False,
loss_weight=0.2),
train_cfg=cfg)
# Anchor head expects multiple levels of features per sample
feats = (torch.rand([1, 512, 200, 176], dtype=torch.float32), )
(cls_scores, bbox_preds, dir_cls_preds) = anchor3d_head.forward(feats)
self.assertEqual(cls_scores[0].shape, torch.Size([1, 18, 200, 176]))
self.assertEqual(bbox_preds[0].shape, torch.Size([1, 42, 200, 176]))
self.assertEqual(dir_cls_preds[0].shape, torch.Size([1, 12, 200, 176]))
# Test that empty ground truth encourages the network to
# predict background
gt_instances = InstanceData()
gt_bboxes_3d = LiDARInstance3DBoxes(torch.empty((0, 7)))
gt_labels_3d = torch.tensor([])
# fake input_metas
input_metas = dict(sample_idx=1234)
gt_instances.bboxes_3d = gt_bboxes_3d
gt_instances.labels_3d = gt_labels_3d
empty_gt_losses = anchor3d_head.loss_by_feat(cls_scores, bbox_preds,
dir_cls_preds,
[gt_instances],
[input_metas])
# When there is no truth, the cls loss should be nonzero but
# there should be no box and dir loss.
self.assertGreater(empty_gt_losses['loss_cls'][0], 0,
'cls loss should be non-zero')
self.assertEqual(
empty_gt_losses['loss_bbox'][0], 0,
'there should be no box loss when there are no true boxes')
self.assertEqual(
empty_gt_losses['loss_dir'][0], 0,
'there should be no dir loss when there are no true dirs')
# When truth is non-empty then both cls and box loss
# should be nonzero for random inputs
gt_instances = InstanceData()
gt_bboxes_3d = LiDARInstance3DBoxes(
torch.tensor(
[[6.4118, -3.4305, -1.7291, 1.7033, 3.4693, 1.6197, -0.9091]],
dtype=torch.float32))
gt_labels_3d = torch.tensor([1], dtype=torch.int64)
gt_instances.bboxes_3d = gt_bboxes_3d
gt_instances.labels_3d = gt_labels_3d
gt_losses = anchor3d_head.loss_by_feat(cls_scores, bbox_preds,
dir_cls_preds, [gt_instances],
[input_metas])
self.assertGreater(gt_losses['loss_cls'][0], 0,
'cls loss should be non-zero')
self.assertGreater(gt_losses['loss_bbox'][0], 0,
'box loss should be non-zero')
self.assertGreater(gt_losses['loss_dir'][0], 0,
'dir loss should be non-zero')
def test_anchor3d_head_predict(self):
cfg = Config(
dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_thr=0.01,
score_thr=0.1,
min_bbox_size=0,
nms_pre=100,
max_num=50))
anchor3d_head = Anchor3DHead(
num_classes=3,
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -1.78, 70.4, 40.0, -1.78],
],
sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]],
rotations=[0, 1.57],
reshape_out=False),
diff_rad_by_sin=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='mmdet.FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(
type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=False,
loss_weight=0.2),
test_cfg=cfg)
feats = (torch.rand([2, 512, 200, 176], dtype=torch.float32), )
(cls_scores, bbox_preds, dir_cls_preds) = anchor3d_head.forward(feats)
# fake input_metas
input_metas = [{
'sample_idx': 1234,
'box_type_3d': LiDARInstance3DBoxes,
'box_mode_3d': Box3DMode.LIDAR
}, {
'sample_idx': 2345,
'box_type_3d': LiDARInstance3DBoxes,
'box_mode_3d': Box3DMode.LIDAR
}]
# test predict_by_feat
cls_scores[0] -= 1.5 # too many positive samples may cause cuda oom
results = anchor3d_head.predict_by_feat(cls_scores, bbox_preds,
dir_cls_preds, input_metas)
pred_instances = results[0]
scores_3d = pred_instances.scores_3d
assert (scores_3d > 0.3).all()