Commit db44cc50 authored by ZCMax, committed by ChaimZhu

[Refactor] Refactor the model of VoxelNet and DynamicVoxelNet

parent 7fda1f66
...
@@ -7,6 +7,7 @@ from .builder import (BACKBONES, DETECTORS, FUSION_LAYERS, HEADS, LOSSES,
                       build_loss, build_middle_encoder, build_model,
                       build_neck, build_roi_extractor, build_shared_head,
                       build_voxel_encoder)
+from .data_preprocessors import *  # noqa: F401,F403
 from .decode_heads import *  # noqa: F401,F403
 from .dense_heads import *  # noqa: F401,F403
 from .detectors import *  # noqa: F401,F403
...
 # Copyright (c) OpenMMLab. All rights reserved.
 from .anchor3d_head import Anchor3DHead
 from .anchor_free_mono3d_head import AnchorFreeMono3DHead
+from .base_3d_dense_head import Base3DDenseHead
 from .base_conv_bbox_head import BaseConvBboxHead
 from .base_mono3d_dense_head import BaseMono3DDenseHead
 from .centerpoint_head import CenterHead
...
@@ -21,5 +22,5 @@ __all__ = [
     'SSD3DHead', 'BaseConvBboxHead', 'CenterHead', 'ShapeAwareHead',
     'BaseMono3DDenseHead', 'AnchorFreeMono3DHead', 'FCOSMono3DHead',
     'GroupFree3DHead', 'PointRPNHead', 'SMOKEMono3DHead', 'PGDHead',
-    'MonoFlexHead'
+    'MonoFlexHead', 'Base3DDenseHead'
 ]
This diff is collapsed.
# Copyright (c) OpenMMLab. All rights reserved.
from abc import ABCMeta, abstractmethod
from typing import List, Optional, Tuple
import numpy as np
import torch
from mmcv.cnn.utils.weight_init import constant_init
from mmengine.config import ConfigDict
from mmengine.data import InstanceData
from mmengine.model import BaseModule
from torch import Tensor
from mmdet3d.core import box3d_multiclass_nms, limit_period, xywhr2xyxyr
from mmdet3d.core.utils import InstanceList, OptMultiConfig, SampleList
from mmdet.core.utils import select_single_mlvl
class Base3DDenseHead(BaseModule, metaclass=ABCMeta):
"""Base class for 3D DenseHeads.
1. The ``init_weights`` method is used to initialize densehead's
model parameters. After detector initialization, ``init_weights``
is triggered when ``detector.init_weights()`` is called externally.
2. The ``loss`` method is used to calculate the loss of densehead,
which includes two steps: (1) the densehead model performs forward
propagation to obtain the feature maps (2) The ``loss_by_feat`` method
is called based on the feature maps to calculate the loss.
.. code:: text
loss(): forward() -> loss_by_feat()
3. The ``predict`` method is used to predict detection results,
which includes two steps: (1) the densehead model performs forward
propagation to obtain the feature maps (2) The ``predict_by_feat`` method
is called based on the feature maps to predict detection results including
post-processing.
.. code:: text
predict(): forward() -> predict_by_feat()
4. The ``loss_and_predict`` method is used to return loss and detection
results at the same time. It will call densehead's ``forward``,
       ``loss_by_feat`` and ``predict_by_feat`` methods in order. If a
       one-stage detector is used as an RPN, the densehead needs to return
       both losses and predictions. These predictions are used as the
       proposals of the roi_head.
.. code:: text
loss_and_predict(): forward() -> loss_by_feat() -> predict_by_feat()
"""
def __init__(self, init_cfg: OptMultiConfig = None) -> None:
super().__init__(init_cfg=init_cfg)
def init_weights(self) -> None:
"""Initialize the weights."""
super().init_weights()
        # avoid init_cfg overwriting the initialization of `conv_offset`
for m in self.modules():
# DeformConv2dPack, ModulatedDeformConv2dPack
if hasattr(m, 'conv_offset'):
constant_init(m.conv_offset, 0)
def loss(self, x: Tuple[Tensor], batch_data_samples: SampleList,
**kwargs) -> dict:
"""Perform forward propagation and loss calculation of the detection
head on the features of the upstream network.
Args:
x (tuple[Tensor]): Features from the upstream network, each is
a 4D-tensor.
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
Samples. It usually includes information such as
`gt_instance`, `gt_panoptic_seg` and `gt_sem_seg`.
Returns:
dict: A dictionary of loss components.
"""
outs = self(x)
batch_gt_instances_3d = []
batch_gt_instances_ignore = []
batch_input_metas = []
for data_sample in batch_data_samples:
batch_input_metas.append(data_sample.metainfo)
batch_gt_instances_3d.append(data_sample.gt_instances_3d)
batch_gt_instances_ignore.append(
data_sample.get('ignored_instances', None))
loss_inputs = outs + (batch_gt_instances_3d, batch_input_metas,
batch_gt_instances_ignore)
losses = self.loss_by_feat(*loss_inputs)
return losses
@abstractmethod
def loss_by_feat(self, **kwargs) -> dict:
"""Calculate the loss based on the features extracted by the detection
head."""
pass
def loss_and_predict(self,
x: Tuple[Tensor],
batch_data_samples: SampleList,
proposal_cfg: Optional[ConfigDict] = None,
**kwargs) -> Tuple[dict, InstanceList]:
"""Perform forward propagation of the head, then calculate loss and
predictions from the features and data samples.
Args:
x (tuple[Tensor]): Features from FPN.
            batch_data_samples (list[:obj:`Det3DDataSample`]): Each item
                contains the meta information of each sample and
                corresponding annotations.
proposal_cfg (ConfigDict, optional): Test / postprocessing
configuration, if None, test_cfg would be used.
Defaults to None.
Returns:
            tuple: The return value is a tuple that contains:
- losses: (dict[str, Tensor]): A dictionary of loss components.
- predictions (list[:obj:`InstanceData`]): Detection
results of each image after the post process.
"""
batch_gt_instances = []
batch_gt_instances_ignore = []
batch_input_metas = []
for data_sample in batch_data_samples:
batch_input_metas.append(data_sample.metainfo)
batch_gt_instances.append(data_sample.gt_instances_3d)
batch_gt_instances_ignore.append(
data_sample.get('ignored_instances', None))
outs = self(x)
loss_inputs = outs + (batch_gt_instances, batch_input_metas,
batch_gt_instances_ignore)
losses = self.loss_by_feat(*loss_inputs)
predictions = self.predict_by_feat(
*outs, batch_input_metas=batch_input_metas, cfg=proposal_cfg)
return losses, predictions
def predict(self,
x: Tuple[Tensor],
batch_data_samples: SampleList,
rescale: bool = False) -> InstanceList:
"""Perform forward propagation of the 3D detection head and predict
detection results on the features of the upstream network.
Args:
x (tuple[Tensor]): Multi-level features from the
upstream network, each is a 4D-tensor.
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
Samples. It usually includes information such as
`gt_instance_3d`, `gt_pts_panoptic_seg` and
`gt_pts_sem_seg`.
rescale (bool, optional): Whether to rescale the results.
Defaults to False.
Returns:
list[:obj:`InstanceData`]: Detection results of each sample
after the post process.
Each item usually contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instances, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (BaseInstance3DBoxes): Prediction of bboxes,
contains a tensor with shape (num_instances, C), where
C >= 7.
"""
batch_input_metas = [
data_samples.metainfo for data_samples in batch_data_samples
]
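        # ``metainfo`` holds per-sample information such as ``box_type_3d``,
        # which ``predict_by_feat`` needs to rebuild 3D boxes during
        # post-processing.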
outs = self(x)
predictions = self.predict_by_feat(
*outs, batch_input_metas=batch_input_metas, rescale=rescale)
return predictions
def predict_by_feat(self,
cls_scores: List[Tensor],
bbox_preds: List[Tensor],
dir_cls_preds: List[Tensor],
batch_input_metas: Optional[List[dict]] = None,
cfg: Optional[ConfigDict] = None,
rescale: bool = False,
**kwargs) -> InstanceList:
"""Transform a batch of output features extracted from the head into
bbox results.
Args:
cls_scores (list[Tensor]): Classification scores for all
scale levels, each is a 4D-tensor, has shape
(batch_size, num_priors * num_classes, H, W).
            bbox_preds (list[Tensor]): Box energies / deltas for all
                scale levels, each is a 4D-tensor, has shape
                (batch_size, num_priors * C, H, W).
            dir_cls_preds (list[Tensor]): Direction class predictions for
                all scale levels, each is a 4D-tensor, has shape
                (batch_size, num_priors * 2, H, W).
batch_input_metas (list[dict], Optional): Batch image meta info.
Defaults to None.
cfg (ConfigDict, optional): Test / postprocessing
configuration, if None, test_cfg would be used.
Defaults to None.
rescale (bool): If True, return boxes in original image space.
Defaults to False.
Returns:
list[:obj:`InstanceData`]: Detection results of each sample
after the post process.
Each item usually contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instances, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (BaseInstance3DBoxes): Prediction of bboxes,
contains a tensor with shape (num_instances, C), where
C >= 7.
"""
assert len(cls_scores) == len(bbox_preds)
assert len(cls_scores) == len(dir_cls_preds)
num_levels = len(cls_scores)
featmap_sizes = [cls_scores[i].shape[-2:] for i in range(num_levels)]
mlvl_priors = self.prior_generator.grid_anchors(
featmap_sizes, device=cls_scores[0].device)
mlvl_priors = [
prior.reshape(-1, self.box_code_size) for prior in mlvl_priors
]
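        # ``mlvl_priors`` now holds the flattened anchors of every feature
        # level, one row per anchor encoded with ``box_code_size`` values.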
result_list = []
for input_id in range(len(batch_input_metas)):
input_meta = batch_input_metas[input_id]
cls_score_list = select_single_mlvl(cls_scores, input_id)
bbox_pred_list = select_single_mlvl(bbox_preds, input_id)
dir_cls_pred_list = select_single_mlvl(dir_cls_preds, input_id)
results = self._predict_by_feat_single(
cls_score_list=cls_score_list,
bbox_pred_list=bbox_pred_list,
dir_cls_pred_list=dir_cls_pred_list,
mlvl_priors=mlvl_priors,
input_meta=input_meta,
cfg=cfg,
rescale=rescale,
**kwargs)
result_list.append(results)
return result_list
def _predict_by_feat_single(self,
cls_score_list: List[Tensor],
bbox_pred_list: List[Tensor],
dir_cls_pred_list: List[Tensor],
mlvl_priors: List[Tensor],
input_meta: dict,
cfg: ConfigDict,
rescale: bool = False,
**kwargs) -> InstanceData:
"""Transform a single image's features extracted from the head into
bbox results.
Args:
cls_score_list (list[Tensor]): Box scores from all scale
levels of a single point cloud sample, each item has shape
(num_priors * num_classes, H, W).
bbox_pred_list (list[Tensor]): Box energies / deltas from
all scale levels of a single point cloud sample, each item
has shape (num_priors * C, H, W).
dir_cls_pred_list (list[Tensor]): Predictions of direction class
from all scale levels of a single point cloud sample, each
item has shape (num_priors * 2, H, W).
            mlvl_priors (list[Tensor]): Each element in the list is
                the priors of a single level in the feature pyramid,
                with shape (num_priors, box_code_size), matching the
                flattened anchors generated for that level.
input_meta (dict): Contain point clouds and image meta info.
cfg (:obj:`ConfigDict`): Test / postprocessing configuration,
if None, test_cfg would be used.
rescale (bool): If True, return boxes in original image space.
Defaults to False.
Returns:
:obj:`InstanceData`: Detection results of each image
after the post process.
Each item usually contains following keys.
            - scores_3d (Tensor): Classification scores, has a shape
                (num_instances, )
            - labels_3d (Tensor): Labels of bboxes, has a shape
                (num_instances, ).
            - bboxes_3d (BaseInstance3DBoxes): Prediction of bboxes,
                contains a tensor with shape (num_instances, C), where
                C >= 7.
"""
cfg = self.test_cfg if cfg is None else cfg
assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_priors)
mlvl_bboxes = []
mlvl_scores = []
mlvl_dir_scores = []
for cls_score, bbox_pred, dir_cls_pred, priors in zip(
cls_score_list, bbox_pred_list, dir_cls_pred_list,
mlvl_priors):
assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
assert cls_score.size()[-2:] == dir_cls_pred.size()[-2:]
dir_cls_pred = dir_cls_pred.permute(1, 2, 0).reshape(-1, 2)
dir_cls_score = torch.max(dir_cls_pred, dim=-1)[1]
cls_score = cls_score.permute(1, 2,
0).reshape(-1, self.num_classes)
if self.use_sigmoid_cls:
scores = cls_score.sigmoid()
else:
scores = cls_score.softmax(-1)
bbox_pred = bbox_pred.permute(1, 2,
0).reshape(-1, self.box_code_size)
nms_pre = cfg.get('nms_pre', -1)
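            # Keep only the top ``nms_pre`` scoring candidates of this level
            # before decoding and NMS to bound the post-processing cost.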
if nms_pre > 0 and scores.shape[0] > nms_pre:
if self.use_sigmoid_cls:
max_scores, _ = scores.max(dim=1)
else:
max_scores, _ = scores[:, :-1].max(dim=1)
_, topk_inds = max_scores.topk(nms_pre)
priors = priors[topk_inds, :]
bbox_pred = bbox_pred[topk_inds, :]
scores = scores[topk_inds, :]
dir_cls_score = dir_cls_score[topk_inds]
bboxes = self.bbox_coder.decode(priors, bbox_pred)
mlvl_bboxes.append(bboxes)
mlvl_scores.append(scores)
mlvl_dir_scores.append(dir_cls_score)
mlvl_bboxes = torch.cat(mlvl_bboxes)
mlvl_bboxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](
mlvl_bboxes, box_dim=self.box_code_size).bev)
mlvl_scores = torch.cat(mlvl_scores)
mlvl_dir_scores = torch.cat(mlvl_dir_scores)
if self.use_sigmoid_cls:
            # Append a dummy background score column when using sigmoid,
            # since box3d_multiclass_nms expects the background class in
            # the last column.
padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)
score_thr = cfg.get('score_thr', 0)
results = box3d_multiclass_nms(mlvl_bboxes, mlvl_bboxes_for_nms,
mlvl_scores, score_thr, cfg.max_num,
cfg, mlvl_dir_scores)
bboxes, scores, labels, dir_scores = results
if bboxes.shape[0] > 0:
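            # Fold the direction classification back into the yaw angle:
            # wrap the regressed angle into a half period relative to
            # ``dir_offset``, then add pi for boxes whose direction
            # classifier predicted the opposite heading.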
dir_rot = limit_period(bboxes[..., 6] - self.dir_offset,
self.dir_limit_offset, np.pi)
bboxes[..., 6] = (
dir_rot + self.dir_offset +
np.pi * dir_scores.to(bboxes.dtype))
bboxes = input_meta['box_type_3d'](bboxes, box_dim=self.box_code_size)
results = InstanceData()
results.bboxes_3d = bboxes
results.scores_3d = scores
results.labels_3d = labels
return results
# TODO: Support augmentation test
def aug_test(self,
aug_batch_feats,
aug_batch_input_metas,
rescale=False,
with_ori_nms=False,
**kwargs):
pass
 # Copyright (c) OpenMMLab. All rights reserved.
+from typing import List, Tuple
 import torch
 from mmcv.runner import force_fp32
+from torch import Tensor
 from torch.nn import functional as F
+from mmdet3d.core.utils import ConfigType, OptConfigType, OptMultiConfig
 from mmdet3d.registry import MODELS
 from .voxelnet import VoxelNet
...
@@ -13,17 +17,17 @@ class DynamicVoxelNet(VoxelNet):
     """
     def __init__(self,
-                 voxel_layer,
-                 voxel_encoder,
-                 middle_encoder,
-                 backbone,
-                 neck=None,
-                 bbox_head=None,
-                 train_cfg=None,
-                 test_cfg=None,
-                 pretrained=None,
-                 init_cfg=None):
-        super(DynamicVoxelNet, self).__init__(
+                 voxel_layer: ConfigType,
+                 voxel_encoder: ConfigType,
+                 middle_encoder: ConfigType,
+                 backbone: ConfigType,
+                 neck: OptConfigType = None,
+                 bbox_head: OptConfigType = None,
+                 train_cfg: OptConfigType = None,
+                 test_cfg: OptConfigType = None,
+                 data_preprocessor: OptConfigType = None,
+                 init_cfg: OptMultiConfig = None) -> None:
+        super().__init__(
             voxel_layer=voxel_layer,
             voxel_encoder=voxel_encoder,
             middle_encoder=middle_encoder,
...
@@ -32,30 +36,19 @@ class DynamicVoxelNet(VoxelNet):
             bbox_head=bbox_head,
             train_cfg=train_cfg,
             test_cfg=test_cfg,
-            pretrained=pretrained,
+            data_preprocessor=data_preprocessor,
             init_cfg=init_cfg)
-    def extract_feat(self, points, img_metas):
-        """Extract features from points."""
-        voxels, coors = self.voxelize(points)
-        voxel_features, feature_coors = self.voxel_encoder(voxels, coors)
-        batch_size = coors[-1, 0].item() + 1
-        x = self.middle_encoder(voxel_features, feature_coors, batch_size)
-        x = self.backbone(x)
-        if self.with_neck:
-            x = self.neck(x)
-        return x
     @torch.no_grad()
     @force_fp32()
-    def voxelize(self, points):
+    def voxelize(self, points: List[torch.Tensor]) -> tuple:
         """Apply dynamic voxelization to points.
         Args:
-            points (list[torch.Tensor]): Points of each sample.
+            points (list[Tensor]): Points of each sample.
         Returns:
-            tuple[torch.Tensor]: Concatenated points and coordinates.
+            tuple[Tensor]: Concatenated points and coordinates.
         """
         coors = []
         # dynamic voxelization only provide a coors mapping
...
@@ -69,3 +62,16 @@ class DynamicVoxelNet(VoxelNet):
             coors_batch.append(coor_pad)
         coors_batch = torch.cat(coors_batch, dim=0)
         return points, coors_batch
+    def extract_feat(self, batch_inputs_dict: dict) -> Tuple[Tensor]:
+        """Extract features from points."""
+        # TODO: Remove voxelization to datapreprocessor
+        points = batch_inputs_dict['points']
+        voxels, coors = self.voxelize(points)
+        voxel_features, feature_coors = self.voxel_encoder(voxels, coors)
+        batch_size = coors[-1, 0].item() + 1
+        x = self.middle_encoder(voxel_features, feature_coors, batch_size)
+        x = self.backbone(x)
+        if self.with_neck:
+            x = self.neck(x)
+        return x
 # Copyright (c) OpenMMLab. All rights reserved.
-from typing import List, Tuple, Union
+from typing import Dict, List, Tuple, Union
 import torch
+from torch import Tensor
 from mmdet3d.core.utils import (ConfigType, OptConfigType, OptMultiConfig,
                                 OptSampleList, SampleList)
...
@@ -134,12 +135,19 @@ class SingleStage3DDetector(Base3DDetector):
         results = self.bbox_head.forward(x)
         return results
-    def extract_feat(self,
-                     batch_inputs_dict: torch.Tensor) -> Tuple[torch.Tensor]:
+    def extract_feat(
+        self, batch_inputs_dict: torch.Tensor
+    ) -> Union[Tuple[torch.Tensor], Dict[str, Tensor]]:
         """Directly extract features from the backbone+neck.
         Args:
             points (torch.Tensor): Input points.
+        Returns:
+            tuple[Tensor] | dict: For outdoor 3D object detection, we
+                typically obtain a tuple of features from the backbone + neck,
+                while for indoor 3D object detection a dict containing
+                features is usually obtained.
         """
         points = batch_inputs_dict['points']
         stack_points = torch.stack(points)
...
 # Copyright (c) OpenMMLab. All rights reserved.
-from typing import Dict, List, Optional
+from typing import List, Tuple
 import torch
 from mmcv.ops import Voxelization
 from mmcv.runner import force_fp32
+from torch import Tensor
 from torch.nn import functional as F
-from mmdet3d.core import Det3DDataSample
+from mmdet3d.core.utils import ConfigType, OptConfigType, OptMultiConfig
 from mmdet3d.registry import MODELS
 from .single_stage import SingleStage3DDetector
...
@@ -16,39 +17,28 @@ class VoxelNet(SingleStage3DDetector):
     r"""`VoxelNet <https://arxiv.org/abs/1711.06396>`_ for 3D detection."""
     def __init__(self,
-                 voxel_layer: dict,
-                 voxel_encoder: dict,
-                 middle_encoder: dict,
-                 backbone: dict,
-                 neck: Optional[dict] = None,
-                 bbox_head: Optional[dict] = None,
-                 train_cfg: Optional[dict] = None,
-                 test_cfg: Optional[dict] = None,
-                 init_cfg: Optional[dict] = None,
-                 pretrained: Optional[str] = None) -> None:
-        super(VoxelNet, self).__init__(
+                 voxel_layer: ConfigType,
+                 voxel_encoder: ConfigType,
+                 middle_encoder: ConfigType,
+                 backbone: ConfigType,
+                 neck: OptConfigType = None,
+                 bbox_head: OptConfigType = None,
+                 train_cfg: OptConfigType = None,
+                 test_cfg: OptConfigType = None,
+                 data_preprocessor: OptConfigType = None,
+                 init_cfg: OptMultiConfig = None) -> None:
+        super().__init__(
             backbone=backbone,
             neck=neck,
             bbox_head=bbox_head,
             train_cfg=train_cfg,
             test_cfg=test_cfg,
-            init_cfg=init_cfg,
-            pretrained=pretrained)
+            data_preprocessor=data_preprocessor,
+            init_cfg=init_cfg)
         self.voxel_layer = Voxelization(**voxel_layer)
         self.voxel_encoder = MODELS.build(voxel_encoder)
         self.middle_encoder = MODELS.build(middle_encoder)
-    def extract_feat(self, points: List[torch.Tensor]) -> list:
-        """Extract features from points."""
-        voxels, num_points, coors = self.voxelize(points)
-        voxel_features = self.voxel_encoder(voxels, num_points, coors)
-        batch_size = coors[-1, 0].item() + 1
-        x = self.middle_encoder(voxel_features, coors, batch_size)
-        x = self.backbone(x)
-        if self.with_neck:
-            x = self.neck(x)
-        return x
     @torch.no_grad()
     @force_fp32()
     def voxelize(self, points: List[torch.Tensor]) -> tuple:
...
@@ -68,75 +58,15 @@ class VoxelNet(SingleStage3DDetector):
         coors_batch = torch.cat(coors_batch, dim=0)
         return voxels, num_points, coors_batch
-    def forward_train(self, batch_inputs_dict: Dict[list, torch.Tensor],
-                      batch_data_samples: List[Det3DDataSample],
-                      **kwargs) -> dict:
-        """
-        Args:
-            batch_inputs_dict (dict): The model input dict. It should contain
-                ``points`` and ``img`` keys.
-                - points (list[torch.Tensor]): Point cloud of each sample.
-                - imgs (torch.Tensor, optional): Image of each sample.
-            batch_data_samples (list[:obj:`Det3DDataSample`]): The batch
-                data samples. It usually includes information such
-                as `gt_instance_3d` or `gt_panoptic_seg_3d` or `gt_sem_seg_3d`.
-        Returns:
-            dict[str, Tensor]: A dictionary of loss components.
-        """
-        x = self.extract_feat(batch_inputs_dict['points'])
-        losses = self.bbox_head.forward_train(x, batch_data_samples, **kwargs)
-        return losses
-    def simple_test(self,
-                    batch_inputs_dict: Dict[list, torch.Tensor],
-                    batch_input_metas: List[dict],
-                    rescale: bool = False) -> list:
-        """Test function without test-time augmentation.
-        Args:
-            batch_inputs_dict (dict): The model input dict. It should contain
-                ``points`` and ``img`` keys.
-                - points (list[torch.Tensor]): Point cloud of single
-                    sample.
-                - imgs (torch.Tensor, optional): Image of single sample.
-            batch_input_metas (list[dict]): List of input information.
-            rescale (bool, optional): Whether to rescale the results.
-                Defaults to False.
-        Returns:
-            list[:obj:`Det3DDataSample`]: Detection results of the \
-                inputs. Each Det3DDataSample usually contain \
-                'pred_instances_3d'. And the ``pred_instances_3d`` usually \
-                contains following keys.
-            - scores_3d (Tensor): Classification scores, has a shape
-                (num_instances, )
-            - labels_3d (Tensor): Labels of bboxes, has a shape
-                (num_instances, ).
-            - bboxes_3d (:obj:`BaseInstance3DBoxes`): Prediction of bboxes,
-                contains a tensor with shape (num_instances, 7).
-        """
-        x = self.extract_feat(batch_inputs_dict['points'])
-        bboxes_list = self.bbox_head.simple_test(
-            x, batch_input_metas, rescale=rescale)
-        # connvert to Det3DDataSample
-        results_list = self.postprocess_result(bboxes_list)
-        return results_list
-    def aug_test(self,
-                 aug_batch_inputs_dict: Dict[list, torch.Tensor],
-                 aug_batch_input_metas: List[dict],
-                 rescale: bool = False) -> list:
-        """Test function with augmentaiton."""
-        # TODO Refactor this after mmdet update
-        feats = self.extract_feats(aug_batch_inputs_dict)
-        aug_bboxes = self.bbox_head.aug_test(
-            feats, aug_batch_input_metas, rescale=rescale)
-        return aug_bboxes
+    def extract_feat(self, batch_inputs_dict: dict) -> Tuple[Tensor]:
+        """Extract features from points."""
+        # TODO: Remove voxelization to datapreprocessor
+        points = batch_inputs_dict['points']
+        voxels, num_points, coors = self.voxelize(points)
+        voxel_features = self.voxel_encoder(voxels, num_points, coors)
+        batch_size = coors[-1, 0].item() + 1
+        x = self.middle_encoder(voxel_features, coors, batch_size)
+        x = self.backbone(x)
+        if self.with_neck:
+            x = self.neck(x)
+        return x
# Copyright (c) OpenMMLab. All rights reserved.
from unittest import TestCase
import torch
from mmengine import Config
from mmengine.data import InstanceData
from mmdet3d import * # noqa
from mmdet3d.core.bbox import Box3DMode, LiDARInstance3DBoxes
from mmdet3d.models.dense_heads import Anchor3DHead
class TestAnchor3DHead(TestCase):
def test_anchor3d_head_loss(self):
"""Test anchor head loss when truth is empty and non-empty."""
cfg = Config(
dict(
assigner=[
dict( # for Pedestrian
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.35,
neg_iou_thr=0.2,
min_pos_iou=0.2,
ignore_iof_thr=-1),
dict( # for Cyclist
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.35,
neg_iou_thr=0.2,
min_pos_iou=0.2,
ignore_iof_thr=-1),
dict( # for Car
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
],
allowed_border=0,
pos_weight=-1,
debug=False))
anchor3d_head = Anchor3DHead(
num_classes=3,
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -1.78, 70.4, 40.0, -1.78],
],
sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]],
rotations=[0, 1.57],
reshape_out=False),
diff_rad_by_sin=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='mmdet.FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(
type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=False,
loss_weight=0.2),
train_cfg=cfg)
        # Anchor head expects multiple levels of features per image
feats = (torch.rand([1, 512, 200, 176], dtype=torch.float32), )
(cls_scores, bbox_preds, dir_cls_preds) = anchor3d_head.forward(feats)
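        # 3 anchor sizes x 2 rotations = 6 anchors per location, so the head
        # should output 6 * 3 classes = 18 cls channels, 6 * 7 box parameters
        # = 42 bbox channels and 6 * 2 = 12 direction channels per cell.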
self.assertEqual(cls_scores[0].shape, torch.Size([1, 18, 200, 176]))
self.assertEqual(bbox_preds[0].shape, torch.Size([1, 42, 200, 176]))
self.assertEqual(dir_cls_preds[0].shape, torch.Size([1, 12, 200, 176]))
        # Test that empty ground truth encourages the network to
        # predict background
gt_instances = InstanceData()
gt_bboxes_3d = LiDARInstance3DBoxes(torch.empty((0, 7)))
gt_labels_3d = torch.tensor([])
input_metas = dict(sample_idx=1234)
# fake input_metas
gt_instances.bboxes_3d = gt_bboxes_3d
gt_instances.labels_3d = gt_labels_3d
empty_gt_losses = anchor3d_head.loss_by_feat(cls_scores, bbox_preds,
dir_cls_preds,
[gt_instances],
[input_metas])
# When there is no truth, the cls loss should be nonzero but
# there should be no box and dir loss.
self.assertGreater(empty_gt_losses['loss_cls'][0], 0,
'cls loss should be non-zero')
self.assertEqual(
empty_gt_losses['loss_bbox'][0], 0,
'there should be no box loss when there are no true boxes')
self.assertEqual(
empty_gt_losses['loss_dir'][0], 0,
'there should be no dir loss when there are no true dirs')
# When truth is non-empty then both cls and box loss
# should be nonzero for random inputs
gt_instances = InstanceData()
gt_bboxes_3d = LiDARInstance3DBoxes(
torch.tensor(
[[6.4118, -3.4305, -1.7291, 1.7033, 3.4693, 1.6197, -0.9091]],
dtype=torch.float32))
gt_labels_3d = torch.tensor([1], dtype=torch.int64)
gt_instances.bboxes_3d = gt_bboxes_3d
gt_instances.labels_3d = gt_labels_3d
gt_losses = anchor3d_head.loss_by_feat(cls_scores, bbox_preds,
dir_cls_preds, [gt_instances],
[input_metas])
self.assertGreater(gt_losses['loss_cls'][0], 0,
'cls loss should be non-zero')
self.assertGreater(gt_losses['loss_bbox'][0], 0,
'box loss should be non-zero')
self.assertGreater(gt_losses['loss_dir'][0], 0,
                           'dir loss should be non-zero')
def test_anchor3d_head_predict(self):
cfg = Config(
dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_thr=0.01,
score_thr=0.1,
min_bbox_size=0,
nms_pre=100,
max_num=50))
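        # ``nms_pre`` caps the candidates kept per level before NMS and
        # ``max_num`` caps the number of boxes returned after NMS.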
anchor3d_head = Anchor3DHead(
num_classes=3,
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -1.78, 70.4, 40.0, -1.78],
],
sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]],
rotations=[0, 1.57],
reshape_out=False),
diff_rad_by_sin=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='mmdet.FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(
type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=False,
loss_weight=0.2),
test_cfg=cfg)
feats = (torch.rand([2, 512, 200, 176], dtype=torch.float32), )
(cls_scores, bbox_preds, dir_cls_preds) = anchor3d_head.forward(feats)
# fake input_metas
input_metas = [{
'sample_idx': 1234,
'box_type_3d': LiDARInstance3DBoxes,
'box_mode_3d': Box3DMode.LIDAR
}, {
'sample_idx': 2345,
'box_type_3d': LiDARInstance3DBoxes,
'box_mode_3d': Box3DMode.LIDAR
}]
# test get_boxes
cls_scores[0] -= 1.5 # too many positive samples may cause cuda oom
results = anchor3d_head.predict_by_feat(cls_scores, bbox_preds,
dir_cls_preds, input_metas)
pred_instances = results[0]
scores_3d = pred_instances.scores_3d
assert (scores_3d > 0.3).all()