Commit 7c6810e3 authored by VVsssssk's avatar VVsssssk Committed by ChaimZhu

[Refactor] Refactor PointPillars model interface

parent 49a1e555
......@@ -48,34 +48,36 @@ model = dict(
diff_rad_by_sin=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='FocalLoss',
type='mmdet.FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_bbox=dict(
type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
type='mmdet.CrossEntropyLoss', use_sigmoid=False,
loss_weight=0.2)),
# model training and testing settings
train_cfg=dict(
assigner=[
dict( # for Pedestrian
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
type='Max3DIoUAssigner',
iou_calculator=dict(type='mmdet3d.BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.35,
min_pos_iou=0.35,
ignore_iof_thr=-1),
dict( # for Cyclist
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
type='Max3DIoUAssigner',
iou_calculator=dict(type='mmdet3d.BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.35,
min_pos_iou=0.35,
ignore_iof_thr=-1),
dict( # for Car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
type='Max3DIoUAssigner',
iou_calculator=dict(type='mmdet3d.BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
......
......@@ -17,8 +17,8 @@ model = dict(
train_cfg=dict(
_delete_=True,
assigner=dict(
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
type='Max3DIoUAssigner',
iou_calculator=dict(type='mmdet3d.BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
......
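The key change in these configs is that cross-library components are now referenced with an explicit registry scope prefix. A minimal sketch of how such a scoped type string resolves, assuming the registry setup this commit targets:

from mmdet3d.registry import TASK_UTILS

# The 'mmdet3d.' prefix pins the lookup to the mmdet3d scope, while
# unprefixed names resolve in the current default scope; both go
# through the same build call.
iou_calculator = TASK_UTILS.build(dict(type='mmdet3d.BboxOverlapsNearest3D'))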
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet.core.bbox import AssignResult, BaseAssigner, MaxIoUAssigner
from mmdet.core.bbox import AssignResult, BaseAssigner
from .max_3d_iou_assigner import MaxIoUAssigner
__all__ = ['BaseAssigner', 'MaxIoUAssigner', 'AssignResult']
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional, Union
from mmengine.data import InstanceData
from mmdet3d.registry import TASK_UTILS
from mmdet.core.bbox.assigners import MaxIoUAssigner
from ..assigners import AssignResult
@TASK_UTILS.register_module()
class Max3DIoUAssigner(MaxIoUAssigner):
# TODO: This is a temporary box assigner.
"""Assign a corresponding gt bbox or background to each bbox.
Each proposal will be assigned with `-1`, or a semi-positive integer
indicating the ground truth index.
- -1: negative sample, no assigned gt
- semi-positive integer: positive sample, index (0-based) of assigned gt
Args:
pos_iou_thr (float): IoU threshold for positive bboxes.
neg_iou_thr (float or tuple): IoU threshold for negative bboxes.
min_pos_iou (float): Minimum iou for a bbox to be considered as a
positive bbox. Positive samples can have smaller IoU than
pos_iou_thr due to the 4th step (assign max IoU sample to each gt).
`min_pos_iou` is set to avoid assigning bboxes that have extremely
small iou with GT as positive samples.
gt_max_assign_all (bool): Whether to assign all bboxes with the same
highest overlap with some gt to that gt.
ignore_iof_thr (float): IoF threshold for ignoring bboxes (if
`gt_bboxes_ignore` is specified). Negative values mean not
ignoring any bboxes.
ignore_wrt_candidates (bool): Whether to compute the iof between
`bboxes` and `gt_bboxes_ignore`, or the contrary.
match_low_quality (bool): Whether to allow low quality matches. This is
usually allowed for RPN and single stage detectors, but not allowed
in the second stage. Details are demonstrated in Step 4.
gpu_assign_thr (int): The upper bound of the number of GT for GPU
assign. When the number of gt is above this threshold, will assign
on CPU device. Negative values mean not assign on CPU.
iou_calculator (dict): Config of overlaps Calculator.
"""
def __init__(self,
pos_iou_thr: float,
neg_iou_thr: Union[float, tuple],
min_pos_iou: float = .0,
gt_max_assign_all: bool = True,
ignore_iof_thr: float = -1,
ignore_wrt_candidates: bool = True,
match_low_quality: bool = True,
gpu_assign_thr: float = -1,
iou_calculator: dict = dict(type='BboxOverlaps2D')):
self.pos_iou_thr = pos_iou_thr
self.neg_iou_thr = neg_iou_thr
self.min_pos_iou = min_pos_iou
self.gt_max_assign_all = gt_max_assign_all
self.ignore_iof_thr = ignore_iof_thr
self.ignore_wrt_candidates = ignore_wrt_candidates
self.gpu_assign_thr = gpu_assign_thr
self.match_low_quality = match_low_quality
self.iou_calculator = TASK_UTILS.build(iou_calculator)
def assign(self,
pred_instances: InstanceData,
gt_instances: InstanceData,
gt_instances_ignore: Optional[InstanceData] = None,
**kwargs) -> AssignResult:
"""Assign gt to bboxes.
This method assigns a gt bbox to every bbox (proposal/anchor), each bbox
will be assigned with -1, or a semi-positive number. -1 means negative
sample, semi-positive number is the index (0-based) of assigned gt.
The assignment is done in following steps, the order matters.
1. assign every bbox to the background
2. assign proposals whose iou with all gts < neg_iou_thr to 0
3. for each bbox, if the iou with its nearest gt >= pos_iou_thr,
assign it to that bbox
4. for each gt bbox, assign its nearest proposals (may be more than
one) to itself
Args:
pred_instances (:obj:`InstanceData`): Instances of model
predictions. It includes ``priors``, and the priors can
be anchors or points, or the bboxes predicted by the
previous stage, has shape (n, 4). The bboxes predicted by
the current model or stage will be named ``bboxes``,
``labels``, and ``scores``, the same as the ``InstanceData``
in other places.
gt_instances (:obj:`InstanceData`): Ground truth of instance
annotations. It usually includes ``bboxes``, with shape (k, 4),
and ``labels``, with shape (k, ).
gt_instances_ignore (:obj:`InstanceData`, optional): Instances
to be ignored during training. It includes ``bboxes``
attribute data that is ignored during training and testing.
Defaults to None.
Returns:
:obj:`AssignResult`: The assign result.
Example:
>>> from mmengine.data import InstanceData
>>> self = MaxIoUAssigner(0.5, 0.5)
>>> pred_instances = InstanceData()
>>> pred_instances.priors = torch.Tensor([[0, 0, 10, 10],
... [10, 10, 20, 20]])
>>> gt_instances = InstanceData()
>>> gt_instances.bboxes = torch.Tensor([[0, 0, 10, 9]])
>>> gt_instances.labels = torch.Tensor([0])
>>> assign_result = self.assign(pred_instances, gt_instances)
>>> expected_gt_inds = torch.LongTensor([1, 0])
>>> assert torch.all(assign_result.gt_inds == expected_gt_inds)
"""
gt_bboxes = gt_instances.bboxes_3d
priors = pred_instances.priors
gt_labels = gt_instances.labels_3d
if gt_instances_ignore is not None:
gt_bboxes_ignore = gt_instances_ignore.bboxes_3d
else:
gt_bboxes_ignore = None
assign_on_cpu = True if (self.gpu_assign_thr > 0) and (
gt_bboxes.shape[0] > self.gpu_assign_thr) else False
# compute overlap and assign gt on CPU when number of GT is large
if assign_on_cpu:
device = priors.device
priors = priors.cpu()
gt_bboxes = gt_bboxes.cpu()
gt_labels = gt_labels.cpu()
if gt_bboxes_ignore is not None:
gt_bboxes_ignore = gt_bboxes_ignore.cpu()
overlaps = self.iou_calculator(gt_bboxes, priors)
if (self.ignore_iof_thr > 0 and gt_bboxes_ignore is not None
and gt_bboxes_ignore.numel() > 0 and priors.numel() > 0):
if self.ignore_wrt_candidates:
ignore_overlaps = self.iou_calculator(
priors, gt_bboxes_ignore, mode='iof')
ignore_max_overlaps, _ = ignore_overlaps.max(dim=1)
else:
ignore_overlaps = self.iou_calculator(
gt_bboxes_ignore, priors, mode='iof')
ignore_max_overlaps, _ = ignore_overlaps.max(dim=0)
overlaps[:, ignore_max_overlaps > self.ignore_iof_thr] = -1
assign_result = self.assign_wrt_overlaps(overlaps, gt_labels)
if assign_on_cpu:
assign_result.gt_inds = assign_result.gt_inds.to(device)
assign_result.max_overlaps = assign_result.max_overlaps.to(device)
if assign_result.labels is not None:
assign_result.labels = assign_result.labels.to(device)
return assign_result
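A hedged usage sketch of the new InstanceData-based assign interface; the thresholds match the configs above, but the tensor shapes are illustrative and assume the dev-branch mmdet base class provides assign_wrt_overlaps:

import torch
from mmengine.data import InstanceData
from mmdet3d.registry import TASK_UTILS

# Build the assigner from config, as the head does via TASK_UTILS.
assigner = TASK_UTILS.build(
    dict(
        type='Max3DIoUAssigner',
        iou_calculator=dict(type='mmdet3d.BboxOverlapsNearest3D'),
        pos_iou_thr=0.6,
        neg_iou_thr=0.45,
        min_pos_iou=0.45,
        ignore_iof_thr=-1))

pred_instances = InstanceData()
pred_instances.priors = torch.rand([100, 7])  # flattened 3D anchors
gt_instances = InstanceData()
gt_instances.bboxes_3d = torch.rand([5, 7])   # gt boxes already as tensors
gt_instances.labels_3d = torch.randint(0, 3, [5])

assign_result = assigner.assign(pred_instances, gt_instances)
# gt_inds: 0 means negative, a positive value is the 1-based gt index.
assert assign_result.gt_inds.shape == torch.Size([100])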
......@@ -2,9 +2,9 @@
from mmdet.core.bbox.samplers import (BaseSampler, CombinedSampler,
InstanceBalancedPosSampler,
IoUBalancedNegSampler, OHEMSampler,
PseudoSampler, RandomSampler,
SamplingResult)
RandomSampler, SamplingResult)
from .iou_neg_piecewise_sampler import IoUNegPiecewiseSampler
from .pseudosample import PseudoSampler
__all__ = [
'BaseSampler', 'PseudoSampler', 'RandomSampler',
......
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from mmengine.data import InstanceData
from mmdet3d.registry import TASK_UTILS
from mmdet.core.bbox.assigners import AssignResult
from ..samplers import BaseSampler, SamplingResult
@TASK_UTILS.register_module()
class PseudoSampler(BaseSampler):
"""A pseudo sampler that does not do sampling actually."""
# TODO: This is a temporary pseudo sampler.
def __init__(self, **kwargs):
pass
def _sample_pos(self, **kwargs):
"""Sample positive samples."""
raise NotImplementedError
def _sample_neg(self, **kwargs):
"""Sample negative samples."""
raise NotImplementedError
def sample(self, assign_result: AssignResult, pred_instances: InstanceData,
gt_instances: InstanceData, *args, **kwargs):
"""Directly returns the positive and negative indices of samples.
Args:
assign_result (:obj:`AssignResult`): Bbox assigning results.
pred_instances (:obj:`InstanceData`): Instances of model
predictions. It includes ``priors``, and the priors can
be anchors, points, or bboxes predicted by the model,
shape(n, 4).
gt_instances (:obj:`InstanceData`): Ground truth of instance
annotations. It usually includes ``bboxes`` and ``labels``
attributes.
Returns:
:obj:`SamplingResult`: sampler results
"""
gt_bboxes = gt_instances.bboxes_3d
priors = pred_instances.priors
pos_inds = torch.nonzero(
assign_result.gt_inds > 0, as_tuple=False).squeeze(-1).unique()
neg_inds = torch.nonzero(
assign_result.gt_inds == 0, as_tuple=False).squeeze(-1).unique()
gt_flags = priors.new_zeros(priors.shape[0], dtype=torch.uint8)
sampling_result = SamplingResult(
pos_inds=pos_inds,
neg_inds=neg_inds,
priors=priors,
gt_bboxes=gt_bboxes,
assign_result=assign_result,
gt_flags=gt_flags,
avg_factor_with_neg=False)
return sampling_result
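Continuing the assigner sketch above (same pred_instances, gt_instances and assign_result), the registered PseudoSampler simply splits the assignment into positive and negative indices; this assumes the dev-branch SamplingResult signature used in sample():

from mmdet3d.registry import TASK_UTILS

sampler = TASK_UTILS.build(dict(type='PseudoSampler'))
# No actual sampling: every assigned prior becomes pos, every background neg.
sampling_result = sampler.sample(assign_result, pred_instances, gt_instances)
pos_inds, neg_inds = sampling_result.pos_inds, sampling_result.neg_inds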
......@@ -5,14 +5,14 @@ from mmdet3d.core.post_processing import nms_bev, nms_normal_bev
from ..bbox import bbox3d2result, bbox3d_mapping_back, xywhr2xyxyr
def merge_aug_bboxes_3d(aug_results, img_metas, test_cfg):
def merge_aug_bboxes_3d(aug_results, aug_batch_input_metas, test_cfg):
"""Merge augmented detection 3D bboxes and scores.
Args:
aug_results (list[dict]): The dict of detection results.
The dict contains the following keys
- boxes_3d (:obj:`BaseInstance3DBoxes`): Detection bbox.
- bboxes_3d (:obj:`BaseInstance3DBoxes`): Detection bbox.
- scores_3d (torch.Tensor): Detection scores.
- labels_3d (torch.Tensor): Predicted box labels.
img_metas (list[dict]): Meta information of each sample.
......@@ -21,26 +21,27 @@ def merge_aug_bboxes_3d(aug_results, img_metas, test_cfg):
Returns:
dict: Bounding boxes results in cpu mode, containing merged results.
- boxes_3d (:obj:`BaseInstance3DBoxes`): Merged detection bbox.
- bboxes_3d (:obj:`BaseInstance3DBoxes`): Merged detection bbox.
- scores_3d (torch.Tensor): Merged detection scores.
- labels_3d (torch.Tensor): Merged predicted box labels.
"""
assert len(aug_results) == len(img_metas), \
assert len(aug_results) == len(aug_batch_input_metas), \
'"aug_results" should have the same length as "img_metas", got len(' \
f'aug_results)={len(aug_results)} and len(img_metas)={len(img_metas)}'
f'aug_results)={len(aug_results)} and ' \
f'len(img_metas)={len(aug_batch_input_metas)}'
recovered_bboxes = []
recovered_scores = []
recovered_labels = []
for bboxes, img_info in zip(aug_results, img_metas):
scale_factor = img_info[0]['pcd_scale_factor']
pcd_horizontal_flip = img_info[0]['pcd_horizontal_flip']
pcd_vertical_flip = img_info[0]['pcd_vertical_flip']
for bboxes, input_info in zip(aug_results, aug_batch_input_metas):
scale_factor = input_info['pcd_scale_factor']
pcd_horizontal_flip = input_info['pcd_horizontal_flip']
pcd_vertical_flip = input_info['pcd_vertical_flip']
recovered_scores.append(bboxes['scores_3d'])
recovered_labels.append(bboxes['labels_3d'])
bboxes = bbox3d_mapping_back(bboxes['boxes_3d'], scale_factor,
bboxes = bbox3d_mapping_back(bboxes['bboxes_3d'], scale_factor,
pcd_horizontal_flip, pcd_vertical_flip)
recovered_bboxes.append(bboxes)
......
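Each entry of the renamed aug_batch_input_metas is now a flat per-augmentation dict rather than a nested list; an illustrative entry, with keys taken from the loop above and values only as an example:

aug_batch_input_metas = [
    dict(
        pcd_scale_factor=1.0,       # scale applied to the point cloud
        pcd_horizontal_flip=False,  # whether the points were flipped horizontally
        pcd_vertical_flip=False),   # whether the points were flipped vertically
]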
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from typing import List, Optional, Tuple
import numpy as np
import torch
from mmcv import ConfigDict
from mmcv.runner import BaseModule, force_fp32
from mmengine.data import InstanceData
from torch import Tensor
from torch import nn as nn
from mmdet3d.core import (PseudoSampler, box3d_multiclass_nms, limit_period,
xywhr2xyxyr)
from mmdet3d.registry import MODELS
from mmdet.core import (build_assigner, build_bbox_coder,
build_prior_generator, build_sampler, multi_apply)
from ..builder import build_loss
from mmdet3d.core import (Det3DDataSample, PseudoSampler, box3d_multiclass_nms,
limit_period, merge_aug_bboxes_3d, xywhr2xyxyr)
from mmdet3d.registry import MODELS, TASK_UTILS
from mmdet.core import multi_apply
from .train_mixins import AnchorTrainMixin
......@@ -41,13 +45,13 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
"""
def __init__(self,
num_classes,
in_channels,
train_cfg,
test_cfg,
feat_channels=256,
use_direction_classifier=True,
anchor_generator=dict(
num_classes: int,
in_channels: int,
train_cfg: dict,
test_cfg: dict,
feat_channels: int = 256,
use_direction_classifier: bool = True,
anchor_generator: dict = dict(
type='Anchor3DRangeGenerator',
range=[0, -39.68, -1.78, 69.12, 39.68, -1.78],
strides=[2],
......@@ -55,20 +59,21 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
rotations=[0, 1.57],
custom_values=[],
reshape_out=False),
assigner_per_size=False,
assign_per_class=False,
diff_rad_by_sin=True,
dir_offset=-np.pi / 2,
dir_limit_offset=0,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
assigner_per_size: bool = False,
assign_per_class: bool = False,
diff_rad_by_sin: bool = True,
dir_offset: float = -np.pi / 2,
dir_limit_offset: int = 0,
bbox_coder: dict = dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls: dict = dict(
type='CrossEntropyLoss',
use_sigmoid=True,
loss_weight=1.0),
loss_bbox=dict(
loss_bbox: dict = dict(
type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(type='CrossEntropyLoss', loss_weight=0.2),
init_cfg=None):
loss_dir: dict = dict(
type='CrossEntropyLoss', loss_weight=0.2),
init_cfg: Optional[dict] = None) -> None:
super().__init__(init_cfg=init_cfg)
self.in_channels = in_channels
self.num_classes = num_classes
......@@ -81,28 +86,29 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
self.assign_per_class = assign_per_class
self.dir_offset = dir_offset
self.dir_limit_offset = dir_limit_offset
import warnings
warnings.warn(
'dir_offset and dir_limit_offset will be deprecated and be '
'incorporated into box coder in the future')
self.fp16_enabled = False
# build anchor generator
self.anchor_generator = build_prior_generator(anchor_generator)
self.prior_generator = TASK_UTILS.build(anchor_generator)
# In 3D detection, the anchor stride is connected with anchor size
self.num_anchors = self.anchor_generator.num_base_anchors
self.num_anchors = self.prior_generator.num_base_anchors
# build box coder
self.bbox_coder = build_bbox_coder(bbox_coder)
self.bbox_coder = TASK_UTILS.build(bbox_coder)
self.box_code_size = self.bbox_coder.code_size
# build loss function
self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
self.sampling = loss_cls['type'] not in ['FocalLoss', 'GHMC']
self.sampling = loss_cls['type'] not in [
'mmdet.FocalLoss', 'mmdet.GHMC'
]
if not self.use_sigmoid_cls:
self.num_classes += 1
self.loss_cls = build_loss(loss_cls)
self.loss_bbox = build_loss(loss_bbox)
self.loss_dir = build_loss(loss_dir)
self.loss_cls = MODELS.build(loss_cls)
self.loss_bbox = MODELS.build(loss_bbox)
self.loss_dir = MODELS.build(loss_dir)
self.fp16_enabled = False
self._init_layers()
......@@ -122,14 +128,14 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
return
if self.sampling:
self.bbox_sampler = build_sampler(self.train_cfg.sampler)
self.bbox_sampler = TASK_UTILS.build(self.train_cfg.sampler)
else:
self.bbox_sampler = PseudoSampler()
if isinstance(self.train_cfg.assigner, dict):
self.bbox_assigner = build_assigner(self.train_cfg.assigner)
self.bbox_assigner = TASK_UTILS.build(self.train_cfg.assigner)
elif isinstance(self.train_cfg.assigner, list):
self.bbox_assigner = [
build_assigner(res) for res in self.train_cfg.assigner
TASK_UTILS.build(res) for res in self.train_cfg.assigner
]
def _init_layers(self):
......@@ -142,7 +148,7 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
self.conv_dir_cls = nn.Conv2d(self.feat_channels,
self.num_anchors * 2, 1)
def forward_single(self, x):
def forward_single(self, x: Tensor) -> Tuple[Tensor, Tensor]:
"""Forward function on a single-scale feature map.
Args:
......@@ -159,7 +165,7 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
dir_cls_preds = self.conv_dir_cls(x)
return cls_score, bbox_pred, dir_cls_preds
def forward(self, feats):
def forward(self, feats: List[Tensor]) -> Tuple[list]:
"""Forward pass.
Args:
......@@ -172,7 +178,122 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
"""
return multi_apply(self.forward_single, feats)
def get_anchors(self, featmap_sizes, input_metas, device='cuda'):
def forward_train(self,
feats: List[Tensor],
batch_data_samples: List[Det3DDataSample],
proposal_cfg: Optional[ConfigDict] = None,
**kwargs):
"""
Args:
feats (list[Tensor]): Features from FPN.
batch_data_samples (list[:obj:`Det3DDataSample`]): Each item
contains the meta information of each sample and
corresponding annotations.
proposal_cfg (ConfigDict, optional): Test / postprocessing
configuration, if None, test_cfg would be used.
Defaults to None.
Returns:
tuple or Tensor: When `proposal_cfg` is None, the detector is a \
normal one-stage detector, and the return value is the losses.
- losses: (dict[str, Tensor]): A dictionary of loss components.
When `proposal_cfg` is not None, the head is used as an
`rpn_head`, and the return value is a tuple that contains:
- losses: (dict[str, Tensor]): A dictionary of loss components.
- results_list (list[:obj:`InstanceData`]): Detection
results of each input after the post process.
Each item usually contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instances, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (:obj:`BaseInstance3DBoxes`): Prediction of bboxes,
contains a tensor with shape (num_instances, 7).
"""
outs = self.forward(feats)
batch_gt_instance_3d = []
batch_gt_instances_ignore = []
batch_input_metas = []
for data_sample in batch_data_samples:
batch_input_metas.append(data_sample.metainfo)
batch_gt_instance_3d.append(data_sample.gt_instances_3d)
if 'ignored_instances' in data_sample:
batch_gt_instances_ignore.append(data_sample.ignored_instances)
else:
batch_gt_instances_ignore.append(None)
loss_inputs = outs + (batch_gt_instance_3d, batch_input_metas)
losses = self.loss(
*loss_inputs, batch_gt_instances_ignore=batch_gt_instances_ignore)
if proposal_cfg is None:
return losses
else:
batch_img_metas = [
data_sample.metainfo for data_sample in batch_data_samples
]
results_list = self.get_results(
*outs, batch_img_metas=batch_img_metas, cfg=proposal_cfg)
return losses, results_list
def simple_test(self,
feats: Tuple[Tensor],
batch_input_metas: List[dict],
rescale: bool = False) -> List[InstanceData]:
"""Test function without test-time augmentation.
Args:
feats (tuple[torch.Tensor]): Multi-level features from the
upstream network, each is a 4D-tensor.
batch_input_metas (list[dict]): List of image information.
rescale (bool, optional): Whether to rescale the results.
Defaults to False.
Returns:
list[:obj:`InstanceData`]: Detection results of each input
after the post process.
Each item usually contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instances, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (BaseInstance3DBoxes): Prediction of bboxes,
contains a tensor with shape (num_instances, 7).
"""
outs = self.forward(feats)
results_list = self.get_results(
*outs, input_metas=batch_input_metas, rescale=rescale)
return results_list
def aug_test(self,
aug_batch_feats,
aug_batch_input_metas,
rescale=False,
**kwargs):
aug_bboxes = []
# only support aug_test for one sample
for x, input_meta in zip(aug_batch_feats, aug_batch_input_metas):
outs = self.forward(x)
bbox_list = self.get_results(*outs, [input_meta], rescale=rescale)
bbox_dict = dict(
bboxes_3d=bbox_list[0].bboxes_3d,
scores_3d=bbox_list[0].scores_3d,
labels_3d=bbox_list[0].labels_3d)
aug_bboxes.append(bbox_dict)
# after merging, bboxes will be rescaled to the original image size
merged_bboxes = merge_aug_bboxes_3d(aug_bboxes, aug_batch_input_metas,
self.test_cfg)
return [merged_bboxes]
def get_anchors(self,
featmap_sizes: List[tuple],
input_metas: List[dict],
device: str = 'cuda') -> list:
"""Get anchors according to feature map sizes.
Args:
......@@ -187,7 +308,7 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
num_imgs = len(input_metas)
# since feature map sizes of all images are the same, we only compute
# anchors for one time
multi_level_anchors = self.anchor_generator.grid_anchors(
multi_level_anchors = self.prior_generator.grid_anchors(
featmap_sizes, device=device)
anchor_list = [multi_level_anchors for _ in range(num_imgs)]
return anchor_list
......@@ -279,7 +400,7 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
return loss_cls, loss_bbox, loss_dir
@staticmethod
def add_sin_difference(boxes1, boxes2):
def add_sin_difference(boxes1: Tensor, boxes2: Tensor) -> tuple:
"""Convert the rotation difference to difference in sine function.
Args:
......@@ -304,13 +425,12 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
@force_fp32(apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds'))
def loss(self,
cls_scores,
bbox_preds,
dir_cls_preds,
gt_bboxes,
gt_labels,
input_metas,
gt_bboxes_ignore=None):
cls_scores: List[Tensor],
bbox_preds: List[Tensor],
dir_cls_preds: List[Tensor],
batch_gt_instances_3d: List[InstanceData],
batch_input_metas: List[dict],
batch_gt_instances_ignore: List[InstanceData] = None) -> dict:
"""Calculate losses.
Args:
......@@ -318,12 +438,14 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
bbox_preds (list[torch.Tensor]): Multi-level bbox predictions.
dir_cls_preds (list[torch.Tensor]): Multi-level direction
class predictions.
gt_bboxes (list[:obj:`BaseInstance3DBoxes`]): Gt bboxes
of each sample.
gt_labels (list[torch.Tensor]): Gt labels of each sample.
input_metas (list[dict]): Contain pcd and img's meta info.
gt_bboxes_ignore (list[torch.Tensor]): Specify
which bounding boxes to ignore.
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instances. It usually includes ``bboxes`` and ``labels``
attributes.
batch_input_metas (list[dict]): Contain pcd and img's meta info.
batch_gt_instances_ignore (list[:obj:`InstanceData`], optional):
Batch of gt_instances_ignore. It includes ``bboxes`` attribute
data that is ignored during training and testing.
Defaults to None.
Returns:
dict[str, list[torch.Tensor]]: Classification, bbox, and
......@@ -335,17 +457,16 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
losses.
"""
featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
assert len(featmap_sizes) == self.anchor_generator.num_levels
assert len(featmap_sizes) == self.prior_generator.num_levels
device = cls_scores[0].device
anchor_list = self.get_anchors(
featmap_sizes, input_metas, device=device)
featmap_sizes, batch_input_metas, device=device)
label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
cls_reg_targets = self.anchor_target_3d(
anchor_list,
gt_bboxes,
input_metas,
gt_bboxes_ignore_list=gt_bboxes_ignore,
gt_labels_list=gt_labels,
batch_gt_instances_3d,
batch_input_metas,
batch_gt_instances_ignore=batch_gt_instances_ignore,
num_classes=self.num_classes,
label_channels=label_channels,
sampling=self.sampling)
......@@ -374,14 +495,14 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
return dict(
loss_cls=losses_cls, loss_bbox=losses_bbox, loss_dir=losses_dir)
def get_bboxes(self,
cls_scores,
bbox_preds,
dir_cls_preds,
input_metas,
cfg=None,
rescale=False):
"""Get bboxes of anchor head.
def get_results(self,
cls_scores: List[Tensor],
bbox_preds: List[Tensor],
dir_cls_preds: List[Tensor],
input_metas: List[dict],
cfg: ConfigDict = None,
rescale: list = False) -> List[InstanceData]:
"""Get results of anchor head.
Args:
cls_scores (list[torch.Tensor]): Multi-level class scores.
......@@ -393,14 +514,23 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
rescale (list[torch.Tensor]): Whether to rescale bbox.
Returns:
list[tuple]: Prediction results of batches.
list[:obj:`InstanceData`]: Instance prediction
results of each sample after the post process.
Each item usually contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instance, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (:obj:`BaseInstance3DBoxes`): Prediction of bboxes,
contains a tensor with shape (num_instances, 7).
"""
assert len(cls_scores) == len(bbox_preds)
assert len(cls_scores) == len(dir_cls_preds)
num_levels = len(cls_scores)
featmap_sizes = [cls_scores[i].shape[-2:] for i in range(num_levels)]
device = cls_scores[0].device
mlvl_anchors = self.anchor_generator.grid_anchors(
mlvl_anchors = self.prior_generator.grid_anchors(
featmap_sizes, device=device)
mlvl_anchors = [
anchor.reshape(-1, self.box_code_size) for anchor in mlvl_anchors
......@@ -419,21 +549,23 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
]
input_meta = input_metas[img_id]
proposals = self.get_bboxes_single(cls_score_list, bbox_pred_list,
dir_cls_pred_list, mlvl_anchors,
input_meta, cfg, rescale)
proposals = self._get_results_single(cls_score_list,
bbox_pred_list,
dir_cls_pred_list,
mlvl_anchors, input_meta, cfg,
rescale)
result_list.append(proposals)
return result_list
def get_bboxes_single(self,
cls_scores,
bbox_preds,
dir_cls_preds,
mlvl_anchors,
input_meta,
cfg=None,
rescale=False):
"""Get bboxes of single branch.
def _get_results_single(self,
cls_scores: Tensor,
bbox_preds: Tensor,
dir_cls_preds: Tensor,
mlvl_anchors: List[Tensor],
input_meta: List[dict],
cfg: ConfigDict = None,
rescale: bool = False) -> InstanceData:
"""Get results of single branch.
Args:
cls_scores (torch.Tensor): Class score in single batch.
......@@ -447,11 +579,16 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
rescale (list[torch.Tensor]): whether to rescale bbox.
Returns:
tuple: Contain predictions of single batch.
- bboxes (:obj:`BaseInstance3DBoxes`): Predicted 3d bboxes.
- scores (torch.Tensor): Class score of each bbox.
- labels (torch.Tensor): Label of each bbox.
:obj:`InstanceData`: Detection results of each sample
after the post process.
Each item usually contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instance, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (:obj:`BaseInstance3DBoxes`): Prediction of bboxes,
contains a tensor with shape (num_instances, 7).
"""
cfg = self.test_cfg if cfg is None else cfg
assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
......@@ -514,4 +651,8 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
dir_rot + self.dir_offset +
np.pi * dir_scores.to(bboxes.dtype))
bboxes = input_meta['box_type_3d'](bboxes, box_dim=self.box_code_size)
return bboxes, scores, labels
results = InstanceData()
results.bboxes_3d = bboxes
results.scores_3d = scores
results.labels_3d = labels
return results
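The head now returns one InstanceData per sample instead of a (bboxes, scores, labels) tuple; a self-contained sketch of the equivalent packing, with a hypothetical helper name:

from mmengine.data import InstanceData

def pack_results(bboxes, scores, labels) -> InstanceData:
    """Hypothetical helper mirroring the return block of
    _get_results_single above."""
    results = InstanceData()
    results.bboxes_3d = bboxes  # BaseInstance3DBoxes, (num_instances, 7)
    results.scores_3d = scores  # Tensor, (num_instances,)
    results.labels_3d = labels  # Tensor, (num_instances,)
    return results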
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import torch
from mmengine.data import InstanceData
from mmdet3d.core import limit_period
from mmdet.core import images_to_levels, multi_apply
......@@ -11,10 +12,9 @@ class AnchorTrainMixin(object):
def anchor_target_3d(self,
anchor_list,
gt_bboxes_list,
input_metas,
gt_bboxes_ignore_list=None,
gt_labels_list=None,
batch_gt_instances_3d,
batch_input_metas,
batch_gt_instances_ignore=None,
label_channels=1,
num_classes=1,
sampling=True):
......@@ -22,11 +22,10 @@ class AnchorTrainMixin(object):
Args:
anchor_list (list[list]): Multi level anchors of each image.
gt_bboxes_list (list[:obj:`BaseInstance3DBoxes`]): Ground truth
batch_gt_instances_3d (list[:obj:`InstanceData`]): Ground truth
bboxes of each image.
input_metas (list[dict]): Meta info of each image.
gt_bboxes_ignore_list (list): Ignore list of gt bboxes.
gt_labels_list (list[torch.Tensor]): Gt labels of batches.
batch_input_metas (list[dict]): Meta info of each image.
batch_gt_instances_ignore (list): Ignore list of gt bboxes.
label_channels (int): The channel of labels.
num_classes (int): The number of classes.
sampling (bool): Whether to sample anchors.
......@@ -38,8 +37,8 @@ class AnchorTrainMixin(object):
direction weights, number of positive anchors and
number of negative anchors.
"""
num_imgs = len(input_metas)
assert len(anchor_list) == num_imgs
num_inputs = len(batch_input_metas)
assert len(anchor_list) == num_inputs
if isinstance(anchor_list[0][0], list):
# sizes of anchors are different
......@@ -48,7 +47,7 @@ class AnchorTrainMixin(object):
sum([anchor.size(0) for anchor in anchors])
for anchors in anchor_list[0]
]
for i in range(num_imgs):
for i in range(num_inputs):
anchor_list[i] = anchor_list[i][0]
else:
# anchor number of multi levels
......@@ -57,24 +56,21 @@ class AnchorTrainMixin(object):
for anchors in anchor_list[0]
]
# concat all level anchors and flags to a single tensor
for i in range(num_imgs):
for i in range(num_inputs):
anchor_list[i] = torch.cat(anchor_list[i])
# compute targets for each image
if gt_bboxes_ignore_list is None:
gt_bboxes_ignore_list = [None for _ in range(num_imgs)]
if gt_labels_list is None:
gt_labels_list = [None for _ in range(num_imgs)]
if batch_gt_instances_ignore is None:
batch_gt_instances_ignore = [None for _ in range(num_inputs)]
(all_labels, all_label_weights, all_bbox_targets, all_bbox_weights,
all_dir_targets, all_dir_weights, pos_inds_list,
neg_inds_list) = multi_apply(
self.anchor_target_3d_single,
anchor_list,
gt_bboxes_list,
gt_bboxes_ignore_list,
gt_labels_list,
input_metas,
batch_gt_instances_3d,
batch_gt_instances_ignore,
batch_input_metas,
label_channels=label_channels,
num_classes=num_classes,
sampling=sampling)
......@@ -101,9 +97,8 @@ class AnchorTrainMixin(object):
def anchor_target_3d_single(self,
anchors,
gt_bboxes,
gt_bboxes_ignore,
gt_labels,
gt_instance_3d,
gt_instance_ignore,
input_meta,
label_channels=1,
num_classes=1,
......@@ -112,9 +107,8 @@ class AnchorTrainMixin(object):
Args:
anchors (torch.Tensor): Concatenated multi-level anchor.
gt_bboxes (:obj:`BaseInstance3DBoxes`): Gt bboxes.
gt_bboxes_ignore (torch.Tensor): Ignored gt bboxes.
gt_labels (torch.Tensor): Gt class labels.
gt_instance_3d (:obj:`InstanceData`): Gt bboxes.
gt_instance_ignore (:obj:`InstanceData`): Ignored gt bboxes.
input_meta (dict): Meta info of each image.
label_channels (int): The channel of labels.
num_classes (int): The number of classes.
......@@ -137,15 +131,19 @@ class AnchorTrainMixin(object):
-1, self.box_code_size)
current_anchor_num += current_anchors.size(0)
if self.assign_per_class:
gt_per_cls = (gt_labels == i)
gt_per_cls = (gt_instance_3d.labels_3d == i)
gt_per_cls_instance = InstanceData()
gt_per_cls_instance.labels_3d = gt_instance_3d.labels_3d[
gt_per_cls]
gt_per_cls_instance.bboxes_3d = gt_instance_3d.bboxes_3d[
gt_per_cls, :]
anchor_targets = self.anchor_target_single_assigner(
assigner, current_anchors, gt_bboxes[gt_per_cls, :],
gt_bboxes_ignore, gt_labels[gt_per_cls], input_meta,
num_classes, sampling)
assigner, current_anchors, gt_per_cls_instance,
gt_instance_ignore, input_meta, num_classes, sampling)
else:
anchor_targets = self.anchor_target_single_assigner(
assigner, current_anchors, gt_bboxes, gt_bboxes_ignore,
gt_labels, input_meta, num_classes, sampling)
assigner, current_anchors, gt_instance_3d,
gt_instance_ignore, input_meta, num_classes, sampling)
(labels, label_weights, bbox_targets, bbox_weights,
dir_targets, dir_weights, pos_inds, neg_inds) = anchor_targets
......@@ -194,15 +192,19 @@ class AnchorTrainMixin(object):
current_anchors = anchors[i]
current_anchor_num += current_anchors.size(0)
if self.assign_per_class:
gt_per_cls = (gt_labels == i)
gt_per_cls = (gt_instance_3d.labels_3d == i)
gt_per_cls_instance = InstanceData()
gt_per_cls_instance.labels_3d = gt_instance_3d.labels_3d[
gt_per_cls]
gt_per_cls_instance.bboxes_3d = gt_instance_3d.bboxes_3d[
gt_per_cls, :]
anchor_targets = self.anchor_target_single_assigner(
assigner, current_anchors, gt_bboxes[gt_per_cls, :],
gt_bboxes_ignore, gt_labels[gt_per_cls], input_meta,
num_classes, sampling)
assigner, current_anchors, gt_per_cls_instance,
gt_instance_ignore, input_meta, num_classes, sampling)
else:
anchor_targets = self.anchor_target_single_assigner(
assigner, current_anchors, gt_bboxes, gt_bboxes_ignore,
gt_labels, input_meta, num_classes, sampling)
assigner, current_anchors, gt_instance_3d,
gt_instance_ignore, input_meta, num_classes, sampling)
(labels, label_weights, bbox_targets, bbox_weights,
dir_targets, dir_weights, pos_inds, neg_inds) = anchor_targets
......@@ -230,17 +232,16 @@ class AnchorTrainMixin(object):
total_pos_inds, total_neg_inds)
else:
return self.anchor_target_single_assigner(self.bbox_assigner,
anchors, gt_bboxes,
gt_bboxes_ignore,
gt_labels, input_meta,
num_classes, sampling)
anchors, gt_instance_3d,
gt_instance_ignore,
input_meta, num_classes,
sampling)
def anchor_target_single_assigner(self,
bbox_assigner,
anchors,
gt_bboxes,
gt_bboxes_ignore,
gt_labels,
gt_instance_3d,
gt_instance_ignore,
input_meta,
num_classes=1,
sampling=True):
......@@ -249,9 +250,8 @@ class AnchorTrainMixin(object):
Args:
bbox_assigner (BaseAssigner): assign positive and negative boxes.
anchors (torch.Tensor): Concatenated multi-level anchor.
gt_bboxes (:obj:`BaseInstance3DBoxes`): Gt bboxes.
gt_bboxes_ignore (torch.Tensor): Ignored gt bboxes.
gt_labels (torch.Tensor): Gt class labels.
gt_instance_3d (:obj:`InstanceData`): Gt bboxes.
gt_instance_ignore (torch.Tensor): Ignored gt bboxes.
input_meta (dict): Meta info of each image.
num_classes (int): The number of classes.
sampling (bool): Whether to sample anchors.
......@@ -267,13 +267,17 @@ class AnchorTrainMixin(object):
dir_weights = anchors.new_zeros((anchors.shape[0]), dtype=torch.float)
labels = anchors.new_zeros(num_valid_anchors, dtype=torch.long)
label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)
if len(gt_bboxes) > 0:
if not isinstance(gt_bboxes, torch.Tensor):
gt_bboxes = gt_bboxes.tensor.to(anchors.device)
assign_result = bbox_assigner.assign(anchors, gt_bboxes,
gt_bboxes_ignore, gt_labels)
sampling_result = self.bbox_sampler.sample(assign_result, anchors,
gt_bboxes)
if len(gt_instance_3d.bboxes_3d) > 0:
if not isinstance(gt_instance_3d.bboxes_3d, torch.Tensor):
gt_instance_3d.bboxes_3d = gt_instance_3d.bboxes_3d.tensor.to(
anchors.device)
pred_instance_3d = InstanceData(priors=anchors)
assign_result = bbox_assigner.assign(pred_instance_3d,
gt_instance_3d,
gt_instance_ignore)
sampling_result = self.bbox_sampler.sample(assign_result,
pred_instance_3d,
gt_instance_3d)
pos_inds = sampling_result.pos_inds
neg_inds = sampling_result.neg_inds
else:
......@@ -284,7 +288,7 @@ class AnchorTrainMixin(object):
anchors.new_zeros((anchors.shape[0], ), dtype=torch.bool) == 0,
as_tuple=False).squeeze(-1).unique()
if gt_labels is not None:
if gt_instance_3d.labels_3d is not None:
labels += num_classes
if len(pos_inds) > 0:
pos_bbox_targets = self.bbox_coder.encode(
......@@ -300,10 +304,10 @@ class AnchorTrainMixin(object):
dir_targets[pos_inds] = pos_dir_targets
dir_weights[pos_inds] = 1.0
if gt_labels is None:
if gt_instance_3d.labels_3d is None:
labels[pos_inds] = 1
else:
labels[pos_inds] = gt_labels[
labels[pos_inds] = gt_instance_3d.labels_3d[
sampling_result.pos_assigned_gt_inds]
if self.train_cfg.pos_weight <= 0:
label_weights[pos_inds] = 1.0
......
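The per-class assignment path now slices the whole InstanceData rather than separate bbox/label tensors; a hedged sketch of that slice as a standalone helper (the function name is illustrative, the body mirrors the assign_per_class branch above):

from mmengine.data import InstanceData

def split_gt_by_class(gt_instance_3d: InstanceData,
                      class_id: int) -> InstanceData:
    """Illustrative per-class slice of ground-truth instances."""
    gt_per_cls = gt_instance_3d.labels_3d == class_id
    gt_per_cls_instance = InstanceData()
    gt_per_cls_instance.labels_3d = gt_instance_3d.labels_3d[gt_per_cls]
    gt_per_cls_instance.bboxes_3d = gt_instance_3d.bboxes_3d[gt_per_cls, :]
    return gt_per_cls_instance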
# Copyright (c) OpenMMLab. All rights reserved.
from os import path as osp
from typing import Dict, List, Optional, Union
import mmcv
import torch
from mmcv.parallel import DataContainer as DC
from mmcv.runner import auto_fp16
from mmengine.data import InstanceData
from torch.optim import Optimizer
from mmdet3d.core import Box3DMode, Coord3DMode, show_result
from mmdet3d.core import Det3DDataSample
from mmdet3d.registry import MODELS
from mmdet.core.utils import stack_batch
from mmdet.models.detectors import BaseDetector
@MODELS.register_module()
class Base3DDetector(BaseDetector):
"""Base class for detectors."""
"""Base class for 3D detectors.
def forward_test(self, points, img_metas, img=None, **kwargs):
Args:
preprocess_cfg (dict, optional): Model preprocessing config
for processing the input data. It usually includes
``to_rgb``, ``pad_size_divisor``, ``pad_value``,
``mean`` and ``std``. Defaults to None.
init_cfg (dict, optional): the config to control the
initialization. Defaults to None.
"""
def __init__(self,
preprocess_cfg: Optional[dict] = None,
init_cfg: Optional[dict] = None) -> None:
super(Base3DDetector, self).__init__(
preprocess_cfg=preprocess_cfg, init_cfg=init_cfg)
def forward_simple_test(self, batch_inputs_dict: Dict[List, torch.Tensor],
batch_data_samples: List[Det3DDataSample],
**kwargs) -> List[Det3DDataSample]:
"""
Args:
points (list[torch.Tensor]): the outer list indicates test-time
augmentations and inner torch.Tensor should have a shape NxC,
which contains all points in the batch.
img_metas (list[list[dict]]): the outer list indicates test-time
augs (multiscale, flip, etc.) and the inner list indicates
images in a batch
img (list[torch.Tensor], optional): the outer
list indicates test-time augmentations and inner
torch.Tensor should have a shape NxCxHxW, which contains
all images in the batch. Defaults to None.
batch_inputs_dict (dict): The model input dict which include
'points', 'img' keys.
- points (list[torch.Tensor]): Point cloud of each sample.
- imgs (torch.Tensor, optional): Image of each sample.
batch_data_samples (List[:obj:`DetDataSample`]): The Data
Samples. It usually includes information such as
`gt_instance_3d`, `gt_panoptic_seg_3d` and `gt_sem_seg_3d`.
Returns:
list(obj:`Det3DDataSample`): Detection results of the
input images. Each DetDataSample usually contains
``pred_instances_3d`` or ``pred_panoptic_seg_3d`` or
``pred_sem_seg_3d``.
"""
for var, name in [(points, 'points'), (img_metas, 'img_metas')]:
batch_size = len(batch_data_samples)
batch_input_metas = []
if batch_size != len(batch_inputs_dict['points']):
raise ValueError(
'num of points ({}) != num of data samples ({})'.format(
len(batch_inputs_dict['points']), batch_size))
for batch_index in range(batch_size):
metainfo = batch_data_samples[batch_index].metainfo
batch_input_metas.append(metainfo)
for var, name in [(batch_inputs_dict['points'], 'points'),
(batch_input_metas, 'img_metas')]:
if not isinstance(var, list):
raise TypeError('{} must be a list, but got {}'.format(
name, type(var)))
num_augs = len(points)
if num_augs != len(img_metas):
raise ValueError(
'num of augmentations ({}) != num of image meta ({})'.format(
len(points), len(img_metas)))
if num_augs == 1:
img = [img] if img is None else img
return self.simple_test(points[0], img_metas[0], img[0], **kwargs)
if batch_size == 1:
return self.simple_test(
batch_inputs_dict, batch_input_metas, rescale=True, **kwargs)
else:
return self.aug_test(points, img_metas, img, **kwargs)
@auto_fp16(apply_to=('img', 'points'))
def forward(self, return_loss=True, **kwargs):
"""Calls either forward_train or forward_test depending on whether
return_loss=True.
Note this setting will change the expected inputs. When
`return_loss=True`, img and img_metas are single-nested (i.e.
torch.Tensor and list[dict]), and when `return_loss=False`, img and
img_metas should be double nested (i.e. list[torch.Tensor],
list[list[dict]]), with the outer list indicating test time
augmentations.
return self.aug_test(
batch_inputs_dict, batch_input_metas, rescale=True, **kwargs)
def forward(self,
data: List[dict],
optimizer: Optional[Union[Optimizer, dict]] = None,
return_loss: bool = False,
**kwargs):
"""The iteration step during training and testing. This method defines
an iteration step during training and testing, except for the back
propagation and optimizer updating during training, which are done in
an optimizer scheduler.
Args:
data (list[dict]): The output of dataloader.
optimizer (:obj:`torch.optim.Optimizer`, dict, Optional): The
optimizer of runner. This argument is unused and reserved.
Defaults to None.
return_loss (bool): Whether to return loss. In general,
it will be set to True during training and False
during testing. Defaults to False.
Returns:
during training
dict: It should contain at least 3 keys: ``loss``,
``log_vars``, ``num_samples``.
- ``loss`` is a tensor for back propagation, which can be a
weighted sum of multiple losses.
- ``log_vars`` contains all the variables to be sent to the
logger.
- ``num_samples`` indicates the batch size (when the model
is DDP, it means the batch size on each GPU), which is
used for averaging the logs.
during testing
list(obj:`Det3DDataSample`): Detection results of the
input samples. Each DetDataSample usually contains
``pred_instances_3d`` or ``pred_panoptic_seg_3d`` or
``pred_sem_seg_3d``.
"""
batch_inputs_dict, batch_data_samples = self.preprocess_data(data)
if return_loss:
return self.forward_train(**kwargs)
else:
return self.forward_test(**kwargs)
losses = self.forward_train(batch_inputs_dict, batch_data_samples,
**kwargs)
loss, log_vars = self._parse_losses(losses)
def show_results(self, data, result, out_dir, show=False, score_thr=None):
"""Results visualization.
outputs = dict(
loss=loss,
log_vars=log_vars,
num_samples=len(batch_data_samples))
return outputs
else:
return self.forward_simple_test(batch_inputs_dict,
batch_data_samples, **kwargs)
def preprocess_data(self, data: List[dict]) -> tuple:
""" Process input data during training and simple testing phases.
Args:
data (list[dict]): Input points and the information of the sample.
result (list[dict]): Prediction results.
out_dir (str): Output directory of visualization result.
show (bool, optional): Determines whether you are
going to show result by open3d.
Defaults to False.
score_thr (float, optional): Score threshold of bounding boxes.
Default to None.
data (list[dict]): The data to be processed, which
comes from dataloader.
Returns:
tuple: It should contain 2 items.
- batch_inputs_dict (dict): The model input dict which include
'points', 'img' keys.
- points (list[torch.Tensor]): Point cloud of each sample.
- imgs (torch.Tensor, optional): Image of each sample.
- batch_data_samples (list[:obj:`Det3DDataSample`]): The Data
Samples. It usually includes information such as
`gt_instance_3d` , `gt_instances`.
"""
for batch_id in range(len(result)):
if isinstance(data['points'][0], DC):
points = data['points'][0]._data[0][batch_id].numpy()
elif mmcv.is_list_of(data['points'][0], torch.Tensor):
points = data['points'][0][batch_id]
batch_data_samples = [
data_['data_sample'].to(self.device) for data_ in data
]
if 'points' in data[0]['inputs'].keys():
points = [
data_['inputs']['points'].to(self.device) for data_ in data
]
else:
ValueError(f"Unsupported data type {type(data['points'][0])} "
f'for visualization!')
if isinstance(data['img_metas'][0], DC):
pts_filename = data['img_metas'][0]._data[0][batch_id][
'pts_filename']
box_mode_3d = data['img_metas'][0]._data[0][batch_id][
'box_mode_3d']
elif mmcv.is_list_of(data['img_metas'][0], dict):
pts_filename = data['img_metas'][0][batch_id]['pts_filename']
box_mode_3d = data['img_metas'][0][batch_id]['box_mode_3d']
raise KeyError(
"Model input dict needs to include the 'points' key.")
if 'img' in data[0]['inputs'].keys():
imgs = [data_['inputs']['img'].to(self.device) for data_ in data]
else:
ValueError(
f"Unsupported data type {type(data['img_metas'][0])} "
f'for visualization!')
file_name = osp.split(pts_filename)[-1].split('.')[0]
assert out_dir is not None, 'Expect out_dir, got none.'
pred_bboxes = result[batch_id]['boxes_3d']
pred_labels = result[batch_id]['labels_3d']
if score_thr is not None:
mask = result[batch_id]['scores_3d'] > score_thr
pred_bboxes = pred_bboxes[mask]
pred_labels = pred_labels[mask]
# for now we convert points and bbox into depth mode
if (box_mode_3d == Box3DMode.CAM) or (box_mode_3d
== Box3DMode.LIDAR):
points = Coord3DMode.convert_point(points, Coord3DMode.LIDAR,
Coord3DMode.DEPTH)
pred_bboxes = Box3DMode.convert(pred_bboxes, box_mode_3d,
Box3DMode.DEPTH)
elif box_mode_3d != Box3DMode.DEPTH:
ValueError(
f'Unsupported box_mode_3d {box_mode_3d} for conversion!')
pred_bboxes = pred_bboxes.tensor.cpu().numpy()
show_result(
points,
None,
pred_bboxes,
out_dir,
file_name,
show=show,
pred_labels=pred_labels)
imgs = None
if self.preprocess_cfg is None:
batch_inputs_dict = {
'points': points,
'imgs': stack_batch(imgs).float() if imgs is not None else None
}
return batch_inputs_dict, batch_data_samples
if self.to_rgb and imgs[0].size(0) == 3:
imgs = [_img[[2, 1, 0], ...] for _img in imgs]
imgs = [(_img - self.pixel_mean) / self.pixel_std for _img in imgs]
batch_img = stack_batch(imgs, self.pad_size_divisor, self.pad_value)
batch_inputs_dict = {'points': points, 'imgs': batch_img}
return batch_inputs_dict, batch_data_samples
def postprocess_result(self, results_list: List[InstanceData]) \
-> List[Det3DDataSample]:
""" Convert results list to `Det3DDataSample`.
Args:
results_list (list[:obj:`InstanceData`]): Detection results of
each sample.
Returns:
list[:obj:`Det3DDataSample`]: Detection results of the
input sample. Each Det3DDataSample usually contains
'pred_instances_3d'. And the ``pred_instances_3d`` usually
contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instances, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (:obj:`BaseInstance3DBoxes`): Prediction of bboxes,
contains a tensor with shape (num_instances, 7).
"""
for i in range(len(results_list)):
result = Det3DDataSample()
result.pred_instances_3d = results_list[i]
results_list[i] = result
return results_list
def show_results(self, data, result, out_dir, show=False, score_thr=None):
# TODO
pass
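A compressed usage sketch of the new unified entry point; the data layout mirrors test_voxel_net at the bottom of this commit, and the model calls are left commented since no model is constructed here:

import torch
from mmengine.data import InstanceData
from mmdet3d.core import Det3DDataSample
from mmdet3d.core.bbox import LiDARInstance3DBoxes

# One dict per sample: raw inputs paired with its Det3DDataSample.
gt = InstanceData()
gt.bboxes_3d = LiDARInstance3DBoxes(torch.rand([4, 7]))
gt.labels_3d = torch.randint(0, 3, [4])
sample = Det3DDataSample(metainfo=dict(box_type_3d=LiDARInstance3DBoxes))
sample.gt_instances_3d = gt
data = [dict(inputs=dict(points=torch.rand([1000, 4])), data_sample=sample)]
# model.forward(data, return_loss=True)   -> loss dict (training)
# model.forward(data, return_loss=False)  -> list[Det3DDataSample] (testing)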
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Optional
import torch
from mmdet3d.registry import MODELS
from .base import Base3DDetector
......@@ -23,13 +27,15 @@ class SingleStage3DDetector(Base3DDetector):
def __init__(self,
backbone,
neck=None,
bbox_head=None,
train_cfg=None,
test_cfg=None,
init_cfg=None,
pretrained=None):
super(SingleStage3DDetector, self).__init__(init_cfg)
neck: Optional[dict] = None,
bbox_head: Optional[dict] = None,
train_cfg: Optional[dict] = None,
test_cfg: Optional[dict] = None,
preprocess_cfg: Optional[dict] = None,
init_cfg: Optional[dict] = None,
pretrained: Optional[str] = None) -> None:
super(SingleStage3DDetector, self).__init__(
preprocess_cfg=preprocess_cfg, init_cfg=init_cfg)
self.backbone = MODELS.build(backbone)
if neck is not None:
self.neck = MODELS.build(neck)
......@@ -39,12 +45,12 @@ class SingleStage3DDetector(Base3DDetector):
self.train_cfg = train_cfg
self.test_cfg = test_cfg
def forward_dummy(self, points):
def forward_dummy(self, batch_inputs: dict) -> tuple:
"""Used for computing network flops.
See `mmdetection/tools/analysis_tools/get_flops.py`
"""
x = self.extract_feat(points)
x = self.extract_feat(batch_inputs['points'])
try:
sample_mod = self.train_cfg.sample_mod
outs = self.bbox_head(x, sample_mod)
......@@ -52,20 +58,20 @@ class SingleStage3DDetector(Base3DDetector):
outs = self.bbox_head(x)
return outs
def extract_feat(self, points, img_metas=None):
def extract_feat(self, points: List[torch.Tensor]) -> list:
"""Directly extract features from the backbone+neck.
Args:
points (torch.Tensor): Input points.
points (List[torch.Tensor]): Input points.
"""
x = self.backbone(points)
x = self.backbone(points[0])
if self.with_neck:
x = self.neck(x)
return x
def extract_feats(self, points, img_metas):
def extract_feats(self, batch_inputs_dict: dict) -> list:
"""Extract features of multiple samples."""
return [
self.extract_feat(pts, img_meta)
for pts, img_meta in zip(points, img_metas)
self.extract_feat([points])
for points in batch_inputs_dict['points']
]
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List, Optional
import torch
from mmcv.ops import Voxelization
from mmcv.runner import force_fp32
from torch.nn import functional as F
from mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d
from mmdet3d.core import Det3DDataSample
from mmdet3d.registry import MODELS
from .single_stage import SingleStage3DDetector
......@@ -14,16 +16,16 @@ class VoxelNet(SingleStage3DDetector):
r"""`VoxelNet <https://arxiv.org/abs/1711.06396>`_ for 3D detection."""
def __init__(self,
voxel_layer,
voxel_encoder,
middle_encoder,
backbone,
neck=None,
bbox_head=None,
train_cfg=None,
test_cfg=None,
init_cfg=None,
pretrained=None):
voxel_layer: dict,
voxel_encoder: dict,
middle_encoder: dict,
backbone: dict,
neck: Optional[dict] = None,
bbox_head: Optional[dict] = None,
train_cfg: Optional[dict] = None,
test_cfg: Optional[dict] = None,
init_cfg: Optional[dict] = None,
pretrained: Optional[str] = None) -> None:
super(VoxelNet, self).__init__(
backbone=backbone,
neck=neck,
......@@ -36,7 +38,7 @@ class VoxelNet(SingleStage3DDetector):
self.voxel_encoder = MODELS.build(voxel_encoder)
self.middle_encoder = MODELS.build(middle_encoder)
def extract_feat(self, points, img_metas=None):
def extract_feat(self, points: List[torch.Tensor]) -> list:
"""Extract features from points."""
voxels, num_points, coors = self.voxelize(points)
voxel_features = self.voxel_encoder(voxels, num_points, coors)
......@@ -49,7 +51,7 @@ class VoxelNet(SingleStage3DDetector):
@torch.no_grad()
@force_fp32()
def voxelize(self, points):
def voxelize(self, points: List[torch.Tensor]) -> tuple:
"""Apply hard voxelization to points."""
voxels, coors, num_points = [], [], []
for res in points:
......@@ -66,64 +68,75 @@ class VoxelNet(SingleStage3DDetector):
coors_batch = torch.cat(coors_batch, dim=0)
return voxels, num_points, coors_batch
def forward_train(self,
points,
img_metas,
gt_bboxes_3d,
gt_labels_3d,
gt_bboxes_ignore=None):
"""Training forward function.
def forward_train(self, batch_inputs_dict: Dict[list, torch.Tensor],
batch_data_samples: List[Det3DDataSample],
**kwargs) -> dict:
"""
Args:
points (list[torch.Tensor]): Point cloud of each sample.
img_metas (list[dict]): Meta information of each sample
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
boxes for each sample.
gt_labels_3d (list[torch.Tensor]): Ground truth labels for
boxes of each sample
gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth
boxes to be ignored. Defaults to None.
batch_inputs_dict (dict): The model input dict. It should contain
``points`` and ``img`` keys.
- points (list[torch.Tensor]): Point cloud of each sample.
- imgs (torch.Tensor, optional): Image of each sample.
batch_data_samples (list[:obj:`Det3DDataSample`]): The batch
data samples. It usually includes information such
as `gt_instance_3d` or `gt_panoptic_seg_3d` or `gt_sem_seg_3d`.
Returns:
dict: Losses of each branch.
dict[str, Tensor]: A dictionary of loss components.
"""
x = self.extract_feat(points, img_metas)
outs = self.bbox_head(x)
loss_inputs = outs + (gt_bboxes_3d, gt_labels_3d, img_metas)
losses = self.bbox_head.loss(
*loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
x = self.extract_feat(batch_inputs_dict['points'])
losses = self.bbox_head.forward_train(x, batch_data_samples, **kwargs)
return losses
def simple_test(self, points, img_metas, imgs=None, rescale=False):
"""Test function without augmentaiton."""
x = self.extract_feat(points, img_metas)
outs = self.bbox_head(x)
bbox_list = self.bbox_head.get_bboxes(
*outs, img_metas, rescale=rescale)
bbox_results = [
bbox3d2result(bboxes, scores, labels)
for bboxes, scores, labels in bbox_list
]
return bbox_results
def aug_test(self, points, img_metas, imgs=None, rescale=False):
def simple_test(self,
batch_inputs_dict: Dict[list, torch.Tensor],
batch_input_metas: List[dict],
rescale: bool = False) -> list:
"""Test function without test-time augmentation.
Args:
batch_inputs_dict (dict): The model input dict. It should contain
``points`` and ``img`` keys.
- points (list[torch.Tensor]): Point cloud of single
sample.
- imgs (torch.Tensor, optional): Image of single sample.
batch_input_metas (list[dict]): List of input information.
rescale (bool, optional): Whether to rescale the results.
Defaults to False.
Returns:
list[:obj:`Det3DDataSample`]: Detection results of the \
inputs. Each Det3DDataSample usually contains \
'pred_instances_3d'. And the ``pred_instances_3d`` usually \
contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instances, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (:obj:`BaseInstance3DBoxes`): Prediction of bboxes,
contains a tensor with shape (num_instances, 7).
"""
x = self.extract_feat(batch_inputs_dict['points'])
bboxes_list = self.bbox_head.simple_test(
x, batch_input_metas, rescale=rescale)
# convert to Det3DDataSample
results_list = self.postprocess_result(bboxes_list)
return results_list
def aug_test(self,
aug_batch_inputs_dict: Dict[list, torch.Tensor],
aug_batch_input_metas: List[dict],
rescale: bool = False) -> list:
"""Test function with augmentaiton."""
feats = self.extract_feats(points, img_metas)
# only support aug_test for one sample
aug_bboxes = []
for x, img_meta in zip(feats, img_metas):
outs = self.bbox_head(x)
bbox_list = self.bbox_head.get_bboxes(
*outs, img_meta, rescale=rescale)
bbox_list = [
dict(boxes_3d=bboxes, scores_3d=scores, labels_3d=labels)
for bboxes, scores, labels in bbox_list
]
aug_bboxes.append(bbox_list[0])
# after merging, bboxes will be rescaled to the original image size
merged_bboxes = merge_aug_bboxes_3d(aug_bboxes, img_metas,
self.bbox_head.test_cfg)
return [merged_bboxes]
# TODO Refactor this after mmdet update
feats = self.extract_feats(aug_batch_inputs_dict)
aug_bboxes = self.bbox_head.aug_test(
feats, aug_batch_input_metas, rescale=rescale)
return aug_bboxes
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import random
from os.path import dirname, exists, join
import numpy as np
import pytest
import torch
from mmengine.data import InstanceData
from mmdet3d.core import Det3DDataSample
from mmdet3d.core.bbox import LiDARInstance3DBoxes
from mmdet3d.registry import MODELS
def _setup_seed(seed):
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
np.random.seed(seed)
random.seed(seed)
torch.backends.cudnn.deterministic = True
def _get_config_directory():
"""Find the predefined detector config directory."""
try:
# Assume we are running in the source mmdetection3d repo
repo_dpath = dirname(dirname(dirname(__file__)))
except NameError:
# For IPython development when this __file__ is not defined
import mmdet3d
repo_dpath = dirname(dirname(mmdet3d.__file__))
config_dpath = join(repo_dpath, 'configs')
if not exists(config_dpath):
raise Exception('Cannot find config path')
return config_dpath
def _get_config_module(fname):
"""Load a configuration as a python module."""
from mmcv import Config
config_dpath = _get_config_directory()
config_fpath = join(config_dpath, fname)
config_mod = Config.fromfile(config_fpath)
return config_mod
def _get_model_cfg(fname):
"""Grab configs necessary to create a model.
These are deep copied to allow for safe modification of parameters without
influencing other tests.
"""
config = _get_config_module(fname)
model = copy.deepcopy(config.model)
return model
def _get_detector_cfg(fname):
"""Grab configs necessary to create a detector.
These are deep copied to allow for safe modification of parameters without
influencing other tests.
"""
import mmcv
config = _get_config_module(fname)
model = copy.deepcopy(config.model)
train_cfg = mmcv.Config(copy.deepcopy(config.model.train_cfg))
test_cfg = mmcv.Config(copy.deepcopy(config.model.test_cfg))
model.update(train_cfg=train_cfg)
model.update(test_cfg=test_cfg)
return model
def test_voxel_net():
import mmdet3d.models
assert hasattr(mmdet3d.models, 'VoxelNet')
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
_setup_seed(0)
voxel_net_cfg = _get_detector_cfg(
'pointpillars/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class.py')
model = MODELS.build(voxel_net_cfg).cuda()
input_dict0 = dict(points=torch.rand([2010, 4], device='cuda'))
input_dict1 = dict(points=torch.rand([2020, 4], device='cuda'))
gt_instance_3d_0 = InstanceData()
gt_instance_3d_0.bboxes_3d = LiDARInstance3DBoxes(
torch.rand([20, 7], device='cuda'))
gt_instance_3d_0.labels_3d = torch.randint(0, 3, [20], device='cuda')
data_sample_0 = Det3DDataSample(
metainfo=dict(box_type_3d=LiDARInstance3DBoxes))
data_sample_0.gt_instances_3d = gt_instance_3d_0
gt_instance_3d_1 = InstanceData()
gt_instance_3d_1.bboxes_3d = LiDARInstance3DBoxes(
torch.rand([50, 7], device='cuda'))
gt_instance_3d_1.labels_3d = torch.randint(0, 3, [50], device='cuda')
data_sample_1 = Det3DDataSample(
metainfo=dict(box_type_3d=LiDARInstance3DBoxes))
data_sample_1.gt_instances_3d = gt_instance_3d_1
data = [dict(inputs=input_dict0, data_sample=data_sample_0)]
# test simple_test
with torch.no_grad():
results = model.forward(data, return_loss=False)
bboxes_3d = results[0].pred_instances_3d['bboxes_3d']
scores_3d = results[0].pred_instances_3d['scores_3d']
labels_3d = results[0].pred_instances_3d['labels_3d']
assert bboxes_3d.tensor.shape == (50, 7)
assert scores_3d.shape == torch.Size([50])
assert labels_3d.shape == torch.Size([50])
# test forward_train
data = [
dict(inputs=input_dict0, data_sample=data_sample_0),
dict(inputs=input_dict1, data_sample=data_sample_1)
]
losses = model.forward(data, return_loss=True)
assert losses['log_vars']['loss_cls'] >= 0
assert losses['log_vars']['loss_bbox'] >= 0
assert losses['log_vars']['loss_dir'] >= 0
assert losses['log_vars']['loss'] >= 0
# test_aug_test
metainfo = {
'pcd_scale_factor': 1,
'pcd_horizontal_flip': 1,
'pcd_vertical_flip': 1,
'box_type_3d': LiDARInstance3DBoxes
}
data_sample_0.set_metainfo(metainfo)
data_sample_1.set_metainfo(metainfo)
data = [
dict(inputs=input_dict0, data_sample=data_sample_0),
dict(inputs=input_dict1, data_sample=data_sample_1)
]
results = model.forward(data, return_loss=False)