Commit 7c6810e3 authored by VVsssssk, committed by ChaimZhu

[Refactor] Refactor PointPillars model interface

parent 49a1e555
@@ -48,34 +48,36 @@ model = dict(
         diff_rad_by_sin=True,
         bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
         loss_cls=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=1.0),
-        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
+        loss_bbox=dict(
+            type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
         loss_dir=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
+            type='mmdet.CrossEntropyLoss', use_sigmoid=False,
+            loss_weight=0.2)),
     # model training and testing settings
     train_cfg=dict(
         assigner=[
             dict(  # for Pedestrian
-                type='MaxIoUAssigner',
-                iou_calculator=dict(type='BboxOverlapsNearest3D'),
+                type='Max3DIoUAssigner',
+                iou_calculator=dict(type='mmdet3d.BboxOverlapsNearest3D'),
                 pos_iou_thr=0.5,
                 neg_iou_thr=0.35,
                 min_pos_iou=0.35,
                 ignore_iof_thr=-1),
             dict(  # for Cyclist
-                type='MaxIoUAssigner',
-                iou_calculator=dict(type='BboxOverlapsNearest3D'),
+                type='Max3DIoUAssigner',
+                iou_calculator=dict(type='mmdet3d.BboxOverlapsNearest3D'),
                 pos_iou_thr=0.5,
                 neg_iou_thr=0.35,
                 min_pos_iou=0.35,
                 ignore_iof_thr=-1),
             dict(  # for Car
-                type='MaxIoUAssigner',
-                iou_calculator=dict(type='BboxOverlapsNearest3D'),
+                type='Max3DIoUAssigner',
+                iou_calculator=dict(type='mmdet3d.BboxOverlapsNearest3D'),
                 pos_iou_thr=0.6,
                 neg_iou_thr=0.45,
                 min_pos_iou=0.45,
......
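
The assigner and loss types above are now scope-prefixed registry names. As a minimal sketch of building one of these assigners from its config dict, assuming the `Max3DIoUAssigner` registered later in this commit and scope-aware resolution of the `mmdet3d.`-prefixed calculator type:

    # Sketch: build the car assigner from the config above via the registry.
    from mmdet3d.registry import TASK_UTILS

    assigner_cfg = dict(
        type='Max3DIoUAssigner',
        iou_calculator=dict(type='mmdet3d.BboxOverlapsNearest3D'),
        pos_iou_thr=0.6,
        neg_iou_thr=0.45,
        min_pos_iou=0.45,
        ignore_iof_thr=-1)
    assigner = TASK_UTILS.build(assigner_cfg)  # Max3DIoUAssigner instance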
@@ -17,8 +17,8 @@ model = dict(
     train_cfg=dict(
         _delete_=True,
         assigner=dict(
-            type='MaxIoUAssigner',
-            iou_calculator=dict(type='BboxOverlapsNearest3D'),
+            type='Max3DIoUAssigner',
+            iou_calculator=dict(type='mmdet3d.BboxOverlapsNearest3D'),
             pos_iou_thr=0.6,
             neg_iou_thr=0.45,
             min_pos_iou=0.45,
......
 # Copyright (c) OpenMMLab. All rights reserved.
-from mmdet.core.bbox import AssignResult, BaseAssigner, MaxIoUAssigner
+from mmdet.core.bbox import AssignResult, BaseAssigner
+
+from .max_3d_iou_assigner import MaxIoUAssigner

 __all__ = ['BaseAssigner', 'MaxIoUAssigner', 'AssignResult']
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional, Union

from mmengine.data import InstanceData

from mmdet3d.registry import TASK_UTILS
from mmdet.core.bbox.assigners import MaxIoUAssigner

from ..assigners import AssignResult


@TASK_UTILS.register_module()
class Max3DIoUAssigner(MaxIoUAssigner):
    # TODO: This is a temporary box assigner.
    """Assign a corresponding gt bbox or background to each bbox.

    Each proposal will be assigned with `-1`, or a semi-positive integer
    indicating the ground truth index.

    - -1: negative sample, no assigned gt
    - semi-positive integer: positive sample, index (0-based) of assigned gt

    Args:
        pos_iou_thr (float): IoU threshold for positive bboxes.
        neg_iou_thr (float or tuple): IoU threshold for negative bboxes.
        min_pos_iou (float): Minimum IoU for a bbox to be considered as a
            positive bbox. Positive samples can have smaller IoU than
            pos_iou_thr due to the 4th step (assign max IoU sample to each
            gt). `min_pos_iou` is set to avoid assigning bboxes that have
            extremely small IoU with GT as positive samples.
        gt_max_assign_all (bool): Whether to assign all bboxes with the same
            highest overlap with some gt to that gt.
        ignore_iof_thr (float): IoF threshold for ignoring bboxes (if
            `gt_bboxes_ignore` is specified). Negative values mean not
            ignoring any bboxes.
        ignore_wrt_candidates (bool): Whether to compute the IoF between
            `bboxes` and `gt_bboxes_ignore`, or the contrary.
        match_low_quality (bool): Whether to allow low quality matches. This
            is usually allowed for RPN and single-stage detectors, but not
            allowed in the second stage. Details are demonstrated in Step 4.
        gpu_assign_thr (int): The upper bound of the number of GT for GPU
            assign. When the number of gt is above this threshold, the
            assignment runs on the CPU device. Negative values mean not
            assigning on CPU.
        iou_calculator (dict): Config of the overlaps calculator.
    """

    def __init__(self,
                 pos_iou_thr: float,
                 neg_iou_thr: Union[float, tuple],
                 min_pos_iou: float = .0,
                 gt_max_assign_all: bool = True,
                 ignore_iof_thr: float = -1,
                 ignore_wrt_candidates: bool = True,
                 match_low_quality: bool = True,
                 gpu_assign_thr: float = -1,
                 iou_calculator: dict = dict(type='BboxOverlaps2D')):
        self.pos_iou_thr = pos_iou_thr
        self.neg_iou_thr = neg_iou_thr
        self.min_pos_iou = min_pos_iou
        self.gt_max_assign_all = gt_max_assign_all
        self.ignore_iof_thr = ignore_iof_thr
        self.ignore_wrt_candidates = ignore_wrt_candidates
        self.gpu_assign_thr = gpu_assign_thr
        self.match_low_quality = match_low_quality
        self.iou_calculator = TASK_UTILS.build(iou_calculator)

    def assign(self,
               pred_instances: InstanceData,
               gt_instances: InstanceData,
               gt_instances_ignore: Optional[InstanceData] = None,
               **kwargs) -> AssignResult:
        """Assign gt to bboxes.

        This method assigns a gt bbox to every bbox (proposal/anchor); each
        bbox will be assigned with -1 or a semi-positive number. -1 means
        negative sample, a semi-positive number is the index (0-based) of
        the assigned gt.

        The assignment is done in the following steps; the order matters.

        1. assign every bbox to the background
        2. assign proposals whose iou with all gts < neg_iou_thr to 0
        3. for each bbox, if the iou with its nearest gt >= pos_iou_thr,
           assign it to that gt
        4. for each gt bbox, assign its nearest proposals (may be more than
           one) to itself

        Args:
            pred_instances (:obj:`InstanceData`): Instances of model
                predictions. It includes ``priors``, and the priors can
                be anchors or points, or the bboxes predicted by the
                previous stage, with shape (n, 4). The bboxes predicted by
                the current model or stage will be named ``bboxes``,
                ``labels``, and ``scores``, the same as the ``InstanceData``
                in other places.
            gt_instances (:obj:`InstanceData`): Ground truth of instance
                annotations. It usually includes ``bboxes``, with shape
                (k, 4), and ``labels``, with shape (k, ).
            gt_instances_ignore (:obj:`InstanceData`, optional): Instances
                to be ignored during training. It includes ``bboxes``
                attribute data that is ignored during training and testing.
                Defaults to None.

        Returns:
            :obj:`AssignResult`: The assign result.

        Example:
            >>> import torch
            >>> from mmengine.data import InstanceData
            >>> self = Max3DIoUAssigner(0.5, 0.5)
            >>> pred_instances = InstanceData()
            >>> pred_instances.priors = torch.Tensor([[0, 0, 10, 10],
            ...                                       [10, 10, 20, 20]])
            >>> gt_instances = InstanceData()
            >>> gt_instances.bboxes_3d = torch.Tensor([[0, 0, 10, 9]])
            >>> gt_instances.labels_3d = torch.Tensor([0])
            >>> assign_result = self.assign(pred_instances, gt_instances)
            >>> expected_gt_inds = torch.LongTensor([1, 0])
            >>> assert torch.all(assign_result.gt_inds == expected_gt_inds)
        """
        gt_bboxes = gt_instances.bboxes_3d
        priors = pred_instances.priors
        gt_labels = gt_instances.labels_3d
        if gt_instances_ignore is not None:
            gt_bboxes_ignore = gt_instances_ignore.bboxes_3d
        else:
            gt_bboxes_ignore = None

        assign_on_cpu = True if (self.gpu_assign_thr > 0) and (
            gt_bboxes.shape[0] > self.gpu_assign_thr) else False
        # compute overlap and assign gt on CPU when number of GT is large
        if assign_on_cpu:
            device = priors.device
            priors = priors.cpu()
            gt_bboxes = gt_bboxes.cpu()
            gt_labels = gt_labels.cpu()
            if gt_bboxes_ignore is not None:
                gt_bboxes_ignore = gt_bboxes_ignore.cpu()

        overlaps = self.iou_calculator(gt_bboxes, priors)

        if (self.ignore_iof_thr > 0 and gt_bboxes_ignore is not None
                and gt_bboxes_ignore.numel() > 0 and priors.numel() > 0):
            if self.ignore_wrt_candidates:
                ignore_overlaps = self.iou_calculator(
                    priors, gt_bboxes_ignore, mode='iof')
                ignore_max_overlaps, _ = ignore_overlaps.max(dim=1)
            else:
                ignore_overlaps = self.iou_calculator(
                    gt_bboxes_ignore, priors, mode='iof')
                ignore_max_overlaps, _ = ignore_overlaps.max(dim=0)
            overlaps[:, ignore_max_overlaps > self.ignore_iof_thr] = -1

        assign_result = self.assign_wrt_overlaps(overlaps, gt_labels)
        if assign_on_cpu:
            assign_result.gt_inds = assign_result.gt_inds.to(device)
            assign_result.max_overlaps = assign_result.max_overlaps.to(device)
            if assign_result.labels is not None:
                assign_result.labels = assign_result.labels.to(device)
        return assign_result
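
A minimal usage sketch of the new `InstanceData`-based interface (tensor contents are illustrative; as in the anchor-target code later in this commit, `bboxes_3d` is assumed to already be a raw tensor, and the scoped calculator type assumes the same registry resolution as the configs above):

    import torch
    from mmengine.data import InstanceData

    # Max3DIoUAssigner is the class defined in the file above.
    assigner = Max3DIoUAssigner(
        pos_iou_thr=0.6,
        neg_iou_thr=0.45,
        min_pos_iou=0.45,
        iou_calculator=dict(type='mmdet3d.BboxOverlapsNearest3D'))

    pred_instances = InstanceData(priors=torch.rand(16, 7))  # anchor boxes
    gt_instances = InstanceData(
        bboxes_3d=torch.rand(2, 7),    # (x, y, z, w, l, h, yaw)
        labels_3d=torch.tensor([0, 1]))

    result = assigner.assign(pred_instances, gt_instances)
    # result.gt_inds: 0 for negatives, i > 0 for anchors assigned to gt i - 1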
@@ -2,9 +2,9 @@
 from mmdet.core.bbox.samplers import (BaseSampler, CombinedSampler,
                                       InstanceBalancedPosSampler,
                                       IoUBalancedNegSampler, OHEMSampler,
-                                      PseudoSampler, RandomSampler,
-                                      SamplingResult)
+                                      RandomSampler, SamplingResult)

 from .iou_neg_piecewise_sampler import IoUNegPiecewiseSampler
+from .pseudosample import PseudoSampler

 __all__ = [
     'BaseSampler', 'PseudoSampler', 'RandomSampler',
......
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from mmengine.data import InstanceData

from mmdet3d.registry import TASK_UTILS
from mmdet.core.bbox.assigners import AssignResult

from ..samplers import BaseSampler, SamplingResult


@TASK_UTILS.register_module()
class PseudoSampler(BaseSampler):
    """A pseudo sampler that does not do actual sampling."""

    # TODO: This is a temporary pseudo sampler.

    def __init__(self, **kwargs):
        pass

    def _sample_pos(self, **kwargs):
        """Sample positive samples."""
        raise NotImplementedError

    def _sample_neg(self, **kwargs):
        """Sample negative samples."""
        raise NotImplementedError

    def sample(self, assign_result: AssignResult, pred_instances: InstanceData,
               gt_instances: InstanceData, *args, **kwargs):
        """Directly return the positive and negative indices of samples.

        Args:
            assign_result (:obj:`AssignResult`): Bbox assigning results.
            pred_instances (:obj:`InstanceData`): Instances of model
                predictions. It includes ``priors``, and the priors can
                be anchors, points, or bboxes predicted by the model,
                with shape (n, 4).
            gt_instances (:obj:`InstanceData`): Ground truth of instance
                annotations. It usually includes ``bboxes`` and ``labels``
                attributes.

        Returns:
            :obj:`SamplingResult`: Sampler results.
        """
        gt_bboxes = gt_instances.bboxes_3d
        priors = pred_instances.priors

        pos_inds = torch.nonzero(
            assign_result.gt_inds > 0, as_tuple=False).squeeze(-1).unique()
        neg_inds = torch.nonzero(
            assign_result.gt_inds == 0, as_tuple=False).squeeze(-1).unique()
        gt_flags = priors.new_zeros(priors.shape[0], dtype=torch.uint8)
        sampling_result = SamplingResult(
            pos_inds=pos_inds,
            neg_inds=neg_inds,
            priors=priors,
            gt_bboxes=gt_bboxes,
            assign_result=assign_result,
            gt_flags=gt_flags,
            avg_factor_with_neg=False)
        return sampling_result
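
The sampler composes with the assigner in the same `InstanceData` flow; a hedged sketch, with `assigner` as built in the sketch above:

    import torch
    from mmengine.data import InstanceData

    pred_instances = InstanceData(priors=torch.rand(16, 7))
    gt_instances = InstanceData(
        bboxes_3d=torch.rand(2, 7), labels_3d=torch.tensor([0, 1]))

    assign_result = assigner.assign(pred_instances, gt_instances)
    sampler = PseudoSampler()
    sampling_result = sampler.sample(assign_result, pred_instances,
                                     gt_instances)
    # All positives and negatives are kept; nothing is subsampled.
    print(sampling_result.pos_inds, sampling_result.neg_inds)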
@@ -5,14 +5,14 @@ from mmdet3d.core.post_processing import nms_bev, nms_normal_bev
 from ..bbox import bbox3d2result, bbox3d_mapping_back, xywhr2xyxyr


-def merge_aug_bboxes_3d(aug_results, img_metas, test_cfg):
+def merge_aug_bboxes_3d(aug_results, aug_batch_input_metas, test_cfg):
     """Merge augmented detection 3D bboxes and scores.

     Args:
         aug_results (list[dict]): The dict of detection results.
             The dict contains the following keys

-            - boxes_3d (:obj:`BaseInstance3DBoxes`): Detection bbox.
+            - bboxes_3d (:obj:`BaseInstance3DBoxes`): Detection bbox.
             - scores_3d (torch.Tensor): Detection scores.
             - labels_3d (torch.Tensor): Predicted box labels.
         img_metas (list[dict]): Meta information of each sample.
@@ -21,26 +21,27 @@ def merge_aug_bboxes_3d(aug_results, img_metas, test_cfg):
     Returns:
         dict: Bounding boxes results in cpu mode, containing merged results.

-            - boxes_3d (:obj:`BaseInstance3DBoxes`): Merged detection bbox.
+            - bboxes_3d (:obj:`BaseInstance3DBoxes`): Merged detection bbox.
             - scores_3d (torch.Tensor): Merged detection scores.
             - labels_3d (torch.Tensor): Merged predicted box labels.
     """
-    assert len(aug_results) == len(img_metas), \
-        '"aug_results" should have the same length as "img_metas", got len(' \
-        f'aug_results)={len(aug_results)} and len(img_metas)={len(img_metas)}'
+    assert len(aug_results) == len(aug_batch_input_metas), \
+        '"aug_results" should have the same length as "img_metas", got len(' \
+        f'aug_results)={len(aug_results)} and ' \
+        f'len(img_metas)={len(aug_batch_input_metas)}'

     recovered_bboxes = []
     recovered_scores = []
     recovered_labels = []

-    for bboxes, img_info in zip(aug_results, img_metas):
-        scale_factor = img_info[0]['pcd_scale_factor']
-        pcd_horizontal_flip = img_info[0]['pcd_horizontal_flip']
-        pcd_vertical_flip = img_info[0]['pcd_vertical_flip']
+    for bboxes, input_info in zip(aug_results, aug_batch_input_metas):
+        scale_factor = input_info['pcd_scale_factor']
+        pcd_horizontal_flip = input_info['pcd_horizontal_flip']
+        pcd_vertical_flip = input_info['pcd_vertical_flip']
         recovered_scores.append(bboxes['scores_3d'])
         recovered_labels.append(bboxes['labels_3d'])
-        bboxes = bbox3d_mapping_back(bboxes['boxes_3d'], scale_factor,
+        bboxes = bbox3d_mapping_back(bboxes['bboxes_3d'], scale_factor,
                                      pcd_horizontal_flip, pcd_vertical_flip)
        recovered_bboxes.append(bboxes)
......
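
After the renames, each augmentation contributes a result dict keyed by `bboxes_3d` and a flat meta dict (no longer wrapped in a one-element list); a sketch of the expected shapes, where `boxes`, `scores`, `labels` and `test_cfg` are placeholders:

    aug_results = [
        dict(bboxes_3d=boxes, scores_3d=scores, labels_3d=labels),
    ]  # one dict per augmentation
    aug_batch_input_metas = [
        dict(pcd_scale_factor=1.0,
             pcd_horizontal_flip=False,
             pcd_vertical_flip=False),
    ]  # one flat meta dict per augmentation
    merged = merge_aug_bboxes_3d(aug_results, aug_batch_input_metas, test_cfg)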
 # Copyright (c) OpenMMLab. All rights reserved.
 import numpy as np
 import torch
+from mmengine.data import InstanceData

 from mmdet3d.core import limit_period
 from mmdet.core import images_to_levels, multi_apply
@@ -11,10 +12,9 @@ class AnchorTrainMixin(object):

     def anchor_target_3d(self,
                          anchor_list,
-                         gt_bboxes_list,
-                         input_metas,
-                         gt_bboxes_ignore_list=None,
-                         gt_labels_list=None,
+                         batch_gt_instances_3d,
+                         batch_input_metas,
+                         batch_gt_instances_ignore=None,
                          label_channels=1,
                          num_classes=1,
                          sampling=True):
@@ -22,11 +22,10 @@ class AnchorTrainMixin(object):
         Args:
             anchor_list (list[list]): Multi level anchors of each image.
-            gt_bboxes_list (list[:obj:`BaseInstance3DBoxes`]): Ground truth
-                bboxes of each image.
-            input_metas (list[dict]): Meta info of each image.
-            gt_bboxes_ignore_list (list): Ignore list of gt bboxes.
-            gt_labels_list (list[torch.Tensor]): Gt labels of batches.
+            batch_gt_instances_3d (list[:obj:`InstanceData`]): Ground truth
+                bboxes of each image.
+            batch_input_metas (list[dict]): Meta info of each image.
+            batch_gt_instances_ignore (list): Ignore list of gt bboxes.
             label_channels (int): The channel of labels.
             num_classes (int): The number of classes.
             sampling (bool): Whether to sample anchors.
@@ -38,8 +37,8 @@ class AnchorTrainMixin(object):
                 direction weights, number of positive anchors and
                 number of negative anchors.
         """
-        num_imgs = len(input_metas)
-        assert len(anchor_list) == num_imgs
+        num_inputs = len(batch_input_metas)
+        assert len(anchor_list) == num_inputs

         if isinstance(anchor_list[0][0], list):
             # sizes of anchors are different
@@ -48,7 +47,7 @@ class AnchorTrainMixin(object):
                 sum([anchor.size(0) for anchor in anchors])
                 for anchors in anchor_list[0]
             ]
-            for i in range(num_imgs):
+            for i in range(num_inputs):
                 anchor_list[i] = anchor_list[i][0]
         else:
             # anchor number of multi levels
@@ -57,24 +56,21 @@ class AnchorTrainMixin(object):
                 for anchors in anchor_list[0]
             ]
             # concat all level anchors and flags to a single tensor
-            for i in range(num_imgs):
+            for i in range(num_inputs):
                 anchor_list[i] = torch.cat(anchor_list[i])

         # compute targets for each image
-        if gt_bboxes_ignore_list is None:
-            gt_bboxes_ignore_list = [None for _ in range(num_imgs)]
-        if gt_labels_list is None:
-            gt_labels_list = [None for _ in range(num_imgs)]
+        if batch_gt_instances_ignore is None:
+            batch_gt_instances_ignore = [None for _ in range(num_inputs)]

         (all_labels, all_label_weights, all_bbox_targets, all_bbox_weights,
          all_dir_targets, all_dir_weights, pos_inds_list,
          neg_inds_list) = multi_apply(
              self.anchor_target_3d_single,
              anchor_list,
-             gt_bboxes_list,
-             gt_bboxes_ignore_list,
-             gt_labels_list,
-             input_metas,
+             batch_gt_instances_3d,
+             batch_gt_instances_ignore,
+             batch_input_metas,
              label_channels=label_channels,
              num_classes=num_classes,
              sampling=sampling)
@@ -101,9 +97,8 @@ class AnchorTrainMixin(object):

     def anchor_target_3d_single(self,
                                 anchors,
-                                gt_bboxes,
-                                gt_bboxes_ignore,
-                                gt_labels,
+                                gt_instance_3d,
+                                gt_instance_ignore,
                                 input_meta,
                                 label_channels=1,
                                 num_classes=1,
@@ -112,9 +107,8 @@ class AnchorTrainMixin(object):
         Args:
             anchors (torch.Tensor): Concatenated multi-level anchor.
-            gt_bboxes (:obj:`BaseInstance3DBoxes`): Gt bboxes.
-            gt_bboxes_ignore (torch.Tensor): Ignored gt bboxes.
-            gt_labels (torch.Tensor): Gt class labels.
+            gt_instance_3d (:obj:`InstanceData`): Gt bboxes.
+            gt_instance_ignore (:obj:`InstanceData`): Ignored gt bboxes.
             input_meta (dict): Meta info of each image.
             label_channels (int): The channel of labels.
             num_classes (int): The number of classes.
@@ -137,15 +131,19 @@ class AnchorTrainMixin(object):
                     -1, self.box_code_size)
                 current_anchor_num += current_anchors.size(0)
                 if self.assign_per_class:
-                    gt_per_cls = (gt_labels == i)
+                    gt_per_cls = (gt_instance_3d.labels_3d == i)
+                    gt_per_cls_instance = InstanceData()
+                    gt_per_cls_instance.labels_3d = gt_instance_3d.labels_3d[
+                        gt_per_cls]
+                    gt_per_cls_instance.bboxes_3d = gt_instance_3d.bboxes_3d[
+                        gt_per_cls, :]
                     anchor_targets = self.anchor_target_single_assigner(
-                        assigner, current_anchors, gt_bboxes[gt_per_cls, :],
-                        gt_bboxes_ignore, gt_labels[gt_per_cls], input_meta,
-                        num_classes, sampling)
+                        assigner, current_anchors, gt_per_cls_instance,
+                        gt_instance_ignore, input_meta, num_classes, sampling)
                 else:
                     anchor_targets = self.anchor_target_single_assigner(
-                        assigner, current_anchors, gt_bboxes, gt_bboxes_ignore,
-                        gt_labels, input_meta, num_classes, sampling)
+                        assigner, current_anchors, gt_instance_3d,
+                        gt_instance_ignore, input_meta, num_classes, sampling)

                 (labels, label_weights, bbox_targets, bbox_weights,
                  dir_targets, dir_weights, pos_inds, neg_inds) = anchor_targets
@@ -194,15 +192,19 @@ class AnchorTrainMixin(object):
                 current_anchors = anchors[i]
                 current_anchor_num += current_anchors.size(0)
                 if self.assign_per_class:
-                    gt_per_cls = (gt_labels == i)
+                    gt_per_cls = (gt_instance_3d.labels_3d == i)
+                    gt_per_cls_instance = InstanceData()
+                    gt_per_cls_instance.labels_3d = gt_instance_3d.labels_3d[
+                        gt_per_cls]
+                    gt_per_cls_instance.bboxes_3d = gt_instance_3d.bboxes_3d[
+                        gt_per_cls, :]
                     anchor_targets = self.anchor_target_single_assigner(
-                        assigner, current_anchors, gt_bboxes[gt_per_cls, :],
-                        gt_bboxes_ignore, gt_labels[gt_per_cls], input_meta,
-                        num_classes, sampling)
+                        assigner, current_anchors, gt_per_cls_instance,
+                        gt_instance_ignore, input_meta, num_classes, sampling)
                 else:
                     anchor_targets = self.anchor_target_single_assigner(
-                        assigner, current_anchors, gt_bboxes, gt_bboxes_ignore,
-                        gt_labels, input_meta, num_classes, sampling)
+                        assigner, current_anchors, gt_instance_3d,
+                        gt_instance_ignore, input_meta, num_classes, sampling)

                 (labels, label_weights, bbox_targets, bbox_weights,
                  dir_targets, dir_weights, pos_inds, neg_inds) = anchor_targets
@@ -230,17 +232,16 @@ class AnchorTrainMixin(object):
                     total_pos_inds, total_neg_inds)
         else:
             return self.anchor_target_single_assigner(self.bbox_assigner,
-                                                      anchors, gt_bboxes,
-                                                      gt_bboxes_ignore,
-                                                      gt_labels, input_meta,
-                                                      num_classes, sampling)
+                                                      anchors, gt_instance_3d,
+                                                      gt_instance_ignore,
+                                                      input_meta, num_classes,
+                                                      sampling)

     def anchor_target_single_assigner(self,
                                       bbox_assigner,
                                       anchors,
-                                      gt_bboxes,
-                                      gt_bboxes_ignore,
-                                      gt_labels,
+                                      gt_instance_3d,
+                                      gt_instance_ignore,
                                       input_meta,
                                       num_classes=1,
                                       sampling=True):
@@ -249,9 +250,8 @@ class AnchorTrainMixin(object):
         Args:
             bbox_assigner (BaseAssigner): assign positive and negative boxes.
             anchors (torch.Tensor): Concatenated multi-level anchor.
-            gt_bboxes (:obj:`BaseInstance3DBoxes`): Gt bboxes.
-            gt_bboxes_ignore (torch.Tensor): Ignored gt bboxes.
-            gt_labels (torch.Tensor): Gt class labels.
+            gt_instance_3d (:obj:`InstanceData`): Gt bboxes.
+            gt_instance_ignore (:obj:`InstanceData`): Ignored gt bboxes.
             input_meta (dict): Meta info of each image.
             num_classes (int): The number of classes.
             sampling (bool): Whether to sample anchors.
@@ -267,13 +267,17 @@ class AnchorTrainMixin(object):
         dir_weights = anchors.new_zeros((anchors.shape[0]), dtype=torch.float)
         labels = anchors.new_zeros(num_valid_anchors, dtype=torch.long)
         label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)
-        if len(gt_bboxes) > 0:
-            if not isinstance(gt_bboxes, torch.Tensor):
-                gt_bboxes = gt_bboxes.tensor.to(anchors.device)
-            assign_result = bbox_assigner.assign(anchors, gt_bboxes,
-                                                 gt_bboxes_ignore, gt_labels)
-            sampling_result = self.bbox_sampler.sample(assign_result, anchors,
-                                                       gt_bboxes)
+        if len(gt_instance_3d.bboxes_3d) > 0:
+            if not isinstance(gt_instance_3d.bboxes_3d, torch.Tensor):
+                gt_instance_3d.bboxes_3d = gt_instance_3d.bboxes_3d.tensor.to(
+                    anchors.device)
+            pred_instance_3d = InstanceData(priors=anchors)
+            assign_result = bbox_assigner.assign(pred_instance_3d,
+                                                 gt_instance_3d,
+                                                 gt_instance_ignore)
+            sampling_result = self.bbox_sampler.sample(assign_result,
+                                                       pred_instance_3d,
+                                                       gt_instance_3d)
             pos_inds = sampling_result.pos_inds
             neg_inds = sampling_result.neg_inds
         else:
@@ -284,7 +288,7 @@ class AnchorTrainMixin(object):
                 anchors.new_zeros((anchors.shape[0], ), dtype=torch.bool) == 0,
                 as_tuple=False).squeeze(-1).unique()

-        if gt_labels is not None:
+        if gt_instance_3d.labels_3d is not None:
             labels += num_classes

         if len(pos_inds) > 0:
             pos_bbox_targets = self.bbox_coder.encode(
@@ -300,10 +304,10 @@ class AnchorTrainMixin(object):
             dir_targets[pos_inds] = pos_dir_targets
             dir_weights[pos_inds] = 1.0

-            if gt_labels is None:
+            if gt_instance_3d.labels_3d is None:
                 labels[pos_inds] = 1
             else:
-                labels[pos_inds] = gt_labels[
+                labels[pos_inds] = gt_instance_3d.labels_3d[
                     sampling_result.pos_assigned_gt_inds]
             if self.train_cfg.pos_weight <= 0:
                 label_weights[pos_inds] = 1.0
......
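
The mixin now takes one `InstanceData` per sample in place of the parallel bbox/label lists; a hedged packing sketch, where `gt_bboxes` and `gt_labels` are placeholders for a `BaseInstance3DBoxes` and a label tensor:

    from mmengine.data import InstanceData

    gt_instance_3d = InstanceData()
    gt_instance_3d.bboxes_3d = gt_bboxes   # e.g. a LiDARInstance3DBoxes
    gt_instance_3d.labels_3d = gt_labels   # a (num_gts,) LongTensor
    batch_gt_instances_3d = [gt_instance_3d]  # one entry per sample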
 # Copyright (c) OpenMMLab. All rights reserved.
-from os import path as osp
+from typing import Dict, List, Optional, Union

-import mmcv
 import torch
-from mmcv.parallel import DataContainer as DC
-from mmcv.runner import auto_fp16
+from mmengine.data import InstanceData
+from torch.optim import Optimizer

-from mmdet3d.core import Box3DMode, Coord3DMode, show_result
+from mmdet3d.core import Det3DDataSample
+from mmdet3d.registry import MODELS
+from mmdet.core.utils import stack_batch
 from mmdet.models.detectors import BaseDetector


+@MODELS.register_module()
 class Base3DDetector(BaseDetector):
-    """Base class for detectors."""
+    """Base class for 3D detectors.
+
+    Args:
+        preprocess_cfg (dict, optional): Model preprocessing config
+            for processing the input data. It usually includes
+            ``to_rgb``, ``pad_size_divisor``, ``pad_value``,
+            ``mean`` and ``std``. Defaults to None.
+        init_cfg (dict, optional): The config to control the
+            initialization. Defaults to None.
+    """

-    def forward_test(self, points, img_metas, img=None, **kwargs):
+    def __init__(self,
+                 preprocess_cfg: Optional[dict] = None,
+                 init_cfg: Optional[dict] = None) -> None:
+        super(Base3DDetector, self).__init__(
+            preprocess_cfg=preprocess_cfg, init_cfg=init_cfg)
+
+    def forward_simple_test(self, batch_inputs_dict: Dict[List, torch.Tensor],
+                            batch_data_samples: List[Det3DDataSample],
+                            **kwargs) -> List[Det3DDataSample]:
         """
         Args:
-            points (list[torch.Tensor]): the outer list indicates test-time
-                augmentations and inner torch.Tensor should have a shape NxC,
-                which contains all points in the batch.
-            img_metas (list[list[dict]]): the outer list indicates test-time
-                augs (multiscale, flip, etc.) and the inner list indicates
-                images in a batch
-            img (list[torch.Tensor], optional): the outer
-                list indicates test-time augmentations and inner
-                torch.Tensor should have a shape NxCxHxW, which contains
-                all images in the batch. Defaults to None.
+            batch_inputs_dict (dict): The model input dict, which includes
+                'points' and 'img' keys.
+
+                - points (list[torch.Tensor]): Point cloud of each sample.
+                - imgs (torch.Tensor, optional): Image of each sample.
+
+            batch_data_samples (List[:obj:`Det3DDataSample`]): The data
+                samples. It usually includes information such as
+                `gt_instance_3d`, `gt_panoptic_seg_3d` and `gt_sem_seg_3d`.
+
+        Returns:
+            list[:obj:`Det3DDataSample`]: Detection results of the
+            input images. Each Det3DDataSample usually contains
+            ``pred_instances_3d``, ``pred_panoptic_seg_3d`` or
+            ``pred_sem_seg_3d``.
         """
-        for var, name in [(points, 'points'), (img_metas, 'img_metas')]:
+        batch_size = len(batch_data_samples)
+        batch_input_metas = []
+        if batch_size != len(batch_inputs_dict['points']):
+            raise ValueError(
+                'num of augmentations ({}) != num of image meta ({})'.format(
+                    len(batch_inputs_dict['points']), len(batch_input_metas)))
+        for batch_index in range(batch_size):
+            metainfo = batch_data_samples[batch_index].metainfo
+            batch_input_metas.append(metainfo)
+        for var, name in [(batch_inputs_dict['points'], 'points'),
+                          (batch_input_metas, 'img_metas')]:
             if not isinstance(var, list):
                 raise TypeError('{} must be a list, but got {}'.format(
                     name, type(var)))
-
-        num_augs = len(points)
-        if num_augs != len(img_metas):
-            raise ValueError(
-                'num of augmentations ({}) != num of image meta ({})'.format(
-                    len(points), len(img_metas)))
-
-        if num_augs == 1:
-            img = [img] if img is None else img
-            return self.simple_test(points[0], img_metas[0], img[0], **kwargs)
+        if batch_size == 1:
+            return self.simple_test(
+                batch_inputs_dict, batch_input_metas, rescale=True, **kwargs)
         else:
-            return self.aug_test(points, img_metas, img, **kwargs)
+            return self.aug_test(
+                batch_inputs_dict, batch_input_metas, rescale=True, **kwargs)

-    @auto_fp16(apply_to=('img', 'points'))
-    def forward(self, return_loss=True, **kwargs):
-        """Calls either forward_train or forward_test depending on whether
-        return_loss=True.
-
-        Note this setting will change the expected inputs. When
-        `return_loss=True`, img and img_metas are single-nested (i.e.
-        torch.Tensor and list[dict]), and when `return_loss=False`, img and
-        img_metas should be double nested (i.e. list[torch.Tensor],
-        list[list[dict]]), with the outer list indicating test time
-        augmentations.
+    def forward(self,
+                data: List[dict],
+                optimizer: Optional[Union[Optimizer, dict]] = None,
+                return_loss: bool = False,
+                **kwargs):
+        """The iteration step during training and testing. This method defines
+        an iteration step during training and testing, except for the back
+        propagation and optimizer updating during training, which are done in
+        an optimizer scheduler.
+
+        Args:
+            data (list[dict]): The output of the dataloader.
+            optimizer (:obj:`torch.optim.Optimizer`, dict, optional): The
+                optimizer of the runner. This argument is unused and
+                reserved. Defaults to None.
+            return_loss (bool): Whether to return loss. In general,
+                it will be set to True during training and False
+                during testing. Defaults to False.
+
+        Returns:
+            during training
+                dict: It should contain at least 3 keys: ``loss``,
+                ``log_vars`` and ``num_samples``.
+
+                - ``loss`` is a tensor for back propagation, which can be a
+                  weighted sum of multiple losses.
+                - ``log_vars`` contains all the variables to be sent to the
+                  logger.
+                - ``num_samples`` indicates the batch size (when the model
+                  is DDP, it means the batch size on each GPU), which is
+                  used for averaging the logs.
+
+            during testing
+                list[:obj:`Det3DDataSample`]: Detection results of the
+                input samples. Each Det3DDataSample usually contains
+                ``pred_instances_3d``, ``pred_panoptic_seg_3d`` or
+                ``pred_sem_seg_3d``.
         """
+        batch_inputs_dict, batch_data_samples = self.preprocess_data(data)
+
         if return_loss:
-            return self.forward_train(**kwargs)
+            losses = self.forward_train(batch_inputs_dict, batch_data_samples,
+                                        **kwargs)
+            loss, log_vars = self._parse_losses(losses)
+            outputs = dict(
+                loss=loss,
+                log_vars=log_vars,
+                num_samples=len(batch_data_samples))
+            return outputs
         else:
-            return self.forward_test(**kwargs)
+            return self.forward_simple_test(batch_inputs_dict,
+                                            batch_data_samples, **kwargs)

-    def show_results(self, data, result, out_dir, show=False, score_thr=None):
-        """Results visualization.
+    def preprocess_data(self, data: List[dict]) -> tuple:
+        """Process input data during training and simple testing phases.

         Args:
-            data (list[dict]): Input points and the information of the sample.
-            result (list[dict]): Prediction results.
-            out_dir (str): Output directory of visualization result.
-            show (bool, optional): Determines whether you are
-                going to show result by open3d.
-                Defaults to False.
-            score_thr (float, optional): Score threshold of bounding boxes.
-                Default to None.
+            data (list[dict]): The data to be processed, which
+                comes from the dataloader.
+
+        Returns:
+            tuple: It should contain 2 items.
+
+            - batch_inputs_dict (dict): The model input dict, which includes
+              'points' and 'img' keys.
+
+              - points (list[torch.Tensor]): Point cloud of each sample.
+              - imgs (torch.Tensor, optional): Image of each sample.
+
+            - batch_data_samples (list[:obj:`Det3DDataSample`]): The data
+              samples. It usually includes information such as
+              `gt_instance_3d` and `gt_instances`.
         """
-        for batch_id in range(len(result)):
-            if isinstance(data['points'][0], DC):
-                points = data['points'][0]._data[0][batch_id].numpy()
-            elif mmcv.is_list_of(data['points'][0], torch.Tensor):
-                points = data['points'][0][batch_id]
-            else:
-                ValueError(f"Unsupported data type {type(data['points'][0])} "
-                           f'for visualization!')
-            if isinstance(data['img_metas'][0], DC):
-                pts_filename = data['img_metas'][0]._data[0][batch_id][
-                    'pts_filename']
-                box_mode_3d = data['img_metas'][0]._data[0][batch_id][
-                    'box_mode_3d']
-            elif mmcv.is_list_of(data['img_metas'][0], dict):
-                pts_filename = data['img_metas'][0][batch_id]['pts_filename']
-                box_mode_3d = data['img_metas'][0][batch_id]['box_mode_3d']
-            else:
-                ValueError(
-                    f"Unsupported data type {type(data['img_metas'][0])} "
-                    f'for visualization!')
-            file_name = osp.split(pts_filename)[-1].split('.')[0]
-
-            assert out_dir is not None, 'Expect out_dir, got none.'
-
-            pred_bboxes = result[batch_id]['boxes_3d']
-            pred_labels = result[batch_id]['labels_3d']
-
-            if score_thr is not None:
-                mask = result[batch_id]['scores_3d'] > score_thr
-                pred_bboxes = pred_bboxes[mask]
-                pred_labels = pred_labels[mask]
-
-            # for now we convert points and bbox into depth mode
-            if (box_mode_3d == Box3DMode.CAM) or (box_mode_3d
-                                                  == Box3DMode.LIDAR):
-                points = Coord3DMode.convert_point(points, Coord3DMode.LIDAR,
-                                                   Coord3DMode.DEPTH)
-                pred_bboxes = Box3DMode.convert(pred_bboxes, box_mode_3d,
-                                                Box3DMode.DEPTH)
-            elif box_mode_3d != Box3DMode.DEPTH:
-                ValueError(
-                    f'Unsupported box_mode_3d {box_mode_3d} for conversion!')
-            pred_bboxes = pred_bboxes.tensor.cpu().numpy()
-            show_result(
-                points,
-                None,
-                pred_bboxes,
-                out_dir,
-                file_name,
-                show=show,
-                pred_labels=pred_labels)
+        batch_data_samples = [
+            data_['data_sample'].to(self.device) for data_ in data
+        ]
+        if 'points' in data[0]['inputs'].keys():
+            points = [
+                data_['inputs']['points'].to(self.device) for data_ in data
+            ]
+        else:
+            raise KeyError(
+                "Model input dict needs to include the 'points' key.")
+        if 'img' in data[0]['inputs'].keys():
+            imgs = [data_['inputs']['img'].to(self.device) for data_ in data]
+        else:
+            imgs = None
+        if self.preprocess_cfg is None:
+            batch_inputs_dict = {
+                'points': points,
+                'imgs': stack_batch(imgs).float() if imgs is not None else None
+            }
+            return batch_inputs_dict, batch_data_samples
+
+        if self.to_rgb and imgs[0].size(0) == 3:
+            imgs = [_img[[2, 1, 0], ...] for _img in imgs]
+        imgs = [(_img - self.pixel_mean) / self.pixel_std for _img in imgs]
+        batch_img = stack_batch(imgs, self.pad_size_divisor, self.pad_value)
+        batch_inputs_dict = {'points': points, 'imgs': batch_img}
+        return batch_inputs_dict, batch_data_samples
+
+    def postprocess_result(self, results_list: List[InstanceData]) \
+            -> List[Det3DDataSample]:
+        """Convert a results list to `Det3DDataSample`.
+
+        Args:
+            results_list (list[:obj:`InstanceData`]): Detection results of
+                each sample.
+
+        Returns:
+            list[:obj:`Det3DDataSample`]: Detection results of the
+            input samples. Each Det3DDataSample usually contains
+            'pred_instances_3d', and ``pred_instances_3d`` usually
+            contains the following keys.
+
+            - scores_3d (Tensor): Classification scores, has a shape
+              (num_instances, )
+            - labels_3d (Tensor): Labels of bboxes, has a shape
+              (num_instances, ).
+            - bboxes_3d (:obj:`BaseInstance3DBoxes`): Prediction of bboxes,
+              contains a tensor with shape (num_instances, 7).
+        """
+        for i in range(len(results_list)):
+            result = Det3DDataSample()
+            result.pred_instances_3d = results_list[i]
+            results_list[i] = result
+        return results_list
+
+    def show_results(self, data, result, out_dir, show=False, score_thr=None):
+        # TODO
+        pass
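
The new `forward` consumes dataloader items of the shape exercised by the test at the end of this commit; a sketch, with `model` and `data_sample` assumed already built:

    import torch

    data = [
        dict(
            inputs=dict(points=torch.rand(2010, 4)),
            data_sample=data_sample)  # a Det3DDataSample with gt_instances_3d
    ]
    outputs = model.forward(data, return_loss=True)   # loss dict for training
    results = model.forward(data, return_loss=False)  # list[Det3DDataSample]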
 # Copyright (c) OpenMMLab. All rights reserved.
+from typing import List, Optional
+
+import torch
+
 from mmdet3d.registry import MODELS
 from .base import Base3DDetector
@@ -23,13 +27,15 @@ class SingleStage3DDetector(Base3DDetector):

     def __init__(self,
                  backbone,
-                 neck=None,
-                 bbox_head=None,
-                 train_cfg=None,
-                 test_cfg=None,
-                 init_cfg=None,
-                 pretrained=None):
-        super(SingleStage3DDetector, self).__init__(init_cfg)
+                 neck: Optional[dict] = None,
+                 bbox_head: Optional[dict] = None,
+                 train_cfg: Optional[dict] = None,
+                 test_cfg: Optional[dict] = None,
+                 preprocess_cfg: Optional[dict] = None,
+                 init_cfg: Optional[dict] = None,
+                 pretrained: Optional[str] = None) -> None:
+        super(SingleStage3DDetector, self).__init__(
+            preprocess_cfg=preprocess_cfg, init_cfg=init_cfg)
         self.backbone = MODELS.build(backbone)
         if neck is not None:
             self.neck = MODELS.build(neck)
@@ -39,12 +45,12 @@ class SingleStage3DDetector(Base3DDetector):
         self.train_cfg = train_cfg
         self.test_cfg = test_cfg

-    def forward_dummy(self, points):
+    def forward_dummy(self, batch_inputs: dict) -> tuple:
         """Used for computing network flops.

         See `mmdetection/tools/analysis_tools/get_flops.py`
         """
-        x = self.extract_feat(points)
+        x = self.extract_feat(batch_inputs['points'])
         try:
             sample_mod = self.train_cfg.sample_mod
             outs = self.bbox_head(x, sample_mod)
@@ -52,20 +58,20 @@ class SingleStage3DDetector(Base3DDetector):
             outs = self.bbox_head(x)
         return outs

-    def extract_feat(self, points, img_metas=None):
+    def extract_feat(self, points: List[torch.Tensor]) -> list:
         """Directly extract features from the backbone+neck.

         Args:
-            points (torch.Tensor): Input points.
+            points (List[torch.Tensor]): Input points.
         """
-        x = self.backbone(points)
+        x = self.backbone(points[0])
         if self.with_neck:
             x = self.neck(x)
         return x

-    def extract_feats(self, points, img_metas):
+    def extract_feats(self, batch_inputs_dict: dict) -> list:
         """Extract features of multiple samples."""
         return [
-            self.extract_feat(pts, img_meta)
-            for pts, img_meta in zip(points, img_metas)
+            self.extract_feat([points])
+            for points in batch_inputs_dict['points']
         ]
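
A short sketch of the reworked feature extraction (`detector` assumed already built; point counts illustrative). `extract_feats` iterates `batch_inputs_dict['points']` and wraps each tensor in a one-element list for `extract_feat`:

    import torch

    batch_inputs_dict = dict(points=[torch.rand(1000, 4),
                                     torch.rand(1200, 4)])
    feats_per_sample = detector.extract_feats(batch_inputs_dict)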
 # Copyright (c) OpenMMLab. All rights reserved.
+from typing import Dict, List, Optional
+
 import torch
 from mmcv.ops import Voxelization
 from mmcv.runner import force_fp32
 from torch.nn import functional as F

-from mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d
+from mmdet3d.core import Det3DDataSample
 from mmdet3d.registry import MODELS
 from .single_stage import SingleStage3DDetector
@@ -14,16 +16,16 @@ class VoxelNet(SingleStage3DDetector):
     r"""`VoxelNet <https://arxiv.org/abs/1711.06396>`_ for 3D detection."""

     def __init__(self,
-                 voxel_layer,
-                 voxel_encoder,
-                 middle_encoder,
-                 backbone,
-                 neck=None,
-                 bbox_head=None,
-                 train_cfg=None,
-                 test_cfg=None,
-                 init_cfg=None,
-                 pretrained=None):
+                 voxel_layer: dict,
+                 voxel_encoder: dict,
+                 middle_encoder: dict,
+                 backbone: dict,
+                 neck: Optional[dict] = None,
+                 bbox_head: Optional[dict] = None,
+                 train_cfg: Optional[dict] = None,
+                 test_cfg: Optional[dict] = None,
+                 init_cfg: Optional[dict] = None,
+                 pretrained: Optional[str] = None) -> None:
         super(VoxelNet, self).__init__(
             backbone=backbone,
             neck=neck,
@@ -36,7 +38,7 @@ class VoxelNet(SingleStage3DDetector):
         self.voxel_encoder = MODELS.build(voxel_encoder)
         self.middle_encoder = MODELS.build(middle_encoder)

-    def extract_feat(self, points, img_metas=None):
+    def extract_feat(self, points: List[torch.Tensor]) -> list:
         """Extract features from points."""
         voxels, num_points, coors = self.voxelize(points)
         voxel_features = self.voxel_encoder(voxels, num_points, coors)
@@ -49,7 +51,7 @@ class VoxelNet(SingleStage3DDetector):

     @torch.no_grad()
     @force_fp32()
-    def voxelize(self, points):
+    def voxelize(self, points: List[torch.Tensor]) -> tuple:
         """Apply hard voxelization to points."""
         voxels, coors, num_points = [], [], []
         for res in points:
@@ -66,64 +68,75 @@ class VoxelNet(SingleStage3DDetector):
         coors_batch = torch.cat(coors_batch, dim=0)
         return voxels, num_points, coors_batch

-    def forward_train(self,
-                      points,
-                      img_metas,
-                      gt_bboxes_3d,
-                      gt_labels_3d,
-                      gt_bboxes_ignore=None):
-        """Training forward function.
+    def forward_train(self, batch_inputs_dict: Dict[list, torch.Tensor],
+                      batch_data_samples: List[Det3DDataSample],
+                      **kwargs) -> dict:
+        """
         Args:
-            points (list[torch.Tensor]): Point cloud of each sample.
-            img_metas (list[dict]): Meta information of each sample.
-            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
-                boxes for each sample.
-            gt_labels_3d (list[torch.Tensor]): Ground truth labels for
-                boxes of each sample.
-            gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth
-                boxes to be ignored. Defaults to None.
+            batch_inputs_dict (dict): The model input dict. It should contain
+                ``points`` and ``img`` keys.
+
+                - points (list[torch.Tensor]): Point cloud of each sample.
+                - imgs (torch.Tensor, optional): Image of each sample.
+
+            batch_data_samples (list[:obj:`Det3DDataSample`]): The batch
+                data samples. It usually includes information such
+                as `gt_instance_3d`, `gt_panoptic_seg_3d` or `gt_sem_seg_3d`.

         Returns:
-            dict: Losses of each branch.
+            dict[str, Tensor]: A dictionary of loss components.
         """
-        x = self.extract_feat(points, img_metas)
-        outs = self.bbox_head(x)
-        loss_inputs = outs + (gt_bboxes_3d, gt_labels_3d, img_metas)
-        losses = self.bbox_head.loss(
-            *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
+        x = self.extract_feat(batch_inputs_dict['points'])
+        losses = self.bbox_head.forward_train(x, batch_data_samples, **kwargs)
         return losses

-    def simple_test(self, points, img_metas, imgs=None, rescale=False):
-        """Test function without augmentation."""
-        x = self.extract_feat(points, img_metas)
-        outs = self.bbox_head(x)
-        bbox_list = self.bbox_head.get_bboxes(
-            *outs, img_metas, rescale=rescale)
-        bbox_results = [
-            bbox3d2result(bboxes, scores, labels)
-            for bboxes, scores, labels in bbox_list
-        ]
-        return bbox_results
+    def simple_test(self,
+                    batch_inputs_dict: Dict[list, torch.Tensor],
+                    batch_input_metas: List[dict],
+                    rescale: bool = False) -> list:
+        """Test function without test-time augmentation.
+
+        Args:
+            batch_inputs_dict (dict): The model input dict. It should contain
+                ``points`` and ``img`` keys.
+
+                - points (list[torch.Tensor]): Point cloud of a single
+                  sample.
+                - imgs (torch.Tensor, optional): Image of a single sample.
+
+            batch_input_metas (list[dict]): List of input information.
+            rescale (bool, optional): Whether to rescale the results.
+                Defaults to False.
+
+        Returns:
+            list[:obj:`Det3DDataSample`]: Detection results of the
+            inputs. Each Det3DDataSample usually contains
+            'pred_instances_3d', and ``pred_instances_3d`` usually
+            contains the following keys.
+
+            - scores_3d (Tensor): Classification scores, has a shape
+              (num_instances, )
+            - labels_3d (Tensor): Labels of bboxes, has a shape
+              (num_instances, ).
+            - bboxes_3d (:obj:`BaseInstance3DBoxes`): Prediction of bboxes,
+              contains a tensor with shape (num_instances, 7).
+        """
+        x = self.extract_feat(batch_inputs_dict['points'])
+        bboxes_list = self.bbox_head.simple_test(
+            x, batch_input_metas, rescale=rescale)
+        # convert to Det3DDataSample
+        results_list = self.postprocess_result(bboxes_list)
+        return results_list

-    def aug_test(self, points, img_metas, imgs=None, rescale=False):
+    def aug_test(self,
+                 aug_batch_inputs_dict: Dict[list, torch.Tensor],
+                 aug_batch_input_metas: List[dict],
+                 rescale: bool = False) -> list:
         """Test function with augmentation."""
-        feats = self.extract_feats(points, img_metas)
-
-        # only support aug_test for one sample
-        aug_bboxes = []
-        for x, img_meta in zip(feats, img_metas):
-            outs = self.bbox_head(x)
-            bbox_list = self.bbox_head.get_bboxes(
-                *outs, img_meta, rescale=rescale)
-            bbox_list = [
-                dict(boxes_3d=bboxes, scores_3d=scores, labels_3d=labels)
-                for bboxes, scores, labels in bbox_list
-            ]
-            aug_bboxes.append(bbox_list[0])
-
-        # after merging, bboxes will be rescaled to the original image size
-        merged_bboxes = merge_aug_bboxes_3d(aug_bboxes, img_metas,
-                                            self.bbox_head.test_cfg)
-        return [merged_bboxes]
+        # TODO: refactor this after the mmdet update
+        feats = self.extract_feats(aug_batch_inputs_dict)
+        aug_bboxes = self.bbox_head.aug_test(
+            feats, aug_batch_input_metas, rescale=rescale)
+        return aug_bboxes
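
For orientation, a hedged sketch of the hard-voxelization outputs (shapes assume a `Voxelization` layer with a fixed maximum of points per voxel; names follow the code above):

    import torch

    points = [torch.rand(2010, 4), torch.rand(2020, 4)]  # one tensor per sample
    voxels, num_points, coors = model.voxelize(points)
    # voxels:     (num_voxels, max_points_per_voxel, 4) padded point features
    # num_points: (num_voxels,) count of valid points in each voxel
    # coors:      (num_voxels, 4) as (batch_idx, z, y, x)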
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import random
from os.path import dirname, exists, join

import numpy as np
import pytest
import torch
from mmengine.data import InstanceData

from mmdet3d.core import Det3DDataSample
from mmdet3d.core.bbox import LiDARInstance3DBoxes
from mmdet3d.registry import MODELS


def _setup_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True


def _get_config_directory():
    """Find the predefined detector config directory."""
    try:
        # Assume we are running in the source mmdetection3d repo
        repo_dpath = dirname(dirname(dirname(__file__)))
    except NameError:
        # For IPython development when __file__ is not defined
        import mmdet3d
        repo_dpath = dirname(dirname(mmdet3d.__file__))
    config_dpath = join(repo_dpath, 'configs')
    if not exists(config_dpath):
        raise Exception('Cannot find config path')
    return config_dpath


def _get_config_module(fname):
    """Load a configuration as a python module."""
    from mmcv import Config
    config_dpath = _get_config_directory()
    config_fpath = join(config_dpath, fname)
    config_mod = Config.fromfile(config_fpath)
    return config_mod


def _get_model_cfg(fname):
    """Grab configs necessary to create a model.

    These are deep copied to allow for safe modification of parameters
    without influencing other tests.
    """
    config = _get_config_module(fname)
    model = copy.deepcopy(config.model)
    return model


def _get_detector_cfg(fname):
    """Grab configs necessary to create a detector.

    These are deep copied to allow for safe modification of parameters
    without influencing other tests.
    """
    import mmcv
    config = _get_config_module(fname)
    model = copy.deepcopy(config.model)
    train_cfg = mmcv.Config(copy.deepcopy(config.model.train_cfg))
    test_cfg = mmcv.Config(copy.deepcopy(config.model.test_cfg))

    model.update(train_cfg=train_cfg)
    model.update(test_cfg=test_cfg)
    return model


def test_voxel_net():
    import mmdet3d.models

    assert hasattr(mmdet3d.models, 'VoxelNet')
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')
    _setup_seed(0)
    voxel_net_cfg = _get_detector_cfg(
        'pointpillars/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class.py')
    model = MODELS.build(voxel_net_cfg).cuda()
    input_dict0 = dict(points=torch.rand([2010, 4], device='cuda'))
    input_dict1 = dict(points=torch.rand([2020, 4], device='cuda'))
    gt_instance_3d_0 = InstanceData()
    gt_instance_3d_0.bboxes_3d = LiDARInstance3DBoxes(
        torch.rand([20, 7], device='cuda'))
    gt_instance_3d_0.labels_3d = torch.randint(0, 3, [20], device='cuda')
    data_sample_0 = Det3DDataSample(
        metainfo=dict(box_type_3d=LiDARInstance3DBoxes))
    data_sample_0.gt_instances_3d = gt_instance_3d_0

    gt_instance_3d_1 = InstanceData()
    gt_instance_3d_1.bboxes_3d = LiDARInstance3DBoxes(
        torch.rand([50, 7], device='cuda'))
    gt_instance_3d_1.labels_3d = torch.randint(0, 3, [50], device='cuda')
    data_sample_1 = Det3DDataSample(
        metainfo=dict(box_type_3d=LiDARInstance3DBoxes))
    data_sample_1.gt_instances_3d = gt_instance_3d_1
    data = [dict(inputs=input_dict0, data_sample=data_sample_0)]

    # test simple_test
    with torch.no_grad():
        results = model.forward(data, return_loss=False)
    bboxes_3d = results[0].pred_instances_3d['bboxes_3d']
    scores_3d = results[0].pred_instances_3d['scores_3d']
    labels_3d = results[0].pred_instances_3d['labels_3d']
    assert bboxes_3d.tensor.shape == (50, 7)
    assert scores_3d.shape == torch.Size([50])
    assert labels_3d.shape == torch.Size([50])

    # test forward_train
    data = [
        dict(inputs=input_dict0, data_sample=data_sample_0),
        dict(inputs=input_dict1, data_sample=data_sample_1)
    ]
    losses = model.forward(data, return_loss=True)
    assert losses['log_vars']['loss_cls'] >= 0
    assert losses['log_vars']['loss_bbox'] >= 0
    assert losses['log_vars']['loss_dir'] >= 0
    assert losses['log_vars']['loss'] >= 0

    # test aug_test
    metainfo = {
        'pcd_scale_factor': 1,
        'pcd_horizontal_flip': 1,
        'pcd_vertical_flip': 1,
        'box_type_3d': LiDARInstance3DBoxes
    }
    data_sample_0.set_metainfo(metainfo)
    data_sample_1.set_metainfo(metainfo)
    data = [
        dict(inputs=input_dict0, data_sample=data_sample_0),
        dict(inputs=input_dict1, data_sample=data_sample_1)
    ]
    results = model.forward(data, return_loss=False)