Commit 3a939d7f authored by VVsssssk, committed by ChaimZhu

[Refactor] Refactor two-stage model and PartA2

parent f9ebc59b
......@@ -4,6 +4,7 @@ point_cloud_range = [0, -40, -3, 70.4, 40, 1]
model = dict(
type='PartA2',
data_preprocessor=dict(type='Det3DDataPreprocessor'),
voxel_layer=dict(
max_num_points=5, # max_points_per_voxel
point_cloud_range=point_cloud_range,
......@@ -46,14 +47,16 @@ model = dict(
assign_per_class=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='FocalLoss',
type='mmdet.FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_bbox=dict(
type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
type='mmdet.CrossEntropyLoss', use_sigmoid=False,
loss_weight=0.2)),
roi_head=dict(
type='PartAggregationROIHead',
num_classes=3,
......@@ -64,14 +67,16 @@ model = dict(
seg_score_thr=0.3,
num_classes=3,
loss_seg=dict(
type='FocalLoss',
type='mmdet.FocalLoss',
use_sigmoid=True,
reduction='sum',
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_part=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
type='mmdet.CrossEntropyLoss',
use_sigmoid=True,
loss_weight=1.0)),
seg_roi_extractor=dict(
type='Single3DRoIAwareExtractor',
roi_layer=dict(
......@@ -79,7 +84,7 @@ model = dict(
out_size=14,
max_pts_per_voxel=128,
mode='max')),
part_roi_extractor=dict(
bbox_roi_extractor=dict(
type='Single3DRoIAwareExtractor',
roi_layer=dict(
type='RoIAwarePool3d',
......@@ -103,12 +108,12 @@ model = dict(
roi_feat_size=14,
with_corner_loss=True,
loss_bbox=dict(
type='SmoothL1Loss',
type='mmdet.SmoothL1Loss',
beta=1.0 / 9.0,
reduction='sum',
loss_weight=1.0),
loss_cls=dict(
type='CrossEntropyLoss',
type='mmdet.CrossEntropyLoss',
use_sigmoid=True,
reduction='sum',
loss_weight=1.0))),
......@@ -117,21 +122,21 @@ model = dict(
rpn=dict(
assigner=[
dict( # for Pedestrian
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.35,
min_pos_iou=0.35,
ignore_iof_thr=-1),
dict( # for Cyclist
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.35,
min_pos_iou=0.35,
ignore_iof_thr=-1),
dict( # for Car
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
......@@ -151,7 +156,7 @@ model = dict(
rcnn=dict(
assigner=[
dict( # for Pedestrian
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(
type='BboxOverlaps3D', coordinate='lidar'),
pos_iou_thr=0.55,
......@@ -159,7 +164,7 @@ model = dict(
min_pos_iou=0.55,
ignore_iof_thr=-1),
dict( # for Cyclist
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(
type='BboxOverlaps3D', coordinate='lidar'),
pos_iou_thr=0.55,
......@@ -167,7 +172,7 @@ model = dict(
min_pos_iou=0.55,
ignore_iof_thr=-1),
dict( # for Car
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(
type='BboxOverlaps3D', coordinate='lidar'),
pos_iou_thr=0.55,
......
......@@ -38,8 +38,9 @@ train_pipeline = [
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectNameFilter', classes=class_names),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
dict(
type='Pack3DDetInputs',
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),
......@@ -57,66 +58,70 @@ test_pipeline = [
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
dict(type='Pack3DDetInputs', keys=['points'])
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
dict(type='Pack3DDetInputs', keys=['points'])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
train_dataloader = dict(
batch_size=2,
num_workers=2,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='RepeatDataset',
times=2,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
pts_prefix='velodyne_reduced',
ann_file='kitti_infos_train.pkl',
data_prefix=dict(pts='training/velodyne_reduced'),
pipeline=train_pipeline,
modality=input_modality,
classes=class_names,
metainfo=dict(CLASSES=class_names),
box_type_3d='LiDAR',
test_mode=False)),
val=dict(
test_mode=False)))
test_dataloader = dict(
batch_size=1,
num_workers=1,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
ann_file='kitti_infos_val.pkl',
data_prefix=dict(pts='training/velodyne_reduced'),
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
metainfo=dict(CLASSES=class_names),
box_type_3d='LiDAR',
test_mode=True),
test=dict(
test_mode=True))
val_dataloader = dict(
batch_size=1,
num_workers=1,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
ann_file='kitti_infos_val.pkl',
data_prefix=dict(pts='training/velodyne_reduced'),
pipeline=eval_pipeline,
modality=input_modality,
classes=class_names,
metainfo=dict(CLASSES=class_names),
box_type_3d='LiDAR',
test_mode=True))
val_evaluator = dict(
type='KittiMetric',
ann_file=data_root + 'kitti_infos_val.pkl',
metric='bbox')
test_evaluator = val_evaluator
# Part-A2 uses a different learning rate from what SECOND uses.
lr = 0.001
optimizer = dict(lr=lr)
evaluation = dict(pipeline=eval_pipeline)
optim_wrapper = dict(optimizer=dict(lr=0.001))
find_unused_parameters = True
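
For reference, the migrated config is consumed directly by mmengine's Runner in the 1.x design. A minimal, hypothetical launch sketch (the config path and work_dir are illustrative, not part of this diff):

import os
from mmengine.config import Config
from mmengine.runner import Runner

# Build everything (model, train_dataloader, val_evaluator, optim_wrapper)
# straight from the refactored config.
cfg = Config.fromfile(
    'configs/parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class.py')
cfg.work_dir = os.path.join('.', 'work_dirs', 'parta2')  # assumed output dir
runner = Runner.from_cfg(cfg)
runner.train()  # validation uses the val_dataloader + val_evaluator above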
......@@ -22,7 +22,7 @@ model = dict(
_delete_=True,
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
......@@ -40,7 +40,7 @@ model = dict(
use_rotate_nms=False),
rcnn=dict(
assigner=dict( # for Car
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlaps3D', coordinate='lidar'),
pos_iou_thr=0.55,
neg_iou_thr=0.55,
......@@ -102,8 +102,9 @@ train_pipeline = [
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectNameFilter', classes=class_names),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
dict(
type='Pack3DDetInputs',
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),
......@@ -129,9 +130,11 @@ test_pipeline = [
])
]
data = dict(
train=dict(dataset=dict(pipeline=train_pipeline, classes=class_names)),
val=dict(pipeline=test_pipeline, classes=class_names),
test=dict(pipeline=test_pipeline, classes=class_names))
train_dataloader = dict(
dataset=dict(
dataset=dict(
pipeline=train_pipeline, metainfo=dict(CLASSES=class_names))))
test_dataloader = dict(
dataset=dict(pipeline=test_pipeline, metainfo=dict(CLASSES=class_names)))
val_dataloader = dict(dataset=dict(metainfo=dict(CLASSES=class_names)))
find_unused_parameters = True
......@@ -115,7 +115,10 @@ class Max3DIoUAssigner(MaxIoUAssigner):
>>> assert torch.all(assign_result.gt_inds == expected_gt_inds)
"""
gt_bboxes = gt_instances.bboxes_3d
if 'priors' in pred_instances:
priors = pred_instances.priors
else:
priors = pred_instances.bboxes_3d.tensor
gt_labels = gt_instances.labels_3d
if gt_instances_ignore is not None:
gt_bboxes_ignore = gt_instances_ignore.bboxes_3d
......
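
The hunk above makes Max3DIoUAssigner accept either RPN anchors (under ``priors``) or second-stage proposals (under ``bboxes_3d``). A minimal sketch of the two input shapes, assuming mmengine-style InstanceData as imported elsewhere in this diff; all tensors are placeholders:

import torch
from mmengine.data import InstanceData

gt_instances = InstanceData()
gt_instances.bboxes_3d = torch.rand(4, 7)  # placeholder (x, y, z, dx, dy, dz, yaw)
gt_instances.labels_3d = torch.randint(0, 3, (4,))

# RPN stage: flattened anchors arrive under the ``priors`` field.
rpn_preds = InstanceData(priors=torch.rand(100, 7))

# RCNN stage: proposals arrive as a box structure under ``bboxes_3d``,
# so the assigner falls back to ``pred_instances.bboxes_3d.tensor``.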
......@@ -369,8 +369,8 @@ class Anchor3DHead(Base3DDenseHead, AnchorTrainMixin):
dir_cls_preds (list[torch.Tensor]): Multi-level direction
class predictions.
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instances. It usually includes ``bboxes`` and ``labels``
attributes.
gt_instances. It usually includes ``bboxes_3d``
and ``labels_3d`` attributes.
batch_input_metas (list[dict]): Contain pcd and img's meta info.
batch_gt_instances_ignore (list[:obj:`InstanceData`], optional):
Batch of gt_instances_ignore. It includes ``bboxes`` attribute
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List, Tuple
import numpy as np
import torch
from mmcv.runner import force_fp32
from mmcv import ConfigDict
from mmengine.data import InstanceData
from torch import Tensor
from mmdet3d.core import limit_period, xywhr2xyxyr
from mmdet3d.core.post_processing import nms_bev, nms_normal_bev
from mmdet3d.core.utils import InstanceList, SampleList
from mmdet3d.registry import MODELS
from .anchor3d_head import Anchor3DHead
......@@ -48,13 +53,13 @@ class PartA2RPNHead(Anchor3DHead):
"""
def __init__(self,
num_classes,
in_channels,
train_cfg,
test_cfg,
feat_channels=256,
use_direction_classifier=True,
anchor_generator=dict(
num_classes: int,
in_channels: int,
train_cfg: ConfigDict,
test_cfg: ConfigDict,
feat_channels: int = 256,
use_direction_classifier: bool = True,
anchor_generator: Dict = dict(
type='Anchor3DRangeGenerator',
range=[0, -39.68, -1.78, 69.12, 39.68, -1.78],
strides=[2],
......@@ -62,83 +67,45 @@ class PartA2RPNHead(Anchor3DHead):
rotations=[0, 1.57],
custom_values=[],
reshape_out=False),
assigner_per_size=False,
assign_per_class=False,
diff_rad_by_sin=True,
dir_offset=-np.pi / 2,
dir_limit_offset=0,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='CrossEntropyLoss',
assigner_per_size: bool = False,
assign_per_class: bool = False,
diff_rad_by_sin: bool = True,
dir_offset: float = -np.pi / 2,
dir_limit_offset: float = 0,
bbox_coder: Dict = dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls: Dict = dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=True,
loss_weight=1.0),
loss_bbox=dict(
type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(type='CrossEntropyLoss', loss_weight=0.2),
init_cfg=None):
super().__init__(num_classes, in_channels, train_cfg, test_cfg,
feat_channels, use_direction_classifier,
anchor_generator, assigner_per_size, assign_per_class,
diff_rad_by_sin, dir_offset, dir_limit_offset,
bbox_coder, loss_cls, loss_bbox, loss_dir, init_cfg)
@force_fp32(apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds'))
def loss(self,
cls_scores,
bbox_preds,
dir_cls_preds,
gt_bboxes,
gt_labels,
input_metas,
gt_bboxes_ignore=None):
"""Calculate losses.
Args:
cls_scores (list[torch.Tensor]): Multi-level class scores.
bbox_preds (list[torch.Tensor]): Multi-level bbox predictions.
dir_cls_preds (list[torch.Tensor]): Multi-level direction
class predictions.
gt_bboxes (list[:obj:`BaseInstance3DBoxes`]): Ground truth boxes
of each sample.
gt_labels (list[torch.Tensor]): Labels of each sample.
input_metas (list[dict]): Point cloud and image's meta info.
gt_bboxes_ignore (list[torch.Tensor]): Specify which
bounding boxes can be ignored when computing the loss.
Returns:
dict[str, list[torch.Tensor]]: Classification, bbox, and
direction losses of each level.
- loss_rpn_cls (list[torch.Tensor]): Classification losses.
- loss_rpn_bbox (list[torch.Tensor]): Box regression losses.
- loss_rpn_dir (list[torch.Tensor]): Direction classification
losses.
"""
loss_dict = super().loss(cls_scores, bbox_preds, dir_cls_preds,
gt_bboxes, gt_labels, input_metas,
gt_bboxes_ignore)
# change the loss key names to avoid conflict
return dict(
loss_rpn_cls=loss_dict['loss_cls'],
loss_rpn_bbox=loss_dict['loss_bbox'],
loss_rpn_dir=loss_dict['loss_dir'])
loss_bbox: Dict = dict(
type='mmdet.SmoothL1Loss',
beta=1.0 / 9.0,
loss_weight=2.0),
loss_dir: Dict = dict(
type='mmdet.CrossEntropyLoss', loss_weight=0.2),
init_cfg: Dict = None) -> None:
super().__init__(num_classes, in_channels, feat_channels,
use_direction_classifier, anchor_generator,
assigner_per_size, assign_per_class, diff_rad_by_sin,
dir_offset, dir_limit_offset, bbox_coder, loss_cls,
loss_bbox, loss_dir, train_cfg, test_cfg, init_cfg)
def get_bboxes_single(self,
cls_scores,
bbox_preds,
dir_cls_preds,
mlvl_anchors,
input_meta,
cfg,
rescale=False):
def _predict_by_feat_single(self,
cls_score_list: List[Tensor],
bbox_pred_list: List[Tensor],
dir_cls_pred_list: List[Tensor],
mlvl_priors: List[Tensor],
input_meta: dict,
cfg: ConfigDict,
rescale: bool = False):
"""Get bboxes of single branch.
Args:
cls_scores (torch.Tensor): Class score in single batch.
bbox_preds (torch.Tensor): Bbox prediction in single batch.
dir_cls_preds (torch.Tensor): Predictions of direction class
cls_score_list (torch.Tensor): Class score in single batch.
bbox_pred_list (torch.Tensor): Bbox prediction in single batch.
dir_cls_pred_list (torch.Tensor): Predictions of direction class
in single batch.
mlvl_anchors (List[torch.Tensor]): Multi-level anchors
mlvl_priors (List[torch.Tensor]): Multi-level anchors
in single batch.
input_meta (dict): Contains pcd and img's meta info.
cfg (:obj:`ConfigDict`): Training or testing config.
......@@ -152,14 +119,15 @@ class PartA2RPNHead(Anchor3DHead):
- labels_3d (torch.Tensor): Label of each bbox.
- cls_preds (torch.Tensor): Class score of each bbox.
"""
assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_priors)
mlvl_bboxes = []
mlvl_max_scores = []
mlvl_label_pred = []
mlvl_dir_scores = []
mlvl_cls_score = []
for cls_score, bbox_pred, dir_cls_pred, anchors in zip(
cls_scores, bbox_preds, dir_cls_preds, mlvl_anchors):
cls_score_list, bbox_pred_list, dir_cls_pred_list,
mlvl_priors):
assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
assert cls_score.size()[-2:] == dir_cls_pred.size()[-2:]
dir_cls_pred = dir_cls_pred.permute(1, 2, 0).reshape(-1, 2)
......@@ -216,13 +184,98 @@ class PartA2RPNHead(Anchor3DHead):
mlvl_cls_score, mlvl_dir_scores,
score_thr, cfg.nms_post, cfg,
input_meta)
return result
def class_agnostic_nms(self, mlvl_bboxes, mlvl_bboxes_for_nms,
mlvl_max_scores, mlvl_label_pred, mlvl_cls_score,
mlvl_dir_scores, score_thr, max_num, cfg,
input_meta):
def loss_and_predict(self,
feats_dict: Dict,
batch_data_samples: SampleList,
proposal_cfg: ConfigDict = None,
**kwargs) -> Tuple[dict, InstanceList]:
"""Perform forward propagation of the head, then calculate loss and
predictions from the features and data samples.
Args:
feats_dict (dict): Contains features from the first stage.
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
samples. It usually includes information such as
`gt_instance_3d`, `gt_panoptic_seg_3d` and `gt_sem_seg_3d`.
proposal_cfg (ConfigDict, optional): Proposal config.
Returns:
tuple: The return value is a tuple that contains:
- losses: (dict[str, Tensor]): A dictionary of loss components.
- predictions (list[:obj:`InstanceData`]): Detection
results of each sample after the post process.
"""
batch_gt_instances_3d = []
batch_gt_instances_ignore = []
batch_input_metas = []
for data_sample in batch_data_samples:
batch_input_metas.append(data_sample.metainfo)
batch_gt_instances_3d.append(data_sample.gt_instances_3d)
batch_gt_instances_ignore.append(
data_sample.get('ignored_instances', None))
outs = self(feats_dict['neck_feats'])
loss_inputs = outs + (batch_gt_instances_3d, batch_input_metas,
batch_gt_instances_ignore)
losses = self.loss_by_feat(*loss_inputs)
predictions = self.predict_by_feat(
*outs, batch_input_metas=batch_input_metas, cfg=proposal_cfg)
return losses, predictions
def loss_by_feat(self,
cls_scores: List[Tensor],
bbox_preds: List[Tensor],
dir_cls_preds: List[Tensor],
batch_gt_instances_3d: InstanceList,
batch_input_metas: List[dict],
batch_gt_instances_ignore: InstanceList = None) -> Dict:
"""Calculate the loss based on the features extracted by the detection
head.
Args:
cls_scores (list[torch.Tensor]): Multi-level class scores.
bbox_preds (list[torch.Tensor]): Multi-level bbox predictions.
dir_cls_preds (list[torch.Tensor]): Multi-level direction
class predictions.
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instances. It usually includes ``bboxes_3d`` and
``labels_3d`` attributes.
batch_input_metas (list[dict]): Contain pcd and img's meta info.
batch_gt_instances_ignore (list[:obj:`InstanceData`], optional):
Batch of gt_instances_ignore. It includes ``bboxes`` attribute
data that is ignored during training and testing.
Defaults to None.
Returns:
dict[str, list[torch.Tensor]]: Classification, bbox, and
direction losses of each level.
- loss_rpn_cls (list[torch.Tensor]): Classification losses.
- loss_rpn_bbox (list[torch.Tensor]): Box regression losses.
- loss_rpn_dir (list[torch.Tensor]): Direction classification
losses.
"""
loss_dict = super().loss_by_feat(cls_scores, bbox_preds, dir_cls_preds,
batch_gt_instances_3d,
batch_input_metas,
batch_gt_instances_ignore)
# change the loss key names to avoid conflict
return dict(
loss_rpn_cls=loss_dict['loss_cls'],
loss_rpn_bbox=loss_dict['loss_bbox'],
loss_rpn_dir=loss_dict['loss_dir'])
def class_agnostic_nms(self, mlvl_bboxes: Tensor,
mlvl_bboxes_for_nms: Tensor,
mlvl_max_scores: Tensor, mlvl_label_pred: Tensor,
mlvl_cls_score: Tensor, mlvl_dir_scores: Tensor,
score_thr: float, max_num: int, cfg: ConfigDict,
input_meta: dict) -> Dict:
"""Class agnostic nms for single batch.
Args:
......@@ -294,17 +347,53 @@ class PartA2RPNHead(Anchor3DHead):
cls_scores = cls_scores[inds]
bboxes = input_meta['box_type_3d'](
bboxes, box_dim=self.box_code_size)
return dict(
boxes_3d=bboxes,
scores_3d=scores,
labels_3d=labels,
cls_preds=cls_scores # raw scores [max_num, cls_num]
)
result = InstanceData()
result.bboxes_3d = bboxes
result.scores_3d = scores
result.labels_3d = labels
result.cls_preds = cls_scores
return result
else:
return dict(
boxes_3d=input_meta['box_type_3d'](
result = InstanceData()
result.bboxes_3d = input_meta['box_type_3d'](
mlvl_bboxes.new_zeros([0, self.box_code_size]),
box_dim=self.box_code_size),
scores_3d=mlvl_bboxes.new_zeros([0]),
labels_3d=mlvl_bboxes.new_zeros([0]),
cls_preds=mlvl_bboxes.new_zeros([0, mlvl_cls_score.shape[-1]]))
box_dim=self.box_code_size)
result.scores_3d = mlvl_bboxes.new_zeros([0])
result.labels_3d = mlvl_bboxes.new_zeros([0])
result.cls_preds = mlvl_bboxes.new_zeros(
[0, mlvl_cls_score.shape[-1]])
return result
def predict(self, feats_dict: Dict,
batch_data_samples: SampleList) -> InstanceList:
"""Perform forward propagation of the 3D detection head and predict
detection results on the features of the upstream network.
Args:
feats_dict (dict): Contains features from the first stage.
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
samples. It usually includes information such as
`gt_instance_3d`, `gt_panoptic_seg_3d` and `gt_sem_seg_3d`.
Returns:
list[:obj:`InstanceData`]: Detection results of each sample
after the post process.
Each item usually contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instances, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (BaseInstance3DBoxes): Prediction of bboxes,
contains a tensor with shape (num_instances, C), where
C >= 7.
"""
batch_input_metas = [
data_samples.metainfo for data_samples in batch_data_samples
]
rpn_outs = self(feats_dict['neck_feats'])
proposal_cfg = self.test_cfg
proposal_list = self.predict_by_feat(
*rpn_outs, cfg=proposal_cfg, batch_input_metas=batch_input_metas)
return proposal_list
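
A hedged usage sketch of this new entry point, assuming ``feats_dict`` comes from PartA2.extract_feat and the data samples carry metainfo:

# Each returned item is an InstanceData with bboxes_3d / scores_3d /
# labels_3d, plus the raw ``cls_preds`` that class_agnostic_nms keeps
# around for the second stage.
proposals = rpn_head.predict(feats_dict, batch_data_samples)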
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List, Optional
import torch
from mmcv.ops import Voxelization
from torch.nn import functional as F
......@@ -15,17 +17,17 @@ class PartA2(TwoStage3DDetector):
"""
def __init__(self,
voxel_layer,
voxel_encoder,
middle_encoder,
backbone,
neck=None,
rpn_head=None,
roi_head=None,
train_cfg=None,
test_cfg=None,
pretrained=None,
init_cfg=None):
voxel_layer: dict,
voxel_encoder: dict,
middle_encoder: dict,
backbone: dict,
neck: dict = None,
rpn_head: dict = None,
roi_head: dict = None,
train_cfg: dict = None,
test_cfg: dict = None,
init_cfg: dict = None,
data_preprocessor: Optional[dict] = None):
super(PartA2, self).__init__(
backbone=backbone,
neck=neck,
......@@ -33,14 +35,29 @@ class PartA2(TwoStage3DDetector):
roi_head=roi_head,
train_cfg=train_cfg,
test_cfg=test_cfg,
pretrained=pretrained,
init_cfg=init_cfg)
init_cfg=init_cfg,
data_preprocessor=data_preprocessor)
self.voxel_layer = Voxelization(**voxel_layer)
self.voxel_encoder = MODELS.build(voxel_encoder)
self.middle_encoder = MODELS.build(middle_encoder)
def extract_feat(self, points, img_metas):
"""Extract features from points."""
def extract_feat(self, batch_inputs_dict: Dict) -> Dict:
"""Directly extract features from the backbone+neck.
Args:
batch_inputs_dict (dict): The model input dict which include
'points', 'imgs' keys.
- points (list[torch.Tensor]): Point cloud of each sample.
- imgs (torch.Tensor, optional): Image of each sample.
Returns:
tuple[Tensor] | dict: For outdoor 3D object detection, we
typically obtain a tuple of features from the backbone + neck;
for indoor 3D object detection, a dict containing
the features is usually obtained.
"""
points = batch_inputs_dict['points']
voxel_dict = self.voxelize(points)
voxel_features = self.voxel_encoder(voxel_dict['voxels'],
voxel_dict['num_points'],
......@@ -52,10 +69,11 @@ class PartA2(TwoStage3DDetector):
if self.with_neck:
neck_feats = self.neck(x)
feats_dict.update({'neck_feats': neck_feats})
return feats_dict, voxel_dict
feats_dict['voxels_dict'] = voxel_dict
return feats_dict
@torch.no_grad()
def voxelize(self, points):
def voxelize(self, points: List[torch.Tensor]) -> Dict:
"""Apply hard voxelization to points."""
voxels, coors, num_points, voxel_centers = [], [], [], []
for res in points:
......@@ -84,67 +102,3 @@ class PartA2(TwoStage3DDetector):
coors=coors_batch,
voxel_centers=voxel_centers)
return voxel_dict
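
The voxelize method wraps mmcv's hard-voxelization op; a standalone sketch of that step in isolation (voxel_size and max_voxels are illustrative here, and the op may require a CUDA-enabled mmcv build):

import torch
from mmcv.ops import Voxelization

voxel_layer = Voxelization(
    voxel_size=[0.05, 0.05, 0.1],  # illustrative value
    point_cloud_range=[0, -40, -3, 70.4, 40, 1],
    max_num_points=5,  # max points kept per voxel, as in the config above
    max_voxels=(16000, 40000))  # (train, test) caps, illustrative

points = torch.rand(10000, 4)  # fake (x, y, z, intensity) cloud
voxels, coors, num_points = voxel_layer(points)
# voxels: (num_voxels, 5, 4); coors: (num_voxels, 3) as (z, y, x) indices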
def forward_train(self,
points,
img_metas,
gt_bboxes_3d,
gt_labels_3d,
gt_bboxes_ignore=None,
proposals=None):
"""Training forward function.
Args:
points (list[torch.Tensor]): Point cloud of each sample.
img_metas (list[dict]): Meta information of each sample
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
boxes for each sample.
gt_labels_3d (list[torch.Tensor]): Ground truth labels for
boxes of each sample.
gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth
boxes to be ignored. Defaults to None.
Returns:
dict: Losses of each branch.
"""
feats_dict, voxels_dict = self.extract_feat(points, img_metas)
losses = dict()
if self.with_rpn:
rpn_outs = self.rpn_head(feats_dict['neck_feats'])
rpn_loss_inputs = rpn_outs + (gt_bboxes_3d, gt_labels_3d,
img_metas)
rpn_losses = self.rpn_head.loss(
*rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
losses.update(rpn_losses)
proposal_cfg = self.train_cfg.get('rpn_proposal',
self.test_cfg.rpn)
proposal_inputs = rpn_outs + (img_metas, proposal_cfg)
proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)
else:
proposal_list = proposals
roi_losses = self.roi_head.forward_train(feats_dict, voxels_dict,
img_metas, proposal_list,
gt_bboxes_3d, gt_labels_3d)
losses.update(roi_losses)
return losses
def simple_test(self, points, img_metas, proposals=None, rescale=False):
"""Test function without augmentaiton."""
feats_dict, voxels_dict = self.extract_feat(points, img_metas)
if self.with_rpn:
rpn_outs = self.rpn_head(feats_dict['neck_feats'])
proposal_cfg = self.test_cfg.rpn
bbox_inputs = rpn_outs + (img_metas, proposal_cfg)
proposal_list = self.rpn_head.get_bboxes(*bbox_inputs)
else:
proposal_list = proposals
return self.roi_head.simple_test(feats_dict, voxels_dict, img_metas,
proposal_list)
......@@ -95,7 +95,7 @@ class SingleStage3DDetector(Base3DDetector):
Returns:
list[:obj:`Det3DDataSample`]: Detection results of the
input images. Each Det3DDataSample usually contain
input samples. Each Det3DDataSample usually contains
'pred_instances_3d'. And the ``pred_instances_3d`` usually
contains following keys.
......
# Copyright (c) OpenMMLab. All rights reserved.
import copy
from typing import Union
from mmdet3d.core.utils import (ConfigType, OptConfigType, OptMultiConfig,
SampleList)
from mmdet3d.registry import MODELS
from mmdet.models import TwoStageDetector
from .base import Base3DDetector
@MODELS.register_module()
class TwoStage3DDetector(Base3DDetector, TwoStageDetector):
class TwoStage3DDetector(Base3DDetector):
"""Base class of two-stage 3D detector.
It inherits original ``:class:TwoStageDetector`` and
``:class:Base3DDetector``. This class could serve as a base class for all
two-stage 3D detectors.
It inherits the original :class:`Base3DDetector` and can serve as a
base class for all two-stage 3D detectors.
"""
def __init__(
self,
backbone: ConfigType,
neck: OptConfigType = None,
rpn_head: OptConfigType = None,
roi_head: OptConfigType = None,
train_cfg: OptConfigType = None,
test_cfg: OptConfigType = None,
init_cfg: OptMultiConfig = None,
data_preprocessor: OptConfigType = None,
) -> None:
super(TwoStage3DDetector, self).__init__(
data_preprocessor=data_preprocessor, init_cfg=init_cfg)
self.backbone = MODELS.build(backbone)
if neck is not None:
self.neck = MODELS.build(neck)
if rpn_head is not None:
rpn_train_cfg = train_cfg.rpn if train_cfg is not None else None
rpn_head_ = rpn_head.copy()
rpn_head_.update(train_cfg=rpn_train_cfg, test_cfg=test_cfg.rpn)
rpn_head_num_classes = rpn_head_.get('num_classes', None)
if rpn_head_num_classes is None:
rpn_head_.update(num_classes=1)
self.rpn_head = MODELS.build(rpn_head_)
if roi_head is not None:
# update train and test cfg here for now
rcnn_train_cfg = train_cfg.rcnn if train_cfg is not None else None
roi_head.update(train_cfg=rcnn_train_cfg)
roi_head.update(test_cfg=test_cfg.rcnn)
self.roi_head = MODELS.build(roi_head)
self.train_cfg = train_cfg
self.test_cfg = test_cfg
@property
def with_rpn(self) -> bool:
"""bool: whether the detector has RPN"""
return hasattr(self, 'rpn_head') and self.rpn_head is not None
@property
def with_roi_head(self) -> bool:
"""bool: whether the detector has a RoI head"""
return hasattr(self, 'roi_head') and self.roi_head is not None
def loss(self, batch_inputs_dict: dict, batch_data_samples: SampleList,
**kwargs) -> Union[dict, list]:
"""Calculate losses from a batch of inputs and data samples.
Args:
batch_inputs_dict (dict): The model input dict which include
'points', 'imgs' keys.
- points (list[torch.Tensor]): Point cloud of each sample.
- imgs (torch.Tensor, optional): Image of each sample.
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
samples. It usually includes information such as
`gt_instance_3d`, `gt_panoptic_seg_3d` and `gt_sem_seg_3d`.
Returns:
dict: A dictionary of loss components.
"""
feats_dict = self.extract_feat(batch_inputs_dict)
losses = dict()
# RPN forward and loss
if self.with_rpn:
proposal_cfg = self.train_cfg.get('rpn_proposal',
self.test_cfg.rpn)
rpn_data_samples = copy.deepcopy(batch_data_samples)
rpn_losses, rpn_results_list = self.rpn_head.loss_and_predict(
feats_dict,
rpn_data_samples,
proposal_cfg=proposal_cfg,
**kwargs)
# avoid get same name with roi_head loss
keys = rpn_losses.keys()
for key in list(keys):
if 'loss' in key and 'rpn' not in key:
rpn_losses[f'rpn_{key}'] = rpn_losses.pop(key)
losses.update(rpn_losses)
else:
# TODO: Not supported currently; should add a check for Fast R-CNN
assert batch_data_samples[0].get('proposals', None) is not None
# use pre-defined proposals in InstanceData for the second stage
# to extract ROI features.
rpn_results_list = [
data_sample.proposals for data_sample in batch_data_samples
]
roi_losses = self.roi_head.loss(feats_dict, rpn_results_list,
batch_data_samples, **kwargs)
losses.update(roi_losses)
return losses
def predict(self, batch_inputs_dict: dict, batch_data_samples: SampleList,
**kwargs) -> SampleList:
"""Predict results from a batch of inputs and data samples with post-
processing.
Args:
batch_inputs_dict (dict): The model input dict which include
'points', 'imgs' keys.
- points (list[torch.Tensor]): Point cloud of each sample.
- imgs (torch.Tensor, optional): Image of each sample.
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
samples. It usually includes information such as
`gt_instance_3d`, `gt_panoptic_seg_3d` and `gt_sem_seg_3d`.
Returns:
list[:obj:`Det3DDataSample`]: Detection results of the
input samples. Each Det3DDataSample usually contains
'pred_instances_3d'. And the ``pred_instances_3d`` usually
contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instance, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (Tensor): Contains a tensor with shape
(num_instances, C) where C >=7.
"""
feats_dict = self.extract_feat(batch_inputs_dict)
if self.with_rpn:
rpn_results_list = self.rpn_head.predict(feats_dict,
batch_data_samples)
else:
rpn_results_list = [
data_sample.proposals for data_sample in batch_data_samples
]
results_list = self.roi_head.predict(feats_dict, rpn_results_list,
batch_data_samples)
# convert to Det3DDataSample
results_list = self.convert_to_datasample(results_list)
return results_list
def _forward(self, batch_inputs_dict: dict, batch_data_samples: SampleList,
**kwargs) -> tuple:
"""Network forward process. Usually includes backbone, neck and head
forward without any post-processing.
Args:
batch_inputs_dict (dict): The model input dict which include
'points', 'img' keys.
- points (list[torch.Tensor]): Point cloud of each sample.
- imgs (torch.Tensor, optional): Image of each sample.
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
samples. It usually includes information such as
`gt_instance_3d`, `gt_panoptic_seg_3d` and `gt_sem_seg_3d`.
Returns:
tuple: A tuple of features from ``rpn_head`` and ``roi_head``
forward.
"""
feats_dict = self.extract_feat(batch_inputs_dict)
rpn_outs = self.rpn_head.forward(feats_dict['neck_feats'])
# If there are no pre-defined proposals, use RPN to get proposals
if batch_data_samples[0].get('proposals', None) is None:
batch_input_metas = [
data_samples.metainfo for data_samples in batch_data_samples
]
rpn_results_list = self.rpn_head.predict_by_feat(
*rpn_outs, batch_input_metas=batch_input_metas)
else:
# TODO: Not checked currently.
rpn_results_list = [
data_sample.proposals for data_sample in batch_data_samples
]
# roi_head
roi_outs = self.roi_head._forward(feats_dict, rpn_results_list)
return rpn_outs + roi_outs
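
Taken together, the refactored two-stage detector exposes the three mmengine-style entry points. A hedged sketch of how a caller might drive them outside a Runner, assuming a built model and a batch already passed through the data preprocessor:

losses = model.loss(batch_inputs_dict, batch_data_samples)        # training
results = model.predict(batch_inputs_dict, batch_data_samples)    # inference
raw_outs = model._forward(batch_inputs_dict, batch_data_samples)  # raw tensors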
# Copyright (c) OpenMMLab. All rights reserved.
from abc import ABCMeta, abstractmethod
from mmdet3d.registry import MODELS, TASK_UTILS
from mmdet.models.roi_heads import BaseRoIHead
from mmcv.runner import BaseModule
class Base3DRoIHead(BaseModule, metaclass=ABCMeta):
class Base3DRoIHead(BaseRoIHead):
"""Base class for 3d RoIHeads."""
def __init__(self,
bbox_head=None,
mask_roi_extractor=None,
bbox_roi_extractor=None,
mask_head=None,
mask_roi_extractor=None,
train_cfg=None,
test_cfg=None,
pretrained=None,
init_cfg=None):
super(Base3DRoIHead, self).__init__(init_cfg=init_cfg)
self.train_cfg = train_cfg
self.test_cfg = test_cfg
if bbox_head is not None:
self.init_bbox_head(bbox_head)
if mask_head is not None:
self.init_mask_head(mask_roi_extractor, mask_head)
self.init_assigner_sampler()
@property
def with_bbox(self):
"""bool: whether the RoIHead has box head"""
return hasattr(self, 'bbox_head') and self.bbox_head is not None
@property
def with_mask(self):
"""bool: whether the RoIHead has mask head"""
return hasattr(self, 'mask_head') and self.mask_head is not None
@abstractmethod
def init_bbox_head(self):
"""Initialize the box head."""
pass
@abstractmethod
def init_mask_head(self):
"""Initialize maek head."""
pass
@abstractmethod
def init_assigner_sampler(self):
"""Initialize assigner and sampler."""
pass
@abstractmethod
def forward_train(self,
x,
img_metas,
proposal_list,
gt_bboxes,
gt_labels,
gt_bboxes_ignore=None,
**kwargs):
"""Forward function during training.
super(Base3DRoIHead, self).__init__(
bbox_head=bbox_head,
bbox_roi_extractor=bbox_roi_extractor,
mask_head=mask_head,
mask_roi_extractor=mask_roi_extractor,
train_cfg=train_cfg,
test_cfg=test_cfg,
init_cfg=init_cfg)
def init_bbox_head(self, bbox_roi_extractor: dict,
bbox_head: dict) -> None:
"""Initialize box head and box roi extractor.
Args:
x (dict): Contains features from the first stage.
img_metas (list[dict]): Meta info of each image.
proposal_list (list[dict]): Proposal information from rpn.
gt_bboxes (list[:obj:`BaseInstance3DBoxes`]):
GT bboxes of each sample. The bboxes are encapsulated
by 3D box structures.
gt_labels (list[torch.LongTensor]): GT labels of each sample.
gt_bboxes_ignore (list[torch.Tensor], optional):
Ground truth boxes to be ignored.
Returns:
dict[str, torch.Tensor]: Losses from each head.
bbox_roi_extractor (dict or ConfigDict): Config of box
roi extractor.
bbox_head (dict or ConfigDict): Config of box in box head.
"""
pass
self.bbox_roi_extractor = MODELS.build(bbox_roi_extractor)
self.bbox_head = MODELS.build(bbox_head)
def simple_test(self,
x,
proposal_list,
img_metas,
proposals=None,
rescale=False,
**kwargs):
"""Test without augmentation."""
pass
def aug_test(self, x, proposal_list, img_metas, rescale=False, **kwargs):
    """Test with augmentations.

    If rescale is False, then returned bboxes and masks will fit the scale
    of imgs[0].
    """
    pass

def init_assigner_sampler(self):
    """Initialize assigner and sampler."""
    self.bbox_assigner = None
    self.bbox_sampler = None
    if self.train_cfg:
        if isinstance(self.train_cfg.assigner, dict):
            self.bbox_assigner = TASK_UTILS.build(self.train_cfg.assigner)
        elif isinstance(self.train_cfg.assigner, list):
            self.bbox_assigner = [
                TASK_UTILS.build(res) for res in self.train_cfg.assigner
            ]
        self.bbox_sampler = TASK_UTILS.build(self.train_cfg.sampler)

def init_mask_head(self):
    """Initialize mask head, skip since ``PartAggregationROIHead`` does not
    have one."""
    pass
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List
import numpy as np
import torch
from mmcv.cnn import ConvModule, normal_init
from mmengine.data import InstanceData
from torch import Tensor
from mmdet3d.ops.spconv import IS_SPCONV2_AVAILABLE
......@@ -504,14 +508,14 @@ class PartA2BboxHead(BaseModule):
return corner_loss.mean(dim=1)
def get_bboxes(self,
rois,
cls_score,
bbox_pred,
class_labels,
class_pred,
img_metas,
cfg=None):
def get_results(self,
rois: Tensor,
cls_score: Tensor,
bbox_pred: Tensor,
class_labels: Tensor,
class_pred: Tensor,
input_metas: List[dict],
cfg: dict = None) -> List:
"""Generate bboxes from bbox head predictions.
Args:
......@@ -520,7 +524,7 @@ class PartA2BboxHead(BaseModule):
bbox_pred (torch.Tensor): Bounding boxes predictions
class_labels (torch.Tensor): Label of classes
class_pred (torch.Tensor): Score for nms.
img_metas (list[dict]): Point cloud and image's meta info.
input_metas (list[dict]): Point cloud and image's meta info.
cfg (:obj:`ConfigDict`): Testing config.
Returns:
......@@ -550,16 +554,19 @@ class PartA2BboxHead(BaseModule):
cur_rcnn_boxes3d = rcnn_boxes3d[roi_batch_id == batch_id]
keep = self.multi_class_nms(cur_box_prob, cur_rcnn_boxes3d,
cfg.score_thr, cfg.nms_thr,
img_metas[batch_id],
input_metas[batch_id],
cfg.use_rotate_nms)
selected_bboxes = cur_rcnn_boxes3d[keep]
selected_label_preds = cur_class_labels[keep]
selected_scores = cur_cls_score[keep]
result_list.append(
(img_metas[batch_id]['box_type_3d'](selected_bboxes,
self.bbox_coder.code_size),
selected_scores, selected_label_preds))
results = InstanceData()
results.bboxes_3d = input_metas[batch_id]['box_type_3d'](
selected_bboxes, self.bbox_coder.code_size)
results.scores_3d = selected_scores
results.labels_3d = selected_label_preds
result_list.append(results)
return result_list
def multi_class_nms(self,
......
......@@ -5,6 +5,7 @@ from torch import nn as nn
from torch.nn import functional as F
from mmdet3d.core.bbox.structures import rotation_3d_in_axis
from mmdet3d.core.utils import InstanceList
from mmdet3d.models.builder import build_loss
from mmdet3d.registry import MODELS
from mmdet.core import multi_apply
......@@ -127,16 +128,15 @@ class PointwiseSemanticHead(BaseModule):
part_targets = torch.clamp(part_targets, min=0)
return seg_targets, part_targets
def get_targets(self, voxels_dict, gt_bboxes_3d, gt_labels_3d):
def get_targets(self, voxel_dict: dict,
batch_gt_instances_3d: InstanceList) -> dict:
"""generate segmentation and part prediction targets.
Args:
voxel_centers (torch.Tensor): The center of voxels in shape
(voxel_num, 3).
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in
shape (box_num, 7).
gt_labels_3d (torch.Tensor): Class labels of ground truths in
shape (box_num).
voxel_dict (dict): Contains information of voxels.
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instances. It usually includes ``bboxes_3d`` and
``labels_3d`` attributes.
Returns:
dict: Prediction targets
......@@ -146,12 +146,15 @@ class PointwiseSemanticHead(BaseModule):
- part_targets (torch.Tensor): Part prediction targets
with shape [voxel_num, 3].
"""
batch_size = len(gt_labels_3d)
batch_size = len(batch_gt_instances_3d)
voxel_center_list = []
gt_bboxes_3d = []
gt_labels_3d = []
for idx in range(batch_size):
coords_idx = voxels_dict['coors'][:, 0] == idx
voxel_center_list.append(voxels_dict['voxel_centers'][coords_idx])
coords_idx = voxel_dict['coors'][:, 0] == idx
voxel_center_list.append(voxel_dict['voxel_centers'][coords_idx])
gt_bboxes_3d.append(batch_gt_instances_3d[idx].bboxes_3d)
gt_labels_3d.append(batch_gt_instances_3d[idx].labels_3d)
seg_targets, part_targets = multi_apply(self.get_targets_single,
voxel_center_list,
gt_bboxes_3d, gt_labels_3d)
......
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from typing import Dict, List, Tuple
from mmcv import ConfigDict
from torch import Tensor
from torch.nn import functional as F
from mmdet3d.core import AssignResult, build_assigner, build_sampler
from mmdet3d.core.bbox import bbox3d2result, bbox3d2roi
from mmdet3d.core import AssignResult
from mmdet3d.core.bbox import bbox3d2roi
from mmdet3d.core.utils import InstanceList, SampleList
from mmdet3d.registry import MODELS
from mmdet.core.bbox import SamplingResult
from .base_3droi_head import Base3DRoIHead
......@@ -17,66 +21,42 @@ class PartAggregationROIHead(Base3DRoIHead):
semantic_head (ConfigDict): Config of semantic head.
num_classes (int): The number of classes.
seg_roi_extractor (ConfigDict): Config of seg_roi_extractor.
part_roi_extractor (ConfigDict): Config of part_roi_extractor.
bbox_roi_extractor (ConfigDict): Config of bbox_roi_extractor.
bbox_head (ConfigDict): Config of bbox_head.
train_cfg (ConfigDict): Training config.
test_cfg (ConfigDict): Testing config.
"""
def __init__(self,
semantic_head,
num_classes=3,
seg_roi_extractor=None,
part_roi_extractor=None,
bbox_head=None,
train_cfg=None,
test_cfg=None,
pretrained=None,
init_cfg=None):
semantic_head: dict,
num_classes: int = 3,
seg_roi_extractor: dict = None,
bbox_head: dict = None,
bbox_roi_extractor: dict = None,
train_cfg: dict = None,
test_cfg: dict = None,
init_cfg: dict = None) -> None:
super(PartAggregationROIHead, self).__init__(
bbox_head=bbox_head,
bbox_roi_extractor=bbox_roi_extractor,
train_cfg=train_cfg,
test_cfg=test_cfg,
init_cfg=init_cfg)
self.num_classes = num_classes
assert semantic_head is not None
self.semantic_head = MODELS.build(semantic_head)
if seg_roi_extractor is not None:
    self.seg_roi_extractor = MODELS.build(seg_roi_extractor)
if part_roi_extractor is not None:
    self.part_roi_extractor = MODELS.build(part_roi_extractor)
self.init_assigner_sampler()

assert not (init_cfg and pretrained), \
    'init_cfg and pretrained cannot be set at the same time'
if isinstance(pretrained, str):
    warnings.warn('DeprecationWarning: pretrained is deprecated, '
                  'please use "init_cfg" instead')
    self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)

self.init_seg_head(seg_roi_extractor, semantic_head)

def init_seg_head(self, seg_roi_extractor: dict,
                  semantic_head: dict) -> None:
    """Initialize semantic head and seg roi extractor.

    Args:
        seg_roi_extractor (dict): Config of seg roi extractor.
        semantic_head (dict): Config of semantic head.
    """
    self.semantic_head = MODELS.build(semantic_head)
    self.seg_roi_extractor = MODELS.build(seg_roi_extractor)
def init_mask_head(self):
"""Initialize mask head, skip since ``PartAggregationROIHead`` does not
have one."""
pass
def init_bbox_head(self, bbox_head):
"""Initialize box head."""
self.bbox_head = MODELS.build(bbox_head)
def init_assigner_sampler(self):
"""Initialize assigner and sampler."""
self.bbox_assigner = None
self.bbox_sampler = None
if self.train_cfg:
if isinstance(self.train_cfg.assigner, dict):
self.bbox_assigner = build_assigner(self.train_cfg.assigner)
elif isinstance(self.train_cfg.assigner, list):
self.bbox_assigner = [
build_assigner(res) for res in self.train_cfg.assigner
]
self.bbox_sampler = build_sampler(self.train_cfg.sampler)
@property
def with_semantic(self):
......@@ -84,98 +64,12 @@ class PartAggregationROIHead(Base3DRoIHead):
return hasattr(self,
'semantic_head') and self.semantic_head is not None
def forward_train(self, feats_dict, voxels_dict, img_metas, proposal_list,
gt_bboxes_3d, gt_labels_3d):
"""Training forward function of PartAggregationROIHead.
Args:
feats_dict (dict): Contains features from the first stage.
voxels_dict (dict): Contains information of voxels.
img_metas (list[dict]): Meta info of each image.
proposal_list (list[dict]): Proposal information from rpn.
The dictionary should contain the following keys:
- boxes_3d (:obj:`BaseInstance3DBoxes`): Proposal bboxes
- labels_3d (torch.Tensor): Labels of proposals
- cls_preds (torch.Tensor): Original scores of proposals
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]):
GT bboxes of each sample. The bboxes are encapsulated
by 3D box structures.
gt_labels_3d (list[LongTensor]): GT labels of each sample.
Returns:
dict: losses from each head.
- loss_semantic (torch.Tensor): loss of semantic head
- loss_bbox (torch.Tensor): loss of bboxes
"""
losses = dict()
if self.with_semantic:
semantic_results = self._semantic_forward_train(
feats_dict['seg_features'], voxels_dict, gt_bboxes_3d,
gt_labels_3d)
losses.update(semantic_results['loss_semantic'])
sample_results = self._assign_and_sample(proposal_list, gt_bboxes_3d,
gt_labels_3d)
if self.with_bbox:
bbox_results = self._bbox_forward_train(
feats_dict['seg_features'], semantic_results['part_feats'],
voxels_dict, sample_results)
losses.update(bbox_results['loss_bbox'])
return losses
def simple_test(self, feats_dict, voxels_dict, img_metas, proposal_list,
**kwargs):
"""Simple testing forward function of PartAggregationROIHead.
Note:
This function assumes that the batch size is 1
Args:
feats_dict (dict): Contains features from the first stage.
voxels_dict (dict): Contains information of voxels.
img_metas (list[dict]): Meta info of each image.
proposal_list (list[dict]): Proposal information from rpn.
Returns:
dict: Bbox results of one frame.
"""
assert self.with_bbox, 'Bbox head must be implemented.'
assert self.with_semantic
semantic_results = self.semantic_head(feats_dict['seg_features'])
rois = bbox3d2roi([res['boxes_3d'].tensor for res in proposal_list])
labels_3d = [res['labels_3d'] for res in proposal_list]
cls_preds = [res['cls_preds'] for res in proposal_list]
bbox_results = self._bbox_forward(feats_dict['seg_features'],
semantic_results['part_feats'],
voxels_dict, rois)
bbox_list = self.bbox_head.get_bboxes(
rois,
bbox_results['cls_score'],
bbox_results['bbox_pred'],
labels_3d,
cls_preds,
img_metas,
cfg=self.test_cfg)
bbox_results = [
bbox3d2result(bboxes, scores, labels)
for bboxes, scores, labels in bbox_list
]
return bbox_results
def _bbox_forward_train(self, seg_feats, part_feats, voxels_dict,
sampling_results):
def _bbox_forward_train(self, feats_dict: Dict, voxels_dict: Dict,
sampling_results: List[SamplingResult]) -> Dict:
"""Forward training function of roi_extractor and bbox_head.
Args:
seg_feats (torch.Tensor): Point-wise semantic features.
part_feats (torch.Tensor): Point-wise part prediction features.
feats_dict (dict): Contains features from the first stage.
voxels_dict (dict): Contains information of voxels.
sampling_results (:obj:`SamplingResult`): Sampled results used
for training.
......@@ -184,8 +78,7 @@ class PartAggregationROIHead(Base3DRoIHead):
dict: Forward results including losses and predictions.
"""
rois = bbox3d2roi([res.bboxes for res in sampling_results])
bbox_results = self._bbox_forward(seg_feats, part_feats, voxels_dict,
rois)
bbox_results = self._bbox_forward(feats_dict, voxels_dict, rois)
bbox_targets = self.bbox_head.get_targets(sampling_results,
self.train_cfg)
......@@ -196,45 +89,17 @@ class PartAggregationROIHead(Base3DRoIHead):
bbox_results.update(loss_bbox=loss_bbox)
return bbox_results
def _bbox_forward(self, seg_feats, part_feats, voxels_dict, rois):
"""Forward function of roi_extractor and bbox_head used in both
training and testing.
Args:
seg_feats (torch.Tensor): Point-wise semantic features.
part_feats (torch.Tensor): Point-wise part prediction features.
voxels_dict (dict): Contains information of voxels.
rois (Tensor): Roi boxes.
Returns:
dict: Contains predictions of bbox_head and
features of roi_extractor.
"""
pooled_seg_feats = self.seg_roi_extractor(seg_feats,
voxels_dict['voxel_centers'],
voxels_dict['coors'][..., 0],
rois)
pooled_part_feats = self.part_roi_extractor(
part_feats, voxels_dict['voxel_centers'],
voxels_dict['coors'][..., 0], rois)
cls_score, bbox_pred = self.bbox_head(pooled_seg_feats,
pooled_part_feats)
bbox_results = dict(
cls_score=cls_score,
bbox_pred=bbox_pred,
pooled_seg_feats=pooled_seg_feats,
pooled_part_feats=pooled_part_feats)
return bbox_results
def _assign_and_sample(self, proposal_list, gt_bboxes_3d, gt_labels_3d):
def _assign_and_sample(
self, proposal_list: InstanceList,
batch_gt_instances_3d: InstanceList) -> List[SamplingResult]:
"""Assign and sample proposals for training.
Args:
proposal_list (list[dict]): Proposals produced by RPN.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
boxes.
gt_labels_3d (list[torch.Tensor]): Ground truth labels
proposal_list (list[:obj:`InstanceData`]): Proposals produced by
rpn head.
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instances. It usually includes ``bboxes_3d`` and
``labels_3d`` attributes.
Returns:
list[:obj:`SamplingResult`]: Sampled results of each training
......@@ -244,10 +109,14 @@ class PartAggregationROIHead(Base3DRoIHead):
# bbox assign
for batch_idx in range(len(proposal_list)):
cur_proposal_list = proposal_list[batch_idx]
cur_boxes = cur_proposal_list['boxes_3d']
cur_boxes = cur_proposal_list['bboxes_3d']
cur_labels_3d = cur_proposal_list['labels_3d']
cur_gt_bboxes = gt_bboxes_3d[batch_idx].to(cur_boxes.device)
cur_gt_labels = gt_labels_3d[batch_idx]
cur_gt_instances_3d = batch_gt_instances_3d[batch_idx]
cur_gt_instances_3d.bboxes_3d = cur_gt_instances_3d.\
bboxes_3d.tensor
cur_gt_bboxes = batch_gt_instances_3d[batch_idx].bboxes_3d.to(
cur_boxes.device)
cur_gt_labels = batch_gt_instances_3d[batch_idx].labels_3d
batch_num_gts = 0
# 0 is bg
......@@ -262,9 +131,8 @@ class PartAggregationROIHead(Base3DRoIHead):
gt_per_cls = (cur_gt_labels == i)
pred_per_cls = (cur_labels_3d == i)
cur_assign_res = assigner.assign(
cur_boxes.tensor[pred_per_cls],
cur_gt_bboxes.tensor[gt_per_cls],
gt_labels=cur_gt_labels[gt_per_cls])
cur_proposal_list[pred_per_cls],
cur_gt_instances_3d[gt_per_cls])
# gather assign_results in different class into one result
batch_num_gts += cur_assign_res.num_gts
# gt inds (1-based)
......@@ -290,35 +158,218 @@ class PartAggregationROIHead(Base3DRoIHead):
batch_gt_labels)
else: # for single class
assign_result = self.bbox_assigner.assign(
cur_boxes.tensor,
cur_gt_bboxes.tensor,
gt_labels=cur_gt_labels)
cur_proposal_list, cur_gt_instances_3d)
# sample boxes
sampling_result = self.bbox_sampler.sample(assign_result,
cur_boxes.tensor,
cur_gt_bboxes.tensor,
cur_gt_bboxes,
cur_gt_labels)
sampling_results.append(sampling_result)
return sampling_results
def _semantic_forward_train(self, x, voxels_dict, gt_bboxes_3d,
gt_labels_3d):
def _semantic_forward_train(self, feats_dict: dict, voxel_dict: dict,
batch_gt_instances_3d: InstanceList) -> Dict:
"""Train semantic head.
Args:
x (torch.Tensor): Point-wise semantic features for segmentation
voxels_dict (dict): Contains information of voxels.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
boxes.
gt_labels_3d (list[torch.Tensor]): Ground truth labels
feats_dict (dict): Contains features from the first stage.
voxel_dict (dict): Contains information of voxels.
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instances. It usually includes ``bboxes_3d`` and
``labels_3d`` attributes.
Returns:
dict: Segmentation results including losses
"""
semantic_results = self.semantic_head(x)
semantic_results = self.semantic_head(feats_dict['seg_features'])
semantic_targets = self.semantic_head.get_targets(
voxels_dict, gt_bboxes_3d, gt_labels_3d)
voxel_dict, batch_gt_instances_3d)
loss_semantic = self.semantic_head.loss(semantic_results,
semantic_targets)
semantic_results.update(loss_semantic=loss_semantic)
return semantic_results
def predict(self,
feats_dict: Dict,
rpn_results_list: InstanceList,
batch_data_samples: SampleList,
rescale: bool = False,
**kwargs) -> InstanceList:
"""Perform forward propagation of the roi head and predict detection
results on the features of the upstream network.
Args:
feats_dict (dict): Contains features from the first stage.
rpn_results_list (List[:obj:`InstanceData`]): Detection results
of rpn head.
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
samples. It usually includes information such as
`gt_instance_3d`, `gt_panoptic_seg_3d` and `gt_sem_seg_3d`.
rescale (bool): If True, return boxes in original image space.
Defaults to False.
Returns:
list[:obj:`InstanceData`]: Detection results of each sample
after the post process.
Each item usually contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instances, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (BaseInstance3DBoxes): Prediction of bboxes,
contains a tensor with shape (num_instances, C), where
C >= 7.
"""
assert self.with_bbox, 'Bbox head must be implemented in PartA2.'
assert self.with_semantic, 'Semantic head must be implemented' \
' in PartA2.'
batch_input_metas = [
data_samples.metainfo for data_samples in batch_data_samples
]
voxels_dict = feats_dict.pop('voxels_dict')
# TODO: Split predict semantic and bbox
results_list = self.predict_bbox(feats_dict, voxels_dict,
batch_input_metas, rpn_results_list,
self.test_cfg)
return results_list
def predict_bbox(self, feats_dict: Dict, voxel_dict: Dict,
batch_input_metas: List[dict],
rpn_results_list: InstanceList,
test_cfg: ConfigDict) -> InstanceList:
"""Perform forward propagation of the bbox head and predict detection
results on the features of the upstream network.
Args:
feats_dict (dict): Contains features from the first stage.
voxel_dict (dict): Contains information of voxels.
batch_input_metas (list[dict], Optional): Batch image meta info.
Defaults to None.
rpn_results_list (List[:obj:`InstanceData`]): Detection results
of rpn head.
test_cfg (Config): Test config.
Returns:
list[:obj:`InstanceData`]: Detection results of each sample
after the post process.
Each item usually contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instances, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (BaseInstance3DBoxes): Prediction of bboxes,
contains a tensor with shape (num_instances, C), where
C >= 7.
"""
semantic_results = self.semantic_head(feats_dict['seg_features'])
feats_dict.update(semantic_results)
rois = bbox3d2roi(
[res['bboxes_3d'].tensor for res in rpn_results_list])
labels_3d = [res['labels_3d'] for res in rpn_results_list]
cls_preds = [res['cls_preds'] for res in rpn_results_list]
bbox_results = self._bbox_forward(feats_dict, voxel_dict, rois)
bbox_list = self.bbox_head.get_results(rois, bbox_results['cls_score'],
bbox_results['bbox_pred'],
labels_3d, cls_preds,
batch_input_metas, test_cfg)
return bbox_list
def _bbox_forward(self, feats_dict: Dict, voxel_dict: Dict,
rois: Tensor) -> Dict:
"""Forward function of roi_extractor and bbox_head used in both
training and testing.
Args:
feats_dict (dict): Contains features from the first stage.
voxel_dict (dict): Contains information of voxels.
rois (Tensor): Roi boxes.
Returns:
dict: Contains predictions of bbox_head and
features of roi_extractor.
"""
pooled_seg_feats = self.seg_roi_extractor(
    feats_dict['seg_features'], voxel_dict['voxel_centers'],
    voxel_dict['coors'][..., 0], rois)
pooled_part_feats = self.bbox_roi_extractor(
feats_dict['part_feats'], voxel_dict['voxel_centers'],
voxel_dict['coors'][..., 0], rois)
cls_score, bbox_pred = self.bbox_head(pooled_seg_feats,
pooled_part_feats)
bbox_results = dict(
cls_score=cls_score,
bbox_pred=bbox_pred,
pooled_seg_feats=pooled_seg_feats,
pooled_part_feats=pooled_part_feats)
return bbox_results
def loss(self, feats_dict: Dict, rpn_results_list: InstanceList,
batch_data_samples: SampleList, **kwargs) -> dict:
"""Perform forward propagation and loss calculation of the detection
roi on the features of the upstream network.
Args:
feats_dict (dict): Contains features from the first stage.
rpn_results_list (List[:obj:`InstanceData`]): Detection results
of rpn head.
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
samples. It usually includes information such as
`gt_instance_3d`, `gt_panoptic_seg_3d` and `gt_sem_seg_3d`.
Returns:
dict[str, Tensor]: A dictionary of loss components
"""
assert len(rpn_results_list) == len(batch_data_samples)
losses = dict()
batch_gt_instances_3d = []
batch_gt_instances_ignore = []
voxels_dict = feats_dict.pop('voxels_dict')
for data_sample in batch_data_samples:
batch_gt_instances_3d.append(data_sample.gt_instances_3d)
if 'ignored_instances' in data_sample:
batch_gt_instances_ignore.append(data_sample.ignored_instances)
else:
batch_gt_instances_ignore.append(None)
if self.with_semantic:
semantic_results = self._semantic_forward_train(
feats_dict, voxels_dict, batch_gt_instances_3d)
losses.update(semantic_results.pop('loss_semantic'))
sample_results = self._assign_and_sample(rpn_results_list,
batch_gt_instances_3d)
if self.with_bbox:
feats_dict.update(semantic_results)
bbox_results = self._bbox_forward_train(feats_dict, voxels_dict,
sample_results)
losses.update(bbox_results['loss_bbox'])
return losses
def _forward(self, feats_dict: dict,
rpn_results_list: InstanceList) -> Tuple:
"""Network forward process. Usually includes backbone, neck and head
forward without any post-processing.
Args:
feats_dict (dict): Contains features from the first stage.
rpn_results_list (List[:obj:`InstanceData`]): Detection results
of rpn head.
Returns:
tuple: A tuple of results from roi head.
"""
voxel_dict = feats_dict.pop('voxels_dict')
semantic_results = self.semantic_head(feats_dict['seg_features'])
feats_dict.update(semantic_results)
rois = bbox3d2roi(
[res['bboxes_3d'].tensor for res in rpn_results_list])
bbox_results = self._bbox_forward(feats_dict, voxel_dict, rois)
cls_score = bbox_results['cls_score']
bbox_pred = bbox_results['bbox_pred']
return cls_score, bbox_pred
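
``bbox3d2roi``, used throughout this head, packs per-sample proposals into a single RoI tensor by prepending a batch index. A minimal re-implementation sketch of the assumed behavior (not the library code itself):

import torch
from typing import List

def bbox3d2roi_sketch(bbox_list: List[torch.Tensor]) -> torch.Tensor:
    # bbox_list: one (num_boxes_i, box_dim) tensor per sample.
    rois = [
        torch.cat([boxes.new_full((boxes.size(0), 1), img_id), boxes], dim=-1)
        for img_id, boxes in enumerate(bbox_list)
    ]
    # Result: (sum_i num_boxes_i, 1 + box_dim); column 0 is the batch index
    # that get_results later slices on via ``roi_batch_id``.
    return torch.cat(rois, dim=0)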
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from mmcv import ops
from mmcv.runner import BaseModule
from mmengine.model import BaseModule
from mmdet3d.registry import MODELS
......
import unittest
import torch
from mmengine import DefaultScope
from mmdet3d.core import LiDARInstance3DBoxes
from mmdet3d.registry import MODELS
from tests.utils.model_utils import (_create_detector_inputs,
_get_detector_cfg, _setup_seed)
class TestPartA2(unittest.TestCase):
def test_parta2(self):
import mmdet3d.models
assert hasattr(mmdet3d.models, 'PartA2')
DefaultScope.get_instance('test_parta2', scope_name='mmdet3d')
_setup_seed(0)
parta2_cfg = _get_detector_cfg(
'parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class.py')
model = MODELS.build(parta2_cfg)
num_gt_instance = 50
data = [_create_detector_inputs(num_gt_instance=num_gt_instance)]
aug_data = [
_create_detector_inputs(num_gt_instance=num_gt_instance),
_create_detector_inputs(num_gt_instance=num_gt_instance + 1)
]
# test_aug_test
metainfo = {
'pcd_scale_factor': 1,
'pcd_horizontal_flip': 1,
'pcd_vertical_flip': 1,
'box_type_3d': LiDARInstance3DBoxes
}
for item in aug_data:
item['data_sample'].set_metainfo(metainfo)
if torch.cuda.is_available():
model = model.cuda()
# test simple_test
with torch.no_grad():
batch_inputs, data_samples = model.data_preprocessor(
data, True)
results = model.forward(
batch_inputs, data_samples, mode='predict')
self.assertEqual(len(results), len(data))
self.assertIn('bboxes_3d', results[0].pred_instances_3d)
self.assertIn('scores_3d', results[0].pred_instances_3d)
self.assertIn('labels_3d', results[0].pred_instances_3d)
batch_inputs, data_samples = model.data_preprocessor(
aug_data, True)
aug_results = model.forward(
batch_inputs, data_samples, mode='predict')
self.assertEqual(len(aug_results), len(aug_data))
self.assertIn('bboxes_3d', aug_results[0].pred_instances_3d)
self.assertIn('scores_3d', aug_results[0].pred_instances_3d)
self.assertIn('labels_3d', aug_results[0].pred_instances_3d)
self.assertIn('bboxes_3d', aug_results[1].pred_instances_3d)
self.assertIn('scores_3d', aug_results[1].pred_instances_3d)
self.assertIn('labels_3d', aug_results[1].pred_instances_3d)
losses = model.forward(batch_inputs, data_samples, mode='loss')
self.assertGreater(losses['loss_rpn_cls'][0], 0)
self.assertGreater(losses['loss_rpn_bbox'][0], 0)
self.assertGreater(losses['loss_seg'], 0)
self.assertGreater(losses['loss_part'], 0)
self.assertGreater(losses['loss_cls'], 0)