Commit 3a939d7f authored by VVsssssk, committed by ChaimZhu

[Refactor] Refactor two-stage model and PartA2

parent f9ebc59b
@@ -4,6 +4,7 @@ point_cloud_range = [0, -40, -3, 70.4, 40, 1]
 model = dict(
     type='PartA2',
+    data_preprocessor=dict(type='Det3DDataPreprocessor'),
     voxel_layer=dict(
         max_num_points=5,  # max_points_per_voxel
         point_cloud_range=point_cloud_range,
@@ -46,14 +47,16 @@ model = dict(
         assign_per_class=True,
         bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
         loss_cls=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=1.0),
-        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
+        loss_bbox=dict(
+            type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
         loss_dir=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
+            type='mmdet.CrossEntropyLoss', use_sigmoid=False,
+            loss_weight=0.2)),
     roi_head=dict(
         type='PartAggregationROIHead',
         num_classes=3,
@@ -64,14 +67,16 @@ model = dict(
             seg_score_thr=0.3,
             num_classes=3,
             loss_seg=dict(
-                type='FocalLoss',
+                type='mmdet.FocalLoss',
                 use_sigmoid=True,
                 reduction='sum',
                 gamma=2.0,
                 alpha=0.25,
                 loss_weight=1.0),
             loss_part=dict(
-                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
+                type='mmdet.CrossEntropyLoss',
+                use_sigmoid=True,
+                loss_weight=1.0)),
         seg_roi_extractor=dict(
             type='Single3DRoIAwareExtractor',
             roi_layer=dict(
@@ -79,7 +84,7 @@ model = dict(
                 out_size=14,
                 max_pts_per_voxel=128,
                 mode='max')),
-        part_roi_extractor=dict(
+        bbox_roi_extractor=dict(
            type='Single3DRoIAwareExtractor',
            roi_layer=dict(
                type='RoIAwarePool3d',
@@ -103,12 +108,12 @@ model = dict(
            roi_feat_size=14,
            with_corner_loss=True,
            loss_bbox=dict(
-               type='SmoothL1Loss',
+               type='mmdet.SmoothL1Loss',
                beta=1.0 / 9.0,
                reduction='sum',
                loss_weight=1.0),
            loss_cls=dict(
-               type='CrossEntropyLoss',
+               type='mmdet.CrossEntropyLoss',
                use_sigmoid=True,
                reduction='sum',
                loss_weight=1.0))),
@@ -117,21 +122,21 @@ model = dict(
        rpn=dict(
            assigner=[
                dict(  # for Pedestrian
-                   type='MaxIoUAssigner',
+                   type='Max3DIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.5,
                    neg_iou_thr=0.35,
                    min_pos_iou=0.35,
                    ignore_iof_thr=-1),
                dict(  # for Cyclist
-                   type='MaxIoUAssigner',
+                   type='Max3DIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.5,
                    neg_iou_thr=0.35,
                    min_pos_iou=0.35,
                    ignore_iof_thr=-1),
                dict(  # for Car
-                   type='MaxIoUAssigner',
+                   type='Max3DIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.6,
                    neg_iou_thr=0.45,
@@ -151,7 +156,7 @@ model = dict(
        rcnn=dict(
            assigner=[
                dict(  # for Pedestrian
-                   type='MaxIoUAssigner',
+                   type='Max3DIoUAssigner',
                    iou_calculator=dict(
                        type='BboxOverlaps3D', coordinate='lidar'),
                    pos_iou_thr=0.55,
@@ -159,7 +164,7 @@ model = dict(
                    min_pos_iou=0.55,
                    ignore_iof_thr=-1),
                dict(  # for Cyclist
-                   type='MaxIoUAssigner',
+                   type='Max3DIoUAssigner',
                    iou_calculator=dict(
                        type='BboxOverlaps3D', coordinate='lidar'),
                    pos_iou_thr=0.55,
@@ -167,7 +172,7 @@ model = dict(
                    min_pos_iou=0.55,
                    ignore_iof_thr=-1),
                dict(  # for Car
-                   type='MaxIoUAssigner',
+                   type='Max3DIoUAssigner',
                    iou_calculator=dict(
                        type='BboxOverlaps3D', coordinate='lidar'),
                    pos_iou_thr=0.55,
...
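Note: the `mmdet.` prefixes introduced above rely on MMEngine's scoped registries: losses implemented in mmdet are resolved through mmdet3d's registry by scoped name rather than being re-registered. A minimal sketch of the lookup, assuming an mmdet3d dev-1.x environment with mmdet installed:

from mmdet3d.registry import MODELS

# 'mmdet.FocalLoss' switches to the mmdet registry scope before resolving;
# the bare name 'FocalLoss' would only search mmdet3d's own registry.
focal_loss = MODELS.build(
    dict(type='mmdet.FocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25,
         loss_weight=1.0))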
@@ -38,8 +38,9 @@ train_pipeline = [
     dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
     dict(type='ObjectNameFilter', classes=class_names),
     dict(type='PointShuffle'),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
-    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
+    dict(
+        type='Pack3DDetInputs',
+        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
 ]
 test_pipeline = [
     dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),
@@ -57,66 +58,70 @@ test_pipeline = [
             dict(type='RandomFlip3D'),
             dict(
                 type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-            dict(
-                type='DefaultFormatBundle3D',
-                class_names=class_names,
-                with_label=False),
-            dict(type='Collect3D', keys=['points'])
+            dict(type='Pack3DDetInputs', keys=['points'])
         ])
 ]
 # construct a pipeline for data and gt loading in show function
 # please keep its loading function consistent with test_pipeline (e.g. client)
 eval_pipeline = [
     dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),
-    dict(
-        type='DefaultFormatBundle3D',
-        class_names=class_names,
-        with_label=False),
-    dict(type='Collect3D', keys=['points'])
+    dict(type='Pack3DDetInputs', keys=['points'])
 ]
-data = dict(
-    samples_per_gpu=2,
-    workers_per_gpu=2,
-    train=dict(
+train_dataloader = dict(
+    batch_size=2,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
         type='RepeatDataset',
         times=2,
         dataset=dict(
             type=dataset_type,
             data_root=data_root,
-            ann_file=data_root + 'kitti_infos_train.pkl',
-            split='training',
-            pts_prefix='velodyne_reduced',
+            ann_file='kitti_infos_train.pkl',
+            data_prefix=dict(pts='training/velodyne_reduced'),
             pipeline=train_pipeline,
             modality=input_modality,
-            classes=class_names,
+            metainfo=dict(CLASSES=class_names),
             box_type_3d='LiDAR',
-            test_mode=False)),
-    val=dict(
+            test_mode=False)))
+test_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        ann_file=data_root + 'kitti_infos_val.pkl',
-        split='training',
-        pts_prefix='velodyne_reduced',
+        ann_file='kitti_infos_val.pkl',
+        data_prefix=dict(pts='training/velodyne_reduced'),
         pipeline=test_pipeline,
         modality=input_modality,
-        classes=class_names,
+        metainfo=dict(CLASSES=class_names),
         box_type_3d='LiDAR',
-        test_mode=True),
-    test=dict(
+        test_mode=True))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        ann_file=data_root + 'kitti_infos_val.pkl',
-        split='training',
-        pts_prefix='velodyne_reduced',
-        pipeline=test_pipeline,
+        ann_file='kitti_infos_val.pkl',
+        data_prefix=dict(pts='training/velodyne_reduced'),
+        pipeline=eval_pipeline,
         modality=input_modality,
-        classes=class_names,
+        metainfo=dict(CLASSES=class_names),
         box_type_3d='LiDAR',
         test_mode=True))
+val_evaluator = dict(
+    type='KittiMetric',
+    ann_file=data_root + 'kitti_infos_val.pkl',
+    metric='bbox')
+test_evaluator = val_evaluator
 # Part-A2 uses a different learning rate from what SECOND uses.
-lr = 0.001
-optimizer = dict(lr=lr)
-evaluation = dict(pipeline=eval_pipeline)
+optim_wrapper = dict(optimizer=dict(lr=0.001))
 find_unused_parameters = True
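Note: `Pack3DDetInputs` collapses the old `DefaultFormatBundle3D` + `Collect3D` pair into a single transform that packs the listed keys into the `inputs`/`data_samples` pair consumed by `Det3DDataPreprocessor`. A hedged sketch of building it standalone through the registry (dev-1.x names):

from mmdet3d.registry import TRANSFORMS

pack = TRANSFORMS.build(
    dict(
        type='Pack3DDetInputs',
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']))
# calling pack(results) on a pipeline result dict yields
# {'inputs': ..., 'data_samples': <Det3DDataSample>}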
@@ -22,7 +22,7 @@ model = dict(
         _delete_=True,
         rpn=dict(
             assigner=dict(
-                type='MaxIoUAssigner',
+                type='Max3DIoUAssigner',
                 iou_calculator=dict(type='BboxOverlapsNearest3D'),
                 pos_iou_thr=0.6,
                 neg_iou_thr=0.45,
@@ -40,7 +40,7 @@ model = dict(
             use_rotate_nms=False),
         rcnn=dict(
             assigner=dict(  # for Car
-                type='MaxIoUAssigner',
+                type='Max3DIoUAssigner',
                 iou_calculator=dict(type='BboxOverlaps3D', coordinate='lidar'),
                 pos_iou_thr=0.55,
                 neg_iou_thr=0.55,
@@ -102,8 +102,9 @@ train_pipeline = [
     dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
     dict(type='ObjectNameFilter', classes=class_names),
     dict(type='PointShuffle'),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
-    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
+    dict(
+        type='Pack3DDetInputs',
+        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
 ]
 test_pipeline = [
     dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),
@@ -129,9 +130,11 @@ test_pipeline = [
         ])
 ]
-data = dict(
-    train=dict(dataset=dict(pipeline=train_pipeline, classes=class_names)),
-    val=dict(pipeline=test_pipeline, classes=class_names),
-    test=dict(pipeline=test_pipeline, classes=class_names))
+train_dataloader = dict(
+    dataset=dict(
+        dataset=dict(
+            pipeline=train_pipeline, metainfo=dict(CLASSES=class_names))))
+test_dataloader = dict(
+    dataset=dict(pipeline=test_pipeline, metainfo=dict(CLASSES=class_names)))
+val_dataloader = dict(dataset=dict(metainfo=dict(CLASSES=class_names)))
 find_unused_parameters = True
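Note: this car-only config overrides just the assigners, pipelines and class metadata and inherits the rest from its base; `_delete_=True` above replaces the inherited multi-class train_cfg outright instead of merging into it. A sketch of the override pattern (the base filename is illustrative):

_base_ = './hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class.py'  # illustrative

class_names = ['Car']
# RepeatDataset wraps the real dataset, hence the doubled
# dataset=dict(dataset=dict(...)) override seen above
train_dataloader = dict(
    dataset=dict(dataset=dict(metainfo=dict(CLASSES=class_names))))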
@@ -115,7 +115,10 @@ class Max3DIoUAssigner(MaxIoUAssigner):
             >>> assert torch.all(assign_result.gt_inds == expected_gt_inds)
         """
         gt_bboxes = gt_instances.bboxes_3d
-        priors = pred_instances.priors
+        if 'priors' in pred_instances:
+            priors = pred_instances.priors
+        else:
+            priors = pred_instances.bboxes_3d.tensor
         gt_labels = gt_instances.labels_3d
         if gt_instances_ignore is not None:
             gt_bboxes_ignore = gt_instances_ignore.bboxes_3d
...
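Note: the added branch lets one assigner serve both stages: RPN `pred_instances` carry anchor `priors`, while second-stage proposals only carry `bboxes_3d`, so the assigner falls back to the raw box tensor. A minimal sketch of the membership test involved (the random tensor is illustrative):

import torch
from mmengine.data import InstanceData

preds = InstanceData()
preds.priors = torch.rand(128, 7)  # anchor path
assert 'priors' in preds  # the same check the assigner performs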
@@ -369,8 +369,8 @@ class Anchor3DHead(Base3DDenseHead, AnchorTrainMixin):
             dir_cls_preds (list[torch.Tensor]): Multi-level direction
                 class predictions.
             batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
-                gt_instances. It usually includes ``bboxes`` and ``labels``
-                attributes.
+                gt_instances. It usually includes ``bboxes_3d``
+                and ``labels_3d`` attributes.
             batch_input_metas (list[dict]): Contain pcd and img's meta info.
             batch_gt_instances_ignore (list[:obj:`InstanceData`], optional):
                 Batch of gt_instances_ignore. It includes ``bboxes`` attribute
...
 # Copyright (c) OpenMMLab. All rights reserved.
+from typing import Dict, List, Tuple
+
 import numpy as np
 import torch
-from mmcv.runner import force_fp32
+from mmcv import ConfigDict
+from mmengine.data import InstanceData
+from torch import Tensor
 
 from mmdet3d.core import limit_period, xywhr2xyxyr
 from mmdet3d.core.post_processing import nms_bev, nms_normal_bev
+from mmdet3d.core.utils import InstanceList, SampleList
 from mmdet3d.registry import MODELS
 from .anchor3d_head import Anchor3DHead
@@ -48,13 +53,13 @@ class PartA2RPNHead(Anchor3DHead):
     """
 
     def __init__(self,
-                 num_classes,
-                 in_channels,
-                 train_cfg,
-                 test_cfg,
-                 feat_channels=256,
-                 use_direction_classifier=True,
-                 anchor_generator=dict(
+                 num_classes: int,
+                 in_channels: int,
+                 train_cfg: ConfigDict,
+                 test_cfg: ConfigDict,
+                 feat_channels: int = 256,
+                 use_direction_classifier: bool = True,
+                 anchor_generator: Dict = dict(
                      type='Anchor3DRangeGenerator',
                      range=[0, -39.68, -1.78, 69.12, 39.68, -1.78],
                      strides=[2],
@@ -62,83 +67,45 @@ class PartA2RPNHead(Anchor3DHead):
                      rotations=[0, 1.57],
                      custom_values=[],
                      reshape_out=False),
-                 assigner_per_size=False,
-                 assign_per_class=False,
-                 diff_rad_by_sin=True,
-                 dir_offset=-np.pi / 2,
-                 dir_limit_offset=0,
-                 bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
-                 loss_cls=dict(
-                     type='CrossEntropyLoss',
+                 assigner_per_size: bool = False,
+                 assign_per_class: bool = False,
+                 diff_rad_by_sin: bool = True,
+                 dir_offset: float = -np.pi / 2,
+                 dir_limit_offset: float = 0,
+                 bbox_coder: Dict = dict(type='DeltaXYZWLHRBBoxCoder'),
+                 loss_cls: Dict = dict(
+                     type='mmdet.CrossEntropyLoss',
                      use_sigmoid=True,
                      loss_weight=1.0),
-                 loss_bbox=dict(
-                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
-                 loss_dir=dict(type='CrossEntropyLoss', loss_weight=0.2),
-                 init_cfg=None):
-        super().__init__(num_classes, in_channels, train_cfg, test_cfg,
-                         feat_channels, use_direction_classifier,
-                         anchor_generator, assigner_per_size, assign_per_class,
-                         diff_rad_by_sin, dir_offset, dir_limit_offset,
-                         bbox_coder, loss_cls, loss_bbox, loss_dir, init_cfg)
-
-    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds'))
-    def loss(self,
-             cls_scores,
-             bbox_preds,
-             dir_cls_preds,
-             gt_bboxes,
-             gt_labels,
-             input_metas,
-             gt_bboxes_ignore=None):
-        """Calculate losses.
-
-        Args:
-            cls_scores (list[torch.Tensor]): Multi-level class scores.
-            bbox_preds (list[torch.Tensor]): Multi-level bbox predictions.
-            dir_cls_preds (list[torch.Tensor]): Multi-level direction
-                class predictions.
-            gt_bboxes (list[:obj:`BaseInstance3DBoxes`]): Ground truth boxes
-                of each sample.
-            gt_labels (list[torch.Tensor]): Labels of each sample.
-            input_metas (list[dict]): Point cloud and image's meta info.
-            gt_bboxes_ignore (list[torch.Tensor]): Specify
-                which bounding.
-
-        Returns:
-            dict[str, list[torch.Tensor]]: Classification, bbox, and
-                direction losses of each level.
-
-                - loss_rpn_cls (list[torch.Tensor]): Classification losses.
-                - loss_rpn_bbox (list[torch.Tensor]): Box regression losses.
-                - loss_rpn_dir (list[torch.Tensor]): Direction classification
-                    losses.
-        """
-        loss_dict = super().loss(cls_scores, bbox_preds, dir_cls_preds,
-                                 gt_bboxes, gt_labels, input_metas,
-                                 gt_bboxes_ignore)
-        # change the loss key names to avoid conflict
-        return dict(
-            loss_rpn_cls=loss_dict['loss_cls'],
-            loss_rpn_bbox=loss_dict['loss_bbox'],
-            loss_rpn_dir=loss_dict['loss_dir'])
-
-    def get_bboxes_single(self,
-                          cls_scores,
-                          bbox_preds,
-                          dir_cls_preds,
-                          mlvl_anchors,
-                          input_meta,
-                          cfg,
-                          rescale=False):
+                 loss_bbox: Dict = dict(
+                     type='mmdet.SmoothL1Loss',
+                     beta=1.0 / 9.0,
+                     loss_weight=2.0),
+                 loss_dir: Dict = dict(
+                     type='mmdet.CrossEntropyLoss', loss_weight=0.2),
+                 init_cfg: Dict = None) -> None:
+        super().__init__(num_classes, in_channels, feat_channels,
+                         use_direction_classifier, anchor_generator,
+                         assigner_per_size, assign_per_class, diff_rad_by_sin,
+                         dir_offset, dir_limit_offset, bbox_coder, loss_cls,
+                         loss_bbox, loss_dir, train_cfg, test_cfg, init_cfg)
+
+    def _predict_by_feat_single(self,
+                                cls_score_list: List[Tensor],
+                                bbox_pred_list: List[Tensor],
+                                dir_cls_pred_list: List[Tensor],
+                                mlvl_priors: List[Tensor],
+                                input_meta: List[dict],
+                                cfg: ConfigDict,
+                                rescale: List[Tensor] = False):
         """Get bboxes of single branch.
 
         Args:
-            cls_scores (torch.Tensor): Class score in single batch.
-            bbox_preds (torch.Tensor): Bbox prediction in single batch.
-            dir_cls_preds (torch.Tensor): Predictions of direction class
+            cls_score_list (torch.Tensor): Class score in single batch.
+            bbox_pred_list (torch.Tensor): Bbox prediction in single batch.
+            dir_cls_pred_list (torch.Tensor): Predictions of direction class
                 in single batch.
-            mlvl_anchors (List[torch.Tensor]): Multi-level anchors
+            mlvl_priors (List[torch.Tensor]): Multi-level anchors
                 in single batch.
             input_meta (list[dict]): Contain pcd and img's meta info.
             cfg (:obj:`ConfigDict`): Training or testing config.
@@ -152,14 +119,15 @@ class PartA2RPNHead(Anchor3DHead):
                 - labels_3d (torch.Tensor): Label of each bbox.
                 - cls_preds (torch.Tensor): Class score of each bbox.
         """
-        assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
+        assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_priors)
         mlvl_bboxes = []
         mlvl_max_scores = []
         mlvl_label_pred = []
         mlvl_dir_scores = []
         mlvl_cls_score = []
         for cls_score, bbox_pred, dir_cls_pred, anchors in zip(
-                cls_scores, bbox_preds, dir_cls_preds, mlvl_anchors):
+                cls_score_list, bbox_pred_list, dir_cls_pred_list,
+                mlvl_priors):
             assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
             assert cls_score.size()[-2:] == dir_cls_pred.size()[-2:]
             dir_cls_pred = dir_cls_pred.permute(1, 2, 0).reshape(-1, 2)
@@ -216,13 +184,98 @@ class PartA2RPNHead(Anchor3DHead):
                                          mlvl_cls_score, mlvl_dir_scores,
                                          score_thr, cfg.nms_post, cfg,
                                          input_meta)
         return result
 
-    def class_agnostic_nms(self, mlvl_bboxes, mlvl_bboxes_for_nms,
-                           mlvl_max_scores, mlvl_label_pred, mlvl_cls_score,
-                           mlvl_dir_scores, score_thr, max_num, cfg,
-                           input_meta):
+    def loss_and_predict(self,
+                         feats_dict: Dict,
+                         batch_data_samples: SampleList,
+                         proposal_cfg: ConfigDict = None,
+                         **kwargs) -> Tuple[dict, InstanceList]:
+        """Perform forward propagation of the head, then calculate loss and
+        predictions from the features and data samples.
+
+        Args:
+            feats_dict (dict): Contains features from the first stage.
+            batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
+                samples. It usually includes information such as
+                `gt_instance_3d`, `gt_panoptic_seg_3d` and `gt_sem_seg_3d`.
+            proposal_cfg (ConfigDict, optional): Proposal config.
+
+        Returns:
+            tuple: the return value is a tuple contains:
+
+                - losses: (dict[str, Tensor]): A dictionary of loss components.
+                - predictions (list[:obj:`InstanceData`]): Detection
+                  results of each sample after the post process.
+        """
+        batch_gt_instances_3d = []
+        batch_gt_instances_ignore = []
+        batch_input_metas = []
+        for data_sample in batch_data_samples:
+            batch_input_metas.append(data_sample.metainfo)
+            batch_gt_instances_3d.append(data_sample.gt_instances_3d)
+            batch_gt_instances_ignore.append(
+                data_sample.get('ignored_instances', None))
+
+        outs = self(feats_dict['neck_feats'])
+
+        loss_inputs = outs + (batch_gt_instances_3d, batch_input_metas,
+                              batch_gt_instances_ignore)
+        losses = self.loss_by_feat(*loss_inputs)
+
+        predictions = self.predict_by_feat(
+            *outs, batch_input_metas=batch_input_metas, cfg=proposal_cfg)
+        return losses, predictions
+
+    def loss_by_feat(self,
+                     cls_scores: List[Tensor],
+                     bbox_preds: List[Tensor],
+                     dir_cls_preds: List[Tensor],
+                     batch_gt_instances_3d: InstanceList,
+                     batch_input_metas: List[dict],
+                     batch_gt_instances_ignore: InstanceList = None) -> Dict:
+        """Calculate the loss based on the features extracted by the detection
+        head.
+
+        Args:
+            cls_scores (list[torch.Tensor]): Multi-level class scores.
+            bbox_preds (list[torch.Tensor]): Multi-level bbox predictions.
+            dir_cls_preds (list[torch.Tensor]): Multi-level direction
+                class predictions.
+            batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
+                gt_instances. It usually includes ``bboxes_3d`` and
+                ``labels_3d`` attributes.
+            batch_input_metas (list[dict]): Contain pcd and img's meta info.
+            batch_gt_instances_ignore (list[:obj:`InstanceData`], optional):
+                Batch of gt_instances_ignore. It includes ``bboxes`` attribute
+                data that is ignored during training and testing.
+                Defaults to None.
+
+        Returns:
+            dict[str, list[torch.Tensor]]: Classification, bbox, and
+                direction losses of each level.
+
+                - loss_rpn_cls (list[torch.Tensor]): Classification losses.
+                - loss_rpn_bbox (list[torch.Tensor]): Box regression losses.
+                - loss_rpn_dir (list[torch.Tensor]): Direction classification
+                  losses.
+        """
+        loss_dict = super().loss_by_feat(cls_scores, bbox_preds, dir_cls_preds,
+                                         batch_gt_instances_3d,
+                                         batch_input_metas,
+                                         batch_gt_instances_ignore)
+        # change the loss key names to avoid conflict
+        return dict(
+            loss_rpn_cls=loss_dict['loss_cls'],
+            loss_rpn_bbox=loss_dict['loss_bbox'],
+            loss_rpn_dir=loss_dict['loss_dir'])
+
+    def class_agnostic_nms(self, mlvl_bboxes: Tensor,
+                           mlvl_bboxes_for_nms: Tensor,
+                           mlvl_max_scores: Tensor, mlvl_label_pred: Tensor,
+                           mlvl_cls_score: Tensor, mlvl_dir_scores: Tensor,
+                           score_thr: int, max_num: int, cfg: ConfigDict,
+                           input_meta: dict) -> Dict:
         """Class agnostic nms for single batch.
 
         Args:
@@ -294,17 +347,53 @@ class PartA2RPNHead(Anchor3DHead):
             cls_scores = cls_scores[inds]
             bboxes = input_meta['box_type_3d'](
                 bboxes, box_dim=self.box_code_size)
-            return dict(
-                boxes_3d=bboxes,
-                scores_3d=scores,
-                labels_3d=labels,
-                cls_preds=cls_scores  # raw scores [max_num, cls_num]
-            )
+            result = InstanceData()
+            result.bboxes_3d = bboxes
+            result.scores_3d = scores
+            result.labels_3d = labels
+            result.cls_preds = cls_scores
+            return result
         else:
-            return dict(
-                boxes_3d=input_meta['box_type_3d'](
-                    mlvl_bboxes.new_zeros([0, self.box_code_size]),
-                    box_dim=self.box_code_size),
-                scores_3d=mlvl_bboxes.new_zeros([0]),
-                labels_3d=mlvl_bboxes.new_zeros([0]),
-                cls_preds=mlvl_bboxes.new_zeros([0, mlvl_cls_score.shape[-1]]))
+            result = InstanceData()
+            result.bboxes_3d = input_meta['box_type_3d'](
+                mlvl_bboxes.new_zeros([0, self.box_code_size]),
+                box_dim=self.box_code_size)
+            result.scores_3d = mlvl_bboxes.new_zeros([0])
+            result.labels_3d = mlvl_bboxes.new_zeros([0])
+            result.cls_preds = mlvl_bboxes.new_zeros(
+                [0, mlvl_cls_score.shape[-1]])
+            return result
+
+    def predict(self, feats_dict: Dict,
+                batch_data_samples: SampleList) -> InstanceList:
+        """Perform forward propagation of the 3D detection head and predict
+        detection results on the features of the upstream network.
+
+        Args:
+            feats_dict (dict): Contains features from the first stage.
+            batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
+                samples. It usually includes information such as
+                `gt_instance_3d`, `gt_panoptic_seg_3d` and `gt_sem_seg_3d`.
+
+        Returns:
+            list[:obj:`InstanceData`]: Detection results of each sample
+            after the post process.
+            Each item usually contains following keys.
+
+                - scores_3d (Tensor): Classification scores, has a shape
+                  (num_instances, )
+                - labels_3d (Tensor): Labels of bboxes, has a shape
+                  (num_instances, ).
+                - bboxes_3d (BaseInstance3DBoxes): Prediction of bboxes,
+                  contains a tensor with shape (num_instances, C), where
+                  C >= 7.
+        """
+        batch_input_metas = [
+            data_samples.metainfo for data_samples in batch_data_samples
+        ]
+        rpn_outs = self(feats_dict['neck_feats'])
+        proposal_cfg = self.test_cfg
+
+        proposal_list = self.predict_by_feat(
+            *rpn_outs, cfg=proposal_cfg, batch_input_metas=batch_input_metas)
+        return proposal_list
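Note: with the old `loss`/`get_bboxes` entry points gone, the head is driven through `loss_and_predict` during training and `predict` at test time. A hedged usage sketch, assuming `model`, `batch_inputs_dict` and `batch_data_samples` were built elsewhere:

feats_dict = model.extract_feat(batch_inputs_dict)

# training: losses plus proposals for the RoI stage in one pass
rpn_losses, proposals = model.rpn_head.loss_and_predict(
    feats_dict,
    batch_data_samples,
    proposal_cfg=model.train_cfg.get('rpn_proposal', model.test_cfg.rpn))

# testing: proposals only, configured by the head's test_cfg
proposals = model.rpn_head.predict(feats_dict, batch_data_samples)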
 # Copyright (c) OpenMMLab. All rights reserved.
+from typing import Dict, List, Optional
+
 import torch
 from mmcv.ops import Voxelization
 from torch.nn import functional as F
@@ -15,17 +17,17 @@ class PartA2(TwoStage3DDetector):
     """
 
     def __init__(self,
-                 voxel_layer,
-                 voxel_encoder,
-                 middle_encoder,
-                 backbone,
-                 neck=None,
-                 rpn_head=None,
-                 roi_head=None,
-                 train_cfg=None,
-                 test_cfg=None,
-                 pretrained=None,
-                 init_cfg=None):
+                 voxel_layer: dict,
+                 voxel_encoder: dict,
+                 middle_encoder: dict,
+                 backbone: dict,
+                 neck: dict = None,
+                 rpn_head: dict = None,
+                 roi_head: dict = None,
+                 train_cfg: dict = None,
+                 test_cfg: dict = None,
+                 init_cfg: dict = None,
+                 data_preprocessor: Optional[dict] = None):
         super(PartA2, self).__init__(
             backbone=backbone,
             neck=neck,
@@ -33,14 +35,29 @@ class PartA2(TwoStage3DDetector):
             roi_head=roi_head,
             train_cfg=train_cfg,
             test_cfg=test_cfg,
-            pretrained=pretrained,
-            init_cfg=init_cfg)
+            init_cfg=init_cfg,
+            data_preprocessor=data_preprocessor)
         self.voxel_layer = Voxelization(**voxel_layer)
         self.voxel_encoder = MODELS.build(voxel_encoder)
         self.middle_encoder = MODELS.build(middle_encoder)
 
-    def extract_feat(self, points, img_metas):
-        """Extract features from points."""
+    def extract_feat(self, batch_inputs_dict: Dict) -> Dict:
+        """Directly extract features from the backbone+neck.
+
+        Args:
+            batch_inputs_dict (dict): The model input dict which include
+                'points', 'imgs' keys.
+
+                - points (list[torch.Tensor]): Point cloud of each sample.
+                - imgs (torch.Tensor, optional): Image of each sample.
+
+        Returns:
+            tuple[Tensor] | dict: For outside 3D object detection, we
+            typically obtain a tuple of features from the backbone + neck,
+            and for inside 3D object detection, usually a dict containing
+            features will be obtained.
+        """
+        points = batch_inputs_dict['points']
         voxel_dict = self.voxelize(points)
         voxel_features = self.voxel_encoder(voxel_dict['voxels'],
                                             voxel_dict['num_points'],
@@ -52,10 +69,11 @@ class PartA2(TwoStage3DDetector):
         if self.with_neck:
             neck_feats = self.neck(x)
             feats_dict.update({'neck_feats': neck_feats})
-        return feats_dict, voxel_dict
+        feats_dict['voxels_dict'] = voxel_dict
+        return feats_dict
 
     @torch.no_grad()
-    def voxelize(self, points):
+    def voxelize(self, points: List[torch.Tensor]) -> Dict:
         """Apply hard voxelization to points."""
         voxels, coors, num_points, voxel_centers = [], [], [], []
         for res in points:
@@ -84,67 +102,3 @@ class PartA2(TwoStage3DDetector):
             coors=coors_batch,
             voxel_centers=voxel_centers)
         return voxel_dict
-
-    def forward_train(self,
-                      points,
-                      img_metas,
-                      gt_bboxes_3d,
-                      gt_labels_3d,
-                      gt_bboxes_ignore=None,
-                      proposals=None):
-        """Training forward function.
-
-        Args:
-            points (list[torch.Tensor]): Point cloud of each sample.
-            img_metas (list[dict]): Meta information of each sample
-            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
-                boxes for each sample.
-            gt_labels_3d (list[torch.Tensor]): Ground truth labels for
-                boxes of each sampole
-            gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth
-                boxes to be ignored. Defaults to None.
-
-        Returns:
-            dict: Losses of each branch.
-        """
-        feats_dict, voxels_dict = self.extract_feat(points, img_metas)
-        losses = dict()
-        if self.with_rpn:
-            rpn_outs = self.rpn_head(feats_dict['neck_feats'])
-            rpn_loss_inputs = rpn_outs + (gt_bboxes_3d, gt_labels_3d,
-                                          img_metas)
-            rpn_losses = self.rpn_head.loss(
-                *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
-            losses.update(rpn_losses)
-
-            proposal_cfg = self.train_cfg.get('rpn_proposal',
-                                              self.test_cfg.rpn)
-            proposal_inputs = rpn_outs + (img_metas, proposal_cfg)
-            proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)
-        else:
-            proposal_list = proposals
-
-        roi_losses = self.roi_head.forward_train(feats_dict, voxels_dict,
-                                                 img_metas, proposal_list,
-                                                 gt_bboxes_3d, gt_labels_3d)
-        losses.update(roi_losses)
-
-        return losses
-
-    def simple_test(self, points, img_metas, proposals=None, rescale=False):
-        """Test function without augmentaiton."""
-        feats_dict, voxels_dict = self.extract_feat(points, img_metas)
-        if self.with_rpn:
-            rpn_outs = self.rpn_head(feats_dict['neck_feats'])
-            proposal_cfg = self.test_cfg.rpn
-            bbox_inputs = rpn_outs + (img_metas, proposal_cfg)
-            proposal_list = self.rpn_head.get_bboxes(*bbox_inputs)
-        else:
-            proposal_list = proposals
-
-        return self.roi_head.simple_test(feats_dict, voxels_dict, img_metas,
-                                         proposal_list)
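Note: `extract_feat` now takes the preprocessed `batch_inputs_dict` and returns a single dict; the voxel information that used to be a second return value travels under the `'voxels_dict'` key. A short sketch, assuming a built PartA2 `model`:

import torch

batch_inputs_dict = dict(points=[torch.rand(1000, 4)])  # one LiDAR sample
feats_dict = model.extract_feat(batch_inputs_dict)
neck_feats = feats_dict['neck_feats']   # consumed by rpn_head
voxel_dict = feats_dict['voxels_dict']  # consumed by the semantic head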
@@ -95,7 +95,7 @@ class SingleStage3DDetector(Base3DDetector):
         Returns:
             list[:obj:`Det3DDataSample`]: Detection results of the
-                input images. Each Det3DDataSample usually contain
+                input samples. Each Det3DDataSample usually contain
                 'pred_instances_3d'. And the ``pred_instances_3d`` usually
                 contains following keys.
...
 # Copyright (c) OpenMMLab. All rights reserved.
+import copy
+from typing import Union
+
+from mmdet3d.core.utils import (ConfigType, OptConfigType, OptMultiConfig,
+                                SampleList)
 from mmdet3d.registry import MODELS
-from mmdet.models import TwoStageDetector
 from .base import Base3DDetector
 
 
 @MODELS.register_module()
-class TwoStage3DDetector(Base3DDetector, TwoStageDetector):
+class TwoStage3DDetector(Base3DDetector):
     """Base class of two-stage 3D detector.
 
-    It inherits original ``:class:TwoStageDetector`` and
-    ``:class:Base3DDetector``. This class could serve as a base class for all
-    two-stage 3D detectors.
+    It inherits original ``:class:Base3DDetector``. This class could serve as
+    a base class for all two-stage 3D detectors.
     """
 
-    def __init__(self, **kwargs):
-        super(TwoStage3DDetector, self).__init__(**kwargs)
+    def __init__(
+        self,
+        backbone: ConfigType,
+        neck: OptConfigType = None,
+        rpn_head: OptConfigType = None,
+        roi_head: OptConfigType = None,
+        train_cfg: OptConfigType = None,
+        test_cfg: OptConfigType = None,
+        init_cfg: OptMultiConfig = None,
+        data_preprocessor: OptConfigType = None,
+    ) -> None:
+        super(TwoStage3DDetector, self).__init__(
+            data_preprocessor=data_preprocessor, init_cfg=init_cfg)
+        self.backbone = MODELS.build(backbone)
+
+        if neck is not None:
+            self.neck = MODELS.build(neck)
+
+        if rpn_head is not None:
+            rpn_train_cfg = train_cfg.rpn if train_cfg is not None else None
+            rpn_head_ = rpn_head.copy()
+            rpn_head_.update(train_cfg=rpn_train_cfg, test_cfg=test_cfg.rpn)
+            rpn_head_num_classes = rpn_head_.get('num_classes', None)
+            if rpn_head_num_classes is None:
+                rpn_head_.update(num_classes=1)
+            self.rpn_head = MODELS.build(rpn_head_)
+
+        if roi_head is not None:
+            # update train and test cfg here for now
+            rcnn_train_cfg = train_cfg.rcnn if train_cfg is not None else None
+            roi_head.update(train_cfg=rcnn_train_cfg)
+            roi_head.update(test_cfg=test_cfg.rcnn)
+            self.roi_head = MODELS.build(roi_head)
+
+        self.train_cfg = train_cfg
+        self.test_cfg = test_cfg
+
+    @property
+    def with_rpn(self) -> bool:
+        """bool: whether the detector has RPN"""
+        return hasattr(self, 'rpn_head') and self.rpn_head is not None
+
+    @property
+    def with_roi_head(self) -> bool:
+        """bool: whether the detector has a RoI head"""
+        return hasattr(self, 'roi_head') and self.roi_head is not None
+
+    def loss(self, batch_inputs_dict: dict, batch_data_samples: SampleList,
+             **kwargs) -> Union[dict, list]:
+        """Calculate losses from a batch of inputs and data samples.
+
+        Args:
+            batch_inputs_dict (dict): The model input dict which include
+                'points', 'imgs' keys.
+
+                - points (list[torch.Tensor]): Point cloud of each sample.
+                - imgs (torch.Tensor, optional): Image of each sample.
+            batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
+                samples. It usually includes information such as
+                `gt_instance_3d`, `gt_panoptic_seg_3d` and `gt_sem_seg_3d`.
+
+        Returns:
+            dict: A dictionary of loss components.
+        """
+        feats_dict = self.extract_feat(batch_inputs_dict)
+        losses = dict()
+        # RPN forward and loss
+        if self.with_rpn:
+            proposal_cfg = self.train_cfg.get('rpn_proposal',
+                                              self.test_cfg.rpn)
+            rpn_data_samples = copy.deepcopy(batch_data_samples)
+
+            rpn_losses, rpn_results_list = self.rpn_head.loss_and_predict(
+                feats_dict,
+                rpn_data_samples,
+                proposal_cfg=proposal_cfg,
+                **kwargs)
+            # avoid get same name with roi_head loss
+            keys = rpn_losses.keys()
+            for key in keys:
+                if 'loss' in key and 'rpn' not in key:
+                    rpn_losses[f'rpn_{key}'] = rpn_losses.pop(key)
+            losses.update(rpn_losses)
+        else:
+            # TODO: Not support currently, should have a check at Fast R-CNN
+            assert batch_data_samples[0].get('proposals', None) is not None
+            # use pre-defined proposals in InstanceData for the second stage
+            # to extract ROI features.
+            rpn_results_list = [
+                data_sample.proposals for data_sample in batch_data_samples
+            ]
+
+        roi_losses = self.roi_head.loss(feats_dict, rpn_results_list,
+                                        batch_data_samples, **kwargs)
+        losses.update(roi_losses)
+
+        return losses
+
+    def predict(self, batch_inputs_dict: dict, batch_data_samples: SampleList,
+                **kwargs) -> SampleList:
+        """Predict results from a batch of inputs and data samples with post-
+        processing.
+
+        Args:
+            batch_inputs_dict (dict): The model input dict which include
+                'points', 'imgs' keys.
+
+                - points (list[torch.Tensor]): Point cloud of each sample.
+                - imgs (torch.Tensor, optional): Image of each sample.
+            batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
+                samples. It usually includes information such as
+                `gt_instance_3d`, `gt_panoptic_seg_3d` and `gt_sem_seg_3d`.
+
+        Returns:
+            list[:obj:`Det3DDataSample`]: Detection results of the
+            input samples. Each Det3DDataSample usually contain
+            'pred_instances_3d'. And the ``pred_instances_3d`` usually
+            contains following keys.
+
+            - scores_3d (Tensor): Classification scores, has a shape
+              (num_instance, )
+            - labels_3d (Tensor): Labels of bboxes, has a shape
+              (num_instances, ).
+            - bboxes_3d (Tensor): Contains a tensor with shape
+              (num_instances, C) where C >= 7.
+        """
+        feats_dict = self.extract_feat(batch_inputs_dict)
+        if self.with_rpn:
+            rpn_results_list = self.rpn_head.predict(feats_dict,
+                                                     batch_data_samples)
+        else:
+            rpn_results_list = [
+                data_sample.proposals for data_sample in batch_data_samples
+            ]
+        results_list = self.roi_head.predict(feats_dict, rpn_results_list,
+                                             batch_data_samples)
+        # convert to Det3DDataSample
+        results_list = self.convert_to_datasample(results_list)
+        return results_list
+
+    def _forward(self, batch_inputs_dict: dict, batch_data_samples: SampleList,
+                 **kwargs) -> tuple:
+        """Network forward process. Usually includes backbone, neck and head
+        forward without any post-processing.
+
+        Args:
+            batch_inputs_dict (dict): The model input dict which include
+                'points', 'img' keys.
+
+                - points (list[torch.Tensor]): Point cloud of each sample.
+                - imgs (torch.Tensor, optional): Image of each sample.
+            batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
+                samples. It usually includes information such as
+                `gt_instance_3d`, `gt_panoptic_seg_3d` and `gt_sem_seg_3d`.
+
+        Returns:
+            tuple: A tuple of features from ``rpn_head`` and ``roi_head``
+            forward.
+        """
+        feats_dict = self.extract_feat(batch_inputs_dict)
+        rpn_outs = self.rpn_head.forward(feats_dict['neck_feats'])
+
+        # If there are no pre-defined proposals, use RPN to get proposals
+        if batch_data_samples[0].get('proposals', None) is None:
+            batch_input_metas = [
+                data_samples.metainfo for data_samples in batch_data_samples
+            ]
+            rpn_results_list = self.rpn_head.predict_by_feat(
+                *rpn_outs, batch_input_metas=batch_input_metas)
+        else:
+            # TODO: Not checked currently.
+            rpn_results_list = [
+                data_sample.proposals for data_sample in batch_data_samples
+            ]
+
+        # roi_head
+        roi_outs = self.roi_head._forward(feats_dict, rpn_results_list)
+        return rpn_outs + roi_outs
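Note: the renaming loop in `loss` above prefixes any RPN loss whose key does not already carry an 'rpn' tag, so RoI-head losses of the same name are not clobbered when the two dicts merge. A standalone equivalent (the list() snapshot avoids mutating the dict mid-iteration):

rpn_losses = {'loss_cls': 1.0, 'loss_bbox': 2.0, 'loss_rpn_dir': 0.2}
for key in list(rpn_losses.keys()):
    if 'loss' in key and 'rpn' not in key:
        rpn_losses[f'rpn_{key}'] = rpn_losses.pop(key)
# -> {'loss_rpn_dir': 0.2, 'rpn_loss_cls': 1.0, 'rpn_loss_bbox': 2.0}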
 # Copyright (c) OpenMMLab. All rights reserved.
-from abc import ABCMeta, abstractmethod
-
-from mmcv.runner import BaseModule
+from mmdet3d.registry import MODELS, TASK_UTILS
+from mmdet.models.roi_heads import BaseRoIHead
 
 
-class Base3DRoIHead(BaseModule, metaclass=ABCMeta):
+class Base3DRoIHead(BaseRoIHead):
     """Base class for 3d RoIHeads."""
 
     def __init__(self,
                  bbox_head=None,
-                 mask_roi_extractor=None,
+                 bbox_roi_extractor=None,
                  mask_head=None,
+                 mask_roi_extractor=None,
                  train_cfg=None,
                  test_cfg=None,
-                 pretrained=None,
                  init_cfg=None):
-        super(Base3DRoIHead, self).__init__(init_cfg=init_cfg)
-        self.train_cfg = train_cfg
-        self.test_cfg = test_cfg
-
-        if bbox_head is not None:
-            self.init_bbox_head(bbox_head)
-
-        if mask_head is not None:
-            self.init_mask_head(mask_roi_extractor, mask_head)
-
-        self.init_assigner_sampler()
-
-    @property
-    def with_bbox(self):
-        """bool: whether the RoIHead has box head"""
-        return hasattr(self, 'bbox_head') and self.bbox_head is not None
-
-    @property
-    def with_mask(self):
-        """bool: whether the RoIHead has mask head"""
-        return hasattr(self, 'mask_head') and self.mask_head is not None
-
-    @abstractmethod
-    def init_bbox_head(self):
-        """Initialize the box head."""
-        pass
-
-    @abstractmethod
-    def init_mask_head(self):
-        """Initialize maek head."""
-        pass
-
-    @abstractmethod
-    def init_assigner_sampler(self):
-        """Initialize assigner and sampler."""
-        pass
-
-    @abstractmethod
-    def forward_train(self,
-                      x,
-                      img_metas,
-                      proposal_list,
-                      gt_bboxes,
-                      gt_labels,
-                      gt_bboxes_ignore=None,
-                      **kwargs):
-        """Forward function during training.
-
-        Args:
-            x (dict): Contains features from the first stage.
-            img_metas (list[dict]): Meta info of each image.
-            proposal_list (list[dict]): Proposal information from rpn.
-            gt_bboxes (list[:obj:`BaseInstance3DBoxes`]):
-                GT bboxes of each sample. The bboxes are encapsulated
-                by 3D box structures.
-            gt_labels (list[torch.LongTensor]): GT labels of each sample.
-            gt_bboxes_ignore (list[torch.Tensor], optional):
-                Ground truth boxes to be ignored.
-
-        Returns:
-            dict[str, torch.Tensor]: Losses from each head.
-        """
-        pass
-
-    def simple_test(self,
-                    x,
-                    proposal_list,
-                    img_metas,
-                    proposals=None,
-                    rescale=False,
-                    **kwargs):
-        """Test without augmentation."""
-        pass
-
-    def aug_test(self, x, proposal_list, img_metas, rescale=False, **kwargs):
-        """Test with augmentations.
-
-        If rescale is False, then returned bboxes and masks will fit the scale
-        of imgs[0].
-        """
-        pass
+        super(Base3DRoIHead, self).__init__(
+            bbox_head=bbox_head,
+            bbox_roi_extractor=bbox_roi_extractor,
+            mask_head=mask_head,
+            mask_roi_extractor=mask_roi_extractor,
+            train_cfg=train_cfg,
+            test_cfg=test_cfg,
+            init_cfg=init_cfg)
+
+    def init_bbox_head(self, bbox_roi_extractor: dict,
+                       bbox_head: dict) -> None:
+        """Initialize box head and box roi extractor.
+
+        Args:
+            bbox_roi_extractor (dict or ConfigDict): Config of box
+                roi extractor.
+            bbox_head (dict or ConfigDict): Config of box in box head.
+        """
+        self.bbox_roi_extractor = MODELS.build(bbox_roi_extractor)
+        self.bbox_head = MODELS.build(bbox_head)
+
+    def init_assigner_sampler(self):
+        """Initialize assigner and sampler."""
+        self.bbox_assigner = None
+        self.bbox_sampler = None
+        if self.train_cfg:
+            if isinstance(self.train_cfg.assigner, dict):
+                self.bbox_assigner = TASK_UTILS.build(self.train_cfg.assigner)
+            elif isinstance(self.train_cfg.assigner, list):
+                self.bbox_assigner = [
+                    TASK_UTILS.build(res) for res in self.train_cfg.assigner
+                ]
+            self.bbox_sampler = TASK_UTILS.build(self.train_cfg.sampler)
+
+    def init_mask_head(self):
+        """Initialize mask head, skip since ``PartAggregationROIHead`` does
+        not have one."""
+        pass
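Note: assigners and samplers are now built from `TASK_UTILS` rather than the model registry, and a list of assigner configs (one per class, as in the Part-A2 rcnn train_cfg) yields a list of assigners. A sketch with illustrative thresholds:

from mmdet3d.registry import TASK_UTILS

assigner = TASK_UTILS.build(
    dict(
        type='Max3DIoUAssigner',
        iou_calculator=dict(type='BboxOverlaps3D', coordinate='lidar'),
        pos_iou_thr=0.55,
        neg_iou_thr=0.55,
        min_pos_iou=0.55,
        ignore_iof_thr=-1))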
 # Copyright (c) OpenMMLab. All rights reserved.
+from typing import List
+
 import numpy as np
 import torch
 from mmcv.cnn import ConvModule, normal_init
+from mmengine.data import InstanceData
+from torch import Tensor
 
 from mmdet3d.ops.spconv import IS_SPCONV2_AVAILABLE
@@ -504,14 +508,14 @@ class PartA2BboxHead(BaseModule):
         return corner_loss.mean(dim=1)
 
-    def get_bboxes(self,
-                   rois,
-                   cls_score,
-                   bbox_pred,
-                   class_labels,
-                   class_pred,
-                   img_metas,
-                   cfg=None):
+    def get_results(self,
+                    rois: Tensor,
+                    cls_score: Tensor,
+                    bbox_pred: Tensor,
+                    class_labels: Tensor,
+                    class_pred: Tensor,
+                    input_metas: List[dict],
+                    cfg: dict = None) -> List:
         """Generate bboxes from bbox head predictions.
 
         Args:
@@ -520,7 +524,7 @@ class PartA2BboxHead(BaseModule):
             bbox_pred (torch.Tensor): Bounding boxes predictions
             class_labels (torch.Tensor): Label of classes
             class_pred (torch.Tensor): Score for nms.
-            img_metas (list[dict]): Point cloud and image's meta info.
+            input_metas (list[dict]): Point cloud and image's meta info.
             cfg (:obj:`ConfigDict`): Testing config.
 
         Returns:
@@ -550,16 +554,19 @@ class PartA2BboxHead(BaseModule):
             cur_rcnn_boxes3d = rcnn_boxes3d[roi_batch_id == batch_id]
             keep = self.multi_class_nms(cur_box_prob, cur_rcnn_boxes3d,
                                         cfg.score_thr, cfg.nms_thr,
-                                        img_metas[batch_id],
+                                        input_metas[batch_id],
                                         cfg.use_rotate_nms)
             selected_bboxes = cur_rcnn_boxes3d[keep]
             selected_label_preds = cur_class_labels[keep]
             selected_scores = cur_cls_score[keep]
 
-            result_list.append(
-                (img_metas[batch_id]['box_type_3d'](selected_bboxes,
-                                                    self.bbox_coder.code_size),
-                 selected_scores, selected_label_preds))
+            results = InstanceData()
+            results.bboxes_3d = input_metas[batch_id]['box_type_3d'](
+                selected_bboxes, self.bbox_coder.code_size)
+            results.scores_3d = selected_scores
+            results.labels_3d = selected_label_preds
+            result_list.append(results)
         return result_list
 
     def multi_class_nms(self,
...
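Note: `get_results` now returns one `InstanceData` per sample instead of a `(bboxes, scores, labels)` tuple, so downstream filtering keeps every field aligned under a single index operation. A small sketch (tensors are illustrative; `bboxes_3d` would be a `LiDARInstance3DBoxes` in practice):

import torch
from mmengine.data import InstanceData

results = InstanceData()
results.scores_3d = torch.tensor([0.9, 0.3, 0.7])
results.labels_3d = torch.tensor([2, 0, 1])

keep = results.scores_3d > 0.5
filtered = results[keep]  # scores_3d and labels_3d stay aligned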
@@ -5,6 +5,7 @@ from torch import nn as nn
 from torch.nn import functional as F
 
 from mmdet3d.core.bbox.structures import rotation_3d_in_axis
+from mmdet3d.core.utils import InstanceList
 from mmdet3d.models.builder import build_loss
 from mmdet3d.registry import MODELS
 from mmdet.core import multi_apply
@@ -127,16 +128,15 @@ class PointwiseSemanticHead(BaseModule):
         part_targets = torch.clamp(part_targets, min=0)
         return seg_targets, part_targets
 
-    def get_targets(self, voxels_dict, gt_bboxes_3d, gt_labels_3d):
+    def get_targets(self, voxel_dict: dict,
+                    batch_gt_instances_3d: InstanceList) -> dict:
         """generate segmentation and part prediction targets.
 
         Args:
-            voxel_centers (torch.Tensor): The center of voxels in shape
-                (voxel_num, 3).
-            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in
-                shape (box_num, 7).
-            gt_labels_3d (torch.Tensor): Class labels of ground truths in
-                shape (box_num).
+            voxel_dict (dict): Contains information of voxels.
+            batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
+                gt_instances. It usually includes ``bboxes_3d`` and
+                ``labels_3d`` attributes.
 
         Returns:
             dict: Prediction targets
@@ -146,12 +146,15 @@ class PointwiseSemanticHead(BaseModule):
                 - part_targets (torch.Tensor): Part prediction targets
                     with shape [voxel_num, 3].
         """
-        batch_size = len(gt_labels_3d)
+        batch_size = len(batch_gt_instances_3d)
         voxel_center_list = []
+        gt_bboxes_3d = []
+        gt_labels_3d = []
         for idx in range(batch_size):
-            coords_idx = voxels_dict['coors'][:, 0] == idx
-            voxel_center_list.append(voxels_dict['voxel_centers'][coords_idx])
+            coords_idx = voxel_dict['coors'][:, 0] == idx
+            voxel_center_list.append(voxel_dict['voxel_centers'][coords_idx])
+            gt_bboxes_3d.append(batch_gt_instances_3d[idx].bboxes_3d)
+            gt_labels_3d.append(batch_gt_instances_3d[idx].labels_3d)
 
         seg_targets, part_targets = multi_apply(self.get_targets_single,
                                                 voxel_center_list,
                                                 gt_bboxes_3d, gt_labels_3d)
...
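Note: `multi_apply` maps a per-sample function over the batched lists assembled above and transposes the per-sample tuples into per-field lists. A standalone equivalent of its core behaviour (the real mmdet helper also supports fixed kwargs via functools.partial):

def multi_apply(func, *args):
    # apply func per sample, then transpose tuples into per-field lists
    results = map(func, *args)
    return tuple(map(list, zip(*results)))

# three samples in, two per-field lists out
seg_targets, part_targets = multi_apply(lambda a, b: (a + b, a * b),
                                        [1, 2, 3], [4, 5, 6])
# seg_targets == [5, 7, 9], part_targets == [4, 10, 18]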
 # Copyright (c) OpenMMLab. All rights reserved.
 import torch
 from mmcv import ops
-from mmcv.runner import BaseModule
+from mmengine.model import BaseModule
 
 from mmdet3d.registry import MODELS
...
import unittest
import torch
from mmengine import DefaultScope
from mmdet3d.core import LiDARInstance3DBoxes
from mmdet3d.registry import MODELS
from tests.utils.model_utils import (_create_detector_inputs,
_get_detector_cfg, _setup_seed)
class TestPartA2(unittest.TestCase):
def test_parta2(self):
import mmdet3d.models
assert hasattr(mmdet3d.models, 'PartA2')
DefaultScope.get_instance('test_parta2', scope_name='mmdet3d')
_setup_seed(0)
parta2_cfg = _get_detector_cfg(
'parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class.py')
model = MODELS.build(parta2_cfg)
num_gt_instance = 50
data = [_create_detector_inputs(num_gt_instance=num_gt_instance)]
aug_data = [
_create_detector_inputs(num_gt_instance=num_gt_instance),
_create_detector_inputs(num_gt_instance=num_gt_instance + 1)
]
# test_aug_test
metainfo = {
'pcd_scale_factor': 1,
'pcd_horizontal_flip': 1,
'pcd_vertical_flip': 1,
'box_type_3d': LiDARInstance3DBoxes
}
for item in aug_data:
item['data_sample'].set_metainfo(metainfo)
if torch.cuda.is_available():
model = model.cuda()
# test simple_test
with torch.no_grad():
batch_inputs, data_samples = model.data_preprocessor(
data, True)
results = model.forward(
batch_inputs, data_samples, mode='predict')
self.assertEqual(len(results), len(data))
self.assertIn('bboxes_3d', results[0].pred_instances_3d)
self.assertIn('scores_3d', results[0].pred_instances_3d)
self.assertIn('labels_3d', results[0].pred_instances_3d)
batch_inputs, data_samples = model.data_preprocessor(
aug_data, True)
aug_results = model.forward(
batch_inputs, data_samples, mode='predict')
                self.assertEqual(len(aug_results), len(aug_data))
self.assertIn('bboxes_3d', aug_results[0].pred_instances_3d)
self.assertIn('scores_3d', aug_results[0].pred_instances_3d)
self.assertIn('labels_3d', aug_results[0].pred_instances_3d)
self.assertIn('bboxes_3d', aug_results[1].pred_instances_3d)
self.assertIn('scores_3d', aug_results[1].pred_instances_3d)
self.assertIn('labels_3d', aug_results[1].pred_instances_3d)
losses = model.forward(batch_inputs, data_samples, mode='loss')
self.assertGreater(losses['loss_rpn_cls'][0], 0)
self.assertGreater(losses['loss_rpn_bbox'][0], 0)
self.assertGreater(losses['loss_seg'], 0)
self.assertGreater(losses['loss_part'], 0)
self.assertGreater(losses['loss_cls'], 0)