Unverified Commit 6c03a971 authored by Tai-Wang, committed by GitHub

Release v1.1.0rc1

parents 9611c2d0 ca42c312
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import torch
from mmdet3d.models.layers.fusion_layers.point_fusion import (point_sample,
voxel_sample)
from mmdet3d.registry import MODELS, TASK_UTILS
from mmdet3d.structures.bbox_3d.utils import get_lidar2img
from mmdet3d.structures.det3d_data_sample import SampleList
from mmdet3d.utils import ConfigType, OptConfigType
from mmdet.models.detectors import BaseDetector
from .dfm import DfM
from .imvoxelnet import ImVoxelNet
@MODELS.register_module()
class MultiViewDfM(ImVoxelNet, DfM):
r"""Waymo challenge solution of `MV-FCOS3D++
<https://arxiv.org/abs/2207.12716>`_.
Args:
backbone (:obj:`ConfigDict` or dict): The backbone config.
neck (:obj:`ConfigDict` or dict): The neck config.
backbone_stereo (:obj:`ConfigDict` or dict): The stereo backbone
config.
backbone_3d (:obj:`ConfigDict` or dict): The 3d backbone config.
neck_3d (:obj:`ConfigDict` or dict): The 3D neck config.
bbox_head (:obj:`ConfigDict` or dict): The bbox head config.
voxel_size (:obj:`ConfigDict` or dict): The voxel size.
anchor_generator (:obj:`ConfigDict` or dict): The anchor generator
config.
neck_2d (:obj:`ConfigDict` or dict, optional): The 2D neck config
for 2D object detection. Defaults to None.
bbox_head_2d (:obj:`ConfigDict` or dict, optional): The 2D bbox
head config for 2D object detection. Defaults to None.
depth_head_2d (:obj:`ConfigDict` or dict, optional): The 2D depth
head config for depth estimation in fov space. Defaults to None.
depth_head (:obj:`ConfigDict` or dict, optional): The depth head
config for depth estimation in 3D voxel projected to fov space. Defaults to None.
train_cfg (:obj:`ConfigDict` or dict, optional): Config dict of
training hyper-parameters. Defaults to None.
test_cfg (:obj:`ConfigDict` or dict, optional): Config dict of test
hyper-parameters. Defaults to None.
data_preprocessor (dict or ConfigDict, optional): The pre-process
config of :class:`BaseDataPreprocessor`. It usually includes
``pad_size_divisor``, ``pad_value``, ``mean`` and ``std``.
valid_sample (bool): Whether to filter invalid points in view
transformation. Defaults to True.
temporal_aggregate (str): Aggregation method used in temporal
fusion, 'mean' or 'concat'. Defaults to 'concat'.
transform_depth (bool): Whether to apply image augmentation
transforms (scale, crop, flip) when sampling depth features.
Defaults to True.
init_cfg (:obj:`ConfigDict` or dict, optional): The initialization
config. Defaults to None.
"""
def __init__(self,
backbone: ConfigType,
neck: ConfigType,
backbone_stereo: ConfigType,
backbone_3d: ConfigType,
neck_3d: ConfigType,
bbox_head: ConfigType,
voxel_size: ConfigType,
anchor_generator: ConfigType,
neck_2d: ConfigType = None,
bbox_head_2d: ConfigType = None,
depth_head_2d: ConfigType = None,
depth_head: ConfigType = None,
train_cfg: OptConfigType = None,
test_cfg: OptConfigType = None,
data_preprocessor: OptConfigType = None,
valid_sample: bool = True,
temporal_aggregate: str = 'concat',
transform_depth: bool = True,
init_cfg: OptConfigType = None):
# TODO merge with DFM
BaseDetector.__init__(
self, data_preprocessor=data_preprocessor, init_cfg=init_cfg)
self.backbone = MODELS.build(backbone)
self.neck = MODELS.build(neck)
if backbone_stereo is not None:
backbone_stereo.update(cat_img_feature=self.neck.cat_img_feature)
backbone_stereo.update(in_sem_channels=self.neck.sem_channels[-1])
self.backbone_stereo = MODELS.build(backbone_stereo)
assert self.neck.cat_img_feature == \
self.backbone_stereo.cat_img_feature
assert self.neck.sem_channels[
-1] == self.backbone_stereo.in_sem_channels
if backbone_3d is not None:
self.backbone_3d = MODELS.build(backbone_3d)
if neck_3d is not None:
self.neck_3d = MODELS.build(neck_3d)
if neck_2d is not None:
self.neck_2d = MODELS.build(neck_2d)
if bbox_head_2d is not None:
self.bbox_head_2d = MODELS.build(bbox_head_2d)
if depth_head_2d is not None:
self.depth_head_2d = MODELS.build(depth_head_2d)
if depth_head is not None:
self.depth_head = MODELS.build(depth_head)
self.depth_samples = self.depth_head.depth_samples
self.train_cfg = train_cfg
self.test_cfg = test_cfg
bbox_head.update(train_cfg=train_cfg)
bbox_head.update(test_cfg=test_cfg)
self.bbox_head = MODELS.build(bbox_head)
self.voxel_size = voxel_size
self.voxel_range = anchor_generator['ranges'][0]
self.n_voxels = [
round((self.voxel_range[3] - self.voxel_range[0]) /
self.voxel_size[0]),
round((self.voxel_range[4] - self.voxel_range[1]) /
self.voxel_size[1]),
round((self.voxel_range[5] - self.voxel_range[2]) /
self.voxel_size[2])
]
self.anchor_generator = TASK_UTILS.build(anchor_generator)
self.valid_sample = valid_sample
self.temporal_aggregate = temporal_aggregate
self.transform_depth = transform_depth
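# NOTE: a minimal sketch (not from any real config) of the n_voxels
# arithmetic above, using hypothetical Waymo-like values:
#   voxel_range = [0.0, -40.0, -3.0, 70.4, 40.0, 1.0]
#   voxel_size = [0.32, 0.32, 0.5]
#   n_voxels = [round((70.4 - 0.0) / 0.32),    # 220 along x
#               round((40.0 - -40.0) / 0.32),  # 250 along y
#               round((1.0 - -3.0) / 0.5)]     # 8 along z
# i.e. the voxel grid derived from anchor_generator['ranges'][0] has
# shape (N_x, N_y, N_z) = (220, 250, 8) in this example.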
def extract_feat(self, batch_inputs_dict: dict,
batch_data_samples: SampleList):
"""Extract 3d features from the backbone -> fpn -> 3d projection.
Args:
batch_inputs_dict (dict): The model input dict which include
the 'imgs' key.
- imgs (torch.Tensor, optional): Image of each sample.
batch_data_samples (list[:obj:`DetDataSample`]): The batch
data samples. It usually includes information such
as `gt_instance` or `gt_panoptic_seg` or `gt_sem_seg`.
Returns:
tuple[torch.Tensor]: Transformed features, where the first
element is the volume feature of shape (N, C_out, N_x, N_y, N_z).
"""
# TODO: Nt means the number of frames temporally
# num_views means the number of views of a frame
img = batch_inputs_dict['imgs']
batch_img_metas = [
data_samples.metainfo for data_samples in batch_data_samples
]
batch_size, _, C_in, H, W = img.shape
num_views = batch_img_metas[0]['num_views']
num_ref_frames = batch_img_metas[0]['num_ref_frames']
if num_ref_frames > 0:
num_frames = num_ref_frames + 1
else:
num_frames = 1
input_shape = img.shape[-2:]
# NOTE: input_shape is the largest pad_shape of the batch of images
for img_meta in batch_img_metas:
img_meta.update(input_shape=input_shape)
if num_ref_frames > 0:
cur_imgs = img[:, :num_views].reshape(-1, C_in, H, W)
prev_imgs = img[:, num_views:].reshape(-1, C_in, H, W)
cur_feats = self.backbone(cur_imgs)
cur_feats = self.neck(cur_feats)[0]
with torch.no_grad():
prev_feats = self.backbone(prev_imgs)
prev_feats = self.neck(prev_feats)[0]
_, C_feat, H_feat, W_feat = cur_feats.shape
cur_feats = cur_feats.view(batch_size, -1, C_feat, H_feat, W_feat)
prev_feats = prev_feats.view(batch_size, -1, C_feat, H_feat,
W_feat)
batch_feats = torch.cat([cur_feats, prev_feats], dim=1)
else:
batch_imgs = img.view(-1, C_in, H, W)
batch_feats = self.backbone(batch_imgs)
# TODO: support SPP module neck
batch_feats = self.neck(batch_feats)[0]
_, C_feat, H_feat, W_feat = batch_feats.shape
batch_feats = batch_feats.view(batch_size, -1, C_feat, H_feat,
W_feat)
# transform the feature to voxel & stereo space
transform_feats = self.feature_transformation(batch_feats,
batch_img_metas,
num_views, num_frames)
if self.with_depth_head_2d:
transform_feats += (batch_feats[:, :num_views], )
return transform_feats
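# NOTE: a minimal sketch of the reshaping above, with hypothetical sizes
# (batch_size=2, num_views=5, num_ref_frames=1, C_in=3); none of these
# numbers come from a real config:
#   img = torch.rand(2, 10, 3, 224, 320)            # (B, num_frames*num_views, C, H, W)
#   cur_imgs = img[:, :5].reshape(-1, 3, 224, 320)  # (B*num_views, C, H, W)
#   cur_feats = torch.rand(10, 64, 56, 80)          # backbone + neck output
#   cur_feats = cur_feats.view(2, -1, 64, 56, 80)   # (B, num_views, C_feat, ...)
# previous-frame features are computed under torch.no_grad() and concatenated
# along dim=1, so batch_feats has shape (B, num_frames*num_views, C_feat,
# H_feat, W_feat) before feature_transformation consumes it.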
def feature_transformation(self, batch_feats, batch_img_metas, num_views,
num_frames):
"""Feature transformation from perspective view to BEV.
Args:
batch_feats (torch.Tensor): Perspective view features of shape
(batch_size, num_views, C, H, W).
batch_img_metas (list[dict]): Image meta information. Each element
corresponds to a group of images. len(img_metas) == B.
num_views (int): Number of views.
num_frames (int): Number of consecutive frames.
Returns:
tuple[torch.Tensor]: Volume features and (optionally) stereo \
features.
"""
# TODO: support more complicated 2D feature sampling
points = self.anchor_generator.grid_anchors(
[self.n_voxels[::-1]], device=batch_feats.device)[0][:, :3]
volumes = []
img_scale_factors = []
img_flips = []
img_crop_offsets = []
for feature, img_meta in zip(batch_feats, batch_img_metas):
# TODO: remove feature sampling from back
# TODO: support different scale_factors/flip/crop_offset for
# different views
frame_volume = []
frame_valid_nums = []
for frame_idx in range(num_frames):
volume = []
valid_flags = []
if isinstance(img_meta['img_shape'], list):
img_shape = img_meta['img_shape'][frame_idx][:2]
else:
img_shape = img_meta['img_shape'][:2]
for view_idx in range(num_views):
sample_idx = frame_idx * num_views + view_idx
if 'scale_factor' in img_meta:
img_scale_factor = img_meta['scale_factor'][sample_idx]
if isinstance(img_scale_factor, np.ndarray) and \
len(img_meta['scale_factor']) >= 2:
img_scale_factor = (
points.new_tensor(img_scale_factor[:2]))
else:
img_scale_factor = (
points.new_tensor(img_scale_factor))
else:
img_scale_factor = (1)
img_flip = img_meta['flip'][sample_idx] \
if 'flip' in img_meta.keys() else False
img_crop_offset = (
points.new_tensor(
img_meta['img_crop_offset'][sample_idx])
if 'img_crop_offset' in img_meta.keys() else 0)
lidar2cam = points.new_tensor(
img_meta['lidar2cam'][sample_idx])
cam2img = points.new_tensor(
img_meta['ori_cam2img'][sample_idx])
# align the precision, the tensor is converted to float32
lidar2img = get_lidar2img(cam2img.double(),
lidar2cam.double())
lidar2img = lidar2img.float()
sample_results = point_sample(
img_meta,
img_features=feature[sample_idx][None, ...],
points=points,
proj_mat=lidar2img,
coord_type='LIDAR',
img_scale_factor=img_scale_factor,
img_crop_offset=img_crop_offset,
img_flip=img_flip,
img_pad_shape=img_meta['input_shape'],
img_shape=img_shape,
aligned=False,
valid_flag=self.valid_sample)
if self.valid_sample:
volume.append(sample_results[0])
valid_flags.append(sample_results[1])
else:
volume.append(sample_results)
# TODO: save valid flags, more reasonable feat fusion
if self.valid_sample:
valid_nums = torch.stack(
valid_flags, dim=0).sum(0) # (N, )
volume = torch.stack(volume, dim=0).sum(0)
valid_mask = valid_nums > 0
volume[~valid_mask] = 0
frame_valid_nums.append(valid_nums)
else:
volume = torch.stack(volume, dim=0).mean(0)
frame_volume.append(volume)
img_scale_factors.append(img_scale_factor)
img_flips.append(img_flip)
img_crop_offsets.append(img_crop_offset)
if self.valid_sample:
if self.temporal_aggregate == 'mean':
frame_volume = torch.stack(frame_volume, dim=0).sum(0)
frame_valid_nums = torch.stack(
frame_valid_nums, dim=0).sum(0)
frame_valid_mask = frame_valid_nums > 0
frame_volume[~frame_valid_mask] = 0
frame_volume = frame_volume / torch.clamp(
frame_valid_nums[:, None], min=1)
elif self.temporal_aggregate == 'concat':
frame_valid_nums = torch.stack(frame_valid_nums, dim=1)
frame_volume = torch.stack(frame_volume, dim=1)
frame_valid_mask = frame_valid_nums > 0
frame_volume[~frame_valid_mask] = 0
frame_volume = (frame_volume / torch.clamp(
frame_valid_nums[:, :, None], min=1)).flatten(
start_dim=1, end_dim=2)
else:
frame_volume = torch.stack(frame_volume, dim=0).mean(0)
volumes.append(
frame_volume.reshape(self.n_voxels[::-1] + [-1]).permute(
3, 2, 1, 0))
volume_feat = torch.stack(volumes) # (B, C, N_x, N_y, N_z)
if self.with_backbone_3d:
outputs = self.backbone_3d(volume_feat)
volume_feat = outputs[0]
if self.backbone_3d.output_bev:
# use outputs[0] if len(outputs) == 1
# use outputs[1] if len(outputs) == 2
# TODO: unify the output formats
bev_feat = outputs[-1]
# grid_sample stereo features from the volume feature
# TODO: also support temporal modeling for depth head
if self.with_depth_head:
batch_stereo_feats = []
for batch_idx in range(volume_feat.shape[0]):
stereo_feat = []
for view_idx in range(num_views):
img_scale_factor = img_scale_factors[batch_idx] \
if self.transform_depth else points.new_tensor(
[1., 1.])
img_crop_offset = img_crop_offsets[batch_idx] \
if self.transform_depth else points.new_tensor(
[0., 0.])
img_flip = img_flips[batch_idx] if self.transform_depth \
else False
img_pad_shape = img_meta['input_shape'] \
if self.transform_depth else img_meta['ori_shape'][:2]
lidar2cam = points.new_tensor(
batch_img_metas[batch_idx]['lidar2cam'][view_idx])
cam2img = points.new_tensor(
batch_img_metas[batch_idx]['ori_cam2img'][view_idx])
proj_mat = torch.matmul(cam2img, lidar2cam)
stereo_feat.append(
voxel_sample(
volume_feat[batch_idx][None],
voxel_range=self.voxel_range,
voxel_size=self.voxel_size,
depth_samples=volume_feat.new_tensor(
self.depth_samples),
proj_mat=proj_mat,
downsample_factor=self.depth_head.
downsample_factor,
img_scale_factor=img_scale_factor,
img_crop_offset=img_crop_offset,
img_flip=img_flip,
img_pad_shape=img_pad_shape,
img_shape=batch_img_metas[batch_idx]['img_shape']
[view_idx][:2],
aligned=True)) # TODO: study the aligned setting
batch_stereo_feats.append(torch.cat(stereo_feat))
# cat (N, C, D, H, W) -> (B*N, C, D, H, W)
batch_stereo_feats = torch.cat(batch_stereo_feats)
if self.with_neck_3d:
if self.with_backbone_3d and self.backbone_3d.output_bev:
spatial_features = self.neck_3d(bev_feat)
# TODO: unify the outputs of neck_3d
volume_feat = spatial_features[1]
else:
volume_feat = self.neck_3d(volume_feat)[0]
# TODO: unify the output format of neck_3d
transform_feats = (volume_feat, )
if self.with_depth_head:
transform_feats += (batch_stereo_feats, )
return transform_feats
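# NOTE: a minimal sketch (made-up tensors) of the valid-sample fusion used
# above: per-view samples are summed and later divided by how many views
# actually observed each voxel, so unobserved voxels stay zero.
#   volume = torch.stack(per_view_feats, dim=0).sum(0)      # (N_pts, C)
#   valid_nums = torch.stack(per_view_valid, dim=0).sum(0)  # (N_pts, )
#   volume[valid_nums == 0] = 0
#   volume = volume / torch.clamp(valid_nums[:, None], min=1)
# with temporal_aggregate='concat', frames are stacked along a new dim and
# flattened instead, so the channel dim grows to num_frames * C.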
def aug_test(self, imgs, img_metas, **kwargs):
"""Test with augmentations.
Args:
imgs (list[torch.Tensor]): Input images of shape (N, C_in, H, W).
img_metas (list): Image metas.
Returns:
list[dict]: Predicted 3d boxes.
"""
raise NotImplementedError
......@@ -401,7 +401,7 @@ class MVXTwoStageDetector(Base3DDetector):
else:
results_list_2d = None
detsamples = self.convert_to_datasample(batch_data_samples,
detsamples = self.add_pred_to_datasample(batch_data_samples,
results_list_3d,
results_list_2d)
return detsamples
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, Optional
import torch
from mmdet3d.registry import MODELS
......@@ -23,14 +25,14 @@ class PointRCNN(TwoStage3DDetector):
"""
def __init__(self,
backbone,
neck=None,
rpn_head=None,
roi_head=None,
train_cfg=None,
test_cfg=None,
pretrained=None,
init_cfg=None):
backbone: dict,
neck: Optional[dict] = None,
rpn_head: Optional[dict] = None,
roi_head: Optional[dict] = None,
train_cfg: Optional[dict] = None,
test_cfg: Optional[dict] = None,
init_cfg: Optional[dict] = None,
data_preprocessor: Optional[dict] = None) -> None:
super(PointRCNN, self).__init__(
backbone=backbone,
neck=neck,
......@@ -38,111 +40,28 @@ class PointRCNN(TwoStage3DDetector):
roi_head=roi_head,
train_cfg=train_cfg,
test_cfg=test_cfg,
pretrained=pretrained,
init_cfg=init_cfg)
init_cfg=init_cfg,
data_preprocessor=data_preprocessor)
def extract_feat(self, points):
def extract_feat(self, batch_inputs_dict: Dict) -> Dict:
"""Directly extract features from the backbone+neck.
Args:
points (torch.Tensor): Input points.
batch_inputs_dict (dict): The model input dict which include
'points', 'imgs' keys.
- points (list[torch.Tensor]): Point cloud of each sample.
- imgs (torch.Tensor, optional): Image of each sample.
Returns:
dict: Features from the backbone+neck
dict: Features from the backbone+neck and raw points.
"""
points = torch.stack(batch_inputs_dict['points'])
x = self.backbone(points)
if self.with_neck:
x = self.neck(x)
return x
def forward_train(self, points, input_metas, gt_bboxes_3d, gt_labels_3d):
"""Forward of training.
Args:
points (list[torch.Tensor]): Points of each batch.
input_metas (list[dict]): Meta information of each sample.
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes of each batch.
gt_labels_3d (list[torch.Tensor]): gt class labels of each batch.
Returns:
dict: Losses.
"""
losses = dict()
stack_points = torch.stack(points)
x = self.extract_feat(stack_points)
# features for rcnn
backbone_feats = x['fp_features'].clone()
backbone_xyz = x['fp_xyz'].clone()
rcnn_feats = {'features': backbone_feats, 'points': backbone_xyz}
bbox_preds, cls_preds = self.rpn_head(x)
rpn_loss = self.rpn_head.loss(
bbox_preds=bbox_preds,
cls_preds=cls_preds,
points=points,
gt_bboxes_3d=gt_bboxes_3d,
gt_labels_3d=gt_labels_3d,
input_metas=input_metas)
losses.update(rpn_loss)
bbox_list = self.rpn_head.get_bboxes(stack_points, bbox_preds,
cls_preds, input_metas)
proposal_list = [
dict(
boxes_3d=bboxes,
scores_3d=scores,
labels_3d=labels,
cls_preds=preds_cls)
for bboxes, scores, labels, preds_cls in bbox_list
]
rcnn_feats.update({'points_cls_preds': cls_preds})
roi_losses = self.roi_head.forward_train(rcnn_feats, input_metas,
proposal_list, gt_bboxes_3d,
gt_labels_3d)
losses.update(roi_losses)
return losses
def simple_test(self, points, img_metas, imgs=None, rescale=False):
"""Forward of testing.
Args:
points (list[torch.Tensor]): Points of each sample.
img_metas (list[dict]): Image metas.
imgs (list[torch.Tensor], optional): Images of each sample.
Defaults to None.
rescale (bool, optional): Whether to rescale results.
Defaults to False.
Returns:
list: Predicted 3d boxes.
"""
stack_points = torch.stack(points)
x = self.extract_feat(stack_points)
# features for rcnn
backbone_feats = x['fp_features'].clone()
backbone_xyz = x['fp_xyz'].clone()
rcnn_feats = {'features': backbone_feats, 'points': backbone_xyz}
bbox_preds, cls_preds = self.rpn_head(x)
rcnn_feats.update({'points_cls_preds': cls_preds})
bbox_list = self.rpn_head.get_bboxes(
stack_points, bbox_preds, cls_preds, img_metas, rescale=rescale)
proposal_list = [
dict(
boxes_3d=bboxes,
scores_3d=scores,
labels_3d=labels,
cls_preds=preds_cls)
for bboxes, scores, labels, preds_cls in bbox_list
]
bbox_results = self.roi_head.simple_test(rcnn_feats, img_metas,
proposal_list)
return bbox_results
return dict(
fp_features=x['fp_features'].clone(),
fp_points=x['fp_xyz'].clone(),
raw_points=points)
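# NOTE: a minimal usage sketch of the refactored extract_feat above; the
# shapes are hypothetical and 'points' is a list with one (N, point_dim)
# tensor per sample:
#   batch_inputs_dict = dict(points=[torch.rand(16384, 4),
#                                    torch.rand(16384, 4)])
#   feats = self.extract_feat(batch_inputs_dict)
#   feats['fp_features']  # point-wise backbone features for the RCNN stage
#   feats['fp_points']    # coordinates of the corresponding points
#   feats['raw_points']   # stacked raw inputs, shape (B, N, point_dim)
# torch.stack requires every sample to carry the same number of points,
# which is typically ensured by a point-sampling transform in the pipeline.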
......@@ -108,7 +108,7 @@ class SingleStage3DDetector(Base3DDetector):
"""
x = self.extract_feat(batch_inputs_dict)
results_list = self.bbox_head.predict(x, batch_data_samples, **kwargs)
predictions = self.convert_to_datasample(batch_data_samples,
predictions = self.add_pred_to_datasample(batch_data_samples,
results_list)
return predictions
......
......@@ -18,7 +18,7 @@ class SingleStageMono3DDetector(SingleStageDetector):
boxes on the output features of the backbone+neck.
"""
def convert_to_datasample(
def add_pred_to_datasample(
self,
data_samples: SampleList,
data_instances_3d: OptInstanceList = None,
......
......@@ -100,8 +100,9 @@ class TwoStage3DDetector(Base3DDetector):
keys = rpn_losses.keys()
for key in keys:
if 'loss' in key and 'rpn' not in key:
rpn_losses[f'rpn_{key}'] = rpn_losses.pop(key)
losses.update(rpn_losses)
losses[f'rpn_{key}'] = rpn_losses[key]
else:
losses[key] = rpn_losses[key]
else:
# TODO: Not support currently, should have a check at Fast R-CNN
assert batch_data_samples[0].get('proposals', None) is not None
......@@ -161,7 +162,7 @@ class TwoStage3DDetector(Base3DDetector):
batch_data_samples)
# convert to Det3DDataSample
results_list = self.convert_to_datasample(batch_data_samples,
results_list = self.add_pred_to_datasample(batch_data_samples,
results_list)
return results_list
......
......@@ -99,7 +99,7 @@ class VoteNet(SingleStage3DDetector):
points = batch_inputs_dict['points']
results_list = self.bbox_head.predict(points, feats_dict,
batch_data_samples, **kwargs)
data_3d_samples = self.convert_to_datasample(batch_data_samples,
data_3d_samples = self.add_pred_to_datasample(batch_data_samples,
results_list)
return data_3d_samples
......@@ -143,6 +143,6 @@ class VoteNet(SingleStage3DDetector):
self.bbox_head.test_cfg)
merged_results = InstanceData(**merged_results_dict)
data_3d_samples = self.convert_to_datasample(batch_data_samples,
data_3d_samples = self.add_pred_to_datasample(batch_data_samples,
[merged_results])
return data_3d_samples
......@@ -7,7 +7,7 @@ from torch.nn import functional as F
from mmdet3d.registry import MODELS
from mmdet3d.structures.bbox_3d import (get_proj_mat_by_coord_type,
points_cam2img)
points_cam2img, points_img2cam)
from . import apply_3d_transformation
......@@ -23,7 +23,8 @@ def point_sample(img_meta,
img_shape,
aligned=True,
padding_mode='zeros',
align_corners=True):
align_corners=True,
valid_flag=False):
"""Obtain image features using points.
Args:
......@@ -41,12 +42,15 @@ def point_sample(img_meta,
padding, this is necessary to obtain features in feature map.
img_shape (tuple[int]): int tuple indicates the h & w before padding
after scaling, this is necessary for flipping coordinates.
aligned (bool, optional): Whether use bilinear interpolation when
aligned (bool): Whether use bilinear interpolation when
sampling image features for each point. Defaults to True.
padding_mode (str, optional): Padding mode when padding values for
padding_mode (str): Padding mode when padding values for
features of out-of-image points. Defaults to 'zeros'.
align_corners (bool, optional): Whether to align corners when
align_corners (bool): Whether to align corners when
sampling image features for each point. Defaults to True.
valid_flag (bool): Whether to filter out points that lie
outside the image or have depth smaller than 0. Defaults to
False.
Returns:
torch.Tensor: NxC image features sampled by point coordinates.
......@@ -56,7 +60,12 @@ def point_sample(img_meta,
points = apply_3d_transformation(
points, coord_type, img_meta, reverse=True)
# project points to camera coordinate
# project points to image coordinate
if valid_flag:
proj_pts = points_cam2img(points, proj_mat, with_depth=True)
pts_2d = proj_pts[..., :2]
depths = proj_pts[..., 2]
else:
pts_2d = points_cam2img(points, proj_mat)
# img transformation: scale -> crop -> flip
......@@ -70,13 +79,13 @@ def point_sample(img_meta,
if img_flip:
# by default we take it as horizontal flip
# use img_shape before padding for flip
orig_h, orig_w = img_shape
coor_x = orig_w - coor_x
ori_h, ori_w = img_shape
coor_x = ori_w - coor_x
h, w = img_pad_shape
coor_y = coor_y / h * 2 - 1
coor_x = coor_x / w * 2 - 1
grid = torch.cat([coor_x, coor_y],
norm_coor_y = coor_y / h * 2 - 1
norm_coor_x = coor_x / w * 2 - 1
grid = torch.cat([norm_coor_x, norm_coor_y],
dim=1).unsqueeze(0).unsqueeze(0) # Nx2 -> 1x1xNx2
# align_corner=True provides higher performance
......@@ -88,6 +97,15 @@ def point_sample(img_meta,
padding_mode=padding_mode,
align_corners=align_corners) # 1xCx1xN feats
if valid_flag:
# (N, )
valid = (coor_x.squeeze() < w) & (coor_x.squeeze() > 0) & (
coor_y.squeeze() < h) & (coor_y.squeeze() > 0) & (
depths > 0)
valid_features = point_features.squeeze().t()
valid_features[~valid] = 0
return valid_features, valid # (N, C), (N,)
return point_features.squeeze().t()
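# NOTE: a minimal sketch of the coordinate normalization above (numbers are
# illustrative): F.grid_sample expects sampling locations in [-1, 1], so a
# pixel coordinate is mapped with x_norm = x / w * 2 - 1. With a padded
# width w=1280, coor_x=0 maps to -1.0, coor_x=640 maps to 0.0 and
# coor_x=1280 maps to 1.0. When valid_flag=True the returned mask keeps only
# points whose projection falls strictly inside the padded image and whose
# projected depth is positive, mirroring the check above:
#   valid = (coor_x.squeeze() > 0) & (coor_x.squeeze() < w) & \
#           (coor_y.squeeze() > 0) & (coor_y.squeeze() < h) & (depths > 0)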
......@@ -304,3 +322,94 @@ class PointFusion(BaseModule):
align_corners=self.align_corners,
)
return img_pts
def voxel_sample(voxel_features,
voxel_range,
voxel_size,
depth_samples,
proj_mat,
downsample_factor,
img_scale_factor,
img_crop_offset,
img_flip,
img_pad_shape,
img_shape,
aligned=True,
padding_mode='zeros',
align_corners=True):
"""Obtain image features using points.
Args:
voxel_features (torch.Tensor): 1 x C x Nx x Ny x Nz voxel features.
voxel_range (list): The range of voxel features.
voxel_size (:obj:`ConfigDict` or dict): The voxel size of voxel
features.
depth_samples (torch.Tensor): N depth samples in LiDAR coordinates.
proj_mat (torch.Tensor): ORIGINAL LiDAR2img projection matrix
for N views.
downsample_factor (int): The downsample factor in rescaling.
img_scale_factor (tuple[torch.Tensor]): Scale factor with shape of
(w_scale, h_scale).
img_crop_offset (tuple[torch.Tensor]): Crop offset used to crop
image during data augmentation with shape of (w_offset, h_offset).
img_flip (bool): Whether the image is flipped.
img_pad_shape (tuple[int]): int tuple indicates the h & w after
padding, this is necessary to obtain features in feature map.
img_shape (tuple[int]): int tuple indicates the h & w before padding
after scaling, this is necessary for flipping coordinates.
aligned (bool, optional): Whether use bilinear interpolation when
sampling image features for each point. Defaults to True.
padding_mode (str, optional): Padding mode when padding values for
features of out-of-image points. Defaults to 'zeros'.
align_corners (bool, optional): Whether to align corners when
sampling image features for each point. Defaults to True.
Returns:
torch.Tensor: 1xCxDxHxW frustum features sampled from voxel features.
"""
# construct frustum grid
device = voxel_features.device
h, w = img_pad_shape
h_out = round(h / downsample_factor)
w_out = round(w / downsample_factor)
ws = (torch.linspace(0, w_out - 1, w_out) * downsample_factor).to(device)
hs = (torch.linspace(0, h_out - 1, h_out) * downsample_factor).to(device)
depths = depth_samples[::downsample_factor]
num_depths = len(depths)
ds_3d, ys_3d, xs_3d = torch.meshgrid(depths, hs, ws)
# grid: (D, H_out, W_out, 3) -> (D*H_out*W_out, 3)
grid = torch.stack([xs_3d, ys_3d, ds_3d], dim=-1).view(-1, 3)
# recover the coordinates in the canonical space
# reverse order of augmentations: flip -> crop -> scale
if img_flip:
# by default we take it as horizontal flip
# use img_shape before padding for flip
ori_h, ori_w = img_shape
grid[:, 0] = ori_w - grid[:, 0]
grid[:, :2] += img_crop_offset
grid[:, :2] /= img_scale_factor
# grid3d: (D*H_out*W_out, 3) in LiDAR coordinate system
grid3d = points_img2cam(grid, proj_mat)
# convert the 3D point coordinates to voxel coordinates
voxel_range = torch.tensor(voxel_range).to(device).view(1, 6)
voxel_size = torch.tensor(voxel_size).to(device).view(1, 3)
# suppose the voxel grid is generated with AlignedAnchorGenerator
# -0.5 given each grid is located at the center of the grid
# TODO: study whether here needs -0.5
grid3d = (grid3d - voxel_range[:, :3]) / voxel_size - 0.5
grid_size = (voxel_range[:, 3:] - voxel_range[:, :3]) / voxel_size
# normalize grid3d to (-1, 1)
grid3d = grid3d / grid_size * 2 - 1
# (x, y, z) -> (z, y, x) for grid_sampling
grid3d = grid3d.view(1, num_depths, h_out, w_out, 3)[..., [2, 1, 0]]
# align_corner=True provides higher performance
mode = 'bilinear' if aligned else 'nearest'
frustum_features = F.grid_sample(
voxel_features,
grid3d,
mode=mode,
padding_mode=padding_mode,
align_corners=align_corners) # 1xCxDxHxW feats
return frustum_features
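# NOTE: a minimal usage sketch of voxel_sample; every value below is
# illustrative (the proj_mat placeholder in particular is not a real
# lidar2img matrix):
#   voxel_feats = torch.rand(1, 64, 220, 250, 8)    # 1 x C x Nx x Ny x Nz
#   frustum = voxel_sample(
#       voxel_feats,
#       voxel_range=[0.0, -40.0, -3.0, 70.4, 40.0, 1.0],
#       voxel_size=[0.32, 0.32, 0.5],
#       depth_samples=torch.linspace(2.0, 60.0, 30),
#       proj_mat=torch.eye(4),
#       downsample_factor=4,
#       img_scale_factor=torch.tensor([1., 1.]),
#       img_crop_offset=torch.tensor([0., 0.]),
#       img_flip=False,
#       img_pad_shape=(384, 1280),
#       img_shape=(376, 1272))
#   # frustum: 1 x 64 x D x 96 x 320, where D = len(depth_samples[::4])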
......@@ -6,7 +6,9 @@ try:
except ImportError:
IS_SPCONV2_AVAILABLE = False
else:
if hasattr(spconv, '__version__') and spconv.__version__ >= '2.0.0':
if hasattr(spconv,
'__version__') and spconv.__version__ >= '2.0.0' and hasattr(
spconv, 'pytorch'):
IS_SPCONV2_AVAILABLE = register_spconv2()
else:
IS_SPCONV2_AVAILABLE = False
......
......@@ -6,7 +6,7 @@ from torch import nn as nn
@MODELS.register_module()
class GroupFree3DMHA(MultiheadAttention):
"""A warpper for torch.nn.MultiheadAttention for GroupFree3D.
"""A wrapper for torch.nn.MultiheadAttention for GroupFree3D.
This module implements MultiheadAttention with identity connection,
and positional encoding used in DETR is also passed as input.
......
......@@ -278,7 +278,7 @@ class SparseEncoderSASSD(SparseEncoder):
Returns:
dict: Backbone features.
tuple[torch.Tensor]: Mean feature value of the points,
Classificaion result of the points,
Classification result of the points,
Regression offsets of the points.
"""
coors = coors.int()
......@@ -409,7 +409,7 @@ class SparseEncoderSASSD(SparseEncoder):
Args:
points (torch.Tensor): Mean feature value of the points.
point_cls (torch.Tensor): Classificaion result of the points.
point_cls (torch.Tensor): Classification result of the points.
point_reg (torch.Tensor): Regression offsets of the points.
gt_bboxes (list[:obj:`BaseInstance3DBoxes`]): Ground truth
boxes for each sample.
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List
from typing import Dict, List, Tuple
import numpy as np
import torch
......@@ -10,6 +10,7 @@ from torch import Tensor
from mmdet3d.models import make_sparse_convmodule
from mmdet3d.models.layers.spconv import IS_SPCONV2_AVAILABLE
from mmdet3d.utils.typing import InstanceList
from mmdet.models.utils import multi_apply
if IS_SPCONV2_AVAILABLE:
......@@ -21,11 +22,11 @@ else:
from mmengine.model import BaseModule
from torch import nn as nn
from mmdet3d.models.builder import build_loss
from mmdet3d.models.layers import nms_bev, nms_normal_bev
from mmdet3d.registry import MODELS, TASK_UTILS
from mmdet3d.structures.bbox_3d import (LiDARInstance3DBoxes,
rotation_3d_in_axis, xywhr2xyxyr)
from mmdet3d.utils.typing import SamplingResultList
@MODELS.register_module()
......@@ -56,40 +57,40 @@ class PartA2BboxHead(BaseModule):
conv_cfg (dict): Config dict of convolutional layers
norm_cfg (dict): Config dict of normalization layers
loss_bbox (dict): Config dict of box regression loss.
loss_cls (dict): Config dict of classification loss.
loss_cls (dict, optional): Config dict of classification loss.
"""
def __init__(self,
num_classes,
seg_in_channels,
part_in_channels,
seg_conv_channels=None,
part_conv_channels=None,
merge_conv_channels=None,
down_conv_channels=None,
shared_fc_channels=None,
cls_channels=None,
reg_channels=None,
dropout_ratio=0.1,
roi_feat_size=14,
with_corner_loss=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
loss_bbox=dict(
num_classes: int,
seg_in_channels: int,
part_in_channels: int,
seg_conv_channels: List[int] = None,
part_conv_channels: List[int] = None,
merge_conv_channels: List[int] = None,
down_conv_channels: List[int] = None,
shared_fc_channels: List[int] = None,
cls_channels: List[int] = None,
reg_channels: List[int] = None,
dropout_ratio: float = 0.1,
roi_feat_size: int = 14,
with_corner_loss: bool = True,
bbox_coder: dict = dict(type='DeltaXYZWLHRBBoxCoder'),
conv_cfg: dict = dict(type='Conv1d'),
norm_cfg: dict = dict(type='BN1d', eps=1e-3, momentum=0.01),
loss_bbox: dict = dict(
type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_cls=dict(
loss_cls: dict = dict(
type='CrossEntropyLoss',
use_sigmoid=True,
reduction='none',
loss_weight=1.0),
init_cfg=None):
init_cfg: dict = None) -> None:
super(PartA2BboxHead, self).__init__(init_cfg=init_cfg)
self.num_classes = num_classes
self.with_corner_loss = with_corner_loss
self.bbox_coder = TASK_UTILS.build(bbox_coder)
self.loss_bbox = build_loss(loss_bbox)
self.loss_cls = build_loss(loss_cls)
self.loss_bbox = MODELS.build(loss_bbox)
self.loss_cls = MODELS.build(loss_cls)
self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
assert down_conv_channels[-1] == shared_fc_channels[0]
......@@ -244,7 +245,7 @@ class PartA2BboxHead(BaseModule):
super().init_weights()
normal_init(self.conv_reg[-1].conv, mean=0, std=0.001)
def forward(self, seg_feats, part_feats):
def forward(self, seg_feats: Tensor, part_feats: Tensor) -> Tuple[Tensor]:
"""Forward pass.
Args:
......@@ -294,8 +295,10 @@ class PartA2BboxHead(BaseModule):
return cls_score, bbox_pred
def loss(self, cls_score, bbox_pred, rois, labels, bbox_targets,
pos_gt_bboxes, reg_mask, label_weights, bbox_weights):
def loss(self, cls_score: Tensor, bbox_pred: Tensor, rois: Tensor,
labels: Tensor, bbox_targets: Tensor, pos_gt_bboxes: Tensor,
reg_mask: Tensor, label_weights: Tensor,
bbox_weights: Tensor) -> Dict:
"""Computing losses.
Args:
......@@ -329,9 +332,9 @@ class PartA2BboxHead(BaseModule):
pos_inds = (reg_mask > 0)
if pos_inds.any() == 0:
# fake a part loss
losses['loss_bbox'] = loss_cls.new_tensor(0)
losses['loss_bbox'] = loss_cls.new_tensor(0) * loss_cls.sum()
if self.with_corner_loss:
losses['loss_corner'] = loss_cls.new_tensor(0)
losses['loss_corner'] = loss_cls.new_tensor(0) * loss_cls.sum()
else:
pos_bbox_pred = bbox_pred.view(rcnn_batch_size, -1)[pos_inds]
bbox_weights_flat = bbox_weights[pos_inds].view(-1, 1).repeat(
......@@ -367,7 +370,10 @@ class PartA2BboxHead(BaseModule):
return losses
def get_targets(self, sampling_results, rcnn_train_cfg, concat=True):
def get_targets(self,
sampling_results: SamplingResultList,
rcnn_train_cfg: dict,
concat: bool = True) -> Tuple[Tensor]:
"""Generate targets.
Args:
......@@ -407,7 +413,8 @@ class PartA2BboxHead(BaseModule):
return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
bbox_weights)
def _get_target_single(self, pos_bboxes, pos_gt_bboxes, ious, cfg):
def _get_target_single(self, pos_bboxes: Tensor, pos_gt_bboxes: Tensor,
ious: Tensor, cfg: dict) -> Tuple[Tensor]:
"""Generate training targets for a single sample.
Args:
......@@ -472,7 +479,10 @@ class PartA2BboxHead(BaseModule):
return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
bbox_weights)
def get_corner_loss_lidar(self, pred_bbox3d, gt_bbox3d, delta=1.0):
def get_corner_loss_lidar(self,
pred_bbox3d: Tensor,
gt_bbox3d: Tensor,
delta: float = 1.0) -> Tensor:
"""Calculate corner loss of given boxes.
Args:
......@@ -515,7 +525,7 @@ class PartA2BboxHead(BaseModule):
class_labels: Tensor,
class_pred: Tensor,
input_metas: List[dict],
cfg: dict = None) -> List:
cfg: dict = None) -> InstanceList:
"""Generate bboxes from bbox head predictions.
Args:
......@@ -528,7 +538,17 @@ class PartA2BboxHead(BaseModule):
cfg (:obj:`ConfigDict`): Testing config.
Returns:
list[tuple]: Decoded bbox, scores and labels after nms.
list[:obj:`InstanceData`]: Detection results of each sample
after the post process.
Each item usually contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instances, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (BaseInstance3DBoxes): Prediction of bboxes,
contains a tensor with shape (num_instances, C), where
C >= 7.
"""
roi_batch_id = rois[..., 0]
roi_boxes = rois[..., 1:] # boxes without batch id
......@@ -570,12 +590,12 @@ class PartA2BboxHead(BaseModule):
return result_list
def multi_class_nms(self,
box_probs,
box_preds,
score_thr,
nms_thr,
input_meta,
use_rotate_nms=True):
box_probs: Tensor,
box_preds: Tensor,
score_thr: float,
nms_thr: float,
input_meta: dict,
use_rotate_nms: bool = True) -> Tensor:
"""Multi-class NMS for box head.
Note:
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List, Optional, Tuple
import numpy as np
import torch
import torch.nn as nn
from mmcv.cnn import ConvModule
from mmcv.cnn.bricks import build_conv_layer
from mmengine.model import BaseModule, normal_init
from torch import nn as nn
from mmengine.structures import InstanceData
from torch import Tensor
from mmdet3d.models.layers import nms_bev, nms_normal_bev
from mmdet3d.models.layers.pointnet_modules import build_sa_module
from mmdet3d.registry import MODELS, TASK_UTILS
from mmdet3d.structures.bbox_3d import (LiDARInstance3DBoxes,
rotation_3d_in_axis, xywhr2xyxyr)
from mmdet3d.utils.typing import InstanceList, SamplingResultList
from mmdet.models.utils import multi_apply
......@@ -24,17 +29,17 @@ class PointRCNNBboxHead(BaseModule):
mlp_channels (list[int]): The number of mlp channels.
pred_layer_cfg (dict, optional): Config of classification and
regression prediction layers. Defaults to None.
num_points (tuple, optional): The number of points which each SA
num_points (tuple): The number of points which each SA
module samples. Defaults to (128, 32, -1).
radius (tuple, optional): Sampling radius of each SA module.
radius (tuple): Sampling radius of each SA module.
Defaults to (0.2, 0.4, 100).
num_samples (tuple, optional): The number of samples for ball query
num_samples (tuple): The number of samples for ball query
in each SA module. Defaults to (64, 64, 64).
sa_channels (tuple, optional): Out channels of each mlp in SA module.
sa_channels (tuple): Out channels of each mlp in SA module.
Defaults to ((128, 128, 128), (128, 128, 256), (256, 256, 512)).
bbox_coder (dict, optional): Config dict of box coders.
bbox_coder (dict): Config dict of box coders.
Defaults to dict(type='DeltaXYZWLHRBBoxCoder').
sa_cfg (dict, optional): Config of set abstraction module, which may
sa_cfg (dict): Config of set abstraction module, which may
contain the following keys and values:
- pool_mod (str): Pool method ('max' or 'avg') for SA modules.
......@@ -43,52 +48,53 @@ class PointRCNNBboxHead(BaseModule):
each SA module.
Defaults to dict(type='PointSAModule', pool_mod='max',
use_xyz=True).
conv_cfg (dict, optional): Config dict of convolutional layers.
conv_cfg (dict): Config dict of convolutional layers.
Defaults to dict(type='Conv1d').
norm_cfg (dict, optional): Config dict of normalization layers.
norm_cfg (dict): Config dict of normalization layers.
Defaults to dict(type='BN1d').
act_cfg (dict, optional): Config dict of activation layers.
act_cfg (dict): Config dict of activation layers.
Defaults to dict(type='ReLU').
bias (str, optional): Type of bias. Defaults to 'auto'.
loss_bbox (dict, optional): Config of regression loss function.
bias (str): Type of bias. Defaults to 'auto'.
loss_bbox (dict): Config of regression loss function.
Defaults to dict(type='SmoothL1Loss', beta=1.0 / 9.0,
reduction='sum', loss_weight=1.0).
loss_cls (dict, optional): Config of classification loss function.
loss_cls (dict): Config of classification loss function.
Defaults to dict(type='CrossEntropyLoss', use_sigmoid=True,
reduction='sum', loss_weight=1.0).
with_corner_loss (bool, optional): Whether using corner loss.
with_corner_loss (bool): Whether using corner loss.
Defaults to True.
init_cfg (dict, optional): Config of initialization. Defaults to None.
"""
def __init__(
self,
num_classes,
in_channels,
mlp_channels,
pred_layer_cfg=None,
num_points=(128, 32, -1),
radius=(0.2, 0.4, 100),
num_samples=(64, 64, 64),
sa_channels=((128, 128, 128), (128, 128, 256), (256, 256, 512)),
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
sa_cfg=dict(type='PointSAModule', pool_mod='max', use_xyz=True),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
act_cfg=dict(type='ReLU'),
bias='auto',
loss_bbox=dict(
def __init__(self,
num_classes: int,
in_channels: int,
mlp_channels: List[int],
pred_layer_cfg: Optional[dict] = None,
num_points: tuple = (128, 32, -1),
radius: tuple = (0.2, 0.4, 100),
num_samples: tuple = (64, 64, 64),
sa_channels: tuple = ((128, 128, 128), (128, 128, 256),
(256, 256, 512)),
bbox_coder: dict = dict(type='DeltaXYZWLHRBBoxCoder'),
sa_cfg: dict = dict(
type='PointSAModule', pool_mod='max', use_xyz=True),
conv_cfg: dict = dict(type='Conv1d'),
norm_cfg: dict = dict(type='BN1d'),
act_cfg: dict = dict(type='ReLU'),
bias: str = 'auto',
loss_bbox: dict = dict(
type='SmoothL1Loss',
beta=1.0 / 9.0,
reduction='sum',
loss_weight=1.0),
loss_cls=dict(
loss_cls: dict = dict(
type='CrossEntropyLoss',
use_sigmoid=True,
reduction='sum',
loss_weight=1.0),
with_corner_loss=True,
init_cfg=None):
with_corner_loss: bool = True,
init_cfg: Optional[dict] = None) -> None:
super(PointRCNNBboxHead, self).__init__(init_cfg=init_cfg)
self.num_classes = num_classes
self.num_sa = len(sa_channels)
......@@ -169,7 +175,8 @@ class PointRCNNBboxHead(BaseModule):
if init_cfg is None:
self.init_cfg = dict(type='Xavier', layer=['Conv2d', 'Conv1d'])
def _add_conv_branch(self, in_channels, conv_channels):
def _add_conv_branch(self, in_channels: int,
conv_channels: tuple) -> nn.Sequential:
"""Add shared or separable branch.
Args:
......@@ -203,7 +210,7 @@ class PointRCNNBboxHead(BaseModule):
nn.init.constant_(m.bias, 0)
normal_init(self.conv_reg.weight, mean=0, std=0.001)
def forward(self, feats):
def forward(self, feats: Tensor) -> Tuple[Tensor]:
"""Forward pass.
Args:
......@@ -239,8 +246,10 @@ class PointRCNNBboxHead(BaseModule):
rcnn_reg = rcnn_reg.transpose(1, 2).contiguous().squeeze(dim=1)
return rcnn_cls, rcnn_reg
def loss(self, cls_score, bbox_pred, rois, labels, bbox_targets,
pos_gt_bboxes, reg_mask, label_weights, bbox_weights):
def loss(self, cls_score: Tensor, bbox_pred: Tensor, rois: Tensor,
labels: Tensor, bbox_targets: Tensor, pos_gt_bboxes: Tensor,
reg_mask: Tensor, label_weights: Tensor,
bbox_weights: Tensor) -> Dict:
"""Computing losses.
Args:
......@@ -302,15 +311,17 @@ class PointRCNNBboxHead(BaseModule):
# calculate corner loss
loss_corner = self.get_corner_loss_lidar(pred_boxes3d,
pos_gt_bboxes)
pos_gt_bboxes).mean()
losses['loss_corner'] = loss_corner
else:
losses['loss_corner'] = loss_cls.new_tensor(0)
losses['loss_corner'] = loss_cls.new_tensor(0) * loss_cls.sum()
return losses
def get_corner_loss_lidar(self, pred_bbox3d, gt_bbox3d, delta=1.0):
def get_corner_loss_lidar(self,
pred_bbox3d: Tensor,
gt_bbox3d: Tensor,
delta: float = 1.0) -> Tensor:
"""Calculate corner loss of given boxes.
Args:
......@@ -340,19 +351,24 @@ class PointRCNNBboxHead(BaseModule):
torch.norm(pred_box_corners - gt_box_corners_flip, dim=2))
# huber loss
abs_error = corner_dist.abs()
quadratic = abs_error.clamp(max=delta)
linear = (abs_error - quadratic)
corner_loss = 0.5 * quadratic**2 + delta * linear
return corner_loss.mean(dim=1)
def get_targets(self, sampling_results, rcnn_train_cfg, concat=True):
# quadratic = abs_error.clamp(max=delta)
# linear = (abs_error - quadratic)
# corner_loss = 0.5 * quadratic**2 + delta * linear
loss = torch.where(abs_error < delta, 0.5 * abs_error**2 / delta,
abs_error - 0.5 * delta)
return loss.mean(dim=1)
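# NOTE: a minimal numeric sketch of the torch.where-based huber loss above
# with delta=1.0: an absolute corner error of 0.5 hits the quadratic branch,
# 0.5 * 0.5**2 / 1.0 = 0.125, while an error of 3.0 hits the linear branch,
# 3.0 - 0.5 * 1.0 = 2.5. The loss is then averaged over the corner dim:
#   abs_error = torch.tensor([[0.5, 3.0]])
#   loss = torch.where(abs_error < 1.0, 0.5 * abs_error**2 / 1.0,
#                      abs_error - 0.5 * 1.0)  # tensor([[0.1250, 2.5000]])
#   loss.mean(dim=1)                           # tensor([1.3125])
# (in the real call there are 8 corners per box rather than 2.)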
def get_targets(self,
sampling_results: SamplingResultList,
rcnn_train_cfg: dict,
concat: bool = True) -> Tuple[Tensor]:
"""Generate targets.
Args:
sampling_results (list[:obj:`SamplingResult`]):
Sampled results from rois.
rcnn_train_cfg (:obj:`ConfigDict`): Training config of rcnn.
concat (bool, optional): Whether to concatenate targets between
concat (bool): Whether to concatenate targets between
batches. Defaults to True.
Returns:
......@@ -385,7 +401,8 @@ class PointRCNNBboxHead(BaseModule):
return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
bbox_weights)
def _get_target_single(self, pos_bboxes, pos_gt_bboxes, ious, cfg):
def _get_target_single(self, pos_bboxes: Tensor, pos_gt_bboxes: Tensor,
ious: Tensor, cfg: dict) -> Tuple[Tensor]:
"""Generate training targets for a single sample.
Args:
......@@ -449,13 +466,13 @@ class PointRCNNBboxHead(BaseModule):
return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
bbox_weights)
def get_bboxes(self,
rois,
cls_score,
bbox_pred,
class_labels,
img_metas,
cfg=None):
def get_results(self,
rois: Tensor,
cls_score: Tensor,
bbox_pred: Tensor,
class_labels: Tensor,
input_metas: List[dict],
cfg: dict = None) -> InstanceList:
"""Generate bboxes from bbox head predictions.
Args:
......@@ -463,12 +480,22 @@ class PointRCNNBboxHead(BaseModule):
cls_score (torch.Tensor): Scores of bounding boxes.
bbox_pred (torch.Tensor): Bounding boxes predictions
class_labels (torch.Tensor): Label of classes
img_metas (list[dict]): Point cloud and image's meta info.
input_metas (list[dict]): Point cloud and image's meta info.
cfg (:obj:`ConfigDict`, optional): Testing config.
Defaults to None.
Returns:
list[tuple]: Decoded bbox, scores and labels after nms.
list[:obj:`InstanceData`]: Detection results of each sample
after the post process.
Each item usually contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instances, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (BaseInstance3DBoxes): Prediction of bboxes,
contains a tensor with shape (num_instances, C), where
C >= 7.
"""
roi_batch_id = rois[..., 0]
roi_boxes = rois[..., 1:] # boxes without batch id
......@@ -494,25 +521,27 @@ class PointRCNNBboxHead(BaseModule):
cur_rcnn_boxes3d = rcnn_boxes3d[roi_batch_id == batch_id]
keep = self.multi_class_nms(cur_box_prob, cur_rcnn_boxes3d,
cfg.score_thr, cfg.nms_thr,
img_metas[batch_id],
input_metas[batch_id],
cfg.use_rotate_nms)
selected_bboxes = cur_rcnn_boxes3d[keep]
selected_label_preds = cur_class_labels[keep]
selected_scores = cur_cls_score[keep]
results = InstanceData()
results.bboxes_3d = input_metas[batch_id]['box_type_3d'](
selected_bboxes, selected_bboxes.shape[-1])
results.scores_3d = selected_scores
results.labels_3d = selected_label_preds
result_list.append(
(img_metas[batch_id]['box_type_3d'](selected_bboxes,
self.bbox_coder.code_size),
selected_scores, selected_label_preds))
result_list.append(results)
return result_list
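# NOTE: a minimal sketch (made-up tensors) of how each per-sample result
# above is packed; box_type_3d comes from the input metas and is typically
# LiDARInstance3DBoxes:
#   results = InstanceData()
#   results.bboxes_3d = input_metas[0]['box_type_3d'](
#       torch.rand(5, 7), box_dim=7)   # 5 kept boxes, 7 box parameters each
#   results.scores_3d = torch.rand(5)
#   results.labels_3d = torch.randint(0, 3, (5, ))
# downstream code such as add_pred_to_datasample expects these three keys,
# so the naming must stay consistent across heads.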
def multi_class_nms(self,
box_probs,
box_preds,
score_thr,
nms_thr,
input_meta,
use_rotate_nms=True):
box_probs: Tensor,
box_preds: Tensor,
score_thr: float,
nms_thr: float,
input_meta: dict,
use_rotate_nms: bool = True) -> Tensor:
"""Multi-class NMS for box head.
Note:
......@@ -527,7 +556,7 @@ class PointRCNNBboxHead(BaseModule):
score_thr (float): Threshold of scores.
nms_thr (float): Threshold for NMS.
input_meta (dict): Meta information of the current sample.
use_rotate_nms (bool, optional): Whether to use rotated nms.
use_rotate_nms (bool): Whether to use rotated nms.
Defaults to True.
Returns:
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, Optional, Tuple
import torch
from mmengine.model import BaseModule
from torch import Tensor
from torch import nn as nn
from torch.nn import functional as F
from mmdet3d.models.builder import build_loss
from mmdet3d.registry import MODELS
from mmdet3d.structures.bbox_3d import rotation_3d_in_axis
from mmdet3d.structures.bbox_3d import BaseInstance3DBoxes, rotation_3d_in_axis
from mmdet3d.utils import InstanceList
from mmdet.models.utils import multi_apply
......@@ -26,23 +28,23 @@ class PointwiseSemanticHead(BaseModule):
loss_part (dict): Config of part prediction loss.
"""
def __init__(self,
in_channels,
num_classes=3,
extra_width=0.2,
seg_score_thr=0.3,
init_cfg=None,
loss_seg=dict(
def __init__(
self,
in_channels: int,
num_classes: int = 3,
extra_width: float = 0.2,
seg_score_thr: float = 0.3,
init_cfg: Optional[dict] = None,
loss_seg: dict = dict(
type='FocalLoss',
use_sigmoid=True,
reduction='sum',
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_part=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
loss_weight=1.0)):
loss_part: dict = dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)
) -> None:
super(PointwiseSemanticHead, self).__init__(init_cfg=init_cfg)
self.extra_width = extra_width
self.num_classes = num_classes
......@@ -50,10 +52,10 @@ class PointwiseSemanticHead(BaseModule):
self.seg_cls_layer = nn.Linear(in_channels, 1, bias=True)
self.seg_reg_layer = nn.Linear(in_channels, 3, bias=True)
self.loss_seg = build_loss(loss_seg)
self.loss_part = build_loss(loss_part)
self.loss_seg = MODELS.build(loss_seg)
self.loss_part = MODELS.build(loss_part)
def forward(self, x):
def forward(self, x: Tensor) -> Dict[str, Tensor]:
"""Forward pass.
Args:
......@@ -79,7 +81,9 @@ class PointwiseSemanticHead(BaseModule):
return dict(
seg_preds=seg_preds, part_preds=part_preds, part_feats=part_feats)
def get_targets_single(self, voxel_centers, gt_bboxes_3d, gt_labels_3d):
def get_targets_single(self, voxel_centers: Tensor,
gt_bboxes_3d: BaseInstance3DBoxes,
gt_labels_3d: Tensor) -> Tuple[Tensor]:
"""generate segmentation and part prediction targets for a single
sample.
......@@ -162,7 +166,8 @@ class PointwiseSemanticHead(BaseModule):
part_targets = torch.cat(part_targets, dim=0)
return dict(seg_targets=seg_targets, part_targets=part_targets)
def loss(self, semantic_results, semantic_targets):
def loss(self, semantic_results: dict,
semantic_targets: dict) -> Dict[str, Tensor]:
"""Calculate point-wise segmentation and part prediction losses.
Args:
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List, Optional
from typing import Dict, List, Optional, Tuple
import torch
from mmcv.cnn import ConvModule
......@@ -12,6 +12,7 @@ from torch.nn import functional as F
from mmdet3d.models.layers import VoteModule, build_sa_module
from mmdet3d.registry import MODELS
from mmdet3d.structures import Det3DDataSample
from mmdet3d.structures.bbox_3d import BaseInstance3DBoxes
from mmdet.models.utils import multi_apply
......@@ -26,39 +27,42 @@ class PrimitiveHead(BaseModule):
available mode ['z', 'xy', 'line'].
bbox_coder (:obj:`BaseBBoxCoder`): Bbox coder for encoding and
decoding boxes.
train_cfg (dict): Config for training.
test_cfg (dict): Config for testing.
vote_module_cfg (dict): Config of VoteModule for point-wise votes.
vote_aggregation_cfg (dict): Config of vote aggregation layer.
train_cfg (dict, optional): Config for training.
test_cfg (dict, optional): Config for testing.
vote_module_cfg (dict, optional): Config of VoteModule for point-wise
votes.
vote_aggregation_cfg (dict, optional): Config of vote aggregation
layer.
feat_channels (tuple[int]): Convolution channels of
prediction layer.
upper_thresh (float): Threshold for line matching.
surface_thresh (float): Threshold for surface matching.
conv_cfg (dict): Config of convolution in prediction layer.
norm_cfg (dict): Config of BN in prediction layer.
objectness_loss (dict): Config of objectness loss.
center_loss (dict): Config of center loss.
semantic_loss (dict): Config of point-wise semantic segmentation loss.
conv_cfg (dict, optional): Config of convolution in prediction layer.
norm_cfg (dict, optional): Config of BN in prediction layer.
objectness_loss (dict, optional): Config of objectness loss.
center_loss (dict, optional): Config of center loss.
semantic_loss (dict, optional): Config of point-wise semantic
segmentation loss.
"""
def __init__(self,
num_dims: int,
num_classes: int,
primitive_mode: str,
train_cfg: dict = None,
test_cfg: dict = None,
vote_module_cfg: dict = None,
vote_aggregation_cfg: dict = None,
train_cfg: Optional[dict] = None,
test_cfg: Optional[dict] = None,
vote_module_cfg: Optional[dict] = None,
vote_aggregation_cfg: Optional[dict] = None,
feat_channels: tuple = (128, 128),
upper_thresh: float = 100.0,
surface_thresh: float = 0.5,
conv_cfg: dict = dict(type='Conv1d'),
norm_cfg: dict = dict(type='BN1d'),
objectness_loss: dict = None,
center_loss: dict = None,
semantic_reg_loss: dict = None,
semantic_cls_loss: dict = None,
init_cfg: dict = None):
objectness_loss: Optional[dict] = None,
center_loss: Optional[dict] = None,
semantic_reg_loss: Optional[dict] = None,
semantic_cls_loss: Optional[dict] = None,
init_cfg: Optional[dict] = None):
super(PrimitiveHead, self).__init__(init_cfg=init_cfg)
# bounding boxes centers, face centers and edge centers
assert primitive_mode in ['z', 'xy', 'line']
......@@ -126,7 +130,7 @@ class PrimitiveHead(BaseModule):
assert sample_mode in ['vote', 'seed', 'random']
return sample_mode
def forward(self, feats_dict):
def forward(self, feats_dict: dict) -> dict:
"""Forward pass.
Args:
......@@ -255,10 +259,8 @@ class PrimitiveHead(BaseModule):
attributes.
batch_pts_semantic_mask (list[tensor]): Semantic mask
of points cloud. Defaults to None.
batch_pts_semantic_mask (list[tensor]): Instance mask
batch_pts_instance_mask (list[tensor]): Instance mask
of points cloud. Defaults to None.
batch_input_metas (list[dict]): Contain pcd and img's meta info.
ret_target (bool): Return targets or not. Defaults to False.
Returns:
dict: Losses of Primitive Head.
......@@ -392,12 +394,13 @@ class PrimitiveHead(BaseModule):
return (point_mask, point_offset, gt_primitive_center,
gt_primitive_semantic, gt_sem_cls_label, gt_votes_mask)
def get_targets_single(self,
points,
gt_bboxes_3d,
gt_labels_3d,
pts_semantic_mask=None,
pts_instance_mask=None):
def get_targets_single(
self,
points: torch.Tensor,
gt_bboxes_3d: BaseInstance3DBoxes,
gt_labels_3d: torch.Tensor,
pts_semantic_mask: torch.Tensor = None,
pts_instance_mask: torch.Tensor = None) -> Tuple[torch.Tensor]:
"""Generate targets of primitive head for single batch.
Args:
......@@ -668,7 +671,8 @@ class PrimitiveHead(BaseModule):
return (point_mask, point_sem, point_offset)
def primitive_decode_scores(self, predictions, aggregated_points):
def primitive_decode_scores(self, predictions: torch.Tensor,
aggregated_points: torch.Tensor) -> dict:
"""Decode predicted parts to primitive head.
Args:
......@@ -696,7 +700,7 @@ class PrimitiveHead(BaseModule):
return ret_dict
def check_horizon(self, points):
def check_horizon(self, points: torch.Tensor) -> bool:
"""Check whether is a horizontal plane.
Args:
......@@ -709,7 +713,8 @@ class PrimitiveHead(BaseModule):
(points[1][-1] == points[2][-1]) and \
(points[2][-1] == points[3][-1])
def check_dist(self, plane_equ, points):
def check_dist(self, plane_equ: torch.Tensor,
points: torch.Tensor) -> tuple:
"""Whether the mean of points to plane distance is lower than thresh.
Args:
......@@ -722,7 +727,8 @@ class PrimitiveHead(BaseModule):
return (points[:, 2] +
plane_equ[-1]).sum() / 4.0 < self.train_cfg['lower_thresh']
def point2line_dist(self, points, pts_a, pts_b):
def point2line_dist(self, points: torch.Tensor, pts_a: torch.Tensor,
pts_b: torch.Tensor) -> torch.Tensor:
"""Calculate the distance from point to line.
Args:
......@@ -741,7 +747,11 @@ class PrimitiveHead(BaseModule):
return dist
def match_point2line(self, points, corners, with_yaw, mode='bottom'):
def match_point2line(self,
points: torch.Tensor,
corners: torch.Tensor,
with_yaw: bool,
mode: str = 'bottom') -> tuple:
"""Match points to corresponding line.
Args:
......@@ -782,7 +792,8 @@ class PrimitiveHead(BaseModule):
selected_list = [sel1, sel2, sel3, sel4]
return selected_list
def match_point2plane(self, plane, points):
def match_point2plane(self, plane: torch.Tensor,
points: torch.Tensor) -> tuple:
"""Match points to plane.
Args:
......@@ -800,10 +811,14 @@ class PrimitiveHead(BaseModule):
min_dist) < self.train_cfg['dist_thresh']
return point2plane_dist, selected
def compute_primitive_loss(self, primitive_center, primitive_semantic,
semantic_scores, num_proposal,
gt_primitive_center, gt_primitive_semantic,
gt_sem_cls_label, gt_primitive_mask):
def compute_primitive_loss(self, primitive_center: torch.Tensor,
primitive_semantic: torch.Tensor,
semantic_scores: torch.Tensor,
num_proposal: torch.Tensor,
gt_primitive_center: torch.Tensor,
gt_primitive_semantic: torch.Tensor,
gt_sem_cls_label: torch.Tensor,
gt_primitive_mask: torch.Tensor) -> Tuple:
"""Compute loss of primitive module.
Args:
......@@ -849,7 +864,8 @@ class PrimitiveHead(BaseModule):
return center_loss, size_loss, sem_cls_loss
def get_primitive_center(self, pred_flag, center):
def get_primitive_center(self, pred_flag: torch.Tensor,
center: torch.Tensor) -> Tuple:
"""Generate primitive center from predictions.
Args:
......@@ -869,17 +885,17 @@ class PrimitiveHead(BaseModule):
return center, pred_indices
def _assign_primitive_line_targets(self,
point_mask,
point_offset,
point_sem,
coords,
indices,
cls_label,
point2line_matching,
corners,
center_axises,
with_yaw,
mode='bottom'):
point_mask: torch.Tensor,
point_offset: torch.Tensor,
point_sem: torch.Tensor,
coords: torch.Tensor,
indices: torch.Tensor,
cls_label: int,
point2line_matching: torch.Tensor,
corners: torch.Tensor,
center_axises: torch.Tensor,
with_yaw: bool,
mode: str = 'bottom') -> Tuple:
"""Generate targets of line primitive.
Args:
......@@ -934,15 +950,15 @@ class PrimitiveHead(BaseModule):
return point_mask, point_offset, point_sem
def _assign_primitive_surface_targets(self,
point_mask,
point_offset,
point_sem,
coords,
indices,
cls_label,
corners,
with_yaw,
mode='bottom'):
point_mask: torch.Tensor,
point_offset: torch.Tensor,
point_sem: torch.Tensor,
coords: torch.Tensor,
indices: torch.Tensor,
cls_label: int,
corners: torch.Tensor,
with_yaw: bool,
mode: str = 'bottom') -> Tuple:
"""Generate targets for primitive z and primitive xy.
Args:
......@@ -1017,7 +1033,9 @@ class PrimitiveHead(BaseModule):
point_offset[indices] = center - coords
return point_mask, point_offset, point_sem
def _get_plane_fomulation(self, vector1, vector2, point):
def _get_plane_fomulation(self, vector1: torch.Tensor,
vector2: torch.Tensor,
point: torch.Tensor) -> torch.Tensor:
"""Compute the equation of the plane.
Args:
......
......@@ -90,16 +90,18 @@ class PartAggregationROIHead(Base3DRoIHead):
return bbox_results
def _assign_and_sample(
self, proposal_list: InstanceList,
batch_gt_instances_3d: InstanceList) -> List[SamplingResult]:
self, rpn_results_list: InstanceList,
batch_gt_instances_3d: InstanceList,
batch_gt_instances_ignore: InstanceList) -> List[SamplingResult]:
"""Assign and sample proposals for training.
Args:
proposal_list (list[:obj:`InstancesData`]): Proposals produced by
rpn head.
rpn_results_list (List[:obj:`InstanceData`]): Detection results
of rpn head.
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instances. It usually includes ``bboxes_3d`` and
``labels_3d`` attributes.
batch_gt_instances_ignore (list): Ignore instances of gt bboxes.
Returns:
list[:obj:`SamplingResult`]: Sampled results of each training
......@@ -107,16 +109,16 @@ class PartAggregationROIHead(Base3DRoIHead):
"""
sampling_results = []
# bbox assign
for batch_idx in range(len(proposal_list)):
cur_proposal_list = proposal_list[batch_idx]
for batch_idx in range(len(rpn_results_list)):
cur_proposal_list = rpn_results_list[batch_idx]
cur_boxes = cur_proposal_list['bboxes_3d']
cur_labels_3d = cur_proposal_list['labels_3d']
cur_gt_instances_3d = batch_gt_instances_3d[batch_idx]
cur_gt_instances_ignore = batch_gt_instances_ignore[batch_idx]
cur_gt_instances_3d.bboxes_3d = cur_gt_instances_3d.\
bboxes_3d.tensor
cur_gt_bboxes = batch_gt_instances_3d[batch_idx].bboxes_3d.to(
cur_boxes.device)
cur_gt_labels = batch_gt_instances_3d[batch_idx].labels_3d
cur_gt_bboxes = cur_gt_instances_3d.bboxes_3d.to(cur_boxes.device)
cur_gt_labels = cur_gt_instances_3d.labels_3d
batch_num_gts = 0
# 0 is bg
......@@ -132,7 +134,8 @@ class PartAggregationROIHead(Base3DRoIHead):
pred_per_cls = (cur_labels_3d == i)
cur_assign_res = assigner.assign(
cur_proposal_list[pred_per_cls],
cur_gt_instances_3d[gt_per_cls])
cur_gt_instances_3d[gt_per_cls],
cur_gt_instances_ignore)
# gather assign_results in different class into one result
batch_num_gts += cur_assign_res.num_gts
# gt inds (1-based)
......@@ -158,7 +161,8 @@ class PartAggregationROIHead(Base3DRoIHead):
batch_gt_labels)
else: # for single class
assign_result = self.bbox_assigner.assign(
cur_proposal_list, cur_gt_instances_3d)
cur_proposal_list, cur_gt_instances_3d,
cur_gt_instances_ignore)
# sample boxes
sampling_result = self.bbox_sampler.sample(assign_result,
cur_boxes.tensor,
......@@ -200,7 +204,7 @@ class PartAggregationROIHead(Base3DRoIHead):
Args:
feats_dict (dict): Contains features from the first stage.
rpn_results_list (List[:obj:`InstancesData`]): Detection results
rpn_results_list (List[:obj:`InstanceData`]): Detection results
of rpn head.
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
samples. It usually includes information such as
......@@ -247,7 +251,7 @@ class PartAggregationROIHead(Base3DRoIHead):
voxel_dict (dict): Contains information of voxels.
batch_input_metas (list[dict], Optional): Batch image meta info.
Defaults to None.
rpn_results_list (List[:obj:`InstancesData`]): Detection results
rpn_results_list (List[:obj:`InstanceData`]): Detection results
of rpn head.
test_cfg (Config): Test config.
......@@ -316,7 +320,7 @@ class PartAggregationROIHead(Base3DRoIHead):
Args:
feats_dict (dict): Contains features from the first stage.
rpn_results_list (List[:obj:`InstancesData`]): Detection results
rpn_results_list (List[:obj:`InstanceData`]): Detection results
of rpn head.
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
samples. It usually includes information such as
......@@ -342,7 +346,8 @@ class PartAggregationROIHead(Base3DRoIHead):
losses.update(semantic_results.pop('loss_semantic'))
sample_results = self._assign_and_sample(rpn_results_list,
batch_gt_instances_3d)
batch_gt_instances_3d,
batch_gt_instances_ignore)
if self.with_bbox:
feats_dict.update(semantic_results)
bbox_results = self._bbox_forward_train(feats_dict, voxels_dict,
......@@ -358,7 +363,7 @@ class PartAggregationROIHead(Base3DRoIHead):
Args:
feats_dict (dict): Contains features from the first stage.
rpn_results_list (List[:obj:`InstancesData`]): Detection results
rpn_results_list (List[:obj:`InstanceData`]): Detection results
of rpn head.
Returns:
......
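The per-class branch of _assign_and_sample above splits proposals and ground truths with boolean masks before calling each class-specific assigner. A toy sketch of that split, assuming mmengine's InstanceData container (all shapes and labels below are made up):
# Hedged sketch of the per-class split used in _assign_and_sample.
import torch
from mmengine.structures import InstanceData
proposals = InstanceData(
    bboxes_3d=torch.rand(6, 7), labels_3d=torch.tensor([0, 1, 0, 2, 1, 0]))
gt_instances_3d = InstanceData(
    bboxes_3d=torch.rand(3, 7), labels_3d=torch.tensor([0, 2, 1]))
num_classes = 3
for cls_id in range(num_classes):
    pred_per_cls = proposals.labels_3d == cls_id       # proposals of this class
    gt_per_cls = gt_instances_3d.labels_3d == cls_id   # gts of this class
    # Each class has its own assigner; the per-class results are later merged
    # into a single set of 1-based gt indices, labels and max overlaps.
    cls_proposals = proposals[pred_per_cls]
    cls_gts = gt_instances_3d[gt_per_cls]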
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, Optional
import torch
from torch import Tensor
from torch.nn import functional as F
from mmdet3d.registry import MODELS, TASK_UTILS
from mmdet3d.structures import bbox3d2result, bbox3d2roi
from mmdet3d.structures import bbox3d2roi
from mmdet3d.utils.typing import InstanceList, SampleList
from mmdet.models.task_modules import AssignResult
from .base_3droi_head import Base3DRoIHead
......@@ -14,43 +18,31 @@ class PointRCNNRoIHead(Base3DRoIHead):
Args:
bbox_head (dict): Config of bbox_head.
point_roi_extractor (dict): Config of RoI extractor.
bbox_roi_extractor (dict): Config of RoI extractor.
train_cfg (dict): Train configs.
test_cfg (dict): Test configs.
depth_normalizer (float, optional): Normalize depth feature.
depth_normalizer (float): Normalization factor for depth
features. Defaults to 70.0.
init_cfg (dict, optional): Config of initialization. Defaults to None.
"""
def __init__(self,
bbox_head,
point_roi_extractor,
train_cfg,
test_cfg,
depth_normalizer=70.0,
pretrained=None,
init_cfg=None):
bbox_head: dict,
bbox_roi_extractor: dict,
train_cfg: dict,
test_cfg: dict,
depth_normalizer: float = 70.0,
init_cfg: Optional[dict] = None) -> None:
super(PointRCNNRoIHead, self).__init__(
bbox_head=bbox_head,
bbox_roi_extractor=bbox_roi_extractor,
train_cfg=train_cfg,
test_cfg=test_cfg,
pretrained=pretrained,
init_cfg=init_cfg)
self.depth_normalizer = depth_normalizer
if point_roi_extractor is not None:
self.point_roi_extractor = MODELS.build(point_roi_extractor)
self.init_assigner_sampler()
def init_bbox_head(self, bbox_head):
"""Initialize box head.
Args:
bbox_head (dict): Config dict of RoI Head.
"""
self.bbox_head = MODELS.build(bbox_head)
def init_mask_head(self):
"""Initialize maek head."""
pass
......@@ -68,77 +60,101 @@ class PointRCNNRoIHead(Base3DRoIHead):
]
self.bbox_sampler = TASK_UTILS.build(self.train_cfg.sampler)
def forward_train(self, feats_dict, input_metas, proposal_list,
gt_bboxes_3d, gt_labels_3d):
"""Training forward function of PointRCNNRoIHead.
def loss(self, feats_dict: Dict, rpn_results_list: InstanceList,
batch_data_samples: SampleList, **kwargs) -> dict:
"""Perform forward propagation and loss calculation of the detection
roi on the features of the upstream network.
Args:
feats_dict (dict): Contains features from the first stage.
input_metas (list[dict]): Meta info of each input.
proposal_list (list[dict]): Proposal information from rpn.
The dictionary should contain the following keys:
- boxes_3d (:obj:`BaseInstance3DBoxes`): Proposal bboxes
- labels_3d (torch.Tensor): Labels of proposals
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]):
GT bboxes of each sample. The bboxes are encapsulated
by 3D box bboxes_3d.
gt_labels_3d (list[LongTensor]): GT labels of each sample.
rpn_results_list (List[:obj:`InstanceData`]): Detection results
of rpn head.
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
samples. It usually includes information such as
`gt_instance_3d`, `gt_panoptic_seg_3d` and `gt_sem_seg_3d`.
Returns:
dict: Losses from RoI RCNN head.
- loss_bbox (torch.Tensor): Loss of bboxes
dict[str, Tensor]: A dictionary of loss components
"""
features = feats_dict['features']
points = feats_dict['points']
features = feats_dict['fp_features']
fp_points = feats_dict['fp_points']
point_cls_preds = feats_dict['points_cls_preds']
sem_scores = point_cls_preds.sigmoid()
point_scores = sem_scores.max(-1)[0]
sample_results = self._assign_and_sample(proposal_list, gt_bboxes_3d,
gt_labels_3d)
batch_gt_instances_3d = []
batch_gt_instances_ignore = []
for data_sample in batch_data_samples:
batch_gt_instances_3d.append(data_sample.gt_instances_3d)
if 'ignored_instances' in data_sample:
batch_gt_instances_ignore.append(data_sample.ignored_instances)
else:
batch_gt_instances_ignore.append(None)
sample_results = self._assign_and_sample(rpn_results_list,
batch_gt_instances_3d,
batch_gt_instances_ignore)
# concat the depth, semantic features and backbone features
features = features.transpose(1, 2).contiguous()
point_depths = points.norm(dim=2) / self.depth_normalizer - 0.5
point_depths = fp_points.norm(dim=2) / self.depth_normalizer - 0.5
features_list = [
point_scores.unsqueeze(2),
point_depths.unsqueeze(2), features
]
features = torch.cat(features_list, dim=2)
bbox_results = self._bbox_forward_train(features, points,
bbox_results = self._bbox_forward_train(features, fp_points,
sample_results)
losses = dict()
losses.update(bbox_results['loss_bbox'])
return losses
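The feature preparation in loss() is mostly shape bookkeeping; a standalone sketch with made-up sizes (B samples, N points, C backbone channels) and the default depth_normalizer of 70.0:
import torch
B, N, C = 2, 1024, 128                       # made-up batch/point/channel sizes
fp_features = torch.rand(B, C, N)            # backbone point features
fp_points = torch.rand(B, N, 3)              # point coordinates
point_scores = torch.rand(B, N)              # max per-point semantic score
features = fp_features.transpose(1, 2).contiguous()        # (B, N, C)
point_depths = fp_points.norm(dim=2) / 70.0 - 0.5          # (B, N)
features = torch.cat(
    [point_scores.unsqueeze(2), point_depths.unsqueeze(2), features],
    dim=2)                                                  # (B, N, C + 2)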
def simple_test(self, feats_dict, img_metas, proposal_list, **kwargs):
"""Simple testing forward function of PointRCNNRoIHead.
Note:
This function assumes that the batch size is 1
def predict(self,
feats_dict: Dict,
rpn_results_list: InstanceList,
batch_data_samples: SampleList,
rescale: bool = False,
**kwargs) -> InstanceList:
"""Perform forward propagation of the roi head and predict detection
results on the features of the upstream network.
Args:
feats_dict (dict): Contains features from the first stage.
img_metas (list[dict]): Meta info of each image.
proposal_list (list[dict]): Proposal information from rpn.
rpn_results_list (List[:obj:`InstanceData`]): Detection results
of rpn head.
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
samples. It usually includes information such as
`gt_instance_3d`, `gt_panoptic_seg_3d` and `gt_sem_seg_3d`.
rescale (bool): If True, return boxes in original image space.
Defaults to False.
Returns:
dict: Bbox results of one frame.
list[:obj:`InstanceData`]: Detection results of each sample
after the post process.
Each item usually contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instances, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (BaseInstance3DBoxes): Prediction of bboxes,
contains a tensor with shape (num_instances, C), where
C >= 7.
"""
rois = bbox3d2roi([res['boxes_3d'].tensor for res in proposal_list])
labels_3d = [res['labels_3d'] for res in proposal_list]
features = feats_dict['features']
points = feats_dict['points']
rois = bbox3d2roi(
[res['bboxes_3d'].tensor for res in rpn_results_list])
labels_3d = [res['labels_3d'] for res in rpn_results_list]
batch_input_metas = [
data_samples.metainfo for data_samples in batch_data_samples
]
fp_features = feats_dict['fp_features']
fp_points = feats_dict['fp_points']
point_cls_preds = feats_dict['points_cls_preds']
sem_scores = point_cls_preds.sigmoid()
point_scores = sem_scores.max(-1)[0]
features = features.transpose(1, 2).contiguous()
point_depths = points.norm(dim=2) / self.depth_normalizer - 0.5
features = fp_features.transpose(1, 2).contiguous()
point_depths = fp_points.norm(dim=2) / self.depth_normalizer - 0.5
features_list = [
point_scores.unsqueeze(2),
point_depths.unsqueeze(2), features
......@@ -146,29 +162,27 @@ class PointRCNNRoIHead(Base3DRoIHead):
features = torch.cat(features_list, dim=2)
batch_size = features.shape[0]
bbox_results = self._bbox_forward(features, points, batch_size, rois)
bbox_results = self._bbox_forward(features, fp_points, batch_size,
rois)
object_score = bbox_results['cls_score'].sigmoid()
bbox_list = self.bbox_head.get_bboxes(
bbox_list = self.bbox_head.get_results(
rois,
object_score,
bbox_results['bbox_pred'],
labels_3d,
img_metas,
batch_input_metas,
cfg=self.test_cfg)
bbox_results = [
bbox3d2result(bboxes, scores, labels)
for bboxes, scores, labels in bbox_list
]
return bbox_results
return bbox_list
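predict() first packs all proposals of the batch into RoIs so a single _bbox_forward call can serve every sample. The stand-in below mirrors what bbox3d2roi is expected to do, namely prepend the sample index as the first column (box tensors are made up):
import torch
def to_rois(bbox_list):
    # Mimics bbox3d2roi: column 0 holds the sample index, the rest is the box.
    rois = []
    for batch_id, bboxes in enumerate(bbox_list):
        inds = bboxes.new_full((bboxes.size(0), 1), batch_id)
        rois.append(torch.cat([inds, bboxes], dim=-1))
    return torch.cat(rois, dim=0)
rois = to_rois([torch.rand(4, 7), torch.rand(2, 7)])  # -> shape (6, 8)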
def _bbox_forward_train(self, features, points, sampling_results):
def _bbox_forward_train(self, features: Tensor, points: Tensor,
sampling_results: SampleList) -> dict:
"""Forward training function of roi_extractor and bbox_head.
Args:
features (torch.Tensor): Backbone features with depth and \
semantic features.
points (torch.Tensor): Pointcloud.
points (torch.Tensor): Point cloud.
sampling_results (:obj:`SamplingResult`): Sampled results used
for training.
......@@ -188,14 +202,15 @@ class PointRCNNRoIHead(Base3DRoIHead):
bbox_results.update(loss_bbox=loss_bbox)
return bbox_results
def _bbox_forward(self, features, points, batch_size, rois):
def _bbox_forward(self, features: Tensor, points: Tensor, batch_size: int,
rois: Tensor) -> dict:
"""Forward function of roi_extractor and bbox_head used in both
training and testing.
Args:
features (torch.Tensor): Backbone features with depth and
semantic features.
points (torch.Tensor): Pointcloud.
points (torch.Tensor): Point cloud.
batch_size (int): Batch size.
rois (torch.Tensor): RoI boxes.
......@@ -203,21 +218,27 @@ class PointRCNNRoIHead(Base3DRoIHead):
dict: Contains predictions of bbox_head and
features of roi_extractor.
"""
pooled_point_feats = self.point_roi_extractor(features, points,
pooled_point_feats = self.bbox_roi_extractor(features, points,
batch_size, rois)
cls_score, bbox_pred = self.bbox_head(pooled_point_feats)
bbox_results = dict(cls_score=cls_score, bbox_pred=bbox_pred)
return bbox_results
def _assign_and_sample(self, proposal_list, gt_bboxes_3d, gt_labels_3d):
def _assign_and_sample(
self, rpn_results_list: InstanceList,
batch_gt_instances_3d: InstanceList,
batch_gt_instances_ignore: InstanceList) -> SampleList:
"""Assign and sample proposals for training.
Args:
proposal_list (list[dict]): Proposals produced by RPN.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
boxes.
gt_labels_3d (list[torch.Tensor]): Ground truth labels
rpn_results_list (List[:obj:`InstanceData`]): Detection results
of rpn head.
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instances. It usually includes ``bboxes_3d`` and
``labels_3d`` attributes.
batch_gt_instances_ignore (list[:obj:`InstanceData`]): Ignored
ground truth instances.
Returns:
list[:obj:`SamplingResult`]: Sampled results of each training
......@@ -225,12 +246,16 @@ class PointRCNNRoIHead(Base3DRoIHead):
"""
sampling_results = []
# bbox assign
for batch_idx in range(len(proposal_list)):
cur_proposal_list = proposal_list[batch_idx]
cur_boxes = cur_proposal_list['boxes_3d']
for batch_idx in range(len(rpn_results_list)):
cur_proposal_list = rpn_results_list[batch_idx]
cur_boxes = cur_proposal_list['bboxes_3d']
cur_labels_3d = cur_proposal_list['labels_3d']
cur_gt_bboxes = gt_bboxes_3d[batch_idx].to(cur_boxes.device)
cur_gt_labels = gt_labels_3d[batch_idx]
cur_gt_instances_3d = batch_gt_instances_3d[batch_idx]
cur_gt_instances_3d.bboxes_3d = cur_gt_instances_3d.\
bboxes_3d.tensor
cur_gt_instances_ignore = batch_gt_instances_ignore[batch_idx]
cur_gt_bboxes = cur_gt_instances_3d.bboxes_3d.to(cur_boxes.device)
cur_gt_labels = cur_gt_instances_3d.labels_3d
batch_num_gts = 0
# 0 is bg
batch_gt_indis = cur_gt_labels.new_full((len(cur_boxes), ), 0)
......@@ -244,9 +269,9 @@ class PointRCNNRoIHead(Base3DRoIHead):
gt_per_cls = (cur_gt_labels == i)
pred_per_cls = (cur_labels_3d == i)
cur_assign_res = assigner.assign(
cur_boxes.tensor[pred_per_cls],
cur_gt_bboxes.tensor[gt_per_cls],
gt_labels=cur_gt_labels[gt_per_cls])
cur_proposal_list[pred_per_cls],
cur_gt_instances_3d[gt_per_cls],
cur_gt_instances_ignore)
# gather assign_results in different class into one result
batch_num_gts += cur_assign_res.num_gts
# gt inds (1-based)
......@@ -272,14 +297,13 @@ class PointRCNNRoIHead(Base3DRoIHead):
batch_gt_labels)
else: # for single class
assign_result = self.bbox_assigner.assign(
cur_boxes.tensor,
cur_gt_bboxes.tensor,
gt_labels=cur_gt_labels)
cur_proposal_list, cur_gt_instances_3d,
cur_gt_instances_ignore)
# sample boxes
sampling_result = self.bbox_sampler.sample(assign_result,
cur_boxes.tensor,
cur_gt_bboxes.tensor,
cur_gt_bboxes,
cur_gt_labels)
sampling_results.append(sampling_result)
return sampling_results
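After the per-class assigners run, their outputs are gathered into a single AssignResult with 1-based gt indices (0 means background). A hedged sketch of that bookkeeping with made-up numbers, using the AssignResult class imported above:
import torch
from mmdet.models.task_modules import AssignResult
num_proposals, num_gts = 6, 3
gt_labels = torch.tensor([0, 2, 1])                   # labels of the 3 gt boxes
gt_inds = torch.zeros(num_proposals, dtype=torch.long)
max_overlaps = torch.zeros(num_proposals)
# Pretend the class-0 assigner matched proposals 0 and 2 to gt box 0,
# i.e. global (1-based) index 1.
gt_inds[[0, 2]] = 1
max_overlaps[[0, 2]] = torch.tensor([0.7, 0.6])
labels = gt_inds.new_full((num_proposals, ), -1)      # -1 = unassigned
pos = gt_inds > 0
labels[pos] = gt_labels[gt_inds[pos] - 1]
assign_result = AssignResult(num_gts, gt_inds, max_overlaps, labels=labels)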
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional
import torch
import torch.nn as nn
from mmcv import ops
from mmengine.model import BaseModule
from torch import Tensor
from mmdet3d.registry import MODELS
......@@ -13,14 +17,16 @@ class Single3DRoIAwareExtractor(BaseModule):
Extract Point-wise roi features.
Args:
roi_layer (dict): The config of roi layer.
roi_layer (dict, optional): The config of roi layer.
"""
def __init__(self, roi_layer=None, init_cfg=None):
def __init__(self,
roi_layer: Optional[dict] = None,
init_cfg: Optional[dict] = None) -> None:
super(Single3DRoIAwareExtractor, self).__init__(init_cfg=init_cfg)
self.roi_layer = self.build_roi_layers(roi_layer)
def build_roi_layers(self, layer_cfg):
def build_roi_layers(self, layer_cfg: dict) -> nn.Module:
"""Build roi layers using `layer_cfg`"""
cfg = layer_cfg.copy()
layer_type = cfg.pop('type')
......@@ -29,7 +35,8 @@ class Single3DRoIAwareExtractor(BaseModule):
roi_layers = layer_cls(**cfg)
return roi_layers
def forward(self, feats, coordinate, batch_inds, rois):
def forward(self, feats: Tensor, coordinate: Tensor, batch_inds: Tensor,
rois: Tensor) -> Tensor:
"""Extract point-wise roi features.
Args:
......
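Both point RoI extractors build their pooling op the same way: copy the config dict, pop 'type', and look the class up in mmcv.ops. A sketch with a hypothetical config value (the actual layer type and arguments come from the detector config):
from mmcv import ops
def build_roi_layer(layer_cfg):
    cfg = layer_cfg.copy()                  # do not mutate the caller's config
    layer_type = cfg.pop('type')            # e.g. 'RoIAwarePool3d' (assumed)
    assert hasattr(ops, layer_type), f'{layer_type} is not in mmcv.ops'
    return getattr(ops, layer_type)(**cfg)  # remaining keys become kwargs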
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional
import torch
import torch.nn as nn
from mmcv import ops
from torch import nn as nn
from torch import Tensor
from mmdet3d.registry import MODELS
from mmdet3d.structures.bbox_3d import rotation_3d_in_axis
......@@ -14,14 +17,14 @@ class Single3DRoIPointExtractor(nn.Module):
Extract Point-wise roi features.
Args:
roi_layer (dict): The config of roi layer.
roi_layer (dict, optional): The config of roi layer.
"""
def __init__(self, roi_layer=None):
def __init__(self, roi_layer: Optional[dict] = None) -> None:
super(Single3DRoIPointExtractor, self).__init__()
self.roi_layer = self.build_roi_layers(roi_layer)
def build_roi_layers(self, layer_cfg):
def build_roi_layers(self, layer_cfg: dict) -> nn.Module:
"""Build roi layers using `layer_cfg`"""
cfg = layer_cfg.copy()
layer_type = cfg.pop('type')
......@@ -30,7 +33,8 @@ class Single3DRoIPointExtractor(nn.Module):
roi_layers = layer_cls(**cfg)
return roi_layers
def forward(self, feats, coordinate, batch_inds, rois):
def forward(self, feats: Tensor, coordinate: Tensor, batch_inds: Tensor,
rois: Tensor) -> Tensor:
"""Extract point-wise roi features.
Args:
......
......@@ -41,7 +41,7 @@ class Box3DMode(IntEnum):
v
down y
The relative coordinate of bottom center in a CAM box is [0.5, 1.0, 0.5],
The relative coordinate of bottom center in a CAM box is (0.5, 1.0, 0.5),
and the yaw is around the y axis, thus the rotation axis=1.
Coordinates in Depth mode:
......@@ -63,7 +63,7 @@ class Box3DMode(IntEnum):
DEPTH = 2
@staticmethod
def convert(box, src, dst, rt_mat=None, with_yaw=True):
def convert(box, src, dst, rt_mat=None, with_yaw=True, correct_yaw=False):
"""Convert boxes from `src` mode to `dst` mode.
Args:
......@@ -81,6 +81,7 @@ class Box3DMode(IntEnum):
with_yaw (bool, optional): If `box` is an instance of
:obj:`BaseInstance3DBoxes`, whether or not it has a yaw angle.
Defaults to True.
correct_yaw (bool): Whether to also rotate the yaw angle into the
`dst` mode using ``rt_mat``. Defaults to False.
Returns:
(tuple | list | np.ndarray | torch.Tensor |
......@@ -119,6 +120,14 @@ class Box3DMode(IntEnum):
rt_mat = arr.new_tensor([[0, -1, 0], [0, 0, -1], [1, 0, 0]])
xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
if with_yaw:
if correct_yaw:
yaw_vector = torch.cat([
torch.cos(yaw),
torch.sin(yaw),
torch.zeros_like(yaw)
],
dim=1)
else:
yaw = -yaw - np.pi / 2
yaw = limit_period(yaw, period=np.pi * 2)
elif src == Box3DMode.CAM and dst == Box3DMode.LIDAR:
......@@ -126,6 +135,14 @@ class Box3DMode(IntEnum):
rt_mat = arr.new_tensor([[0, 0, 1], [-1, 0, 0], [0, -1, 0]])
xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
if with_yaw:
if correct_yaw:
yaw_vector = torch.cat([
torch.cos(-yaw),
torch.zeros_like(yaw),
torch.sin(-yaw)
],
dim=1)
else:
yaw = -yaw - np.pi / 2
yaw = limit_period(yaw, period=np.pi * 2)
elif src == Box3DMode.DEPTH and dst == Box3DMode.CAM:
......@@ -133,18 +150,42 @@ class Box3DMode(IntEnum):
rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
if with_yaw:
if correct_yaw:
yaw_vector = torch.cat([
torch.cos(yaw),
torch.sin(yaw),
torch.zeros_like(yaw)
],
dim=1)
else:
yaw = -yaw
elif src == Box3DMode.CAM and dst == Box3DMode.DEPTH:
if rt_mat is None:
rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, 1], [0, -1, 0]])
xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
if with_yaw:
if correct_yaw:
yaw_vector = torch.cat([
torch.cos(-yaw),
torch.zeros_like(yaw),
torch.sin(-yaw)
],
dim=1)
else:
yaw = -yaw
elif src == Box3DMode.LIDAR and dst == Box3DMode.DEPTH:
if rt_mat is None:
rt_mat = arr.new_tensor([[0, -1, 0], [1, 0, 0], [0, 0, 1]])
xyz_size = torch.cat([x_size, y_size, z_size], dim=-1)
if with_yaw:
if correct_yaw:
yaw_vector = torch.cat([
torch.cos(yaw),
torch.sin(yaw),
torch.zeros_like(yaw)
],
dim=1)
else:
yaw = yaw + np.pi / 2
yaw = limit_period(yaw, period=np.pi * 2)
elif src == Box3DMode.DEPTH and dst == Box3DMode.LIDAR:
......@@ -152,6 +193,14 @@ class Box3DMode(IntEnum):
rt_mat = arr.new_tensor([[0, 1, 0], [-1, 0, 0], [0, 0, 1]])
xyz_size = torch.cat([x_size, y_size, z_size], dim=-1)
if with_yaw:
if correct_yaw:
yaw_vector = torch.cat([
torch.cos(yaw),
torch.sin(yaw),
torch.zeros_like(yaw)
],
dim=1)
else:
yaw = yaw - np.pi / 2
yaw = limit_period(yaw, period=np.pi * 2)
else:
......@@ -168,6 +217,18 @@ class Box3DMode(IntEnum):
else:
xyz = arr[..., :3] @ rt_mat.t()
# Note: we only use rotation in rt_mat
# so don't need to extend yaw_vector
if with_yaw and correct_yaw:
rot_yaw_vector = yaw_vector @ rt_mat[:3, :3].t()
if dst == Box3DMode.CAM:
yaw = torch.atan2(-rot_yaw_vector[:, [2]], rot_yaw_vector[:, [0]])
elif dst in [Box3DMode.LIDAR, Box3DMode.DEPTH]:
yaw = torch.atan2(rot_yaw_vector[:, [1]], rot_yaw_vector[:, [0]])
yaw = limit_period(yaw, period=np.pi * 2)
if with_yaw:
remains = arr[..., 7:]
arr = torch.cat([xyz[..., :3], xyz_size, yaw, remains], dim=-1)
......
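To make the correct_yaw path concrete, here is a self-contained sketch of the LIDAR -> CAM case with the default rotation matrix shown above (yaw values are made up; the real convert() also accepts rt_mat inputs that include a translation):
import numpy as np
import torch
yaw = torch.tensor([[0.3], [1.2]])  # (N, 1) yaw angles in the LIDAR frame
rt_mat = torch.tensor([[0., -1., 0.], [0., 0., -1.], [1., 0., 0.]])
# Encode each yaw as a unit direction vector in the source frame.
yaw_vector = torch.cat(
    [torch.cos(yaw), torch.sin(yaw), torch.zeros_like(yaw)], dim=1)
# Rotate the direction with the same rotation applied to the box centers.
rot_yaw_vector = yaw_vector @ rt_mat[:3, :3].t()
# Recover the yaw around the CAM y axis and wrap it to [-pi, pi),
# matching limit_period(yaw, period=2 * pi).
cam_yaw = torch.atan2(-rot_yaw_vector[:, [2]], rot_yaw_vector[:, [0]])
cam_yaw = (cam_yaw + np.pi) % (2 * np.pi) - np.pi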