Unverified commit 32a4328b authored by Wenwei Zhang, committed by GitHub

Bump version to V1.0.0rc0

parents 86cc487c a8817998
......@@ -83,15 +83,15 @@ class PointwiseSemanticHead(BaseModule):
sample.
Args:
voxel_centers (torch.Tensor): The center of voxels in shape \
voxel_centers (torch.Tensor): The center of voxels in shape
(voxel_num, 3).
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in \
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in
shape (box_num, 7).
gt_labels_3d (torch.Tensor): Class labels of ground truths in \
gt_labels_3d (torch.Tensor): Class labels of ground truths in
shape (box_num).
Returns:
tuple[torch.Tensor]: Segmentation targets with shape [voxel_num] \
tuple[torch.Tensor]: Segmentation targets with shape [voxel_num]
part prediction targets with shape [voxel_num, 3]
"""
gt_bboxes_3d = gt_bboxes_3d.to(voxel_centers.device)
......@@ -99,8 +99,8 @@ class PointwiseSemanticHead(BaseModule):
part_targets = voxel_centers.new_zeros((voxel_centers.shape[0], 3),
dtype=torch.float32)
box_idx = gt_bboxes_3d.points_in_boxes(voxel_centers)
enlarge_box_idx = enlarged_gt_boxes.points_in_boxes(
box_idx = gt_bboxes_3d.points_in_boxes_part(voxel_centers)
enlarge_box_idx = enlarged_gt_boxes.points_in_boxes_part(
voxel_centers).long()
gt_labels_pad = F.pad(
......@@ -131,19 +131,19 @@ class PointwiseSemanticHead(BaseModule):
"""generate segmentation and part prediction targets.
Args:
voxel_centers (torch.Tensor): The center of voxels in shape \
voxel_centers (torch.Tensor): The center of voxels in shape
(voxel_num, 3).
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in \
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in
shape (box_num, 7).
gt_labels_3d (torch.Tensor): Class labels of ground truths in \
gt_labels_3d (torch.Tensor): Class labels of ground truths in
shape (box_num).
Returns:
dict: Prediction targets
- seg_targets (torch.Tensor): Segmentation targets \
- seg_targets (torch.Tensor): Segmentation targets
with shape [voxel_num].
- part_targets (torch.Tensor): Part prediction targets \
- part_targets (torch.Tensor): Part prediction targets
with shape [voxel_num, 3].
"""
batch_size = len(gt_labels_3d)
......
......@@ -20,7 +20,7 @@ class PrimitiveHead(BaseModule):
num_dims (int): The dimension of primitive semantic information.
num_classes (int): The number of classes.
primitive_mode (str): The mode of primitive module,
avaliable mode ['z', 'xy', 'line'].
available mode ['z', 'xy', 'line'].
bbox_coder (:obj:`BaseBBoxCoder`): Bbox coder for encoding and
decoding boxes.
train_cfg (dict): Config for training.
......@@ -30,7 +30,7 @@ class PrimitiveHead(BaseModule):
feat_channels (tuple[int]): Convolution channels of
prediction layer.
upper_thresh (float): Threshold for line matching.
surface_thresh (float): Threshold for suface matching.
surface_thresh (float): Threshold for surface matching.
conv_cfg (dict): Config of convolution in prediction layer.
norm_cfg (dict): Config of BN in prediction layer.
objectness_loss (dict): Config of objectness loss.
......@@ -198,15 +198,15 @@ class PrimitiveHead(BaseModule):
Args:
bbox_preds (dict): Predictions from forward of primitive head.
points (list[torch.Tensor]): Input points.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
bboxes of each sample.
gt_labels_3d (list[torch.Tensor]): Labels of each sample.
pts_semantic_mask (None | list[torch.Tensor]): Point-wise
pts_semantic_mask (list[torch.Tensor]): Point-wise
semantic mask.
pts_instance_mask (None | list[torch.Tensor]): Point-wise
pts_instance_mask (list[torch.Tensor]): Point-wise
instance mask.
img_metas (list[dict]): Meta information of point clouds and images.
gt_bboxes_ignore (None | list[torch.Tensor]): Specify
gt_bboxes_ignore (list[torch.Tensor]): Specify
which bounding boxes can be ignored.
Returns:
......@@ -266,12 +266,12 @@ class PrimitiveHead(BaseModule):
Args:
points (list[torch.Tensor]): Points of each batch.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
bboxes of each batch.
gt_labels_3d (list[torch.Tensor]): Labels of each batch.
pts_semantic_mask (None | list[torch.Tensor]): Point-wise semantic
pts_semantic_mask (list[torch.Tensor]): Point-wise semantic
label of each batch.
pts_instance_mask (None | list[torch.Tensor]): Point-wise instance
pts_instance_mask (list[torch.Tensor]): Point-wise instance
label of each batch.
bbox_preds (dict): Predictions from forward of primitive head.
......@@ -333,12 +333,12 @@ class PrimitiveHead(BaseModule):
Args:
points (torch.Tensor): Points of each batch.
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth \
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth
boxes of each batch.
gt_labels_3d (torch.Tensor): Labels of each batch.
pts_semantic_mask (None | torch.Tensor): Point-wise semantic
pts_semantic_mask (torch.Tensor): Point-wise semantic
label of each batch.
pts_instance_mask (None | torch.Tensor): Point-wise instance
pts_instance_mask (torch.Tensor): Point-wise instance
label of each batch.
Returns:
......@@ -355,7 +355,7 @@ class PrimitiveHead(BaseModule):
# Generate pts_semantic_mask and pts_instance_mask when they are None
if pts_semantic_mask is None or pts_instance_mask is None:
points2box_mask = gt_bboxes_3d.points_in_boxes(points)
points2box_mask = gt_bboxes_3d.points_in_boxes_all(points)
assignment = points2box_mask.argmax(1)
background_mask = points2box_mask.max(1)[0] == 0
......
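The hunks above replace the old `points_in_boxes` calls with the renamed box-structure methods: `points_in_boxes_part` returns one box index per point (-1 for points outside every box), while `points_in_boxes_all` returns a per-point, per-box assignment matrix. A minimal usage sketch, assuming a CUDA device is available (the underlying kernels are GPU ops) and using made-up boxes and points:

import torch
from mmdet3d.core.bbox import LiDARInstance3DBoxes

# Two illustrative boxes (x, y, z, dx, dy, dz, yaw) and three query points.
boxes = LiDARInstance3DBoxes(
    torch.tensor([[0., 0., 0., 2., 2., 2., 0.],
                  [5., 5., 0., 2., 2., 2., 0.]]))
points = torch.rand(3, 3).cuda()
boxes = boxes.to(points.device)

box_idx = boxes.points_in_boxes_part(points)   # (3,), index of the box containing each point
box_mask = boxes.points_in_boxes_all(points)   # (3, 2), one column per box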
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from torch.nn import functional as F
from mmdet3d.core import AssignResult
......
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch.nn import functional as F
from mmdet3d.core import AssignResult
from mmdet3d.core.bbox import bbox3d2result, bbox3d2roi
from mmdet.core import build_assigner, build_sampler
from mmdet.models import HEADS
from ..builder import build_head, build_roi_extractor
from .base_3droi_head import Base3DRoIHead
@HEADS.register_module()
class PointRCNNRoIHead(Base3DRoIHead):
"""RoI head for PointRCNN.
Args:
bbox_head (dict): Config of bbox_head.
point_roi_extractor (dict): Config of RoI extractor.
train_cfg (dict): Train configs.
test_cfg (dict): Test configs.
depth_normalizer (float, optional): Normalize depth feature.
Defaults to 70.0.
init_cfg (dict, optional): Config of initialization. Defaults to None.
"""
def __init__(self,
bbox_head,
point_roi_extractor,
train_cfg,
test_cfg,
depth_normalizer=70.0,
pretrained=None,
init_cfg=None):
super(PointRCNNRoIHead, self).__init__(
bbox_head=bbox_head,
train_cfg=train_cfg,
test_cfg=test_cfg,
pretrained=pretrained,
init_cfg=init_cfg)
self.depth_normalizer = depth_normalizer
if point_roi_extractor is not None:
self.point_roi_extractor = build_roi_extractor(point_roi_extractor)
self.init_assigner_sampler()
def init_bbox_head(self, bbox_head):
"""Initialize box head.
Args:
bbox_head (dict): Config dict of RoI Head.
"""
self.bbox_head = build_head(bbox_head)
def init_mask_head(self):
"""Initialize maek head."""
pass
def init_assigner_sampler(self):
"""Initialize assigner and sampler."""
self.bbox_assigner = None
self.bbox_sampler = None
if self.train_cfg:
if isinstance(self.train_cfg.assigner, dict):
self.bbox_assigner = build_assigner(self.train_cfg.assigner)
elif isinstance(self.train_cfg.assigner, list):
self.bbox_assigner = [
build_assigner(res) for res in self.train_cfg.assigner
]
self.bbox_sampler = build_sampler(self.train_cfg.sampler)
def forward_train(self, feats_dict, input_metas, proposal_list,
gt_bboxes_3d, gt_labels_3d):
"""Training forward function of PointRCNNRoIHead.
Args:
feats_dict (dict): Contains features from the first stage.
input_metas (list[dict]): Meta info of each input.
proposal_list (list[dict]): Proposal information from rpn.
The dictionary should contain the following keys:
- boxes_3d (:obj:`BaseInstance3DBoxes`): Proposal bboxes
- labels_3d (torch.Tensor): Labels of proposals
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]):
GT bboxes of each sample. The bboxes are encapsulated
by 3D box structures.
gt_labels_3d (list[LongTensor]): GT labels of each sample.
Returns:
dict: Losses from RoI RCNN head.
- loss_bbox (torch.Tensor): Loss of bboxes
"""
features = feats_dict['features']
points = feats_dict['points']
point_cls_preds = feats_dict['points_cls_preds']
sem_scores = point_cls_preds.sigmoid()
point_scores = sem_scores.max(-1)[0]
sample_results = self._assign_and_sample(proposal_list, gt_bboxes_3d,
gt_labels_3d)
# concat the depth, semantic features and backbone features
features = features.transpose(1, 2).contiguous()
point_depths = points.norm(dim=2) / self.depth_normalizer - 0.5
features_list = [
point_scores.unsqueeze(2),
point_depths.unsqueeze(2), features
]
features = torch.cat(features_list, dim=2)
bbox_results = self._bbox_forward_train(features, points,
sample_results)
losses = dict()
losses.update(bbox_results['loss_bbox'])
return losses
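The concatenation in `forward_train` fuses per-point semantic scores and normalized depth with the backbone features before RoI pooling. A shape-only sketch of that step (dummy tensors and placeholder sizes, not the real pipeline):

import torch

B, N, C = 2, 1024, 128
features = torch.rand(B, C, N).transpose(1, 2).contiguous()   # -> (B, N, C)
points = torch.rand(B, N, 3)
point_scores = torch.rand(B, N)                  # max class score per point
point_depths = points.norm(dim=2) / 70.0 - 0.5   # depth_normalizer = 70.0

fused = torch.cat([point_scores.unsqueeze(2),
                   point_depths.unsqueeze(2),
                   features], dim=2)
assert fused.shape == (B, N, C + 2)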
def simple_test(self, feats_dict, img_metas, proposal_list, **kwargs):
"""Simple testing forward function of PointRCNNRoIHead.
Note:
This function assumes that the batch size is 1
Args:
feats_dict (dict): Contains features from the first stage.
img_metas (list[dict]): Meta info of each image.
proposal_list (list[dict]): Proposal information from rpn.
Returns:
dict: Bbox results of one frame.
"""
rois = bbox3d2roi([res['boxes_3d'].tensor for res in proposal_list])
labels_3d = [res['labels_3d'] for res in proposal_list]
features = feats_dict['features']
points = feats_dict['points']
point_cls_preds = feats_dict['points_cls_preds']
sem_scores = point_cls_preds.sigmoid()
point_scores = sem_scores.max(-1)[0]
features = features.transpose(1, 2).contiguous()
point_depths = points.norm(dim=2) / self.depth_normalizer - 0.5
features_list = [
point_scores.unsqueeze(2),
point_depths.unsqueeze(2), features
]
features = torch.cat(features_list, dim=2)
batch_size = features.shape[0]
bbox_results = self._bbox_forward(features, points, batch_size, rois)
object_score = bbox_results['cls_score'].sigmoid()
bbox_list = self.bbox_head.get_bboxes(
rois,
object_score,
bbox_results['bbox_pred'],
labels_3d,
img_metas,
cfg=self.test_cfg)
bbox_results = [
bbox3d2result(bboxes, scores, labels)
for bboxes, scores, labels in bbox_list
]
return bbox_results
def _bbox_forward_train(self, features, points, sampling_results):
"""Forward training function of roi_extractor and bbox_head.
Args:
features (torch.Tensor): Backbone features with depth and \
semantic features.
points (torch.Tensor): Pointcloud.
sampling_results (:obj:`SamplingResult`): Sampled results used
for training.
Returns:
dict: Forward results including losses and predictions.
"""
rois = bbox3d2roi([res.bboxes for res in sampling_results])
batch_size = features.shape[0]
bbox_results = self._bbox_forward(features, points, batch_size, rois)
bbox_targets = self.bbox_head.get_targets(sampling_results,
self.train_cfg)
loss_bbox = self.bbox_head.loss(bbox_results['cls_score'],
bbox_results['bbox_pred'], rois,
*bbox_targets)
bbox_results.update(loss_bbox=loss_bbox)
return bbox_results
def _bbox_forward(self, features, points, batch_size, rois):
"""Forward function of roi_extractor and bbox_head used in both
training and testing.
Args:
features (torch.Tensor): Backbone features with depth and
semantic features.
points (torch.Tensor): Pointcloud.
batch_size (int): Batch size.
rois (torch.Tensor): RoI boxes.
Returns:
dict: Contains predictions of bbox_head and
features of roi_extractor.
"""
pooled_point_feats = self.point_roi_extractor(features, points,
batch_size, rois)
cls_score, bbox_pred = self.bbox_head(pooled_point_feats)
bbox_results = dict(cls_score=cls_score, bbox_pred=bbox_pred)
return bbox_results
def _assign_and_sample(self, proposal_list, gt_bboxes_3d, gt_labels_3d):
"""Assign and sample proposals for training.
Args:
proposal_list (list[dict]): Proposals produced by RPN.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
boxes.
gt_labels_3d (list[torch.Tensor]): Ground truth labels
Returns:
list[:obj:`SamplingResult`]: Sampled results of each training
sample.
"""
sampling_results = []
# bbox assign
for batch_idx in range(len(proposal_list)):
cur_proposal_list = proposal_list[batch_idx]
cur_boxes = cur_proposal_list['boxes_3d']
cur_labels_3d = cur_proposal_list['labels_3d']
cur_gt_bboxes = gt_bboxes_3d[batch_idx].to(cur_boxes.device)
cur_gt_labels = gt_labels_3d[batch_idx]
batch_num_gts = 0
# 0 is bg
batch_gt_indis = cur_gt_labels.new_full((len(cur_boxes), ), 0)
batch_max_overlaps = cur_boxes.tensor.new_zeros(len(cur_boxes))
# -1 is bg
batch_gt_labels = cur_gt_labels.new_full((len(cur_boxes), ), -1)
# each class may have its own assigner
if isinstance(self.bbox_assigner, list):
for i, assigner in enumerate(self.bbox_assigner):
gt_per_cls = (cur_gt_labels == i)
pred_per_cls = (cur_labels_3d == i)
cur_assign_res = assigner.assign(
cur_boxes.tensor[pred_per_cls],
cur_gt_bboxes.tensor[gt_per_cls],
gt_labels=cur_gt_labels[gt_per_cls])
# gather assign_results in different class into one result
batch_num_gts += cur_assign_res.num_gts
# gt inds (1-based)
gt_inds_arange_pad = gt_per_cls.nonzero(
as_tuple=False).view(-1) + 1
# pad 0 for unassigned indices
gt_inds_arange_pad = F.pad(
gt_inds_arange_pad, (1, 0), mode='constant', value=0)
# pad -1 for ignored indices
gt_inds_arange_pad = F.pad(
gt_inds_arange_pad, (1, 0), mode='constant', value=-1)
# convert to 0~gt_num+2 for indices
gt_inds_arange_pad += 1
# now 0 is bg, >1 is fg in batch_gt_indis
batch_gt_indis[pred_per_cls] = gt_inds_arange_pad[
cur_assign_res.gt_inds + 1] - 1
batch_max_overlaps[
pred_per_cls] = cur_assign_res.max_overlaps
batch_gt_labels[pred_per_cls] = cur_assign_res.labels
assign_result = AssignResult(batch_num_gts, batch_gt_indis,
batch_max_overlaps,
batch_gt_labels)
else: # for single class
assign_result = self.bbox_assigner.assign(
cur_boxes.tensor,
cur_gt_bboxes.tensor,
gt_labels=cur_gt_labels)
# sample boxes
sampling_result = self.bbox_sampler.sample(assign_result,
cur_boxes.tensor,
cur_gt_bboxes.tensor,
cur_gt_labels)
sampling_results.append(sampling_result)
return sampling_results
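The per-class branch of `_assign_and_sample` merges class-wise assigner results back into a single per-sample tensor via the padded index trick commented above. A toy worked example of that remapping (numbers chosen purely for illustration):

import torch
import torch.nn.functional as F

# Four GTs in the sample; GTs 1 and 3 (0-based) belong to the current class.
gt_per_cls = torch.tensor([False, True, False, True])

gt_inds_arange_pad = gt_per_cls.nonzero(as_tuple=False).view(-1) + 1   # [2, 4]
gt_inds_arange_pad = F.pad(gt_inds_arange_pad, (1, 0), value=0)        # [0, 2, 4]
gt_inds_arange_pad = F.pad(gt_inds_arange_pad, (1, 0), value=-1)       # [-1, 0, 2, 4]
gt_inds_arange_pad += 1                                                 # [0, 1, 3, 5]

# Assigner output per prediction of this class: -1 ignore, 0 background,
# k >= 1 means the k-th GT *of this class*.
assigner_gt_inds = torch.tensor([-1, 0, 1, 2])
global_inds = gt_inds_arange_pad[assigner_gt_inds + 1] - 1
# -> [-1, 0, 2, 4]: ignore stays -1, background stays 0, and the class-local
#    GT indices are mapped back to 1-based indices over all four GTs.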
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet.models.roi_heads.roi_extractors import SingleRoIExtractor
from .single_roiaware_extractor import Single3DRoIAwareExtractor
from .single_roipoint_extractor import Single3DRoIPointExtractor
__all__ = ['SingleRoIExtractor', 'Single3DRoIAwareExtractor']
__all__ = [
'SingleRoIExtractor', 'Single3DRoIAwareExtractor',
'Single3DRoIPointExtractor'
]
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch import nn as nn
from mmdet3d import ops
from mmdet3d.core.bbox.structures import rotation_3d_in_axis
from mmdet.models.builder import ROI_EXTRACTORS
@ROI_EXTRACTORS.register_module()
class Single3DRoIPointExtractor(nn.Module):
"""Point-wise roi-aware Extractor.
Extract Point-wise roi features.
Args:
roi_layer (dict): The config of roi layer.
"""
def __init__(self, roi_layer=None):
super(Single3DRoIPointExtractor, self).__init__()
self.roi_layer = self.build_roi_layers(roi_layer)
def build_roi_layers(self, layer_cfg):
"""Build roi layers using `layer_cfg`"""
cfg = layer_cfg.copy()
layer_type = cfg.pop('type')
assert hasattr(ops, layer_type)
layer_cls = getattr(ops, layer_type)
roi_layers = layer_cls(**cfg)
return roi_layers
def forward(self, feats, coordinate, batch_inds, rois):
"""Extract point-wise roi features.
Args:
feats (torch.FloatTensor): Point-wise features with
shape (batch, npoints, channels) for pooling.
coordinate (torch.FloatTensor): Coordinate of each point.
batch_inds (int): Batch size used to reshape the RoIs.
rois (torch.FloatTensor): Roi boxes with batch indices.
Returns:
torch.FloatTensor: Pooled features
"""
rois = rois[..., 1:]
rois = rois.view(batch_inds, -1, rois.shape[-1])
with torch.no_grad():
pooled_roi_feat, pooled_empty_flag = self.roi_layer(
coordinate, feats, rois)
# canonical transformation
roi_center = rois[:, :, 0:3]
pooled_roi_feat[:, :, :, 0:3] -= roi_center.unsqueeze(dim=2)
pooled_roi_feat = pooled_roi_feat.view(-1,
pooled_roi_feat.shape[-2],
pooled_roi_feat.shape[-1])
pooled_roi_feat[:, :, 0:3] = rotation_3d_in_axis(
pooled_roi_feat[:, :, 0:3],
-(rois.view(-1, rois.shape[-1])[:, 6]),
axis=2)
pooled_roi_feat[pooled_empty_flag.view(-1) > 0] = 0
return pooled_roi_feat
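A hedged construction sketch for this extractor; the `roi_layer` config mirrors the PointRCNN setup elsewhere in the repo and assumes the `RoIPointPool3d` op registered in `mmdet3d.ops` keeps its `num_sampled_points` argument:

from mmdet3d.models.roi_heads.roi_extractors import Single3DRoIPointExtractor

# Hypothetical config; RoIPointPool3d pools a fixed number of points per RoI.
extractor = Single3DRoIPointExtractor(
    roi_layer=dict(type='RoIPointPool3d', num_sampled_points=512))
# forward(feats, coordinate, batch_inds, rois) pools the sampled points of each
# RoI and canonically transforms their xyz into the box-centered frame.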
# Copyright (c) OpenMMLab. All rights reserved.
from os import path as osp
import mmcv
import numpy as np
import torch
from mmcv.parallel import DataContainer as DC
from mmcv.runner import auto_fp16
from os import path as osp
from mmdet3d.core import show_seg_result
from mmseg.models.segmentors import BaseSegmentor
......@@ -80,7 +81,7 @@ class Base3DSegmentor(BaseSegmentor):
Args:
data (list[dict]): Input points and the information of the sample.
result (list[dict]): Prediction results.
palette (list[list[int]]] | np.ndarray | None): The palette of
palette (list[list[int]] | np.ndarray): The palette of
segmentation map. If None is given, random palette will be
generated. Default: None
out_dir (str): Output directory of visualization result.
......
......@@ -187,7 +187,7 @@ class EncoderDecoder3D(Base3DSegmentor):
use_normalized_coord=False):
"""Generating model input.
Generate input by subtracting patch center and adding additional \
Generate input by subtracting patch center and adding additional
features. Currently support colors and normalized xyz as features.
Args:
......@@ -195,7 +195,7 @@ class EncoderDecoder3D(Base3DSegmentor):
patch_center (torch.Tensor): Center coordinate of the patch.
coord_max (torch.Tensor): Max coordinate of all 3D points.
feats (torch.Tensor): Features of sampled points of shape [S, C].
use_normalized_coord (bool, optional): Whether to use normalized \
use_normalized_coord (bool, optional): Whether to use normalized
xyz as additional features. Defaults to False.
Returns:
......@@ -233,17 +233,17 @@ class EncoderDecoder3D(Base3DSegmentor):
block_size (float, optional): Size of a patch to sample.
sample_rate (float, optional): Stride used in sliding patch.
Defaults to 0.5.
use_normalized_coord (bool, optional): Whether to use normalized \
use_normalized_coord (bool, optional): Whether to use normalized
xyz as additional features. Defaults to False.
eps (float, optional): A value added to patch boundary to guarantee
points coverage. Default 1e-3.
points coverage. Defaults to 1e-3.
Returns:
np.ndarray | np.ndarray:
- patch_points (torch.Tensor): Points of different patches of \
- patch_points (torch.Tensor): Points of different patches of
shape [K, N, 3+C].
- patch_idxs (torch.Tensor): Index of each point in \
- patch_idxs (torch.Tensor): Index of each point in
`patch_points`, of shape [K, N].
"""
device = points.device
......
# Copyright (c) OpenMMLab. All rights reserved.
from .clip_sigmoid import clip_sigmoid
from .edge_indices import get_edge_indices
from .gen_keypoints import get_keypoints
from .handle_objs import filter_outside_objs, handle_proj_objs
from .mlp import MLP
__all__ = ['clip_sigmoid', 'MLP']
__all__ = [
'clip_sigmoid', 'MLP', 'get_edge_indices', 'filter_outside_objs',
'handle_proj_objs', 'get_keypoints'
]
......@@ -7,8 +7,8 @@ def clip_sigmoid(x, eps=1e-4):
Args:
x (torch.Tensor): Input feature map with the shape of [B, N, H, W].
eps (float): Lower bound of the range to be clamped to. Defaults
to 1e-4.
eps (float, optional): Lower bound of the range to be clamped to.
Defaults to 1e-4.
Returns:
torch.Tensor: Feature map after sigmoid.
......
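For reference, a minimal sketch consistent with the `clip_sigmoid` docstring above (a sigmoid clamped away from 0 and 1 so that downstream losses such as a Gaussian focal loss stay numerically stable); the exact in-repo body may differ:

import torch

def clip_sigmoid_sketch(x, eps=1e-4):
    """Sigmoid clamped to [eps, 1 - eps] to avoid log(0) in later losses."""
    return torch.clamp(x.sigmoid(), min=eps, max=1 - eps)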
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import torch
def get_edge_indices(img_metas,
downsample_ratio,
step=1,
pad_mode='default',
dtype=np.float32,
device='cpu'):
"""Function to filter the objects label outside the image.
The edge_indices are generated using numpy on cpu rather
than on CUDA due to the latency issue. When batch size = 8,
this function with numpy array is ~8 times faster than that
with CUDA tensor (0.09s and 0.72s in 100 runs).
Args:
img_metas (list[dict]): Meta information of each image, e.g.,
image size, scaling factor, etc.
downsample_ratio (int): Downsample ratio of output feature.
step (int, optional): Step size used for generating
edge indices. Default: 1.
pad_mode (str, optional): Padding mode during data pipeline.
Default: 'default'.
dtype (np.dtype, optional): Dtype of the edge indices array.
Default: np.float32.
device (str, optional): Device of edge indices tensor.
Default: 'cpu'.
Returns:
list[Tensor]: Edge indices for each image in batch data.
"""
edge_indices_list = []
for i in range(len(img_metas)):
img_shape = img_metas[i]['img_shape']
pad_shape = img_metas[i]['pad_shape']
h, w = img_shape[:2]
pad_h, pad_w = pad_shape
edge_indices = []
if pad_mode == 'default':
x_min = 0
y_min = 0
x_max = (w - 1) // downsample_ratio
y_max = (h - 1) // downsample_ratio
elif pad_mode == 'center':
x_min = np.ceil((pad_w - w) / 2 * downsample_ratio)
y_min = np.ceil((pad_h - h) / 2 * downsample_ratio)
x_max = x_min + w // downsample_ratio
y_max = y_min + h // downsample_ratio
else:
raise NotImplementedError
# left
y = np.arange(y_min, y_max, step, dtype=dtype)
x = np.ones(len(y)) * x_min
edge_indices_edge = np.stack((x, y), axis=1)
edge_indices.append(edge_indices_edge)
# bottom
x = np.arange(x_min, x_max, step, dtype=dtype)
y = np.ones(len(x)) * y_max
edge_indices_edge = np.stack((x, y), axis=1)
edge_indices.append(edge_indices_edge)
# right
y = np.arange(y_max, y_min, -step, dtype=dtype)
x = np.ones(len(y)) * x_max
edge_indices_edge = np.stack((x, y), axis=1)
edge_indices.append(edge_indices_edge)
# top
x = np.arange(x_max, x_min, -step, dtype=dtype)
y = np.ones(len(x)) * y_min
edge_indices_edge = np.stack((x, y), axis=1)
edge_indices.append(edge_indices_edge)
edge_indices = \
np.concatenate([index for index in edge_indices], axis=0)
edge_indices = torch.from_numpy(edge_indices).to(device).long()
edge_indices_list.append(edge_indices)
return edge_indices_list
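A hedged usage sketch of `get_edge_indices` with the default padding mode, assuming the function above is in scope; the `img_metas` values are made up for illustration (note the function unpacks `pad_shape` as a 2-tuple):

# Hypothetical meta info for a single image.
img_metas = [dict(img_shape=(375, 1242, 3), pad_shape=(384, 1248))]
edge_indices_list = get_edge_indices(img_metas, downsample_ratio=4)
# One LongTensor of (x, y) border indices per image, traced left -> bottom ->
# right -> top on the downsampled feature map.
print(edge_indices_list[0].shape)   # (num_edge_points, 2)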
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from mmdet3d.core.bbox import points_cam2img
def get_keypoints(gt_bboxes_3d_list,
centers2d_list,
img_metas,
use_local_coords=True):
"""Function to filter the objects label outside the image.
Args:
gt_bboxes_3d_list (list[:obj:`BaseInstance3DBoxes`]): 3D ground truth
bboxes of each image.
centers2d_list (list[Tensor]): Projected 3D centers onto 2D image,
shape (num_gt, 2).
img_metas (list[dict]): Meta information of each image, e.g.,
image size, scaling factor, etc.
use_local_coords (bool, optional): Whether to use local coordinates
for keypoints. Default: True.
Returns:
tuple[list[Tensor]]: It contains two elements, the first is the
keypoints for each projected 2D bbox in batch data. The second is
the visible mask of depth calculated by keypoints.
"""
assert len(gt_bboxes_3d_list) == len(centers2d_list)
bs = len(gt_bboxes_3d_list)
keypoints2d_list = []
keypoints_depth_mask_list = []
for i in range(bs):
gt_bboxes_3d = gt_bboxes_3d_list[i]
centers2d = centers2d_list[i]
img_shape = img_metas[i]['img_shape']
cam2img = img_metas[i]['cam2img']
h, w = img_shape[:2]
# (N, 8, 3)
corners3d = gt_bboxes_3d.corners
top_centers3d = torch.mean(corners3d[:, [0, 1, 4, 5], :], dim=1)
bot_centers3d = torch.mean(corners3d[:, [2, 3, 6, 7], :], dim=1)
# (N, 2, 3)
top_bot_centers3d = torch.stack((top_centers3d, bot_centers3d), dim=1)
keypoints3d = torch.cat((corners3d, top_bot_centers3d), dim=1)
# (N, 10, 2)
keypoints2d = points_cam2img(keypoints3d, cam2img)
# keypoints mask: keypoints must be inside
# the image and in front of the camera
keypoints_x_visible = (keypoints2d[..., 0] >= 0) & (
keypoints2d[..., 0] <= w - 1)
keypoints_y_visible = (keypoints2d[..., 1] >= 0) & (
keypoints2d[..., 1] <= h - 1)
keypoints_z_visible = (keypoints3d[..., -1] > 0)
# (N, 10)
keypoints_visible = keypoints_x_visible & \
keypoints_y_visible & keypoints_z_visible
# center, diag-02, diag-13
keypoints_depth_valid = torch.stack(
(keypoints_visible[:, [8, 9]].all(dim=1),
keypoints_visible[:, [0, 3, 5, 6]].all(dim=1),
keypoints_visible[:, [1, 2, 4, 7]].all(dim=1)),
dim=1)
keypoints_visible = keypoints_visible.float()
if use_local_coords:
keypoints2d = torch.cat((keypoints2d - centers2d.unsqueeze(1),
keypoints_visible.unsqueeze(-1)),
dim=2)
else:
keypoints2d = torch.cat(
(keypoints2d, keypoints_visible.unsqueeze(-1)), dim=2)
keypoints2d_list.append(keypoints2d)
keypoints_depth_mask_list.append(keypoints_depth_valid)
return (keypoints2d_list, keypoints_depth_mask_list)
# Copyright (c) OpenMMLab. All rights reserved.
import torch
def filter_outside_objs(gt_bboxes_list, gt_labels_list, gt_bboxes_3d_list,
gt_labels_3d_list, centers2d_list, img_metas):
"""Function to filter the objects label outside the image.
Args:
gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image,
each has shape (num_gt, 4).
gt_labels_list (list[Tensor]): Ground truth labels of each box,
each has shape (num_gt,).
gt_bboxes_3d_list (list[Tensor]): 3D Ground truth bboxes of each
image, each has shape (num_gt, bbox_code_size).
gt_labels_3d_list (list[Tensor]): 3D Ground truth labels of each
box, each has shape (num_gt,).
centers2d_list (list[Tensor]): Projected 3D centers onto 2D image,
each has shape (num_gt, 2).
img_metas (list[dict]): Meta information of each image, e.g.,
image size, scaling factor, etc.
"""
bs = len(centers2d_list)
for i in range(bs):
centers2d = centers2d_list[i].clone()
img_shape = img_metas[i]['img_shape']
keep_inds = (centers2d[:, 0] > 0) & \
(centers2d[:, 0] < img_shape[1]) & \
(centers2d[:, 1] > 0) & \
(centers2d[:, 1] < img_shape[0])
centers2d_list[i] = centers2d[keep_inds]
gt_labels_list[i] = gt_labels_list[i][keep_inds]
gt_bboxes_list[i] = gt_bboxes_list[i][keep_inds]
gt_bboxes_3d_list[i].tensor = gt_bboxes_3d_list[i].tensor[keep_inds]
gt_labels_3d_list[i] = gt_labels_3d_list[i][keep_inds]
def get_centers2d_target(centers2d, centers, img_shape):
"""Function to get target centers2d.
Args:
centers2d (Tensor): Projected 3D centers onto 2D images.
centers (Tensor): Centers of 2d gt bboxes.
img_shape (tuple): Resized image shape.
Returns:
torch.Tensor: Projected 3D centers (centers2D) target.
"""
N = centers2d.shape[0]
h, w = img_shape[:2]
valid_intersects = centers2d.new_zeros((N, 2))
a = (centers[:, 1] - centers2d[:, 1]) / (centers[:, 0] - centers2d[:, 0])
b = centers[:, 1] - a * centers[:, 0]
left_y = b
right_y = (w - 1) * a + b
top_x = -b / a
bottom_x = (h - 1 - b) / a
left_coors = torch.stack((left_y.new_zeros(N, ), left_y), dim=1)
right_coors = torch.stack((right_y.new_full((N, ), w - 1), right_y), dim=1)
top_coors = torch.stack((top_x, top_x.new_zeros(N, )), dim=1)
bottom_coors = torch.stack((bottom_x, bottom_x.new_full((N, ), h - 1)),
dim=1)
intersects = torch.stack(
[left_coors, right_coors, top_coors, bottom_coors], dim=1)
intersects_x = intersects[:, :, 0]
intersects_y = intersects[:, :, 1]
inds = (intersects_x >= 0) & (intersects_x <=
w - 1) & (intersects_y >= 0) & (
intersects_y <= h - 1)
valid_intersects = intersects[inds].reshape(N, 2, 2)
dist = torch.norm(valid_intersects - centers2d.unsqueeze(1), dim=2)
min_idx = torch.argmin(dist, dim=1)
min_idx = min_idx.unsqueeze(-1).unsqueeze(-1).expand(-1, -1, 2)
centers2d_target = valid_intersects.gather(dim=1, index=min_idx).squeeze(1)
return centers2d_target
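A toy worked example of `get_centers2d_target`: a projected 3D center lying outside the image is pulled back to the border along the line towards the 2D box center, picking the nearer of the two valid border intersections (values are illustrative only):

import torch

# One projected center just left of a 100 x 200 (h, w) image.
centers2d = torch.tensor([[-10.0, 40.0]])   # outside the image
centers = torch.tensor([[50.0, 70.0]])      # 2D bbox center, inside the image
target = get_centers2d_target(centers2d, centers, img_shape=(100, 200, 3))
# The center is moved to the left-border intersection of the joining line,
# i.e. approximately (0.0, 45.0).
print(target)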
def handle_proj_objs(centers2d_list, gt_bboxes_list, img_metas):
"""Function to handle projected object centers2d, generate target
centers2d.
Args:
gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image,
shape (num_gt, 4).
centers2d_list (list[Tensor]): Projected 3D centers onto 2D image,
shape (num_gt, 2).
img_metas (list[dict]): Meta information of each image, e.g.,
image size, scaling factor, etc.
Returns:
tuple[list[Tensor]]: It contains three elements. The first is the
target centers2d after handling the truncated objects. The second
is the offset between each target center2d and its rounded integer
counterpart, and the last is the truncation mask for each object in the
batch data.
"""
bs = len(centers2d_list)
centers2d_target_list = []
trunc_mask_list = []
offsets2d_list = []
# For now, only the pad mode where the image is padded on the right and
# bottom sides is supported.
for i in range(bs):
centers2d = centers2d_list[i]
gt_bbox = gt_bboxes_list[i]
img_shape = img_metas[i]['img_shape']
centers2d_target = centers2d.clone()
inside_inds = (centers2d[:, 0] > 0) & \
(centers2d[:, 0] < img_shape[1]) & \
(centers2d[:, 1] > 0) & \
(centers2d[:, 1] < img_shape[0])
outside_inds = ~inside_inds
# if there are outside objects
if outside_inds.any():
centers = (gt_bbox[:, :2] + gt_bbox[:, 2:]) / 2
outside_centers2d = centers2d[outside_inds]
match_centers = centers[outside_inds]
target_outside_centers2d = get_centers2d_target(
outside_centers2d, match_centers, img_shape)
centers2d_target[outside_inds] = target_outside_centers2d
offsets2d = centers2d - centers2d_target.round().int()
trunc_mask = outside_inds
centers2d_target_list.append(centers2d_target)
trunc_mask_list.append(trunc_mask)
offsets2d_list.append(offsets2d)
return (centers2d_target_list, offsets2d_list, trunc_mask_list)
......@@ -10,15 +10,15 @@ class MLP(BaseModule):
Pass features (B, C, N) through an MLP.
Args:
in_channels (int): Number of channels of input features.
in_channels (int, optional): Number of channels of input features.
Default: 18.
conv_channels (tuple[int]): Out channels of the convolution.
conv_channels (tuple[int], optional): Out channels of the convolution.
Default: (256, 256).
conv_cfg (dict): Config of convolution.
conv_cfg (dict, optional): Config of convolution.
Default: dict(type='Conv1d').
norm_cfg (dict): Config of normalization.
norm_cfg (dict, optional): Config of normalization.
Default: dict(type='BN1d').
act_cfg (dict): Config of activation.
act_cfg (dict, optional): Config of activation.
Default: dict(type='ReLU').
"""
......
......@@ -15,7 +15,6 @@ class PillarFeatureNet(nn.Module):
The network prepares the pillar features and performs forward pass
through PFNLayers.
Args:
in_channels (int, optional): Number of input features,
either x, y, z or x, y, z, r. Defaults to 4.
......@@ -33,7 +32,7 @@ class PillarFeatureNet(nn.Module):
Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01).
mode (str, optional): The mode to gather point features. Options are
'max' or 'avg'. Defaults to 'max'.
legacy (bool): Whether to use the new behavior or
legacy (bool, optional): Whether to use the new behavior or
the original behavior. Defaults to True.
"""
......@@ -54,7 +53,7 @@ class PillarFeatureNet(nn.Module):
if with_cluster_center:
in_channels += 3
if with_voxel_center:
in_channels += 2
in_channels += 3
if with_distance:
in_channels += 1
self._with_distance = with_distance
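With this change, enabling `with_voxel_center` adds three offset channels (x, y and z offsets to the voxel center) instead of two, so the effective input width of the first PFN layer grows accordingly. A quick arithmetic sketch, assuming raw 4-dim points (x, y, z, reflectance) and all optional features enabled:

# in_channels bookkeeping after this change (illustrative):
in_channels = 4           # x, y, z, reflectance
in_channels += 3          # with_cluster_center: offsets to the cluster mean
in_channels += 3          # with_voxel_center: offsets to the voxel center (now x, y AND z)
in_channels += 1          # with_distance: L2 norm of each point
assert in_channels == 11  # previously 10, when the voxel-center offset was 2-D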
......@@ -84,8 +83,10 @@ class PillarFeatureNet(nn.Module):
# Need pillar (voxel) size and x/y offset in order to calculate offset
self.vx = voxel_size[0]
self.vy = voxel_size[1]
self.vz = voxel_size[2]
self.x_offset = self.vx / 2 + point_cloud_range[0]
self.y_offset = self.vy / 2 + point_cloud_range[1]
self.z_offset = self.vz / 2 + point_cloud_range[2]
self.point_cloud_range = point_cloud_range
@force_fp32(out_fp16=True)
......@@ -97,7 +98,6 @@ class PillarFeatureNet(nn.Module):
(N, M, C).
num_points (torch.Tensor): Number of points in each pillar.
coors (torch.Tensor): Coordinates of each voxel.
Returns:
torch.Tensor: Features of pillars.
"""
......@@ -114,21 +114,27 @@ class PillarFeatureNet(nn.Module):
dtype = features.dtype
if self._with_voxel_center:
if not self.legacy:
f_center = torch.zeros_like(features[:, :, :2])
f_center = torch.zeros_like(features[:, :, :3])
f_center[:, :, 0] = features[:, :, 0] - (
coors[:, 3].to(dtype).unsqueeze(1) * self.vx +
self.x_offset)
f_center[:, :, 1] = features[:, :, 1] - (
coors[:, 2].to(dtype).unsqueeze(1) * self.vy +
self.y_offset)
f_center[:, :, 2] = features[:, :, 2] - (
coors[:, 1].to(dtype).unsqueeze(1) * self.vz +
self.z_offset)
else:
f_center = features[:, :, :2]
f_center = features[:, :, :3]
f_center[:, :, 0] = f_center[:, :, 0] - (
coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
self.x_offset)
f_center[:, :, 1] = f_center[:, :, 1] - (
coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
self.y_offset)
f_center[:, :, 2] = f_center[:, :, 2] - (
coors[:, 1].type_as(features).unsqueeze(1) * self.vz +
self.z_offset)
features_ls.append(f_center)
if self._with_distance:
......@@ -177,6 +183,8 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01).
mode (str, optional): The mode to gather point features. Options are
'max' or 'avg'. Defaults to 'max'.
legacy (bool, optional): Whether to use the new behavior or
the original behavior. Defaults to True.
"""
def __init__(self,
......@@ -188,7 +196,8 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
voxel_size=(0.2, 0.2, 4),
point_cloud_range=(0, -40, -3, 70.4, 40, 1),
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
mode='max'):
mode='max',
legacy=True):
super(DynamicPillarFeatureNet, self).__init__(
in_channels,
feat_channels,
......@@ -198,7 +207,8 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
voxel_size=voxel_size,
point_cloud_range=point_cloud_range,
norm_cfg=norm_cfg,
mode=mode)
mode=mode,
legacy=legacy)
self.fp16_enabled = False
feat_channels = [self.in_channels] + list(feat_channels)
pfn_layers = []
......@@ -233,7 +243,7 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
Returns:
torch.Tensor: Corresponding voxel centers of each points, shape
(M, C), where M is the numver of points.
(M, C), where M is the number of points.
"""
# Step 1: scatter voxel into canvas
# Calculate necessary things for canvas creation
......
......@@ -113,11 +113,12 @@ class PFNLayer(nn.Module):
Args:
in_channels (int): Number of input channels.
out_channels (int): Number of output channels.
norm_cfg (dict): Config dict of normalization layers
last_layer (bool): If last_layer, there is no concatenation of
features.
mode (str): Pooling model to gather features inside voxels.
Default to 'max'.
norm_cfg (dict, optional): Config dict of normalization layers.
Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01).
last_layer (bool, optional): If last_layer, there is no
concatenation of features. Defaults to False.
mode (str, optional): Pooling mode to gather features inside voxels.
Defaults to 'max'.
"""
def __init__(self,
......
......@@ -17,7 +17,7 @@ class HardSimpleVFE(nn.Module):
It simply averages the values of points in a voxel.
Args:
num_features (int): Number of features to use. Default: 4.
num_features (int, optional): Number of features to use. Default: 4.
"""
def __init__(self, num_features=4):
......@@ -93,25 +93,27 @@ class DynamicVFE(nn.Module):
The number of points inside the voxel varies.
Args:
in_channels (int): Input channels of VFE. Defaults to 4.
feat_channels (list(int)): Channels of features in VFE.
with_distance (bool): Whether to use the L2 distance of points to the
origin point. Default False.
with_cluster_center (bool): Whether to use the distance to cluster
center of points inside a voxel. Default to False.
with_voxel_center (bool): Whether to use the distance to center of
voxel for each points inside a voxel. Default to False.
voxel_size (tuple[float]): Size of a single voxel. Default to
(0.2, 0.2, 4).
point_cloud_range (tuple[float]): The range of points or voxels.
Default to (0, -40, -3, 70.4, 40, 1).
norm_cfg (dict): Config dict of normalization layers.
mode (str): The mode when pooling features of points inside a voxel.
Available options include 'max' and 'avg'. Default to 'max'.
fusion_layer (dict | None): The config dict of fusion layer used in
multi-modal detectors. Default to None.
return_point_feats (bool): Whether to return the features of each
points. Default to False.
in_channels (int, optional): Input channels of VFE. Defaults to 4.
feat_channels (list(int), optional): Channels of features in VFE.
with_distance (bool, optional): Whether to use the L2 distance of
points to the origin point. Defaults to False.
with_cluster_center (bool, optional): Whether to use the distance
to cluster center of points inside a voxel. Defaults to False.
with_voxel_center (bool, optional): Whether to use the distance
to the center of the voxel for each point inside a voxel.
Defaults to False.
voxel_size (tuple[float], optional): Size of a single voxel.
Defaults to (0.2, 0.2, 4).
point_cloud_range (tuple[float], optional): The range of points
or voxels. Defaults to (0, -40, -3, 70.4, 40, 1).
norm_cfg (dict, optional): Config dict of normalization layers.
mode (str, optional): The mode when pooling features of points
inside a voxel. Available options include 'max' and 'avg'.
Defaults to 'max'.
fusion_layer (dict, optional): The config dict of fusion
layer used in multi-modal detectors. Defaults to None.
return_point_feats (bool, optional): Whether to return the features
of each point. Defaults to False.
"""
def __init__(self,
......@@ -230,7 +232,7 @@ class DynamicVFE(nn.Module):
coors (torch.Tensor): Coordinates of voxels, shape is Nx(1+NDim).
points (list[torch.Tensor], optional): Raw points used to guide the
multi-modality fusion. Defaults to None.
img_feats (list[torch.Tensor], optional): Image fetures used for
img_feats (list[torch.Tensor], optional): Image features used for
multi-modality fusion. Defaults to None.
img_metas (dict, optional): Meta information of each sample. Defaults to None.
......@@ -292,25 +294,26 @@ class HardVFE(nn.Module):
image feature into voxel features in a point-wise manner.
Args:
in_channels (int): Input channels of VFE. Defaults to 4.
feat_channels (list(int)): Channels of features in VFE.
with_distance (bool): Whether to use the L2 distance of points to the
origin point. Default False.
with_cluster_center (bool): Whether to use the distance to cluster
center of points inside a voxel. Default to False.
with_voxel_center (bool): Whether to use the distance to center of
voxel for each points inside a voxel. Default to False.
voxel_size (tuple[float]): Size of a single voxel. Default to
(0.2, 0.2, 4).
point_cloud_range (tuple[float]): The range of points or voxels.
Default to (0, -40, -3, 70.4, 40, 1).
norm_cfg (dict): Config dict of normalization layers.
mode (str): The mode when pooling features of points inside a voxel.
Available options include 'max' and 'avg'. Default to 'max'.
fusion_layer (dict | None): The config dict of fusion layer used in
multi-modal detectors. Default to None.
return_point_feats (bool): Whether to return the features of each
points. Default to False.
in_channels (int, optional): Input channels of VFE. Defaults to 4.
feat_channels (list(int), optional): Channels of features in VFE.
with_distance (bool, optional): Whether to use the L2 distance
of points to the origin point. Defaults to False.
with_cluster_center (bool, optional): Whether to use the distance
to cluster center of points inside a voxel. Defaults to False.
with_voxel_center (bool, optional): Whether to use the distance to
the center of the voxel for each point inside a voxel. Defaults to False.
voxel_size (tuple[float], optional): Size of a single voxel.
Defaults to (0.2, 0.2, 4).
point_cloud_range (tuple[float], optional): The range of points
or voxels. Defaults to (0, -40, -3, 70.4, 40, 1).
norm_cfg (dict, optional): Config dict of normalization layers.
mode (str, optional): The mode when pooling features of points inside a
voxel. Available options include 'max' and 'avg'.
Defaults to 'max'.
fusion_layer (dict, optional): The config dict of fusion layer
used in multi-modal detectors. Defaults to None.
return_point_feats (bool, optional): Whether to return the
features of each point. Defaults to False.
"""
def __init__(self,
......@@ -394,7 +397,7 @@ class HardVFE(nn.Module):
features (torch.Tensor): Features of voxels, shape is MxNxC.
num_points (torch.Tensor): Number of points in each voxel.
coors (torch.Tensor): Coordinates of voxels, shape is Mx(1+NDim).
img_feats (list[torch.Tensor], optional): Image fetures used for
img_feats (list[torch.Tensor], optional): Image features used for
multi-modality fusion. Defaults to None.
img_metas (dict, optional): Meta information of each sample. Defaults to None.
......
......@@ -4,6 +4,7 @@ from mmcv.ops import (RoIAlign, SigmoidFocalLoss, get_compiler_version,
sigmoid_focal_loss)
from .ball_query import ball_query
from .dgcnn_modules import DGCNNFAModule, DGCNNFPModule, DGCNNGFModule
from .furthest_point_sample import (Points_Sampler, furthest_point_sample,
furthest_point_sample_with_dist)
from .gather_points import gather_points
......@@ -17,8 +18,9 @@ from .pointnet_modules import (PAConvCUDASAModule, PAConvCUDASAModuleMSG,
PAConvSAModule, PAConvSAModuleMSG,
PointFPModule, PointSAModule, PointSAModuleMSG,
build_sa_module)
from .roiaware_pool3d import (RoIAwarePool3d, points_in_boxes_batch,
points_in_boxes_cpu, points_in_boxes_gpu)
from .roiaware_pool3d import (RoIAwarePool3d, points_in_boxes_all,
points_in_boxes_cpu, points_in_boxes_part)
from .roipoint_pool3d import RoIPointPool3d
from .sparse_block import (SparseBasicBlock, SparseBottleneck,
make_sparse_convmodule)
from .voxel import DynamicScatter, Voxelization, dynamic_scatter, voxelization
......@@ -29,13 +31,14 @@ __all__ = [
'NaiveSyncBatchNorm2d', 'batched_nms', 'Voxelization', 'voxelization',
'dynamic_scatter', 'DynamicScatter', 'sigmoid_focal_loss',
'SigmoidFocalLoss', 'SparseBasicBlock', 'SparseBottleneck',
'RoIAwarePool3d', 'points_in_boxes_gpu', 'points_in_boxes_cpu',
'RoIAwarePool3d', 'points_in_boxes_part', 'points_in_boxes_cpu',
'make_sparse_convmodule', 'ball_query', 'knn', 'furthest_point_sample',
'furthest_point_sample_with_dist', 'three_interpolate', 'three_nn',
'gather_points', 'grouping_operation', 'group_points', 'GroupAll',
'QueryAndGroup', 'PointSAModule', 'PointSAModuleMSG', 'PointFPModule',
'points_in_boxes_batch', 'get_compiler_version', 'assign_score_withk',
'get_compiling_cuda_version', 'Points_Sampler', 'build_sa_module',
'PAConv', 'PAConvCUDA', 'PAConvSAModuleMSG', 'PAConvSAModule',
'PAConvCUDASAModule', 'PAConvCUDASAModuleMSG'
'DGCNNFPModule', 'DGCNNGFModule', 'DGCNNFAModule', 'points_in_boxes_all',
'get_compiler_version', 'assign_score_withk', 'get_compiling_cuda_version',
'Points_Sampler', 'build_sa_module', 'PAConv', 'PAConvCUDA',
'PAConvSAModuleMSG', 'PAConvSAModule', 'PAConvCUDASAModule',
'PAConvCUDASAModuleMSG', 'RoIPointPool3d'
]
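The `mmdet3d.ops` exports above reflect the same rename at the functional level: `points_in_boxes_gpu` becomes `points_in_boxes_part` and `points_in_boxes_batch` becomes `points_in_boxes_all`, while `points_in_boxes_cpu` keeps its name. A hedged migration sketch for downstream code, assuming the argument layout matches the box-structure wrappers and a CUDA device is available:

import torch
from mmdet3d.ops import points_in_boxes_all, points_in_boxes_part

points = torch.rand(1, 100, 3).cuda()          # (B, M, 3)
boxes = torch.rand(1, 5, 7).cuda()             # (B, T, 7); these are CUDA ops
idx = points_in_boxes_part(points, boxes)      # was points_in_boxes_gpu, (B, M)
mask = points_in_boxes_all(points, boxes)      # was points_in_boxes_batch, (B, M, T)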
# Copyright (c) OpenMMLab. All rights reserved.
from .ball_query import ball_query
__all__ = ['ball_query']
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch.autograd import Function
......@@ -23,7 +24,7 @@ class BallQuery(Function):
center_xyz (Tensor): (B, npoint, 3) centers of the ball query.
Returns:
Tensor: (B, npoint, nsample) tensor with the indicies of
Tensor: (B, npoint, nsample) tensor with the indices of
the features that form the query balls.
"""
assert center_xyz.is_contiguous()
......