Unverified Commit 32a4328b authored by Wenwei Zhang, committed by GitHub

Bump version to V1.0.0rc0
parents 86cc487c a8817998
...@@ -83,15 +83,15 @@ class PointwiseSemanticHead(BaseModule):
sample.
Args:
voxel_centers (torch.Tensor): The center of voxels in shape
(voxel_num, 3).
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in
shape (box_num, 7).
gt_labels_3d (torch.Tensor): Class labels of ground truths in
shape (box_num).
Returns:
tuple[torch.Tensor]: Segmentation targets with shape [voxel_num],
part prediction targets with shape [voxel_num, 3].
"""
gt_bboxes_3d = gt_bboxes_3d.to(voxel_centers.device)
...@@ -99,8 +99,8 @@ class PointwiseSemanticHead(BaseModule):
part_targets = voxel_centers.new_zeros((voxel_centers.shape[0], 3),
dtype=torch.float32)
- box_idx = gt_bboxes_3d.points_in_boxes(voxel_centers)
+ box_idx = gt_bboxes_3d.points_in_boxes_part(voxel_centers)
- enlarge_box_idx = enlarged_gt_boxes.points_in_boxes(
+ enlarge_box_idx = enlarged_gt_boxes.points_in_boxes_part(
voxel_centers).long()
gt_labels_pad = F.pad(
...@@ -131,19 +131,19 @@ class PointwiseSemanticHead(BaseModule):
"""Generate segmentation and part prediction targets.
Args:
voxel_centers (torch.Tensor): The center of voxels in shape
(voxel_num, 3).
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in
shape (box_num, 7).
gt_labels_3d (torch.Tensor): Class labels of ground truths in
shape (box_num).
Returns:
dict: Prediction targets.
- seg_targets (torch.Tensor): Segmentation targets
with shape [voxel_num].
- part_targets (torch.Tensor): Part prediction targets
with shape [voxel_num, 3].
"""
batch_size = len(gt_labels_3d)
......
...@@ -20,7 +20,7 @@ class PrimitiveHead(BaseModule):
num_dims (int): The dimension of primitive semantic information.
num_classes (int): The number of classes.
primitive_mode (str): The mode of primitive module,
available mode ['z', 'xy', 'line'].
bbox_coder (:obj:`BaseBBoxCoder`): Bbox coder for encoding and
decoding boxes.
train_cfg (dict): Config for training.
...@@ -30,7 +30,7 @@ class PrimitiveHead(BaseModule):
feat_channels (tuple[int]): Convolution channels of
prediction layer.
upper_thresh (float): Threshold for line matching.
surface_thresh (float): Threshold for surface matching.
conv_cfg (dict): Config of convolution in prediction layer.
norm_cfg (dict): Config of BN in prediction layer.
objectness_loss (dict): Config of objectness loss.
...@@ -198,15 +198,15 @@ class PrimitiveHead(BaseModule):
Args:
bbox_preds (dict): Predictions from forward of primitive head.
points (list[torch.Tensor]): Input points.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
bboxes of each sample.
gt_labels_3d (list[torch.Tensor]): Labels of each sample.
pts_semantic_mask (list[torch.Tensor]): Point-wise
semantic mask.
pts_instance_mask (list[torch.Tensor]): Point-wise
instance mask.
img_metas (list[dict]): Contains meta info of the point cloud and image.
gt_bboxes_ignore (list[torch.Tensor]): Specify
which bounding boxes can be ignored.
Returns:
...@@ -266,12 +266,12 @@ class PrimitiveHead(BaseModule):
Args:
points (list[torch.Tensor]): Points of each batch.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
bboxes of each batch.
gt_labels_3d (list[torch.Tensor]): Labels of each batch.
pts_semantic_mask (list[torch.Tensor]): Point-wise semantic
label of each batch.
pts_instance_mask (list[torch.Tensor]): Point-wise instance
label of each batch.
bbox_preds (dict): Predictions from forward of primitive head.
...@@ -333,12 +333,12 @@ class PrimitiveHead(BaseModule):
Args:
points (torch.Tensor): Points of each batch.
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth
boxes of each batch.
gt_labels_3d (torch.Tensor): Labels of each batch.
pts_semantic_mask (torch.Tensor): Point-wise semantic
label of each batch.
pts_instance_mask (torch.Tensor): Point-wise instance
label of each batch.
Returns:
...@@ -355,7 +355,7 @@ class PrimitiveHead(BaseModule):
# Generate pts_semantic_mask and pts_instance_mask when they are None
if pts_semantic_mask is None or pts_instance_mask is None:
- points2box_mask = gt_bboxes_3d.points_in_boxes(points)
+ points2box_mask = gt_bboxes_3d.points_in_boxes_all(points)
assignment = points2box_mask.argmax(1)
background_mask = points2box_mask.max(1)[0] == 0
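The hunks above track an ops rename in this release: the old points_in_boxes call is split into points_in_boxes_part and points_in_boxes_all. A minimal sketch of the assumed semantics (shapes are the main point; the names match the new ops exports further below):
# Hedged sketch of the renamed point-in-box queries (assumed v1.0.0rc0 API):
# points_in_boxes_part returns, for every point, the index of a single box
# containing it (-1 when the point lies in no box), while points_in_boxes_all
# returns a (num_points, num_boxes) 0/1 mask so one point may hit several boxes.
box_idx = gt_bboxes_3d.points_in_boxes_part(voxel_centers)   # (voxel_num, )
box_mask = gt_bboxes_3d.points_in_boxes_all(voxel_centers)   # (voxel_num, box_num)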
......
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from torch.nn import functional as F
from mmdet3d.core import AssignResult
......
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch.nn import functional as F
from mmdet3d.core import AssignResult
from mmdet3d.core.bbox import bbox3d2result, bbox3d2roi
from mmdet.core import build_assigner, build_sampler
from mmdet.models import HEADS
from ..builder import build_head, build_roi_extractor
from .base_3droi_head import Base3DRoIHead
@HEADS.register_module()
class PointRCNNRoIHead(Base3DRoIHead):
"""RoI head for PointRCNN.
Args:
bbox_head (dict): Config of bbox_head.
point_roi_extractor (dict): Config of RoI extractor.
train_cfg (dict): Train configs.
test_cfg (dict): Test configs.
depth_normalizer (float, optional): Normalize depth feature.
Defaults to 70.0.
init_cfg (dict, optional): Config of initialization. Defaults to None.
"""
def __init__(self,
bbox_head,
point_roi_extractor,
train_cfg,
test_cfg,
depth_normalizer=70.0,
pretrained=None,
init_cfg=None):
super(PointRCNNRoIHead, self).__init__(
bbox_head=bbox_head,
train_cfg=train_cfg,
test_cfg=test_cfg,
pretrained=pretrained,
init_cfg=init_cfg)
self.depth_normalizer = depth_normalizer
if point_roi_extractor is not None:
self.point_roi_extractor = build_roi_extractor(point_roi_extractor)
self.init_assigner_sampler()
def init_bbox_head(self, bbox_head):
"""Initialize box head.
Args:
bbox_head (dict): Config dict of RoI Head.
"""
self.bbox_head = build_head(bbox_head)
def init_mask_head(self):
"""Initialize maek head."""
pass
def init_assigner_sampler(self):
"""Initialize assigner and sampler."""
self.bbox_assigner = None
self.bbox_sampler = None
if self.train_cfg:
if isinstance(self.train_cfg.assigner, dict):
self.bbox_assigner = build_assigner(self.train_cfg.assigner)
elif isinstance(self.train_cfg.assigner, list):
self.bbox_assigner = [
build_assigner(res) for res in self.train_cfg.assigner
]
self.bbox_sampler = build_sampler(self.train_cfg.sampler)
def forward_train(self, feats_dict, input_metas, proposal_list,
gt_bboxes_3d, gt_labels_3d):
"""Training forward function of PointRCNNRoIHead.
Args:
feats_dict (dict): Contains features from the first stage.
input_metas (list[dict]): Meta info of each input.
proposal_list (list[dict]): Proposal information from rpn.
The dictionary should contain the following keys:
- boxes_3d (:obj:`BaseInstance3DBoxes`): Proposal bboxes
- labels_3d (torch.Tensor): Labels of proposals
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]):
GT bboxes of each sample. The bboxes are encapsulated
by 3D box structures.
gt_labels_3d (list[LongTensor]): GT labels of each sample.
Returns:
dict: Losses from RoI RCNN head.
- loss_bbox (torch.Tensor): Loss of bboxes
"""
features = feats_dict['features']
points = feats_dict['points']
point_cls_preds = feats_dict['points_cls_preds']
sem_scores = point_cls_preds.sigmoid()
point_scores = sem_scores.max(-1)[0]
sample_results = self._assign_and_sample(proposal_list, gt_bboxes_3d,
gt_labels_3d)
# concat the depth, semantic features and backbone features
features = features.transpose(1, 2).contiguous()
point_depths = points.norm(dim=2) / self.depth_normalizer - 0.5
features_list = [
point_scores.unsqueeze(2),
point_depths.unsqueeze(2), features
]
features = torch.cat(features_list, dim=2)
bbox_results = self._bbox_forward_train(features, points,
sample_results)
losses = dict()
losses.update(bbox_results['loss_bbox'])
return losses
def simple_test(self, feats_dict, img_metas, proposal_list, **kwargs):
"""Simple testing forward function of PointRCNNRoIHead.
Note:
This function assumes that the batch size is 1.
Args:
feats_dict (dict): Contains features from the first stage.
img_metas (list[dict]): Meta info of each image.
proposal_list (list[dict]): Proposal information from rpn.
Returns:
dict: Bbox results of one frame.
"""
rois = bbox3d2roi([res['boxes_3d'].tensor for res in proposal_list])
labels_3d = [res['labels_3d'] for res in proposal_list]
features = feats_dict['features']
points = feats_dict['points']
point_cls_preds = feats_dict['points_cls_preds']
sem_scores = point_cls_preds.sigmoid()
point_scores = sem_scores.max(-1)[0]
features = features.transpose(1, 2).contiguous()
point_depths = points.norm(dim=2) / self.depth_normalizer - 0.5
features_list = [
point_scores.unsqueeze(2),
point_depths.unsqueeze(2), features
]
features = torch.cat(features_list, dim=2)
batch_size = features.shape[0]
bbox_results = self._bbox_forward(features, points, batch_size, rois)
object_score = bbox_results['cls_score'].sigmoid()
bbox_list = self.bbox_head.get_bboxes(
rois,
object_score,
bbox_results['bbox_pred'],
labels_3d,
img_metas,
cfg=self.test_cfg)
bbox_results = [
bbox3d2result(bboxes, scores, labels)
for bboxes, scores, labels in bbox_list
]
return bbox_results
def _bbox_forward_train(self, features, points, sampling_results):
"""Forward training function of roi_extractor and bbox_head.
Args:
features (torch.Tensor): Backbone features with depth and
semantic features.
points (torch.Tensor): Pointcloud.
sampling_results (:obj:`SamplingResult`): Sampled results used
for training.
Returns:
dict: Forward results including losses and predictions.
"""
rois = bbox3d2roi([res.bboxes for res in sampling_results])
batch_size = features.shape[0]
bbox_results = self._bbox_forward(features, points, batch_size, rois)
bbox_targets = self.bbox_head.get_targets(sampling_results,
self.train_cfg)
loss_bbox = self.bbox_head.loss(bbox_results['cls_score'],
bbox_results['bbox_pred'], rois,
*bbox_targets)
bbox_results.update(loss_bbox=loss_bbox)
return bbox_results
def _bbox_forward(self, features, points, batch_size, rois):
"""Forward function of roi_extractor and bbox_head used in both
training and testing.
Args:
features (torch.Tensor): Backbone features with depth and
semantic features.
points (torch.Tensor): Pointcloud.
batch_size (int): Batch size.
rois (torch.Tensor): RoI boxes.
Returns:
dict: Contains predictions of bbox_head and
features of roi_extractor.
"""
pooled_point_feats = self.point_roi_extractor(features, points,
batch_size, rois)
cls_score, bbox_pred = self.bbox_head(pooled_point_feats)
bbox_results = dict(cls_score=cls_score, bbox_pred=bbox_pred)
return bbox_results
def _assign_and_sample(self, proposal_list, gt_bboxes_3d, gt_labels_3d):
"""Assign and sample proposals for training.
Args:
proposal_list (list[dict]): Proposals produced by RPN.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
boxes.
gt_labels_3d (list[torch.Tensor]): Ground truth labels.
Returns:
list[:obj:`SamplingResult`]: Sampled results of each training
sample.
"""
sampling_results = []
# bbox assign
for batch_idx in range(len(proposal_list)):
cur_proposal_list = proposal_list[batch_idx]
cur_boxes = cur_proposal_list['boxes_3d']
cur_labels_3d = cur_proposal_list['labels_3d']
cur_gt_bboxes = gt_bboxes_3d[batch_idx].to(cur_boxes.device)
cur_gt_labels = gt_labels_3d[batch_idx]
batch_num_gts = 0
# 0 is bg
batch_gt_indis = cur_gt_labels.new_full((len(cur_boxes), ), 0)
batch_max_overlaps = cur_boxes.tensor.new_zeros(len(cur_boxes))
# -1 is bg
batch_gt_labels = cur_gt_labels.new_full((len(cur_boxes), ), -1)
# each class may have its own assigner
if isinstance(self.bbox_assigner, list):
for i, assigner in enumerate(self.bbox_assigner):
gt_per_cls = (cur_gt_labels == i)
pred_per_cls = (cur_labels_3d == i)
cur_assign_res = assigner.assign(
cur_boxes.tensor[pred_per_cls],
cur_gt_bboxes.tensor[gt_per_cls],
gt_labels=cur_gt_labels[gt_per_cls])
# gather assign_results in different class into one result
batch_num_gts += cur_assign_res.num_gts
# gt inds (1-based)
gt_inds_arange_pad = gt_per_cls.nonzero(
as_tuple=False).view(-1) + 1
# pad 0 for unassigned (background) indices
gt_inds_arange_pad = F.pad(
gt_inds_arange_pad, (1, 0), mode='constant', value=0)
# pad -1 for ignored indices
gt_inds_arange_pad = F.pad(
gt_inds_arange_pad, (1, 0), mode='constant', value=-1)
# convert to 0~gt_num+2 for indices
gt_inds_arange_pad += 1
# in batch_gt_indis: -1 is ignore, 0 is bg, >0 is fg
batch_gt_indis[pred_per_cls] = gt_inds_arange_pad[
cur_assign_res.gt_inds + 1] - 1
batch_max_overlaps[
pred_per_cls] = cur_assign_res.max_overlaps
batch_gt_labels[pred_per_cls] = cur_assign_res.labels
assign_result = AssignResult(batch_num_gts, batch_gt_indis,
batch_max_overlaps,
batch_gt_labels)
else: # for single class
assign_result = self.bbox_assigner.assign(
cur_boxes.tensor,
cur_gt_bboxes.tensor,
gt_labels=cur_gt_labels)
# sample boxes
sampling_result = self.bbox_sampler.sample(assign_result,
cur_boxes.tensor,
cur_gt_bboxes.tensor,
cur_gt_labels)
sampling_results.append(sampling_result)
return sampling_results
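The per-class branch of _assign_and_sample above remaps class-local gt indices back to indices over all gts via two pads and an offset. A hypothetical walk-through of that remapping (numbers invented, not from the source):
# Suppose cur_gt_labels = [0, 1, 0] and the loop is at class i = 0.
# gt_per_cls = [True, False, True], so the class-local gts sit at global
# 1-based indices [1, 3].
# After the two F.pad calls and the "+= 1", gt_inds_arange_pad = [0, 1, 2, 4]:
#   slot 0 -> ignored proposals, slot 1 -> background, slots 2.. -> fg gts.
# Indexing it with (cur_assign_res.gt_inds + 1) and subtracting 1 then maps the
# class-local assignments {-1, 0, 1, 2} to the global ones {-1, 0, 1, 3},
# which is what batch_gt_indis stores (-1 ignore, 0 background, >0 foreground).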
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet.models.roi_heads.roi_extractors import SingleRoIExtractor
from .single_roiaware_extractor import Single3DRoIAwareExtractor
from .single_roipoint_extractor import Single3DRoIPointExtractor
__all__ = [
'SingleRoIExtractor', 'Single3DRoIAwareExtractor',
'Single3DRoIPointExtractor'
]
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch import nn as nn
from mmdet3d import ops
from mmdet3d.core.bbox.structures import rotation_3d_in_axis
from mmdet.models.builder import ROI_EXTRACTORS
@ROI_EXTRACTORS.register_module()
class Single3DRoIPointExtractor(nn.Module):
"""Point-wise roi-aware Extractor.
Extract Point-wise roi features.
Args:
roi_layer (dict): The config of roi layer.
"""
def __init__(self, roi_layer=None):
super(Single3DRoIPointExtractor, self).__init__()
self.roi_layer = self.build_roi_layers(roi_layer)
def build_roi_layers(self, layer_cfg):
"""Build roi layers using `layer_cfg`"""
cfg = layer_cfg.copy()
layer_type = cfg.pop('type')
assert hasattr(ops, layer_type)
layer_cls = getattr(ops, layer_type)
roi_layers = layer_cls(**cfg)
return roi_layers
def forward(self, feats, coordinate, batch_inds, rois):
"""Extract point-wise roi features.
Args:
feats (torch.FloatTensor): Point-wise features with
shape (batch, npoints, channels) for pooling.
coordinate (torch.FloatTensor): Coordinate of each point.
batch_inds (int): Batch size used to reshape `rois` into per-sample boxes.
rois (torch.FloatTensor): Roi boxes with batch indices.
Returns:
torch.FloatTensor: Pooled features
"""
rois = rois[..., 1:]
rois = rois.view(batch_inds, -1, rois.shape[-1])
with torch.no_grad():
pooled_roi_feat, pooled_empty_flag = self.roi_layer(
coordinate, feats, rois)
# canonical transformation
roi_center = rois[:, :, 0:3]
pooled_roi_feat[:, :, :, 0:3] -= roi_center.unsqueeze(dim=2)
pooled_roi_feat = pooled_roi_feat.view(-1,
pooled_roi_feat.shape[-2],
pooled_roi_feat.shape[-1])
pooled_roi_feat[:, :, 0:3] = rotation_3d_in_axis(
pooled_roi_feat[:, :, 0:3],
-(rois.view(-1, rois.shape[-1])[:, 6]),
axis=2)
pooled_roi_feat[pooled_empty_flag.view(-1) > 0] = 0
return pooled_roi_feat
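The "canonical transformation" above moves every pooled point into its RoI's local frame: subtract the box centre, then undo the box yaw. A small sketch with hypothetical values (the roi layout (x, y, z, dx, dy, dz, yaw) is assumed):
import torch
from mmdet3d.core.bbox.structures import rotation_3d_in_axis

roi = torch.tensor([[2.0, 1.0, 0.0, 4.0, 2.0, 1.5, 0.5]])  # one RoI box
pts = torch.tensor([[[3.0, 1.5, 0.2]]])                     # one pooled point
local = pts - roi[:, None, 0:3]                             # translate to box centre
local = rotation_3d_in_axis(local, -roi[:, 6], axis=2)      # rotate by -yaw about z
# `local` now expresses the point in the box's own coordinate frame,
# mirroring what forward() does to pooled_roi_feat[..., 0:3].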
# Copyright (c) OpenMMLab. All rights reserved.
+ from os import path as osp
import mmcv
import numpy as np
import torch
from mmcv.parallel import DataContainer as DC
from mmcv.runner import auto_fp16
- from os import path as osp
from mmdet3d.core import show_seg_result
from mmseg.models.segmentors import BaseSegmentor
...@@ -80,7 +81,7 @@ class Base3DSegmentor(BaseSegmentor):
Args:
data (list[dict]): Input points and the information of the sample.
result (list[dict]): Prediction results.
palette (list[list[int]] | np.ndarray): The palette of
segmentation map. If None is given, random palette will be
generated. Default: None.
out_dir (str): Output directory of visualization result.
......
...@@ -187,7 +187,7 @@ class EncoderDecoder3D(Base3DSegmentor):
use_normalized_coord=False):
"""Generating model input.
Generate input by subtracting patch center and adding additional
features. Currently supports colors and normalized xyz as features.
Args:
...@@ -195,7 +195,7 @@ class EncoderDecoder3D(Base3DSegmentor):
patch_center (torch.Tensor): Center coordinate of the patch.
coord_max (torch.Tensor): Max coordinate of all 3D points.
feats (torch.Tensor): Features of sampled points of shape [S, C].
use_normalized_coord (bool, optional): Whether to use normalized
xyz as additional features. Defaults to False.
Returns:
...@@ -233,17 +233,17 @@ class EncoderDecoder3D(Base3DSegmentor):
block_size (float, optional): Size of a patch to sample.
sample_rate (float, optional): Stride used in sliding patch.
Defaults to 0.5.
use_normalized_coord (bool, optional): Whether to use normalized
xyz as additional features. Defaults to False.
eps (float, optional): A value added to patch boundary to guarantee
points coverage. Defaults to 1e-3.
Returns:
tuple[torch.Tensor]:
- patch_points (torch.Tensor): Points of different patches of
shape [K, N, 3+C].
- patch_idxs (torch.Tensor): Index of each point in
`patch_points`, of shape [K, N].
"""
device = points.device
......
# Copyright (c) OpenMMLab. All rights reserved.
from .clip_sigmoid import clip_sigmoid
from .edge_indices import get_edge_indices
from .gen_keypoints import get_keypoints
from .handle_objs import filter_outside_objs, handle_proj_objs
from .mlp import MLP
__all__ = [
'clip_sigmoid', 'MLP', 'get_edge_indices', 'filter_outside_objs',
'handle_proj_objs', 'get_keypoints'
]
...@@ -7,8 +7,8 @@ def clip_sigmoid(x, eps=1e-4):
Args:
x (torch.Tensor): Input feature map with the shape of [B, N, H, W].
eps (float, optional): Lower bound of the range to be clamped to.
Defaults to 1e-4.
Returns:
torch.Tensor: Feature map after sigmoid.
......
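For reference, clip_sigmoid is expected to behave like a sigmoid clamped away from 0 and 1; a hedged one-line equivalent (assuming the eps default documented above):
import torch
x = torch.tensor([-20.0, 0.0, 20.0])
y = torch.clamp(torch.sigmoid(x), min=1e-4, max=1 - 1e-4)
# y ~= [1e-4, 0.5, 1 - 1e-4]; clamping keeps log(y) and log(1 - y) finite
# in focal-style heatmap losses.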
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import torch
def get_edge_indices(img_metas,
downsample_ratio,
step=1,
pad_mode='default',
dtype=np.float32,
device='cpu'):
"""Function to filter the objects label outside the image.
The edge_indices are generated using numpy on cpu rather
than on CUDA due to the latency issue. When batch size = 8,
this function with numpy array is ~8 times faster than that
with CUDA tensor (0.09s and 0.72s in 100 runs).
Args:
img_metas (list[dict]): Meta information of each image, e.g.,
image size, scaling factor, etc.
downsample_ratio (int): Downsample ratio of output feature.
step (int, optional): Step size used for generating
edge indices. Default: 1.
pad_mode (str, optional): Padding mode during data pipeline.
Default: 'default'.
dtype (np.dtype, optional): Dtype of edge indices tensor.
Default: np.float32.
device (str, optional): Device of edge indices tensor.
Default: 'cpu'.
Returns:
list[Tensor]: Edge indices for each image in batch data.
"""
edge_indices_list = []
for i in range(len(img_metas)):
img_shape = img_metas[i]['img_shape']
pad_shape = img_metas[i]['pad_shape']
h, w = img_shape[:2]
pad_h, pad_w = pad_shape
edge_indices = []
if pad_mode == 'default':
x_min = 0
y_min = 0
x_max = (w - 1) // downsample_ratio
y_max = (h - 1) // downsample_ratio
elif pad_mode == 'center':
x_min = np.ceil((pad_w - w) / 2 * downsample_ratio)
y_min = np.ceil((pad_h - h) / 2 * downsample_ratio)
x_max = x_min + w // downsample_ratio
y_max = y_min + h // downsample_ratio
else:
raise NotImplementedError
# left
y = np.arange(y_min, y_max, step, dtype=dtype)
x = np.ones(len(y)) * x_min
edge_indices_edge = np.stack((x, y), axis=1)
edge_indices.append(edge_indices_edge)
# bottom
x = np.arange(x_min, x_max, step, dtype=dtype)
y = np.ones(len(x)) * y_max
edge_indices_edge = np.stack((x, y), axis=1)
edge_indices.append(edge_indices_edge)
# right
y = np.arange(y_max, y_min, -step, dtype=dtype)
x = np.ones(len(y)) * x_max
edge_indices_edge = np.stack((x, y), axis=1)
edge_indices.append(edge_indices_edge)
# top
x = np.arange(x_max, x_min, -step, dtype=dtype)
y = np.ones(len(x)) * y_min
edge_indices_edge = np.stack((x, y), axis=1)
edge_indices.append(edge_indices_edge)
edge_indices = \
np.concatenate([index for index in edge_indices], axis=0)
edge_indices = torch.from_numpy(edge_indices).to(device).long()
edge_indices_list.append(edge_indices)
return edge_indices_list
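A hypothetical call to get_edge_indices (the meta values below are made up; note the code unpacks pad_shape as (pad_h, pad_w)):
img_metas = [dict(img_shape=(375, 1242, 3), pad_shape=(384, 1248))]
edge_indices = get_edge_indices(img_metas, downsample_ratio=4, step=1)
# edge_indices[0] is a LongTensor of (x, y) positions that trace the
# feature-map border (left, bottom, right, top edges) at 1/4 resolution,
# e.g. x ranges over 0 .. (1242 - 1) // 4 in 'default' pad mode.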
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from mmdet3d.core.bbox import points_cam2img
def get_keypoints(gt_bboxes_3d_list,
centers2d_list,
img_metas,
use_local_coords=True):
"""Function to filter the objects label outside the image.
Args:
gt_bboxes_3d_list (list[:obj:`BaseInstance3DBoxes`]): Ground truth
3D bboxes of each image.
centers2d_list (list[Tensor]): Projected 3D centers onto 2D image,
shape (num_gt, 2).
img_metas (list[dict]): Meta information of each image, e.g.,
image size, scaling factor, etc.
use_local_coords (bool, optional): Whether to use local coordinates
for keypoints. Default: True.
Returns:
tuple[list[Tensor]]: It contains two elements, the first is the
keypoints for each projected 2D bbox in batch data. The second is
the visible mask of depth calculated by keypoints.
"""
assert len(gt_bboxes_3d_list) == len(centers2d_list)
bs = len(gt_bboxes_3d_list)
keypoints2d_list = []
keypoints_depth_mask_list = []
for i in range(bs):
gt_bboxes_3d = gt_bboxes_3d_list[i]
centers2d = centers2d_list[i]
img_shape = img_metas[i]['img_shape']
cam2img = img_metas[i]['cam2img']
h, w = img_shape[:2]
# (N, 8, 3)
corners3d = gt_bboxes_3d.corners
top_centers3d = torch.mean(corners3d[:, [0, 1, 4, 5], :], dim=1)
bot_centers3d = torch.mean(corners3d[:, [2, 3, 6, 7], :], dim=1)
# (N, 2, 3)
top_bot_centers3d = torch.stack((top_centers3d, bot_centers3d), dim=1)
keypoints3d = torch.cat((corners3d, top_bot_centers3d), dim=1)
# (N, 10, 2)
keypoints2d = points_cam2img(keypoints3d, cam2img)
# keypoints mask: keypoints must be inside
# the image and in front of the camera
keypoints_x_visible = (keypoints2d[..., 0] >= 0) & (
keypoints2d[..., 0] <= w - 1)
keypoints_y_visible = (keypoints2d[..., 1] >= 0) & (
keypoints2d[..., 1] <= h - 1)
keypoints_z_visible = (keypoints3d[..., -1] > 0)
# (N, 10)
keypoints_visible = keypoints_x_visible & \
keypoints_y_visible & keypoints_z_visible
# center, diag-02, diag-13
keypoints_depth_valid = torch.stack(
(keypoints_visible[:, [8, 9]].all(dim=1),
keypoints_visible[:, [0, 3, 5, 6]].all(dim=1),
keypoints_visible[:, [1, 2, 4, 7]].all(dim=1)),
dim=1)
keypoints_visible = keypoints_visible.float()
if use_local_coords:
keypoints2d = torch.cat((keypoints2d - centers2d.unsqueeze(1),
keypoints_visible.unsqueeze(-1)),
dim=2)
else:
keypoints2d = torch.cat(
(keypoints2d, keypoints_visible.unsqueeze(-1)), dim=2)
keypoints2d_list.append(keypoints2d)
keypoints_depth_mask_list.append(keypoints_depth_valid)
return (keypoints2d_list, keypoints_depth_mask_list)
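A shape summary plus a hypothetical call for get_keypoints (the input lists are assumed to exist; the per-box layout follows the code above):
# keypoints3d: (num_gt, 10, 3) = 8 box corners + top/bottom face centres
# keypoints2d: (num_gt, 10, 3) when use_local_coords=True, i.e.
#   (offset_x, offset_y, visibility) relative to each projected centre
# keypoints_depth_mask: (num_gt, 3) -> [centre pair, corner diagonal 0-3-5-6,
#   corner diagonal 1-2-4-7]; a group is valid only if all of its keypoints
#   lie inside the image and in front of the camera.
kpts2d_list, depth_mask_list = get_keypoints(
    gt_bboxes_3d_list, centers2d_list, img_metas, use_local_coords=True)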
# Copyright (c) OpenMMLab. All rights reserved.
import torch
def filter_outside_objs(gt_bboxes_list, gt_labels_list, gt_bboxes_3d_list,
gt_labels_3d_list, centers2d_list, img_metas):
"""Function to filter the objects label outside the image.
Args:
gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image,
each has shape (num_gt, 4).
gt_labels_list (list[Tensor]): Ground truth labels of each box,
each has shape (num_gt,).
gt_bboxes_3d_list (list[Tensor]): 3D Ground truth bboxes of each
image, each has shape (num_gt, bbox_code_size).
gt_labels_3d_list (list[Tensor]): 3D Ground truth labels of each
box, each has shape (num_gt,).
centers2d_list (list[Tensor]): Projected 3D centers onto 2D image,
each has shape (num_gt, 2).
img_metas (list[dict]): Meta information of each image, e.g.,
image size, scaling factor, etc.
"""
bs = len(centers2d_list)
for i in range(bs):
centers2d = centers2d_list[i].clone()
img_shape = img_metas[i]['img_shape']
keep_inds = (centers2d[:, 0] > 0) & \
(centers2d[:, 0] < img_shape[1]) & \
(centers2d[:, 1] > 0) & \
(centers2d[:, 1] < img_shape[0])
centers2d_list[i] = centers2d[keep_inds]
gt_labels_list[i] = gt_labels_list[i][keep_inds]
gt_bboxes_list[i] = gt_bboxes_list[i][keep_inds]
gt_bboxes_3d_list[i].tensor = gt_bboxes_3d_list[i].tensor[keep_inds]
gt_labels_3d_list[i] = gt_labels_3d_list[i][keep_inds]
def get_centers2d_target(centers2d, centers, img_shape):
"""Function to get target centers2d.
Args:
centers2d (Tensor): Projected 3D centers onto 2D images.
centers (Tensor): Centers of 2d gt bboxes.
img_shape (tuple): Resized image shape.
Returns:
torch.Tensor: Projected 3D centers (centers2D) target.
"""
N = centers2d.shape[0]
h, w = img_shape[:2]
valid_intersects = centers2d.new_zeros((N, 2))
a = (centers[:, 1] - centers2d[:, 1]) / (centers[:, 0] - centers2d[:, 0])
b = centers[:, 1] - a * centers[:, 0]
left_y = b
right_y = (w - 1) * a + b
top_x = -b / a
bottom_x = (h - 1 - b) / a
left_coors = torch.stack((left_y.new_zeros(N, ), left_y), dim=1)
right_coors = torch.stack((right_y.new_full((N, ), w - 1), right_y), dim=1)
top_coors = torch.stack((top_x, top_x.new_zeros(N, )), dim=1)
bottom_coors = torch.stack((bottom_x, bottom_x.new_full((N, ), h - 1)),
dim=1)
intersects = torch.stack(
[left_coors, right_coors, top_coors, bottom_coors], dim=1)
intersects_x = intersects[:, :, 0]
intersects_y = intersects[:, :, 1]
inds = (intersects_x >= 0) & (intersects_x <=
w - 1) & (intersects_y >= 0) & (
intersects_y <= h - 1)
valid_intersects = intersects[inds].reshape(N, 2, 2)
dist = torch.norm(valid_intersects - centers2d.unsqueeze(1), dim=2)
min_idx = torch.argmin(dist, dim=1)
min_idx = min_idx.unsqueeze(-1).unsqueeze(-1).expand(-1, -1, 2)
centers2d_target = valid_intersects.gather(dim=1, index=min_idx).squeeze(1)
return centers2d_target
def handle_proj_objs(centers2d_list, gt_bboxes_list, img_metas):
"""Function to handle projected object centers2d, generate target
centers2d.
Args:
gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image,
shape (num_gt, 4).
centers2d_list (list[Tensor]): Projected 3D centers onto 2D image,
shape (num_gt, 2).
img_metas (list[dict]): Meta information of each image, e.g.,
image size, scaling factor, etc.
Returns:
tuple[list[Tensor]]: It contains three elements. The first is the
target centers2d after handling the truncated objects. The second
is the offsets between target centers2d and round int dtype
centers2d, and the last is the truncation mask for each object in
batch data.
"""
bs = len(centers2d_list)
centers2d_target_list = []
trunc_mask_list = []
offsets2d_list = []
# for now, only pad mode that img is padded by right and
# bottom side is supported.
for i in range(bs):
centers2d = centers2d_list[i]
gt_bbox = gt_bboxes_list[i]
img_shape = img_metas[i]['img_shape']
centers2d_target = centers2d.clone()
inside_inds = (centers2d[:, 0] > 0) & \
(centers2d[:, 0] < img_shape[1]) & \
(centers2d[:, 1] > 0) & \
(centers2d[:, 1] < img_shape[0])
outside_inds = ~inside_inds
# if there are outside objects
if outside_inds.any():
centers = (gt_bbox[:, :2] + gt_bbox[:, 2:]) / 2
outside_centers2d = centers2d[outside_inds]
match_centers = centers[outside_inds]
target_outside_centers2d = get_centers2d_target(
outside_centers2d, match_centers, img_shape)
centers2d_target[outside_inds] = target_outside_centers2d
offsets2d = centers2d - centers2d_target.round().int()
trunc_mask = outside_inds
centers2d_target_list.append(centers2d_target)
trunc_mask_list.append(trunc_mask)
offsets2d_list.append(offsets2d)
return (centers2d_target_list, offsets2d_list, trunc_mask_list)
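A hypothetical worked example for get_centers2d_target above (numbers made up): the line through the 2D box centre and the out-of-image projected centre is intersected with the image borders, and the in-image intersection closest to the projected centre becomes the target.
import torch
img_shape = (370, 1220, 3)                  # (h, w, _)
centers2d = torch.tensor([[-50.0, 100.0]])  # projected 3D centre, left of the image
centers = torch.tensor([[200.0, 150.0]])    # centre of the matching 2D gt box
target = get_centers2d_target(centers2d, centers, img_shape)
# slope a = 0.2, intercept b = 110, so the line meets the left border at
# (0, 110) and the right border at (1219, 353.8); (0, 110) is closer to
# centers2d, hence target ~= [[0., 110.]].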
...@@ -10,15 +10,15 @@ class MLP(BaseModule):
Pass features (B, C, N) through an MLP.
Args:
in_channels (int, optional): Number of channels of input features.
Default: 18.
conv_channels (tuple[int], optional): Out channels of the convolution.
Default: (256, 256).
conv_cfg (dict, optional): Config of convolution.
Default: dict(type='Conv1d').
norm_cfg (dict, optional): Config of normalization.
Default: dict(type='BN1d').
act_cfg (dict, optional): Config of activation.
Default: dict(type='ReLU').
"""
......
...@@ -15,7 +15,6 @@ class PillarFeatureNet(nn.Module):
The network prepares the pillar features and performs forward pass
through PFNLayers.
Args:
in_channels (int, optional): Number of input features,
either x, y, z or x, y, z, r. Defaults to 4.
...@@ -33,7 +32,7 @@ class PillarFeatureNet(nn.Module):
Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01).
mode (str, optional): The mode to gather point features. Options are
'max' or 'avg'. Defaults to 'max'.
legacy (bool, optional): Whether to use the new behavior or
the original behavior. Defaults to True.
"""
...@@ -54,7 +53,7 @@ class PillarFeatureNet(nn.Module):
if with_cluster_center:
in_channels += 3
if with_voxel_center:
- in_channels += 2
+ in_channels += 3
if with_distance:
in_channels += 1
self._with_distance = with_distance
...@@ -84,8 +83,10 @@ class PillarFeatureNet(nn.Module):
# Need pillar (voxel) size and x/y/z offset in order to calculate offset
self.vx = voxel_size[0]
self.vy = voxel_size[1]
self.vz = voxel_size[2]
self.x_offset = self.vx / 2 + point_cloud_range[0]
self.y_offset = self.vy / 2 + point_cloud_range[1]
self.z_offset = self.vz / 2 + point_cloud_range[2]
self.point_cloud_range = point_cloud_range
@force_fp32(out_fp16=True)
...@@ -97,7 +98,6 @@ class PillarFeatureNet(nn.Module):
(N, M, C).
num_points (torch.Tensor): Number of points in each pillar.
coors (torch.Tensor): Coordinates of each voxel.
Returns:
torch.Tensor: Features of pillars.
"""
...@@ -114,21 +114,27 @@ class PillarFeatureNet(nn.Module):
dtype = features.dtype
if self._with_voxel_center:
if not self.legacy:
- f_center = torch.zeros_like(features[:, :, :2])
+ f_center = torch.zeros_like(features[:, :, :3])
f_center[:, :, 0] = features[:, :, 0] - (
coors[:, 3].to(dtype).unsqueeze(1) * self.vx +
self.x_offset)
f_center[:, :, 1] = features[:, :, 1] - (
coors[:, 2].to(dtype).unsqueeze(1) * self.vy +
self.y_offset)
f_center[:, :, 2] = features[:, :, 2] - (
coors[:, 1].to(dtype).unsqueeze(1) * self.vz +
self.z_offset)
else:
- f_center = features[:, :, :2]
+ f_center = features[:, :, :3]
f_center[:, :, 0] = f_center[:, :, 0] - (
coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
self.x_offset)
f_center[:, :, 1] = f_center[:, :, 1] - (
coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
self.y_offset)
f_center[:, :, 2] = f_center[:, :, 2] - (
coors[:, 1].type_as(features).unsqueeze(1) * self.vz +
self.z_offset)
features_ls.append(f_center)
if self._with_distance:
...@@ -177,6 +183,8 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01).
mode (str, optional): The mode to gather point features. Options are
'max' or 'avg'. Defaults to 'max'.
legacy (bool, optional): Whether to use the new behavior or
the original behavior. Defaults to True.
""" """
def __init__(self, def __init__(self,
...@@ -188,7 +196,8 @@ class DynamicPillarFeatureNet(PillarFeatureNet): ...@@ -188,7 +196,8 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
voxel_size=(0.2, 0.2, 4), voxel_size=(0.2, 0.2, 4),
point_cloud_range=(0, -40, -3, 70.4, 40, 1), point_cloud_range=(0, -40, -3, 70.4, 40, 1),
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01), norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
- mode='max'):
+ mode='max',
legacy=True):
super(DynamicPillarFeatureNet, self).__init__(
in_channels,
feat_channels,
...@@ -198,7 +207,8 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
voxel_size=voxel_size,
point_cloud_range=point_cloud_range,
norm_cfg=norm_cfg,
- mode=mode)
+ mode=mode,
legacy=legacy)
self.fp16_enabled = False
feat_channels = [self.in_channels] + list(feat_channels)
pfn_layers = []
...@@ -233,7 +243,7 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
Returns:
torch.Tensor: Corresponding voxel centers of each point, shape
(M, C), where M is the number of points.
"""
# Step 1: scatter voxel into canvas
# Calculate necessary things for canvas creation
......
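The hunks above extend PillarFeatureNet's voxel-centre offset from (x, y) to (x, y, z), which is why in_channels now grows by 3 and a z_offset is added. A hedged numeric sketch (voxel_size and ranges are hypothetical; coors is assumed to be laid out as (batch, z, y, x)):
voxel_size = (0.16, 0.16, 4.0)
point_cloud_range = (0, -39.68, -3, 69.12, 39.68, 1)
x_offset = voxel_size[0] / 2 + point_cloud_range[0]   # 0.08
y_offset = voxel_size[1] / 2 + point_cloud_range[1]   # -39.60
z_offset = voxel_size[2] / 2 + point_cloud_range[2]   # -1.0
# a point at x = 10.0 that falls into pillar column coors[:, 3] == 62 gets
# f_center_x = 10.0 - (62 * 0.16 + 0.08) = 0.0, i.e. its offset from the
# pillar centre; the new z channel is computed the same way with vz/z_offset.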
...@@ -113,11 +113,12 @@ class PFNLayer(nn.Module):
Args:
in_channels (int): Number of input channels.
out_channels (int): Number of output channels.
norm_cfg (dict, optional): Config dict of normalization layers.
Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01).
last_layer (bool, optional): If last_layer, there is no
concatenation of features. Defaults to False.
mode (str, optional): Pooling mode to gather features inside voxels.
Defaults to 'max'.
"""
def __init__(self,
......
...@@ -17,7 +17,7 @@ class HardSimpleVFE(nn.Module):
It simply averages the values of points in a voxel.
Args:
num_features (int, optional): Number of features to use. Default: 4.
"""
def __init__(self, num_features=4):
...@@ -93,25 +93,27 @@ class DynamicVFE(nn.Module):
The number of points inside the voxel varies.
Args:
in_channels (int, optional): Input channels of VFE. Defaults to 4.
feat_channels (list(int), optional): Channels of features in VFE.
with_distance (bool, optional): Whether to use the L2 distance of
points to the origin point. Defaults to False.
with_cluster_center (bool, optional): Whether to use the distance
to cluster center of points inside a voxel. Defaults to False.
with_voxel_center (bool, optional): Whether to use the distance
to center of voxel for each point inside a voxel.
Defaults to False.
voxel_size (tuple[float], optional): Size of a single voxel.
Defaults to (0.2, 0.2, 4).
point_cloud_range (tuple[float], optional): The range of points
or voxels. Defaults to (0, -40, -3, 70.4, 40, 1).
norm_cfg (dict, optional): Config dict of normalization layers.
mode (str, optional): The mode when pooling features of points
inside a voxel. Available options include 'max' and 'avg'.
Defaults to 'max'.
fusion_layer (dict, optional): The config dict of fusion
layer used in multi-modal detectors. Defaults to None.
return_point_feats (bool, optional): Whether to return the features
of each point. Defaults to False.
"""
def __init__(self,
...@@ -230,7 +232,7 @@ class DynamicVFE(nn.Module):
coors (torch.Tensor): Coordinates of voxels, shape is Nx(1+NDim).
points (list[torch.Tensor], optional): Raw points used to guide the
multi-modality fusion. Defaults to None.
img_feats (list[torch.Tensor], optional): Image features used for
multi-modality fusion. Defaults to None.
img_metas (dict, optional): [description]. Defaults to None.
...@@ -292,25 +294,26 @@ class HardVFE(nn.Module):
image feature into voxel features in a point-wise manner.
Args:
in_channels (int, optional): Input channels of VFE. Defaults to 4.
feat_channels (list(int), optional): Channels of features in VFE.
with_distance (bool, optional): Whether to use the L2 distance
of points to the origin point. Defaults to False.
with_cluster_center (bool, optional): Whether to use the distance
to cluster center of points inside a voxel. Defaults to False.
with_voxel_center (bool, optional): Whether to use the distance to
center of voxel for each point inside a voxel. Defaults to False.
voxel_size (tuple[float], optional): Size of a single voxel.
Defaults to (0.2, 0.2, 4).
point_cloud_range (tuple[float], optional): The range of points
or voxels. Defaults to (0, -40, -3, 70.4, 40, 1).
norm_cfg (dict, optional): Config dict of normalization layers.
mode (str, optional): The mode when pooling features of points inside a
voxel. Available options include 'max' and 'avg'.
Defaults to 'max'.
fusion_layer (dict, optional): The config dict of fusion layer
used in multi-modal detectors. Defaults to None.
return_point_feats (bool, optional): Whether to return the
features of each point. Defaults to False.
"""
def __init__(self,
...@@ -394,7 +397,7 @@ class HardVFE(nn.Module):
features (torch.Tensor): Features of voxels, shape is MxNxC.
num_points (torch.Tensor): Number of points in each voxel.
coors (torch.Tensor): Coordinates of voxels, shape is Mx(1+NDim).
img_feats (list[torch.Tensor], optional): Image features used for
multi-modality fusion. Defaults to None.
img_metas (dict, optional): [description]. Defaults to None.
......
...@@ -4,6 +4,7 @@ from mmcv.ops import (RoIAlign, SigmoidFocalLoss, get_compiler_version,
sigmoid_focal_loss)
from .ball_query import ball_query
from .dgcnn_modules import DGCNNFAModule, DGCNNFPModule, DGCNNGFModule
from .furthest_point_sample import (Points_Sampler, furthest_point_sample,
furthest_point_sample_with_dist)
from .gather_points import gather_points
...@@ -17,8 +18,9 @@ from .pointnet_modules import (PAConvCUDASAModule, PAConvCUDASAModuleMSG,
PAConvSAModule, PAConvSAModuleMSG,
PointFPModule, PointSAModule, PointSAModuleMSG,
build_sa_module)
- from .roiaware_pool3d import (RoIAwarePool3d, points_in_boxes_batch,
-                               points_in_boxes_cpu, points_in_boxes_gpu)
+ from .roiaware_pool3d import (RoIAwarePool3d, points_in_boxes_all,
+                               points_in_boxes_cpu, points_in_boxes_part)
from .roipoint_pool3d import RoIPointPool3d
from .sparse_block import (SparseBasicBlock, SparseBottleneck,
make_sparse_convmodule)
from .voxel import DynamicScatter, Voxelization, dynamic_scatter, voxelization
...@@ -29,13 +31,14 @@ __all__ = [
'NaiveSyncBatchNorm2d', 'batched_nms', 'Voxelization', 'voxelization',
'dynamic_scatter', 'DynamicScatter', 'sigmoid_focal_loss',
'SigmoidFocalLoss', 'SparseBasicBlock', 'SparseBottleneck',
'RoIAwarePool3d', 'points_in_boxes_part', 'points_in_boxes_cpu',
'make_sparse_convmodule', 'ball_query', 'knn', 'furthest_point_sample',
'furthest_point_sample_with_dist', 'three_interpolate', 'three_nn',
'gather_points', 'grouping_operation', 'group_points', 'GroupAll',
'QueryAndGroup', 'PointSAModule', 'PointSAModuleMSG', 'PointFPModule',
'DGCNNFPModule', 'DGCNNGFModule', 'DGCNNFAModule', 'points_in_boxes_all',
'get_compiler_version', 'assign_score_withk', 'get_compiling_cuda_version',
'Points_Sampler', 'build_sa_module', 'PAConv', 'PAConvCUDA',
'PAConvSAModuleMSG', 'PAConvSAModule', 'PAConvCUDASAModule',
'PAConvCUDASAModuleMSG', 'RoIPointPool3d'
]
# Copyright (c) OpenMMLab. All rights reserved.
from .ball_query import ball_query
__all__ = ['ball_query']
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch.autograd import Function
...@@ -23,7 +24,7 @@ class BallQuery(Function):
center_xyz (Tensor): (B, npoint, 3) centers of the ball query.
Returns:
Tensor: (B, npoint, nsample) tensor with the indices of
the features that form the query balls.
"""
assert center_xyz.is_contiguous()
......