Commit 21cb2aa6 authored by wangtai's avatar wangtai Committed by zhangwenwei

Refine docstrings

parent cfc3f545
......@@ -13,7 +13,7 @@ from .custom_3d import Custom3DDataset
@DATASETS.register_module()
class NuScenesDataset(Custom3DDataset):
"""NuScenes Dataset.
r"""NuScenes Dataset.
This class serves as the API for experiments on the NuScenes Dataset.
......@@ -38,9 +38,9 @@ class NuScenesDataset(Custom3DDataset):
to its original format and then convert them to `box_type_3d`.
Defaults to 'LiDAR' in this dataset. Available options include
- 'LiDAR': box in LiDAR coordinates
- 'Depth': box in depth coordinates, usually for indoor dataset
- 'Camera': box in camera coordinates
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode.
......@@ -163,14 +163,14 @@ class NuScenesDataset(Custom3DDataset):
dict: Data information that will be passed to the data \
preprocessing pipelines. It includes the following keys:
- sample_idx (str): sample index
- pts_filename (str): filename of point clouds
- sweeps (list[dict]): infos of sweeps
- timestamp (float): sample timestamp
- img_filename (str, optional): image filename
- lidar2img (list[np.ndarray], optional): transformations \
from lidar to different cameras
- ann_info (dict): annotation info
- sample_idx (str): Sample index.
- pts_filename (str): Filename of point clouds.
- sweeps (list[dict]): Infos of sweeps.
- timestamp (float): Sample timestamp.
- img_filename (str, optional): Image filename.
- lidar2img (list[np.ndarray], optional): Transformations \
from lidar to different cameras.
- ann_info (dict): Annotation info.
"""
info = self.data_infos[index]
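As a usage sketch of the info dict documented above (the data root, annotation file path and empty pipeline are hypothetical placeholders, not values from this repository):

from mmdet3d.datasets import NuScenesDataset  # assumed import path

dataset = NuScenesDataset(
    data_root='data/nuscenes/',                       # hypothetical path
    ann_file='data/nuscenes/nuscenes_infos_val.pkl',  # hypothetical path
    pipeline=[])
input_dict = dataset.get_data_info(0)
input_dict['sample_idx']    # str, sample index
input_dict['pts_filename']  # str, filename of the point cloud
input_dict['sweeps']        # list[dict], infos of sweeps
input_dict['timestamp']     # float, sample timestamp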
......@@ -219,12 +219,12 @@ class NuScenesDataset(Custom3DDataset):
index (int): Index of the annotation data to get.
Returns:
dict: annotation information consists of the following keys:
dict: Annotation information consisting of the following keys:
- gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`): \
3D ground truth bboxes
- gt_labels_3d (np.ndarray): labels of ground truths
- gt_names (list[str]): class names of ground truths
- gt_labels_3d (np.ndarray): Labels of ground truths.
- gt_names (list[str]): Class names of ground truths.
"""
info = self.data_infos[index]
# filter out bbox containing no points
......@@ -442,7 +442,7 @@ class NuScenesDataset(Custom3DDataset):
Default: None.
Returns:
dict[str, float]: results of each evaluation metric
dict[str, float]: Results of each evaluation metric.
"""
result_files, tmp_dir = self.format_results(results, jsonfile_prefix)
......@@ -496,9 +496,9 @@ def output_to_nusc_box(detection):
Args:
detection (dict): Detection results.
- boxes_3d (:obj:`BaseInstance3DBoxes`): detection bbox
- scores_3d (torch.Tensor): detection scores
- labels_3d (torch.Tensor): predicted box labels
- boxes_3d (:obj:`BaseInstance3DBoxes`): Detection bbox.
- scores_3d (torch.Tensor): Detection scores.
- labels_3d (torch.Tensor): Predicted box labels.
Returns:
list[:obj:`NuScenesBox`]: List of standard NuScenesBoxes.
......
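A minimal sketch of the detection dict consumed by output_to_nusc_box; the box count, class count and the LiDARInstance3DBoxes import path are assumptions for illustration:

import torch
from mmdet3d.core.bbox import LiDARInstance3DBoxes  # assumed import path

detection = dict(
    boxes_3d=LiDARInstance3DBoxes(torch.rand(100, 7)),  # (x, y, z, w, l, h, yaw)
    scores_3d=torch.rand(100),                          # detection scores
    labels_3d=torch.randint(0, 10, (100,)),             # predicted box labels
)
nusc_boxes = output_to_nusc_box(detection)  # -> list of NuScenesBox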
......@@ -9,7 +9,7 @@ from .custom_3d import Custom3DDataset
@DATASETS.register_module()
class ScanNetDataset(Custom3DDataset):
"""ScanNet Dataset.
r"""ScanNet Dataset.
This class serves as the API for experiments on the ScanNet Dataset.
......@@ -30,9 +30,9 @@ class ScanNetDataset(Custom3DDataset):
to its original format and then convert them to `box_type_3d`.
Defaults to 'Depth' in this dataset. Available options include
- 'LiDAR': box in LiDAR coordinates
- 'Depth': box in depth coordinates, usually for indoor dataset
- 'Camera': box in camera coordinates
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode.
......@@ -73,9 +73,9 @@ class ScanNetDataset(Custom3DDataset):
- gt_bboxes_3d (:obj:`DepthInstance3DBoxes`): \
3D ground truth bboxes
- gt_labels_3d (np.ndarray): labels of ground truths
- pts_instance_mask_path (str): path of instance masks
- pts_semantic_mask_path (str): path of semantic masks
- gt_labels_3d (np.ndarray): Labels of ground truths.
- pts_instance_mask_path (str): Path of instance masks.
- pts_semantic_mask_path (str): Path of semantic masks.
"""
# Use index to get the annos, thus the evalhook could also use this api
info = self.data_infos[index]
......
......@@ -30,9 +30,9 @@ class SUNRGBDDataset(Custom3DDataset):
to its original format and then convert them to `box_type_3d`.
Defaults to 'Depth' in this dataset. Available options include
- 'LiDAR': box in LiDAR coordinates
- 'Depth': box in depth coordinates, usually for indoor dataset
- 'Camera': box in camera coordinates
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode.
......@@ -71,9 +71,9 @@ class SUNRGBDDataset(Custom3DDataset):
- gt_bboxes_3d (:obj:`DepthInstance3DBoxes`): \
3D ground truth bboxes
- gt_labels_3d (np.ndarray): labels of ground truths
- pts_instance_mask_path (str): path of instance masks
- pts_semantic_mask_path (str): path of semantic masks
- gt_labels_3d (np.ndarray): Labels of ground truths.
- pts_instance_mask_path (str): Path of instance masks.
- pts_semantic_mask_path (str): Path of semantic masks.
"""
# Use index to get the annos, thus the evalhook could also use this api
info = self.data_infos[index]
......
......@@ -10,12 +10,12 @@ class NoStemRegNet(RegNet):
Args:
arch (dict): The parameters of RegNets.
- w0 (int): initial width
- wa (float): slope of width
- wm (float): quantization parameter to quantize the width
- depth (int): depth of the backbone
- group_w (int): width of group
- bot_mul (float): bottleneck ratio, i.e. expansion of bottlneck.
- w0 (int): Initial width.
- wa (float): Slope of width.
- wm (float): Quantization parameter to quantize the width.
- depth (int): Depth of the backbone.
- group_w (int): Width of group.
- bot_mul (float): Bottleneck ratio, i.e. expansion of bottleneck.
strides (Sequence[int]): Strides of the first block of each stage.
base_channels (int): Base channels after stem layer.
in_channels (int): Number of input image channels. Normally 3.
......@@ -26,13 +26,13 @@ class NoStemRegNet(RegNet):
the first 1x1 conv layer.
frozen_stages (int): Stages to be frozen (all param fixed). -1 means
not freezing any parameters.
norm_cfg (dict): dictionary to construct and config norm layer.
norm_cfg (dict): Dictionary to construct and config norm layer.
norm_eval (bool): Whether to set norm layers to eval mode, namely,
freeze running stats (mean and var). Note: Effect on Batch Norm
and its variants only.
with_cp (bool): Use checkpoint or not. Using checkpoint will save some
memory while slowing down the training speed.
zero_init_residual (bool): whether to use zero init for last norm layer
zero_init_residual (bool): Whether to use zero init for last norm layer
in resblocks to let them behave as identity.
Example:
......
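An illustrative arch dict with the fields documented above; the numbers follow a RegNetX-400MF-like setting and are given only as an example:

arch = dict(
    w0=24,        # initial width
    wa=24.48,     # slope of width
    wm=2.54,      # width quantization parameter
    depth=22,     # depth of the backbone
    group_w=16,   # width of group
    bot_mul=1.0,  # bottleneck ratio
)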
......@@ -11,18 +11,18 @@ class PointNet2SASSG(nn.Module):
"""PointNet2 with Single-scale grouping.
Args:
in_channels (int): input channels of point cloud.
num_points (tuple[int]): the number of points which each SA
in_channels (int): Input channels of point cloud.
num_points (tuple[int]): The number of points which each SA
module samples.
radius (tuple[float]): sampling radii of each SA module.
num_samples (tuple[int]): the number of samples for ball
radius (tuple[float]): Sampling radii of each SA module.
num_samples (tuple[int]): The number of samples for ball
query in each SA module.
sa_channels (tuple[tuple[int]]): out channels of each mlp in SA module.
fp_channels (tuple[tuple[int]]): out channels of each mlp in FP module.
norm_cfg (dict): config of normalization layer.
pool_mod (str): pool method ('max' or 'avg') for SA modules.
use_xyz (bool): whether to use xyz as a part of features.
normalize_xyz (bool): whether to normalize xyz with radii in
sa_channels (tuple[tuple[int]]): Out channels of each mlp in SA module.
fp_channels (tuple[tuple[int]]): Out channels of each mlp in FP module.
norm_cfg (dict): Config of normalization layer.
pool_mod (str): Pool method ('max' or 'avg') for SA modules.
use_xyz (bool): Whether to use xyz as a part of features.
normalize_xyz (bool): Whether to normalize xyz with radii in
each SA module.
"""
......@@ -96,12 +96,12 @@ class PointNet2SASSG(nn.Module):
"""Split coordinates and features of input points.
Args:
points (Tensor): point coordinates with features,
points (torch.Tensor): Point coordinates with features,
with shape (B, N, 3 + input_feature_dim).
Returns:
Tensor: coordinates of input points.
Tensor: features of input points.
torch.Tensor: Coordinates of input points.
torch.Tensor: Features of input points.
"""
xyz = points[..., 0:3].contiguous()
if points.size(-1) > 3:
......@@ -119,13 +119,13 @@ class PointNet2SASSG(nn.Module):
with shape (B, N, 3 + input_feature_dim).
Returns:
dict[str, list[torch.Tensor]]: outputs after SA and FP modules.
dict[str, list[torch.Tensor]]: Outputs after SA and FP modules.
- fp_xyz (list[torch.Tensor]): contains the coordinates of \
- fp_xyz (list[torch.Tensor]): The coordinates of \
each fp features.
- fp_features (list[torch.Tensor]): contains the features \
- fp_features (list[torch.Tensor]): The features \
from each Feature Propagate Layers.
- fp_indices (list[torch.Tensor]): contains indices of the \
- fp_indices (list[torch.Tensor]): Indices of the \
input points.
"""
xyz, features = self._split_point_feats(points)
......
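A hedged construction-and-forward sketch for PointNet2SASSG; the hyper-parameters below are illustrative assumptions (they resemble a VoteNet-style setting), and the import path is assumed:

import torch
from mmdet3d.models.backbones import PointNet2SASSG  # assumed import path

backbone = PointNet2SASSG(
    in_channels=4,                      # xyz plus one extra feature channel
    num_points=(2048, 1024, 512, 256),  # points sampled by each SA module
    radius=(0.2, 0.4, 0.8, 1.2),        # ball-query radii of each SA module
    num_samples=(64, 32, 16, 16),       # ball-query samples per SA module
    sa_channels=((64, 64, 128), (128, 128, 256),
                 (128, 128, 256), (128, 128, 256)),
    fp_channels=((256, 256), (256, 256)),
)
ret = backbone(torch.rand(2, 16384, 4))  # (B, N, 3 + input_feature_dim)
# ret['fp_xyz'], ret['fp_features'], ret['fp_indices'] are lists of tensors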
......@@ -10,12 +10,12 @@ class SECOND(nn.Module):
"""Backbone network for SECOND/PointPillars/PartA2/MVXNet.
Args:
in_channels (int): Input channels
out_channels (list[int]): Output channels for multi-scale feature maps
layer_nums (list[int]): Number of layers in each stage
layer_strides (list[int]): Strides of each stage
norm_cfg (dict): Config dict of normalization layers
conv_cfg (dict): Config dict of convolutional layers
in_channels (int): Input channels.
out_channels (list[int]): Output channels for multi-scale feature maps.
layer_nums (list[int]): Number of layers in each stage.
layer_strides (list[int]): Strides of each stage.
norm_cfg (dict): Config dict of normalization layers.
conv_cfg (dict): Config dict of convolutional layers.
"""
def __init__(self,
......
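An illustrative instantiation of the SECOND backbone args listed above; the values resemble a common KITTI setting and the import path is an assumption:

from mmdet3d.models.backbones import SECOND  # assumed import path

backbone = SECOND(
    in_channels=256,
    out_channels=[128, 256],  # channels of multi-scale feature maps
    layer_nums=[5, 5],        # conv layers in each stage
    layer_strides=[1, 2],     # downsampling stride of each stage
    norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
    conv_cfg=dict(type='Conv2d', bias=False),
)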
......@@ -169,11 +169,11 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
Args:
featmap_sizes (list[tuple]): Multi-level feature map sizes.
input_metas (list[dict]): Contain pcd and img's meta info.
device (str): device of current module
device (str): Device of current module.
Returns:
list[list[torch.Tensor]]: anchors of each image, valid flags \
of each image
list[list[torch.Tensor]]: Anchors of each image, valid flags \
of each image.
"""
num_imgs = len(input_metas)
# since feature map sizes of all images are the same, we only compute
......@@ -202,7 +202,7 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
num_total_samples (int): The number of valid samples.
Returns:
tuple[torch.Tensor]: losses of class, bbox \
tuple[torch.Tensor]: Losses of class, bbox \
and direction, respectively.
"""
# classification loss
......@@ -357,10 +357,10 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
class predictions.
input_metas (list[dict]): Contain pcd and img's meta info.
cfg (None | :obj:`ConfigDict`): Training or testing config.
rescale (list[torch.Tensor]): whether th rescale bbox.
rescale (list[torch.Tensor]): Whether to rescale bbox.
Returns:
list[tuple]: prediction resultes of batches.
list[tuple]: Prediction results of batches.
"""
assert len(cls_scores) == len(bbox_preds)
assert len(cls_scores) == len(dir_cls_preds)
......
......@@ -9,7 +9,7 @@ from .train_mixins import get_direction_target
@HEADS.register_module()
class FreeAnchor3DHead(Anchor3DHead):
"""`FreeAnchor <https://arxiv.org/abs/1909.02466>`_ head for 3D detection.
r"""`FreeAnchor <https://arxiv.org/abs/1909.02466>`_ head for 3D detection.
Note:
This implementation is directly modified from the `mmdet implementation
......
......@@ -27,8 +27,8 @@ class PartA2RPNHead(Anchor3DHead):
Args:
num_classes (int): Number of classes.
in_channels (int): Number of channels in the input feature map.
train_cfg (dict): train configs
test_cfg (dict): test configs
train_cfg (dict): Train configs.
test_cfg (dict): Test configs.
feat_channels (int): Number of channels of the feature map.
use_direction_classifier (bool): Whether to add a direction classifier.
anchor_generator (dict): Config dict of anchor generator.
......@@ -96,10 +96,10 @@ class PartA2RPNHead(Anchor3DHead):
bbox_preds (list[torch.Tensor]): Multi-level bbox predictions.
dir_cls_preds (list[torch.Tensor]): Multi-level direction
class predictions.
gt_bboxes (list[:obj:`BaseInstance3DBoxes`]): Gt bboxes
gt_bboxes (list[:obj:`BaseInstance3DBoxes`]): Ground truth boxes \
of each sample.
gt_labels (list[torch.Tensor]): Gt labels of each sample.
input_metas (list[dict]): Contain pcd and img's meta info.
gt_labels (list[torch.Tensor]): Labels of each sample.
input_metas (list[dict]): Point cloud and image's meta info.
gt_bboxes_ignore (None | list[torch.Tensor]): Specify
which bounding boxes can be ignored.
......
......@@ -120,8 +120,8 @@ class VoteHead(nn.Module):
4. Decode predictions.
Args:
feat_dict (dict): feature dict from backbone.
sample_mod (str): sample mode for vote aggregation layer.
feat_dict (dict): Feature dict from backbone.
sample_mod (str): Sample mode for vote aggregation layer.
Valid modes are "vote", "seed" and "random".
Returns:
......@@ -189,9 +189,9 @@ class VoteHead(nn.Module):
Args:
bbox_preds (dict): Predictions from forward of vote head.
points (list[torch.Tensor]): Input points.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Gt bboxes
of each sample.
gt_labels_3d (list[torch.Tensor]): Gt labels of each sample.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
bboxes of each sample.
gt_labels_3d (list[torch.Tensor]): Labels of each sample.
pts_semantic_mask (None | list[torch.Tensor]): Point-wise
semantic mask.
pts_instance_mask (None | list[torch.Tensor]): Point-wise
......@@ -296,14 +296,14 @@ class VoteHead(nn.Module):
Args:
points (list[torch.Tensor]): Points of each batch.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): gt bboxes of
each batch.
gt_labels_3d (list[torch.Tensor]): gt class labels of each batch.
pts_semantic_mask (None | list[torch.Tensor]): point-wise semantic
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
bboxes of each batch.
gt_labels_3d (list[torch.Tensor]): Labels of each batch.
pts_semantic_mask (None | list[torch.Tensor]): Point-wise semantic
label of each batch.
pts_instance_mask (None | list[torch.Tensor]): point-wise instance
pts_instance_mask (None | list[torch.Tensor]): Point-wise instance
label of each batch.
bbox_preds (torch.Tensor): Bbox predictions of vote head.
bbox_preds (torch.Tensor): Bounding box predictions of vote head.
Returns:
tuple[torch.Tensor]: Targets of vote head.
......@@ -382,11 +382,12 @@ class VoteHead(nn.Module):
Args:
points (torch.Tensor): Points of each batch.
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes of each batch.
gt_labels_3d (torch.Tensor): gt class labels of each batch.
pts_semantic_mask (None | torch.Tensor): point-wise semantic
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth \
boxes of each batch.
gt_labels_3d (torch.Tensor): Labels of each batch.
pts_semantic_mask (None | torch.Tensor): Point-wise semantic
label of each batch.
pts_instance_mask (None | torch.Tensor): point-wise instance
pts_instance_mask (None | torch.Tensor): Point-wise instance
label of each batch.
aggregated_points (torch.Tensor): Aggregated points from
vote aggregation layer.
......@@ -496,11 +497,11 @@ class VoteHead(nn.Module):
Args:
points (torch.Tensor): Input points.
bbox_preds (dict): Predictions from vote head.
input_metas (list[dict]): Contain pcd and img's meta info.
input_metas (list[dict]): Point cloud and image's meta info.
rescale (bool): Whether to rescale bboxes.
Returns:
list[tuple[torch.Tensor]]: Contain bbox, scores and labels.
list[tuple[torch.Tensor]]: Bounding boxes, scores and labels.
"""
# decode boxes
obj_scores = F.softmax(bbox_preds['obj_scores'], dim=-1)[..., -1]
......@@ -523,17 +524,17 @@ class VoteHead(nn.Module):
def multiclass_nms_single(self, obj_scores, sem_scores, bbox, points,
input_meta):
"""multi-class nms in single batch.
"""Multi-class nms in single batch.
Args:
obj_scores (torch.Tensor): Objectness score of bboxes.
sem_scores (torch.Tensor): semantic class score of bboxes.
bbox (torch.Tensor): Predicted bbox.
obj_scores (torch.Tensor): Objectness score of bounding boxes.
sem_scores (torch.Tensor): Semantic class score of bounding boxes.
bbox (torch.Tensor): Predicted bounding boxes.
points (torch.Tensor): Input points.
input_meta (dict): Contain pcd and img's meta info.
input_meta (dict): Point cloud and image's meta info.
Returns:
tuple[torch.Tensor]: Contain bbox, scores and labels.
tuple[torch.Tensor]: Bounding boxes, scores and labels.
"""
bbox = input_meta['box_type_3d'](
bbox,
......
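A hedged call-order sketch for the VoteHead methods documented above; vote_head, backbone_ret, points and input_metas are assumed to already exist, and get_bboxes is taken to be the bbox generation method whose docstring appears in the preceding hunks:

feat_dict = backbone_ret                              # dict from a PointNet2 backbone
bbox_preds = vote_head(feat_dict, sample_mod='vote')  # predictions dict
results = vote_head.get_bboxes(points, bbox_preds, input_metas)
# each element of `results` is a (bounding boxes, scores, labels) tuple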
......@@ -26,23 +26,24 @@ def point_sample(
"""Obtain image features using points.
Args:
img_features (Tensor): 1xCxHxW image features
points (Tensor): Nx3 point cloud in LiDAR coordinates
lidar2img_rt (Tensor): 4x4 transformation matrix
pcd_rotate_mat (Tensor): 3x3 rotation matrix of points
during augmentation
img_scale_factor (Tensor): (w_scale, h_scale)
img_crop_offset (Tensor): (w_offset, h_offset) offset used to crop
image during data augmentation
pcd_trans_factor ([type]): Translation of points in augmentation
pcd_scale_factor (float): Scale factor of points during
img_features (torch.Tensor): 1 x C x H x W image features.
points (torch.Tensor): Nx3 point cloud in LiDAR coordinates.
lidar2img_rt (torch.Tensor): 4x4 transformation matrix.
pcd_rotate_mat (torch.Tensor): 3x3 rotation matrix of points
during augmentation.
img_scale_factor (torch.Tensor): Scale factor with shape of \
(w_scale, h_scale).
img_crop_offset (torch.Tensor): Crop offset used to crop \
image during data augmentation with shape of (w_offset, h_offset).
pcd_trans_factor ([type]): Translation of points in augmentation.
pcd_scale_factor (float): Scale factor of points during
data augmentation.
pcd_flip (bool): Whether the points are flipped.
img_flip (bool): Whether the image is flipped.
img_pad_shape (tuple[int]): int tuple indicates the h & w after
padding, this is necessary to obtain features in feature map
padding, this is necessary to obtain features in feature map.
img_shape (tuple[int]): int tuple indicates the h & w before padding
after scaling, this is necessary for flipping coordinates
after scaling, this is necessary for flipping coordinates.
aligned (bool, optional): Whether use bilinear interpolation when
sampling image features for each point. Defaults to True.
padding_mode (str, optional): Padding mode when padding values for
......@@ -51,7 +52,7 @@ def point_sample(
sampling image features for each point. Defaults to True.
Returns:
(Tensor): NxC image features sampled by point coordinates
torch.Tensor: NxC image features sampled by point coordinates.
"""
# aug order: flip -> trans -> scale -> rot
# The transformation follows the augmentation order in data pipeline
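For intuition, a self-contained sketch (not the repository implementation) of the core projection step: mapping N x 3 LiDAR points to pixel coordinates with a 4 x 4 lidar2img matrix.

import torch

def project_to_img(points, lidar2img_rt):
    # homogeneous coordinates: (N, 3) -> (N, 4)
    pts_hom = torch.cat([points, points.new_ones(points.shape[0], 1)], dim=1)
    pts_cam = pts_hom @ lidar2img_rt.t()   # (N, 4) in the camera frame
    uv = pts_cam[:, :2] / pts_cam[:, 2:3]  # perspective divide -> (N, 2) pixels
    return uv

uv = project_to_img(torch.rand(16, 3), torch.eye(4))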
......@@ -229,14 +230,14 @@ class PointFusion(nn.Module):
"""Forward function.
Args:
img_feats (list[torch.Tensor]): img features
pts: [list[torch.Tensor]]: a batch of points with shape Nx3
pts_feats (torch.Tensor): a tensor consist of point features of the
total batch
img_metas (list[dict]): meta information of images
img_feats (list[torch.Tensor]): Image features.
pts (list[torch.Tensor]): A batch of points with shape N x 3.
pts_feats (torch.Tensor): A tensor consisting of point features of the
total batch.
img_metas (list[dict]): Meta information of images.
Returns:
torch.Tensor: fused features of each point.
torch.Tensor: Fused features of each point.
"""
img_pts = self.obtain_mlvl_feats(img_feats, pts, img_metas)
img_pre_fuse = self.img_transform(img_pts)
......
......@@ -10,14 +10,14 @@ class SparseEncoder(nn.Module):
r"""Sparse encoder for SECOND and Part-A2.
Args:
in_channels (int): the number of input channels
sparse_shape (list[int]): the sparse shape of input tensor
norm_cfg (dict): config of normalization layer
base_channels (int): out channels for conv_input layer
output_channels (int): out channels for conv_out layer
in_channels (int): The number of input channels.
sparse_shape (list[int]): The sparse shape of input tensor.
norm_cfg (dict): Config of normalization layer.
base_channels (int): Out channels for conv_input layer.
output_channels (int): Out channels for conv_out layer.
encoder_channels (tuple[tuple[int]]):
conv channels of each encode block
encoder_paddings (tuple[tuple[int]]): paddings of each encode block
Convolutional channels of each encode block.
encoder_paddings (tuple[tuple[int]]): Paddings of each encode block.
"""
def __init__(self,
......@@ -82,12 +82,13 @@ class SparseEncoder(nn.Module):
"""Forward of SparseEncoder.
Args:
voxel_features (torch.float32): shape [N, C]
coors (torch.int32): shape [N, 4](batch_idx, z_idx, y_idx, x_idx)
batch_size (int): batch size
voxel_features (torch.float32): Voxel features in shape (N, C).
coors (torch.int32): Coordinates in shape (N, 4), \
the columns in the order of (batch_idx, z_idx, y_idx, x_idx).
batch_size (int): Batch size.
Returns:
dict: backbone features
dict: Backbone features.
"""
coors = coors.int()
input_sp_tensor = spconv.SparseConvTensor(voxel_features, coors,
......@@ -114,12 +115,12 @@ class SparseEncoder(nn.Module):
"""make encoder layers using sparse convs.
Args:
make_block (method): a bounded function to build blocks
norm_cfg (dict[str]): config of normalization layer
in_channels (int): the number of encoder input channels
make_block (method): A bounded function to build blocks.
norm_cfg (dict[str]): Config of normalization layer.
in_channels (int): The number of encoder input channels.
Returns:
int: the number of encoder output channels
int: The number of encoder output channels.
"""
self.encoder_layers = spconv.SparseSequential()
......
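A hedged illustration of the forward inputs documented above: N voxels with C-dimensional features and integer coordinates ordered (batch_idx, z_idx, y_idx, x_idx); the shapes and value ranges are assumptions:

import torch

voxel_features = torch.rand(5000, 4)                        # (N, C)
coors = torch.randint(0, 40, (5000, 4), dtype=torch.int32)  # (N, 4)
coors[:, 0] = 0  # all voxels belong to sample 0 of the batch
# out = sparse_encoder(voxel_features, coors, batch_size=1)  # -> dict of features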
......@@ -13,17 +13,17 @@ class SparseUNet(nn.Module):
See the `paper <https://arxiv.org/abs/1907.03670>`_ for more details.
Args:
in_channels (int): the number of input channels
sparse_shape (list[int]): the sparse shape of input tensor
norm_cfg (dict): config of normalization layer
base_channels (int): out channels for conv_input layer
output_channels (int): out channels for conv_out layer
in_channels (int): The number of input channels.
sparse_shape (list[int]): The sparse shape of input tensor.
norm_cfg (dict): Config of normalization layer.
base_channels (int): Out channels for conv_input layer.
output_channels (int): Out channels for conv_out layer.
encoder_channels (tuple[tuple[int]]):
conv channels of each encode block
encoder_paddings (tuple[tuple[int]]): paddings of each encode block
Convolutional channels of each encode block.
encoder_paddings (tuple[tuple[int]]): Paddings of each encode block.
decoder_channels (tuple[tuple[int]]):
conv channels of each decode block
decoder_paddings (tuple[tuple[int]]): paddings of each decode block
Convolutional channels of each decode block.
decoder_paddings (tuple[tuple[int]]): Paddings of each decode block.
"""
def __init__(self,
......@@ -194,7 +194,7 @@ class SparseUNet(nn.Module):
in_channels (int): The number of encoder input channels.
Returns:
int: the number of encoder output channels.
int: The number of encoder output channels.
"""
self.encoder_layers = spconv.SparseSequential()
......
......@@ -76,7 +76,7 @@ class Base3DRoIHead(nn.Module, metaclass=ABCMeta):
Ground truth boxes to be ignored.
Returns:
dict[str, torch.Tensor]: losses from each head.
dict[str, torch.Tensor]: Losses from each head.
"""
pass
......
......@@ -290,7 +290,7 @@ class PartA2BboxHead(nn.Module):
rois (torch.Tensor): Roi bboxes.
labels (torch.Tensor): Labels of class.
bbox_targets (torch.Tensor): Target of positive bboxes.
pos_gt_bboxes (torch.Tensor): Gt of positive bboxes.
pos_gt_bboxes (torch.Tensor): Ground truths of positive bboxes.
reg_mask (torch.Tensor): Mask for positive bboxes.
label_weights (torch.Tensor): Weights of class loss.
bbox_weights (torch.Tensor): Weights of bbox loss.
......@@ -298,9 +298,9 @@ class PartA2BboxHead(nn.Module):
Returns:
dict: Computed losses.
- loss_cls (torch.Tensor): loss of classes.
- loss_bbox (torch.Tensor): loss of bboxes.
- loss_corner (torch.Tensor): loss of corners.
- loss_cls (torch.Tensor): Loss of classes.
- loss_bbox (torch.Tensor): Loss of bboxes.
- loss_corner (torch.Tensor): Loss of corners.
"""
losses = dict()
rcnn_batch_size = cls_score.shape[0]
......@@ -463,11 +463,11 @@ class PartA2BboxHead(nn.Module):
"""Calculate corner loss of given boxes.
Args:
pred_bbox3d (torch.FloatTensor): predicted boxes with shape (N, 7).
gt_bbox3d (torch.FloatTensor): gt boxes with shape (N, 7).
pred_bbox3d (torch.FloatTensor): Predicted boxes in shape (N, 7).
gt_bbox3d (torch.FloatTensor): Ground truth boxes in shape (N, 7).
Returns:
torch.FloatTensor: Calculated corner loss with shape (N).
torch.FloatTensor: Calculated corner loss in shape (N).
"""
assert pred_bbox3d.shape[0] == gt_bbox3d.shape[0]
......@@ -505,12 +505,12 @@ class PartA2BboxHead(nn.Module):
"""Generate bboxes from bbox head predictions.
Args:
rois (torch.Tensor): Roi bboxes.
cls_score (torch.Tensor): Scores of bboxes.
bbox_pred (torch.Tensor): Bbox predictions
rois (torch.Tensor): Roi bounding boxes.
cls_score (torch.Tensor): Scores of bounding boxes.
bbox_pred (torch.Tensor): Bounding box predictions.
class_labels (torch.Tensor): Labels of classes.
class_pred (torch.Tensor): Score for nms.
img_metas (list[dict]): Contain pcd and img's meta info.
img_metas (list[dict]): Point cloud and image's meta info.
cfg (:obj:`ConfigDict`): Testing config.
Returns:
......
......@@ -16,9 +16,9 @@ class PointwiseSemanticHead(nn.Module):
See `paper <https://arxiv.org/abs/1907.03670>`_ for more details.
Args:
in_channels (int): the number of input channel.
num_classes (int): the number of class.
extra_width (float): boxes enlarge width.
in_channels (int): The number of input channel.
num_classes (int): The number of class.
extra_width (float): Boxes enlarge width.
loss_seg (dict): Config of segmentation loss.
loss_part (dict): Config of part prediction loss.
"""
......@@ -56,11 +56,11 @@ class PointwiseSemanticHead(nn.Module):
x (torch.Tensor): Features from the first stage.
Returns:
dict: part features, segmentation and part predictions.
dict: Part features, segmentation and part predictions.
- seg_preds (torch.Tensor): segment predictions
- part_preds (torch.Tensor): part predictions
- part_feats (torch.Tensor): feature predictions
- seg_preds (torch.Tensor): Segment predictions.
- part_preds (torch.Tensor): Part predictions.
- part_feats (torch.Tensor): Feature predictions.
"""
seg_preds = self.seg_cls_layer(x) # (N, 1)
part_preds = self.seg_reg_layer(x) # (N, 3)
......@@ -80,14 +80,15 @@ class PointwiseSemanticHead(nn.Module):
sample.
Args:
voxel_centers (torch.Tensor): shape [voxel_num, 3],
the center of voxels
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt boxes with tensor
of shape [box_num, 7].
gt_labels_3d (torch.Tensor): shape [box_num], class label of gt
voxel_centers (torch.Tensor): The center of voxels in shape \
(voxel_num, 3).
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in \
shape (box_num, 7).
gt_labels_3d (torch.Tensor): Class labels of ground truths in \
shape (box_num).
Returns:
tuple[torch.Tensor]: segmentation targets with shape [voxel_num] \
tuple[torch.Tensor]: Segmentation targets with shape [voxel_num] and \
part prediction targets with shape [voxel_num, 3].
"""
gt_bboxes_3d = gt_bboxes_3d.to(voxel_centers.device)
......@@ -127,14 +128,15 @@ class PointwiseSemanticHead(nn.Module):
"""generate segmentation and part prediction targets.
Args:
voxel_centers (torch.Tensor): shape [voxel_num, 3],
the center of voxels
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): list of gt boxes
containing tensor of shape [box_num, 7].
gt_labels_3d (list[torch.Tensor]): list of GT labels.
voxel_centers (torch.Tensor): The center of voxels in shape \
(voxel_num, 3).
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
boxes, each containing a tensor in shape (box_num, 7).
gt_labels_3d (list[torch.Tensor]): Class labels of ground truths, \
each in shape (box_num).
Returns:
dict: prediction targets
dict: Prediction targets.
- seg_targets (torch.Tensor): Segmentation targets \
with shape [voxel_num].
......@@ -160,19 +162,19 @@ class PointwiseSemanticHead(nn.Module):
Args:
semantic_results (dict): Results from semantic head.
- seg_preds: segmentation predictions
- part_preds: part predictions
- seg_preds: Segmentation predictions.
- part_preds: Part predictions.
semantic_targets (dict): Targets of semantic results.
- seg_preds: segmentation targets
- part_preds: part targets
- seg_preds: Segmentation targets.
- part_preds: Part targets.
Returns:
dict: loss of segmentation and part prediction.
dict: Loss of segmentation and part prediction.
- loss_seg (torch.Tensor): segmentation prediction loss
- loss_part (torch.Tensor): part prediction loss
- loss_seg (torch.Tensor): Segmentation prediction loss.
- loss_part (torch.Tensor): Part prediction loss.
"""
seg_preds = semantic_results['seg_preds']
part_preds = semantic_results['part_preds']
......
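Illustrative shapes for the results dict consumed by the loss above; N is the number of voxels, and the 64-dim part features are an assumption:

import torch

N = 1000  # assumed voxel count
semantic_results = dict(
    seg_preds=torch.rand(N, 1),    # per-voxel segmentation logits
    part_preds=torch.rand(N, 3),   # per-voxel intra-object part locations
    part_feats=torch.rand(N, 64),  # features passed on to the RoI stage
)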
......@@ -86,6 +86,7 @@ class PartAggregationROIHead(Base3DRoIHead):
img_metas (list[dict]): Meta info of each image.
proposal_list (list[dict]): Proposal information from rpn.
The dictionary should contain the following keys:
- boxes_3d (:obj:`BaseInstance3DBoxes`): Proposal bboxes
- labels_3d (torch.Tensor): Labels of proposals
- cls_preds (torch.Tensor): Original scores of proposals
......
......@@ -12,7 +12,7 @@ class Single3DRoIAwareExtractor(nn.Module):
Extract Point-wise roi features.
Args:
roi_layer (dict): the config of roi layer
roi_layer (dict): The config of roi layer.
"""
def __init__(self, roi_layer=None):
......@@ -32,14 +32,14 @@ class Single3DRoIAwareExtractor(nn.Module):
"""Extract point-wise roi features.
Args:
feats (torch.FloatTensor): point-wise features with
shape (batch, npoints, channels) for pooling
coordinate (torch.FloatTensor): coordinate of each point
batch_inds (torch.LongTensor): indicate the batch of each point
rois (torch.FloatTensor): roi boxes with batch indices
feats (torch.FloatTensor): Point-wise features with
shape (batch, npoints, channels) for pooling.
coordinate (torch.FloatTensor): Coordinate of each point.
batch_inds (torch.LongTensor): Indicate the batch of each point.
rois (torch.FloatTensor): Roi boxes with batch indices.
Returns:
torch.FloatTensor: pooled features
torch.FloatTensor: Pooled features.
"""
pooled_roi_feats = []
for batch_idx in range(int(batch_inds.max()) + 1):
......
......@@ -22,7 +22,7 @@ class HardSimpleVFE(nn.Module):
"""Forward function.
Args:
features (torch.Tensor): point features in shape
features (torch.Tensor): Point features in shape
(N, M, 3(4)). N is the number of voxels and M is the maximum
number of points inside a single voxel.
num_points (torch.Tensor): Number of points in each voxel,
......@@ -60,7 +60,7 @@ class DynamicSimpleVFE(nn.Module):
"""Forward function.
Args:
features (torch.Tensor): point features in shape
features (torch.Tensor): Point features in shape
(N, 3(4)). N is the number of points.
coors (torch.Tensor): Coordinates of voxels.
......
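As a hedged sketch of the behaviour implied above, the hard "simple" VFE can be thought of as averaging the valid points inside each voxel; this sketch assumes the padded point slots are zero-filled:

import torch

def mean_vfe(features, num_points):
    # features: (N, M, C) zero-padded points; num_points: (N,) valid counts
    feats_sum = features.sum(dim=1)  # padded slots contribute zero to the sum
    return feats_sum / num_points.type_as(features).clamp(min=1).unsqueeze(-1)

voxel_feats = mean_vfe(torch.rand(8, 32, 4), torch.randint(1, 33, (8,)))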