Commit 21cb2aa6 authored by wangtai's avatar wangtai Committed by zhangwenwei
Browse files

Refine docstrings

parent cfc3f545
...@@ -13,7 +13,7 @@ from .custom_3d import Custom3DDataset ...@@ -13,7 +13,7 @@ from .custom_3d import Custom3DDataset
@DATASETS.register_module() @DATASETS.register_module()
class NuScenesDataset(Custom3DDataset): class NuScenesDataset(Custom3DDataset):
"""NuScenes Dataset. r"""NuScenes Dataset.
This class serves as the API for experiments on the NuScenes Dataset. This class serves as the API for experiments on the NuScenes Dataset.
...@@ -38,9 +38,9 @@ class NuScenesDataset(Custom3DDataset): ...@@ -38,9 +38,9 @@ class NuScenesDataset(Custom3DDataset):
to its original format then converted them to `box_type_3d`. to its original format then converted them to `box_type_3d`.
Defaults to 'LiDAR' in this dataset. Available options include Defaults to 'LiDAR' in this dataset. Available options include
- 'LiDAR': box in LiDAR coordinates - 'LiDAR': Box in LiDAR coordinates.
- 'Depth': box in depth coordinates, usually for indoor dataset - 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': box in camera coordinates - 'Camera': Box in camera coordinates.
filter_empty_gt (bool, optional): Whether to filter empty GT. filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True. Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode. test_mode (bool, optional): Whether the dataset is in test mode.
...@@ -163,14 +163,14 @@ class NuScenesDataset(Custom3DDataset): ...@@ -163,14 +163,14 @@ class NuScenesDataset(Custom3DDataset):
dict: Data information that will be passed to the data \ dict: Data information that will be passed to the data \
preprocessing pipelines. It includes the following keys: preprocessing pipelines. It includes the following keys:
- sample_idx (str): sample index - sample_idx (str): Sample index.
- pts_filename (str): filename of point clouds - pts_filename (str): Filename of point clouds.
- sweeps (list[dict]): infos of sweeps - sweeps (list[dict]): Infos of sweeps.
- timestamp (float): sample timestamp - timestamp (float): Sample timestamp.
- img_filename (str, optional): image filename - img_filename (str, optional): Image filename.
- lidar2img (list[np.ndarray], optional): transformations \ - lidar2img (list[np.ndarray], optional): Transformations \
from lidar to different cameras from lidar to different cameras.
- ann_info (dict): annotation info - ann_info (dict): Annotation info.
""" """
info = self.data_infos[index] info = self.data_infos[index]
...@@ -219,12 +219,12 @@ class NuScenesDataset(Custom3DDataset): ...@@ -219,12 +219,12 @@ class NuScenesDataset(Custom3DDataset):
index (int): Index of the annotation data to get. index (int): Index of the annotation data to get.
Returns: Returns:
dict: annotation information consists of the following keys: dict: Annotation information consists of the following keys:
- gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`): \ - gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`): \
3D ground truth bboxes 3D ground truth bboxes
- gt_labels_3d (np.ndarray): labels of ground truths - gt_labels_3d (np.ndarray): Labels of ground truths.
- gt_names (list[str]): class names of ground truths - gt_names (list[str]): Class names of ground truths.
""" """
info = self.data_infos[index] info = self.data_infos[index]
# filter out bbox containing no points # filter out bbox containing no points
...@@ -442,7 +442,7 @@ class NuScenesDataset(Custom3DDataset): ...@@ -442,7 +442,7 @@ class NuScenesDataset(Custom3DDataset):
Default: None. Default: None.
Returns: Returns:
dict[str, float]: results of each evaluation metric dict[str, float]: Results of each evaluation metric.
""" """
result_files, tmp_dir = self.format_results(results, jsonfile_prefix) result_files, tmp_dir = self.format_results(results, jsonfile_prefix)
...@@ -496,9 +496,9 @@ def output_to_nusc_box(detection): ...@@ -496,9 +496,9 @@ def output_to_nusc_box(detection):
Args: Args:
detection (dict): Detection results. detection (dict): Detection results.
- boxes_3d (:obj:`BaseInstance3DBoxes`): detection bbox - boxes_3d (:obj:`BaseInstance3DBoxes`): Detection bbox.
- scores_3d (torch.Tensor): detection scores - scores_3d (torch.Tensor): Detection scores.
- labels_3d (torch.Tensor): predicted box labels - labels_3d (torch.Tensor): Predicted box labels.
Returns: Returns:
list[:obj:`NuScenesBox`]: List of standard NuScenesBoxes. list[:obj:`NuScenesBox`]: List of standard NuScenesBoxes.
......
...@@ -9,7 +9,7 @@ from .custom_3d import Custom3DDataset ...@@ -9,7 +9,7 @@ from .custom_3d import Custom3DDataset
@DATASETS.register_module() @DATASETS.register_module()
class ScanNetDataset(Custom3DDataset): class ScanNetDataset(Custom3DDataset):
"""ScanNet Dataset. r"""ScanNet Dataset.
This class serves as the API for experiments on the ScanNet Dataset. This class serves as the API for experiments on the ScanNet Dataset.
...@@ -30,9 +30,9 @@ class ScanNetDataset(Custom3DDataset): ...@@ -30,9 +30,9 @@ class ScanNetDataset(Custom3DDataset):
to its original format then converted them to `box_type_3d`. to its original format then converted them to `box_type_3d`.
Defaults to 'Depth' in this dataset. Available options include Defaults to 'Depth' in this dataset. Available options include
- 'LiDAR': box in LiDAR coordinates - 'LiDAR': Box in LiDAR coordinates.
- 'Depth': box in depth coordinates, usually for indoor dataset - 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': box in camera coordinates - 'Camera': Box in camera coordinates.
filter_empty_gt (bool, optional): Whether to filter empty GT. filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True. Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode. test_mode (bool, optional): Whether the dataset is in test mode.
...@@ -73,9 +73,9 @@ class ScanNetDataset(Custom3DDataset): ...@@ -73,9 +73,9 @@ class ScanNetDataset(Custom3DDataset):
- gt_bboxes_3d (:obj:`DepthInstance3DBoxes`): \ - gt_bboxes_3d (:obj:`DepthInstance3DBoxes`): \
3D ground truth bboxes 3D ground truth bboxes
- gt_labels_3d (np.ndarray): labels of ground truths - gt_labels_3d (np.ndarray): Labels of ground truths.
- pts_instance_mask_path (str): path of instance masks - pts_instance_mask_path (str): Path of instance masks.
- pts_semantic_mask_path (str): path of semantic masks - pts_semantic_mask_path (str): Path of semantic masks.
""" """
# Use index to get the annos, thus the evalhook could also use this api # Use index to get the annos, thus the evalhook could also use this api
info = self.data_infos[index] info = self.data_infos[index]
......
...@@ -30,9 +30,9 @@ class SUNRGBDDataset(Custom3DDataset): ...@@ -30,9 +30,9 @@ class SUNRGBDDataset(Custom3DDataset):
to its original format then converted them to `box_type_3d`. to its original format then converted them to `box_type_3d`.
Defaults to 'Depth' in this dataset. Available options include Defaults to 'Depth' in this dataset. Available options include
- 'LiDAR': box in LiDAR coordinates - 'LiDAR': Box in LiDAR coordinates.
- 'Depth': box in depth coordinates, usually for indoor dataset - 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': box in camera coordinates - 'Camera': Box in camera coordinates.
filter_empty_gt (bool, optional): Whether to filter empty GT. filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True. Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode. test_mode (bool, optional): Whether the dataset is in test mode.
...@@ -71,9 +71,9 @@ class SUNRGBDDataset(Custom3DDataset): ...@@ -71,9 +71,9 @@ class SUNRGBDDataset(Custom3DDataset):
- gt_bboxes_3d (:obj:`DepthInstance3DBoxes`): \ - gt_bboxes_3d (:obj:`DepthInstance3DBoxes`): \
3D ground truth bboxes 3D ground truth bboxes
- gt_labels_3d (np.ndarray): labels of ground truths - gt_labels_3d (np.ndarray): Labels of ground truths.
- pts_instance_mask_path (str): path of instance masks - pts_instance_mask_path (str): Path of instance masks.
- pts_semantic_mask_path (str): path of semantic masks - pts_semantic_mask_path (str): Path of semantic masks.
""" """
# Use index to get the annos, thus the evalhook could also use this api # Use index to get the annos, thus the evalhook could also use this api
info = self.data_infos[index] info = self.data_infos[index]
......
...@@ -10,12 +10,12 @@ class NoStemRegNet(RegNet): ...@@ -10,12 +10,12 @@ class NoStemRegNet(RegNet):
Args: Args:
arch (dict): The parameter of RegNets. arch (dict): The parameter of RegNets.
- w0 (int): initial width - w0 (int): Initial width.
- wa (float): slope of width - wa (float): Slope of width.
- wm (float): quantization parameter to quantize the width - wm (float): Quantization parameter to quantize the width.
- depth (int): depth of the backbone - depth (int): Depth of the backbone.
- group_w (int): width of group - group_w (int): Width of group.
- bot_mul (float): bottleneck ratio, i.e. expansion of bottleneck. - bot_mul (float): Bottleneck ratio, i.e. expansion of bottleneck.
strides (Sequence[int]): Strides of the first block of each stage. strides (Sequence[int]): Strides of the first block of each stage.
base_channels (int): Base channels after stem layer. base_channels (int): Base channels after stem layer.
in_channels (int): Number of input image channels. Normally 3. in_channels (int): Number of input image channels. Normally 3.
...@@ -26,13 +26,13 @@ class NoStemRegNet(RegNet): ...@@ -26,13 +26,13 @@ class NoStemRegNet(RegNet):
the first 1x1 conv layer. the first 1x1 conv layer.
frozen_stages (int): Stages to be frozen (all param fixed). -1 means frozen_stages (int): Stages to be frozen (all param fixed). -1 means
not freezing any parameters. not freezing any parameters.
norm_cfg (dict): dictionary to construct and config norm layer. norm_cfg (dict): Dictionary to construct and config norm layer.
norm_eval (bool): Whether to set norm layers to eval mode, namely, norm_eval (bool): Whether to set norm layers to eval mode, namely,
freeze running stats (mean and var). Note: Effect on Batch Norm freeze running stats (mean and var). Note: Effect on Batch Norm
and its variants only. and its variants only.
with_cp (bool): Use checkpoint or not. Using checkpoint will save some with_cp (bool): Use checkpoint or not. Using checkpoint will save some
memory while slowing down the training speed. memory while slowing down the training speed.
zero_init_residual (bool): whether to use zero init for last norm layer zero_init_residual (bool): Whether to use zero init for last norm layer
in resblocks to let them behave as identity. in resblocks to let them behave as identity.
Example: Example:
......
...@@ -11,18 +11,18 @@ class PointNet2SASSG(nn.Module): ...@@ -11,18 +11,18 @@ class PointNet2SASSG(nn.Module):
"""PointNet2 with Single-scale grouping. """PointNet2 with Single-scale grouping.
Args: Args:
in_channels (int): input channels of point cloud. in_channels (int): Input channels of point cloud.
num_points (tuple[int]): the number of points which each SA num_points (tuple[int]): The number of points which each SA
module samples. module samples.
radius (tuple[float]): sampling radii of each SA module. radius (tuple[float]): Sampling radii of each SA module.
num_samples (tuple[int]): the number of samples for ball num_samples (tuple[int]): The number of samples for ball
query in each SA module. query in each SA module.
sa_channels (tuple[tuple[int]]): out channels of each mlp in SA module. sa_channels (tuple[tuple[int]]): Out channels of each mlp in SA module.
fp_channels (tuple[tuple[int]]): out channels of each mlp in FP module. fp_channels (tuple[tuple[int]]): Out channels of each mlp in FP module.
norm_cfg (dict): config of normalization layer. norm_cfg (dict): Config of normalization layer.
pool_mod (str): pool method ('max' or 'avg') for SA modules. pool_mod (str): Pool method ('max' or 'avg') for SA modules.
use_xyz (bool): whether to use xyz as a part of features. use_xyz (bool): Whether to use xyz as a part of features.
normalize_xyz (bool): whether to normalize xyz with radii in normalize_xyz (bool): Whether to normalize xyz with radii in
each SA module. each SA module.
""" """
...@@ -96,12 +96,12 @@ class PointNet2SASSG(nn.Module): ...@@ -96,12 +96,12 @@ class PointNet2SASSG(nn.Module):
"""Split coordinates and features of input points. """Split coordinates and features of input points.
Args: Args:
points (Tensor): point coordinates with features, points (torch.Tensor): Point coordinates with features,
with shape (B, N, 3 + input_feature_dim). with shape (B, N, 3 + input_feature_dim).
Returns: Returns:
Tensor: coordinates of input points. torch.Tensor: Coordinates of input points.
Tensor: features of input points. torch.Tensor: Features of input points.
""" """
xyz = points[..., 0:3].contiguous() xyz = points[..., 0:3].contiguous()
if points.size(-1) > 3: if points.size(-1) > 3:
...@@ -119,13 +119,13 @@ class PointNet2SASSG(nn.Module): ...@@ -119,13 +119,13 @@ class PointNet2SASSG(nn.Module):
with shape (B, N, 3 + input_feature_dim). with shape (B, N, 3 + input_feature_dim).
Returns: Returns:
dict[str, list[torch.Tensor]]: outputs after SA and FP modules. dict[str, list[torch.Tensor]]: Outputs after SA and FP modules.
- fp_xyz (list[torch.Tensor]): contains the coordinates of \ - fp_xyz (list[torch.Tensor]): The coordinates of \
each fp features. each fp features.
- fp_features (list[torch.Tensor]): contains the features \ - fp_features (list[torch.Tensor]): The features \
from each Feature Propagate Layers. from each Feature Propagate Layers.
- fp_indices (list[torch.Tensor]): contains indices of the \ - fp_indices (list[torch.Tensor]): Indices of the \
input points. input points.
""" """
xyz, features = self._split_point_feats(points) xyz, features = self._split_point_feats(points)
......
...@@ -10,12 +10,12 @@ class SECOND(nn.Module): ...@@ -10,12 +10,12 @@ class SECOND(nn.Module):
"""Backbone network for SECOND/PointPillars/PartA2/MVXNet. """Backbone network for SECOND/PointPillars/PartA2/MVXNet.
Args: Args:
in_channels (int): Input channels in_channels (int): Input channels.
out_channels (list[int]): Output channels for multi-scale feature maps out_channels (list[int]): Output channels for multi-scale feature maps.
layer_nums (list[int]): Number of layers in each stage layer_nums (list[int]): Number of layers in each stage.
layer_strides (list[int]): Strides of each stage layer_strides (list[int]): Strides of each stage.
norm_cfg (dict): Config dict of normalization layers norm_cfg (dict): Config dict of normalization layers.
conv_cfg (dict): Config dict of convolutional layers conv_cfg (dict): Config dict of convolutional layers.
""" """
def __init__(self, def __init__(self,
......
...@@ -169,11 +169,11 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin): ...@@ -169,11 +169,11 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
Args: Args:
featmap_sizes (list[tuple]): Multi-level feature map sizes. featmap_sizes (list[tuple]): Multi-level feature map sizes.
input_metas (list[dict]): contain pcd and img's meta info. input_metas (list[dict]): contain pcd and img's meta info.
device (str): device of current module device (str): device of current module.
Returns: Returns:
list[list[torch.Tensor]]: anchors of each image, valid flags \ list[list[torch.Tensor]]: Anchors of each image, valid flags \
of each image of each image.
""" """
num_imgs = len(input_metas) num_imgs = len(input_metas)
# since feature map sizes of all images are the same, we only compute # since feature map sizes of all images are the same, we only compute
...@@ -202,7 +202,7 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin): ...@@ -202,7 +202,7 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
num_total_samples (int): The number of valid samples. num_total_samples (int): The number of valid samples.
Returns: Returns:
tuple[torch.Tensor]: losses of class, bbox \ tuple[torch.Tensor]: Losses of class, bbox \
and direction, respectively. and direction, respectively.
""" """
# classification loss # classification loss
...@@ -357,10 +357,10 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin): ...@@ -357,10 +357,10 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
class predictions. class predictions.
input_metas (list[dict]): Contain pcd and img's meta info. input_metas (list[dict]): Contain pcd and img's meta info.
cfg (None | :obj:`ConfigDict`): Training or testing config. cfg (None | :obj:`ConfigDict`): Training or testing config.
rescale (list[torch.Tensor]): whether to rescale bbox. rescale (list[torch.Tensor]): Whether to rescale bbox.
Returns: Returns:
list[tuple]: prediction results of batches. list[tuple]: Prediction results of batches.
""" """
assert len(cls_scores) == len(bbox_preds) assert len(cls_scores) == len(bbox_preds)
assert len(cls_scores) == len(dir_cls_preds) assert len(cls_scores) == len(dir_cls_preds)
......
...@@ -9,7 +9,7 @@ from .train_mixins import get_direction_target ...@@ -9,7 +9,7 @@ from .train_mixins import get_direction_target
@HEADS.register_module() @HEADS.register_module()
class FreeAnchor3DHead(Anchor3DHead): class FreeAnchor3DHead(Anchor3DHead):
"""`FreeAnchor <https://arxiv.org/abs/1909.02466>`_ head for 3D detection. r"""`FreeAnchor <https://arxiv.org/abs/1909.02466>`_ head for 3D detection.
Note: Note:
This implementation is directly modified from the `mmdet implementation This implementation is directly modified from the `mmdet implementation
......
...@@ -27,8 +27,8 @@ class PartA2RPNHead(Anchor3DHead): ...@@ -27,8 +27,8 @@ class PartA2RPNHead(Anchor3DHead):
Args: Args:
num_classes (int): Number of classes. num_classes (int): Number of classes.
in_channels (int): Number of channels in the input feature map. in_channels (int): Number of channels in the input feature map.
train_cfg (dict): train configs train_cfg (dict): Train configs.
test_cfg (dict): test configs test_cfg (dict): Test configs.
feat_channels (int): Number of channels of the feature map. feat_channels (int): Number of channels of the feature map.
use_direction_classifier (bool): Whether to add a direction classifier. use_direction_classifier (bool): Whether to add a direction classifier.
anchor_generator(dict): Config dict of anchor generator. anchor_generator(dict): Config dict of anchor generator.
...@@ -96,10 +96,10 @@ class PartA2RPNHead(Anchor3DHead): ...@@ -96,10 +96,10 @@ class PartA2RPNHead(Anchor3DHead):
bbox_preds (list[torch.Tensor]): Multi-level bbox predictions. bbox_preds (list[torch.Tensor]): Multi-level bbox predictions.
dir_cls_preds (list[torch.Tensor]): Multi-level direction dir_cls_preds (list[torch.Tensor]): Multi-level direction
class predictions. class predictions.
gt_bboxes (list[:obj:`BaseInstance3DBoxes`]): Gt bboxes gt_bboxes (list[:obj:`BaseInstance3DBoxes`]): Ground truth boxes \
of each sample. of each sample.
gt_labels (list[torch.Tensor]): Gt labels of each sample. gt_labels (list[torch.Tensor]): Labels of each sample.
input_metas (list[dict]): Contain pcd and img's meta info. input_metas (list[dict]): Point cloud and image's meta info.
gt_bboxes_ignore (None | list[torch.Tensor]): Specify gt_bboxes_ignore (None | list[torch.Tensor]): Specify
which bounding. which bounding.
......
...@@ -120,8 +120,8 @@ class VoteHead(nn.Module): ...@@ -120,8 +120,8 @@ class VoteHead(nn.Module):
4. Decode predictions. 4. Decode predictions.
Args: Args:
feat_dict (dict): feature dict from backbone. feat_dict (dict): Feature dict from backbone.
sample_mod (str): sample mode for vote aggregation layer. sample_mod (str): Sample mode for vote aggregation layer.
valid modes are "vote", "seed" and "random". valid modes are "vote", "seed" and "random".
Returns: Returns:
...@@ -189,9 +189,9 @@ class VoteHead(nn.Module): ...@@ -189,9 +189,9 @@ class VoteHead(nn.Module):
Args: Args:
bbox_preds (dict): Predictions from forward of vote head. bbox_preds (dict): Predictions from forward of vote head.
points (list[torch.Tensor]): Input points. points (list[torch.Tensor]): Input points.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Gt bboxes gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
of each sample. bboxes of each sample.
gt_labels_3d (list[torch.Tensor]): Gt labels of each sample. gt_labels_3d (list[torch.Tensor]): Labels of each sample.
pts_semantic_mask (None | list[torch.Tensor]): Point-wise pts_semantic_mask (None | list[torch.Tensor]): Point-wise
semantic mask. semantic mask.
pts_instance_mask (None | list[torch.Tensor]): Point-wise pts_instance_mask (None | list[torch.Tensor]): Point-wise
...@@ -296,14 +296,14 @@ class VoteHead(nn.Module): ...@@ -296,14 +296,14 @@ class VoteHead(nn.Module):
Args: Args:
points (list[torch.Tensor]): Points of each batch. points (list[torch.Tensor]): Points of each batch.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): gt bboxes of gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
each batch. bboxes of each batch.
gt_labels_3d (list[torch.Tensor]): gt class labels of each batch. gt_labels_3d (list[torch.Tensor]): Labels of each batch.
pts_semantic_mask (None | list[torch.Tensor]): point-wise semantic pts_semantic_mask (None | list[torch.Tensor]): Point-wise semantic
label of each batch. label of each batch.
pts_instance_mask (None | list[torch.Tensor]): point-wise instance pts_instance_mask (None | list[torch.Tensor]): Point-wise instance
label of each batch. label of each batch.
bbox_preds (torch.Tensor): Bbox predictions of vote head. bbox_preds (torch.Tensor): Bounding box predictions of vote head.
Returns: Returns:
tuple[torch.Tensor]: Targets of vote head. tuple[torch.Tensor]: Targets of vote head.
...@@ -382,11 +382,12 @@ class VoteHead(nn.Module): ...@@ -382,11 +382,12 @@ class VoteHead(nn.Module):
Args: Args:
points (torch.Tensor): Points of each batch. points (torch.Tensor): Points of each batch.
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes of each batch. gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth \
gt_labels_3d (torch.Tensor): gt class labels of each batch. boxes of each batch.
pts_semantic_mask (None | torch.Tensor): point-wise semantic gt_labels_3d (torch.Tensor): Labels of each batch.
pts_semantic_mask (None | torch.Tensor): Point-wise semantic
label of each batch. label of each batch.
pts_instance_mask (None | torch.Tensor): point-wise instance pts_instance_mask (None | torch.Tensor): Point-wise instance
label of each batch. label of each batch.
aggregated_points (torch.Tensor): Aggregated points from aggregated_points (torch.Tensor): Aggregated points from
vote aggregation layer. vote aggregation layer.
...@@ -496,11 +497,11 @@ class VoteHead(nn.Module): ...@@ -496,11 +497,11 @@ class VoteHead(nn.Module):
Args: Args:
points (torch.Tensor): Input points. points (torch.Tensor): Input points.
bbox_preds (dict): Predictions from vote head. bbox_preds (dict): Predictions from vote head.
input_metas (list[dict]): Contain pcd and img's meta info. input_metas (list[dict]): Point cloud and image's meta info.
rescale (bool): Whether to rescale bboxes. rescale (bool): Whether to rescale bboxes.
Returns: Returns:
list[tuple[torch.Tensor]]: Contain bbox, scores and labels. list[tuple[torch.Tensor]]: Bounding boxes, scores and labels.
""" """
# decode boxes # decode boxes
obj_scores = F.softmax(bbox_preds['obj_scores'], dim=-1)[..., -1] obj_scores = F.softmax(bbox_preds['obj_scores'], dim=-1)[..., -1]
...@@ -523,17 +524,17 @@ class VoteHead(nn.Module): ...@@ -523,17 +524,17 @@ class VoteHead(nn.Module):
def multiclass_nms_single(self, obj_scores, sem_scores, bbox, points, def multiclass_nms_single(self, obj_scores, sem_scores, bbox, points,
input_meta): input_meta):
"""multi-class nms in single batch. """Multi-class nms in single batch.
Args: Args:
obj_scores (torch.Tensor): Objectness score of bboxes. obj_scores (torch.Tensor): Objectness score of bounding boxes.
sem_scores (torch.Tensor): semantic class score of bboxes. sem_scores (torch.Tensor): semantic class score of bounding boxes.
bbox (torch.Tensor): Predicted bbox. bbox (torch.Tensor): Predicted bounding boxes.
points (torch.Tensor): Input points. points (torch.Tensor): Input points.
input_meta (dict): Contain pcd and img's meta info. input_meta (dict): Point cloud and image's meta info.
Returns: Returns:
tuple[torch.Tensor]: Contain bbox, scores and labels. tuple[torch.Tensor]: Bounding boxes, scores and labels.
""" """
bbox = input_meta['box_type_3d']( bbox = input_meta['box_type_3d'](
bbox, bbox,
......
...@@ -26,23 +26,24 @@ def point_sample( ...@@ -26,23 +26,24 @@ def point_sample(
"""Obtain image features using points. """Obtain image features using points.
Args: Args:
img_features (Tensor): 1xCxHxW image features img_features (torch.Tensor): 1 x C x H x W image features.
points (Tensor): Nx3 point cloud in LiDAR coordinates points (torch.Tensor): Nx3 point cloud in LiDAR coordinates.
lidar2img_rt (Tensor): 4x4 transformation matrix lidar2img_rt (torch.Tensor): 4x4 transformation matrix.
pcd_rotate_mat (Tensor): 3x3 rotation matrix of points pcd_rotate_mat (torch.Tensor): 3x3 rotation matrix of points
during augmentation during augmentation.
img_scale_factor (Tensor): (w_scale, h_scale) img_scale_factor (torch.Tensor): Scale factor with shape of \
img_crop_offset (Tensor): (w_offset, h_offset) offset used to crop (w_scale, h_scale).
image during data augmentation img_crop_offset (torch.Tensor): Crop offset used to crop \
pcd_trans_factor ([type]): Translation of points in augmentation image during data augmentation with shape of (w_offset, h_offset).
pcd_scale_factor (float): Scale factor of points during pcd_trans_factor ([type]): Translation of points in augmentation.
pcd_scale_factor (float): Scale factor of points during
data augmentation data augmentation.
pcd_flip (bool): Whether the points are flipped. pcd_flip (bool): Whether the points are flipped.
img_flip (bool): Whether the image is flipped. img_flip (bool): Whether the image is flipped.
img_pad_shape (tuple[int]): int tuple indicates the h & w after img_pad_shape (tuple[int]): int tuple indicates the h & w after
padding, this is necessary to obtain features in feature map padding, this is necessary to obtain features in feature map.
img_shape (tuple[int]): int tuple indicates the h & w before padding img_shape (tuple[int]): int tuple indicates the h & w before padding
after scaling, this is necessary for flipping coordinates after scaling, this is necessary for flipping coordinates.
aligned (bool, optional): Whether use bilinear interpolation when aligned (bool, optional): Whether use bilinear interpolation when
sampling image features for each point. Defaults to True. sampling image features for each point. Defaults to True.
padding_mode (str, optional): Padding mode when padding values for padding_mode (str, optional): Padding mode when padding values for
...@@ -51,7 +52,7 @@ def point_sample( ...@@ -51,7 +52,7 @@ def point_sample(
sampling image features for each point. Defaults to True. sampling image features for each point. Defaults to True.
Returns: Returns:
(Tensor): NxC image features sampled by point coordinates torch.Tensor: NxC image features sampled by point coordinates.
""" """
# aug order: flip -> trans -> scale -> rot # aug order: flip -> trans -> scale -> rot
# The transformation follows the augmentation order in data pipeline # The transformation follows the augmentation order in data pipeline
...@@ -229,14 +230,14 @@ class PointFusion(nn.Module): ...@@ -229,14 +230,14 @@ class PointFusion(nn.Module):
"""Forward function. """Forward function.
Args: Args:
img_feats (list[torch.Tensor]): img features img_feats (list[torch.Tensor]): Image features.
pts: [list[torch.Tensor]]: a batch of points with shape Nx3 pts: [list[torch.Tensor]]: A batch of points with shape N x 3.
pts_feats (torch.Tensor): a tensor consisting of point features of the pts_feats (torch.Tensor): A tensor consisting of point features of the
total batch total batch.
img_metas (list[dict]): meta information of images img_metas (list[dict]): Meta information of images.
Returns: Returns:
torch.Tensor: fused features of each point. torch.Tensor: Fused features of each point.
""" """
img_pts = self.obtain_mlvl_feats(img_feats, pts, img_metas) img_pts = self.obtain_mlvl_feats(img_feats, pts, img_metas)
img_pre_fuse = self.img_transform(img_pts) img_pre_fuse = self.img_transform(img_pts)
......
...@@ -10,14 +10,14 @@ class SparseEncoder(nn.Module): ...@@ -10,14 +10,14 @@ class SparseEncoder(nn.Module):
r"""Sparse encoder for SECOND and Part-A2. r"""Sparse encoder for SECOND and Part-A2.
Args: Args:
in_channels (int): the number of input channels in_channels (int): The number of input channels.
sparse_shape (list[int]): the sparse shape of input tensor sparse_shape (list[int]): The sparse shape of input tensor.
norm_cfg (dict): config of normalization layer norm_cfg (dict): Config of normalization layer.
base_channels (int): out channels for conv_input layer base_channels (int): Out channels for conv_input layer.
output_channels (int): out channels for conv_out layer output_channels (int): Out channels for conv_out layer.
encoder_channels (tuple[tuple[int]]): encoder_channels (tuple[tuple[int]]):
conv channels of each encode block Convolutional channels of each encode block.
encoder_paddings (tuple[tuple[int]]): paddings of each encode block encoder_paddings (tuple[tuple[int]]): Paddings of each encode block.
""" """
def __init__(self, def __init__(self,
...@@ -82,12 +82,13 @@ class SparseEncoder(nn.Module): ...@@ -82,12 +82,13 @@ class SparseEncoder(nn.Module):
"""Forward of SparseEncoder. """Forward of SparseEncoder.
Args: Args:
voxel_features (torch.float32): shape [N, C] voxel_features (torch.float32): Voxel features in shape (N, C).
coors (torch.int32): shape [N, 4](batch_idx, z_idx, y_idx, x_idx) coors (torch.int32): Coordinates in shape (N, 4), \
batch_size (int): batch size the columns in the order of (batch_idx, z_idx, y_idx, x_idx).
batch_size (int): Batch size.
Returns: Returns:
dict: backbone features dict: Backbone features.
""" """
coors = coors.int() coors = coors.int()
input_sp_tensor = spconv.SparseConvTensor(voxel_features, coors, input_sp_tensor = spconv.SparseConvTensor(voxel_features, coors,
...@@ -114,12 +115,12 @@ class SparseEncoder(nn.Module): ...@@ -114,12 +115,12 @@ class SparseEncoder(nn.Module):
"""make encoder layers using sparse convs. """make encoder layers using sparse convs.
Args: Args:
make_block (method): a bounded function to build blocks make_block (method): A bounded function to build blocks.
norm_cfg (dict[str]): config of normalization layer norm_cfg (dict[str]): Config of normalization layer.
in_channels (int): the number of encoder input channels in_channels (int): The number of encoder input channels.
Returns: Returns:
int: the number of encoder output channels int: The number of encoder output channels.
""" """
self.encoder_layers = spconv.SparseSequential() self.encoder_layers = spconv.SparseSequential()
......
...@@ -13,17 +13,17 @@ class SparseUNet(nn.Module): ...@@ -13,17 +13,17 @@ class SparseUNet(nn.Module):
See the `paper <https://arxiv.org/abs/1907.03670>`_ for more details. See the `paper <https://arxiv.org/abs/1907.03670>`_ for more details.
Args: Args:
in_channels (int): the number of input channels in_channels (int): The number of input channels.
sparse_shape (list[int]): the sparse shape of input tensor sparse_shape (list[int]): The sparse shape of input tensor.
norm_cfg (dict): config of normalization layer norm_cfg (dict): Config of normalization layer.
base_channels (int): out channels for conv_input layer base_channels (int): Out channels for conv_input layer.
output_channels (int): out channels for conv_out layer output_channels (int): Out channels for conv_out layer.
encoder_channels (tuple[tuple[int]]): encoder_channels (tuple[tuple[int]]):
conv channels of each encode block Convolutional channels of each encode block.
encoder_paddings (tuple[tuple[int]]): paddings of each encode block encoder_paddings (tuple[tuple[int]]): Paddings of each encode block.
decoder_channels (tuple[tuple[int]]): decoder_channels (tuple[tuple[int]]):
conv channels of each decode block Convolutional channels of each decode block.
decoder_paddings (tuple[tuple[int]]): paddings of each decode block decoder_paddings (tuple[tuple[int]]): Paddings of each decode block.
""" """
def __init__(self, def __init__(self,
...@@ -194,7 +194,7 @@ class SparseUNet(nn.Module): ...@@ -194,7 +194,7 @@ class SparseUNet(nn.Module):
in_channels (int): The number of encoder input channels. in_channels (int): The number of encoder input channels.
Returns: Returns:
int: the number of encoder output channels. int: The number of encoder output channels.
""" """
self.encoder_layers = spconv.SparseSequential() self.encoder_layers = spconv.SparseSequential()
......
...@@ -76,7 +76,7 @@ class Base3DRoIHead(nn.Module, metaclass=ABCMeta): ...@@ -76,7 +76,7 @@ class Base3DRoIHead(nn.Module, metaclass=ABCMeta):
Ground truth boxes to be ignored. Ground truth boxes to be ignored.
Returns: Returns:
dict[str, torch.Tensor]: losses from each head. dict[str, torch.Tensor]: Losses from each head.
""" """
pass pass
......
...@@ -290,7 +290,7 @@ class PartA2BboxHead(nn.Module): ...@@ -290,7 +290,7 @@ class PartA2BboxHead(nn.Module):
rois (torch.Tensor): Roi bboxes. rois (torch.Tensor): Roi bboxes.
labels (torch.Tensor): Labels of class. labels (torch.Tensor): Labels of class.
bbox_targets (torch.Tensor): Target of positive bboxes. bbox_targets (torch.Tensor): Target of positive bboxes.
pos_gt_bboxes (torch.Tensor): Gt of positive bboxes. pos_gt_bboxes (torch.Tensor): Ground truths of positive bboxes.
reg_mask (torch.Tensor): Mask for positive bboxes. reg_mask (torch.Tensor): Mask for positive bboxes.
label_weights (torch.Tensor): Weights of class loss. label_weights (torch.Tensor): Weights of class loss.
bbox_weights (torch.Tensor): Weights of bbox loss. bbox_weights (torch.Tensor): Weights of bbox loss.
...@@ -298,9 +298,9 @@ class PartA2BboxHead(nn.Module): ...@@ -298,9 +298,9 @@ class PartA2BboxHead(nn.Module):
Returns: Returns:
dict: Computed losses. dict: Computed losses.
- loss_cls (torch.Tensor): loss of classes. - loss_cls (torch.Tensor): Loss of classes.
- loss_bbox (torch.Tensor): loss of bboxes. - loss_bbox (torch.Tensor): Loss of bboxes.
- loss_corner (torch.Tensor): loss of corners. - loss_corner (torch.Tensor): Loss of corners.
""" """
losses = dict() losses = dict()
rcnn_batch_size = cls_score.shape[0] rcnn_batch_size = cls_score.shape[0]
...@@ -463,11 +463,11 @@ class PartA2BboxHead(nn.Module): ...@@ -463,11 +463,11 @@ class PartA2BboxHead(nn.Module):
"""Calculate corner loss of given boxes. """Calculate corner loss of given boxes.
Args: Args:
pred_bbox3d (torch.FloatTensor): predicted boxes with shape (N, 7). pred_bbox3d (torch.FloatTensor): Predicted boxes in shape (N, 7).
gt_bbox3d (torch.FloatTensor): gt boxes with shape (N, 7). gt_bbox3d (torch.FloatTensor): Ground truth boxes in shape (N, 7).
Returns: Returns:
torch.FloatTensor: Calculated corner loss with shape (N). torch.FloatTensor: Calculated corner loss in shape (N).
""" """
assert pred_bbox3d.shape[0] == gt_bbox3d.shape[0] assert pred_bbox3d.shape[0] == gt_bbox3d.shape[0]
...@@ -505,12 +505,12 @@ class PartA2BboxHead(nn.Module): ...@@ -505,12 +505,12 @@ class PartA2BboxHead(nn.Module):
"""Generate bboxes from bbox head predictions. """Generate bboxes from bbox head predictions.
Args: Args:
rois (torch.Tensor): Roi bboxes. rois (torch.Tensor): Roi bounding boxes.
cls_score (torch.Tensor): Scores of bboxes. cls_score (torch.Tensor): Scores of bounding boxes.
bbox_pred (torch.Tensor): Bbox predictions bbox_pred (torch.Tensor): Bounding boxes predictions
class_labels (torch.Tensor): Label of classes class_labels (torch.Tensor): Label of classes
class_pred (torch.Tensor): Score for nms. class_pred (torch.Tensor): Score for nms.
img_metas (list[dict]): Contain pcd and img's meta info. img_metas (list[dict]): Point cloud and image's meta info.
cfg (:obj:`ConfigDict`): Testing config. cfg (:obj:`ConfigDict`): Testing config.
Returns: Returns:
......
...@@ -16,9 +16,9 @@ class PointwiseSemanticHead(nn.Module): ...@@ -16,9 +16,9 @@ class PointwiseSemanticHead(nn.Module):
See `paper <https://arxiv.org/abs/1907.03670>`_ for more detials. See `paper <https://arxiv.org/abs/1907.03670>`_ for more detials.
Args: Args:
in_channels (int): the number of input channel. in_channels (int): The number of input channel.
num_classes (int): the number of class. num_classes (int): The number of class.
extra_width (float): boxes enlarge width. extra_width (float): Boxes enlarge width.
loss_seg (dict): Config of segmentation loss. loss_seg (dict): Config of segmentation loss.
loss_part (dict): Config of part prediction loss. loss_part (dict): Config of part prediction loss.
""" """
...@@ -56,11 +56,11 @@ class PointwiseSemanticHead(nn.Module): ...@@ -56,11 +56,11 @@ class PointwiseSemanticHead(nn.Module):
x (torch.Tensor): Features from the first stage. x (torch.Tensor): Features from the first stage.
Returns: Returns:
dict: part features, segmentation and part predictions. dict: Part features, segmentation and part predictions.
- seg_preds (torch.Tensor): segment predictions - seg_preds (torch.Tensor): Segment predictions.
- part_preds (torch.Tensor): part predictions - part_preds (torch.Tensor): Part predictions.
- part_feats (torch.Tensor): feature predictions - part_feats (torch.Tensor): Feature predictions.
""" """
seg_preds = self.seg_cls_layer(x) # (N, 1) seg_preds = self.seg_cls_layer(x) # (N, 1)
part_preds = self.seg_reg_layer(x) # (N, 3) part_preds = self.seg_reg_layer(x) # (N, 3)
...@@ -80,14 +80,15 @@ class PointwiseSemanticHead(nn.Module): ...@@ -80,14 +80,15 @@ class PointwiseSemanticHead(nn.Module):
sample. sample.
Args: Args:
voxel_centers (torch.Tensor): shape [voxel_num, 3], voxel_centers (torch.Tensor): The center of voxels in shape \
the center of voxels (voxel_num, 3).
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt boxes with tensor gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in \
of shape [box_num, 7]. shape (box_num, 7).
gt_labels_3d (torch.Tensor): shape [box_num], class label of gt gt_labels_3d (torch.Tensor): Class labels of ground truths in \
shape (box_num).
Returns: Returns:
tuple[torch.Tensor]: segmentation targets with shape [voxel_num] \ tuple[torch.Tensor]: Segmentation targets with shape [voxel_num] \
part prediction targets with shape [voxel_num, 3] part prediction targets with shape [voxel_num, 3]
""" """
gt_bboxes_3d = gt_bboxes_3d.to(voxel_centers.device) gt_bboxes_3d = gt_bboxes_3d.to(voxel_centers.device)
...@@ -127,14 +128,15 @@ class PointwiseSemanticHead(nn.Module): ...@@ -127,14 +128,15 @@ class PointwiseSemanticHead(nn.Module):
"""generate segmentation and part prediction targets. """generate segmentation and part prediction targets.
Args: Args:
voxel_centers (torch.Tensor): shape [voxel_num, 3], voxel_centers (torch.Tensor): The center of voxels in shape \
the center of voxels (voxel_num, 3).
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): list of gt boxes gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in \
containing tensor of shape [box_num, 7]. shape (box_num, 7).
gt_labels_3d (list[torch.Tensor]): list of GT labels. gt_labels_3d (torch.Tensor): Class labels of ground truths in \
shape (box_num).
Returns: Returns:
dict: prediction targets dict: Prediction targets
- seg_targets (torch.Tensor): Segmentation targets \ - seg_targets (torch.Tensor): Segmentation targets \
with shape [voxel_num]. with shape [voxel_num].
...@@ -160,19 +162,19 @@ class PointwiseSemanticHead(nn.Module): ...@@ -160,19 +162,19 @@ class PointwiseSemanticHead(nn.Module):
Args: Args:
semantic_results (dict): Results from semantic head. semantic_results (dict): Results from semantic head.
- seg_preds: segmentation predictions - seg_preds: Segmentation predictions.
- part_preds: part predictions - part_preds: Part predictions.
semantic_targets (dict): Targets of semantic results. semantic_targets (dict): Targets of semantic results.
- seg_preds: segmentation targets - seg_preds: Segmentation targets.
- part_preds: part targets - part_preds: Part targets.
Returns: Returns:
dict: loss of segmentation and part prediction. dict: Loss of segmentation and part prediction.
- loss_seg (torch.Tensor): segmentation prediction loss - loss_seg (torch.Tensor): Segmentation prediction loss.
- loss_part (torch.Tensor): part prediction loss - loss_part (torch.Tensor): Part prediction loss.
""" """
seg_preds = semantic_results['seg_preds'] seg_preds = semantic_results['seg_preds']
part_preds = semantic_results['part_preds'] part_preds = semantic_results['part_preds']
......
...@@ -86,6 +86,7 @@ class PartAggregationROIHead(Base3DRoIHead): ...@@ -86,6 +86,7 @@ class PartAggregationROIHead(Base3DRoIHead):
img_metas (list[dict]): Meta info of each image. img_metas (list[dict]): Meta info of each image.
proposal_list (list[dict]): Proposal information from rpn. proposal_list (list[dict]): Proposal information from rpn.
The dictionary should contain the following keys: The dictionary should contain the following keys:
- boxes_3d (:obj:`BaseInstance3DBoxes`): Proposal bboxes - boxes_3d (:obj:`BaseInstance3DBoxes`): Proposal bboxes
- labels_3d (torch.Tensor): Labels of proposals - labels_3d (torch.Tensor): Labels of proposals
- cls_preds (torch.Tensor): Original scores of proposals - cls_preds (torch.Tensor): Original scores of proposals
......
...@@ -12,7 +12,7 @@ class Single3DRoIAwareExtractor(nn.Module): ...@@ -12,7 +12,7 @@ class Single3DRoIAwareExtractor(nn.Module):
Extract Point-wise roi features. Extract Point-wise roi features.
Args: Args:
roi_layer (dict): the config of roi layer roi_layer (dict): The config of roi layer.
""" """
def __init__(self, roi_layer=None): def __init__(self, roi_layer=None):
...@@ -32,14 +32,14 @@ class Single3DRoIAwareExtractor(nn.Module): ...@@ -32,14 +32,14 @@ class Single3DRoIAwareExtractor(nn.Module):
"""Extract point-wise roi features. """Extract point-wise roi features.
Args: Args:
feats (torch.FloatTensor): point-wise features with feats (torch.FloatTensor): Point-wise features with
shape (batch, npoints, channels) for pooling shape (batch, npoints, channels) for pooling.
coordinate (torch.FloatTensor): coordinate of each point coordinate (torch.FloatTensor): Coordinate of each point.
batch_inds (torch.LongTensor): indicate the batch of each point batch_inds (torch.LongTensor): Indicate the batch of each point.
rois (torch.FloatTensor): roi boxes with batch indices rois (torch.FloatTensor): Roi boxes with batch indices.
Returns: Returns:
torch.FloatTensor: pooled features torch.FloatTensor: Pooled features
""" """
pooled_roi_feats = [] pooled_roi_feats = []
for batch_idx in range(int(batch_inds.max()) + 1): for batch_idx in range(int(batch_inds.max()) + 1):
......
...@@ -22,7 +22,7 @@ class HardSimpleVFE(nn.Module): ...@@ -22,7 +22,7 @@ class HardSimpleVFE(nn.Module):
"""Forward function. """Forward function.
Args: Args:
features (torch.Tensor): point features in shape features (torch.Tensor): Point features in shape
(N, M, 3(4)). N is the number of voxels and M is the maximum (N, M, 3(4)). N is the number of voxels and M is the maximum
number of points inside a single voxel. number of points inside a single voxel.
num_points (torch.Tensor): Number of points in each voxel, num_points (torch.Tensor): Number of points in each voxel,
...@@ -60,7 +60,7 @@ class DynamicSimpleVFE(nn.Module): ...@@ -60,7 +60,7 @@ class DynamicSimpleVFE(nn.Module):
"""Forward function. """Forward function.
Args: Args:
features (torch.Tensor): point features in shape features (torch.Tensor): Point features in shape
(N, 3(4)). N is the number of points. (N, 3(4)). N is the number of points.
coors (torch.Tensor): Coordinates of voxels. coors (torch.Tensor): Coordinates of voxels.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment