Merge branch 'update_docstrings' into 'master'

Refine docstrings. See merge request open-mmlab/mmdet.3d!142

Merge branch 'update_docstrings' into 'master'
Refine docstrings See merge request open-mmlab/mmdet.3d!142
3c1eb23f · zhangwenwei · cfc3f545 · 21cb2aa6 · 3c1eb23f · 3c1eb23f
Commit 3c1eb23f authored Jul 09, 2020 by zhangwenwei
19 changed files
--- a/mmdet3d/datasets/nuscenes_dataset.py
+++ b/mmdet3d/datasets/nuscenes_dataset.py
@@ -13,7 +13,7 @@ from .custom_3d import Custom3DDataset

 @DATASETS.register_module()
 class NuScenesDataset(Custom3DDataset):
-    """NuScenes Dataset.
+    r"""NuScenes Dataset.

    This class serves as the API for experiments on the NuScenes Dataset.

@@ -38,9 +38,9 @@ class NuScenesDataset(Custom3DDataset):
            to its original format then converted them to `box_type_3d`.
            Defaults to 'LiDAR' in this dataset. Available options includes

-            - 'LiDAR': box in LiDAR coordinates
-            - 'Depth': box in depth coordinates, usually for indoor dataset
-            - 'Camera': box in camera coordinates
+            - 'LiDAR': Box in LiDAR coordinates.
+            - 'Depth': Box in depth coordinates, usually for indoor dataset.
+            - 'Camera': Box in camera coordinates.
        filter_empty_gt (bool, optional): Whether to filter empty GT.
            Defaults to True.
        test_mode (bool, optional): Whether the dataset is in test mode.
@@ -163,14 +163,14 @@ class NuScenesDataset(Custom3DDataset):
            dict: Data information that will be passed to the data \
                preprocessing pipelines. It includes the following keys:

-                - sample_idx (str): sample index
-                - pts_filename (str): filename of point clouds
-                - sweeps (list[dict]): infos of sweeps
-                - timestamp (float): sample timestamp
-                - img_filename (str, optional): image filename
-                - lidar2img (list[np.ndarray], optional): transformations \
-                    from lidar to different cameras
-                - ann_info (dict): annotation info
+                - sample_idx (str): Sample index.
+                - pts_filename (str): Filename of point clouds.
+                - sweeps (list[dict]): Infos of sweeps.
+                - timestamp (float): Sample timestamp.
+                - img_filename (str, optional): Image filename.
+                - lidar2img (list[np.ndarray], optional): Transformations \
+                    from lidar to different cameras.
+                - ann_info (dict): Annotation info.
        """
        info = self.data_infos[index]

@@ -219,12 +219,12 @@ class NuScenesDataset(Custom3DDataset):
            index (int): Index of the annotation data to get.

        Returns:
-            dict: annotation information consists of the following keys:
+            dict: Annotation information consists of the following keys:

                - gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`): \
                    3D ground truth bboxes
-                - gt_labels_3d (np.ndarray): labels of ground truths
-                - gt_names (list[str]): class names of ground truths
+                - gt_labels_3d (np.ndarray): Labels of ground truths.
+                - gt_names (list[str]): Class names of ground truths.
        """
        info = self.data_infos[index]
        # filter out bbox containing no points
@@ -442,7 +442,7 @@ class NuScenesDataset(Custom3DDataset):
                Default: None.

        Returns:
-            dict[str, float]: results of each evaluation metric
+            dict[str, float]: Results of each evaluation metric.
        """
        result_files, tmp_dir = self.format_results(results, jsonfile_prefix)

@@ -496,9 +496,9 @@ def output_to_nusc_box(detection):
    Args:
        detection (dict): Detection results.

-            - boxes_3d (:obj:`BaseInstance3DBoxes`): detection bbox
-            - scores_3d (torch.Tensor): detection scores
-            - labels_3d (torch.Tensor): predicted box labels
+            - boxes_3d (:obj:`BaseInstance3DBoxes`): Detection bbox.
+            - scores_3d (torch.Tensor): Detection scores.
+            - labels_3d (torch.Tensor): Predicted box labels.

    Returns:
        list[:obj:`NuScenesBox`]: List of standard NuScenesBoxes.

--- a/mmdet3d/datasets/scannet_dataset.py
+++ b/mmdet3d/datasets/scannet_dataset.py
@@ -9,7 +9,7 @@ from .custom_3d import Custom3DDataset

 @DATASETS.register_module()
 class ScanNetDataset(Custom3DDataset):
-    """ScanNet Dataset.
+    r"""ScanNet Dataset.

    This class serves as the API for experiments on the ScanNet Dataset.

@@ -30,9 +30,9 @@ class ScanNetDataset(Custom3DDataset):
            to its original format then converted them to `box_type_3d`.
            Defaults to 'Depth' in this dataset. Available options includes

-            - 'LiDAR': box in LiDAR coordinates
-            - 'Depth': box in depth coordinates, usually for indoor dataset
-            - 'Camera': box in camera coordinates
+            - 'LiDAR': Box in LiDAR coordinates.
+            - 'Depth': Box in depth coordinates, usually for indoor dataset.
+            - 'Camera': Box in camera coordinates.
        filter_empty_gt (bool, optional): Whether to filter empty GT.
            Defaults to True.
        test_mode (bool, optional): Whether the dataset is in test mode.
@@ -73,9 +73,9 @@ class ScanNetDataset(Custom3DDataset):

                - gt_bboxes_3d (:obj:`DepthInstance3DBoxes`): \
                    3D ground truth bboxes
-                - gt_labels_3d (np.ndarray): labels of ground truths
-                - pts_instance_mask_path (str): path of instance masks
-                - pts_semantic_mask_path (str): path of semantic masks
+                - gt_labels_3d (np.ndarray): Labels of ground truths.
+                - pts_instance_mask_path (str): Path of instance masks.
+                - pts_semantic_mask_path (str): Path of semantic masks.
        """
        # Use index to get the annos, thus the evalhook could also use this api
        info = self.data_infos[index]

--- a/mmdet3d/datasets/sunrgbd_dataset.py
+++ b/mmdet3d/datasets/sunrgbd_dataset.py
@@ -30,9 +30,9 @@ class SUNRGBDDataset(Custom3DDataset):
            to its original format then converted them to `box_type_3d`.
            Defaults to 'Depth' in this dataset. Available options includes

-            - 'LiDAR': box in LiDAR coordinates
-            - 'Depth': box in depth coordinates, usually for indoor dataset
-            - 'Camera': box in camera coordinates
+            - 'LiDAR': Box in LiDAR coordinates.
+            - 'Depth': Box in depth coordinates, usually for indoor dataset.
+            - 'Camera': Box in camera coordinates.
        filter_empty_gt (bool, optional): Whether to filter empty GT.
            Defaults to True.
        test_mode (bool, optional): Whether the dataset is in test mode.
@@ -71,9 +71,9 @@ class SUNRGBDDataset(Custom3DDataset):

                - gt_bboxes_3d (:obj:`DepthInstance3DBoxes`): \
                    3D ground truth bboxes
-                - gt_labels_3d (np.ndarray): labels of ground truths
-                - pts_instance_mask_path (str): path of instance masks
-                - pts_semantic_mask_path (str): path of semantic masks
+                - gt_labels_3d (np.ndarray): Labels of ground truths.
+                - pts_instance_mask_path (str): Path of instance masks.
+                - pts_semantic_mask_path (str): Path of semantic masks.
        """
        # Use index to get the annos, thus the evalhook could also use this api
        info = self.data_infos[index]

--- a/mmdet3d/models/backbones/nostem_regnet.py
+++ b/mmdet3d/models/backbones/nostem_regnet.py
@@ -10,12 +10,12 @@ class NoStemRegNet(RegNet):

    Args:
        arch (dict): The parameter of RegNets.
-            - w0 (int): initial width
-            - wa (float): slope of width
-            - wm (float): quantization parameter to quantize the width
-            - depth (int): depth of the backbone
-            - group_w (int): width of group
-            - bot_mul (float): bottleneck ratio, i.e. expansion of bottlneck.
+            - w0 (int): Initial width.
+            - wa (float): Slope of width.
+            - wm (float): Quantization parameter to quantize the width.
+            - depth (int): Depth of the backbone.
+            - group_w (int): Width of group.
+            - bot_mul (float): Bottleneck ratio, i.e. expansion of bottleneck.
        strides (Sequence[int]): Strides of the first block of each stage.
        base_channels (int): Base channels after stem layer.
        in_channels (int): Number of input image channels. Normally 3.
@@ -26,13 +26,13 @@ class NoStemRegNet(RegNet):
            the first 1x1 conv layer.
        frozen_stages (int): Stages to be frozen (all param fixed). -1 means
            not freezing any parameters.
-        norm_cfg (dict): dictionary to construct and config norm layer.
+        norm_cfg (dict): Dictionary to construct and config norm layer.
        norm_eval (bool): Whether to set norm layers to eval mode, namely,
            freeze running stats (mean and var). Note: Effect on Batch Norm
            and its variants only.
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed.
-        zero_init_residual (bool): whether to use zero init for last norm layer
+        zero_init_residual (bool): Whether to use zero init for last norm layer
            in resblocks to let them behave as identity.

    Example:

--- a/mmdet3d/models/backbones/pointnet2_sa_ssg.py
+++ b/mmdet3d/models/backbones/pointnet2_sa_ssg.py
@@ -11,18 +11,18 @@ class PointNet2SASSG(nn.Module):
    """PointNet2 with Single-scale grouping.

    Args:
-        in_channels (int): input channels of point cloud.
-        num_points (tuple[int]): the number of points which each SA
+        in_channels (int): Input channels of point cloud.
+        num_points (tuple[int]): The number of points which each SA
            module samples.
-        radius (tuple[float]): sampling radii of each SA module.
-        num_samples (tuple[int]): the number of samples for ball
+        radius (tuple[float]): Sampling radii of each SA module.
+        num_samples (tuple[int]): The number of samples for ball
            query in each SA module.
-        sa_channels (tuple[tuple[int]]): out channels of each mlp in SA module.
-        fp_channels (tuple[tuple[int]]): out channels of each mlp in FP module.
-        norm_cfg (dict): config of normalization layer.
-        pool_mod (str): pool method ('max' or 'avg') for SA modules.
-        use_xyz (bool): whether to use xyz as a part of features.
-        normalize_xyz (bool): whether to normalize xyz with radii in
+        sa_channels (tuple[tuple[int]]): Out channels of each mlp in SA module.
+        fp_channels (tuple[tuple[int]]): Out channels of each mlp in FP module.
+        norm_cfg (dict): Config of normalization layer.
+        pool_mod (str): Pool method ('max' or 'avg') for SA modules.
+        use_xyz (bool): Whether to use xyz as a part of features.
+        normalize_xyz (bool): Whether to normalize xyz with radii in
            each SA module.
    """

@@ -96,12 +96,12 @@ class PointNet2SASSG(nn.Module):
        """Split coordinates and features of input points.

        Args:
-            points (Tensor): point coordinates with features,
+            points (torch.Tensor): Point coordinates with features,
                with shape (B, N, 3 + input_feature_dim).

        Returns:
-            Tensor: coordinates of input points.
-            Tensor: features of input points.
+            torch.Tensor: Coordinates of input points.
+            torch.Tensor: Features of input points.
        """
        xyz = points[..., 0:3].contiguous()
        if points.size(-1) > 3:
@@ -119,13 +119,13 @@ class PointNet2SASSG(nn.Module):
                with shape (B, N, 3 + input_feature_dim).

        Returns:
-            dict[str, list[torch.Tensor]]: outputs after SA and FP modules.
+            dict[str, list[torch.Tensor]]: Outputs after SA and FP modules.

-                - fp_xyz (list[torch.Tensor]): contains the coordinates of \
+                - fp_xyz (list[torch.Tensor]): The coordinates of \
                    each fp features.
-                - fp_features (list[torch.Tensor]): contains the features \
+                - fp_features (list[torch.Tensor]): The features \
                    from each Feature Propagate Layers.
-                - fp_indices (list[torch.Tensor]): contains indices of the \
+                - fp_indices (list[torch.Tensor]): Indices of the \
                    input points.
        """
        xyz, features = self._split_point_feats(points)

--- a/mmdet3d/models/backbones/second.py
+++ b/mmdet3d/models/backbones/second.py
@@ -10,12 +10,12 @@ class SECOND(nn.Module):
    """Backbone network for SECOND/PointPillars/PartA2/MVXNet.

    Args:
-        in_channels (int): Input channels
-        out_channels (list[int]): Output channels for multi-scale feature maps
-        layer_nums (list[int]): Number of layers in each stage
-        layer_strides (list[int]): Strides of each stage
-        norm_cfg (dict): Config dict of normalization layers
-        conv_cfg (dict): Config dict of convolutional layers
+        in_channels (int): Input channels.
+        out_channels (list[int]): Output channels for multi-scale feature maps.
+        layer_nums (list[int]): Number of layers in each stage.
+        layer_strides (list[int]): Strides of each stage.
+        norm_cfg (dict): Config dict of normalization layers.
+        conv_cfg (dict): Config dict of convolutional layers.
    """

    def __init__(self,

--- a/mmdet3d/models/dense_heads/anchor3d_head.py
+++ b/mmdet3d/models/dense_heads/anchor3d_head.py
@@ -169,11 +169,11 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
        Args:
            featmap_sizes (list[tuple]): Multi-level feature map sizes.
            input_metas (list[dict]): contain pcd and img's meta info.
-            device (str): device of current module
+            device (str): Device of current module.

        Returns:
-            list[list[torch.Tensor]]: anchors of each image, valid flags \
-                of each image
+            list[list[torch.Tensor]]: Anchors of each image, valid flags \
+                of each image.
        """
        num_imgs = len(input_metas)
        # since feature map sizes of all images are the same, we only compute
@@ -202,7 +202,7 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
            num_total_samples (int): The number of valid samples.

        Returns:
-            tuple[torch.Tensor]: losses of class, bbox \
+            tuple[torch.Tensor]: Losses of class, bbox \
                and direction, respectively.
        """
        # classification loss
@@ -357,10 +357,10 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
                class predictions.
            input_metas (list[dict]): Contain pcd and img's meta info.
            cfg (None | :obj:`ConfigDict`): Training or testing config.
-            rescale (list[torch.Tensor]): whether th rescale bbox.
+            rescale (list[torch.Tensor]): Whether to rescale bbox.

        Returns:
-            list[tuple]: prediction resultes of batches.
+            list[tuple]: Prediction results of batches.
        """
        assert len(cls_scores) == len(bbox_preds)
        assert len(cls_scores) == len(dir_cls_preds)

--- a/mmdet3d/models/dense_heads/free_anchor3d_head.py
+++ b/mmdet3d/models/dense_heads/free_anchor3d_head.py
@@ -9,7 +9,7 @@ from .train_mixins import get_direction_target

 @HEADS.register_module()
 class FreeAnchor3DHead(Anchor3DHead):
-    """`FreeAnchor <https://arxiv.org/abs/1909.02466>`_ head for 3D detection.
+    r"""`FreeAnchor <https://arxiv.org/abs/1909.02466>`_ head for 3D detection.

    Note:
        This implementation is directly modified from the `mmdet implementation

--- a/mmdet3d/models/dense_heads/parta2_rpn_head.py
+++ b/mmdet3d/models/dense_heads/parta2_rpn_head.py
@@ -27,8 +27,8 @@ class PartA2RPNHead(Anchor3DHead):
    Args:
        num_classes (int): Number of classes.
        in_channels (int): Number of channels in the input feature map.
-        train_cfg (dict): train configs
-        test_cfg (dict): test configs
+        train_cfg (dict): Train configs.
+        test_cfg (dict): Test configs.
        feat_channels (int): Number of channels of the feature map.
        use_direction_classifier (bool): Whether to add a direction classifier.
        anchor_generator(dict): Config dict of anchor generator.
@@ -96,10 +96,10 @@ class PartA2RPNHead(Anchor3DHead):
            bbox_preds (list[torch.Tensor]): Multi-level bbox predictions.
            dir_cls_preds (list[torch.Tensor]): Multi-level direction
                class predictions.
-            gt_bboxes (list[:obj:`BaseInstance3DBoxes`]): Gt bboxes
+            gt_bboxes (list[:obj:`BaseInstance3DBoxes`]): Ground truth boxes \
                of each sample.
-            gt_labels (list[torch.Tensor]): Gt labels of each sample.
-            input_metas (list[dict]): Contain pcd and img's meta info.
+            gt_labels (list[torch.Tensor]): Labels of each sample.
+            input_metas (list[dict]): Point cloud and image's meta info.
            gt_bboxes_ignore (None | list[torch.Tensor]): Specify
                which bounding.


--- a/mmdet3d/models/dense_heads/vote_head.py
+++ b/mmdet3d/models/dense_heads/vote_head.py
@@ -120,8 +120,8 @@ class VoteHead(nn.Module):
                4. Decode predictions.

        Args:
-            feat_dict (dict): feature dict from backbone.
-            sample_mod (str): sample mode for vote aggregation layer.
+            feat_dict (dict): Feature dict from backbone.
+            sample_mod (str): Sample mode for vote aggregation layer.
                valid modes are "vote", "seed" and "random".

        Returns:
@@ -189,9 +189,9 @@ class VoteHead(nn.Module):
        Args:
            bbox_preds (dict): Predictions from forward of vote head.
            points (list[torch.Tensor]): Input points.
-            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Gt bboxes
-                of each sample.
-            gt_labels_3d (list[torch.Tensor]): Gt labels of each sample.
+            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
+                bboxes of each sample.
+            gt_labels_3d (list[torch.Tensor]): Labels of each sample.
            pts_semantic_mask (None | list[torch.Tensor]): Point-wise
                semantic mask.
            pts_instance_mask (None | list[torch.Tensor]): Point-wise
@@ -296,14 +296,14 @@ class VoteHead(nn.Module):

        Args:
            points (list[torch.Tensor]): Points of each batch.
-            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): gt bboxes of
-                each batch.
-            gt_labels_3d (list[torch.Tensor]): gt class labels of each batch.
-            pts_semantic_mask (None | list[torch.Tensor]): point-wise semantic
+            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
+                bboxes of each batch.
+            gt_labels_3d (list[torch.Tensor]): Labels of each batch.
+            pts_semantic_mask (None | list[torch.Tensor]): Point-wise semantic
                label of each batch.
-            pts_instance_mask (None | list[torch.Tensor]): point-wise instance
+            pts_instance_mask (None | list[torch.Tensor]): Point-wise instance
                label of each batch.
-            bbox_preds (torch.Tensor): Bbox predictions of vote head.
+            bbox_preds (torch.Tensor): Bounding box predictions of vote head.

        Returns:
            tuple[torch.Tensor]: Targets of vote head.
@@ -382,11 +382,12 @@ class VoteHead(nn.Module):

        Args:
            points (torch.Tensor): Points of each batch.
-            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes of each batch.
-            gt_labels_3d (torch.Tensor): gt class labels of each batch.
-            pts_semantic_mask (None | torch.Tensor): point-wise semantic
+            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth \
+                boxes of each batch.
+            gt_labels_3d (torch.Tensor): Labels of each batch.
+            pts_semantic_mask (None | torch.Tensor): Point-wise semantic
                label of each batch.
-            pts_instance_mask (None | torch.Tensor): point-wise instance
+            pts_instance_mask (None | torch.Tensor): Point-wise instance
                label of each batch.
            aggregated_points (torch.Tensor): Aggregated points from
                vote aggregation layer.
@@ -496,11 +497,11 @@ class VoteHead(nn.Module):
        Args:
            points (torch.Tensor): Input points.
            bbox_preds (dict): Predictions from vote head.
-            input_metas (list[dict]): Contain pcd and img's meta info.
+            input_metas (list[dict]): Point cloud and image's meta info.
            rescale (bool): Whether to rescale bboxes.

        Returns:
-            list[tuple[torch.Tensor]]: Contain bbox, scores and labels.
+            list[tuple[torch.Tensor]]: Bounding boxes, scores and labels.
        """
        # decode boxes
        obj_scores = F.softmax(bbox_preds['obj_scores'], dim=-1)[..., -1]
@@ -523,17 +524,17 @@ class VoteHead(nn.Module):

    def multiclass_nms_single(self, obj_scores, sem_scores, bbox, points,
                              input_meta):
-        """multi-class nms in single batch.
+        """Multi-class nms in single batch.

        Args:
-            obj_scores (torch.Tensor): Objectness score of bboxes.
-            sem_scores (torch.Tensor): semantic class score of bboxes.
-            bbox (torch.Tensor): Predicted bbox.
+            obj_scores (torch.Tensor): Objectness score of bounding boxes.
+            sem_scores (torch.Tensor): Semantic class score of bounding boxes.
+            bbox (torch.Tensor): Predicted bounding boxes.
            points (torch.Tensor): Input points.
-            input_meta (dict): Contain pcd and img's meta info.
+            input_meta (dict): Point cloud and image's meta info.

        Returns:
-            tuple[torch.Tensor]: Contain bbox, scores and labels.
+            tuple[torch.Tensor]: Bounding boxes, scores and labels.
        """
        bbox = input_meta['box_type_3d'](
            bbox,

--- a/mmdet3d/models/fusion_layers/point_fusion.py
+++ b/mmdet3d/models/fusion_layers/point_fusion.py
@@ -26,23 +26,24 @@ def point_sample(
    """Obtain image features using points.

    Args:
-        img_features (Tensor): 1xCxHxW image features
-        points (Tensor): Nx3 point cloud in LiDAR coordinates
-        lidar2img_rt (Tensor): 4x4 transformation matrix
-        pcd_rotate_mat (Tensor): 3x3 rotation matrix of points
-            during augmentation
-        img_scale_factor (Tensor): (w_scale, h_scale)
-        img_crop_offset (Tensor): (w_offset, h_offset) offset used to crop
-            image during data augmentation
-        pcd_trans_factor ([type]): Translation of points in augmentation
-        pcd_scale_factor (float): Scale factor of points during
+        img_features (torch.Tensor): 1 x C x H x W image features.
+        points (torch.Tensor): Nx3 point cloud in LiDAR coordinates.
+        lidar2img_rt (torch.Tensor): 4x4 transformation matrix.
+        pcd_rotate_mat (torch.Tensor): 3x3 rotation matrix of points
+            during augmentation.
+        img_scale_factor (torch.Tensor): Scale factor with shape of \
+            (w_scale, h_scale).
+        img_crop_offset (torch.Tensor): Crop offset used to crop \
+            image during data augmentation with shape of (w_offset, h_offset).
+        pcd_trans_factor ([type]): Translation of points in augmentation.
+        pcd_scale_factor (float): Scale factor of points during
            data augmentation
        pcd_flip (bool): Whether the points are flipped.
        img_flip (bool): Whether the image is flipped.
        img_pad_shape (tuple[int]): int tuple indicates the h & w after
-            padding, this is necessary to obtain features in feature map
+            padding, this is necessary to obtain features in feature map.
        img_shape (tuple[int]): int tuple indicates the h & w before padding
-            after scaling, this is necessary for flipping coordinates
+            after scaling, this is necessary for flipping coordinates.
        aligned (bool, optional): Whether use bilinear interpolation when
            sampling image features for each point. Defaults to True.
        padding_mode (str, optional): Padding mode when padding values for
@@ -51,7 +52,7 @@ def point_sample(
            sampling image features for each point. Defaults to True.

    Returns:
-        (Tensor): NxC image features sampled by point coordinates
+        torch.Tensor: NxC image features sampled by point coordinates.
    """
    # aug order: flip -> trans -> scale -> rot
    # The transformation follows the augmentation order in data pipeline
@@ -229,14 +230,14 @@ class PointFusion(nn.Module):
        """Forward function.

        Args:
-            img_feats (list[torch.Tensor]): img features
-            pts: [list[torch.Tensor]]: a batch of points with shape Nx3
-            pts_feats (torch.Tensor): a tensor consist of point features of the
-                total batch
-            img_metas (list[dict]): meta information of images
+            img_feats (list[torch.Tensor]): Image features.
+            pts (list[torch.Tensor]): A batch of points with shape N x 3.
+            pts_feats (torch.Tensor): A tensor consist of point features of the
+                total batch.
+            img_metas (list[dict]): Meta information of images.

        Returns:
-            torch.Tensor: fused features of each point.
+            torch.Tensor: Fused features of each point.
        """
        img_pts = self.obtain_mlvl_feats(img_feats, pts, img_metas)
        img_pre_fuse = self.img_transform(img_pts)

--- a/mmdet3d/models/middle_encoders/sparse_encoder.py
+++ b/mmdet3d/models/middle_encoders/sparse_encoder.py
@@ -10,14 +10,14 @@ class SparseEncoder(nn.Module):
    r"""Sparse encoder for SECOND and Part-A2.

    Args:
-        in_channels (int): the number of input channels
-        sparse_shape (list[int]): the sparse shape of input tensor
-        norm_cfg (dict): config of normalization layer
-        base_channels (int): out channels for conv_input layer
-        output_channels (int): out channels for conv_out layer
+        in_channels (int): The number of input channels.
+        sparse_shape (list[int]): The sparse shape of input tensor.
+        norm_cfg (dict): Config of normalization layer.
+        base_channels (int): Out channels for conv_input layer.
+        output_channels (int): Out channels for conv_out layer.
        encoder_channels (tuple[tuple[int]]):
-            conv channels of each encode block
-        encoder_paddings (tuple[tuple[int]]): paddings of each encode block
+            Convolutional channels of each encode block.
+        encoder_paddings (tuple[tuple[int]]): Paddings of each encode block.
    """

    def __init__(self,
@@ -82,12 +82,13 @@ class SparseEncoder(nn.Module):
        """Forward of SparseEncoder.

        Args:
-            voxel_features (torch.float32): shape [N, C]
-            coors (torch.int32): shape [N, 4](batch_idx, z_idx, y_idx, x_idx)
-            batch_size (int): batch size
+            voxel_features (torch.float32): Voxel features in shape (N, C).
+            coors (torch.int32): Coordinates in shape (N, 4), \
+                the columns in the order of (batch_idx, z_idx, y_idx, x_idx).
+            batch_size (int): Batch size.

        Returns:
-            dict: backbone features
+            dict: Backbone features.
        """
        coors = coors.int()
        input_sp_tensor = spconv.SparseConvTensor(voxel_features, coors,
@@ -114,12 +115,12 @@ class SparseEncoder(nn.Module):
        """make encoder layers using sparse convs.

        Args:
-            make_block (method): a bounded function to build blocks
-            norm_cfg (dict[str]): config of normalization layer
-            in_channels (int): the number of encoder input channels
+            make_block (method): A bounded function to build blocks.
+            norm_cfg (dict[str]): Config of normalization layer.
+            in_channels (int): The number of encoder input channels.

        Returns:
-            int: the number of encoder output channels
+            int: The number of encoder output channels.
        """
        self.encoder_layers = spconv.SparseSequential()


--- a/mmdet3d/models/middle_encoders/sparse_unet.py
+++ b/mmdet3d/models/middle_encoders/sparse_unet.py
@@ -13,17 +13,17 @@ class SparseUNet(nn.Module):
    See the `paper <https://arxiv.org/abs/1907.03670>`_ for more detials.

    Args:
-        in_channels (int): the number of input channels
-        sparse_shape (list[int]): the sparse shape of input tensor
-        norm_cfg (dict): config of normalization layer
-        base_channels (int): out channels for conv_input layer
-        output_channels (int): out channels for conv_out layer
+        in_channels (int): The number of input channels.
+        sparse_shape (list[int]): The sparse shape of input tensor.
+        norm_cfg (dict): Config of normalization layer.
+        base_channels (int): Out channels for conv_input layer.
+        output_channels (int): Out channels for conv_out layer.
        encoder_channels (tuple[tuple[int]]):
-            conv channels of each encode block
-        encoder_paddings (tuple[tuple[int]]): paddings of each encode block
+            Convolutional channels of each encode block.
+        encoder_paddings (tuple[tuple[int]]): Paddings of each encode block.
        decoder_channels (tuple[tuple[int]]):
-            conv channels of each decode block
-        decoder_paddings (tuple[tuple[int]]): paddings of each decode block
+            Convolutional channels of each decode block.
+        decoder_paddings (tuple[tuple[int]]): Paddings of each decode block.
    """

    def __init__(self,
@@ -194,7 +194,7 @@ class SparseUNet(nn.Module):
            in_channels (int): The number of encoder input channels.

        Returns:
-            int: the number of encoder output channels.
+            int: The number of encoder output channels.
        """
        self.encoder_layers = spconv.SparseSequential()


--- a/mmdet3d/models/roi_heads/base_3droi_head.py
+++ b/mmdet3d/models/roi_heads/base_3droi_head.py
@@ -76,7 +76,7 @@ class Base3DRoIHead(nn.Module, metaclass=ABCMeta):
                Ground truth boxes to be ignored.

        Returns:
-            dict[str, torch.Tensor]: losses from each head.
+            dict[str, torch.Tensor]: Losses from each head.
        """
        pass


--- a/mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py
+++ b/mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py
@@ -290,7 +290,7 @@ class PartA2BboxHead(nn.Module):
            rois (torch.Tensor): Roi bboxes.
            labels (torch.Tensor): Labels of class.
            bbox_targets (torch.Tensor): Target of positive bboxes.
-            pos_gt_bboxes (torch.Tensor): Gt of positive bboxes.
+            pos_gt_bboxes (torch.Tensor): Ground truths of positive bboxes.
            reg_mask (torch.Tensor): Mask for positive bboxes.
            label_weights (torch.Tensor): Weights of class loss.
            bbox_weights (torch.Tensor): Weights of bbox loss.
@@ -298,9 +298,9 @@ class PartA2BboxHead(nn.Module):
        Returns:
            dict: Computed losses.

-                - loss_cls (torch.Tensor): loss of classes.
-                - loss_bbox (torch.Tensor): loss of bboxes.
-                - loss_corner (torch.Tensor): loss of corners.
+                - loss_cls (torch.Tensor): Loss of classes.
+                - loss_bbox (torch.Tensor): Loss of bboxes.
+                - loss_corner (torch.Tensor): Loss of corners.
        """
        losses = dict()
        rcnn_batch_size = cls_score.shape[0]
@@ -463,11 +463,11 @@ class PartA2BboxHead(nn.Module):
        """Calculate corner loss of given boxes.

        Args:
-            pred_bbox3d (torch.FloatTensor): predicted boxes with shape (N, 7).
-            gt_bbox3d (torch.FloatTensor): gt boxes with shape (N, 7).
+            pred_bbox3d (torch.FloatTensor): Predicted boxes in shape (N, 7).
+            gt_bbox3d (torch.FloatTensor): Ground truth boxes in shape (N, 7).

        Returns:
-            torch.FloatTensor: Calculated corner loss with shape (N).
+            torch.FloatTensor: Calculated corner loss in shape (N).
        """
        assert pred_bbox3d.shape[0] == gt_bbox3d.shape[0]

@@ -505,12 +505,12 @@ class PartA2BboxHead(nn.Module):
        """Generate bboxes from bbox head predictions.

        Args:
-            rois (torch.Tensor): Roi bboxes.
-            cls_score (torch.Tensor): Scores of bboxes.
-            bbox_pred (torch.Tensor): Bbox predictions
+            rois (torch.Tensor): Roi bounding boxes.
+            cls_score (torch.Tensor): Scores of bounding boxes.
+            bbox_pred (torch.Tensor): Bounding box predictions.
            class_labels (torch.Tensor): Label of classes
            class_pred (torch.Tensor): Score for nms.
-            img_metas (list[dict]): Contain pcd and img's meta info.
+            img_metas (list[dict]): Point cloud and image's meta info.
            cfg (:obj:`ConfigDict`): Testing config.

        Returns:

--- a/mmdet3d/models/roi_heads/mask_heads/pointwise_semantic_head.py
+++ b/mmdet3d/models/roi_heads/mask_heads/pointwise_semantic_head.py
@@ -16,9 +16,9 @@ class PointwiseSemanticHead(nn.Module):
    See `paper <https://arxiv.org/abs/1907.03670>`_ for more detials.

    Args:
-        in_channels (int): the number of input channel.
-        num_classes (int): the number of class.
-        extra_width (float): boxes enlarge width.
+        in_channels (int): The number of input channels.
+        num_classes (int): The number of classes.
+        extra_width (float): Boxes enlarge width.
        loss_seg (dict): Config of segmentation loss.
        loss_part (dict): Config of part prediction loss.
    """
@@ -56,11 +56,11 @@ class PointwiseSemanticHead(nn.Module):
            x (torch.Tensor): Features from the first stage.

        Returns:
-            dict: part features, segmentation and part predictions.
+            dict: Part features, segmentation and part predictions.

-                - seg_preds (torch.Tensor): segment predictions
-                - part_preds (torch.Tensor): part predictions
-                - part_feats (torch.Tensor): feature predictions
+                - seg_preds (torch.Tensor): Segment predictions.
+                - part_preds (torch.Tensor): Part predictions.
+                - part_feats (torch.Tensor): Feature predictions.
        """
        seg_preds = self.seg_cls_layer(x)  # (N, 1)
        part_preds = self.seg_reg_layer(x)  # (N, 3)
@@ -80,14 +80,15 @@ class PointwiseSemanticHead(nn.Module):
        sample.

        Args:
-            voxel_centers (torch.Tensor): shape [voxel_num, 3],
-                the center of voxels
-            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt boxes with tensor
-                of shape [box_num, 7].
-            gt_labels_3d (torch.Tensor): shape [box_num], class label of gt
+            voxel_centers (torch.Tensor): The center of voxels in shape \
+                (voxel_num, 3).
+            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in \
+                shape (box_num, 7).
+            gt_labels_3d (torch.Tensor): Class labels of ground truths in \
+                shape (box_num).

        Returns:
-            tuple[torch.Tensor]: segmentation targets with shape [voxel_num] \
+            tuple[torch.Tensor]: Segmentation targets with shape [voxel_num], \
                part prediction targets with shape [voxel_num, 3]
        """
        gt_bboxes_3d = gt_bboxes_3d.to(voxel_centers.device)
@@ -127,14 +128,15 @@ class PointwiseSemanticHead(nn.Module):
        """generate segmentation and part prediction targets.

        Args:
-            voxel_centers (torch.Tensor): shape [voxel_num, 3],
-                the center of voxels
-            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): list of gt boxes
-                containing tensor of shape [box_num, 7].
-            gt_labels_3d (list[torch.Tensor]): list of GT labels.
+            voxel_centers (torch.Tensor): The center of voxels in shape \
+                (voxel_num, 3).
+            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): List of ground \
+                truth boxes, each in shape (box_num, 7).
+            gt_labels_3d (list[torch.Tensor]): List of class labels of \
+                ground truths, each in shape (box_num).

        Returns:
-            dict: prediction targets
+            dict: Prediction targets.

                - seg_targets (torch.Tensor): Segmentation targets \
                    with shape [voxel_num].
@@ -160,19 +162,19 @@ class PointwiseSemanticHead(nn.Module):
        Args:
            semantic_results (dict): Results from semantic head.

-                - seg_preds: segmentation predictions
-                - part_preds: part predictions
+                - seg_preds: Segmentation predictions.
+                - part_preds: Part predictions.

            semantic_targets (dict): Targets of semantic results.

-                - seg_preds: segmentation targets
-                - part_preds: part targets
+                - seg_targets: Segmentation targets.
+                - part_targets: Part targets.

        Returns:
-            dict: loss of segmentation and part prediction.
+            dict: Loss of segmentation and part prediction.

-                - loss_seg (torch.Tensor): segmentation prediction loss
-                - loss_part (torch.Tensor): part prediction loss
+                - loss_seg (torch.Tensor): Segmentation prediction loss.
+                - loss_part (torch.Tensor): Part prediction loss.
        """
        seg_preds = semantic_results['seg_preds']
        part_preds = semantic_results['part_preds']

--- a/mmdet3d/models/roi_heads/part_aggregation_roi_head.py
+++ b/mmdet3d/models/roi_heads/part_aggregation_roi_head.py
@@ -86,6 +86,7 @@ class PartAggregationROIHead(Base3DRoIHead):
            img_metas (list[dict]): Meta info of each image.
            proposal_list (list[dict]): Proposal information from rpn.
                The dictionary should contain the following keys:
+
                - boxes_3d (:obj:`BaseInstance3DBoxes`): Proposal bboxes
                - labels_3d (torch.Tensor): Labels of proposals
                - cls_preds (torch.Tensor): Original scores of proposals

--- a/mmdet3d/models/roi_heads/roi_extractors/single_roiaware_extractor.py
+++ b/mmdet3d/models/roi_heads/roi_extractors/single_roiaware_extractor.py
@@ -12,7 +12,7 @@ class Single3DRoIAwareExtractor(nn.Module):
    Extract Point-wise roi features.

    Args:
-        roi_layer (dict): the config of roi layer
+        roi_layer (dict): The config of roi layer.
    """

    def __init__(self, roi_layer=None):
@@ -32,14 +32,14 @@ class Single3DRoIAwareExtractor(nn.Module):
        """Extract point-wise roi features.

        Args:
-            feats (torch.FloatTensor): point-wise features with
-                shape (batch, npoints, channels) for pooling
-            coordinate (torch.FloatTensor): coordinate of each point
-            batch_inds (torch.LongTensor): indicate the batch of each point
-            rois (torch.FloatTensor): roi boxes with batch indices
+            feats (torch.FloatTensor): Point-wise features with
+                shape (batch, npoints, channels) for pooling.
+            coordinate (torch.FloatTensor): Coordinate of each point.
+            batch_inds (torch.LongTensor): Indicate the batch of each point.
+            rois (torch.FloatTensor): Roi boxes with batch indices.

        Returns:
-            torch.FloatTensor: pooled features
+            torch.FloatTensor: Pooled features.
        """
        pooled_roi_feats = []
        for batch_idx in range(int(batch_inds.max()) + 1):

--- a/mmdet3d/models/voxel_encoders/voxel_encoder.py
+++ b/mmdet3d/models/voxel_encoders/voxel_encoder.py
@@ -22,7 +22,7 @@ class HardSimpleVFE(nn.Module):
        """Forward function.

        Args:
-            features (torch.Tensor): point features in shape
+            features (torch.Tensor): Point features in shape
                (N, M, 3(4)). N is the number of voxels and M is the maximum
                number of points inside a single voxel.
            num_points (torch.Tensor): Number of points in each voxel,
@@ -60,7 +60,7 @@ class DynamicSimpleVFE(nn.Module):
        """Forward function.

        Args:
-            features (torch.Tensor): point features in shape
+            features (torch.Tensor): Point features in shape
                (N, 3(4)). N is the number of points.
            coors (torch.Tensor): Coordinates of voxels.