Commit e5b1ec11 authored by wuyuefeng, committed by zhangwenwei

Head docs

parent 7f7c73a9
@@ -28,7 +28,8 @@ class PartialBinBasedBBoxCoder(BaseBBoxCoder):
        """Encode ground truth to prediction targets.
        Args:
-            gt_bboxes_3d (BaseInstance3DBoxes): gt bboxes with shape (n, 7).
+            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes with
+                shape (n, 7).
            gt_labels_3d (Tensor): gt classes.
        Returns:
......
@@ -213,7 +213,8 @@ def indoor_eval(gt_annos,
        dt_annos (list[dict]): Detection annotations. the dict
            includes the following keys
            - labels_3d (Tensor): Labels of boxes.
-            - boxes_3d (BaseInstance3DBoxes): 3d bboxes in Depth coordinate.
+            - boxes_3d (:obj:`BaseInstance3DBoxes`): 3d bboxes in
+                Depth coordinate.
            - scores_3d (Tensor): Scores of boxes.
        metric (list[float]): AP IoU thresholds.
        label2cat (dict): {label: cat}.
......
@@ -19,8 +19,8 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
    Args:
        num_classes (int): Number of classes.
        in_channels (int): Number of channels in the input feature map.
-        train_cfg (dict): train configs
-        test_cfg (dict): test configs
+        train_cfg (dict): Train configs.
+        test_cfg (dict): Test configs.
        feat_channels (int): Number of channels of the feature map.
        use_direction_classifier (bool): Whether to add a direction classifier.
        anchor_generator(dict): Config dict of anchor generator.
@@ -29,11 +29,11 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
        assign_per_class (bool): Whether to do assignment for each class.
        diff_rad_by_sin (bool): Whether to change the difference into sin
            difference for box regression loss.
-        dir_offset (float | int): The offset of BEV rotation angles
+        dir_offset (float | int): The offset of BEV rotation angles.
            (TODO: may be moved into box coder)
-        dirlimit_offset (float | int): The limited range of BEV rotation angles
-            (TODO: may be moved into box coder)
+        dir_limit_offset (float | int): The limited range of BEV
+            rotation angles. (TODO: may be moved into box coder)
-        box_coder (dict): Config dict of box coders.
+        bbox_coder (dict): Config dict of box coders.
        loss_cls (dict): Config of classification loss.
        loss_bbox (dict): Config of localization loss.
        loss_dir (dict): Config of direction classifier loss.
@@ -131,6 +131,15 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
        normal_init(self.conv_reg, std=0.01)
    def forward_single(self, x):
"""Forward function on a single-scale feature map.
Args:
x (Tensor): Input features.
Returns:
tuple[Tensor]: Contain score of each class, bbox predictions
and class predictions of direction.
"""
cls_score = self.conv_cls(x) cls_score = self.conv_cls(x)
bbox_pred = self.conv_reg(x) bbox_pred = self.conv_reg(x)
dir_cls_preds = None dir_cls_preds = None
...@@ -139,6 +148,16 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin): ...@@ -139,6 +148,16 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
return cls_score, bbox_pred, dir_cls_preds return cls_score, bbox_pred, dir_cls_preds
def forward(self, feats): def forward(self, feats):
"""Forward pass.
Args:
feats (list[Tensor]): Multi-level features, e.g.,
features produced by FPN.
Returns:
tuple[list[Tensor]]: Multi-level class score, bbox
and direction predictions.
"""
return multi_apply(self.forward_single, feats) return multi_apply(self.forward_single, feats)
def get_anchors(self, featmap_sizes, input_metas, device='cuda'): def get_anchors(self, featmap_sizes, input_metas, device='cuda'):
...@@ -163,6 +182,24 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin): ...@@ -163,6 +182,24 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
def loss_single(self, cls_score, bbox_pred, dir_cls_preds, labels, def loss_single(self, cls_score, bbox_pred, dir_cls_preds, labels,
label_weights, bbox_targets, bbox_weights, dir_targets, label_weights, bbox_targets, bbox_weights, dir_targets,
dir_weights, num_total_samples): dir_weights, num_total_samples):
"""Calculate loss of Single-level results.
Args:
cls_score (Tensor): Class score in single-level.
bbox_pred (Tensor): Bbox prediction in single-level.
dir_cls_preds (Tensor): Predictions of direction class
in single-level.
labels (Tensor): Labels of class.
label_weights (Tensor): Weights of class loss.
bbox_targets (Tensor): Targets of bbox predictions.
bbox_weights (Tensor): Weights of bbox loss.
dir_targets (Tensor): Targets of direction predictions.
dir_weights (Tensor): Weights of direction loss.
num_total_samples (int): The number of valid samples.
Returns:
tuple[Tensor]: losses of class, bbox and direction, respectively.
"""
# classification loss # classification loss
if num_total_samples is None: if num_total_samples is None:
num_total_samples = int(cls_score.shape[0]) num_total_samples = int(cls_score.shape[0])
...@@ -235,6 +272,22 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin): ...@@ -235,6 +272,22 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
gt_labels, gt_labels,
input_metas, input_metas,
gt_bboxes_ignore=None): gt_bboxes_ignore=None):
"""Calculate losses.
Args:
cls_scores (list[Tensor]): Multi-level class scores.
bbox_preds (list[Tensor]): Multi-level bbox predictions.
dir_cls_preds (list[Tensor]): Multi-level direction
class predictions.
gt_bboxes (list[:obj:BaseInstance3DBoxes]): Gt bboxes
of each sample.
gt_labels (list[Tensor]): Gt labels of each sample.
input_metas (list[dict]): Contain pcd and img's meta info.
gt_bboxes_ignore (None | list[Tensor]): Specify which bounding.
Returns:
dict: Contain class, bbox and direction losses of each level.
"""
featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores] featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
assert len(featmap_sizes) == self.anchor_generator.num_levels assert len(featmap_sizes) == self.anchor_generator.num_levels
device = cls_scores[0].device device = cls_scores[0].device
...@@ -284,6 +337,20 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin): ...@@ -284,6 +337,20 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
input_metas, input_metas,
cfg=None, cfg=None,
rescale=False): rescale=False):
"""Get bboxes of anchor head.
Args:
cls_scores (list[Tensor]): Multi-level class scores.
bbox_preds (list[Tensor]): Multi-level bbox predictions.
dir_cls_preds (list[Tensor]): Multi-level direction
class predictions.
input_metas (list[dict]): Contain pcd and img's meta info.
cfg (None | ConfigDict): Training or testing config.
rescale (list[Tensor]): whether th rescale bbox.
Returns:
list[tuple]: prediction resultes of batches.
"""
assert len(cls_scores) == len(bbox_preds) assert len(cls_scores) == len(bbox_preds)
assert len(cls_scores) == len(dir_cls_preds) assert len(cls_scores) == len(dir_cls_preds)
num_levels = len(cls_scores) num_levels = len(cls_scores)
...@@ -322,6 +389,24 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin): ...@@ -322,6 +389,24 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
input_meta, input_meta,
cfg=None, cfg=None,
rescale=False): rescale=False):
"""Get bboxes of single branch.
Args:
cls_scores (Tensor): Class score in single batch.
bbox_preds (Tensor): Bbox prediction in single batch.
dir_cls_preds (Tensor): Predictions of direction class
in single batch.
mlvl_anchors (List[Tensor]): Multi-level anchors in single batch.
input_meta (list[dict]): Contain pcd and img's meta info.
cfg (None | ConfigDict): Training or testing config.
rescale (list[Tensor]): whether th rescale bbox.
Returns:
tuple: Contain predictions of single batch.
- bboxes (:obj:BaseInstance3DBoxes): Predicted 3d bboxes.
- scores (Tensor): Class score of each bbox.
- labels (Tensor): Label of each bbox.
"""
cfg = self.test_cfg if cfg is None else cfg cfg = self.test_cfg if cfg is None else cfg
assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors) assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
mlvl_bboxes = [] mlvl_bboxes = []
......
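The `forward`/`forward_single` docstrings above describe the usual multi-level pattern: `forward` simply maps `forward_single` over every feature level with `multi_apply` and regroups the per-level outputs. Below is a minimal, self-contained sketch of that pattern; `ToyAnchorHead`, its channel sizes and the local `multi_apply` re-implementation are illustrative assumptions, not the mmdetection3d code.

```python
import torch
from torch import nn


def multi_apply(func, *args):
    """Apply ``func`` to each level and regroup the per-level outputs
    into a tuple of lists (mirrors the helper the head relies on)."""
    map_results = map(func, *args)
    return tuple(map(list, zip(*map_results)))


class ToyAnchorHead(nn.Module):
    """Minimal stand-in for an anchor-based 3D head (names are illustrative)."""

    def __init__(self, in_channels=64, num_anchors=2, num_classes=3, box_code_size=7):
        super().__init__()
        self.conv_cls = nn.Conv2d(in_channels, num_anchors * num_classes, 1)
        self.conv_reg = nn.Conv2d(in_channels, num_anchors * box_code_size, 1)
        self.conv_dir_cls = nn.Conv2d(in_channels, num_anchors * 2, 1)

    def forward_single(self, x):
        # One (cls_score, bbox_pred, dir_cls_preds) triple per feature map.
        return self.conv_cls(x), self.conv_reg(x), self.conv_dir_cls(x)

    def forward(self, feats):
        # Returns (list_of_cls_scores, list_of_bbox_preds, list_of_dir_preds).
        return multi_apply(self.forward_single, feats)


feats = [torch.rand(2, 64, s, s) for s in (32, 16)]  # two BEV feature levels
cls_scores, bbox_preds, dir_preds = ToyAnchorHead()(feats)
print(len(cls_scores), cls_scores[0].shape)  # 2 levels, torch.Size([2, 6, 32, 32])
```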
@@ -39,9 +39,9 @@ class PartA2RPNHead(Anchor3DHead):
            difference for box regression loss.
        dir_offset (float | int): The offset of BEV rotation angles
            (TODO: may be moved into box coder)
-        dirlimit_offset (float | int): The limited range of BEV rotation angles
-            (TODO: may be moved into box coder)
+        dir_limit_offset (float | int): The limited range of BEV
+            rotation angles. (TODO: may be moved into box coder)
-        box_coder (dict): Config dict of box coders.
+        bbox_coder (dict): Config dict of box coders.
        loss_cls (dict): Config of classification loss.
        loss_bbox (dict): Config of localization loss.
        loss_dir (dict): Config of direction classifier loss.
@@ -89,6 +89,25 @@ class PartA2RPNHead(Anchor3DHead):
                          input_meta,
                          cfg,
                          rescale=False):
"""Get bboxes of single branch.
Args:
cls_scores (Tensor): Class score in single batch.
bbox_preds (Tensor): Bbox prediction in single batch.
dir_cls_preds (Tensor): Predictions of direction class
in single batch.
mlvl_anchors (List[Tensor]): Multi-level anchors in single batch.
input_meta (list[dict]): Contain pcd and img's meta info.
cfg (None | ConfigDict): Training or testing config.
rescale (list[Tensor]): whether th rescale bbox.
Returns:
dict: Predictions of single batch. Contain the keys:
- boxes_3d (:obj:BaseInstance3DBoxes): Predicted 3d bboxes.
- scores_3d (Tensor): Score of each bbox.
- labels_3d (Tensor): Label of each bbox.
- cls_preds (Tensor): Class score of each bbox.
"""
assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors) assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
mlvl_bboxes = [] mlvl_bboxes = []
mlvl_max_scores = [] mlvl_max_scores = []
...@@ -159,6 +178,28 @@ class PartA2RPNHead(Anchor3DHead): ...@@ -159,6 +178,28 @@ class PartA2RPNHead(Anchor3DHead):
mlvl_max_scores, mlvl_label_pred, mlvl_cls_score, mlvl_max_scores, mlvl_label_pred, mlvl_cls_score,
mlvl_dir_scores, score_thr, max_num, cfg, mlvl_dir_scores, score_thr, max_num, cfg,
input_meta): input_meta):
"""Class agnostic nms for single batch.
Args:
mlvl_bboxes (Tensor): Bboxes from Multi-level.
mlvl_bboxes_for_nms (Tensor): Bboxes for nms (bev or minmax boxes)
from Multi-level.
mlvl_max_scores (Tensor): Max scores of Multi-level bbox.
mlvl_label_pred (Tensor): Class predictions of Multi-level bbox.
mlvl_cls_score (Tensor): Class scores of Multi-level bbox.
mlvl_dir_scores (Tensor): Direction scores of Multi-level bbox.
score_thr (int): Score threshold.
max_num (int): Max number of bboxes after nms.
cfg (None | ConfigDict): Training or testing config.
input_meta (dict): Contain pcd and img's meta info.
Returns:
dict: Predictions of single batch. Contain the keys:
- boxes_3d (:obj:BaseInstance3DBoxes): Predicted 3d bboxes.
- scores_3d (Tensor): Score of each bbox.
- labels_3d (Tensor): Label of each bbox.
- cls_preds (Tensor): Class score of each bbox.
"""
bboxes = [] bboxes = []
scores = [] scores = []
labels = [] labels = []
......
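As documented above, `class_agnostic_nms` filters candidates by `score_thr`, runs a single NMS pass over boxes of all classes, caps the result at `max_num` and returns a `boxes_3d`/`scores_3d`/`labels_3d`/`cls_preds` dict. The rough sketch below follows that flow; it substitutes torchvision's axis-aligned `nms` for the rotated BEV NMS used by the real head, and the tensor shapes are made up for illustration.

```python
import torch
from torchvision.ops import nms  # axis-aligned NMS as a stand-in for rotated BEV NMS


def class_agnostic_nms_sketch(bboxes_3d, bboxes_for_nms, max_scores, labels,
                              cls_scores, score_thr=0.1, iou_thr=0.5, max_num=50):
    """Filter by score, run one class-agnostic NMS pass, cap at max_num."""
    keep = max_scores > score_thr
    bboxes_3d, bboxes_for_nms = bboxes_3d[keep], bboxes_for_nms[keep]
    max_scores, labels, cls_scores = max_scores[keep], labels[keep], cls_scores[keep]

    # One NMS call over all classes, then keep at most max_num boxes.
    selected = nms(bboxes_for_nms, max_scores, iou_thr)[:max_num]
    return dict(
        boxes_3d=bboxes_3d[selected],
        scores_3d=max_scores[selected],
        labels_3d=labels[selected],
        cls_preds=cls_scores[selected],
    )


# toy inputs: 100 candidates with 7-dim 3D boxes and valid (x1, y1, x2, y2) NMS boxes
xy = torch.rand(100, 2) * 10
nms_boxes = torch.cat([xy, xy + torch.rand(100, 2) * 2], dim=1)
out = class_agnostic_nms_sketch(torch.rand(100, 7), nms_boxes, torch.rand(100),
                                torch.randint(0, 3, (100,)), torch.rand(100, 3))
print(out['boxes_3d'].shape, out['labels_3d'].shape)
```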
@@ -20,12 +20,17 @@ class AnchorTrainMixin(object):
        Args:
            anchor_list (list[list]): Multi level anchors of each image.
-            gt_bboxes_list (list[BaseInstance3DBoxes]): Ground truth
+            gt_bboxes_list (list[:obj:`BaseInstance3DBoxes`]): Ground truth
                bboxes of each image.
-            img_metas (list[dict]): Meta info of each image.
+            input_metas (list[dict]): Meta info of each image.
+            gt_bboxes_ignore_list (None | list): Ignore list of gt bboxes.
+            gt_labels_list (list[Tensor]): Gt labels of batches.
+            label_channels (int): The channel of labels.
+            num_classes (int): The number of classes.
+            sampling (bool): Whether to sample anchors.
        Returns:
-            tuple
+            tuple: Anchor targets.
        """
        num_imgs = len(input_metas)
        assert len(anchor_list) == num_imgs
@@ -87,6 +92,21 @@ class AnchorTrainMixin(object):
                                label_channels=1,
                                num_classes=1,
                                sampling=True):
"""Compute targets of anchors in single batch.
Args:
anchors (Tensor): Concatenated multi-level anchor.
gt_bboxes (:obj:BaseInstance3DBoxes): Gt bboxes.
gt_bboxes_ignore (Tensor): Ignored gt bboxes.
gt_labels (Tensor): Gt class labels.
input_meta (dict): Meta info of each image.
label_channels (int): The channel of labels.
num_classes (int): The number of classes.
sampling (bool): Whether to sample anchors.
Returns:
tuple: Anchor targets.
"""
if isinstance(self.bbox_assigner, list): if isinstance(self.bbox_assigner, list):
feat_size = anchors.size(0) * anchors.size(1) * anchors.size(2) feat_size = anchors.size(0) * anchors.size(1) * anchors.size(2)
rot_angles = anchors.size(-2) rot_angles = anchors.size(-2)
...@@ -160,6 +180,22 @@ class AnchorTrainMixin(object): ...@@ -160,6 +180,22 @@ class AnchorTrainMixin(object):
label_channels=1, label_channels=1,
num_classes=1, num_classes=1,
sampling=True): sampling=True):
"""Assign anchors and encode positive anchors.
Args:
bbox_assigner (BaseAssigner): assign positive and negative boxes.
anchors (Tensor): Concatenated multi-level anchor.
gt_bboxes (:obj:BaseInstance3DBoxes): Gt bboxes.
gt_bboxes_ignore (Tensor): Ignored gt bboxes.
gt_labels (Tensor): Gt class labels.
input_meta (dict): Meta info of each image.
label_channels (int): The channel of labels.
num_classes (int): The number of classes.
sampling (bool): Whether to sample anchors.
Returns:
tuple: Anchor targets.
"""
anchors = anchors.reshape(-1, anchors.size(-1)) anchors = anchors.reshape(-1, anchors.size(-1))
num_valid_anchors = anchors.shape[0] num_valid_anchors = anchors.shape[0]
bbox_targets = torch.zeros_like(anchors) bbox_targets = torch.zeros_like(anchors)
...@@ -221,6 +257,18 @@ def get_direction_target(anchors, ...@@ -221,6 +257,18 @@ def get_direction_target(anchors,
dir_offset=0, dir_offset=0,
num_bins=2, num_bins=2,
one_hot=True): one_hot=True):
"""Encode direction to 0 ~ num_bins-1.
Args:
anchors (Tensor): Concatenated multi-level anchor.
reg_targets (Tensor): Bbox regression targets.
dir_offset (int): Direction offset.
num_bins (int): Number of bins to divide 2*PI.
one_hot (bool): Whether to encode as one hot.
Returns:
Tensor: Encoded direction targets.
"""
rot_gt = reg_targets[..., 6] + anchors[..., 6] rot_gt = reg_targets[..., 6] + anchors[..., 6]
offset_rot = box_torch_ops.limit_period(rot_gt - dir_offset, 0, 2 * np.pi) offset_rot = box_torch_ops.limit_period(rot_gt - dir_offset, 0, 2 * np.pi)
dir_cls_targets = torch.floor(offset_rot / (2 * np.pi / num_bins)).long() dir_cls_targets = torch.floor(offset_rot / (2 * np.pi / num_bins)).long()
......
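The hunk above shows how direction targets are computed: the ground-truth yaw is recovered as anchor yaw plus the regression residual, wrapped into [0, 2*pi) and binned into `num_bins` direction classes. Below is a small sketch of that encoding, with `limit_period` re-implemented locally under the assumption that it wraps values into [0, period); it is an illustration, not the library function.

```python
import numpy as np
import torch


def limit_period(val, offset=0.0, period=2 * np.pi):
    # Wrap values into [offset * period, (offset + 1) * period).
    return val - torch.floor(val / period + offset) * period


def get_direction_target_sketch(anchors, reg_targets, dir_offset=0.0,
                                num_bins=2, one_hot=True):
    """Bin the ground-truth yaw (anchor yaw + residual) into direction classes."""
    rot_gt = reg_targets[..., 6] + anchors[..., 6]           # recover absolute yaw
    offset_rot = limit_period(rot_gt - dir_offset, 0, 2 * np.pi)
    dir_cls_targets = torch.floor(offset_rot / (2 * np.pi / num_bins)).long()
    dir_cls_targets = torch.clamp(dir_cls_targets, min=0, max=num_bins - 1)
    if one_hot:
        one_hot_targets = torch.zeros(*dir_cls_targets.shape, num_bins)
        one_hot_targets.scatter_(-1, dir_cls_targets.unsqueeze(-1), 1.0)
        return one_hot_targets
    return dir_cls_targets


anchors = torch.zeros(4, 7)
anchors[:, 6] = torch.tensor([0.0, np.pi / 2, np.pi, -np.pi / 2])
reg_targets = torch.zeros(4, 7)  # zero yaw residuals for simplicity
print(get_direction_target_sketch(anchors, reg_targets, one_hot=False))  # tensor([0, 0, 1, 1])
```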
@@ -122,6 +122,9 @@ class VoteHead(nn.Module):
            feat_dict (dict): feature dict from backbone.
            sample_mod (str): sample mode for vote aggregation layer.
                valid modes are "vote", "seed" and "random".
+        Returns:
+            dict: Predictions of vote head.
        """
        assert sample_mod in ['vote', 'seed', 'random']
@@ -178,8 +181,24 @@ class VoteHead(nn.Module):
             gt_labels_3d,
             pts_semantic_mask=None,
             pts_instance_mask=None,
-             img_meta=None,
+             input_meta=None,
             gt_bboxes_ignore=None):
"""Compute loss.
Args:
bbox_preds (dict): Predictions from forward of vote head.
points (list[Tensor]): Input points.
gt_bboxes_3d (list[:obj:BaseInstance3DBoxes]): Gt bboxes
of each sample.
gt_labels_3d (list[Tensor]): Gt labels of each sample.
pts_semantic_mask (None | list[Tensor]): Point-wise semantic mask.
pts_instance_mask (None | list[Tensor]): Point-wise instance mask.
input_metas (list[dict]): Contain pcd and img's meta info.
gt_bboxes_ignore (None | list[Tensor]): Specify which bounding.
Returns:
dict: Losses of Votenet.
"""
targets = self.get_targets(points, gt_bboxes_3d, gt_labels_3d, targets = self.get_targets(points, gt_bboxes_3d, gt_labels_3d,
pts_semantic_mask, pts_instance_mask, pts_semantic_mask, pts_instance_mask,
bbox_preds) bbox_preds)
@@ -269,11 +288,12 @@ class VoteHead(nn.Module):
                    pts_semantic_mask=None,
                    pts_instance_mask=None,
                    bbox_preds=None):
-        """Get targets of vote head.
+        """Generate targets of vote head.
        Args:
            points (list[Tensor]): Points of each batch.
-            gt_bboxes_3d (BaseInstance3DBoxes): gt bboxes of each batch.
+            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): gt bboxes of
+                each batch.
            gt_labels_3d (list[Tensor]): gt class labels of each batch.
            pts_semantic_mask (None | list[Tensor]): point-wise semantic
                label of each batch.
@@ -284,7 +304,6 @@ class VoteHead(nn.Module):
        Returns:
            tuple: Targets of vote head.
        """
        # find empty example
        valid_gt_masks = list()
        gt_num = list()
@@ -355,6 +374,22 @@ class VoteHead(nn.Module):
                           pts_semantic_mask=None,
                           pts_instance_mask=None,
                           aggregated_points=None):
"""Generate targets of vote head for single batch.
Args:
points (Tensor): Points of each batch.
gt_bboxes_3d (:obj:BaseInstance3DBoxes): gt bboxes of each batch.
gt_labels_3d (Tensor): gt class labels of each batch.
pts_semantic_mask (None | Tensor): point-wise semantic
label of each batch.
pts_instance_mask (None | Tensor): point-wise instance
label of each batch.
aggregated_points (Tensor): Aggregated points from
vote aggregation layer.
Returns:
tuple: Targets of vote head.
"""
assert self.bbox_coder.with_rot or pts_semantic_mask is not None assert self.bbox_coder.with_rot or pts_semantic_mask is not None
gt_bboxes_3d = gt_bboxes_3d.to(points.device) gt_bboxes_3d = gt_bboxes_3d.to(points.device)
...@@ -451,19 +486,30 @@ class VoteHead(nn.Module): ...@@ -451,19 +486,30 @@ class VoteHead(nn.Module):
dir_class_targets, dir_res_targets, center_targets, dir_class_targets, dir_res_targets, center_targets,
mask_targets.long(), objectness_targets, objectness_masks) mask_targets.long(), objectness_targets, objectness_masks)
-    def get_bboxes(self, points, bbox_preds, input_meta, rescale=False):
+    def get_bboxes(self, points, bbox_preds, input_metas, rescale=False):
+        """Generate bboxes from vote head predictions.
+        Args:
+            points (Tensor): Input points.
+            bbox_preds (dict): Predictions from vote head.
+            input_metas (list[dict]): Contain pcd and img's meta info.
+            rescale (bool): Whether to rescale bboxes.
+        Returns:
+            list[tuple[Tensor]]: Bboxes, scores and labels of each sample.
+        """
        # decode boxes
        obj_scores = F.softmax(bbox_preds['obj_scores'], dim=-1)[..., -1]
        sem_scores = F.softmax(bbox_preds['sem_scores'], dim=-1)
-        bbox_depth = self.bbox_coder.decode(bbox_preds)
-        batch_size = bbox_depth.shape[0]
+        bbox3d = self.bbox_coder.decode(bbox_preds)
+        batch_size = bbox3d.shape[0]
        results = list()
        for b in range(batch_size):
-            bbox_selected, score_selected, labels = self.multiclass_nms_single(
-                obj_scores[b], sem_scores[b], bbox_depth[b],
-                points[b, ..., :3], input_meta[b])
-            bbox = input_meta[b]['box_type_3d'](
+            bbox_selected, score_selected, labels = self.multiclass_nms_single(
+                obj_scores[b], sem_scores[b], bbox3d[b], points[b, ..., :3],
+                input_metas[b])
+            bbox = input_metas[b]['box_type_3d'](
                bbox_selected,
                box_dim=bbox_selected.shape[-1],
                with_yaw=self.bbox_coder.with_rot)
@@ -473,6 +519,18 @@ class VoteHead(nn.Module):
    def multiclass_nms_single(self, obj_scores, sem_scores, bbox, points,
                              input_meta):
+        """Multi-class nms in single batch.
+        Args:
+            obj_scores (Tensor): Objectness score of bboxes.
+            sem_scores (Tensor): Semantic class score of bboxes.
+            bbox (Tensor): Predicted bbox.
+            points (Tensor): Input points.
+            input_meta (dict): Contain pcd and img's meta info.
+        Returns:
+            tuple[Tensor]: Bboxes, scores and labels.
+        """
        bbox = input_meta['box_type_3d'](
            bbox,
            box_dim=bbox.shape[-1],
......
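`get_bboxes` in the VoteHead hunk above softmaxes the objectness and semantic scores, decodes the predicted boxes, and then selects boxes sample by sample via `multiclass_nms_single` before wrapping them with `box_type_3d`. The sketch below mirrors that loop with a plain objectness threshold standing in for the NMS step and the box wrapper; the shapes and helper name are assumptions for illustration only.

```python
import torch
import torch.nn.functional as F


def get_bboxes_sketch(obj_score_logits, sem_score_logits, bbox3d, score_thr=0.5):
    """Per-sample selection mirroring the loop above; a score threshold
    stands in for ``multiclass_nms_single`` and the ``box_type_3d`` wrapper."""
    obj_scores = F.softmax(obj_score_logits, dim=-1)[..., -1]  # P(object)
    sem_scores = F.softmax(sem_score_logits, dim=-1)           # per-class scores
    results = []
    for b in range(bbox3d.shape[0]):                           # one sample at a time
        keep = obj_scores[b] > score_thr
        bbox_selected = bbox3d[b][keep]
        score_selected = obj_scores[b][keep]
        labels = sem_scores[b][keep].argmax(dim=-1)
        results.append((bbox_selected, score_selected, labels))
    return results


# toy predictions: batch of 2 samples, 256 proposals, 10 classes, 7-dim boxes
out = get_bboxes_sketch(torch.randn(2, 256, 2), torch.randn(2, 256, 10),
                        torch.randn(2, 256, 7))
print(len(out), out[0][0].shape[-1])  # 2 samples, 7-dim boxes
```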
@@ -43,7 +43,7 @@ class VoteNet(SingleStageDetector):
        Args:
            points (list[Tensor]): Points of each batch.
            img_meta (list): Image metas.
-            gt_bboxes_3d (BaseInstance3DBoxes): gt bboxes of each batch.
+            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes of each batch.
            gt_labels_3d (list[Tensor]): gt class labels of each batch.
            pts_semantic_mask (None | list[Tensor]): point-wise semantic
                label of each batch.
@@ -86,7 +86,7 @@ class VoteNet(SingleStageDetector):
        Args:
            points (list[Tensor]): Points of each sample.
            img_meta (list): Image metas.
-            gt_bboxes_3d (BaseInstance3DBoxes): gt bboxes of each sample.
+            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes of each sample.
            gt_labels_3d (list[Tensor]): gt class labels of each sample.
            pts_semantic_mask (None | list[Tensor]): point-wise semantic
                label of each sample.
......